mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-29 16:24:26 +01:00
Merge #4626
4626: Edit Documents with Rhai r=ManyTheFish a=Kerollmops This PR introduces a first version of [the _Update Documents with Function_ (internal)](https://www.notion.so/meilisearch/Update-Documents-by-Function-45f87b13e61c4435b73943768a490808). It uses [the Rhai programming language](https://rhai.rs/) to let users express the modifications they want to apply. You can read more about the way to use these functions on [the Usage PRD Page](https://meilisearch.notion.site/Edit-Documents-with-Rhai-0cff8fea7655436592e7c8a6de932062?pvs=25). The [prototype is available](https://github.com/meilisearch/meilisearch/actions/runs/9038384483) through Docker by using the following command: ``` docker run -p 7700:7700 -v $(pwd)/meili_data:/meili_data getmeili/meilisearch:prototype-edit-documents-with-rhai-3 ``` ## TODO - [x] Support the `DocumentEdition` task in dumps. - [x] Remove the unwraps and panics. - [x] Improve error codes for the `function` parameter. - [x] [Update Rhai to v1.19.0](https://github.com/rhaiscript/rhai/releases/tag/v1.19.0) 🚀 - [x] Make it an experimental feature (only restrict the HTTP calls). - [x] It must be possible not to send a context. - [x] Rebase on main. - [x] Check that the script cannot do any I/O. - [x] ~Introduce a `Documents.edit` action or~ require the `Documents.all` action. - [x] Change the `editionCode` to the clearer `function` field name in the tasks. - [x] Support a user-provided context and maybe more (but keep function execution isolated for reproducibility). - [x] Support deleting documents when the `doc` is `()` (nil, null). - [x] Support canceling document edition. - [x] Multithread document edition by using rayon (and [rayon-par-bridge](https://docs.rs/rayon-par-bridge/latest/rayon_par_bridge/)). - [x] Limit the number of instructions per function execution. - [ ] ~Expose the limit of instructions in the settings.~ Not sure, in fact. - [x] Ignore unmodified documents in the tasks count. 
- [x] Make the `filter` field optional (not forced to be `null`). Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
commit
29b44e5541
888
Cargo.lock
generated
888
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -104,6 +104,11 @@ pub enum KindDump {
|
|||||||
DocumentDeletionByFilter {
|
DocumentDeletionByFilter {
|
||||||
filter: serde_json::Value,
|
filter: serde_json::Value,
|
||||||
},
|
},
|
||||||
|
DocumentEdition {
|
||||||
|
filter: Option<serde_json::Value>,
|
||||||
|
context: Option<serde_json::Map<String, serde_json::Value>>,
|
||||||
|
function: String,
|
||||||
|
},
|
||||||
Settings {
|
Settings {
|
||||||
settings: Box<meilisearch_types::settings::Settings<Unchecked>>,
|
settings: Box<meilisearch_types::settings::Settings<Unchecked>>,
|
||||||
is_deletion: bool,
|
is_deletion: bool,
|
||||||
@ -172,6 +177,9 @@ impl From<KindWithContent> for KindDump {
|
|||||||
KindWithContent::DocumentDeletionByFilter { filter_expr, .. } => {
|
KindWithContent::DocumentDeletionByFilter { filter_expr, .. } => {
|
||||||
KindDump::DocumentDeletionByFilter { filter: filter_expr }
|
KindDump::DocumentDeletionByFilter { filter: filter_expr }
|
||||||
}
|
}
|
||||||
|
KindWithContent::DocumentEdition { filter_expr, context, function, .. } => {
|
||||||
|
KindDump::DocumentEdition { filter: filter_expr, context, function }
|
||||||
|
}
|
||||||
KindWithContent::DocumentClear { .. } => KindDump::DocumentClear,
|
KindWithContent::DocumentClear { .. } => KindDump::DocumentClear,
|
||||||
KindWithContent::SettingsUpdate {
|
KindWithContent::SettingsUpdate {
|
||||||
new_settings,
|
new_settings,
|
||||||
|
@ -24,6 +24,7 @@ enum AutobatchKind {
|
|||||||
allow_index_creation: bool,
|
allow_index_creation: bool,
|
||||||
primary_key: Option<String>,
|
primary_key: Option<String>,
|
||||||
},
|
},
|
||||||
|
DocumentEdition,
|
||||||
DocumentDeletion,
|
DocumentDeletion,
|
||||||
DocumentDeletionByFilter,
|
DocumentDeletionByFilter,
|
||||||
DocumentClear,
|
DocumentClear,
|
||||||
@ -63,6 +64,7 @@ impl From<KindWithContent> for AutobatchKind {
|
|||||||
primary_key,
|
primary_key,
|
||||||
..
|
..
|
||||||
} => AutobatchKind::DocumentImport { method, allow_index_creation, primary_key },
|
} => AutobatchKind::DocumentImport { method, allow_index_creation, primary_key },
|
||||||
|
KindWithContent::DocumentEdition { .. } => AutobatchKind::DocumentEdition,
|
||||||
KindWithContent::DocumentDeletion { .. } => AutobatchKind::DocumentDeletion,
|
KindWithContent::DocumentDeletion { .. } => AutobatchKind::DocumentDeletion,
|
||||||
KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear,
|
KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear,
|
||||||
KindWithContent::DocumentDeletionByFilter { .. } => {
|
KindWithContent::DocumentDeletionByFilter { .. } => {
|
||||||
@ -98,6 +100,9 @@ pub enum BatchKind {
|
|||||||
primary_key: Option<String>,
|
primary_key: Option<String>,
|
||||||
operation_ids: Vec<TaskId>,
|
operation_ids: Vec<TaskId>,
|
||||||
},
|
},
|
||||||
|
DocumentEdition {
|
||||||
|
id: TaskId,
|
||||||
|
},
|
||||||
DocumentDeletion {
|
DocumentDeletion {
|
||||||
deletion_ids: Vec<TaskId>,
|
deletion_ids: Vec<TaskId>,
|
||||||
},
|
},
|
||||||
@ -199,6 +204,7 @@ impl BatchKind {
|
|||||||
}),
|
}),
|
||||||
allow_index_creation,
|
allow_index_creation,
|
||||||
),
|
),
|
||||||
|
K::DocumentEdition => (Break(BatchKind::DocumentEdition { id: task_id }), false),
|
||||||
K::DocumentDeletion => {
|
K::DocumentDeletion => {
|
||||||
(Continue(BatchKind::DocumentDeletion { deletion_ids: vec![task_id] }), false)
|
(Continue(BatchKind::DocumentDeletion { deletion_ids: vec![task_id] }), false)
|
||||||
}
|
}
|
||||||
@ -222,7 +228,7 @@ impl BatchKind {
|
|||||||
|
|
||||||
match (self, kind) {
|
match (self, kind) {
|
||||||
// We don't batch any of these operations
|
// We don't batch any of these operations
|
||||||
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentDeletionByFilter) => Break(this),
|
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition | K::DocumentDeletionByFilter) => Break(this),
|
||||||
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
|
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
|
||||||
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
|
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
|
||||||
Break(this)
|
Break(this)
|
||||||
@ -519,6 +525,7 @@ impl BatchKind {
|
|||||||
| BatchKind::IndexDeletion { .. }
|
| BatchKind::IndexDeletion { .. }
|
||||||
| BatchKind::IndexUpdate { .. }
|
| BatchKind::IndexUpdate { .. }
|
||||||
| BatchKind::IndexSwap { .. }
|
| BatchKind::IndexSwap { .. }
|
||||||
|
| BatchKind::DocumentEdition { .. }
|
||||||
| BatchKind::DocumentDeletionByFilter { .. },
|
| BatchKind::DocumentDeletionByFilter { .. },
|
||||||
_,
|
_,
|
||||||
) => {
|
) => {
|
||||||
|
@ -34,7 +34,7 @@ use meilisearch_types::milli::update::{
|
|||||||
use meilisearch_types::milli::vector::parsed_vectors::{
|
use meilisearch_types::milli::vector::parsed_vectors::{
|
||||||
ExplicitVectors, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME,
|
ExplicitVectors, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME,
|
||||||
};
|
};
|
||||||
use meilisearch_types::milli::{self, Filter};
|
use meilisearch_types::milli::{self, Filter, Object};
|
||||||
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
|
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
|
||||||
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
|
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
|
||||||
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
|
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
|
||||||
@ -106,6 +106,10 @@ pub(crate) enum IndexOperation {
|
|||||||
operations: Vec<DocumentOperation>,
|
operations: Vec<DocumentOperation>,
|
||||||
tasks: Vec<Task>,
|
tasks: Vec<Task>,
|
||||||
},
|
},
|
||||||
|
DocumentEdition {
|
||||||
|
index_uid: String,
|
||||||
|
task: Task,
|
||||||
|
},
|
||||||
IndexDocumentDeletionByFilter {
|
IndexDocumentDeletionByFilter {
|
||||||
index_uid: String,
|
index_uid: String,
|
||||||
task: Task,
|
task: Task,
|
||||||
@ -164,7 +168,8 @@ impl Batch {
|
|||||||
| IndexOperation::DocumentClear { tasks, .. } => {
|
| IndexOperation::DocumentClear { tasks, .. } => {
|
||||||
RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
|
RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
|
||||||
}
|
}
|
||||||
IndexOperation::IndexDocumentDeletionByFilter { task, .. } => {
|
IndexOperation::DocumentEdition { task, .. }
|
||||||
|
| IndexOperation::IndexDocumentDeletionByFilter { task, .. } => {
|
||||||
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
|
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
|
||||||
}
|
}
|
||||||
IndexOperation::SettingsAndDocumentOperation {
|
IndexOperation::SettingsAndDocumentOperation {
|
||||||
@ -228,6 +233,7 @@ impl IndexOperation {
|
|||||||
pub fn index_uid(&self) -> &str {
|
pub fn index_uid(&self) -> &str {
|
||||||
match self {
|
match self {
|
||||||
IndexOperation::DocumentOperation { index_uid, .. }
|
IndexOperation::DocumentOperation { index_uid, .. }
|
||||||
|
| IndexOperation::DocumentEdition { index_uid, .. }
|
||||||
| IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. }
|
| IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. }
|
||||||
| IndexOperation::DocumentClear { index_uid, .. }
|
| IndexOperation::DocumentClear { index_uid, .. }
|
||||||
| IndexOperation::Settings { index_uid, .. }
|
| IndexOperation::Settings { index_uid, .. }
|
||||||
@ -243,6 +249,9 @@ impl fmt::Display for IndexOperation {
|
|||||||
IndexOperation::DocumentOperation { .. } => {
|
IndexOperation::DocumentOperation { .. } => {
|
||||||
f.write_str("IndexOperation::DocumentOperation")
|
f.write_str("IndexOperation::DocumentOperation")
|
||||||
}
|
}
|
||||||
|
IndexOperation::DocumentEdition { .. } => {
|
||||||
|
f.write_str("IndexOperation::DocumentEdition")
|
||||||
|
}
|
||||||
IndexOperation::IndexDocumentDeletionByFilter { .. } => {
|
IndexOperation::IndexDocumentDeletionByFilter { .. } => {
|
||||||
f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
|
f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
|
||||||
}
|
}
|
||||||
@ -295,6 +304,21 @@ impl IndexScheduler {
|
|||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
BatchKind::DocumentEdition { id } => {
|
||||||
|
let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||||
|
match &task.kind {
|
||||||
|
KindWithContent::DocumentEdition { index_uid, .. } => {
|
||||||
|
Ok(Some(Batch::IndexOperation {
|
||||||
|
op: IndexOperation::DocumentEdition {
|
||||||
|
index_uid: index_uid.clone(),
|
||||||
|
task,
|
||||||
|
},
|
||||||
|
must_create_index: false,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
BatchKind::DocumentOperation { method, operation_ids, .. } => {
|
BatchKind::DocumentOperation { method, operation_ids, .. } => {
|
||||||
let tasks = self.get_existing_tasks(rtxn, operation_ids)?;
|
let tasks = self.get_existing_tasks(rtxn, operation_ids)?;
|
||||||
let primary_key = tasks
|
let primary_key = tasks
|
||||||
@ -1386,6 +1410,64 @@ impl IndexScheduler {
|
|||||||
|
|
||||||
Ok(tasks)
|
Ok(tasks)
|
||||||
}
|
}
|
||||||
|
IndexOperation::DocumentEdition { mut task, .. } => {
|
||||||
|
let (filter, context, function) =
|
||||||
|
if let KindWithContent::DocumentEdition {
|
||||||
|
filter_expr, context, function, ..
|
||||||
|
} = &task.kind
|
||||||
|
{
|
||||||
|
(filter_expr, context, function)
|
||||||
|
} else {
|
||||||
|
unreachable!()
|
||||||
|
};
|
||||||
|
let result_count = edit_documents_by_function(
|
||||||
|
index_wtxn,
|
||||||
|
filter,
|
||||||
|
context.clone(),
|
||||||
|
function,
|
||||||
|
self.index_mapper.indexer_config(),
|
||||||
|
self.must_stop_processing.clone(),
|
||||||
|
index,
|
||||||
|
);
|
||||||
|
let (original_filter, context, function) = if let Some(Details::DocumentEdition {
|
||||||
|
original_filter,
|
||||||
|
context,
|
||||||
|
function,
|
||||||
|
..
|
||||||
|
}) = task.details
|
||||||
|
{
|
||||||
|
(original_filter, context, function)
|
||||||
|
} else {
|
||||||
|
// In the case of a `documentEdition` the details MUST be set
|
||||||
|
unreachable!();
|
||||||
|
};
|
||||||
|
|
||||||
|
match result_count {
|
||||||
|
Ok((deleted_documents, edited_documents)) => {
|
||||||
|
task.status = Status::Succeeded;
|
||||||
|
task.details = Some(Details::DocumentEdition {
|
||||||
|
original_filter,
|
||||||
|
context,
|
||||||
|
function,
|
||||||
|
deleted_documents: Some(deleted_documents),
|
||||||
|
edited_documents: Some(edited_documents),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
task.status = Status::Failed;
|
||||||
|
task.details = Some(Details::DocumentEdition {
|
||||||
|
original_filter,
|
||||||
|
context,
|
||||||
|
function,
|
||||||
|
deleted_documents: Some(0),
|
||||||
|
edited_documents: Some(0),
|
||||||
|
});
|
||||||
|
task.error = Some(e.into());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(vec![task])
|
||||||
|
}
|
||||||
IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
|
IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
|
||||||
let filter =
|
let filter =
|
||||||
if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } =
|
if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } =
|
||||||
@ -1674,3 +1756,44 @@ fn delete_document_by_filter<'a>(
|
|||||||
0
|
0
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Runs the user-supplied edition function `code` over the documents of `index`
/// selected by `filter`, inside the write transaction `wtxn`.
///
/// Candidate selection:
/// - `filter` is `Some` and parses to a filter: the filter is evaluated against the index.
/// - `filter` is `None`, or parses to "no filter": every document id of the index is used.
/// - `filter` fails to parse: the parse error is returned.
///
/// On success returns the pair of counters produced by `edit_documents`; the caller
/// in this file destructures it as `(deleted_documents, edited_documents)`.
///
/// # Errors
///
/// Propagates errors from filter parsing/evaluation, from reading the document ids,
/// from building the indexer, from `edit_documents` and from `execute`. An
/// `InvalidFilter` user error raised while evaluating the filter is re-tagged with
/// the `InvalidDocumentFilter` error code.
///
/// # Panics
///
/// NOTE(review): the final `count.unwrap()` panics if `count` is an error/empty
/// value. The type of `count` is not visible from here -- confirm it cannot fail
/// once `execute` has succeeded, or propagate it instead.
fn edit_documents_by_function<'a>(
|
||||||
|
wtxn: &mut RwTxn<'a>,
|
||||||
|
filter: &Option<serde_json::Value>,
|
||||||
|
context: Option<Object>,
|
||||||
|
code: &str,
|
||||||
|
indexer_config: &IndexerConfig,
|
||||||
|
must_stop_processing: MustStopProcessing,
|
||||||
|
index: &'a Index,
|
||||||
|
) -> Result<(u64, u64)> {
|
||||||
|
// Resolve the set of documents the function will be applied to.
let candidates = match filter.as_ref().map(Filter::from_json) {
|
||||||
|
Some(Ok(Some(filter))) => filter.evaluate(wtxn, index).map_err(|err| match err {
|
||||||
|
// Report an invalid filter with the document-filter specific error code.
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
|
||||||
|
Error::from(err).with_custom_error_code(Code::InvalidDocumentFilter)
|
||||||
|
}
|
||||||
|
e => e.into(),
|
||||||
|
})?,
|
||||||
|
// No filter given (or it parsed to nothing): every document is a candidate.
None | Some(Ok(None)) => index.documents_ids(wtxn)?,
|
||||||
|
Some(Err(e)) => return Err(e.into()),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Index with the `ReplaceDocuments` method; all other settings keep their defaults.
let config = IndexDocumentsConfig {
|
||||||
|
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut builder = milli::update::IndexDocuments::new(
|
||||||
|
wtxn,
|
||||||
|
index,
|
||||||
|
indexer_config,
|
||||||
|
config,
|
||||||
|
// Progress callback: each indexing step is logged at debug level.
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||||
|
// Stop callback: reads the scheduler's stop flag (presumably polled to abort early -- confirm).
|| must_stop_processing.get(),
|
||||||
|
)?;
|
||||||
|
|
||||||
|
// `edit_documents` hands back a builder together with the counters; keep using the returned builder.
let (new_builder, count) = builder.edit_documents(&candidates, context, code)?;
|
||||||
|
builder = new_builder;
|
||||||
|
|
||||||
|
// The value returned by `execute` is intentionally discarded; only its error matters here.
let _ = builder.execute()?;
|
||||||
|
// NOTE(review): panics on failure instead of returning an error -- see `# Panics` above.
Ok(count.unwrap())
|
||||||
|
}
|
||||||
|
@ -68,6 +68,19 @@ impl RoFeatures {
|
|||||||
.into())
|
.into())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Checks that the `edit_documents_by_function` runtime feature flag is enabled.
///
/// Returns `Ok(())` when `self.runtime.edit_documents_by_function` is `true`.
///
/// # Errors
///
/// When the flag is `false`, returns a `FeatureNotEnabledError` (converted with
/// `.into()`) carrying the caller-provided `disabled_action`, the feature name
/// `"edit documents by function"` and the link to the related discussion.
pub fn check_edit_documents_by_function(&self, disabled_action: &'static str) -> Result<()> {
|
||||||
|
if self.runtime.edit_documents_by_function {
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
// Feature is off: build the error describing which action was refused and why.
Err(FeatureNotEnabledError {
|
||||||
|
disabled_action,
|
||||||
|
feature: "edit documents by function",
|
||||||
|
issue_link: "https://github.com/orgs/meilisearch/discussions/762",
|
||||||
|
}
|
||||||
|
.into())
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FeatureData {
|
impl FeatureData {
|
||||||
|
@ -177,6 +177,17 @@ fn snapshot_details(d: &Details) -> String {
|
|||||||
} => {
|
} => {
|
||||||
format!("{{ received_documents: {received_documents}, indexed_documents: {indexed_documents:?} }}")
|
format!("{{ received_documents: {received_documents}, indexed_documents: {indexed_documents:?} }}")
|
||||||
}
|
}
|
||||||
|
Details::DocumentEdition {
|
||||||
|
deleted_documents,
|
||||||
|
edited_documents,
|
||||||
|
original_filter,
|
||||||
|
context,
|
||||||
|
function,
|
||||||
|
} => {
|
||||||
|
format!(
|
||||||
|
"{{ deleted_documents: {deleted_documents:?}, edited_documents: {edited_documents:?}, context: {context:?}, function: {function:?}, original_filter: {original_filter:?} }}"
|
||||||
|
)
|
||||||
|
}
|
||||||
Details::SettingsUpdate { settings } => {
|
Details::SettingsUpdate { settings } => {
|
||||||
format!("{{ settings: {settings:?} }}")
|
format!("{{ settings: {settings:?} }}")
|
||||||
}
|
}
|
||||||
|
@ -1603,6 +1603,14 @@ impl<'a> Dump<'a> {
|
|||||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
KindDump::DocumentEdition { filter, context, function } => {
|
||||||
|
KindWithContent::DocumentEdition {
|
||||||
|
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||||
|
filter_expr: filter,
|
||||||
|
context,
|
||||||
|
function,
|
||||||
|
}
|
||||||
|
}
|
||||||
KindDump::DocumentClear => KindWithContent::DocumentClear {
|
KindDump::DocumentClear => KindWithContent::DocumentClear {
|
||||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||||
},
|
},
|
||||||
@ -4744,6 +4752,7 @@ mod tests {
|
|||||||
"types": {
|
"types": {
|
||||||
"documentAdditionOrUpdate": 0,
|
"documentAdditionOrUpdate": 0,
|
||||||
"documentDeletion": 0,
|
"documentDeletion": 0,
|
||||||
|
"documentEdition": 0,
|
||||||
"dumpCreation": 0,
|
"dumpCreation": 0,
|
||||||
"indexCreation": 3,
|
"indexCreation": 3,
|
||||||
"indexDeletion": 0,
|
"indexDeletion": 0,
|
||||||
@ -4775,6 +4784,7 @@ mod tests {
|
|||||||
"types": {
|
"types": {
|
||||||
"documentAdditionOrUpdate": 0,
|
"documentAdditionOrUpdate": 0,
|
||||||
"documentDeletion": 0,
|
"documentDeletion": 0,
|
||||||
|
"documentEdition": 0,
|
||||||
"dumpCreation": 0,
|
"dumpCreation": 0,
|
||||||
"indexCreation": 3,
|
"indexCreation": 3,
|
||||||
"indexDeletion": 0,
|
"indexDeletion": 0,
|
||||||
@ -4813,6 +4823,7 @@ mod tests {
|
|||||||
"types": {
|
"types": {
|
||||||
"documentAdditionOrUpdate": 0,
|
"documentAdditionOrUpdate": 0,
|
||||||
"documentDeletion": 0,
|
"documentDeletion": 0,
|
||||||
|
"documentEdition": 0,
|
||||||
"dumpCreation": 0,
|
"dumpCreation": 0,
|
||||||
"indexCreation": 3,
|
"indexCreation": 3,
|
||||||
"indexDeletion": 0,
|
"indexDeletion": 0,
|
||||||
@ -4852,6 +4863,7 @@ mod tests {
|
|||||||
"types": {
|
"types": {
|
||||||
"documentAdditionOrUpdate": 0,
|
"documentAdditionOrUpdate": 0,
|
||||||
"documentDeletion": 0,
|
"documentDeletion": 0,
|
||||||
|
"documentEdition": 0,
|
||||||
"dumpCreation": 0,
|
"dumpCreation": 0,
|
||||||
"indexCreation": 3,
|
"indexCreation": 3,
|
||||||
"indexDeletion": 0,
|
"indexDeletion": 0,
|
||||||
|
@ -238,6 +238,7 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
|
|||||||
let mut index_uids = vec![];
|
let mut index_uids = vec![];
|
||||||
match &mut task.kind {
|
match &mut task.kind {
|
||||||
K::DocumentAdditionOrUpdate { index_uid, .. } => index_uids.push(index_uid),
|
K::DocumentAdditionOrUpdate { index_uid, .. } => index_uids.push(index_uid),
|
||||||
|
K::DocumentEdition { index_uid, .. } => index_uids.push(index_uid),
|
||||||
K::DocumentDeletion { index_uid, .. } => index_uids.push(index_uid),
|
K::DocumentDeletion { index_uid, .. } => index_uids.push(index_uid),
|
||||||
K::DocumentDeletionByFilter { index_uid, .. } => index_uids.push(index_uid),
|
K::DocumentDeletionByFilter { index_uid, .. } => index_uids.push(index_uid),
|
||||||
K::DocumentClear { index_uid } => index_uids.push(index_uid),
|
K::DocumentClear { index_uid } => index_uids.push(index_uid),
|
||||||
@ -408,7 +409,26 @@ impl IndexScheduler {
|
|||||||
match status {
|
match status {
|
||||||
Status::Succeeded => assert!(indexed_documents <= received_documents),
|
Status::Succeeded => assert!(indexed_documents <= received_documents),
|
||||||
Status::Failed | Status::Canceled => assert_eq!(indexed_documents, 0),
|
Status::Failed | Status::Canceled => assert_eq!(indexed_documents, 0),
|
||||||
status => panic!("DocumentAddition can't have an indexed_document set if it's {}", status),
|
status => panic!("DocumentAddition can't have an indexed_documents set if it's {}", status),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
assert!(matches!(status, Status::Enqueued | Status::Processing))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Details::DocumentEdition { edited_documents, .. } => {
|
||||||
|
assert_eq!(kind.as_kind(), Kind::DocumentEdition);
|
||||||
|
match edited_documents {
|
||||||
|
Some(edited_documents) => {
|
||||||
|
assert!(matches!(
|
||||||
|
status,
|
||||||
|
Status::Succeeded | Status::Failed | Status::Canceled
|
||||||
|
));
|
||||||
|
match status {
|
||||||
|
Status::Succeeded => (),
|
||||||
|
Status::Failed | Status::Canceled => assert_eq!(edited_documents, 0),
|
||||||
|
status => panic!("DocumentEdition can't have an edited_documents set if it's {}", status),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
|
@ -155,6 +155,10 @@ make_missing_field_convenience_builder!(
|
|||||||
MissingFacetSearchFacetName,
|
MissingFacetSearchFacetName,
|
||||||
missing_facet_search_facet_name
|
missing_facet_search_facet_name
|
||||||
);
|
);
|
||||||
|
make_missing_field_convenience_builder!(
|
||||||
|
MissingDocumentEditionFunction,
|
||||||
|
missing_document_edition_function
|
||||||
|
);
|
||||||
|
|
||||||
// Integrate a sub-error into a [`DeserrError`] by taking its error message but using
|
// Integrate a sub-error into a [`DeserrError`] by taking its error message but using
|
||||||
// the default error code (C) from `Self`
|
// the default error code (C) from `Self`
|
||||||
|
@ -224,6 +224,7 @@ InvalidDocumentCsvDelimiter , InvalidRequest , BAD_REQUEST ;
|
|||||||
InvalidDocumentFields , InvalidRequest , BAD_REQUEST ;
|
InvalidDocumentFields , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidDocumentRetrieveVectors , InvalidRequest , BAD_REQUEST ;
|
InvalidDocumentRetrieveVectors , InvalidRequest , BAD_REQUEST ;
|
||||||
MissingDocumentFilter , InvalidRequest , BAD_REQUEST ;
|
MissingDocumentFilter , InvalidRequest , BAD_REQUEST ;
|
||||||
|
MissingDocumentEditionFunction , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
|
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
|
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
|
InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
|
||||||
@ -336,7 +337,10 @@ UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA
|
|||||||
|
|
||||||
// Experimental features
|
// Experimental features
|
||||||
VectorEmbeddingError , InvalidRequest , BAD_REQUEST ;
|
VectorEmbeddingError , InvalidRequest , BAD_REQUEST ;
|
||||||
NotFoundSimilarId , InvalidRequest , BAD_REQUEST
|
NotFoundSimilarId , InvalidRequest , BAD_REQUEST ;
|
||||||
|
InvalidDocumentEditionContext , InvalidRequest , BAD_REQUEST ;
|
||||||
|
InvalidDocumentEditionFunctionFilter , InvalidRequest , BAD_REQUEST ;
|
||||||
|
EditDocumentsByFunctionError , InvalidRequest , BAD_REQUEST
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ErrorCode for JoinError {
|
impl ErrorCode for JoinError {
|
||||||
@ -407,6 +411,12 @@ impl ErrorCode for milli::Error {
|
|||||||
}
|
}
|
||||||
UserError::InvalidEmbedder(_) => Code::InvalidEmbedder,
|
UserError::InvalidEmbedder(_) => Code::InvalidEmbedder,
|
||||||
UserError::VectorEmbeddingError(_) => Code::VectorEmbeddingError,
|
UserError::VectorEmbeddingError(_) => Code::VectorEmbeddingError,
|
||||||
|
UserError::DocumentEditionCannotModifyPrimaryKey
|
||||||
|
| UserError::DocumentEditionDocumentMustBeObject
|
||||||
|
| UserError::DocumentEditionRuntimeError(_)
|
||||||
|
| UserError::DocumentEditionCompilationError(_) => {
|
||||||
|
Code::EditDocumentsByFunctionError
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6,6 +6,7 @@ pub struct RuntimeTogglableFeatures {
|
|||||||
pub vector_store: bool,
|
pub vector_store: bool,
|
||||||
pub metrics: bool,
|
pub metrics: bool,
|
||||||
pub logs_route: bool,
|
pub logs_route: bool,
|
||||||
|
pub edit_documents_by_function: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default, Debug, Clone, Copy)]
|
#[derive(Default, Debug, Clone, Copy)]
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
use milli::Object;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use time::{Duration, OffsetDateTime};
|
use time::{Duration, OffsetDateTime};
|
||||||
|
|
||||||
@ -54,6 +55,8 @@ pub struct DetailsView {
|
|||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub indexed_documents: Option<Option<u64>>,
|
pub indexed_documents: Option<Option<u64>>,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub edited_documents: Option<Option<u64>>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub primary_key: Option<Option<String>>,
|
pub primary_key: Option<Option<String>>,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub provided_ids: Option<usize>,
|
pub provided_ids: Option<usize>,
|
||||||
@ -70,6 +73,10 @@ pub struct DetailsView {
|
|||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub dump_uid: Option<Option<String>>,
|
pub dump_uid: Option<Option<String>>,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub context: Option<Option<Object>>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub function: Option<String>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
#[serde(flatten)]
|
#[serde(flatten)]
|
||||||
pub settings: Option<Box<Settings<Unchecked>>>,
|
pub settings: Option<Box<Settings<Unchecked>>>,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
@ -86,6 +93,20 @@ impl From<Details> for DetailsView {
|
|||||||
..DetailsView::default()
|
..DetailsView::default()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Details::DocumentEdition {
|
||||||
|
deleted_documents,
|
||||||
|
edited_documents,
|
||||||
|
original_filter,
|
||||||
|
context,
|
||||||
|
function,
|
||||||
|
} => DetailsView {
|
||||||
|
deleted_documents: Some(deleted_documents),
|
||||||
|
edited_documents: Some(edited_documents),
|
||||||
|
original_filter: Some(original_filter),
|
||||||
|
context: Some(context),
|
||||||
|
function: Some(function),
|
||||||
|
..DetailsView::default()
|
||||||
|
},
|
||||||
Details::SettingsUpdate { mut settings } => {
|
Details::SettingsUpdate { mut settings } => {
|
||||||
settings.hide_secrets();
|
settings.hide_secrets();
|
||||||
DetailsView { settings: Some(settings), ..DetailsView::default() }
|
DetailsView { settings: Some(settings), ..DetailsView::default() }
|
||||||
|
@ -5,6 +5,7 @@ use std::str::FromStr;
|
|||||||
|
|
||||||
use enum_iterator::Sequence;
|
use enum_iterator::Sequence;
|
||||||
use milli::update::IndexDocumentsMethod;
|
use milli::update::IndexDocumentsMethod;
|
||||||
|
use milli::Object;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use serde::{Deserialize, Serialize, Serializer};
|
use serde::{Deserialize, Serialize, Serializer};
|
||||||
use time::{Duration, OffsetDateTime};
|
use time::{Duration, OffsetDateTime};
|
||||||
@ -48,6 +49,7 @@ impl Task {
|
|||||||
| TaskDeletion { .. }
|
| TaskDeletion { .. }
|
||||||
| IndexSwap { .. } => None,
|
| IndexSwap { .. } => None,
|
||||||
DocumentAdditionOrUpdate { index_uid, .. }
|
DocumentAdditionOrUpdate { index_uid, .. }
|
||||||
|
| DocumentEdition { index_uid, .. }
|
||||||
| DocumentDeletion { index_uid, .. }
|
| DocumentDeletion { index_uid, .. }
|
||||||
| DocumentDeletionByFilter { index_uid, .. }
|
| DocumentDeletionByFilter { index_uid, .. }
|
||||||
| DocumentClear { index_uid }
|
| DocumentClear { index_uid }
|
||||||
@ -67,7 +69,8 @@ impl Task {
|
|||||||
pub fn content_uuid(&self) -> Option<Uuid> {
|
pub fn content_uuid(&self) -> Option<Uuid> {
|
||||||
match self.kind {
|
match self.kind {
|
||||||
KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => Some(content_file),
|
KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => Some(content_file),
|
||||||
KindWithContent::DocumentDeletion { .. }
|
KindWithContent::DocumentEdition { .. }
|
||||||
|
| KindWithContent::DocumentDeletion { .. }
|
||||||
| KindWithContent::DocumentDeletionByFilter { .. }
|
| KindWithContent::DocumentDeletionByFilter { .. }
|
||||||
| KindWithContent::DocumentClear { .. }
|
| KindWithContent::DocumentClear { .. }
|
||||||
| KindWithContent::SettingsUpdate { .. }
|
| KindWithContent::SettingsUpdate { .. }
|
||||||
@ -102,6 +105,12 @@ pub enum KindWithContent {
|
|||||||
index_uid: String,
|
index_uid: String,
|
||||||
filter_expr: serde_json::Value,
|
filter_expr: serde_json::Value,
|
||||||
},
|
},
|
||||||
|
DocumentEdition {
|
||||||
|
index_uid: String,
|
||||||
|
filter_expr: Option<serde_json::Value>,
|
||||||
|
context: Option<milli::Object>,
|
||||||
|
function: String,
|
||||||
|
},
|
||||||
DocumentClear {
|
DocumentClear {
|
||||||
index_uid: String,
|
index_uid: String,
|
||||||
},
|
},
|
||||||
@ -150,6 +159,7 @@ impl KindWithContent {
|
|||||||
pub fn as_kind(&self) -> Kind {
|
pub fn as_kind(&self) -> Kind {
|
||||||
match self {
|
match self {
|
||||||
KindWithContent::DocumentAdditionOrUpdate { .. } => Kind::DocumentAdditionOrUpdate,
|
KindWithContent::DocumentAdditionOrUpdate { .. } => Kind::DocumentAdditionOrUpdate,
|
||||||
|
KindWithContent::DocumentEdition { .. } => Kind::DocumentEdition,
|
||||||
KindWithContent::DocumentDeletion { .. } => Kind::DocumentDeletion,
|
KindWithContent::DocumentDeletion { .. } => Kind::DocumentDeletion,
|
||||||
KindWithContent::DocumentDeletionByFilter { .. } => Kind::DocumentDeletion,
|
KindWithContent::DocumentDeletionByFilter { .. } => Kind::DocumentDeletion,
|
||||||
KindWithContent::DocumentClear { .. } => Kind::DocumentDeletion,
|
KindWithContent::DocumentClear { .. } => Kind::DocumentDeletion,
|
||||||
@ -174,6 +184,7 @@ impl KindWithContent {
|
|||||||
| TaskCancelation { .. }
|
| TaskCancelation { .. }
|
||||||
| TaskDeletion { .. } => vec![],
|
| TaskDeletion { .. } => vec![],
|
||||||
DocumentAdditionOrUpdate { index_uid, .. }
|
DocumentAdditionOrUpdate { index_uid, .. }
|
||||||
|
| DocumentEdition { index_uid, .. }
|
||||||
| DocumentDeletion { index_uid, .. }
|
| DocumentDeletion { index_uid, .. }
|
||||||
| DocumentDeletionByFilter { index_uid, .. }
|
| DocumentDeletionByFilter { index_uid, .. }
|
||||||
| DocumentClear { index_uid }
|
| DocumentClear { index_uid }
|
||||||
@ -202,6 +213,15 @@ impl KindWithContent {
|
|||||||
indexed_documents: None,
|
indexed_documents: None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
KindWithContent::DocumentEdition { index_uid: _, filter_expr, context, function } => {
|
||||||
|
Some(Details::DocumentEdition {
|
||||||
|
deleted_documents: None,
|
||||||
|
edited_documents: None,
|
||||||
|
original_filter: filter_expr.as_ref().map(|v| v.to_string()),
|
||||||
|
context: context.clone(),
|
||||||
|
function: function.clone(),
|
||||||
|
})
|
||||||
|
}
|
||||||
KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => {
|
KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => {
|
||||||
Some(Details::DocumentDeletion {
|
Some(Details::DocumentDeletion {
|
||||||
provided_ids: documents_ids.len(),
|
provided_ids: documents_ids.len(),
|
||||||
@ -250,6 +270,15 @@ impl KindWithContent {
|
|||||||
indexed_documents: Some(0),
|
indexed_documents: Some(0),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
KindWithContent::DocumentEdition { index_uid: _, filter_expr, context, function } => {
|
||||||
|
Some(Details::DocumentEdition {
|
||||||
|
deleted_documents: Some(0),
|
||||||
|
edited_documents: Some(0),
|
||||||
|
original_filter: filter_expr.as_ref().map(|v| v.to_string()),
|
||||||
|
context: context.clone(),
|
||||||
|
function: function.clone(),
|
||||||
|
})
|
||||||
|
}
|
||||||
KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => {
|
KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => {
|
||||||
Some(Details::DocumentDeletion {
|
Some(Details::DocumentDeletion {
|
||||||
provided_ids: documents_ids.len(),
|
provided_ids: documents_ids.len(),
|
||||||
@ -301,6 +330,7 @@ impl From<&KindWithContent> for Option<Details> {
|
|||||||
indexed_documents: None,
|
indexed_documents: None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
KindWithContent::DocumentEdition { .. } => None,
|
||||||
KindWithContent::DocumentDeletion { .. } => None,
|
KindWithContent::DocumentDeletion { .. } => None,
|
||||||
KindWithContent::DocumentDeletionByFilter { .. } => None,
|
KindWithContent::DocumentDeletionByFilter { .. } => None,
|
||||||
KindWithContent::DocumentClear { .. } => None,
|
KindWithContent::DocumentClear { .. } => None,
|
||||||
@ -394,6 +424,7 @@ impl std::error::Error for ParseTaskStatusError {}
|
|||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
pub enum Kind {
|
pub enum Kind {
|
||||||
DocumentAdditionOrUpdate,
|
DocumentAdditionOrUpdate,
|
||||||
|
DocumentEdition,
|
||||||
DocumentDeletion,
|
DocumentDeletion,
|
||||||
SettingsUpdate,
|
SettingsUpdate,
|
||||||
IndexCreation,
|
IndexCreation,
|
||||||
@ -410,6 +441,7 @@ impl Kind {
|
|||||||
pub fn related_to_one_index(&self) -> bool {
|
pub fn related_to_one_index(&self) -> bool {
|
||||||
match self {
|
match self {
|
||||||
Kind::DocumentAdditionOrUpdate
|
Kind::DocumentAdditionOrUpdate
|
||||||
|
| Kind::DocumentEdition
|
||||||
| Kind::DocumentDeletion
|
| Kind::DocumentDeletion
|
||||||
| Kind::SettingsUpdate
|
| Kind::SettingsUpdate
|
||||||
| Kind::IndexCreation
|
| Kind::IndexCreation
|
||||||
@ -427,6 +459,7 @@ impl Display for Kind {
|
|||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
match self {
|
match self {
|
||||||
Kind::DocumentAdditionOrUpdate => write!(f, "documentAdditionOrUpdate"),
|
Kind::DocumentAdditionOrUpdate => write!(f, "documentAdditionOrUpdate"),
|
||||||
|
Kind::DocumentEdition => write!(f, "documentEdition"),
|
||||||
Kind::DocumentDeletion => write!(f, "documentDeletion"),
|
Kind::DocumentDeletion => write!(f, "documentDeletion"),
|
||||||
Kind::SettingsUpdate => write!(f, "settingsUpdate"),
|
Kind::SettingsUpdate => write!(f, "settingsUpdate"),
|
||||||
Kind::IndexCreation => write!(f, "indexCreation"),
|
Kind::IndexCreation => write!(f, "indexCreation"),
|
||||||
@ -454,6 +487,8 @@ impl FromStr for Kind {
|
|||||||
Ok(Kind::IndexDeletion)
|
Ok(Kind::IndexDeletion)
|
||||||
} else if kind.eq_ignore_ascii_case("documentAdditionOrUpdate") {
|
} else if kind.eq_ignore_ascii_case("documentAdditionOrUpdate") {
|
||||||
Ok(Kind::DocumentAdditionOrUpdate)
|
Ok(Kind::DocumentAdditionOrUpdate)
|
||||||
|
} else if kind.eq_ignore_ascii_case("documentEdition") {
|
||||||
|
Ok(Kind::DocumentEdition)
|
||||||
} else if kind.eq_ignore_ascii_case("documentDeletion") {
|
} else if kind.eq_ignore_ascii_case("documentDeletion") {
|
||||||
Ok(Kind::DocumentDeletion)
|
Ok(Kind::DocumentDeletion)
|
||||||
} else if kind.eq_ignore_ascii_case("settingsUpdate") {
|
} else if kind.eq_ignore_ascii_case("settingsUpdate") {
|
||||||
@ -495,16 +530,50 @@ impl std::error::Error for ParseTaskKindError {}
|
|||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
|
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
|
||||||
pub enum Details {
|
pub enum Details {
|
||||||
DocumentAdditionOrUpdate { received_documents: u64, indexed_documents: Option<u64> },
|
DocumentAdditionOrUpdate {
|
||||||
SettingsUpdate { settings: Box<Settings<Unchecked>> },
|
received_documents: u64,
|
||||||
IndexInfo { primary_key: Option<String> },
|
indexed_documents: Option<u64>,
|
||||||
DocumentDeletion { provided_ids: usize, deleted_documents: Option<u64> },
|
},
|
||||||
DocumentDeletionByFilter { original_filter: String, deleted_documents: Option<u64> },
|
SettingsUpdate {
|
||||||
ClearAll { deleted_documents: Option<u64> },
|
settings: Box<Settings<Unchecked>>,
|
||||||
TaskCancelation { matched_tasks: u64, canceled_tasks: Option<u64>, original_filter: String },
|
},
|
||||||
TaskDeletion { matched_tasks: u64, deleted_tasks: Option<u64>, original_filter: String },
|
IndexInfo {
|
||||||
Dump { dump_uid: Option<String> },
|
primary_key: Option<String>,
|
||||||
IndexSwap { swaps: Vec<IndexSwap> },
|
},
|
||||||
|
DocumentDeletion {
|
||||||
|
provided_ids: usize,
|
||||||
|
deleted_documents: Option<u64>,
|
||||||
|
},
|
||||||
|
DocumentDeletionByFilter {
|
||||||
|
original_filter: String,
|
||||||
|
deleted_documents: Option<u64>,
|
||||||
|
},
|
||||||
|
DocumentEdition {
|
||||||
|
deleted_documents: Option<u64>,
|
||||||
|
edited_documents: Option<u64>,
|
||||||
|
original_filter: Option<String>,
|
||||||
|
context: Option<Object>,
|
||||||
|
function: String,
|
||||||
|
},
|
||||||
|
ClearAll {
|
||||||
|
deleted_documents: Option<u64>,
|
||||||
|
},
|
||||||
|
TaskCancelation {
|
||||||
|
matched_tasks: u64,
|
||||||
|
canceled_tasks: Option<u64>,
|
||||||
|
original_filter: String,
|
||||||
|
},
|
||||||
|
TaskDeletion {
|
||||||
|
matched_tasks: u64,
|
||||||
|
deleted_tasks: Option<u64>,
|
||||||
|
original_filter: String,
|
||||||
|
},
|
||||||
|
Dump {
|
||||||
|
dump_uid: Option<String>,
|
||||||
|
},
|
||||||
|
IndexSwap {
|
||||||
|
swaps: Vec<IndexSwap>,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Details {
|
impl Details {
|
||||||
@ -514,6 +583,7 @@ impl Details {
|
|||||||
Self::DocumentAdditionOrUpdate { indexed_documents, .. } => {
|
Self::DocumentAdditionOrUpdate { indexed_documents, .. } => {
|
||||||
*indexed_documents = Some(0)
|
*indexed_documents = Some(0)
|
||||||
}
|
}
|
||||||
|
Self::DocumentEdition { edited_documents, .. } => *edited_documents = Some(0),
|
||||||
Self::DocumentDeletion { deleted_documents, .. } => *deleted_documents = Some(0),
|
Self::DocumentDeletion { deleted_documents, .. } => *deleted_documents = Some(0),
|
||||||
Self::DocumentDeletionByFilter { deleted_documents, .. } => {
|
Self::DocumentDeletionByFilter { deleted_documents, .. } => {
|
||||||
*deleted_documents = Some(0)
|
*deleted_documents = Some(0)
|
||||||
|
@ -6,7 +6,7 @@ use meilisearch_types::InstanceUid;
|
|||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
|
|
||||||
use super::{find_user_id, Analytics, DocumentDeletionKind, DocumentFetchKind};
|
use super::{find_user_id, Analytics, DocumentDeletionKind, DocumentFetchKind};
|
||||||
use crate::routes::indexes::documents::UpdateDocumentsQuery;
|
use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery};
|
||||||
use crate::Opt;
|
use crate::Opt;
|
||||||
|
|
||||||
pub struct MockAnalytics {
|
pub struct MockAnalytics {
|
||||||
@ -97,6 +97,13 @@ impl Analytics for MockAnalytics {
|
|||||||
_request: &HttpRequest,
|
_request: &HttpRequest,
|
||||||
) {
|
) {
|
||||||
}
|
}
|
||||||
|
fn update_documents_by_function(
|
||||||
|
&self,
|
||||||
|
_documents_query: &DocumentEditionByFunction,
|
||||||
|
_index_creation: bool,
|
||||||
|
_request: &HttpRequest,
|
||||||
|
) {
|
||||||
|
}
|
||||||
fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
|
fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
|
||||||
fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
|
fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
|
||||||
}
|
}
|
||||||
|
@ -13,7 +13,7 @@ use once_cell::sync::Lazy;
|
|||||||
use platform_dirs::AppDirs;
|
use platform_dirs::AppDirs;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
|
|
||||||
use crate::routes::indexes::documents::UpdateDocumentsQuery;
|
use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery};
|
||||||
|
|
||||||
// if the analytics feature is disabled
|
// if the analytics feature is disabled
|
||||||
// the `SegmentAnalytics` point to the mock instead of the real analytics
|
// the `SegmentAnalytics` point to the mock instead of the real analytics
|
||||||
@ -119,11 +119,19 @@ pub trait Analytics: Sync + Send {
|
|||||||
// this method should be called to aggregate an add documents request
|
// this method should be called to aggregate an add documents request
|
||||||
fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest);
|
fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest);
|
||||||
|
|
||||||
// this method should be called to batch a update documents request
|
// this method should be called to batch an update documents request
|
||||||
fn update_documents(
|
fn update_documents(
|
||||||
&self,
|
&self,
|
||||||
documents_query: &UpdateDocumentsQuery,
|
documents_query: &UpdateDocumentsQuery,
|
||||||
index_creation: bool,
|
index_creation: bool,
|
||||||
request: &HttpRequest,
|
request: &HttpRequest,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// this method should be called to batch an update documents by function request
|
||||||
|
fn update_documents_by_function(
|
||||||
|
&self,
|
||||||
|
documents_query: &DocumentEditionByFunction,
|
||||||
|
index_creation: bool,
|
||||||
|
request: &HttpRequest,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
@ -30,7 +30,7 @@ use crate::analytics::Analytics;
|
|||||||
use crate::option::{
|
use crate::option::{
|
||||||
default_http_addr, IndexerOpts, LogMode, MaxMemory, MaxThreads, ScheduleSnapshot,
|
default_http_addr, IndexerOpts, LogMode, MaxMemory, MaxThreads, ScheduleSnapshot,
|
||||||
};
|
};
|
||||||
use crate::routes::indexes::documents::UpdateDocumentsQuery;
|
use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery};
|
||||||
use crate::routes::indexes::facet_search::FacetSearchQuery;
|
use crate::routes::indexes::facet_search::FacetSearchQuery;
|
||||||
use crate::routes::{create_all_stats, Stats};
|
use crate::routes::{create_all_stats, Stats};
|
||||||
use crate::search::{
|
use crate::search::{
|
||||||
@ -80,6 +80,7 @@ pub enum AnalyticsMsg {
|
|||||||
AggregateAddDocuments(DocumentsAggregator),
|
AggregateAddDocuments(DocumentsAggregator),
|
||||||
AggregateDeleteDocuments(DocumentsDeletionAggregator),
|
AggregateDeleteDocuments(DocumentsDeletionAggregator),
|
||||||
AggregateUpdateDocuments(DocumentsAggregator),
|
AggregateUpdateDocuments(DocumentsAggregator),
|
||||||
|
AggregateEditDocumentsByFunction(EditDocumentsByFunctionAggregator),
|
||||||
AggregateGetFetchDocuments(DocumentsFetchAggregator),
|
AggregateGetFetchDocuments(DocumentsFetchAggregator),
|
||||||
AggregatePostFetchDocuments(DocumentsFetchAggregator),
|
AggregatePostFetchDocuments(DocumentsFetchAggregator),
|
||||||
}
|
}
|
||||||
@ -149,6 +150,7 @@ impl SegmentAnalytics {
|
|||||||
add_documents_aggregator: DocumentsAggregator::default(),
|
add_documents_aggregator: DocumentsAggregator::default(),
|
||||||
delete_documents_aggregator: DocumentsDeletionAggregator::default(),
|
delete_documents_aggregator: DocumentsDeletionAggregator::default(),
|
||||||
update_documents_aggregator: DocumentsAggregator::default(),
|
update_documents_aggregator: DocumentsAggregator::default(),
|
||||||
|
edit_documents_by_function_aggregator: EditDocumentsByFunctionAggregator::default(),
|
||||||
get_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
|
get_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
|
||||||
post_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
|
post_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
|
||||||
get_similar_aggregator: SimilarAggregator::default(),
|
get_similar_aggregator: SimilarAggregator::default(),
|
||||||
@ -229,6 +231,17 @@ impl super::Analytics for SegmentAnalytics {
|
|||||||
let _ = self.sender.try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate));
|
let _ = self.sender.try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn update_documents_by_function(
|
||||||
|
&self,
|
||||||
|
documents_query: &DocumentEditionByFunction,
|
||||||
|
index_creation: bool,
|
||||||
|
request: &HttpRequest,
|
||||||
|
) {
|
||||||
|
let aggregate =
|
||||||
|
EditDocumentsByFunctionAggregator::from_query(documents_query, index_creation, request);
|
||||||
|
let _ = self.sender.try_send(AnalyticsMsg::AggregateEditDocumentsByFunction(aggregate));
|
||||||
|
}
|
||||||
|
|
||||||
fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) {
|
fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) {
|
||||||
let aggregate = DocumentsFetchAggregator::from_query(documents_query, request);
|
let aggregate = DocumentsFetchAggregator::from_query(documents_query, request);
|
||||||
let _ = self.sender.try_send(AnalyticsMsg::AggregateGetFetchDocuments(aggregate));
|
let _ = self.sender.try_send(AnalyticsMsg::AggregateGetFetchDocuments(aggregate));
|
||||||
@ -389,6 +402,7 @@ pub struct Segment {
|
|||||||
add_documents_aggregator: DocumentsAggregator,
|
add_documents_aggregator: DocumentsAggregator,
|
||||||
delete_documents_aggregator: DocumentsDeletionAggregator,
|
delete_documents_aggregator: DocumentsDeletionAggregator,
|
||||||
update_documents_aggregator: DocumentsAggregator,
|
update_documents_aggregator: DocumentsAggregator,
|
||||||
|
edit_documents_by_function_aggregator: EditDocumentsByFunctionAggregator,
|
||||||
get_fetch_documents_aggregator: DocumentsFetchAggregator,
|
get_fetch_documents_aggregator: DocumentsFetchAggregator,
|
||||||
post_fetch_documents_aggregator: DocumentsFetchAggregator,
|
post_fetch_documents_aggregator: DocumentsFetchAggregator,
|
||||||
get_similar_aggregator: SimilarAggregator,
|
get_similar_aggregator: SimilarAggregator,
|
||||||
@ -453,6 +467,7 @@ impl Segment {
|
|||||||
Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
|
Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
|
||||||
Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg),
|
Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg),
|
||||||
Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
|
Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
|
||||||
|
Some(AnalyticsMsg::AggregateEditDocumentsByFunction(agreg)) => self.edit_documents_by_function_aggregator.aggregate(agreg),
|
||||||
Some(AnalyticsMsg::AggregateGetFetchDocuments(agreg)) => self.get_fetch_documents_aggregator.aggregate(agreg),
|
Some(AnalyticsMsg::AggregateGetFetchDocuments(agreg)) => self.get_fetch_documents_aggregator.aggregate(agreg),
|
||||||
Some(AnalyticsMsg::AggregatePostFetchDocuments(agreg)) => self.post_fetch_documents_aggregator.aggregate(agreg),
|
Some(AnalyticsMsg::AggregatePostFetchDocuments(agreg)) => self.post_fetch_documents_aggregator.aggregate(agreg),
|
||||||
Some(AnalyticsMsg::AggregateGetSimilar(agreg)) => self.get_similar_aggregator.aggregate(agreg),
|
Some(AnalyticsMsg::AggregateGetSimilar(agreg)) => self.get_similar_aggregator.aggregate(agreg),
|
||||||
@ -508,6 +523,7 @@ impl Segment {
|
|||||||
add_documents_aggregator,
|
add_documents_aggregator,
|
||||||
delete_documents_aggregator,
|
delete_documents_aggregator,
|
||||||
update_documents_aggregator,
|
update_documents_aggregator,
|
||||||
|
edit_documents_by_function_aggregator,
|
||||||
get_fetch_documents_aggregator,
|
get_fetch_documents_aggregator,
|
||||||
post_fetch_documents_aggregator,
|
post_fetch_documents_aggregator,
|
||||||
get_similar_aggregator,
|
get_similar_aggregator,
|
||||||
@ -549,6 +565,11 @@ impl Segment {
|
|||||||
{
|
{
|
||||||
let _ = self.batcher.push(update_documents).await;
|
let _ = self.batcher.push(update_documents).await;
|
||||||
}
|
}
|
||||||
|
if let Some(edit_documents_by_function) = take(edit_documents_by_function_aggregator)
|
||||||
|
.into_event(user, "Documents Edited By Function")
|
||||||
|
{
|
||||||
|
let _ = self.batcher.push(edit_documents_by_function).await;
|
||||||
|
}
|
||||||
if let Some(get_fetch_documents) =
|
if let Some(get_fetch_documents) =
|
||||||
take(get_fetch_documents_aggregator).into_event(user, "Documents Fetched GET")
|
take(get_fetch_documents_aggregator).into_event(user, "Documents Fetched GET")
|
||||||
{
|
{
|
||||||
@ -1465,6 +1486,75 @@ impl DocumentsAggregator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
pub struct EditDocumentsByFunctionAggregator {
|
||||||
|
timestamp: Option<OffsetDateTime>,
|
||||||
|
|
||||||
|
// Set to true if at least one request was filtered
|
||||||
|
filtered: bool,
|
||||||
|
// Set to true if at least one request contained a context
|
||||||
|
with_context: bool,
|
||||||
|
|
||||||
|
// context
|
||||||
|
user_agents: HashSet<String>,
|
||||||
|
|
||||||
|
index_creation: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EditDocumentsByFunctionAggregator {
|
||||||
|
pub fn from_query(
|
||||||
|
documents_query: &DocumentEditionByFunction,
|
||||||
|
index_creation: bool,
|
||||||
|
request: &HttpRequest,
|
||||||
|
) -> Self {
|
||||||
|
let DocumentEditionByFunction { filter, context, function: _ } = documents_query;
|
||||||
|
|
||||||
|
Self {
|
||||||
|
timestamp: Some(OffsetDateTime::now_utc()),
|
||||||
|
user_agents: extract_user_agents(request).into_iter().collect(),
|
||||||
|
filtered: filter.is_some(),
|
||||||
|
with_context: context.is_some(),
|
||||||
|
index_creation,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Aggregate one [EditDocumentsByFunctionAggregator] into another.
|
||||||
|
pub fn aggregate(&mut self, other: Self) {
|
||||||
|
let Self { timestamp, user_agents, index_creation, filtered, with_context } = other;
|
||||||
|
|
||||||
|
if self.timestamp.is_none() {
|
||||||
|
self.timestamp = timestamp;
|
||||||
|
}
|
||||||
|
|
||||||
|
// we can't create a union because there is no `into_union` method
|
||||||
|
for user_agent in user_agents {
|
||||||
|
self.user_agents.insert(user_agent);
|
||||||
|
}
|
||||||
|
self.index_creation |= index_creation;
|
||||||
|
self.filtered |= filtered;
|
||||||
|
self.with_context |= with_context;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
|
||||||
|
let Self { timestamp, user_agents, index_creation, filtered, with_context } = self;
|
||||||
|
|
||||||
|
let properties = json!({
|
||||||
|
"user-agent": user_agents,
|
||||||
|
"filtered": filtered,
|
||||||
|
"with_context": with_context,
|
||||||
|
"index_creation": index_creation,
|
||||||
|
});
|
||||||
|
|
||||||
|
Some(Track {
|
||||||
|
timestamp,
|
||||||
|
user: user.clone(),
|
||||||
|
event: event_name.to_string(),
|
||||||
|
properties,
|
||||||
|
..Default::default()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Default, Serialize)]
|
#[derive(Default, Serialize)]
|
||||||
pub struct DocumentsDeletionAggregator {
|
pub struct DocumentsDeletionAggregator {
|
||||||
#[serde(skip)]
|
#[serde(skip)]
|
||||||
|
@ -47,6 +47,8 @@ pub struct RuntimeTogglableFeatures {
|
|||||||
pub metrics: Option<bool>,
|
pub metrics: Option<bool>,
|
||||||
#[deserr(default)]
|
#[deserr(default)]
|
||||||
pub logs_route: Option<bool>,
|
pub logs_route: Option<bool>,
|
||||||
|
#[deserr(default)]
|
||||||
|
pub edit_documents_by_function: Option<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn patch_features(
|
async fn patch_features(
|
||||||
@ -66,13 +68,21 @@ async fn patch_features(
|
|||||||
vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
|
vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
|
||||||
metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
|
metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
|
||||||
logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route),
|
logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route),
|
||||||
|
edit_documents_by_function: new_features
|
||||||
|
.0
|
||||||
|
.edit_documents_by_function
|
||||||
|
.unwrap_or(old_features.edit_documents_by_function),
|
||||||
};
|
};
|
||||||
|
|
||||||
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
|
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
|
||||||
// it renames to camelCase, which we don't want for analytics.
|
// it renames to camelCase, which we don't want for analytics.
|
||||||
// **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
|
// **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
|
||||||
let meilisearch_types::features::RuntimeTogglableFeatures { vector_store, metrics, logs_route } =
|
let meilisearch_types::features::RuntimeTogglableFeatures {
|
||||||
new_features;
|
vector_store,
|
||||||
|
metrics,
|
||||||
|
logs_route,
|
||||||
|
edit_documents_by_function,
|
||||||
|
} = new_features;
|
||||||
|
|
||||||
analytics.publish(
|
analytics.publish(
|
||||||
"Experimental features Updated".to_string(),
|
"Experimental features Updated".to_string(),
|
||||||
@ -80,6 +90,7 @@ async fn patch_features(
|
|||||||
"vector_store": vector_store,
|
"vector_store": vector_store,
|
||||||
"metrics": metrics,
|
"metrics": metrics,
|
||||||
"logs_route": logs_route,
|
"logs_route": logs_route,
|
||||||
|
"edit_documents_by_function": edit_documents_by_function,
|
||||||
}),
|
}),
|
||||||
Some(&req),
|
Some(&req),
|
||||||
);
|
);
|
||||||
|
@ -82,6 +82,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
|||||||
web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents_batch))),
|
web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents_batch))),
|
||||||
)
|
)
|
||||||
.service(web::resource("/delete").route(web::post().to(SeqHandler(delete_documents_by_filter))))
|
.service(web::resource("/delete").route(web::post().to(SeqHandler(delete_documents_by_filter))))
|
||||||
|
.service(web::resource("/edit").route(web::post().to(SeqHandler(edit_documents_by_function))))
|
||||||
.service(web::resource("/fetch").route(web::post().to(SeqHandler(documents_by_query_post))))
|
.service(web::resource("/fetch").route(web::post().to(SeqHandler(documents_by_query_post))))
|
||||||
.service(
|
.service(
|
||||||
web::resource("/{document_id}")
|
web::resource("/{document_id}")
|
||||||
@ -574,6 +575,82 @@ pub async fn delete_documents_by_filter(
|
|||||||
Ok(HttpResponse::Accepted().json(task))
|
Ok(HttpResponse::Accepted().json(task))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserr)]
|
||||||
|
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||||
|
pub struct DocumentEditionByFunction {
|
||||||
|
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
|
||||||
|
pub filter: Option<Value>,
|
||||||
|
#[deserr(default, error = DeserrJsonError<InvalidDocumentEditionContext>)]
|
||||||
|
pub context: Option<Value>,
|
||||||
|
#[deserr(error = DeserrJsonError<InvalidDocumentEditionFunctionFilter>, missing_field_error = DeserrJsonError::missing_document_edition_function)]
|
||||||
|
pub function: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn edit_documents_by_function(
|
||||||
|
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ALL }>, Data<IndexScheduler>>,
|
||||||
|
index_uid: web::Path<String>,
|
||||||
|
params: AwebJson<DocumentEditionByFunction, DeserrJsonError>,
|
||||||
|
req: HttpRequest,
|
||||||
|
opt: web::Data<Opt>,
|
||||||
|
analytics: web::Data<dyn Analytics>,
|
||||||
|
) -> Result<HttpResponse, ResponseError> {
|
||||||
|
debug!(parameters = ?params, "Edit documents by function");
|
||||||
|
|
||||||
|
index_scheduler
|
||||||
|
.features()
|
||||||
|
.check_edit_documents_by_function("Using the documents edit route")?;
|
||||||
|
|
||||||
|
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||||
|
let index_uid = index_uid.into_inner();
|
||||||
|
let params = params.into_inner();
|
||||||
|
|
||||||
|
analytics.update_documents_by_function(
|
||||||
|
&params,
|
||||||
|
index_scheduler.index(&index_uid).is_err(),
|
||||||
|
&req,
|
||||||
|
);
|
||||||
|
|
||||||
|
let DocumentEditionByFunction { filter, context, function } = params;
|
||||||
|
let engine = milli::rhai::Engine::new();
|
||||||
|
if let Err(e) = engine.compile(&function) {
|
||||||
|
return Err(ResponseError::from_msg(e.to_string(), Code::BadRequest));
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(ref filter) = filter {
|
||||||
|
// we ensure the filter is well formed before enqueuing it
|
||||||
|
|| -> Result<_, ResponseError> {
|
||||||
|
Ok(crate::search::parse_filter(filter)?.ok_or(MeilisearchHttpError::EmptyFilter)?)
|
||||||
|
}()
|
||||||
|
// and whatever was the error, the error code should always be an InvalidDocumentFilter
|
||||||
|
.map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;
|
||||||
|
}
|
||||||
|
let task = KindWithContent::DocumentEdition {
|
||||||
|
index_uid,
|
||||||
|
filter_expr: filter,
|
||||||
|
context: match context {
|
||||||
|
Some(Value::Object(m)) => Some(m),
|
||||||
|
None => None,
|
||||||
|
_ => {
|
||||||
|
return Err(ResponseError::from_msg(
|
||||||
|
"The context must be an object".to_string(),
|
||||||
|
Code::InvalidDocumentEditionContext,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
},
|
||||||
|
function,
|
||||||
|
};
|
||||||
|
|
||||||
|
let uid = get_task_id(&req, &opt)?;
|
||||||
|
let dry_run = is_dry_run(&req, &opt)?;
|
||||||
|
let task: SummarizedTaskView =
|
||||||
|
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||||
|
.await??
|
||||||
|
.into();
|
||||||
|
|
||||||
|
debug!(returns = ?task, "Edit documents by function");
|
||||||
|
Ok(HttpResponse::Accepted().json(task))
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn clear_all_documents(
|
pub async fn clear_all_documents(
|
||||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
|
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
|
||||||
index_uid: web::Path<String>,
|
index_uid: web::Path<String>,
|
||||||
|
@ -591,7 +591,7 @@ mod tests {
|
|||||||
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
|
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
|
||||||
snapshot!(meili_snap::json_string!(err), @r###"
|
snapshot!(meili_snap::json_string!(err), @r###"
|
||||||
{
|
{
|
||||||
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
|
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
|
||||||
"code": "invalid_task_types",
|
"code": "invalid_task_types",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
||||||
|
@ -535,7 +535,8 @@ async fn get_document_with_vectors() {
|
|||||||
{
|
{
|
||||||
"vectorStore": true,
|
"vectorStore": true,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"logsRoute": false
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
|
@ -1859,7 +1859,8 @@ async fn import_dump_v6_containing_experimental_features() {
|
|||||||
{
|
{
|
||||||
"vectorStore": false,
|
"vectorStore": false,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"logsRoute": false
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -1952,7 +1953,8 @@ async fn generate_and_import_dump_containing_vectors() {
|
|||||||
{
|
{
|
||||||
"vectorStore": true,
|
"vectorStore": true,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"logsRoute": false
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
let index = server.index("pets");
|
let index = server.index("pets");
|
||||||
@ -2022,7 +2024,8 @@ async fn generate_and_import_dump_containing_vectors() {
|
|||||||
{
|
{
|
||||||
"vectorStore": true,
|
"vectorStore": true,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"logsRoute": false
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
|
@ -20,7 +20,8 @@ async fn experimental_features() {
|
|||||||
{
|
{
|
||||||
"vectorStore": false,
|
"vectorStore": false,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"logsRoute": false
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -31,7 +32,8 @@ async fn experimental_features() {
|
|||||||
{
|
{
|
||||||
"vectorStore": true,
|
"vectorStore": true,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"logsRoute": false
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -42,7 +44,8 @@ async fn experimental_features() {
|
|||||||
{
|
{
|
||||||
"vectorStore": true,
|
"vectorStore": true,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"logsRoute": false
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -54,7 +57,8 @@ async fn experimental_features() {
|
|||||||
{
|
{
|
||||||
"vectorStore": true,
|
"vectorStore": true,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"logsRoute": false
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -66,7 +70,8 @@ async fn experimental_features() {
|
|||||||
{
|
{
|
||||||
"vectorStore": true,
|
"vectorStore": true,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"logsRoute": false
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
@ -85,7 +90,8 @@ async fn experimental_feature_metrics() {
|
|||||||
{
|
{
|
||||||
"vectorStore": false,
|
"vectorStore": false,
|
||||||
"metrics": true,
|
"metrics": true,
|
||||||
"logsRoute": false
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -140,7 +146,7 @@ async fn errors() {
|
|||||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||||
{
|
{
|
||||||
"message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`",
|
"message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `editDocumentsByFunction`",
|
||||||
"code": "bad_request",
|
"code": "bad_request",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||||
|
@ -18,7 +18,8 @@ async fn index_with_documents_user_provided<'a>(
|
|||||||
{
|
{
|
||||||
"vectorStore": true,
|
"vectorStore": true,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"logsRoute": false
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -46,7 +47,8 @@ async fn index_with_documents_hf<'a>(server: &'a Server, documents: &Value) -> I
|
|||||||
{
|
{
|
||||||
"vectorStore": true,
|
"vectorStore": true,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"logsRoute": false
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
|
@ -98,7 +98,8 @@ async fn secrets_are_hidden_in_settings() {
|
|||||||
{
|
{
|
||||||
"vectorStore": true,
|
"vectorStore": true,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"logsRoute": false
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
|
@ -55,7 +55,8 @@ async fn basic() {
|
|||||||
{
|
{
|
||||||
"vectorStore": true,
|
"vectorStore": true,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"logsRoute": false
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -244,7 +245,8 @@ async fn ranking_score_threshold() {
|
|||||||
{
|
{
|
||||||
"vectorStore": true,
|
"vectorStore": true,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"logsRoute": false
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -523,7 +525,8 @@ async fn filter() {
|
|||||||
{
|
{
|
||||||
"vectorStore": true,
|
"vectorStore": true,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"logsRoute": false
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -650,7 +653,8 @@ async fn limit_and_offset() {
|
|||||||
{
|
{
|
||||||
"vectorStore": true,
|
"vectorStore": true,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"logsRoute": false
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
|
@ -97,7 +97,7 @@ async fn task_bad_types() {
|
|||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
snapshot!(json_string!(response), @r###"
|
snapshot!(json_string!(response), @r###"
|
||||||
{
|
{
|
||||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
|
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
|
||||||
"code": "invalid_task_types",
|
"code": "invalid_task_types",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
||||||
@ -108,7 +108,7 @@ async fn task_bad_types() {
|
|||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
snapshot!(json_string!(response), @r###"
|
snapshot!(json_string!(response), @r###"
|
||||||
{
|
{
|
||||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
|
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
|
||||||
"code": "invalid_task_types",
|
"code": "invalid_task_types",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
||||||
@ -119,7 +119,7 @@ async fn task_bad_types() {
|
|||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
snapshot!(json_string!(response), @r###"
|
snapshot!(json_string!(response), @r###"
|
||||||
{
|
{
|
||||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
|
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
|
||||||
"code": "invalid_task_types",
|
"code": "invalid_task_types",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
||||||
|
@ -16,7 +16,8 @@ async fn add_remove_user_provided() {
|
|||||||
{
|
{
|
||||||
"vectorStore": true,
|
"vectorStore": true,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"logsRoute": false
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -159,7 +160,8 @@ async fn generate_default_user_provided_documents(server: &Server) -> Index {
|
|||||||
{
|
{
|
||||||
"vectorStore": true,
|
"vectorStore": true,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"logsRoute": false
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -516,7 +518,8 @@ async fn add_remove_one_vector_4588() {
|
|||||||
{
|
{
|
||||||
"vectorStore": true,
|
"vectorStore": true,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"logsRoute": false
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
|
@ -14,7 +14,8 @@ async fn update_embedder() {
|
|||||||
{
|
{
|
||||||
"vectorStore": true,
|
"vectorStore": true,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"logsRoute": false
|
"logsRoute": false,
|
||||||
|
"editDocumentsByFunction": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
|
@ -78,11 +78,13 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls",
|
|||||||
] }
|
] }
|
||||||
tiktoken-rs = "0.5.9"
|
tiktoken-rs = "0.5.9"
|
||||||
liquid = "0.26.6"
|
liquid = "0.26.6"
|
||||||
|
rhai = { version = "1.19.0", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] }
|
||||||
arroy = "0.4.0"
|
arroy = "0.4.0"
|
||||||
rand = "0.8.5"
|
rand = "0.8.5"
|
||||||
tracing = "0.1.40"
|
tracing = "0.1.40"
|
||||||
ureq = { version = "2.10.0", features = ["json"] }
|
ureq = { version = "2.10.0", features = ["json"] }
|
||||||
url = "2.5.2"
|
url = "2.5.2"
|
||||||
|
rayon-par-bridge = "0.1.0"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
mimalloc = { version = "0.1.43", default-features = false }
|
mimalloc = { version = "0.1.43", default-features = false }
|
||||||
|
@ -5,6 +5,7 @@ use std::{io, str};
|
|||||||
|
|
||||||
use heed::{Error as HeedError, MdbError};
|
use heed::{Error as HeedError, MdbError};
|
||||||
use rayon::ThreadPoolBuildError;
|
use rayon::ThreadPoolBuildError;
|
||||||
|
use rhai::EvalAltResult;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
@ -259,6 +260,14 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
|||||||
InvalidSettingsDimensions { embedder_name: String },
|
InvalidSettingsDimensions { embedder_name: String },
|
||||||
#[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")]
|
#[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")]
|
||||||
InvalidUrl { embedder_name: String, inner_error: url::ParseError, url: String },
|
InvalidUrl { embedder_name: String, inner_error: url::ParseError, url: String },
|
||||||
|
#[error("Document editions cannot modify a document's primary key")]
|
||||||
|
DocumentEditionCannotModifyPrimaryKey,
|
||||||
|
#[error("Document editions must keep documents as objects")]
|
||||||
|
DocumentEditionDocumentMustBeObject,
|
||||||
|
#[error("Document edition runtime error encountered while running the function: {0}")]
|
||||||
|
DocumentEditionRuntimeError(Box<EvalAltResult>),
|
||||||
|
#[error("Document edition runtime error encountered while compiling the function: {0}")]
|
||||||
|
DocumentEditionCompilationError(rhai::ParseError),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<crate::vector::Error> for Error {
|
impl From<crate::vector::Error> for Error {
|
||||||
|
@ -45,7 +45,7 @@ pub use search::new::{
|
|||||||
};
|
};
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
|
pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
|
||||||
pub use {charabia as tokenizer, heed};
|
pub use {charabia as tokenizer, heed, rhai};
|
||||||
|
|
||||||
pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
|
pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
|
||||||
pub use self::criterion::{default_criteria, Criterion, CriterionError};
|
pub use self::criterion::{default_criteria, Criterion, CriterionError};
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
mod enrich;
|
mod enrich;
|
||||||
mod extract;
|
mod extract;
|
||||||
mod helpers;
|
mod helpers;
|
||||||
|
mod parallel;
|
||||||
mod transform;
|
mod transform;
|
||||||
mod typed_chunk;
|
mod typed_chunk;
|
||||||
|
|
||||||
@ -16,6 +17,8 @@ use grenad::{Merger, MergerBuilder};
|
|||||||
use heed::types::Str;
|
use heed::types::Str;
|
||||||
use heed::Database;
|
use heed::Database;
|
||||||
use rand::SeedableRng;
|
use rand::SeedableRng;
|
||||||
|
use rayon::iter::{ParallelBridge, ParallelIterator};
|
||||||
|
use rhai::{Dynamic, Engine, OptimizationLevel, Scope};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use slice_group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
@ -32,15 +35,16 @@ pub use self::helpers::{
|
|||||||
};
|
};
|
||||||
use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
|
use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
|
||||||
pub use self::transform::{Transform, TransformOutput};
|
pub use self::transform::{Transform, TransformOutput};
|
||||||
use crate::documents::{obkv_to_object, DocumentsBatchReader};
|
use crate::documents::{obkv_to_object, DocumentsBatchBuilder, DocumentsBatchReader};
|
||||||
use crate::error::{Error, InternalError, UserError};
|
use crate::error::{Error, InternalError, UserError};
|
||||||
use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder;
|
use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder;
|
||||||
pub use crate::update::index_documents::helpers::CursorClonableMmap;
|
pub use crate::update::index_documents::helpers::CursorClonableMmap;
|
||||||
|
use crate::update::index_documents::parallel::ImmutableObkvs;
|
||||||
use crate::update::{
|
use crate::update::{
|
||||||
IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
|
IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
|
||||||
};
|
};
|
||||||
use crate::vector::EmbeddingConfigs;
|
use crate::vector::EmbeddingConfigs;
|
||||||
use crate::{CboRoaringBitmapCodec, Index, Result};
|
use crate::{CboRoaringBitmapCodec, Index, Object, Result};
|
||||||
|
|
||||||
static MERGED_DATABASE_COUNT: usize = 7;
|
static MERGED_DATABASE_COUNT: usize = 7;
|
||||||
static PREFIX_DATABASE_COUNT: usize = 4;
|
static PREFIX_DATABASE_COUNT: usize = 4;
|
||||||
@ -172,6 +176,141 @@ where
|
|||||||
Ok((self, Ok(indexed_documents)))
|
Ok((self, Ok(indexed_documents)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
|
||||||
|
pub fn edit_documents(
|
||||||
|
self,
|
||||||
|
documents: &RoaringBitmap,
|
||||||
|
context: Option<Object>,
|
||||||
|
code: &str,
|
||||||
|
) -> Result<(Self, StdResult<(u64, u64), UserError>)> {
|
||||||
|
// Early return when there is no document to edit
|
||||||
|
if documents.is_empty() {
|
||||||
|
return Ok((self, Ok((0, 0))));
|
||||||
|
}
|
||||||
|
|
||||||
|
fn rhaimap_to_object(map: rhai::Map) -> Object {
|
||||||
|
let mut output = Object::new();
|
||||||
|
for (key, value) in map {
|
||||||
|
let value = serde_json::to_value(&value).unwrap();
|
||||||
|
output.insert(key.into(), value);
|
||||||
|
}
|
||||||
|
output
|
||||||
|
}
|
||||||
|
|
||||||
|
// Setup the security and limits of the Engine
|
||||||
|
let mut engine = Engine::new();
|
||||||
|
engine.set_optimization_level(OptimizationLevel::Full);
|
||||||
|
engine.set_max_call_levels(1000);
|
||||||
|
// It is an arbitrary value. We need to let users define this in the settings.
|
||||||
|
engine.set_max_operations(1_000_000);
|
||||||
|
engine.set_max_variables(1000);
|
||||||
|
engine.set_max_functions(30);
|
||||||
|
engine.set_max_expr_depths(100, 1000);
|
||||||
|
engine.set_max_string_size(1024 * 1024 * 1024); // 1 GiB
|
||||||
|
engine.set_max_array_size(10_000);
|
||||||
|
engine.set_max_map_size(10_000);
|
||||||
|
|
||||||
|
let ast = engine.compile(code).map_err(UserError::DocumentEditionCompilationError)?;
|
||||||
|
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||||
|
let primary_key = self.index.primary_key(self.wtxn)?.unwrap();
|
||||||
|
let mut documents_batch_builder = tempfile::tempfile().map(DocumentsBatchBuilder::new)?;
|
||||||
|
let mut documents_to_remove = RoaringBitmap::new();
|
||||||
|
|
||||||
|
let context: Option<Dynamic> = match context {
|
||||||
|
Some(context) => {
|
||||||
|
Some(serde_json::from_value(context.into()).map_err(InternalError::SerdeJson)?)
|
||||||
|
}
|
||||||
|
None => None,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum DocumentEdition {
|
||||||
|
Deleted(crate::DocumentId),
|
||||||
|
Edited(Object),
|
||||||
|
Nothing,
|
||||||
|
}
|
||||||
|
|
||||||
|
let immutable_obkvs = ImmutableObkvs::new(
|
||||||
|
self.wtxn,
|
||||||
|
self.index.documents,
|
||||||
|
fields_ids_map.clone(),
|
||||||
|
documents.clone(),
|
||||||
|
)?;
|
||||||
|
|
||||||
|
let processing = documents.into_iter().par_bridge().map(|docid| {
|
||||||
|
// safety: Both documents *must* exists in the database as
|
||||||
|
// their IDs comes from the list of documents ids.
|
||||||
|
let rhai_document = immutable_obkvs.rhai_map(docid)?.unwrap();
|
||||||
|
let json_document = immutable_obkvs.json_map(docid)?.unwrap();
|
||||||
|
let document_id = &json_document[primary_key];
|
||||||
|
|
||||||
|
let mut scope = Scope::new();
|
||||||
|
if let Some(context) = context.as_ref().cloned() {
|
||||||
|
scope.push_constant_dynamic("context", context.clone());
|
||||||
|
}
|
||||||
|
scope.push("doc", rhai_document);
|
||||||
|
// That's were the magic happens. We run the user script
|
||||||
|
// which edits "doc" scope variable reprensenting the document
|
||||||
|
// and ignore the output and even the type of it, i.e., Dynamic.
|
||||||
|
let _ = engine
|
||||||
|
.eval_ast_with_scope::<Dynamic>(&mut scope, &ast)
|
||||||
|
.map_err(UserError::DocumentEditionRuntimeError)?;
|
||||||
|
|
||||||
|
match scope.remove::<Dynamic>("doc") {
|
||||||
|
// If the "doc" variable has set to (), we effectively delete the document.
|
||||||
|
Some(doc) if doc.is_unit() => Ok(DocumentEdition::Deleted(docid)),
|
||||||
|
None => unreachable!("missing doc variable from the Rhai scope"),
|
||||||
|
Some(document) => match document.try_cast() {
|
||||||
|
Some(document) => {
|
||||||
|
let new_document = rhaimap_to_object(document);
|
||||||
|
// Note: This condition is not perfect. Sometimes it detect changes
|
||||||
|
// like with floating points numbers and consider updating
|
||||||
|
// the document even if nothing actually changed.
|
||||||
|
if json_document != new_document {
|
||||||
|
if Some(document_id) != new_document.get(primary_key) {
|
||||||
|
Err(Error::UserError(
|
||||||
|
UserError::DocumentEditionCannotModifyPrimaryKey,
|
||||||
|
))
|
||||||
|
} else {
|
||||||
|
Ok(DocumentEdition::Edited(new_document))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Ok(DocumentEdition::Nothing)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None => Err(Error::UserError(UserError::DocumentEditionDocumentMustBeObject)),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
rayon_par_bridge::par_bridge(100, processing, |iterator| {
|
||||||
|
for result in iterator {
|
||||||
|
if (self.should_abort)() {
|
||||||
|
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||||
|
}
|
||||||
|
|
||||||
|
match result? {
|
||||||
|
DocumentEdition::Deleted(docid) => {
|
||||||
|
documents_to_remove.push(docid);
|
||||||
|
}
|
||||||
|
DocumentEdition::Edited(new_document) => {
|
||||||
|
documents_batch_builder.append_json_object(&new_document)?;
|
||||||
|
}
|
||||||
|
DocumentEdition::Nothing => (),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let file = documents_batch_builder.into_inner()?;
|
||||||
|
let reader = DocumentsBatchReader::from_reader(file)?;
|
||||||
|
|
||||||
|
let (this, removed) = self.remove_documents_from_db_no_batch(&documents_to_remove)?;
|
||||||
|
let (this, result) = this.add_documents(reader)?;
|
||||||
|
|
||||||
|
Ok((this, result.map(|added| (removed, added))))
|
||||||
|
}
|
||||||
|
|
||||||
pub fn with_embedders(mut self, embedders: EmbeddingConfigs) -> Self {
|
pub fn with_embedders(mut self, embedders: EmbeddingConfigs) -> Self {
|
||||||
self.embedders = embedders;
|
self.embedders = embedders;
|
||||||
self
|
self
|
||||||
|
86
milli/src/update/index_documents/parallel.rs
Normal file
86
milli/src/update/index_documents/parallel.rs
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
use heed::types::Bytes;
|
||||||
|
use heed::{Database, RoTxn};
|
||||||
|
use obkv::KvReaderU16;
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use crate::{all_obkv_to_json, DocumentId, FieldsIdsMap, Object, ObkvCodec, Result, BEU32};
|
||||||
|
|
||||||
|
pub struct ImmutableObkvs<'t> {
|
||||||
|
ids: RoaringBitmap,
|
||||||
|
fields_ids_map: FieldsIdsMap,
|
||||||
|
slices: Vec<&'t [u8]>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'t> ImmutableObkvs<'t> {
|
||||||
|
/// Creates the structure by fetching all the OBKVs
|
||||||
|
/// and keeping the transaction making the pointers valid.
|
||||||
|
pub fn new(
|
||||||
|
rtxn: &'t RoTxn,
|
||||||
|
documents_database: Database<BEU32, ObkvCodec>,
|
||||||
|
fields_ids_map: FieldsIdsMap,
|
||||||
|
subset: RoaringBitmap,
|
||||||
|
) -> heed::Result<Self> {
|
||||||
|
let mut slices = Vec::new();
|
||||||
|
let documents_database = documents_database.remap_data_type::<Bytes>();
|
||||||
|
for docid in &subset {
|
||||||
|
let slice = documents_database.get(rtxn, &docid)?.unwrap();
|
||||||
|
slices.push(slice);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(ImmutableObkvs { ids: subset, fields_ids_map, slices })
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the OBKVs identified by the given ID.
|
||||||
|
pub fn obkv(&self, docid: DocumentId) -> heed::Result<Option<KvReaderU16<'t>>> {
|
||||||
|
match self
|
||||||
|
.ids
|
||||||
|
.rank(docid)
|
||||||
|
.checked_sub(1)
|
||||||
|
.and_then(|offset| self.slices.get(offset as usize))
|
||||||
|
{
|
||||||
|
Some(bytes) => Ok(Some(KvReaderU16::new(bytes))),
|
||||||
|
None => Ok(None),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the owned rhai::Map identified by the given ID.
|
||||||
|
pub fn rhai_map(&self, docid: DocumentId) -> Result<Option<rhai::Map>> {
|
||||||
|
let obkv = match self.obkv(docid) {
|
||||||
|
Ok(Some(obkv)) => obkv,
|
||||||
|
Ok(None) => return Ok(None),
|
||||||
|
Err(e) => return Err(e.into()),
|
||||||
|
};
|
||||||
|
|
||||||
|
let all_keys = obkv.iter().map(|(k, _v)| k).collect::<Vec<_>>();
|
||||||
|
let map: Result<rhai::Map> = all_keys
|
||||||
|
.iter()
|
||||||
|
.copied()
|
||||||
|
.flat_map(|id| obkv.get(id).map(|value| (id, value)))
|
||||||
|
.map(|(id, value)| {
|
||||||
|
let name = self.fields_ids_map.name(id).ok_or(
|
||||||
|
crate::error::FieldIdMapMissingEntry::FieldId {
|
||||||
|
field_id: id,
|
||||||
|
process: "all_obkv_to_rhaimap",
|
||||||
|
},
|
||||||
|
)?;
|
||||||
|
let value = serde_json::from_slice(value)
|
||||||
|
.map_err(crate::error::InternalError::SerdeJson)?;
|
||||||
|
Ok((name.into(), value))
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
map.map(Some)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn json_map(&self, docid: DocumentId) -> Result<Option<Object>> {
|
||||||
|
let obkv = match self.obkv(docid) {
|
||||||
|
Ok(Some(obkv)) => obkv,
|
||||||
|
Ok(None) => return Ok(None),
|
||||||
|
Err(e) => return Err(e.into()),
|
||||||
|
};
|
||||||
|
|
||||||
|
all_obkv_to_json(obkv, &self.fields_ids_map).map(Some)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe impl Sync for ImmutableObkvs<'_> {}
|
Loading…
Reference in New Issue
Block a user