diff --git a/dump/src/lib.rs b/dump/src/lib.rs index b1b6807a0..ed76d708e 100644 --- a/dump/src/lib.rs +++ b/dump/src/lib.rs @@ -101,6 +101,9 @@ pub enum KindDump { documents_ids: Vec, }, DocumentClear, + DocumentDeletionByFilter { + filter: serde_json::Value, + }, Settings { settings: Box>, is_deletion: bool, @@ -166,6 +169,9 @@ impl From for KindDump { KindWithContent::DocumentDeletion { documents_ids, .. } => { KindDump::DocumentDeletion { documents_ids } } + KindWithContent::DocumentDeletionByFilter { filter_expr, .. } => { + KindDump::DocumentDeletionByFilter { filter: filter_expr } + } KindWithContent::DocumentClear { .. } => KindDump::DocumentClear, KindWithContent::SettingsUpdate { new_settings, diff --git a/index-scheduler/src/autobatcher.rs b/index-scheduler/src/autobatcher.rs index 24625a7fb..d738cc5e4 100644 --- a/index-scheduler/src/autobatcher.rs +++ b/index-scheduler/src/autobatcher.rs @@ -25,6 +25,7 @@ enum AutobatchKind { primary_key: Option, }, DocumentDeletion, + DocumentDeletionByFilter, DocumentClear, Settings { allow_index_creation: bool, @@ -64,6 +65,9 @@ impl From for AutobatchKind { } => AutobatchKind::DocumentImport { method, allow_index_creation, primary_key }, KindWithContent::DocumentDeletion { .. } => AutobatchKind::DocumentDeletion, KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear, + KindWithContent::DocumentDeletionByFilter { .. } => { + AutobatchKind::DocumentDeletionByFilter + } KindWithContent::SettingsUpdate { allow_index_creation, is_deletion, .. } => { AutobatchKind::Settings { allow_index_creation: allow_index_creation && !is_deletion, @@ -97,6 +101,9 @@ pub enum BatchKind { DocumentDeletion { deletion_ids: Vec, }, + DocumentDeletionByFilter { + id: TaskId, + }, ClearAndSettings { other: Vec, allow_index_creation: bool, @@ -195,6 +202,9 @@ impl BatchKind { K::DocumentDeletion => { (Continue(BatchKind::DocumentDeletion { deletion_ids: vec![task_id] }), false) } + K::DocumentDeletionByFilter => { + (Break(BatchKind::DocumentDeletionByFilter { id: task_id }), false) + } K::Settings { allow_index_creation } => ( Continue(BatchKind::Settings { allow_index_creation, settings_ids: vec![task_id] }), allow_index_creation, @@ -212,7 +222,7 @@ impl BatchKind { match (self, kind) { // We don't batch any of these operations - (this, K::IndexCreation | K::IndexUpdate | K::IndexSwap) => Break(this), + (this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentDeletionByFilter) => Break(this), // We must not batch tasks that don't have the same index creation rights if the index doesn't already exists. (this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => { Break(this) @@ -471,7 +481,8 @@ impl BatchKind { BatchKind::IndexCreation { .. } | BatchKind::IndexDeletion { .. } | BatchKind::IndexUpdate { .. } - | BatchKind::IndexSwap { .. }, + | BatchKind::IndexSwap { .. } + | BatchKind::DocumentDeletionByFilter { .. }, _, ) => { unreachable!() diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 9c24e8e9d..c88234809 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -28,9 +28,10 @@ use meilisearch_types::heed::{RoTxn, RwTxn}; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; use meilisearch_types::milli::heed::CompactionOption; use meilisearch_types::milli::update::{ - DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod, Settings as MilliSettings, + DeleteDocuments, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod, + Settings as MilliSettings, }; -use meilisearch_types::milli::{self, BEU32}; +use meilisearch_types::milli::{self, Filter, BEU32}; use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked}; use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task}; use meilisearch_types::{compression, Index, VERSION_FILE_NAME}; @@ -65,6 +66,10 @@ pub(crate) enum Batch { op: IndexOperation, must_create_index: bool, }, + IndexDocumentDeletionByFilter { + index_uid: String, + task: Task, + }, IndexCreation { index_uid: String, primary_key: Option, @@ -149,6 +154,7 @@ impl Batch { | Batch::TaskDeletion(task) | Batch::Dump(task) | Batch::IndexCreation { task, .. } + | Batch::IndexDocumentDeletionByFilter { task, .. } | Batch::IndexUpdate { task, .. } => vec![task.uid], Batch::SnapshotCreation(tasks) | Batch::IndexDeletion { tasks, .. } => { tasks.iter().map(|task| task.uid).collect() @@ -187,7 +193,8 @@ impl Batch { IndexOperation { op, .. } => Some(op.index_uid()), IndexCreation { index_uid, .. } | IndexUpdate { index_uid, .. } - | IndexDeletion { index_uid, .. } => Some(index_uid), + | IndexDeletion { index_uid, .. } + | IndexDocumentDeletionByFilter { index_uid, .. } => Some(index_uid), } } } @@ -227,6 +234,18 @@ impl IndexScheduler { }, must_create_index, })), + BatchKind::DocumentDeletionByFilter { id } => { + let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; + match &task.kind { + KindWithContent::DocumentDeletionByFilter { index_uid, .. } => { + Ok(Some(Batch::IndexDocumentDeletionByFilter { + index_uid: index_uid.clone(), + task, + })) + } + _ => unreachable!(), + } + } BatchKind::DocumentOperation { method, operation_ids, .. } => { let tasks = self.get_existing_tasks(rtxn, operation_ids)?; let primary_key = tasks @@ -867,6 +886,51 @@ impl IndexScheduler { Ok(tasks) } + Batch::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => { + let (index_uid, filter) = + if let KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } = + &task.kind + { + (index_uid, filter_expr) + } else { + unreachable!() + }; + let index = { + let rtxn = self.env.read_txn()?; + self.index_mapper.index(&rtxn, index_uid)? + }; + let deleted_documents = delete_document_by_filter(filter, index); + let original_filter = if let Some(Details::DocumentDeletionByFilter { + original_filter, + deleted_documents: _, + }) = task.details + { + original_filter + } else { + // In the case of a `documentDeleteByFilter` the details MUST be set + unreachable!(); + }; + + match deleted_documents { + Ok(deleted_documents) => { + task.status = Status::Succeeded; + task.details = Some(Details::DocumentDeletionByFilter { + original_filter, + deleted_documents: Some(deleted_documents), + }); + } + Err(e) => { + task.status = Status::Failed; + task.details = Some(Details::DocumentDeletionByFilter { + original_filter, + deleted_documents: Some(0), + }); + task.error = Some(e.into()); + } + } + + Ok(vec![task]) + } Batch::IndexCreation { index_uid, primary_key, task } => { let wtxn = self.env.write_txn()?; if self.index_mapper.exists(&wtxn, &index_uid)? { @@ -1421,3 +1485,20 @@ impl IndexScheduler { Ok(content_files_to_delete) } } + +fn delete_document_by_filter(filter: &serde_json::Value, index: Index) -> Result { + let filter = Filter::from_json(filter)?; + Ok(if let Some(filter) = filter { + let mut wtxn = index.write_txn()?; + + let candidates = filter.evaluate(&wtxn, &index)?; + let mut delete_operation = DeleteDocuments::new(&mut wtxn, &index)?; + delete_operation.delete_documents(&candidates); + let deleted_documents = + delete_operation.execute().map(|result| result.deleted_documents)?; + wtxn.commit()?; + deleted_documents + } else { + 0 + }) +} diff --git a/index-scheduler/src/insta_snapshot.rs b/index-scheduler/src/insta_snapshot.rs index 4adea97e3..8369047b0 100644 --- a/index-scheduler/src/insta_snapshot.rs +++ b/index-scheduler/src/insta_snapshot.rs @@ -184,6 +184,9 @@ fn snapshot_details(d: &Details) -> String { provided_ids: received_document_ids, deleted_documents, } => format!("{{ received_document_ids: {received_document_ids}, deleted_documents: {deleted_documents:?} }}"), + Details::DocumentDeletionByFilter { original_filter, deleted_documents } => format!( + "{{ original_filter: {original_filter}, deleted_documents: {deleted_documents:?} }}" + ), Details::ClearAll { deleted_documents } => { format!("{{ deleted_documents: {deleted_documents:?} }}") }, diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 3dc3eb14c..af20ba1ae 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -1264,6 +1264,12 @@ impl<'a> Dump<'a> { documents_ids, index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, }, + KindDump::DocumentDeletionByFilter { filter } => { + KindWithContent::DocumentDeletionByFilter { + filter_expr: filter, + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + } + } KindDump::DocumentClear => KindWithContent::DocumentClear { index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, }, diff --git a/index-scheduler/src/utils.rs b/index-scheduler/src/utils.rs index 7718e1af0..97f437bed 100644 --- a/index-scheduler/src/utils.rs +++ b/index-scheduler/src/utils.rs @@ -239,6 +239,7 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) { match &mut task.kind { K::DocumentAdditionOrUpdate { index_uid, .. } => index_uids.push(index_uid), K::DocumentDeletion { index_uid, .. } => index_uids.push(index_uid), + K::DocumentDeletionByFilter { index_uid, .. } => index_uids.push(index_uid), K::DocumentClear { index_uid } => index_uids.push(index_uid), K::SettingsUpdate { index_uid, .. } => index_uids.push(index_uid), K::IndexDeletion { index_uid } => index_uids.push(index_uid), @@ -464,6 +465,29 @@ impl IndexScheduler { } } } + Details::DocumentDeletionByFilter { deleted_documents, original_filter: _ } => { + assert_eq!(kind.as_kind(), Kind::DocumentDeletionByFilter); + let (index_uid, _) = if let KindWithContent::DocumentDeletionByFilter { + ref index_uid, + ref filter_expr, + } = kind + { + (index_uid, filter_expr) + } else { + unreachable!() + }; + assert_eq!(&task_index_uid.unwrap(), index_uid); + + match status { + Status::Enqueued | Status::Processing => (), + Status::Succeeded => { + assert!(deleted_documents.is_some()); + } + Status::Failed | Status::Canceled => { + assert!(deleted_documents == Some(0)); + } + } + } Details::ClearAll { deleted_documents } => { assert!(matches!( kind.as_kind(), diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index a201db7ac..e4ca39c8b 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -218,6 +218,7 @@ InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ; InvalidDocumentId , InvalidRequest , BAD_REQUEST ; InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ; InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ; +InvalidDocumentDeleteFilter , InvalidRequest , BAD_REQUEST ; InvalidIndexLimit , InvalidRequest , BAD_REQUEST ; InvalidIndexOffset , InvalidRequest , BAD_REQUEST ; InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ; @@ -315,6 +316,7 @@ impl ErrorCode for milli::Error { UserError::MaxDatabaseSizeReached => Code::DatabaseSizeLimitReached, UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded, UserError::InvalidFilter(_) => Code::InvalidSearchFilter, + UserError::InvalidFilterExpression(..) => Code::InvalidSearchFilter, UserError::MissingDocumentId { .. } => Code::MissingDocumentId, UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => { Code::InvalidDocumentId diff --git a/meilisearch-types/src/tasks.rs b/meilisearch-types/src/tasks.rs index 3b7efb97b..e746a53b8 100644 --- a/meilisearch-types/src/tasks.rs +++ b/meilisearch-types/src/tasks.rs @@ -49,6 +49,7 @@ impl Task { | IndexSwap { .. } => None, DocumentAdditionOrUpdate { index_uid, .. } | DocumentDeletion { index_uid, .. } + | DocumentDeletionByFilter { index_uid, .. } | DocumentClear { index_uid } | SettingsUpdate { index_uid, .. } | IndexCreation { index_uid, .. } @@ -67,6 +68,7 @@ impl Task { match self.kind { KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => Some(content_file), KindWithContent::DocumentDeletion { .. } + | KindWithContent::DocumentDeletionByFilter { .. } | KindWithContent::DocumentClear { .. } | KindWithContent::SettingsUpdate { .. } | KindWithContent::IndexDeletion { .. } @@ -96,6 +98,10 @@ pub enum KindWithContent { index_uid: String, documents_ids: Vec, }, + DocumentDeletionByFilter { + index_uid: String, + filter_expr: serde_json::Value, + }, DocumentClear { index_uid: String, }, @@ -145,6 +151,7 @@ impl KindWithContent { match self { KindWithContent::DocumentAdditionOrUpdate { .. } => Kind::DocumentAdditionOrUpdate, KindWithContent::DocumentDeletion { .. } => Kind::DocumentDeletion, + KindWithContent::DocumentDeletionByFilter { .. } => Kind::DocumentDeletion, KindWithContent::DocumentClear { .. } => Kind::DocumentDeletion, KindWithContent::SettingsUpdate { .. } => Kind::SettingsUpdate, KindWithContent::IndexCreation { .. } => Kind::IndexCreation, @@ -168,6 +175,7 @@ impl KindWithContent { | TaskDeletion { .. } => vec![], DocumentAdditionOrUpdate { index_uid, .. } | DocumentDeletion { index_uid, .. } + | DocumentDeletionByFilter { index_uid, .. } | DocumentClear { index_uid } | SettingsUpdate { index_uid, .. } | IndexCreation { index_uid, .. } @@ -200,6 +208,12 @@ impl KindWithContent { deleted_documents: None, }) } + KindWithContent::DocumentDeletionByFilter { index_uid: _, filter_expr } => { + Some(Details::DocumentDeletionByFilter { + original_filter: filter_expr.to_string(), + deleted_documents: None, + }) + } KindWithContent::DocumentClear { .. } | KindWithContent::IndexDeletion { .. } => { Some(Details::ClearAll { deleted_documents: None }) } @@ -242,6 +256,12 @@ impl KindWithContent { deleted_documents: Some(0), }) } + KindWithContent::DocumentDeletionByFilter { index_uid: _, filter_expr } => { + Some(Details::DocumentDeletionByFilter { + original_filter: filter_expr.to_string(), + deleted_documents: Some(0), + }) + } KindWithContent::DocumentClear { .. } => { Some(Details::ClearAll { deleted_documents: None }) } @@ -282,6 +302,7 @@ impl From<&KindWithContent> for Option
{ }) } KindWithContent::DocumentDeletion { .. } => None, + KindWithContent::DocumentDeletionByFilter { .. } => None, KindWithContent::DocumentClear { .. } => None, KindWithContent::SettingsUpdate { new_settings, .. } => { Some(Details::SettingsUpdate { settings: new_settings.clone() }) @@ -374,6 +395,7 @@ impl std::error::Error for ParseTaskStatusError {} pub enum Kind { DocumentAdditionOrUpdate, DocumentDeletion, + DocumentDeletionByFilter, SettingsUpdate, IndexCreation, IndexDeletion, @@ -390,6 +412,7 @@ impl Kind { match self { Kind::DocumentAdditionOrUpdate | Kind::DocumentDeletion + | Kind::DocumentDeletionByFilter | Kind::SettingsUpdate | Kind::IndexCreation | Kind::IndexDeletion @@ -407,6 +430,7 @@ impl Display for Kind { match self { Kind::DocumentAdditionOrUpdate => write!(f, "documentAdditionOrUpdate"), Kind::DocumentDeletion => write!(f, "documentDeletion"), + Kind::DocumentDeletionByFilter => write!(f, "documentDeletionByFilter"), Kind::SettingsUpdate => write!(f, "settingsUpdate"), Kind::IndexCreation => write!(f, "indexCreation"), Kind::IndexDeletion => write!(f, "indexDeletion"), @@ -478,6 +502,7 @@ pub enum Details { SettingsUpdate { settings: Box> }, IndexInfo { primary_key: Option }, DocumentDeletion { provided_ids: usize, deleted_documents: Option }, + DocumentDeletionByFilter { original_filter: String, deleted_documents: Option }, ClearAll { deleted_documents: Option }, TaskCancelation { matched_tasks: u64, canceled_tasks: Option, original_filter: String }, TaskDeletion { matched_tasks: u64, deleted_tasks: Option, original_filter: String }, @@ -493,6 +518,9 @@ impl Details { *indexed_documents = Some(0) } Self::DocumentDeletion { deleted_documents, .. } => *deleted_documents = Some(0), + Self::DocumentDeletionByFilter { deleted_documents, .. } => { + *deleted_documents = Some(0) + } Self::ClearAll { deleted_documents } => *deleted_documents = Some(0), Self::TaskCancelation { canceled_tasks, .. } => *canceled_tasks = Some(0), Self::TaskDeletion { deleted_tasks, .. } => *deleted_tasks = Some(0), diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index 4d295db57..6223b9db7 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -64,6 +64,7 @@ pub enum DocumentDeletionKind { PerDocumentId, ClearAll, PerBatch, + PerFilter, } pub trait Analytics: Sync + Send { diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 4f02ed60c..3e40c09e8 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -949,6 +949,7 @@ pub struct DocumentsDeletionAggregator { per_document_id: bool, clear_all: bool, per_batch: bool, + per_filter: bool, } impl DocumentsDeletionAggregator { @@ -962,6 +963,7 @@ impl DocumentsDeletionAggregator { DocumentDeletionKind::PerDocumentId => ret.per_document_id = true, DocumentDeletionKind::ClearAll => ret.clear_all = true, DocumentDeletionKind::PerBatch => ret.per_batch = true, + DocumentDeletionKind::PerFilter => ret.per_filter = true, } ret @@ -981,6 +983,7 @@ impl DocumentsDeletionAggregator { self.per_document_id |= other.per_document_id; self.clear_all |= other.clear_all; self.per_batch |= other.per_batch; + self.per_filter |= other.per_filter; } pub fn into_event(self, user: &User, event_name: &str) -> Option { diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index 850803a07..9d0a03c34 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -20,6 +20,8 @@ pub enum MeilisearchHttpError { InvalidContentType(String, Vec), #[error("Document `{0}` not found.")] DocumentNotFound(String), + #[error("Sending an empty filter is forbidden.")] + EmptyFilter, #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))] InvalidExpression(&'static [&'static str], Value), #[error("A {0} payload is missing.")] @@ -58,6 +60,7 @@ impl ErrorCode for MeilisearchHttpError { MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload, MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType, MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound, + MeilisearchHttpError::EmptyFilter => Code::InvalidDocumentDeleteFilter, MeilisearchHttpError::InvalidExpression(_, _) => Code::InvalidSearchFilter, MeilisearchHttpError::PayloadTooLarge => Code::PayloadTooLarge, MeilisearchHttpError::SwapIndexPayloadWrongLength(_) => Code::InvalidSwapIndexes, diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index f222f5b69..a6ee8d16e 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -4,13 +4,13 @@ use actix_web::http::header::CONTENT_TYPE; use actix_web::web::Data; use actix_web::{web, HttpMessage, HttpRequest, HttpResponse}; use bstr::ByteSlice; -use deserr::actix_web::AwebQueryParameter; +use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::Deserr; use futures::StreamExt; use index_scheduler::IndexScheduler; use log::debug; use meilisearch_types::deserr::query_params::Param; -use meilisearch_types::deserr::DeserrQueryParamError; +use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType}; use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::{Code, ResponseError}; @@ -71,8 +71,11 @@ pub fn configure(cfg: &mut web::ServiceConfig) { .route(web::put().to(SeqHandler(update_documents))) .route(web::delete().to(SeqHandler(clear_all_documents))), ) - // this route needs to be before the /documents/{document_id} to match properly - .service(web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents)))) + // these routes need to be before the /documents/{document_id} to match properly + .service( + web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents_batch))), + ) + .service(web::resource("/delete").route(web::post().to(SeqHandler(delete_documents_by_filter)))) .service( web::resource("/{document_id}") .route(web::get().to(SeqHandler(get_document))) @@ -373,7 +376,7 @@ async fn document_addition( Ok(task.into()) } -pub async fn delete_documents( +pub async fn delete_documents_batch( index_scheduler: GuardedData, Data>, index_uid: web::Path, body: web::Json>, @@ -399,6 +402,42 @@ pub async fn delete_documents( Ok(HttpResponse::Accepted().json(task)) } +#[derive(Debug, Deserr)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +pub struct DocumentDeletionByFilter { + #[deserr(error = DeserrJsonError)] + filter: Value, +} + +pub async fn delete_documents_by_filter( + index_scheduler: GuardedData, Data>, + index_uid: web::Path, + body: AwebJson, + req: HttpRequest, + analytics: web::Data, +) -> Result { + debug!("called with params: {:?}", body); + let index_uid = IndexUid::try_from(index_uid.into_inner())?; + let index_uid = index_uid.into_inner(); + let filter = body.into_inner().filter; + + analytics.delete_documents(DocumentDeletionKind::PerFilter, &req); + + // we ensure the filter is well formed before enqueuing it + || -> Result<_, ResponseError> { + Ok(crate::search::parse_filter(&filter)?.ok_or(MeilisearchHttpError::EmptyFilter)?) + }() + // and whatever was the error, the error code should always be an InvalidDocumentDeleteFilter + .map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentDeleteFilter))?; + let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter }; + + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + + debug!("returns: {:?}", task); + Ok(HttpResponse::Accepted().json(task)) +} + pub async fn clear_all_documents( index_scheduler: GuardedData, Data>, index_uid: web::Path, diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index 1356ff722..cab0f7197 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -133,6 +133,14 @@ impl From
for DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }, + Details::DocumentDeletionByFilter { original_filter, deleted_documents } => { + DetailsView { + provided_ids: Some(0), + original_filter: Some(original_filter), + deleted_documents: Some(deleted_documents), + ..DetailsView::default() + } + } Details::ClearAll { deleted_documents } => { DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() } } @@ -721,7 +729,7 @@ mod tests { let err = deserr_query_params::(params).unwrap_err(); snapshot!(meili_snap::json_string!(err), @r###" { - "message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", + "message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", "code": "invalid_task_types", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_task_types" diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 7e4a7da6a..81c5972b8 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -745,7 +745,7 @@ fn format_value>( } } -fn parse_filter(facets: &Value) -> Result, MeilisearchHttpError> { +pub fn parse_filter(facets: &Value) -> Result, MeilisearchHttpError> { match facets { Value::String(expr) => { let condition = Filter::from_str(expr)?; diff --git a/meilisearch/tests/common/index.rs b/meilisearch/tests/common/index.rs index 96ac5ce2e..38b43e910 100644 --- a/meilisearch/tests/common/index.rs +++ b/meilisearch/tests/common/index.rs @@ -225,6 +225,11 @@ impl Index<'_> { self.service.delete(url).await } + pub async fn delete_document_by_filter(&self, body: Value) -> (Value, StatusCode) { + let url = format!("/indexes/{}/documents/delete", urlencode(self.uid.as_ref())); + self.service.post_encoded(url, body, self.encoder).await + } + pub async fn clear_all_documents(&self) -> (Value, StatusCode) { let url = format!("/indexes/{}/documents", urlencode(self.uid.as_ref())); self.service.delete(url).await diff --git a/meilisearch/tests/documents/delete_documents.rs b/meilisearch/tests/documents/delete_documents.rs index e36e2f033..8f6ae1985 100644 --- a/meilisearch/tests/documents/delete_documents.rs +++ b/meilisearch/tests/documents/delete_documents.rs @@ -1,3 +1,4 @@ +use meili_snap::{json_string, snapshot}; use serde_json::json; use crate::common::{GetAllDocumentsOptions, Server}; @@ -135,3 +136,254 @@ async fn delete_no_document_batch() { assert_eq!(code, 200); assert_eq!(response["results"].as_array().unwrap().len(), 3); } + +#[actix_rt::test] +async fn delete_document_by_filter() { + let server = Server::new().await; + let index = server.index("doggo"); + index.update_settings_filterable_attributes(json!(["color"])).await; + index + .add_documents( + json!([ + { "id": 0, "color": "red" }, + { "id": 1, "color": "blue" }, + { "id": 2, "color": "blue" }, + { "id": 3 }, + ]), + Some("id"), + ) + .await; + index.wait_task(1).await; + let (response, code) = + index.delete_document_by_filter(json!({ "filter": "color = blue"})).await; + snapshot!(code, @"202 Accepted"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "taskUid": 2, + "indexUid": "doggo", + "status": "enqueued", + "type": "documentDeletion", + "enqueuedAt": "[date]" + } + "###); + + let response = index.wait_task(2).await; + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" + { + "uid": 2, + "indexUid": "doggo", + "status": "succeeded", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "providedIds": 0, + "deletedDocuments": 2, + "originalFilter": "\"color = blue\"" + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "id": 0, + "color": "red" + }, + { + "id": 3 + } + ], + "offset": 0, + "limit": 20, + "total": 2 + } + "###); + + let (response, code) = + index.delete_document_by_filter(json!({ "filter": "color NOT EXISTS"})).await; + snapshot!(code, @"202 Accepted"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" + { + "taskUid": 3, + "indexUid": "doggo", + "status": "enqueued", + "type": "documentDeletion", + "enqueuedAt": "[date]" + } + "###); + + let response = index.wait_task(3).await; + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" + { + "uid": 3, + "indexUid": "doggo", + "status": "succeeded", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "providedIds": 0, + "deletedDocuments": 1, + "originalFilter": "\"color NOT EXISTS\"" + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "id": 0, + "color": "red" + } + ], + "offset": 0, + "limit": 20, + "total": 1 + } + "###); +} + +#[actix_rt::test] +async fn delete_document_by_complex_filter() { + let server = Server::new().await; + let index = server.index("doggo"); + index.update_settings_filterable_attributes(json!(["color"])).await; + index + .add_documents( + json!([ + { "id": 0, "color": "red" }, + { "id": 1, "color": "blue" }, + { "id": 2, "color": "blue" }, + { "id": 3, "color": "green" }, + { "id": 4 }, + ]), + Some("id"), + ) + .await; + index.wait_task(1).await; + let (response, code) = index + .delete_document_by_filter( + json!({ "filter": ["color != red", "color != green", "color EXISTS"] }), + ) + .await; + snapshot!(code, @"202 Accepted"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "taskUid": 2, + "indexUid": "doggo", + "status": "enqueued", + "type": "documentDeletion", + "enqueuedAt": "[date]" + } + "###); + + let response = index.wait_task(2).await; + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" + { + "uid": 2, + "indexUid": "doggo", + "status": "succeeded", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "providedIds": 0, + "deletedDocuments": 2, + "originalFilter": "[\"color != red\",\"color != green\",\"color EXISTS\"]" + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "id": 0, + "color": "red" + }, + { + "id": 3, + "color": "green" + }, + { + "id": 4 + } + ], + "offset": 0, + "limit": 20, + "total": 3 + } + "###); + + let (response, code) = index + .delete_document_by_filter(json!({ "filter": [["color = green", "color NOT EXISTS"]] })) + .await; + snapshot!(code, @"202 Accepted"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" + { + "taskUid": 3, + "indexUid": "doggo", + "status": "enqueued", + "type": "documentDeletion", + "enqueuedAt": "[date]" + } + "###); + + let response = index.wait_task(3).await; + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" + { + "uid": 3, + "indexUid": "doggo", + "status": "succeeded", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "providedIds": 0, + "deletedDocuments": 4, + "originalFilter": "[[\"color = green\",\"color NOT EXISTS\"]]" + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "id": 0, + "color": "red" + } + ], + "offset": 0, + "limit": 20, + "total": 1 + } + "###); +} diff --git a/meilisearch/tests/documents/errors.rs b/meilisearch/tests/documents/errors.rs index bf55188ba..b857d4e1d 100644 --- a/meilisearch/tests/documents/errors.rs +++ b/meilisearch/tests/documents/errors.rs @@ -418,3 +418,155 @@ async fn update_documents_csv_delimiter_with_bad_content_type() { } "###); } + +#[actix_rt::test] +async fn delete_document_by_filter() { + let server = Server::new().await; + let index = server.index("doggo"); + + // send a bad payload type + let (response, code) = index.delete_document_by_filter(json!("hello")).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Invalid value type: expected an object, but found a string: `\"hello\"`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // send bad payload type + let (response, code) = index.delete_document_by_filter(json!({ "filter": true })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.", + "code": "invalid_document_delete_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_document_delete_filter" + } + "###); + + // send bad filter + let (response, code) = index.delete_document_by_filter(json!({ "filter": "hello"})).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello", + "code": "invalid_document_delete_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_document_delete_filter" + } + "###); + + // send empty filter + let (response, code) = index.delete_document_by_filter(json!({ "filter": ""})).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Sending an empty filter is forbidden.", + "code": "invalid_document_delete_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_document_delete_filter" + } + "###); + + // index does not exists + let (response, code) = + index.delete_document_by_filter(json!({ "filter": "doggo = bernese"})).await; + snapshot!(code, @"202 Accepted"); + let response = server.wait_task(response["taskUid"].as_u64().unwrap()).await; + snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]"}), @r###" + { + "uid": 0, + "indexUid": "doggo", + "status": "failed", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "providedIds": 0, + "deletedDocuments": 0, + "originalFilter": "\"doggo = bernese\"" + }, + "error": { + "message": "Index `doggo` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (response, code) = index.create(None).await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response["taskUid"].as_u64().unwrap()).await; + + // no filterable are set + let (response, code) = + index.delete_document_by_filter(json!({ "filter": "doggo = bernese"})).await; + snapshot!(code, @"202 Accepted"); + let response = server.wait_task(response["taskUid"].as_u64().unwrap()).await; + snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]"}), @r###" + { + "uid": 2, + "indexUid": "doggo", + "status": "failed", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "providedIds": 0, + "deletedDocuments": 0, + "originalFilter": "\"doggo = bernese\"" + }, + "error": { + "message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (response, code) = index.update_settings_filterable_attributes(json!(["doggo"])).await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response["taskUid"].as_u64().unwrap()).await; + + // not filterable while there is a filterable attribute + let (response, code) = + index.delete_document_by_filter(json!({ "filter": "catto = jorts"})).await; + snapshot!(code, @"202 Accepted"); + let response = server.wait_task(response["taskUid"].as_u64().unwrap()).await; + snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]"}), @r###" + { + "uid": 4, + "indexUid": "doggo", + "status": "failed", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "providedIds": 0, + "deletedDocuments": 0, + "originalFilter": "\"catto = jorts\"" + }, + "error": { + "message": "Attribute `catto` is not filterable. Available filterable attributes are: `doggo`.\n1:6 catto = jorts", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} diff --git a/meilisearch/tests/tasks/errors.rs b/meilisearch/tests/tasks/errors.rs index 830c4c8e7..065ff1aa9 100644 --- a/meilisearch/tests/tasks/errors.rs +++ b/meilisearch/tests/tasks/errors.rs @@ -97,7 +97,7 @@ async fn task_bad_types() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", + "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", "code": "invalid_task_types", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_task_types" @@ -108,7 +108,7 @@ async fn task_bad_types() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", + "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", "code": "invalid_task_types", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_task_types" @@ -119,7 +119,7 @@ async fn task_bad_types() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", + "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", "code": "invalid_task_types", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_task_types" diff --git a/milli/src/error.rs b/milli/src/error.rs index 9b1ec0a87..7f0faf2fd 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -112,6 +112,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco InvalidGeoField(#[from] GeoError), #[error("{0}")] InvalidFilter(String), + #[error("Invalid type for filter subexpression: `expected {}, found: {1}`.", .0.join(", "))] + InvalidFilterExpression(&'static [&'static str], Value), #[error("Attribute `{}` is not sortable. {}", .field, match .valid_fields.is_empty() { diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 19d763107..fac7b68ea 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -5,6 +5,7 @@ use std::ops::Bound::{self, Excluded, Included}; use either::Either; pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token}; use roaring::RoaringBitmap; +use serde_json::Value; use super::facet_range_search; use crate::error::{Error, UserError}; @@ -112,6 +113,52 @@ impl<'a> From> for FilterCondition<'a> { } impl<'a> Filter<'a> { + pub fn from_json(facets: &'a Value) -> Result> { + match facets { + Value::String(expr) => { + let condition = Filter::from_str(expr)?; + Ok(condition) + } + Value::Array(arr) => Self::parse_filter_array(arr), + v => Err(Error::UserError(UserError::InvalidFilterExpression( + &["String", "Array"], + v.clone(), + ))), + } + } + + fn parse_filter_array(arr: &'a [Value]) -> Result> { + let mut ands = Vec::new(); + for value in arr { + match value { + Value::String(s) => ands.push(Either::Right(s.as_str())), + Value::Array(arr) => { + let mut ors = Vec::new(); + for value in arr { + match value { + Value::String(s) => ors.push(s.as_str()), + v => { + return Err(Error::UserError(UserError::InvalidFilterExpression( + &["String"], + v.clone(), + ))) + } + } + } + ands.push(Either::Left(ors)); + } + v => { + return Err(Error::UserError(UserError::InvalidFilterExpression( + &["String", "[String]"], + v.clone(), + ))) + } + } + } + + Filter::from_array(ands) + } + pub fn from_array(array: I) -> Result> where I: IntoIterator>,