From 05cc463fbc68d94e146ad8a12284b53ff961abf5 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 1 Mar 2023 15:20:53 +0100 Subject: [PATCH 01/17] Draft implementation of filter support for /delete-by-batch route --- meilisearch/src/routes/indexes/documents.rs | 85 +++++++++++++++++++-- meilisearch/src/search.rs | 2 +- 2 files changed, 81 insertions(+), 6 deletions(-) diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index f222f5b69..5c1e385fd 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -17,6 +17,7 @@ use meilisearch_types::error::{Code, ResponseError}; use meilisearch_types::heed::RoTxn; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::update::IndexDocumentsMethod; +use meilisearch_types::milli::InternalError; use meilisearch_types::star_or::OptionStarOrList; use meilisearch_types::tasks::KindWithContent; use meilisearch_types::{milli, Document, Index}; @@ -373,22 +374,96 @@ async fn document_addition( Ok(task.into()) } +#[derive(Debug, Deserialize)] +#[serde(untagged)] +pub enum DocumentDeletionQuery { + Ids(Vec), + Object { filter: Option }, +} + +/// Parses a Json encoded document id and validate it, returning a user error when it is one. +/// FIXME: stolen from milli +fn validate_document_id_value(document_id: Value) -> String { + match document_id { + Value::String(string) => match validate_document_id(&string) { + Some(s) if s.len() == string.len() => string, + Some(s) => s.to_string(), + None => panic!(), + }, + Value::Number(number) if number.is_i64() => number.to_string(), + _content => panic!(), + } +} + +/// FIXME: stolen from milli +fn validate_document_id(document_id: &str) -> Option<&str> { + if !document_id.is_empty() + && document_id.chars().all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_')) + { + Some(document_id) + } else { + None + } +} + pub async fn delete_documents( index_scheduler: GuardedData, Data>, index_uid: web::Path, - body: web::Json>, + body: web::Json, req: HttpRequest, analytics: web::Data, ) -> Result { debug!("called with params: {:?}", body); let index_uid = IndexUid::try_from(index_uid.into_inner())?; + let index_uid = index_uid.into_inner(); analytics.delete_documents(DocumentDeletionKind::PerBatch, &req); - let ids = body - .iter() - .map(|v| v.as_str().map(String::from).unwrap_or_else(|| v.to_string())) - .collect(); + let ids = match body.into_inner() { + DocumentDeletionQuery::Ids(body) => body + .iter() + .map(|v| v.as_str().map(String::from).unwrap_or_else(|| v.to_string())) + .collect(), + DocumentDeletionQuery::Object { filter } => { + debug!("filter: {:?}", filter); + + // FIXME: spawn_blocking + if let Some(ref filter) = filter { + if let Some(facets) = crate::search::parse_filter(filter)? 
{ + debug!("facets: {:?}", facets); + + let index = index_scheduler.index(&index_uid)?; + let rtxn = index.read_txn()?; + let filtered_candidates = facets.evaluate(&rtxn, &index)?; + debug!("filtered_candidates.len(): {:?}", filtered_candidates.len()); + + // FIXME: unwraps + let primary_key = index.primary_key(&rtxn)?.unwrap(); + let primary_key = index.fields_ids_map(&rtxn)?.id(primary_key).unwrap(); + + let documents = index.documents(&rtxn, filtered_candidates.into_iter())?; + debug!("documents.len(): {:?}", documents.len()); + let documents: Vec = documents + .into_iter() + .map(|(_, document)| { + let value = document.get(primary_key).unwrap(); + let value: Value = serde_json::from_slice(value) + .map_err(InternalError::SerdeJson) + .unwrap(); + + validate_document_id_value(value) + }) + .collect(); + debug!("documents: {:?}", documents); + documents + } else { + vec![] + } + } else { + vec![] + } + } + }; let task = KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids }; diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 7e4a7da6a..81c5972b8 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -745,7 +745,7 @@ fn format_value>( } } -fn parse_filter(facets: &Value) -> Result, MeilisearchHttpError> { +pub fn parse_filter(facets: &Value) -> Result, MeilisearchHttpError> { match facets { Value::String(expr) => { let condition = Filter::from_str(expr)?; From 732c52093d732124812e8b3770f836183481a499 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 7 Mar 2023 10:02:04 +0100 Subject: [PATCH 02/17] Processing time without autobatching implementation --- dump/src/lib.rs | 6 ++ index-scheduler/src/autobatcher.rs | 15 +++- index-scheduler/src/batch.rs | 83 ++++++++++++++++++++- index-scheduler/src/insta_snapshot.rs | 3 + index-scheduler/src/lib.rs | 7 ++ index-scheduler/src/utils.rs | 24 ++++++ meilisearch-types/src/error.rs | 1 + meilisearch-types/src/tasks.rs | 33 ++++++++ meilisearch/src/routes/indexes/documents.rs | 62 +++------------ meilisearch/src/routes/tasks.rs | 7 ++ milli/src/error.rs | 2 + milli/src/search/facet/filter.rs | 47 ++++++++++++ 12 files changed, 235 insertions(+), 55 deletions(-) diff --git a/dump/src/lib.rs b/dump/src/lib.rs index b1b6807a0..a3e892c03 100644 --- a/dump/src/lib.rs +++ b/dump/src/lib.rs @@ -101,6 +101,9 @@ pub enum KindDump { documents_ids: Vec, }, DocumentClear, + DocumentDeletionByFilter { + filter: String, + }, Settings { settings: Box>, is_deletion: bool, @@ -166,6 +169,9 @@ impl From for KindDump { KindWithContent::DocumentDeletion { documents_ids, .. } => { KindDump::DocumentDeletion { documents_ids } } + KindWithContent::DocumentDeletionByFilter { filter_expr, .. } => { + KindDump::DocumentDeletionByFilter { filter: filter_expr.to_string() } + } KindWithContent::DocumentClear { .. } => KindDump::DocumentClear, KindWithContent::SettingsUpdate { new_settings, diff --git a/index-scheduler/src/autobatcher.rs b/index-scheduler/src/autobatcher.rs index 24625a7fb..d738cc5e4 100644 --- a/index-scheduler/src/autobatcher.rs +++ b/index-scheduler/src/autobatcher.rs @@ -25,6 +25,7 @@ enum AutobatchKind { primary_key: Option, }, DocumentDeletion, + DocumentDeletionByFilter, DocumentClear, Settings { allow_index_creation: bool, @@ -64,6 +65,9 @@ impl From for AutobatchKind { } => AutobatchKind::DocumentImport { method, allow_index_creation, primary_key }, KindWithContent::DocumentDeletion { .. } => AutobatchKind::DocumentDeletion, KindWithContent::DocumentClear { .. 
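// Illustrative sketch, not part of the diff: the untagged `DocumentDeletionQuery`
// introduced above lets the existing `delete-batch` route keep accepting a plain
// array of ids while also taking an object with a `filter`. Assuming that enum is
// in scope, the two payload shapes deserialize like this (later patches in the
// series move the filter form to a dedicated route):
fn deletion_payload_shapes() -> serde_json::Result<()> {
    use serde_json::json;
    // Legacy shape: a JSON array of document ids (strings or integers).
    let _by_ids: DocumentDeletionQuery = serde_json::from_value(json!(["doc-1", 2, "doc-3"]))?;
    // New shape: an object carrying a filter expression.
    let _by_filter: DocumentDeletionQuery =
        serde_json::from_value(json!({ "filter": "color = blue" }))?;
    Ok(())
}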
} => AutobatchKind::DocumentClear, + KindWithContent::DocumentDeletionByFilter { .. } => { + AutobatchKind::DocumentDeletionByFilter + } KindWithContent::SettingsUpdate { allow_index_creation, is_deletion, .. } => { AutobatchKind::Settings { allow_index_creation: allow_index_creation && !is_deletion, @@ -97,6 +101,9 @@ pub enum BatchKind { DocumentDeletion { deletion_ids: Vec, }, + DocumentDeletionByFilter { + id: TaskId, + }, ClearAndSettings { other: Vec, allow_index_creation: bool, @@ -195,6 +202,9 @@ impl BatchKind { K::DocumentDeletion => { (Continue(BatchKind::DocumentDeletion { deletion_ids: vec![task_id] }), false) } + K::DocumentDeletionByFilter => { + (Break(BatchKind::DocumentDeletionByFilter { id: task_id }), false) + } K::Settings { allow_index_creation } => ( Continue(BatchKind::Settings { allow_index_creation, settings_ids: vec![task_id] }), allow_index_creation, @@ -212,7 +222,7 @@ impl BatchKind { match (self, kind) { // We don't batch any of these operations - (this, K::IndexCreation | K::IndexUpdate | K::IndexSwap) => Break(this), + (this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentDeletionByFilter) => Break(this), // We must not batch tasks that don't have the same index creation rights if the index doesn't already exists. (this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => { Break(this) @@ -471,7 +481,8 @@ impl BatchKind { BatchKind::IndexCreation { .. } | BatchKind::IndexDeletion { .. } | BatchKind::IndexUpdate { .. } - | BatchKind::IndexSwap { .. }, + | BatchKind::IndexSwap { .. } + | BatchKind::DocumentDeletionByFilter { .. }, _, ) => { unreachable!() diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 9c24e8e9d..ff123f1e3 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -28,9 +28,10 @@ use meilisearch_types::heed::{RoTxn, RwTxn}; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; use meilisearch_types::milli::heed::CompactionOption; use meilisearch_types::milli::update::{ - DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod, Settings as MilliSettings, + DeleteDocuments, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod, + Settings as MilliSettings, }; -use meilisearch_types::milli::{self, BEU32}; +use meilisearch_types::milli::{self, Filter, BEU32}; use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked}; use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task}; use meilisearch_types::{compression, Index, VERSION_FILE_NAME}; @@ -65,6 +66,10 @@ pub(crate) enum Batch { op: IndexOperation, must_create_index: bool, }, + IndexDocumentDeletionByFilter { + index_uid: String, + task: Task, + }, IndexCreation { index_uid: String, primary_key: Option, @@ -149,6 +154,7 @@ impl Batch { | Batch::TaskDeletion(task) | Batch::Dump(task) | Batch::IndexCreation { task, .. } + | Batch::IndexDocumentDeletionByFilter { task, .. } | Batch::IndexUpdate { task, .. } => vec![task.uid], Batch::SnapshotCreation(tasks) | Batch::IndexDeletion { tasks, .. } => { tasks.iter().map(|task| task.uid).collect() @@ -187,7 +193,8 @@ impl Batch { IndexOperation { op, .. } => Some(op.index_uid()), IndexCreation { index_uid, .. } | IndexUpdate { index_uid, .. } - | IndexDeletion { index_uid, .. } => Some(index_uid), + | IndexDeletion { index_uid, .. } + | IndexDocumentDeletionByFilter { index_uid, .. 
} => Some(index_uid), } } } @@ -227,6 +234,18 @@ impl IndexScheduler { }, must_create_index, })), + BatchKind::DocumentDeletionByFilter { id } => { + let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; + match &task.kind { + KindWithContent::DocumentDeletionByFilter { index_uid, .. } => { + Ok(Some(Batch::IndexDocumentDeletionByFilter { + index_uid: index_uid.clone(), + task, + })) + } + _ => unreachable!(), + } + } BatchKind::DocumentOperation { method, operation_ids, .. } => { let tasks = self.get_existing_tasks(rtxn, operation_ids)?; let primary_key = tasks @@ -867,6 +886,64 @@ impl IndexScheduler { Ok(tasks) } + Batch::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => { + let (index_uid, filter) = + if let KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } = + &task.kind + { + (index_uid, filter_expr) + } else { + unreachable!() + }; + let index = { + let rtxn = self.env.read_txn()?; + self.index_mapper.index(&rtxn, index_uid)? + }; + let filter = Filter::from_json(filter)?; + let deleted_documents = if let Some(filter) = filter { + let index_rtxn = index.read_txn()?; + + let candidates = filter.evaluate(&index_rtxn, &index)?; + let mut wtxn = index.write_txn()?; + let mut delete_operation = DeleteDocuments::new(&mut wtxn, &index)?; + delete_operation.delete_documents(&candidates); + let result = delete_operation.execute().map(|result| result.deleted_documents); + wtxn.commit()?; + result + } else { + Ok(0) + }; + let original_filter = if let Some(Details::DocumentDeletionByFilter { + original_filter, + deleted_documents: _, + }) = task.details + { + original_filter + } else { + // In the case of a `documentDeleteByFilter` the details MUST be set + unreachable!(); + }; + + match deleted_documents { + Ok(deleted_documents) => { + task.status = Status::Succeeded; + task.details = Some(Details::DocumentDeletionByFilter { + original_filter, + deleted_documents: Some(deleted_documents), + }); + } + Err(e) => { + task.status = Status::Failed; + task.details = Some(Details::DocumentDeletionByFilter { + original_filter, + deleted_documents: Some(0), + }); + task.error = Some(e.into()); + } + } + + Ok(vec![task]) + } Batch::IndexCreation { index_uid, primary_key, task } => { let wtxn = self.env.write_txn()?; if self.index_mapper.exists(&wtxn, &index_uid)? 
{ diff --git a/index-scheduler/src/insta_snapshot.rs b/index-scheduler/src/insta_snapshot.rs index 43509aa84..3dae2800b 100644 --- a/index-scheduler/src/insta_snapshot.rs +++ b/index-scheduler/src/insta_snapshot.rs @@ -183,6 +183,9 @@ fn snapshot_details(d: &Details) -> String { provided_ids: received_document_ids, deleted_documents, } => format!("{{ received_document_ids: {received_document_ids}, deleted_documents: {deleted_documents:?} }}"), + Details::DocumentDeletionByFilter { original_filter, deleted_documents } => format!( + "{{ original_filter: {original_filter}, deleted_documents: {deleted_documents:?} }}" + ), Details::ClearAll { deleted_documents } => { format!("{{ deleted_documents: {deleted_documents:?} }}") }, diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index d713fca17..4aef427e5 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -1208,6 +1208,13 @@ impl<'a> Dump<'a> { documents_ids, index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, }, + KindDump::DocumentDeletionByFilter { filter } => { + KindWithContent::DocumentDeletionByFilter { + filter_expr: serde_json::from_str(&filter) + .map_err(|_| Error::CorruptedDump)?, + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + } + } KindDump::DocumentClear => KindWithContent::DocumentClear { index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, }, diff --git a/index-scheduler/src/utils.rs b/index-scheduler/src/utils.rs index 7718e1af0..97f437bed 100644 --- a/index-scheduler/src/utils.rs +++ b/index-scheduler/src/utils.rs @@ -239,6 +239,7 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) { match &mut task.kind { K::DocumentAdditionOrUpdate { index_uid, .. } => index_uids.push(index_uid), K::DocumentDeletion { index_uid, .. } => index_uids.push(index_uid), + K::DocumentDeletionByFilter { index_uid, .. } => index_uids.push(index_uid), K::DocumentClear { index_uid } => index_uids.push(index_uid), K::SettingsUpdate { index_uid, .. } => index_uids.push(index_uid), K::IndexDeletion { index_uid } => index_uids.push(index_uid), @@ -464,6 +465,29 @@ impl IndexScheduler { } } } + Details::DocumentDeletionByFilter { deleted_documents, original_filter: _ } => { + assert_eq!(kind.as_kind(), Kind::DocumentDeletionByFilter); + let (index_uid, _) = if let KindWithContent::DocumentDeletionByFilter { + ref index_uid, + ref filter_expr, + } = kind + { + (index_uid, filter_expr) + } else { + unreachable!() + }; + assert_eq!(&task_index_uid.unwrap(), index_uid); + + match status { + Status::Enqueued | Status::Processing => (), + Status::Succeeded => { + assert!(deleted_documents.is_some()); + } + Status::Failed | Status::Canceled => { + assert!(deleted_documents == Some(0)); + } + } + } Details::ClearAll { deleted_documents } => { assert!(matches!( kind.as_kind(), diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index a201db7ac..c36d27529 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -315,6 +315,7 @@ impl ErrorCode for milli::Error { UserError::MaxDatabaseSizeReached => Code::DatabaseSizeLimitReached, UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded, UserError::InvalidFilter(_) => Code::InvalidSearchFilter, + UserError::InvalidFilterExpression(..) => Code::InvalidSearchFilter, UserError::MissingDocumentId { .. } => Code::MissingDocumentId, UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. 
} => { Code::InvalidDocumentId diff --git a/meilisearch-types/src/tasks.rs b/meilisearch-types/src/tasks.rs index 3b7efb97b..88263d150 100644 --- a/meilisearch-types/src/tasks.rs +++ b/meilisearch-types/src/tasks.rs @@ -49,6 +49,7 @@ impl Task { | IndexSwap { .. } => None, DocumentAdditionOrUpdate { index_uid, .. } | DocumentDeletion { index_uid, .. } + | DocumentDeletionByFilter { index_uid, .. } | DocumentClear { index_uid } | SettingsUpdate { index_uid, .. } | IndexCreation { index_uid, .. } @@ -67,6 +68,7 @@ impl Task { match self.kind { KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => Some(content_file), KindWithContent::DocumentDeletion { .. } + | KindWithContent::DocumentDeletionByFilter { .. } | KindWithContent::DocumentClear { .. } | KindWithContent::SettingsUpdate { .. } | KindWithContent::IndexDeletion { .. } @@ -81,6 +83,11 @@ impl Task { } } +pub enum DocumentDeletionContent { + ByDocumentIds(Vec), + ByFilter(serde_json::Value), +} + #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub enum KindWithContent { @@ -96,6 +103,10 @@ pub enum KindWithContent { index_uid: String, documents_ids: Vec, }, + DocumentDeletionByFilter { + index_uid: String, + filter_expr: serde_json::Value, + }, DocumentClear { index_uid: String, }, @@ -145,6 +156,7 @@ impl KindWithContent { match self { KindWithContent::DocumentAdditionOrUpdate { .. } => Kind::DocumentAdditionOrUpdate, KindWithContent::DocumentDeletion { .. } => Kind::DocumentDeletion, + KindWithContent::DocumentDeletionByFilter { .. } => Kind::DocumentDeletion, KindWithContent::DocumentClear { .. } => Kind::DocumentDeletion, KindWithContent::SettingsUpdate { .. } => Kind::SettingsUpdate, KindWithContent::IndexCreation { .. } => Kind::IndexCreation, @@ -168,6 +180,7 @@ impl KindWithContent { | TaskDeletion { .. } => vec![], DocumentAdditionOrUpdate { index_uid, .. } | DocumentDeletion { index_uid, .. } + | DocumentDeletionByFilter { index_uid, .. } | DocumentClear { index_uid } | SettingsUpdate { index_uid, .. } | IndexCreation { index_uid, .. } @@ -200,6 +213,12 @@ impl KindWithContent { deleted_documents: None, }) } + KindWithContent::DocumentDeletionByFilter { index_uid: _, filter_expr } => { + Some(Details::DocumentDeletionByFilter { + original_filter: filter_expr.to_string(), + deleted_documents: None, + }) + } KindWithContent::DocumentClear { .. } | KindWithContent::IndexDeletion { .. } => { Some(Details::ClearAll { deleted_documents: None }) } @@ -242,6 +261,12 @@ impl KindWithContent { deleted_documents: Some(0), }) } + KindWithContent::DocumentDeletionByFilter { index_uid: _, filter_expr } => { + Some(Details::DocumentDeletionByFilter { + original_filter: filter_expr.to_string(), + deleted_documents: Some(0), + }) + } KindWithContent::DocumentClear { .. } => { Some(Details::ClearAll { deleted_documents: None }) } @@ -282,6 +307,7 @@ impl From<&KindWithContent> for Option
{ }) } KindWithContent::DocumentDeletion { .. } => None, + KindWithContent::DocumentDeletionByFilter { .. } => None, KindWithContent::DocumentClear { .. } => None, KindWithContent::SettingsUpdate { new_settings, .. } => { Some(Details::SettingsUpdate { settings: new_settings.clone() }) @@ -374,6 +400,7 @@ impl std::error::Error for ParseTaskStatusError {} pub enum Kind { DocumentAdditionOrUpdate, DocumentDeletion, + DocumentDeletionByFilter, SettingsUpdate, IndexCreation, IndexDeletion, @@ -390,6 +417,7 @@ impl Kind { match self { Kind::DocumentAdditionOrUpdate | Kind::DocumentDeletion + | Kind::DocumentDeletionByFilter | Kind::SettingsUpdate | Kind::IndexCreation | Kind::IndexDeletion @@ -407,6 +435,7 @@ impl Display for Kind { match self { Kind::DocumentAdditionOrUpdate => write!(f, "documentAdditionOrUpdate"), Kind::DocumentDeletion => write!(f, "documentDeletion"), + Kind::DocumentDeletionByFilter => write!(f, "documentDeletionByFilter"), Kind::SettingsUpdate => write!(f, "settingsUpdate"), Kind::IndexCreation => write!(f, "indexCreation"), Kind::IndexDeletion => write!(f, "indexDeletion"), @@ -478,6 +507,7 @@ pub enum Details { SettingsUpdate { settings: Box> }, IndexInfo { primary_key: Option }, DocumentDeletion { provided_ids: usize, deleted_documents: Option }, + DocumentDeletionByFilter { original_filter: String, deleted_documents: Option }, ClearAll { deleted_documents: Option }, TaskCancelation { matched_tasks: u64, canceled_tasks: Option, original_filter: String }, TaskDeletion { matched_tasks: u64, deleted_tasks: Option, original_filter: String }, @@ -493,6 +523,9 @@ impl Details { *indexed_documents = Some(0) } Self::DocumentDeletion { deleted_documents, .. } => *deleted_documents = Some(0), + Self::DocumentDeletionByFilter { deleted_documents, .. } => { + *deleted_documents = Some(0) + } Self::ClearAll { deleted_documents } => *deleted_documents = Some(0), Self::TaskCancelation { canceled_tasks, .. } => *canceled_tasks = Some(0), Self::TaskDeletion { deleted_tasks, .. } => *deleted_tasks = Some(0), diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 5c1e385fd..8c8229429 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -17,7 +17,6 @@ use meilisearch_types::error::{Code, ResponseError}; use meilisearch_types::heed::RoTxn; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::update::IndexDocumentsMethod; -use meilisearch_types::milli::InternalError; use meilisearch_types::star_or::OptionStarOrList; use meilisearch_types::tasks::KindWithContent; use meilisearch_types::{milli, Document, Index}; @@ -381,31 +380,6 @@ pub enum DocumentDeletionQuery { Object { filter: Option }, } -/// Parses a Json encoded document id and validate it, returning a user error when it is one. 
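// Illustrative note: with the `DocumentDeletionByFilter` kind added above, these
// deletions surface as their own task type, so they can be listed through the
// existing `types` query parameter, for example:
//   GET /tasks?types=documentDeletionByFilter
// and their `details` report `originalFilter` and `deletedDocuments`.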
-/// FIXME: stolen from milli -fn validate_document_id_value(document_id: Value) -> String { - match document_id { - Value::String(string) => match validate_document_id(&string) { - Some(s) if s.len() == string.len() => string, - Some(s) => s.to_string(), - None => panic!(), - }, - Value::Number(number) if number.is_i64() => number.to_string(), - _content => panic!(), - } -} - -/// FIXME: stolen from milli -fn validate_document_id(document_id: &str) -> Option<&str> { - if !document_id.is_empty() - && document_id.chars().all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_')) - { - Some(document_id) - } else { - None - } -} - pub async fn delete_documents( index_scheduler: GuardedData, Data>, index_uid: web::Path, @@ -428,34 +402,22 @@ pub async fn delete_documents( debug!("filter: {:?}", filter); // FIXME: spawn_blocking - if let Some(ref filter) = filter { - if let Some(facets) = crate::search::parse_filter(filter)? { + if let Some(mut filter) = filter { + if let Some(facets) = crate::search::parse_filter(&filter)? { debug!("facets: {:?}", facets); - let index = index_scheduler.index(&index_uid)?; - let rtxn = index.read_txn()?; - let filtered_candidates = facets.evaluate(&rtxn, &index)?; - debug!("filtered_candidates.len(): {:?}", filtered_candidates.len()); + let task = KindWithContent::DocumentDeletionByFilter { + index_uid: index_uid.to_string(), + filter_expr: filter.take(), + }; - // FIXME: unwraps - let primary_key = index.primary_key(&rtxn)?.unwrap(); - let primary_key = index.fields_ids_map(&rtxn)?.id(primary_key).unwrap(); + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)) + .await?? + .into(); - let documents = index.documents(&rtxn, filtered_candidates.into_iter())?; - debug!("documents.len(): {:?}", documents.len()); - let documents: Vec = documents - .into_iter() - .map(|(_, document)| { - let value = document.get(primary_key).unwrap(); - let value: Value = serde_json::from_slice(value) - .map_err(InternalError::SerdeJson) - .unwrap(); - - validate_document_id_value(value) - }) - .collect(); - debug!("documents: {:?}", documents); - documents + debug!("returns: {:?}", task); + return Ok(HttpResponse::Accepted().json(task)); } else { vec![] } diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index 1356ff722..122225d0a 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -133,6 +133,13 @@ impl From
for DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }, + Details::DocumentDeletionByFilter { original_filter, deleted_documents } => { + DetailsView { + original_filter: Some(original_filter), + deleted_documents: Some(deleted_documents), + ..DetailsView::default() + } + } Details::ClearAll { deleted_documents } => { DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() } } diff --git a/milli/src/error.rs b/milli/src/error.rs index 9b1ec0a87..7f0faf2fd 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -112,6 +112,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco InvalidGeoField(#[from] GeoError), #[error("{0}")] InvalidFilter(String), + #[error("Invalid type for filter subexpression: `expected {}, found: {1}`.", .0.join(", "))] + InvalidFilterExpression(&'static [&'static str], Value), #[error("Attribute `{}` is not sortable. {}", .field, match .valid_fields.is_empty() { diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 19d763107..fac7b68ea 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -5,6 +5,7 @@ use std::ops::Bound::{self, Excluded, Included}; use either::Either; pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token}; use roaring::RoaringBitmap; +use serde_json::Value; use super::facet_range_search; use crate::error::{Error, UserError}; @@ -112,6 +113,52 @@ impl<'a> From> for FilterCondition<'a> { } impl<'a> Filter<'a> { + pub fn from_json(facets: &'a Value) -> Result> { + match facets { + Value::String(expr) => { + let condition = Filter::from_str(expr)?; + Ok(condition) + } + Value::Array(arr) => Self::parse_filter_array(arr), + v => Err(Error::UserError(UserError::InvalidFilterExpression( + &["String", "Array"], + v.clone(), + ))), + } + } + + fn parse_filter_array(arr: &'a [Value]) -> Result> { + let mut ands = Vec::new(); + for value in arr { + match value { + Value::String(s) => ands.push(Either::Right(s.as_str())), + Value::Array(arr) => { + let mut ors = Vec::new(); + for value in arr { + match value { + Value::String(s) => ors.push(s.as_str()), + v => { + return Err(Error::UserError(UserError::InvalidFilterExpression( + &["String"], + v.clone(), + ))) + } + } + } + ands.push(Either::Left(ors)); + } + v => { + return Err(Error::UserError(UserError::InvalidFilterExpression( + &["String", "[String]"], + v.clone(), + ))) + } + } + } + + Filter::from_array(ands) + } + pub fn from_array(array: I) -> Result> where I: IntoIterator>, From 3680a6bf1e844c9cbf7b4412dc3cbca30300425c Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 7 Mar 2023 11:14:23 +0100 Subject: [PATCH 03/17] extract impl to a function --- index-scheduler/src/batch.rs | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index ff123f1e3..87f93b399 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -899,20 +899,7 @@ impl IndexScheduler { let rtxn = self.env.read_txn()?; self.index_mapper.index(&rtxn, index_uid)? 
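// Illustrative sketch of the two filter encodings `Filter::from_json` accepts,
// assuming `color` and `size` are filterable attributes of the index and that
// `milli::Filter` is in scope:
fn filter_json_forms() -> Result<(), milli::Error> {
    use serde_json::json;
    // Plain string expression.
    let as_string = json!("color = blue AND size > 10");
    // Array form: top-level entries are ANDed, nested string arrays are ORed,
    // so this reads as (color = blue OR color = red) AND size > 10.
    let as_array = json!([["color = blue", "color = red"], "size > 10"]);
    // Both return Ok(Some(filter)); an empty expression yields Ok(None).
    let _string_filter = milli::Filter::from_json(&as_string)?;
    let _array_filter = milli::Filter::from_json(&as_array)?;
    Ok(())
}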
}; - let filter = Filter::from_json(filter)?; - let deleted_documents = if let Some(filter) = filter { - let index_rtxn = index.read_txn()?; - - let candidates = filter.evaluate(&index_rtxn, &index)?; - let mut wtxn = index.write_txn()?; - let mut delete_operation = DeleteDocuments::new(&mut wtxn, &index)?; - delete_operation.delete_documents(&candidates); - let result = delete_operation.execute().map(|result| result.deleted_documents); - wtxn.commit()?; - result - } else { - Ok(0) - }; + let deleted_documents = delete_document_by_filter(filter, index); let original_filter = if let Some(Details::DocumentDeletionByFilter { original_filter, deleted_documents: _, @@ -1498,3 +1485,21 @@ impl IndexScheduler { Ok(content_files_to_delete) } } + +fn delete_document_by_filter(filter: &serde_json::Value, index: Index) -> Result { + let filter = Filter::from_json(filter)?; + Ok(if let Some(filter) = filter { + let index_rtxn = index.read_txn()?; + + let candidates = filter.evaluate(&index_rtxn, &index)?; + let mut wtxn = index.write_txn()?; + let mut delete_operation = DeleteDocuments::new(&mut wtxn, &index)?; + delete_operation.delete_documents(&candidates); + let deleted_documents = + delete_operation.execute().map(|result| result.deleted_documents)?; + wtxn.commit()?; + deleted_documents + } else { + 0 + }) +} From 6df2ba93a9b5f9b38f90251bf66b465448a7d662 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 6 Apr 2023 17:12:43 +0200 Subject: [PATCH 04/17] remove one useless txn --- index-scheduler/src/batch.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 87f93b399..c88234809 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -1489,10 +1489,9 @@ impl IndexScheduler { fn delete_document_by_filter(filter: &serde_json::Value, index: Index) -> Result { let filter = Filter::from_json(filter)?; Ok(if let Some(filter) = filter { - let index_rtxn = index.read_txn()?; - - let candidates = filter.evaluate(&index_rtxn, &index)?; let mut wtxn = index.write_txn()?; + + let candidates = filter.evaluate(&wtxn, &index)?; let mut delete_operation = DeleteDocuments::new(&mut wtxn, &index)?; delete_operation.delete_documents(&candidates); let deleted_documents = From 8af8aa5a334d26c5dfd06f09f34863de5b52bb89 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 11 Apr 2023 19:34:25 +0200 Subject: [PATCH 05/17] add a test --- meilisearch/tests/common/index.rs | 5 + .../tests/documents/delete_documents.rs | 120 ++++++++++++++++++ 2 files changed, 125 insertions(+) diff --git a/meilisearch/tests/common/index.rs b/meilisearch/tests/common/index.rs index 96ac5ce2e..5d38b84af 100644 --- a/meilisearch/tests/common/index.rs +++ b/meilisearch/tests/common/index.rs @@ -225,6 +225,11 @@ impl Index<'_> { self.service.delete(url).await } + pub async fn delete_document_by_filter(&self, body: Value) -> (Value, StatusCode) { + let url = format!("/indexes/{}/documents/delete-batch", urlencode(self.uid.as_ref())); + self.service.post_encoded(url, body, self.encoder).await + } + pub async fn clear_all_documents(&self) -> (Value, StatusCode) { let url = format!("/indexes/{}/documents", urlencode(self.uid.as_ref())); self.service.delete(url).await diff --git a/meilisearch/tests/documents/delete_documents.rs b/meilisearch/tests/documents/delete_documents.rs index e36e2f033..a34bcfa95 100644 --- a/meilisearch/tests/documents/delete_documents.rs +++ b/meilisearch/tests/documents/delete_documents.rs @@ -1,3 +1,4 @@ +use 
meili_snap::{json_string, snapshot}; use serde_json::json; use crate::common::{GetAllDocumentsOptions, Server}; @@ -135,3 +136,122 @@ async fn delete_no_document_batch() { assert_eq!(code, 200); assert_eq!(response["results"].as_array().unwrap().len(), 3); } + +#[actix_rt::test] +async fn delete_document_by_filter() { + let server = Server::new().await; + let index = server.index("doggo"); + index.update_settings_filterable_attributes(json!(["color"])).await; + index + .add_documents( + json!([ + { "id": 0, "color": "red" }, + { "id": 1, "color": "blue" }, + { "id": 2, "color": "blue" }, + { "id": 3 }, + ]), + Some("id"), + ) + .await; + index.wait_task(1).await; + let (response, code) = + index.delete_document_by_filter(json!({ "filter": "color = blue"})).await; + snapshot!(code, @"202 Accepted"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "taskUid": 2, + "indexUid": "doggo", + "status": "enqueued", + "type": "documentDeletion", + "enqueuedAt": "[date]" + } + "###); + + let response = index.wait_task(2).await; + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" + { + "uid": 2, + "indexUid": "doggo", + "status": "succeeded", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "deletedDocuments": 2, + "originalFilter": "\"color = blue\"" + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "id": 0, + "color": "red" + }, + { + "id": 3 + } + ], + "offset": 0, + "limit": 20, + "total": 2 + } + "###); + + let (response, code) = + index.delete_document_by_filter(json!({ "filter": "color NOT EXISTS"})).await; + snapshot!(code, @"202 Accepted"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" + { + "taskUid": 3, + "indexUid": "doggo", + "status": "enqueued", + "type": "documentDeletion", + "enqueuedAt": "[date]" + } + "###); + + let response = index.wait_task(2).await; + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" + { + "uid": 2, + "indexUid": "doggo", + "status": "succeeded", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "deletedDocuments": 2, + "originalFilter": "\"color = blue\"" + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "id": 0, + "color": "red" + } + ], + "offset": 0, + "limit": 20, + "total": 1 + } + "###); +} From c12a1cd956df980748c40b3407e7d22e7bdfe07b Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 11 Apr 2023 19:51:17 +0200 Subject: [PATCH 06/17] test all the error messages --- .../tests/documents/delete_documents.rs | 5 +- meilisearch/tests/documents/errors.rs | 125 ++++++++++++++++++ 2 files changed, 129 insertions(+), 1 deletion(-) diff --git a/meilisearch/tests/documents/delete_documents.rs 
b/meilisearch/tests/documents/delete_documents.rs index a34bcfa95..4624f9d60 100644 --- a/meilisearch/tests/documents/delete_documents.rs +++ b/meilisearch/tests/documents/delete_documents.rs @@ -247,11 +247,14 @@ async fn delete_document_by_filter() { { "id": 0, "color": "red" + }, + { + "id": 3 } ], "offset": 0, "limit": 20, - "total": 1 + "total": 2 } "###); } diff --git a/meilisearch/tests/documents/errors.rs b/meilisearch/tests/documents/errors.rs index bf55188ba..2452a0ebd 100644 --- a/meilisearch/tests/documents/errors.rs +++ b/meilisearch/tests/documents/errors.rs @@ -418,3 +418,128 @@ async fn update_documents_csv_delimiter_with_bad_content_type() { } "###); } + +#[actix_rt::test] +async fn delete_document_by_filter() { + let server = Server::new().await; + let index = server.index("doggo"); + + // send a bad payload type + let (response, code) = index.delete_document_by_filter(json!("hello")).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Json deserialize error: data did not match any variant of untagged enum DocumentDeletionQuery", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // send bad filter + let (response, code) = index.delete_document_by_filter(json!({ "filter": "hello"})).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + + // index does not exists + let (response, code) = + index.delete_document_by_filter(json!({ "filter": "doggo = bernese"})).await; + snapshot!(code, @"202 Accepted"); + let response = server.wait_task(response["taskUid"].as_u64().unwrap()).await; + snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]"}), @r###" + { + "uid": 0, + "indexUid": "doggo", + "status": "failed", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "deletedDocuments": 0, + "originalFilter": "\"doggo = bernese\"" + }, + "error": { + "message": "Index `doggo` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (response, code) = index.create(None).await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response["taskUid"].as_u64().unwrap()).await; + + // no filterable are set + let (response, code) = + index.delete_document_by_filter(json!({ "filter": "doggo = bernese"})).await; + snapshot!(code, @"202 Accepted"); + let response = server.wait_task(response["taskUid"].as_u64().unwrap()).await; + snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]"}), @r###" + { + "uid": 2, + "indexUid": "doggo", + "status": "failed", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "deletedDocuments": 0, + "originalFilter": "\"doggo = bernese\"" + }, + "error": { + "message": "Attribute `doggo` is not filterable. 
This index does not have configured filterable attributes.\n1:6 doggo = bernese", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (response, code) = index.update_settings_filterable_attributes(json!(["doggo"])).await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response["taskUid"].as_u64().unwrap()).await; + + // not filterable while there is a filterable attribute + let (response, code) = + index.delete_document_by_filter(json!({ "filter": "catto = jorts"})).await; + snapshot!(code, @"202 Accepted"); + let response = server.wait_task(response["taskUid"].as_u64().unwrap()).await; + snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]"}), @r###" + { + "uid": 4, + "indexUid": "doggo", + "status": "failed", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "deletedDocuments": 0, + "originalFilter": "\"catto = jorts\"" + }, + "error": { + "message": "Attribute `catto` is not filterable. Available filterable attributes are: `doggo`.\n1:6 catto = jorts", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} From 4b92f1b2692f7214656a886afa4321c1cd15d455 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 27 Apr 2023 13:51:02 +0200 Subject: [PATCH 07/17] wip --- meilisearch/src/routes/indexes/documents.rs | 99 ++++++++++--------- meilisearch/tests/common/index.rs | 2 +- .../tests/documents/delete_documents.rs | 11 +-- meilisearch/tests/documents/errors.rs | 6 +- 4 files changed, 61 insertions(+), 57 deletions(-) diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 8c8229429..0ca6cf066 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -4,13 +4,13 @@ use actix_web::http::header::CONTENT_TYPE; use actix_web::web::Data; use actix_web::{web, HttpMessage, HttpRequest, HttpResponse}; use bstr::ByteSlice; -use deserr::actix_web::AwebQueryParameter; +use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::Deserr; use futures::StreamExt; use index_scheduler::IndexScheduler; use log::debug; use meilisearch_types::deserr::query_params::Param; -use meilisearch_types::deserr::DeserrQueryParamError; +use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType}; use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::{Code, ResponseError}; @@ -71,8 +71,11 @@ pub fn configure(cfg: &mut web::ServiceConfig) { .route(web::put().to(SeqHandler(update_documents))) .route(web::delete().to(SeqHandler(clear_all_documents))), ) - // this route needs to be before the /documents/{document_id} to match properly - .service(web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents)))) + // these routes needs to be before the /documents/{document_id} to match properly + .service( + web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents_batch))), + ) + 
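// Illustrative summary of the request shapes handled by the two routes registered
// here, as they stand after this patch (names and shapes may still change later in
// the series):
//   POST /indexes/{index_uid}/documents/delete-batch
//     body: ["doc-1", 2, "doc-3"]            (a JSON array of document ids)
//   POST /indexes/{index_uid}/documents/delete
//     body: { "filter": "color = blue" }     (a string or array filter expression)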
.service(web::resource("/delete").route(web::post().to(SeqHandler(delete_documents_by_filter)))) .service( web::resource("/{document_id}") .route(web::get().to(SeqHandler(get_document))) @@ -373,59 +376,22 @@ async fn document_addition( Ok(task.into()) } -#[derive(Debug, Deserialize)] -#[serde(untagged)] -pub enum DocumentDeletionQuery { - Ids(Vec), - Object { filter: Option }, -} - -pub async fn delete_documents( +pub async fn delete_documents_batch( index_scheduler: GuardedData, Data>, index_uid: web::Path, - body: web::Json, + body: web::Json>, req: HttpRequest, analytics: web::Data, ) -> Result { debug!("called with params: {:?}", body); let index_uid = IndexUid::try_from(index_uid.into_inner())?; - let index_uid = index_uid.into_inner(); analytics.delete_documents(DocumentDeletionKind::PerBatch, &req); - let ids = match body.into_inner() { - DocumentDeletionQuery::Ids(body) => body - .iter() - .map(|v| v.as_str().map(String::from).unwrap_or_else(|| v.to_string())) - .collect(), - DocumentDeletionQuery::Object { filter } => { - debug!("filter: {:?}", filter); - - // FIXME: spawn_blocking - if let Some(mut filter) = filter { - if let Some(facets) = crate::search::parse_filter(&filter)? { - debug!("facets: {:?}", facets); - - let task = KindWithContent::DocumentDeletionByFilter { - index_uid: index_uid.to_string(), - filter_expr: filter.take(), - }; - - let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)) - .await?? - .into(); - - debug!("returns: {:?}", task); - return Ok(HttpResponse::Accepted().json(task)); - } else { - vec![] - } - } else { - vec![] - } - } - }; + let ids = body + .iter() + .map(|v| v.as_str().map(String::from).unwrap_or_else(|| v.to_string())) + .collect(); let task = KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids }; @@ -436,6 +402,45 @@ pub async fn delete_documents( Ok(HttpResponse::Accepted().json(task)) } +#[derive(Debug, Deserr)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +pub struct DocumentDeletionByFilter { + // TODO: Update the error code to something more appropriate + #[deserr(error = DeserrJsonError)] + filter: Value, +} + +pub async fn delete_documents_by_filter( + index_scheduler: GuardedData, Data>, + index_uid: web::Path, + body: AwebJson, + req: HttpRequest, + analytics: web::Data, +) -> Result { + println!("here"); + debug!("called with params: {:?}", body); + let index_uid = IndexUid::try_from(index_uid.into_inner())?; + let index_uid = index_uid.into_inner(); + let filter = body.into_inner().filter; + + analytics.delete_documents(DocumentDeletionKind::PerBatch, &req); + + debug!("filter: {:?}", filter); + + // FIXME: spawn_blocking => tamo: but why, it's making zero IO and almost no allocation? + // TODO: what should we do in case of an empty filter? 
Create a task that does nothing (then we should be able to store a None in the task queue) + // or refuse the payload with a cool™️ error message 😎 + let _ = crate::search::parse_filter(&filter)?.expect("You can't send an empty filter"); + + let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter }; + + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + + debug!("returns: {:?}", task); + return Ok(HttpResponse::Accepted().json(task)); +} + pub async fn clear_all_documents( index_scheduler: GuardedData, Data>, index_uid: web::Path, diff --git a/meilisearch/tests/common/index.rs b/meilisearch/tests/common/index.rs index 5d38b84af..38b43e910 100644 --- a/meilisearch/tests/common/index.rs +++ b/meilisearch/tests/common/index.rs @@ -226,7 +226,7 @@ impl Index<'_> { } pub async fn delete_document_by_filter(&self, body: Value) -> (Value, StatusCode) { - let url = format!("/indexes/{}/documents/delete-batch", urlencode(self.uid.as_ref())); + let url = format!("/indexes/{}/documents/delete", urlencode(self.uid.as_ref())); self.service.post_encoded(url, body, self.encoder).await } diff --git a/meilisearch/tests/documents/delete_documents.rs b/meilisearch/tests/documents/delete_documents.rs index 4624f9d60..ef7d3e7a5 100644 --- a/meilisearch/tests/documents/delete_documents.rs +++ b/meilisearch/tests/documents/delete_documents.rs @@ -156,14 +156,13 @@ async fn delete_document_by_filter() { index.wait_task(1).await; let (response, code) = index.delete_document_by_filter(json!({ "filter": "color = blue"})).await; - snapshot!(code, @"202 Accepted"); + // snapshot!(code, @"202 Accepted"); snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" { - "taskUid": 2, - "indexUid": "doggo", - "status": "enqueued", - "type": "documentDeletion", - "enqueuedAt": "[date]" + "message": "Missing fied `filter`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" } "###); diff --git a/meilisearch/tests/documents/errors.rs b/meilisearch/tests/documents/errors.rs index 2452a0ebd..267e4ccbb 100644 --- a/meilisearch/tests/documents/errors.rs +++ b/meilisearch/tests/documents/errors.rs @@ -429,10 +429,10 @@ async fn delete_document_by_filter() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Json deserialize error: data did not match any variant of untagged enum DocumentDeletionQuery", - "code": "bad_request", + "message": "Invalid syntax for the filter parameter: `expected String, Array, found: null`.", + "code": "invalid_search_filter", "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#bad_request" + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" } "###); From 143acb9cdc3d67615c4026dc2d3d7a5615331376 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 27 Apr 2023 19:28:05 +0200 Subject: [PATCH 08/17] update the tests --- meilisearch/src/routes/tasks.rs | 2 +- meilisearch/tests/documents/delete_documents.rs | 9 +++++---- meilisearch/tests/documents/errors.rs | 6 +++--- meilisearch/tests/tasks/errors.rs | 6 +++--- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index 122225d0a..958f15dd8 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -728,7 +728,7 @@ mod tests { let err = deserr_query_params::(params).unwrap_err(); 
snapshot!(meili_snap::json_string!(err), @r###" { - "message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", + "message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", "code": "invalid_task_types", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_task_types" diff --git a/meilisearch/tests/documents/delete_documents.rs b/meilisearch/tests/documents/delete_documents.rs index ef7d3e7a5..267552069 100644 --- a/meilisearch/tests/documents/delete_documents.rs +++ b/meilisearch/tests/documents/delete_documents.rs @@ -159,10 +159,11 @@ async fn delete_document_by_filter() { // snapshot!(code, @"202 Accepted"); snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" { - "message": "Missing fied `filter`", - "code": "bad_request", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#bad_request" + "taskUid": 2, + "indexUid": "doggo", + "status": "enqueued", + "type": "documentDeletion", + "enqueuedAt": "[date]" } "###); diff --git a/meilisearch/tests/documents/errors.rs b/meilisearch/tests/documents/errors.rs index 267e4ccbb..5ba286c70 100644 --- a/meilisearch/tests/documents/errors.rs +++ b/meilisearch/tests/documents/errors.rs @@ -429,10 +429,10 @@ async fn delete_document_by_filter() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid syntax for the filter parameter: `expected String, Array, found: null`.", - "code": "invalid_search_filter", + "message": "Invalid value type: expected an object, but found a string: `\"hello\"`", + "code": "bad_request", "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + "link": "https://docs.meilisearch.com/errors#bad_request" } "###); diff --git a/meilisearch/tests/tasks/errors.rs b/meilisearch/tests/tasks/errors.rs index 830c4c8e7..065ff1aa9 100644 --- a/meilisearch/tests/tasks/errors.rs +++ b/meilisearch/tests/tasks/errors.rs @@ -97,7 +97,7 @@ async fn task_bad_types() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", + "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. 
Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", "code": "invalid_task_types", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_task_types" @@ -108,7 +108,7 @@ async fn task_bad_types() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", + "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", "code": "invalid_task_types", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_task_types" @@ -119,7 +119,7 @@ async fn task_bad_types() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", + "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", "code": "invalid_task_types", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_task_types" From 0548ab9038cdb1a7f4739e90a275fc216ad72d6f Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 2 May 2023 17:46:04 +0200 Subject: [PATCH 09/17] create and use the error code --- meilisearch-types/src/error.rs | 1 + meilisearch/src/analytics/mod.rs | 1 + meilisearch/src/error.rs | 3 ++ meilisearch/src/routes/indexes/documents.rs | 21 ++++++-------- .../tests/documents/delete_documents.rs | 7 ++--- meilisearch/tests/documents/errors.rs | 28 +++++++++++++++++-- 6 files changed, 42 insertions(+), 19 deletions(-) diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index c36d27529..e4ca39c8b 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -218,6 +218,7 @@ InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ; InvalidDocumentId , InvalidRequest , BAD_REQUEST ; InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ; InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ; +InvalidDocumentDeleteFilter , InvalidRequest , BAD_REQUEST ; InvalidIndexLimit , InvalidRequest , BAD_REQUEST ; InvalidIndexOffset , InvalidRequest , BAD_REQUEST ; InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ; diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index 4d295db57..6223b9db7 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -64,6 +64,7 @@ pub enum DocumentDeletionKind { 
PerDocumentId, ClearAll, PerBatch, + PerFilter, } pub trait Analytics: Sync + Send { diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index 850803a07..9d0a03c34 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -20,6 +20,8 @@ pub enum MeilisearchHttpError { InvalidContentType(String, Vec), #[error("Document `{0}` not found.")] DocumentNotFound(String), + #[error("Sending an empty filter is forbidden.")] + EmptyFilter, #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))] InvalidExpression(&'static [&'static str], Value), #[error("A {0} payload is missing.")] @@ -58,6 +60,7 @@ impl ErrorCode for MeilisearchHttpError { MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload, MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType, MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound, + MeilisearchHttpError::EmptyFilter => Code::InvalidDocumentDeleteFilter, MeilisearchHttpError::InvalidExpression(_, _) => Code::InvalidSearchFilter, MeilisearchHttpError::PayloadTooLarge => Code::PayloadTooLarge, MeilisearchHttpError::SwapIndexPayloadWrongLength(_) => Code::InvalidSwapIndexes, diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 0ca6cf066..5793b6fb4 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -405,8 +405,7 @@ pub async fn delete_documents_batch( #[derive(Debug, Deserr)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] pub struct DocumentDeletionByFilter { - // TODO: Update the error code to something more appropriate - #[deserr(error = DeserrJsonError)] + #[deserr(error = DeserrJsonError)] filter: Value, } @@ -417,28 +416,26 @@ pub async fn delete_documents_by_filter( req: HttpRequest, analytics: web::Data, ) -> Result { - println!("here"); debug!("called with params: {:?}", body); let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = index_uid.into_inner(); let filter = body.into_inner().filter; - analytics.delete_documents(DocumentDeletionKind::PerBatch, &req); - - debug!("filter: {:?}", filter); - - // FIXME: spawn_blocking => tamo: but why, it's making zero IO and almost no allocation? - // TODO: what should we do in case of an empty filter? Create a task that does nothing (then we should be able to store a None in the task queue) - // or refuse the payload with a cool™️ error message 😎 - let _ = crate::search::parse_filter(&filter)?.expect("You can't send an empty filter"); + analytics.delete_documents(DocumentDeletionKind::PerFilter, &req); + // we ensure the filter is well formed before enqueuing it + || -> Result<_, ResponseError> { + Ok(crate::search::parse_filter(&filter)?.ok_or(MeilisearchHttpError::EmptyFilter)?) 
+ }() + // and whatever was the error, the error code should always be an InvalidDocumentDeleteFilter + .map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentDeleteFilter))?; let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter }; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); debug!("returns: {:?}", task); - return Ok(HttpResponse::Accepted().json(task)); + Ok(HttpResponse::Accepted().json(task)) } pub async fn clear_all_documents( diff --git a/meilisearch/tests/documents/delete_documents.rs b/meilisearch/tests/documents/delete_documents.rs index 267552069..a34bcfa95 100644 --- a/meilisearch/tests/documents/delete_documents.rs +++ b/meilisearch/tests/documents/delete_documents.rs @@ -156,7 +156,7 @@ async fn delete_document_by_filter() { index.wait_task(1).await; let (response, code) = index.delete_document_by_filter(json!({ "filter": "color = blue"})).await; - // snapshot!(code, @"202 Accepted"); + snapshot!(code, @"202 Accepted"); snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" { "taskUid": 2, @@ -247,14 +247,11 @@ async fn delete_document_by_filter() { { "id": 0, "color": "red" - }, - { - "id": 3 } ], "offset": 0, "limit": 20, - "total": 2 + "total": 1 } "###); } diff --git a/meilisearch/tests/documents/errors.rs b/meilisearch/tests/documents/errors.rs index 5ba286c70..18c01623b 100644 --- a/meilisearch/tests/documents/errors.rs +++ b/meilisearch/tests/documents/errors.rs @@ -436,15 +436,39 @@ async fn delete_document_by_filter() { } "###); + // send bad payload type + let (response, code) = index.delete_document_by_filter(json!({ "filter": true })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.", + "code": "invalid_document_delete_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_document_delete_filter" + } + "###); + // send bad filter let (response, code) = index.delete_document_by_filter(json!({ "filter": "hello"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello", - "code": "invalid_search_filter", + "code": "invalid_document_delete_filter", "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + "link": "https://docs.meilisearch.com/errors#invalid_document_delete_filter" + } + "###); + + // send empty filter + let (response, code) = index.delete_document_by_filter(json!({ "filter": ""})).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Sending an empty filter is forbidden.", + "code": "invalid_document_delete_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_document_delete_filter" } "###); From fc8c1d118d593e847ba25a962ec6ac2ad96c19e0 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 2 May 2023 21:32:56 +0200 Subject: [PATCH 10/17] fix the analytics --- meilisearch/src/analytics/segment_analytics.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 4f02ed60c..eedf30c7c 100644 --- 
a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -949,6 +949,7 @@ pub struct DocumentsDeletionAggregator { per_document_id: bool, clear_all: bool, per_batch: bool, + per_filter: bool, } impl DocumentsDeletionAggregator { @@ -962,6 +963,7 @@ impl DocumentsDeletionAggregator { DocumentDeletionKind::PerDocumentId => ret.per_document_id = true, DocumentDeletionKind::ClearAll => ret.clear_all = true, DocumentDeletionKind::PerBatch => ret.per_batch = true, + DocumentDeletionKind::PerFilter => ret.per_batch = true, } ret @@ -981,6 +983,7 @@ impl DocumentsDeletionAggregator { self.per_document_id |= other.per_document_id; self.clear_all |= other.clear_all; self.per_batch |= other.per_batch; + self.per_filter |= other.per_filter; } pub fn into_event(self, user: &User, event_name: &str) -> Option { From 0f0cd2d9297da6357237783b3c7b500ee1448d52 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 2 May 2023 22:36:56 +0200 Subject: [PATCH 11/17] handle the array of array form of filter in the dumps --- dump/src/lib.rs | 4 ++-- index-scheduler/src/lib.rs | 3 +-- meilisearch-types/src/tasks.rs | 5 ----- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/dump/src/lib.rs b/dump/src/lib.rs index a3e892c03..ed76d708e 100644 --- a/dump/src/lib.rs +++ b/dump/src/lib.rs @@ -102,7 +102,7 @@ pub enum KindDump { }, DocumentClear, DocumentDeletionByFilter { - filter: String, + filter: serde_json::Value, }, Settings { settings: Box>, @@ -170,7 +170,7 @@ impl From for KindDump { KindDump::DocumentDeletion { documents_ids } } KindWithContent::DocumentDeletionByFilter { filter_expr, .. } => { - KindDump::DocumentDeletionByFilter { filter: filter_expr.to_string() } + KindDump::DocumentDeletionByFilter { filter: filter_expr } } KindWithContent::DocumentClear { .. } => KindDump::DocumentClear, KindWithContent::SettingsUpdate { diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 4aef427e5..d4b405d2b 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -1210,8 +1210,7 @@ impl<'a> Dump<'a> { }, KindDump::DocumentDeletionByFilter { filter } => { KindWithContent::DocumentDeletionByFilter { - filter_expr: serde_json::from_str(&filter) - .map_err(|_| Error::CorruptedDump)?, + filter_expr: filter, index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, } } diff --git a/meilisearch-types/src/tasks.rs b/meilisearch-types/src/tasks.rs index 88263d150..e746a53b8 100644 --- a/meilisearch-types/src/tasks.rs +++ b/meilisearch-types/src/tasks.rs @@ -83,11 +83,6 @@ impl Task { } } -pub enum DocumentDeletionContent { - ByDocumentIds(Vec), - ByFilter(serde_json::Value), -} - #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub enum KindWithContent { From b5fe0b2b07de28b6b55bb0241c4770f5963108d2 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 2 May 2023 22:53:16 +0200 Subject: [PATCH 12/17] fix the details --- meilisearch/src/routes/tasks.rs | 1 + meilisearch/tests/documents/delete_documents.rs | 2 ++ meilisearch/tests/documents/errors.rs | 3 +++ 3 files changed, 6 insertions(+) diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index 958f15dd8..cab0f7197 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -135,6 +135,7 @@ impl From
for DetailsView { }, Details::DocumentDeletionByFilter { original_filter, deleted_documents } => { DetailsView { + provided_ids: Some(0), original_filter: Some(original_filter), deleted_documents: Some(deleted_documents), ..DetailsView::default() diff --git a/meilisearch/tests/documents/delete_documents.rs b/meilisearch/tests/documents/delete_documents.rs index a34bcfa95..f16a84a1e 100644 --- a/meilisearch/tests/documents/delete_documents.rs +++ b/meilisearch/tests/documents/delete_documents.rs @@ -176,6 +176,7 @@ async fn delete_document_by_filter() { "type": "documentDeletion", "canceledBy": null, "details": { + "providedIds": 0, "deletedDocuments": 2, "originalFilter": "\"color = blue\"" }, @@ -228,6 +229,7 @@ async fn delete_document_by_filter() { "type": "documentDeletion", "canceledBy": null, "details": { + "providedIds": 0, "deletedDocuments": 2, "originalFilter": "\"color = blue\"" }, diff --git a/meilisearch/tests/documents/errors.rs b/meilisearch/tests/documents/errors.rs index 18c01623b..ac9bc132b 100644 --- a/meilisearch/tests/documents/errors.rs +++ b/meilisearch/tests/documents/errors.rs @@ -485,6 +485,7 @@ async fn delete_document_by_filter() { "type": "documentDeletion", "canceledBy": null, "details": { + "providedIds": 0, "deletedDocuments": 0, "originalFilter": "\"doggo = bernese\"" }, @@ -518,6 +519,7 @@ async fn delete_document_by_filter() { "type": "documentDeletion", "canceledBy": null, "details": { + "providedIds": 0, "deletedDocuments": 0, "originalFilter": "\"doggo = bernese\"" }, @@ -551,6 +553,7 @@ async fn delete_document_by_filter() { "type": "documentDeletion", "canceledBy": null, "details": { + "providedIds": 0, "deletedDocuments": 0, "originalFilter": "\"catto = jorts\"" }, From 2b74e4d116d691560aa3aed8d82ee36ce6d14544 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 3 May 2023 17:41:35 +0200 Subject: [PATCH 13/17] Fix test --- meilisearch/tests/documents/delete_documents.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/meilisearch/tests/documents/delete_documents.rs b/meilisearch/tests/documents/delete_documents.rs index f16a84a1e..8de36490d 100644 --- a/meilisearch/tests/documents/delete_documents.rs +++ b/meilisearch/tests/documents/delete_documents.rs @@ -220,18 +220,18 @@ async fn delete_document_by_filter() { } "###); - let response = index.wait_task(2).await; + let response = index.wait_task(3).await; snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" { - "uid": 2, + "uid": 3, "indexUid": "doggo", "status": "succeeded", "type": "documentDeletion", "canceledBy": null, "details": { "providedIds": 0, - "deletedDocuments": 2, - "originalFilter": "\"color = blue\"" + "deletedDocuments": 1, + "originalFilter": "\"color NOT EXISTS\"" }, "error": null, "duration": "[duration]", From 84e7bd934296f3a5ce0d161e4de881252019d055 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 3 May 2023 17:51:28 +0200 Subject: [PATCH 14/17] Fix test after rebase on filter additions --- meilisearch/tests/documents/errors.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch/tests/documents/errors.rs b/meilisearch/tests/documents/errors.rs index ac9bc132b..b857d4e1d 100644 --- a/meilisearch/tests/documents/errors.rs +++ b/meilisearch/tests/documents/errors.rs @@ -453,7 +453,7 @@ async fn delete_document_by_filter() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Was 
expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello", "code": "invalid_document_delete_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_delete_filter" From d2d2bacaf2a972190f798c7a362db760e224d74e Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 3 May 2023 20:07:08 +0200 Subject: [PATCH 15/17] add a test on the complex filter --- .../tests/documents/delete_documents.rs | 130 ++++++++++++++++++ 1 file changed, 130 insertions(+) diff --git a/meilisearch/tests/documents/delete_documents.rs b/meilisearch/tests/documents/delete_documents.rs index 8de36490d..8f6ae1985 100644 --- a/meilisearch/tests/documents/delete_documents.rs +++ b/meilisearch/tests/documents/delete_documents.rs @@ -257,3 +257,133 @@ async fn delete_document_by_filter() { } "###); } + +#[actix_rt::test] +async fn delete_document_by_complex_filter() { + let server = Server::new().await; + let index = server.index("doggo"); + index.update_settings_filterable_attributes(json!(["color"])).await; + index + .add_documents( + json!([ + { "id": 0, "color": "red" }, + { "id": 1, "color": "blue" }, + { "id": 2, "color": "blue" }, + { "id": 3, "color": "green" }, + { "id": 4 }, + ]), + Some("id"), + ) + .await; + index.wait_task(1).await; + let (response, code) = index + .delete_document_by_filter( + json!({ "filter": ["color != red", "color != green", "color EXISTS"] }), + ) + .await; + snapshot!(code, @"202 Accepted"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "taskUid": 2, + "indexUid": "doggo", + "status": "enqueued", + "type": "documentDeletion", + "enqueuedAt": "[date]" + } + "###); + + let response = index.wait_task(2).await; + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" + { + "uid": 2, + "indexUid": "doggo", + "status": "succeeded", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "providedIds": 0, + "deletedDocuments": 2, + "originalFilter": "[\"color != red\",\"color != green\",\"color EXISTS\"]" + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "id": 0, + "color": "red" + }, + { + "id": 3, + "color": "green" + }, + { + "id": 4 + } + ], + "offset": 0, + "limit": 20, + "total": 3 + } + "###); + + let (response, code) = index + .delete_document_by_filter(json!({ "filter": [["color = green", "color NOT EXISTS"]] })) + .await; + snapshot!(code, @"202 Accepted"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" + { + "taskUid": 3, + "indexUid": "doggo", + "status": "enqueued", + "type": "documentDeletion", + "enqueuedAt": "[date]" + } + "###); + + let response = index.wait_task(3).await; + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", 
".duration" => "[duration]" }), @r###" + { + "uid": 3, + "indexUid": "doggo", + "status": "succeeded", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "providedIds": 0, + "deletedDocuments": 4, + "originalFilter": "[[\"color = green\",\"color NOT EXISTS\"]]" + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "id": 0, + "color": "red" + } + ], + "offset": 0, + "limit": 20, + "total": 1 + } + "###); +} From 1c3642c9b22fbec3b7d804d22de5e59b282d1ed7 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 3 May 2023 22:26:51 +0200 Subject: [PATCH 16/17] Fix deletion per filter analytics --- meilisearch/src/analytics/segment_analytics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index eedf30c7c..3e40c09e8 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -963,7 +963,7 @@ impl DocumentsDeletionAggregator { DocumentDeletionKind::PerDocumentId => ret.per_document_id = true, DocumentDeletionKind::ClearAll => ret.clear_all = true, DocumentDeletionKind::PerBatch => ret.per_batch = true, - DocumentDeletionKind::PerFilter => ret.per_batch = true, + DocumentDeletionKind::PerFilter => ret.per_filter = true, } ret From d5059520aa812a7850c252bdad61f49315d9b5b5 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 3 May 2023 22:27:03 +0200 Subject: [PATCH 17/17] Fix typo --- meilisearch/src/routes/indexes/documents.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 5793b6fb4..a6ee8d16e 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -71,7 +71,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) { .route(web::put().to(SeqHandler(update_documents))) .route(web::delete().to(SeqHandler(clear_all_documents))), ) - // these routes needs to be before the /documents/{document_id} to match properly + // these routes need to be before the /documents/{document_id} to match properly .service( web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents_batch))), )