From aace587dd1967e7b0481ca97519a6707fedd6aac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 4 Jul 2024 17:48:03 +0200 Subject: [PATCH] Create errors for the internal processing ones --- meilisearch-types/src/error.rs | 9 ++++- milli/src/error.rs | 9 +++++ milli/src/update/index_documents/mod.rs | 48 +++++++++++++++---------- 3 files changed, 46 insertions(+), 20 deletions(-) diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index f529238e4..0640c74b5 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -336,7 +336,8 @@ UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA // Experimental features VectorEmbeddingError , InvalidRequest , BAD_REQUEST ; -NotFoundSimilarId , InvalidRequest , BAD_REQUEST +NotFoundSimilarId , InvalidRequest , BAD_REQUEST ; +EditDocumentsByFunctionError , InvalidRequest , BAD_REQUEST } impl ErrorCode for JoinError { @@ -407,6 +408,12 @@ impl ErrorCode for milli::Error { } UserError::InvalidEmbedder(_) => Code::InvalidEmbedder, UserError::VectorEmbeddingError(_) => Code::VectorEmbeddingError, + UserError::DocumentEditionCannotModifyPrimaryKey + | UserError::DocumentEditionDocumentMustBeObject + | UserError::DocumentEditionRuntimeError(_) + | UserError::DocumentEditionCompilationError(_) => { + Code::EditDocumentsByFunctionError + } } } } diff --git a/milli/src/error.rs b/milli/src/error.rs index 8e03fde4e..0effb7be7 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -5,6 +5,7 @@ use std::{io, str}; use heed::{Error as HeedError, MdbError}; use rayon::ThreadPoolBuildError; +use rhai::EvalAltResult; use serde_json::Value; use thiserror::Error; @@ -259,6 +260,14 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco InvalidSettingsDimensions { embedder_name: String }, #[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")] InvalidUrl { embedder_name: String, inner_error: url::ParseError, url: String }, + #[error("Document editions cannot modify a document's primary key")] + DocumentEditionCannotModifyPrimaryKey, + #[error("Document editions must keep documents as objects")] + DocumentEditionDocumentMustBeObject, + #[error("Document edition runtime error encountered while running the function: {0}")] + DocumentEditionRuntimeError(Box), + #[error("Document edition runtime error encountered while compiling the function: {0}")] + DocumentEditionCompilationError(rhai::ParseError), } impl From for Error { diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 7994abff3..d19b29852 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -183,7 +183,7 @@ where context: Option, code: &str, ) -> Result<(Self, StdResult<(u64, u64), UserError>)> { - // Early return when there is no document to add + // Early return when there is no document to edit if documents.is_empty() { return Ok((self, Ok((0, 0)))); } @@ -202,14 +202,16 @@ where // It is an arbitrary value. We need to let users define this in the settings. engine.set_max_operations(1_000_000); - let ast = engine.compile(code).unwrap(); + let ast = engine.compile(code).map_err(UserError::DocumentEditionCompilationError)?; let fields_ids_map = self.index.fields_ids_map(self.wtxn)?; let primary_key = self.index.primary_key(self.wtxn)?.unwrap(); let mut documents_batch_builder = tempfile::tempfile().map(DocumentsBatchBuilder::new)?; let mut documents_to_remove = RoaringBitmap::new(); let context: Dynamic = match context { - Some(context) => serde_json::from_value(context.into()).unwrap(), + Some(context) => { + serde_json::from_value(context.into()).map_err(InternalError::SerdeJson)? + } None => Dynamic::from(()), }; @@ -227,6 +229,8 @@ where )?; let processing = documents.into_iter().par_bridge().map(|docid| { + // safety: Both documents *must* exists in the database as + // their IDs comes from the list of documents ids. let rhai_document = immutable_obkvs.rhai_map(docid)?.unwrap(); let json_document = immutable_obkvs.json_map(docid)?.unwrap(); let document_id = &json_document[primary_key]; @@ -234,32 +238,38 @@ where let mut scope = Scope::new(); scope.push_constant_dynamic("context", context.clone()); scope.push("doc", rhai_document); - let _ = engine.eval_ast_with_scope::(&mut scope, &ast).unwrap(); + // That's were the magic happens. We run the user script + // which edits "doc" scope variable reprensenting the document + // and ignore the output and even the type of it, i.e., Dynamic. + let _ = engine + .eval_ast_with_scope::(&mut scope, &ast) + .map_err(UserError::DocumentEditionRuntimeError)?; match scope.remove::("doc") { - // If the "doc" variable has been removed from the scope - // or set to (), we effectively delete the document. - Some(doc) if doc.is_unit() => { - return Ok(DocumentEdition::Deleted(docid)); - } - None => unreachable!(), + // If the "doc" variable has set to (), we effectively delete the document. + Some(doc) if doc.is_unit() => Ok(DocumentEdition::Deleted(docid)), + None => unreachable!("missing doc variable from the Rhai scope"), Some(document) => match document.try_cast() { Some(document) => { let new_document = rhaimap_to_object(document); + // Note: This condition is not perfect. Sometimes it detect changes + // like with floating points numbers and consider updating + // the document even if nothing actually changed. if json_document != new_document { - assert_eq!( - Some(document_id), - new_document.get(primary_key), - "you cannot change the document id when editing documents" - ); - return Ok(DocumentEdition::Edited(new_document)); + if Some(document_id) != new_document.get(primary_key) { + Err(Error::UserError( + UserError::DocumentEditionCannotModifyPrimaryKey, + )) + } else { + Ok(DocumentEdition::Edited(new_document)) + } + } else { + Ok(DocumentEdition::Nothing) } } - None => panic!("Why is \"doc\" no longer a Map?"), + None => Err(Error::UserError(UserError::DocumentEditionDocumentMustBeObject)), }, } - - Ok(DocumentEdition::Nothing) as Result<_> }); rayon_par_bridge::par_bridge(100, processing, |iterator| {