Create errors for the internal processing ones

This commit is contained in:
Clément Renault 2024-07-04 17:48:03 +02:00
parent e706023969
commit aace587dd1
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
3 changed files with 46 additions and 20 deletions

View File

@ -336,7 +336,8 @@ UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA
// Experimental features // Experimental features
VectorEmbeddingError , InvalidRequest , BAD_REQUEST ; VectorEmbeddingError , InvalidRequest , BAD_REQUEST ;
NotFoundSimilarId , InvalidRequest , BAD_REQUEST NotFoundSimilarId , InvalidRequest , BAD_REQUEST ;
EditDocumentsByFunctionError , InvalidRequest , BAD_REQUEST
} }
impl ErrorCode for JoinError { impl ErrorCode for JoinError {
@ -407,6 +408,12 @@ impl ErrorCode for milli::Error {
} }
UserError::InvalidEmbedder(_) => Code::InvalidEmbedder, UserError::InvalidEmbedder(_) => Code::InvalidEmbedder,
UserError::VectorEmbeddingError(_) => Code::VectorEmbeddingError, UserError::VectorEmbeddingError(_) => Code::VectorEmbeddingError,
UserError::DocumentEditionCannotModifyPrimaryKey
| UserError::DocumentEditionDocumentMustBeObject
| UserError::DocumentEditionRuntimeError(_)
| UserError::DocumentEditionCompilationError(_) => {
Code::EditDocumentsByFunctionError
}
} }
} }
} }

View File

@ -5,6 +5,7 @@ use std::{io, str};
use heed::{Error as HeedError, MdbError}; use heed::{Error as HeedError, MdbError};
use rayon::ThreadPoolBuildError; use rayon::ThreadPoolBuildError;
use rhai::EvalAltResult;
use serde_json::Value; use serde_json::Value;
use thiserror::Error; use thiserror::Error;
@ -259,6 +260,14 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
InvalidSettingsDimensions { embedder_name: String }, InvalidSettingsDimensions { embedder_name: String },
#[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")] #[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")]
InvalidUrl { embedder_name: String, inner_error: url::ParseError, url: String }, InvalidUrl { embedder_name: String, inner_error: url::ParseError, url: String },
#[error("Document editions cannot modify a document's primary key")]
DocumentEditionCannotModifyPrimaryKey,
#[error("Document editions must keep documents as objects")]
DocumentEditionDocumentMustBeObject,
#[error("Document edition runtime error encountered while running the function: {0}")]
DocumentEditionRuntimeError(Box<EvalAltResult>),
#[error("Document edition runtime error encountered while compiling the function: {0}")]
DocumentEditionCompilationError(rhai::ParseError),
} }
impl From<crate::vector::Error> for Error { impl From<crate::vector::Error> for Error {

View File

@ -183,7 +183,7 @@ where
context: Option<Object>, context: Option<Object>,
code: &str, code: &str,
) -> Result<(Self, StdResult<(u64, u64), UserError>)> { ) -> Result<(Self, StdResult<(u64, u64), UserError>)> {
// Early return when there is no document to add // Early return when there is no document to edit
if documents.is_empty() { if documents.is_empty() {
return Ok((self, Ok((0, 0)))); return Ok((self, Ok((0, 0))));
} }
@ -202,14 +202,16 @@ where
// It is an arbitrary value. We need to let users define this in the settings. // It is an arbitrary value. We need to let users define this in the settings.
engine.set_max_operations(1_000_000); engine.set_max_operations(1_000_000);
let ast = engine.compile(code).unwrap(); let ast = engine.compile(code).map_err(UserError::DocumentEditionCompilationError)?;
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?; let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
let primary_key = self.index.primary_key(self.wtxn)?.unwrap(); let primary_key = self.index.primary_key(self.wtxn)?.unwrap();
let mut documents_batch_builder = tempfile::tempfile().map(DocumentsBatchBuilder::new)?; let mut documents_batch_builder = tempfile::tempfile().map(DocumentsBatchBuilder::new)?;
let mut documents_to_remove = RoaringBitmap::new(); let mut documents_to_remove = RoaringBitmap::new();
let context: Dynamic = match context { let context: Dynamic = match context {
Some(context) => serde_json::from_value(context.into()).unwrap(), Some(context) => {
serde_json::from_value(context.into()).map_err(InternalError::SerdeJson)?
}
None => Dynamic::from(()), None => Dynamic::from(()),
}; };
@ -227,6 +229,8 @@ where
)?; )?;
let processing = documents.into_iter().par_bridge().map(|docid| { let processing = documents.into_iter().par_bridge().map(|docid| {
// safety: Both documents *must* exist in the database as
// their IDs come from the list of document ids.
let rhai_document = immutable_obkvs.rhai_map(docid)?.unwrap(); let rhai_document = immutable_obkvs.rhai_map(docid)?.unwrap();
let json_document = immutable_obkvs.json_map(docid)?.unwrap(); let json_document = immutable_obkvs.json_map(docid)?.unwrap();
let document_id = &json_document[primary_key]; let document_id = &json_document[primary_key];
@ -234,32 +238,38 @@ where
let mut scope = Scope::new(); let mut scope = Scope::new();
scope.push_constant_dynamic("context", context.clone()); scope.push_constant_dynamic("context", context.clone());
scope.push("doc", rhai_document); scope.push("doc", rhai_document);
let _ = engine.eval_ast_with_scope::<Dynamic>(&mut scope, &ast).unwrap(); // This is where the magic happens. We run the user script
// which edits the "doc" scope variable representing the document
// and ignore the output and even the type of it, i.e., Dynamic.
let _ = engine
.eval_ast_with_scope::<Dynamic>(&mut scope, &ast)
.map_err(UserError::DocumentEditionRuntimeError)?;
match scope.remove::<Dynamic>("doc") { match scope.remove::<Dynamic>("doc") {
// If the "doc" variable has been removed from the scope // If the "doc" variable has been set to (), we effectively delete the document.
// or set to (), we effectively delete the document. Some(doc) if doc.is_unit() => Ok(DocumentEdition::Deleted(docid)),
Some(doc) if doc.is_unit() => { None => unreachable!("missing doc variable from the Rhai scope"),
return Ok(DocumentEdition::Deleted(docid));
}
None => unreachable!(),
Some(document) => match document.try_cast() { Some(document) => match document.try_cast() {
Some(document) => { Some(document) => {
let new_document = rhaimap_to_object(document); let new_document = rhaimap_to_object(document);
// Note: This condition is not perfect. Sometimes it detects changes,
// like with floating-point numbers, and considers updating
// the document even if nothing actually changed.
if json_document != new_document { if json_document != new_document {
assert_eq!( if Some(document_id) != new_document.get(primary_key) {
Some(document_id), Err(Error::UserError(
new_document.get(primary_key), UserError::DocumentEditionCannotModifyPrimaryKey,
"you cannot change the document id when editing documents" ))
); } else {
return Ok(DocumentEdition::Edited(new_document)); Ok(DocumentEdition::Edited(new_document))
}
} else {
Ok(DocumentEdition::Nothing)
} }
} }
None => panic!("Why is \"doc\" no longer a Map?"), None => Err(Error::UserError(UserError::DocumentEditionDocumentMustBeObject)),
}, },
} }
Ok(DocumentEdition::Nothing) as Result<_>
}); });
rayon_par_bridge::par_bridge(100, processing, |iterator| { rayon_par_bridge::par_bridge(100, processing, |iterator| {