mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 11:57:07 +02:00
Merge #2523
2523: Improve the tasks error reporting when processed in batches r=irevoire a=Kerollmops

This fixes #2478 by changing the behavior of the task handler when there is an error in a batch of document additions or updates. When a task in a batch fails with a user error, we now report that task as failed with the right error message, but we continue to process the other tasks. A user error can occur when a geo field is invalid, or when a document id is invalid or missing.

fixes #2582, #2478

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
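To illustrate the intended behavior, here is a minimal, self-contained sketch. It is not the actual Meilisearch task handler; `TaskStatus`, `process_one`, and `process_batch` are hypothetical names. The point is only that a user error fails the offending task and the batch keeps going:

```rust
// Hedged sketch: a user error fails only the offending task,
// and the rest of the batch continues to be processed.
#[derive(Debug, PartialEq)]
enum TaskStatus {
    Succeeded,
    Failed(String), // user-facing error message
}

// Hypothetical per-task work: reject documents with an empty id.
fn process_one(document_id: &str) -> Result<(), String> {
    if document_id.is_empty() {
        Err("The document id is invalid or missing.".to_string())
    } else {
        Ok(())
    }
}

fn process_batch(tasks: &[&str]) -> Vec<TaskStatus> {
    tasks
        .iter()
        .map(|id| match process_one(id) {
            Ok(()) => TaskStatus::Succeeded,
            // Report the failure for this task only; the loop continues.
            Err(msg) => TaskStatus::Failed(msg),
        })
        .collect()
}

fn main() {
    let statuses = process_batch(&["doc-1", "", "doc-3"]);
    assert_eq!(statuses[0], TaskStatus::Succeeded);
    assert!(matches!(statuses[1], TaskStatus::Failed(_)));
    assert_eq!(statuses[2], TaskStatus::Succeeded); // processing continued
}
```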
This commit is contained in:
commit b5f91b91c3
23 changed files with 251 additions and 222 deletions
@@ -1,17 +0,0 @@
-use meilisearch_lib::heed::Env;
-use walkdir::WalkDir;
-
-pub trait EnvSizer {
-    fn size(&self) -> u64;
-}
-
-impl EnvSizer for Env {
-    fn size(&self) -> u64 {
-        WalkDir::new(self.path())
-            .into_iter()
-            .filter_map(|entry| entry.ok())
-            .filter_map(|entry| entry.metadata().ok())
-            .filter(|metadata| metadata.is_file())
-            .fold(0, |acc, m| acc + m.len())
-    }
-}
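The deleted `EnvSizer` trait measured an LMDB `Env`'s on-disk footprint by walking the environment's directory with `walkdir` and summing the lengths of the regular files it contains.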
@@ -1,3 +0,0 @@
-mod env;
-
-pub use env::EnvSizer;
@@ -5,7 +5,6 @@ pub mod analytics;
 pub mod task;
 #[macro_use]
 pub mod extractors;
-pub mod helpers;
 pub mod option;
 pub mod routes;
 
@@ -30,9 +29,9 @@ pub static AUTOBATCHING_ENABLED: AtomicBool = AtomicBool::new(false);
 pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<MeiliSearch> {
     let mut meilisearch = MeiliSearch::builder();
 
-    // enable autobatching?
+    // disable autobatching?
     AUTOBATCHING_ENABLED.store(
-        opt.scheduler_options.enable_auto_batching,
+        !opt.scheduler_options.disable_auto_batching,
         std::sync::atomic::Ordering::Relaxed,
     );
 
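Note the polarity flip: instead of opting in via `enable_auto_batching`, users now opt out via `disable_auto_batching`, and the stored flag is its negation. A minimal sketch of the same relaxed-ordering global-flag pattern, with a stand-in for the parsed option (an assumed simplification, not the real setup code):

```rust
use std::sync::atomic::{AtomicBool, Ordering};

// A process-wide flag written once at startup and read later; relaxed
// ordering suffices because the flag does not synchronize other memory.
static AUTOBATCHING_ENABLED: AtomicBool = AtomicBool::new(false);

fn main() {
    let disable_auto_batching = false; // stand-in for the parsed CLI option
    AUTOBATCHING_ENABLED.store(!disable_auto_batching, Ordering::Relaxed);
    assert!(AUTOBATCHING_ENABLED.load(Ordering::Relaxed));
}
```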
@@ -231,7 +231,7 @@ pub struct TaskView {
     #[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
     finished_at: Option<OffsetDateTime>,
     #[serde(skip_serializing_if = "Option::is_none")]
-    batch_uid: Option<Option<BatchId>>,
+    batch_uid: Option<BatchId>,
 }
 
 impl From<Task> for TaskView {
@@ -380,15 +380,15 @@ impl From<Task> for TaskView {
 
         let duration = finished_at.zip(started_at).map(|(tf, ts)| (tf - ts));
 
-        let batch_uid = if AUTOBATCHING_ENABLED.load(std::sync::atomic::Ordering::Relaxed) {
-            let id = events.iter().find_map(|e| match e {
-                TaskEvent::Batched { batch_id, .. } => Some(*batch_id),
-                _ => None,
-            });
-            Some(id)
-        } else {
-            None
-        };
+        let batch_uid = AUTOBATCHING_ENABLED
+            .load(std::sync::atomic::Ordering::Relaxed)
+            .then(|| {
+                events.iter().find_map(|e| match e {
+                    TaskEvent::Batched { batch_id, .. } => Some(*batch_id),
+                    _ => None,
+                })
+            })
+            .flatten();
 
         Self {
             uid: id,
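This refactor replaces an `if/else` that produced a nested `Option<Option<BatchId>>` with `bool::then` followed by `Option::flatten`, matching the flattened field type above. A small stand-alone example of the idiom (the `events` array here is just illustrative):

```rust
// `bool::then` yields `Some(f())` when the bool is true and `None` otherwise,
// so a closure returning `Option<T>` produces `Option<Option<T>>`, which
// `flatten` collapses back to `Option<T>`.
fn main() {
    let events = [2u32, 7, 4];
    let enabled = true;
    let first_odd = enabled
        .then(|| events.iter().copied().find(|e| e % 2 == 1))
        .flatten();
    assert_eq!(first_odd, Some(7));

    let disabled = false;
    assert_eq!(disabled.then(|| Some(1)).flatten(), None::<i32>);
}
```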
@@ -1,5 +1,6 @@
+use crate::common::{GetAllDocumentsOptions, Server};
 use actix_web::test;
 
 use meilisearch_http::{analytics, create_app};
 use serde_json::{json, Value};
 use time::{format_description::well_known::Rfc3339, OffsetDateTime};
@@ -326,7 +327,7 @@ async fn error_add_malformed_json_documents() {
     assert_eq!(
         response["message"],
         json!(
-            r#"The `json` payload provided is malformed. `Couldn't serialize document value: invalid type: string "0123456789012345678901234567...890123456789", expected a documents, or a sequence of documents. at line 1 column 102`."#
+            r#"The `json` payload provided is malformed. `Couldn't serialize document value: invalid type: string "0123456789012345678901234567...890123456789012345678901234567890123456789", expected a sequence at line 1 column 102`."#
         )
     );
     assert_eq!(response["code"], json!("malformed_payload"));
@@ -349,9 +350,7 @@ async fn error_add_malformed_json_documents() {
     assert_eq!(status_code, 400);
     assert_eq!(
         response["message"],
-        json!(
-            r#"The `json` payload provided is malformed. `Couldn't serialize document value: invalid type: string "0123456789012345678901234567...90123456789m", expected a documents, or a sequence of documents. at line 1 column 103`."#
-        )
+        json!("The `json` payload provided is malformed. `Couldn't serialize document value: invalid type: string \"0123456789012345678901234567...90123456789012345678901234567890123456789m\", expected a sequence at line 1 column 103`.")
     );
     assert_eq!(response["code"], json!("malformed_payload"));
     assert_eq!(response["type"], json!("invalid_request"));
@@ -388,7 +387,7 @@ async fn error_add_malformed_ndjson_documents() {
     assert_eq!(
         response["message"],
         json!(
-            r#"The `ndjson` payload provided is malformed. `Couldn't serialize document value: key must be a string at line 1 column 2`."#
+            r#"The `ndjson` payload provided is malformed. `Couldn't serialize document value: key must be a string at line 2 column 2`."#
         )
     );
     assert_eq!(response["code"], json!("malformed_payload"));
@@ -411,9 +410,7 @@ async fn error_add_malformed_ndjson_documents() {
     assert_eq!(status_code, 400);
     assert_eq!(
         response["message"],
-        json!(
-            r#"The `ndjson` payload provided is malformed. `Couldn't serialize document value: key must be a string at line 1 column 2`."#
-        )
+        json!("The `ndjson` payload provided is malformed. `Couldn't serialize document value: key must be a string at line 2 column 2`.")
     );
     assert_eq!(response["code"], json!("malformed_payload"));
     assert_eq!(response["type"], json!("invalid_request"));
@@ -1020,7 +1017,7 @@ async fn add_documents_invalid_geo_field() {
     index.wait_task(2).await;
     let (response, code) = index.get_task(2).await;
     assert_eq!(code, 200);
-    assert_eq!(response["status"], "succeeded");
+    assert_eq!(response["status"], "failed");
 }
 
 #[actix_rt::test]
@@ -1099,3 +1096,62 @@ async fn add_documents_with_primary_key_twice() {
     let (response, _code) = index.get_task(1).await;
     assert_eq!(response["status"], "succeeded");
 }
+
+#[actix_rt::test]
+async fn batch_several_documents_addition() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let mut documents: Vec<_> = (0..150usize)
+        .into_iter()
+        .map(|id| {
+            json!(
+                {
+                    "id": id,
+                    "title": "foo",
+                    "desc": "bar"
+                }
+            )
+        })
+        .collect();
+
+    documents[100] = json!({"title": "error", "desc": "error"});
+
+    // enqueue batch of documents
+    let mut waiter = Vec::new();
+    for chunk in documents.chunks(30) {
+        waiter.push(index.add_documents(json!(chunk), Some("id")));
+    }
+
+    // wait first batch of documents to finish
+    futures::future::join_all(waiter).await;
+    index.wait_task(4).await;
+
+    // run a second completely failing batch
+    documents[40] = json!({"title": "error", "desc": "error"});
+    documents[70] = json!({"title": "error", "desc": "error"});
+    documents[130] = json!({"title": "error", "desc": "error"});
+    let mut waiter = Vec::new();
+    for chunk in documents.chunks(30) {
+        waiter.push(index.add_documents(json!(chunk), Some("id")));
+    }
+    // wait second batch of documents to finish
+    futures::future::join_all(waiter).await;
+    index.wait_task(9).await;
+
+    let (response, _code) = index.filtered_tasks(&[], &["failed"]).await;
+
+    // Check if only the 6th task failed
+    println!("{}", &response);
+    assert_eq!(response["results"].as_array().unwrap().len(), 5);
+
+    // Check if there are exactly 120 documents (150 - 30) in the index;
+    let (response, code) = index
+        .get_all_documents(GetAllDocumentsOptions {
+            limit: Some(200),
+            ..Default::default()
+        })
+        .await;
+    assert_eq!(code, 200, "failed with `{}`", response);
+    assert_eq!(response["results"].as_array().unwrap().len(), 120);
+}
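For reference, the numbers asserted in this new test follow from the chunking arithmetic: 150 documents split into chunks of 30 produce 5 addition tasks per round. In the first round, only the chunk containing document 100 (ids 90 to 119) fails, so one of tasks 0 to 4 fails and 120 documents land in the index. In the second round, the bad documents at indices 40, 70, 100, and 130 make four of the five chunks fail; the one passing chunk only re-adds ids 0 to 29, leaving the document count at 120 and the total number of failed tasks at 1 + 4 = 5.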
@@ -708,9 +708,7 @@ async fn faceting_max_values_per_facet() {
         }),
         |response, code| {
            assert_eq!(code, 200, "{}", response);
-            let numbers = dbg!(&response)["facetDistribution"]["number"]
-                .as_object()
-                .unwrap();
+            let numbers = &response["facetDistribution"]["number"].as_object().unwrap();
             assert_eq!(numbers.len(), 10_000);
         },
     )
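Side note on the change above: `dbg!(&response)` prints the file, line, and value to stderr and returns the value unchanged, so dropping it only removes debug noise from the test run; the assertion on `numbers.len()` is unaffected.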