From 8c074f5028c70c8a8757dd287ac3af62946b7624 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 16 Feb 2023 13:12:18 +0100 Subject: [PATCH 1/5] implements the csv delimiter without tests Co-authored-by: Maxi Barmetler --- Cargo.lock | 8 +-- meilisearch-types/Cargo.toml | 2 +- meilisearch-types/src/document_formats.rs | 10 ++-- meilisearch-types/src/error.rs | 1 + meilisearch/Cargo.toml | 2 +- meilisearch/src/error.rs | 3 + meilisearch/src/routes/indexes/documents.rs | 66 ++++++++++++++++----- milli/Cargo.toml | 2 +- 8 files changed, 66 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 28ac2ca63..eb3b27aa7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1113,9 +1113,9 @@ dependencies = [ [[package]] name = "deserr" -version = "0.4.1" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eee2844f21cf7fb5693aae1fb8f1658127acfdb2fc072167d68a9152584ae64" +checksum = "c71c14985c842bf1e520b1ebcd22daff6aeece32f510e11f063cecf9b308c04b" dependencies = [ "actix-http", "actix-utils", @@ -1130,9 +1130,9 @@ dependencies = [ [[package]] name = "deserr-internal" -version = "0.4.1" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c27246f8ca9eeba9dd70d614b664dc43b529251ed7bd9e633131010d340da4b9" +checksum = "cae1c51b191528c9e4e5d6cff671de94f61fcda1c206cc891251e0cf438c941a" dependencies = [ "convert_case 0.5.0", "proc-macro2", diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index 3ed9464d3..13298b092 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -9,7 +9,7 @@ actix-web = { version = "4.2.1", default-features = false } anyhow = "1.0.65" convert_case = "0.6.0" csv = "1.1.6" -deserr = "0.4.1" +deserr = "0.5.0" either = { version = "1.6.1", features = ["serde"] } enum-iterator = "1.1.3" file-store = { path = "../file-store" } diff --git a/meilisearch-types/src/document_formats.rs b/meilisearch-types/src/document_formats.rs index 7a98dc0a3..206cfdcc0 100644 --- a/meilisearch-types/src/document_formats.rs +++ b/meilisearch-types/src/document_formats.rs @@ -19,7 +19,7 @@ type Result = std::result::Result; pub enum PayloadType { Ndjson, Json, - Csv, + Csv(u8), } impl fmt::Display for PayloadType { @@ -27,7 +27,7 @@ impl fmt::Display for PayloadType { match self { PayloadType::Ndjson => f.write_str("ndjson"), PayloadType::Json => f.write_str("json"), - PayloadType::Csv => f.write_str("csv"), + PayloadType::Csv(_) => f.write_str("csv"), } } } @@ -105,11 +105,11 @@ impl ErrorCode for DocumentFormatError { } /// Reads CSV from input and write an obkv batch to writer. -pub fn read_csv(file: &File, writer: impl Write + Seek) -> Result { +pub fn read_csv(file: &File, writer: impl Write + Seek, delimiter: u8) -> Result { let mut builder = DocumentsBatchBuilder::new(writer); let mmap = unsafe { MmapOptions::new().map(file)? }; - let csv = csv::Reader::from_reader(mmap.as_ref()); - builder.append_csv(csv).map_err(|e| (PayloadType::Csv, e))?; + let csv = csv::ReaderBuilder::new().delimiter(delimiter).from_reader(mmap.as_ref()); + builder.append_csv(csv).map_err(|e| (PayloadType::Csv(delimiter), e))?; let count = builder.documents_count(); let _ = builder.into_inner().map_err(DocumentFormatError::Io)?; diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 46368c29e..f1ce0f217 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -220,6 +220,7 @@ InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ; InvalidIndexLimit , InvalidRequest , BAD_REQUEST ; InvalidIndexOffset , InvalidRequest , BAD_REQUEST ; InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ; +InvalidIndexCsvDelimiter , InvalidRequest , BAD_REQUEST ; InvalidIndexUid , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ; diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index b4a87c632..bc120719a 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -19,7 +19,7 @@ byte-unit = { version = "4.0.14", default-features = false, features = ["std", " bytes = "1.2.1" clap = { version = "4.0.9", features = ["derive", "env"] } crossbeam-channel = "0.5.6" -deserr = "0.4.1" +deserr = "0.5.0" dump = { path = "../dump" } either = "1.8.0" env_logger = "0.9.1" diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index fcfe4b7fc..850803a07 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -11,6 +11,8 @@ pub enum MeilisearchHttpError { #[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}", .0.iter().map(|s| format!("`{}`", s)).collect::>().join(", "))] MissingContentType(Vec), + #[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")] + CsvDelimiterWithWrongContentType(String), #[error( "The Content-Type `{0}` is invalid. Accepted values for the Content-Type header are: {}", .1.iter().map(|s| format!("`{}`", s)).collect::>().join(", ") @@ -52,6 +54,7 @@ impl ErrorCode for MeilisearchHttpError { fn error_code(&self) -> Code { match self { MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType, + MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType, MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload, MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType, MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound, diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index fb75cfcf8..67d3f6a31 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -10,10 +10,10 @@ use futures::StreamExt; use index_scheduler::IndexScheduler; use log::debug; use meilisearch_types::deserr::query_params::Param; -use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; +use meilisearch_types::deserr::DeserrQueryParamError; use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType}; use meilisearch_types::error::deserr_codes::*; -use meilisearch_types::error::ResponseError; +use meilisearch_types::error::{Code, ResponseError}; use meilisearch_types::heed::RoTxn; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::update::IndexDocumentsMethod; @@ -67,7 +67,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( web::resource("") .route(web::get().to(SeqHandler(get_all_documents))) - .route(web::post().to(SeqHandler(add_documents))) + .route(web::post().to(SeqHandler(replace_documents))) .route(web::put().to(SeqHandler(update_documents))) .route(web::delete().to(SeqHandler(clear_all_documents))), ) @@ -156,16 +156,31 @@ pub async fn get_all_documents( } #[derive(Deserialize, Debug, Deserr)] -#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)] pub struct UpdateDocumentsQuery { - #[deserr(default, error = DeserrJsonError)] + #[deserr(default, error = DeserrQueryParamError)] pub primary_key: Option, + #[deserr(default, try_from(char) = from_char_csv_delimiter -> DeserrQueryParamError, error = DeserrQueryParamError)] + pub csv_delimiter: Option, } -pub async fn add_documents( +fn from_char_csv_delimiter( + c: char, +) -> Result, DeserrQueryParamError> { + if c.is_ascii() { + Ok(Some(c as u8)) + } else { + Err(DeserrQueryParamError::new( + format!("csv delimiter must be an ascii character. Found: `{}`", c), + Code::InvalidIndexCsvDelimiter, + )) + } +} + +pub async fn replace_documents( index_scheduler: GuardedData, Data>, index_uid: web::Path, - params: AwebQueryParameter, + params: AwebQueryParameter, body: Payload, req: HttpRequest, analytics: web::Data, @@ -183,6 +198,7 @@ pub async fn add_documents( index_scheduler, index_uid, params.primary_key, + params.csv_delimiter, body, IndexDocumentsMethod::ReplaceDocuments, allow_index_creation, @@ -195,7 +211,7 @@ pub async fn add_documents( pub async fn update_documents( index_scheduler: GuardedData, Data>, index_uid: web::Path, - params: AwebQueryParameter, + params: AwebQueryParameter, body: Payload, req: HttpRequest, analytics: web::Data, @@ -203,6 +219,7 @@ pub async fn update_documents( let index_uid = IndexUid::try_from(index_uid.into_inner())?; debug!("called with params: {:?}", params); + let params = params.into_inner(); analytics.update_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req); @@ -211,7 +228,8 @@ pub async fn update_documents( extract_mime_type(&req)?, index_scheduler, index_uid, - params.into_inner().primary_key, + params.primary_key, + params.csv_delimiter, body, IndexDocumentsMethod::UpdateDocuments, allow_index_creation, @@ -226,21 +244,37 @@ async fn document_addition( index_scheduler: GuardedData, Data>, index_uid: IndexUid, primary_key: Option, + csv_delimiter: Option, mut body: Payload, method: IndexDocumentsMethod, allow_index_creation: bool, ) -> Result { - let format = match mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())) { - Some(("application", "json")) => PayloadType::Json, - Some(("application", "x-ndjson")) => PayloadType::Ndjson, - Some(("text", "csv")) => PayloadType::Csv, - Some((type_, subtype)) => { + let format = match ( + mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())), + csv_delimiter, + ) { + (Some(("application", "json")), None) => PayloadType::Json, + (Some(("application", "x-ndjson")), None) => PayloadType::Ndjson, + (Some(("text", "csv")), None) => PayloadType::Csv(b','), + (Some(("text", "csv")), Some(delimiter)) => PayloadType::Csv(delimiter), + + (Some(("application", "json")), Some(_)) => { + return Err(MeilisearchHttpError::CsvDelimiterWithWrongContentType(String::from( + "application/json", + ))) + } + (Some(("application", "x-ndjson")), Some(_)) => { + return Err(MeilisearchHttpError::CsvDelimiterWithWrongContentType(String::from( + "application/x-ndjson", + ))) + } + (Some((type_, subtype)), _) => { return Err(MeilisearchHttpError::InvalidContentType( format!("{}/{}", type_, subtype), ACCEPTED_CONTENT_TYPE.clone(), )) } - None => { + (None, _) => { return Err(MeilisearchHttpError::MissingContentType(ACCEPTED_CONTENT_TYPE.clone())) } }; @@ -285,7 +319,7 @@ async fn document_addition( let documents_count = tokio::task::spawn_blocking(move || { let documents_count = match format { PayloadType::Json => read_json(&read_file, update_file.as_file_mut())?, - PayloadType::Csv => read_csv(&read_file, update_file.as_file_mut())?, + PayloadType::Csv(delim) => read_csv(&read_file, update_file.as_file_mut(), delim)?, PayloadType::Ndjson => read_ndjson(&read_file, update_file.as_file_mut())?, }; // we NEED to persist the file here because we moved the `udpate_file` in another task. diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 1752cb3d9..4c76e9034 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -12,7 +12,7 @@ byteorder = "1.4.3" charabia = { version = "0.7.0", default-features = false } concat-arrays = "0.1.2" crossbeam-channel = "0.5.6" -deserr = "0.4.1" +deserr = "0.5.0" either = "1.8.0" flatten-serde-json = { path = "../flatten-serde-json" } fst = "0.4.7" From 52686da028b9bd79fdb82f8bdc4bbc8ab82ddaa1 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 16 Feb 2023 13:59:58 +0100 Subject: [PATCH 2/5] test various error on the document ressource --- meilisearch/tests/common/index.rs | 34 ++- meilisearch/tests/common/service.rs | 28 ++- meilisearch/tests/documents/errors.rs | 315 ++++++++++++++++++++++++++ 3 files changed, 369 insertions(+), 8 deletions(-) diff --git a/meilisearch/tests/common/index.rs b/meilisearch/tests/common/index.rs index c127af921..96ac5ce2e 100644 --- a/meilisearch/tests/common/index.rs +++ b/meilisearch/tests/common/index.rs @@ -30,7 +30,7 @@ impl Index<'_> { .post_str( url, include_str!("../assets/test_set.json"), - ("content-type", "application/json"), + vec![("content-type", "application/json")], ) .await; assert_eq!(code, 202); @@ -46,7 +46,7 @@ impl Index<'_> { .post_str( url, include_str!("../assets/test_set.ndjson"), - ("content-type", "application/x-ndjson"), + vec![("content-type", "application/x-ndjson")], ) .await; assert_eq!(code, 202); @@ -96,6 +96,21 @@ impl Index<'_> { self.service.post_encoded(url, documents, self.encoder).await } + pub async fn raw_add_documents( + &self, + payload: &str, + content_type: Option<&str>, + query_parameter: &str, + ) -> (Value, StatusCode) { + let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), query_parameter); + + if let Some(content_type) = content_type { + self.service.post_str(url, payload, vec![("Content-Type", content_type)]).await + } else { + self.service.post_str(url, payload, Vec::new()).await + } + } + pub async fn update_documents( &self, documents: Value, @@ -110,6 +125,21 @@ impl Index<'_> { self.service.put_encoded(url, documents, self.encoder).await } + pub async fn raw_update_documents( + &self, + payload: &str, + content_type: Option<&str>, + query_parameter: &str, + ) -> (Value, StatusCode) { + let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), query_parameter); + + if let Some(content_type) = content_type { + self.service.put_str(url, payload, vec![("Content-Type", content_type)]).await + } else { + self.service.put_str(url, payload, Vec::new()).await + } + } + pub async fn wait_task(&self, update_id: u64) -> Value { // try several times to get status, or panic to not wait forever let url = format!("/tasks/{}", update_id); diff --git a/meilisearch/tests/common/service.rs b/meilisearch/tests/common/service.rs index f1b800753..c6ac65418 100644 --- a/meilisearch/tests/common/service.rs +++ b/meilisearch/tests/common/service.rs @@ -34,17 +34,18 @@ impl Service { self.request(req).await } - /// Send a test post request from a text body, with a `content-type:application/json` header. + /// Send a test post request from a text body. pub async fn post_str( &self, url: impl AsRef, body: impl AsRef, - header: (&str, &str), + headers: Vec<(&str, &str)>, ) -> (Value, StatusCode) { - let req = test::TestRequest::post() - .uri(url.as_ref()) - .set_payload(body.as_ref().to_string()) - .insert_header(header); + let mut req = + test::TestRequest::post().uri(url.as_ref()).set_payload(body.as_ref().to_string()); + for header in headers { + req = req.insert_header(header); + } self.request(req).await } @@ -57,6 +58,21 @@ impl Service { self.put_encoded(url, body, Encoder::Plain).await } + /// Send a test put request from a text body. + pub async fn put_str( + &self, + url: impl AsRef, + body: impl AsRef, + headers: Vec<(&str, &str)>, + ) -> (Value, StatusCode) { + let mut req = + test::TestRequest::put().uri(url.as_ref()).set_payload(body.as_ref().to_string()); + for header in headers { + req = req.insert_header(header); + } + self.request(req).await + } + pub async fn put_encoded( &self, url: impl AsRef, diff --git a/meilisearch/tests/documents/errors.rs b/meilisearch/tests/documents/errors.rs index ffec01062..fcccf8e15 100644 --- a/meilisearch/tests/documents/errors.rs +++ b/meilisearch/tests/documents/errors.rs @@ -1,5 +1,6 @@ use meili_snap::*; use serde_json::json; +use urlencoding::encode; use crate::common::Server; @@ -97,3 +98,317 @@ async fn delete_documents_batch() { } "###); } + +#[actix_rt::test] +async fn replace_documents_missing_payload() { + let server = Server::new().await; + let index = server.index("test"); + + let (response, code) = index.raw_add_documents("", Some("application/json"), "").await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "A json payload is missing.", + "code": "missing_payload", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#missing_payload" + } + "###); + + let (response, code) = index.raw_add_documents("", Some("application/x-ndjson"), "").await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "A ndjson payload is missing.", + "code": "missing_payload", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#missing_payload" + } + "###); + + let (response, code) = index.raw_add_documents("", Some("text/csv"), "").await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "A csv payload is missing.", + "code": "missing_payload", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#missing_payload" + } + "###); +} + +#[actix_rt::test] +async fn update_documents_missing_payload() { + let server = Server::new().await; + let index = server.index("test"); + + let (response, code) = index.raw_update_documents("", Some("application/json"), "").await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "A json payload is missing.", + "code": "missing_payload", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#missing_payload" + } + "###); + + let (response, code) = index.raw_update_documents("", Some("application/x-ndjson"), "").await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "A ndjson payload is missing.", + "code": "missing_payload", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#missing_payload" + } + "###); + + let (response, code) = index.raw_update_documents("", Some("text/csv"), "").await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "A csv payload is missing.", + "code": "missing_payload", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#missing_payload" + } + "###); +} + +#[actix_rt::test] +async fn replace_documents_missing_content_type() { + let server = Server::new().await; + let index = server.index("test"); + + let (response, code) = index.raw_add_documents("", None, "").await; + snapshot!(code, @"415 Unsupported Media Type"); + snapshot!(json_string!(response), @r###" + { + "message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`", + "code": "missing_content_type", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#missing_content_type" + } + "###); + + // even with a csv delimiter specified this error is triggered first + let (response, code) = index.raw_add_documents("", None, "?csvDelimiter=;").await; + snapshot!(code, @"415 Unsupported Media Type"); + snapshot!(json_string!(response), @r###" + { + "message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`", + "code": "missing_content_type", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#missing_content_type" + } + "###); +} + +#[actix_rt::test] +async fn update_documents_missing_content_type() { + let server = Server::new().await; + let index = server.index("test"); + + let (response, code) = index.raw_update_documents("", None, "").await; + snapshot!(code, @"415 Unsupported Media Type"); + snapshot!(json_string!(response), @r###" + { + "message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`", + "code": "missing_content_type", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#missing_content_type" + } + "###); + + // even with a csv delimiter specified this error is triggered first + let (response, code) = index.raw_update_documents("", None, "?csvDelimiter=;").await; + snapshot!(code, @"415 Unsupported Media Type"); + snapshot!(json_string!(response), @r###" + { + "message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`", + "code": "missing_content_type", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#missing_content_type" + } + "###); +} + +#[actix_rt::test] +async fn replace_documents_bad_content_type() { + let server = Server::new().await; + let index = server.index("test"); + + let (response, code) = index.raw_add_documents("", Some("doggo"), "").await; + snapshot!(code, @"415 Unsupported Media Type"); + snapshot!(json_string!(response), @r###" + { + "message": "The Content-Type `doggo` is invalid. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`", + "code": "invalid_content_type", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_content_type" + } + "###); +} + +#[actix_rt::test] +async fn update_documents_bad_content_type() { + let server = Server::new().await; + let index = server.index("test"); + + let (response, code) = index.raw_update_documents("", Some("doggo"), "").await; + snapshot!(code, @"415 Unsupported Media Type"); + snapshot!(json_string!(response), @r###" + { + "message": "The Content-Type `doggo` is invalid. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`", + "code": "invalid_content_type", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_content_type" + } + "###); +} + +#[actix_rt::test] +async fn replace_documents_bad_csv_delimiter() { + let server = Server::new().await; + let index = server.index("test"); + + let (response, code) = + index.raw_add_documents("", Some("application/json"), "?csvDelimiter").await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found an empty string", + "code": "invalid_index_csv_delimiter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter" + } + "###); + + let (response, code) = + index.raw_add_documents("", Some("application/json"), "?csvDelimiter=doggo").await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found the following string of 5 characters: `doggo`", + "code": "invalid_index_csv_delimiter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter" + } + "###); + + let (response, code) = + index.raw_add_documents("", Some("application/json"), &format!("?csvDelimiter={}", encode("🍰"))).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "csv delimiter must be an ascii character. Found: `🍰`", + "code": "invalid_index_csv_delimiter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter" + } + "###); +} + +#[actix_rt::test] +async fn update_documents_bad_csv_delimiter() { + let server = Server::new().await; + let index = server.index("test"); + + let (response, code) = + index.raw_update_documents("", Some("application/json"), "?csvDelimiter").await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found an empty string", + "code": "invalid_index_csv_delimiter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter" + } + "###); + + let (response, code) = + index.raw_update_documents("", Some("application/json"), "?csvDelimiter=doggo").await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found the following string of 5 characters: `doggo`", + "code": "invalid_index_csv_delimiter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter" + } + "###); + + let (response, code) = + index.raw_update_documents("", Some("application/json"), &format!("?csvDelimiter={}", encode("🍰"))).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "csv delimiter must be an ascii character. Found: `🍰`", + "code": "invalid_index_csv_delimiter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter" + } + "###); +} + +#[actix_rt::test] +async fn replace_documents_csv_delimiter_with_bad_content_type() { + let server = Server::new().await; + let index = server.index("test"); + + let (response, code) = + index.raw_add_documents("", Some("application/json"), "?csvDelimiter=a").await; + snapshot!(code, @"415 Unsupported Media Type"); + snapshot!(json_string!(response), @r###" + { + "message": "The Content-Type `application/json` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.", + "code": "invalid_content_type", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_content_type" + } + "###); + + let (response, code) = + index.raw_add_documents("", Some("application/x-ndjson"), "?csvDelimiter=a").await; + snapshot!(code, @"415 Unsupported Media Type"); + snapshot!(json_string!(response), @r###" + { + "message": "The Content-Type `application/x-ndjson` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.", + "code": "invalid_content_type", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_content_type" + } + "###); +} + +#[actix_rt::test] +async fn update_documents_csv_delimiter_with_bad_content_type() { + let server = Server::new().await; + let index = server.index("test"); + + let (response, code) = + index.raw_update_documents("", Some("application/json"), "?csvDelimiter=a").await; + snapshot!(code, @"415 Unsupported Media Type"); + snapshot!(json_string!(response), @r###" + { + "message": "The Content-Type `application/json` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.", + "code": "invalid_content_type", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_content_type" + } + "###); + + let (response, code) = + index.raw_update_documents("", Some("application/x-ndjson"), "?csvDelimiter=a").await; + snapshot!(code, @"415 Unsupported Media Type"); + snapshot!(json_string!(response), @r###" + { + "message": "The Content-Type `application/x-ndjson` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.", + "code": "invalid_content_type", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_content_type" + } + "###); +} From 5367d8f05aea361fd9279c7aeb4fb1156e497dda Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 16 Feb 2023 14:38:03 +0100 Subject: [PATCH 3/5] add two tests on the indexing of csvs --- meilisearch/tests/documents/add_documents.rs | 127 +++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/meilisearch/tests/documents/add_documents.rs b/meilisearch/tests/documents/add_documents.rs index e553dcacd..56c0ffac2 100644 --- a/meilisearch/tests/documents/add_documents.rs +++ b/meilisearch/tests/documents/add_documents.rs @@ -216,6 +216,133 @@ async fn add_single_document_with_every_encoding() { } } +#[actix_rt::test] +async fn add_csv_document() { + let server = Server::new().await; + let index = server.index("pets"); + + let document = "#id,name,race +0,jean,bernese mountain +1,jorts,orange cat"; + + let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await; + snapshot!(code, @"202 Accepted"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "taskUid": 0, + "indexUid": "pets", + "status": "enqueued", + "type": "documentAdditionOrUpdate", + "enqueuedAt": "[date]" + } + "###); + let response = index.wait_task(response["taskUid"].as_u64().unwrap()).await; + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" + { + "uid": 0, + "indexUid": "pets", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 2, + "indexedDocuments": 2 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "#id": "0", + "name": "jean", + "race": "bernese mountain" + }, + { + "#id": "1", + "name": "jorts", + "race": "orange cat" + } + ], + "offset": 0, + "limit": 20, + "total": 2 + } + "###); +} + +#[actix_rt::test] +async fn add_csv_document_with_custom_delimiter() { + let server = Server::new().await; + let index = server.index("pets"); + + let document = "#id|name|race +0|jean|bernese mountain +1|jorts|orange cat"; + + let (response, code) = + index.raw_update_documents(document, Some("text/csv"), "?csvDelimiter=|").await; + snapshot!(code, @"202 Accepted"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "taskUid": 0, + "indexUid": "pets", + "status": "enqueued", + "type": "documentAdditionOrUpdate", + "enqueuedAt": "[date]" + } + "###); + let response = index.wait_task(response["taskUid"].as_u64().unwrap()).await; + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" + { + "uid": 0, + "indexUid": "pets", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 2, + "indexedDocuments": 2 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "#id": "0", + "name": "jean", + "race": "bernese mountain" + }, + { + "#id": "1", + "name": "jorts", + "race": "orange cat" + } + ], + "offset": 0, + "limit": 20, + "total": 2 + } + "###); +} + /// any other content-type is must be refused #[actix_rt::test] async fn error_add_documents_test_bad_content_types() { From e79f6f87f6482079df7026751483821be03f40c2 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 16 Feb 2023 18:00:40 +0100 Subject: [PATCH 4/5] make cargo fmt&clippy happy --- meilisearch/src/routes/indexes/documents.rs | 1 + meilisearch/tests/documents/errors.rs | 14 ++++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 67d3f6a31..529635704 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -239,6 +239,7 @@ pub async fn update_documents( Ok(HttpResponse::Accepted().json(task)) } +#[allow(clippy::too_many_arguments)] async fn document_addition( mime_type: Option, index_scheduler: GuardedData, Data>, diff --git a/meilisearch/tests/documents/errors.rs b/meilisearch/tests/documents/errors.rs index fcccf8e15..7ee3132d6 100644 --- a/meilisearch/tests/documents/errors.rs +++ b/meilisearch/tests/documents/errors.rs @@ -298,8 +298,9 @@ async fn replace_documents_bad_csv_delimiter() { } "###); - let (response, code) = - index.raw_add_documents("", Some("application/json"), &format!("?csvDelimiter={}", encode("🍰"))).await; + let (response, code) = index + .raw_add_documents("", Some("application/json"), &format!("?csvDelimiter={}", encode("🍰"))) + .await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -340,8 +341,13 @@ async fn update_documents_bad_csv_delimiter() { } "###); - let (response, code) = - index.raw_update_documents("", Some("application/json"), &format!("?csvDelimiter={}", encode("🍰"))).await; + let (response, code) = index + .raw_update_documents( + "", + Some("application/json"), + &format!("?csvDelimiter={}", encode("🍰")), + ) + .await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { From 1479050f7ad01a32c8735100040b26032fddefe8 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 20 Feb 2023 14:53:37 +0100 Subject: [PATCH 5/5] apply review suggestions --- meilisearch-types/src/document_formats.rs | 6 +++--- meilisearch/src/routes/indexes/documents.rs | 8 +++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/meilisearch-types/src/document_formats.rs b/meilisearch-types/src/document_formats.rs index 206cfdcc0..0b5f96ec0 100644 --- a/meilisearch-types/src/document_formats.rs +++ b/meilisearch-types/src/document_formats.rs @@ -19,7 +19,7 @@ type Result = std::result::Result; pub enum PayloadType { Ndjson, Json, - Csv(u8), + Csv { delimiter: u8 }, } impl fmt::Display for PayloadType { @@ -27,7 +27,7 @@ impl fmt::Display for PayloadType { match self { PayloadType::Ndjson => f.write_str("ndjson"), PayloadType::Json => f.write_str("json"), - PayloadType::Csv(_) => f.write_str("csv"), + PayloadType::Csv { .. } => f.write_str("csv"), } } } @@ -109,7 +109,7 @@ pub fn read_csv(file: &File, writer: impl Write + Seek, delimiter: u8) -> Result let mut builder = DocumentsBatchBuilder::new(writer); let mmap = unsafe { MmapOptions::new().map(file)? }; let csv = csv::ReaderBuilder::new().delimiter(delimiter).from_reader(mmap.as_ref()); - builder.append_csv(csv).map_err(|e| (PayloadType::Csv(delimiter), e))?; + builder.append_csv(csv).map_err(|e| (PayloadType::Csv { delimiter }, e))?; let count = builder.documents_count(); let _ = builder.into_inner().map_err(DocumentFormatError::Io)?; diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 529635704..0c649ea5d 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -256,8 +256,8 @@ async fn document_addition( ) { (Some(("application", "json")), None) => PayloadType::Json, (Some(("application", "x-ndjson")), None) => PayloadType::Ndjson, - (Some(("text", "csv")), None) => PayloadType::Csv(b','), - (Some(("text", "csv")), Some(delimiter)) => PayloadType::Csv(delimiter), + (Some(("text", "csv")), None) => PayloadType::Csv { delimiter: b',' }, + (Some(("text", "csv")), Some(delimiter)) => PayloadType::Csv { delimiter }, (Some(("application", "json")), Some(_)) => { return Err(MeilisearchHttpError::CsvDelimiterWithWrongContentType(String::from( @@ -320,7 +320,9 @@ async fn document_addition( let documents_count = tokio::task::spawn_blocking(move || { let documents_count = match format { PayloadType::Json => read_json(&read_file, update_file.as_file_mut())?, - PayloadType::Csv(delim) => read_csv(&read_file, update_file.as_file_mut(), delim)?, + PayloadType::Csv { delimiter } => { + read_csv(&read_file, update_file.as_file_mut(), delimiter)? + } PayloadType::Ndjson => read_ndjson(&read_file, update_file.as_file_mut())?, }; // we NEED to persist the file here because we moved the `udpate_file` in another task.