diff --git a/meilisearch/tests/documents/add_documents.rs b/meilisearch/tests/documents/add_documents.rs index 612a2cdb6..164d68582 100644 --- a/meilisearch/tests/documents/add_documents.rs +++ b/meilisearch/tests/documents/add_documents.rs @@ -279,6 +279,81 @@ async fn add_csv_document() { "###); } +#[actix_rt::test] +async fn add_csv_document_with_types() { + let server = Server::new().await; + let index = server.index("pets"); + + let document = "#id:number,name:string,race:string,age:number,cute:boolean +0,jean,bernese mountain,2.5,true +1,,,, +2,lilou,pug,-2,false"; + + let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await; + snapshot!(code, @"202 Accepted"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "taskUid": 0, + "indexUid": "pets", + "status": "enqueued", + "type": "documentAdditionOrUpdate", + "enqueuedAt": "[date]" + } + "###); + let response = index.wait_task(response["taskUid"].as_u64().unwrap()).await; + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" + { + "uid": 0, + "indexUid": "pets", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 3, + "indexedDocuments": 3 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "#id": 0, + "name": "jean", + "race": "bernese mountain", + "age": 2.5, + "cute": true + }, + { + "#id": 1, + "name": null, + "race": null, + "age": null, + "cute": null + }, + { + "#id": 2, + "name": "lilou", + "race": "pug", + "age": -2, + "cute": false + } + ], + "offset": 0, + "limit": 20, + "total": 3 + } + "###); 
+} + #[actix_rt::test] async fn add_csv_document_with_custom_delimiter() { let server = Server::new().await; @@ -343,6 +418,40 @@ async fn add_csv_document_with_custom_delimiter() { "###); } +#[actix_rt::test] +async fn add_csv_document_with_types_error() { + let server = Server::new().await; + let index = server.index("pets"); + + let document = "#id:number,a:boolean,b:number +0,doggo,1"; + + let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "message": "The `csv` payload provided is malformed: `Error parsing boolean \"doggo\" at line 1: provided string was not `true` or `false``.", + "code": "malformed_payload", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#malformed_payload" + } + "###); + + let document = "#id:number,a:boolean,b:number +0,true,doggo"; + + let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "message": "The `csv` payload provided is malformed: `Error parsing number \"doggo\" at line 1: invalid float literal`.", + "code": "malformed_payload", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#malformed_payload" + } + "###); +} + /// any other content-type is must be refused #[actix_rt::test] async fn error_add_documents_test_bad_content_types() { diff --git a/milli/src/documents/builder.rs b/milli/src/documents/builder.rs index 1fa59168e..ace9340d7 100644 --- a/milli/src/documents/builder.rs +++ b/milli/src/documents/builder.rs @@ -116,12 +116,13 @@ impl DocumentsBatchBuilder { let value = &record[*i]; match type_ { AllowedType::Number => { - if value.trim().is_empty() { + let trimmed_value = value.trim(); + if trimmed_value.is_empty() { to_writer(&mut self.value_buffer, &Value::Null)?; 
- } else if let Ok(integer) = value.trim().parse::<i64>() { + } else if let Ok(integer) = trimmed_value.parse::<i64>() { to_writer(&mut self.value_buffer, &integer)?; } else { - match value.trim().parse::<f64>() { + match trimmed_value.parse::<f64>() { Ok(float) => { to_writer(&mut self.value_buffer, &float)?; } @@ -135,6 +136,25 @@ impl DocumentsBatchBuilder { } } } + AllowedType::Boolean => { + let trimmed_value = value.trim(); + if trimmed_value.is_empty() { + to_writer(&mut self.value_buffer, &Value::Null)?; + } else { + match trimmed_value.parse::<bool>() { + Ok(bool) => { + to_writer(&mut self.value_buffer, &bool)?; + } + Err(error) => { + return Err(Error::ParseBool { + error, + line, + value: value.to_string(), + }); + } + } + } + } AllowedType::String => { if value.is_empty() { to_writer(&mut self.value_buffer, &Value::Null)?; @@ -173,6 +193,7 @@ impl DocumentsBatchBuilder { #[derive(Debug)] enum AllowedType { String, + Boolean, Number, } @@ -181,6 +202,7 @@ fn parse_csv_header(header: &str) -> (&str, AllowedType) { match header.rsplit_once(':') { Some((field_name, field_type)) => match field_type { "string" => (field_name, AllowedType::String), + "boolean" => (field_name, AllowedType::Boolean), "number" => (field_name, AllowedType::Number), // if the pattern isn't reconized, we keep the whole field. 
_otherwise => (header, AllowedType::String), diff --git a/milli/src/documents/mod.rs b/milli/src/documents/mod.rs index da3a07942..67b99db9a 100644 --- a/milli/src/documents/mod.rs +++ b/milli/src/documents/mod.rs @@ -90,6 +90,7 @@ impl DocumentsBatchIndex { #[derive(Debug)] pub enum Error { ParseFloat { error: std::num::ParseFloatError, line: usize, value: String }, + ParseBool { error: std::str::ParseBoolError, line: usize, value: String }, InvalidDocumentFormat, InvalidEnrichedData, InvalidUtf8(Utf8Error), @@ -136,6 +137,9 @@ impl fmt::Display for Error { Error::ParseFloat { error, line, value } => { write!(f, "Error parsing number {:?} at line {}: {}", value, line, error) } + Error::ParseBool { error, line, value } => { + write!(f, "Error parsing boolean {:?} at line {}: {}", value, line, error) + } Error::InvalidDocumentFormat => { f.write_str("Invalid document addition format, missing the documents batch index.") } @@ -274,6 +278,19 @@ mod test { ]); } + #[test] + fn csv_types_dont_panic() { + let csv1_content = + "id:number,b:boolean,c,d:number\n1,,,\n2,true,doggo,2\n3,false,the best doggo,-2\n4,,\"Hello, World!\",2.5"; + let csv1 = csv::Reader::from_reader(Cursor::new(csv1_content)); + + let mut builder = DocumentsBatchBuilder::new(Vec::new()); + builder.append_csv(csv1).unwrap(); + let vector = builder.into_inner().unwrap(); + + DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap(); + } + #[test] fn out_of_order_csv_fields() { let csv1_content = "id:number,b\n1,0";