mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
Merge #3576
3576: Add boolean support for csv documents r=irevoire a=irevoire

Fixes https://github.com/meilisearch/meilisearch/issues/3572

## What does this PR do?

Add support for the boolean type in csv documents. The type definition is `boolean` and the possible values are:

- `true` for true
- `false` for false
- an empty value for null

Here is an example:

```csv
#id,cute:boolean
0,true
1,false
2,
```

Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
commit
70c906d4b4
@ -279,6 +279,81 @@ async fn add_csv_document() {
|
|||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn add_csv_document_with_types() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("pets");
|
||||||
|
|
||||||
|
let document = "#id:number,name:string,race:string,age:number,cute:boolean
|
||||||
|
0,jean,bernese mountain,2.5,true
|
||||||
|
1,,,,
|
||||||
|
2,lilou,pug,-2,false";
|
||||||
|
|
||||||
|
let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||||
|
{
|
||||||
|
"taskUid": 0,
|
||||||
|
"indexUid": "pets",
|
||||||
|
"status": "enqueued",
|
||||||
|
"type": "documentAdditionOrUpdate",
|
||||||
|
"enqueuedAt": "[date]"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
let response = index.wait_task(response["taskUid"].as_u64().unwrap()).await;
|
||||||
|
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
|
||||||
|
{
|
||||||
|
"uid": 0,
|
||||||
|
"indexUid": "pets",
|
||||||
|
"status": "succeeded",
|
||||||
|
"type": "documentAdditionOrUpdate",
|
||||||
|
"canceledBy": null,
|
||||||
|
"details": {
|
||||||
|
"receivedDocuments": 3,
|
||||||
|
"indexedDocuments": 3
|
||||||
|
},
|
||||||
|
"error": null,
|
||||||
|
"duration": "[duration]",
|
||||||
|
"enqueuedAt": "[date]",
|
||||||
|
"startedAt": "[date]",
|
||||||
|
"finishedAt": "[date]"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(documents), @r###"
|
||||||
|
{
|
||||||
|
"results": [
|
||||||
|
{
|
||||||
|
"#id": 0,
|
||||||
|
"name": "jean",
|
||||||
|
"race": "bernese mountain",
|
||||||
|
"age": 2.5,
|
||||||
|
"cute": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"#id": 1,
|
||||||
|
"name": null,
|
||||||
|
"race": null,
|
||||||
|
"age": null,
|
||||||
|
"cute": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"#id": 2,
|
||||||
|
"name": "lilou",
|
||||||
|
"race": "pug",
|
||||||
|
"age": -2,
|
||||||
|
"cute": false
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"offset": 0,
|
||||||
|
"limit": 20,
|
||||||
|
"total": 3
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn add_csv_document_with_custom_delimiter() {
|
async fn add_csv_document_with_custom_delimiter() {
|
||||||
let server = Server::new().await;
|
let server = Server::new().await;
|
||||||
@ -343,6 +418,40 @@ async fn add_csv_document_with_custom_delimiter() {
|
|||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn add_csv_document_with_types_error() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("pets");
|
||||||
|
|
||||||
|
let document = "#id:number,a:boolean,b:number
|
||||||
|
0,doggo,1";
|
||||||
|
|
||||||
|
let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await;
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||||
|
{
|
||||||
|
"message": "The `csv` payload provided is malformed: `Error parsing boolean \"doggo\" at line 1: provided string was not `true` or `false``.",
|
||||||
|
"code": "malformed_payload",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#malformed_payload"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
let document = "#id:number,a:boolean,b:number
|
||||||
|
0,true,doggo";
|
||||||
|
|
||||||
|
let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await;
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||||
|
{
|
||||||
|
"message": "The `csv` payload provided is malformed: `Error parsing number \"doggo\" at line 1: invalid float literal`.",
|
||||||
|
"code": "malformed_payload",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#malformed_payload"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
/// any other content-type is must be refused
|
/// any other content-type is must be refused
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn error_add_documents_test_bad_content_types() {
|
async fn error_add_documents_test_bad_content_types() {
|
||||||
|
@ -114,14 +114,15 @@ impl<W: Write> DocumentsBatchBuilder<W> {
|
|||||||
self.value_buffer.clear();
|
self.value_buffer.clear();
|
||||||
|
|
||||||
let value = &record[*i];
|
let value = &record[*i];
|
||||||
|
let trimmed_value = value.trim();
|
||||||
match type_ {
|
match type_ {
|
||||||
AllowedType::Number => {
|
AllowedType::Number => {
|
||||||
if value.trim().is_empty() {
|
if trimmed_value.is_empty() {
|
||||||
to_writer(&mut self.value_buffer, &Value::Null)?;
|
to_writer(&mut self.value_buffer, &Value::Null)?;
|
||||||
} else if let Ok(integer) = value.trim().parse::<i64>() {
|
} else if let Ok(integer) = trimmed_value.parse::<i64>() {
|
||||||
to_writer(&mut self.value_buffer, &integer)?;
|
to_writer(&mut self.value_buffer, &integer)?;
|
||||||
} else {
|
} else {
|
||||||
match value.trim().parse::<f64>() {
|
match trimmed_value.parse::<f64>() {
|
||||||
Ok(float) => {
|
Ok(float) => {
|
||||||
to_writer(&mut self.value_buffer, &float)?;
|
to_writer(&mut self.value_buffer, &float)?;
|
||||||
}
|
}
|
||||||
@ -135,6 +136,24 @@ impl<W: Write> DocumentsBatchBuilder<W> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
AllowedType::Boolean => {
|
||||||
|
if trimmed_value.is_empty() {
|
||||||
|
to_writer(&mut self.value_buffer, &Value::Null)?;
|
||||||
|
} else {
|
||||||
|
match trimmed_value.parse::<bool>() {
|
||||||
|
Ok(bool) => {
|
||||||
|
to_writer(&mut self.value_buffer, &bool)?;
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
return Err(Error::ParseBool {
|
||||||
|
error,
|
||||||
|
line,
|
||||||
|
value: value.to_string(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
AllowedType::String => {
|
AllowedType::String => {
|
||||||
if value.is_empty() {
|
if value.is_empty() {
|
||||||
to_writer(&mut self.value_buffer, &Value::Null)?;
|
to_writer(&mut self.value_buffer, &Value::Null)?;
|
||||||
@ -173,6 +192,7 @@ impl<W: Write> DocumentsBatchBuilder<W> {
|
|||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
enum AllowedType {
|
enum AllowedType {
|
||||||
String,
|
String,
|
||||||
|
Boolean,
|
||||||
Number,
|
Number,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -181,6 +201,7 @@ fn parse_csv_header(header: &str) -> (&str, AllowedType) {
|
|||||||
match header.rsplit_once(':') {
|
match header.rsplit_once(':') {
|
||||||
Some((field_name, field_type)) => match field_type {
|
Some((field_name, field_type)) => match field_type {
|
||||||
"string" => (field_name, AllowedType::String),
|
"string" => (field_name, AllowedType::String),
|
||||||
|
"boolean" => (field_name, AllowedType::Boolean),
|
||||||
"number" => (field_name, AllowedType::Number),
|
"number" => (field_name, AllowedType::Number),
|
||||||
// if the pattern isn't reconized, we keep the whole field.
|
// if the pattern isn't reconized, we keep the whole field.
|
||||||
_otherwise => (header, AllowedType::String),
|
_otherwise => (header, AllowedType::String),
|
||||||
|
@ -3,7 +3,7 @@ mod enriched;
|
|||||||
mod reader;
|
mod reader;
|
||||||
mod serde_impl;
|
mod serde_impl;
|
||||||
|
|
||||||
use std::fmt::{self, Debug};
|
use std::fmt::Debug;
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::str::Utf8Error;
|
use std::str::Utf8Error;
|
||||||
|
|
||||||
@ -87,71 +87,30 @@ impl DocumentsBatchIndex {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug, thiserror::Error)]
|
||||||
pub enum Error {
|
pub enum Error {
|
||||||
|
#[error("Error parsing number {value:?} at line {line}: {error}")]
|
||||||
ParseFloat { error: std::num::ParseFloatError, line: usize, value: String },
|
ParseFloat { error: std::num::ParseFloatError, line: usize, value: String },
|
||||||
|
#[error("Error parsing boolean {value:?} at line {line}: {error}")]
|
||||||
|
ParseBool { error: std::str::ParseBoolError, line: usize, value: String },
|
||||||
|
#[error("Invalid document addition format, missing the documents batch index.")]
|
||||||
InvalidDocumentFormat,
|
InvalidDocumentFormat,
|
||||||
|
#[error("Invalid enriched data.")]
|
||||||
InvalidEnrichedData,
|
InvalidEnrichedData,
|
||||||
InvalidUtf8(Utf8Error),
|
#[error(transparent)]
|
||||||
Csv(csv::Error),
|
InvalidUtf8(#[from] Utf8Error),
|
||||||
Json(serde_json::Error),
|
#[error(transparent)]
|
||||||
|
Csv(#[from] csv::Error),
|
||||||
|
#[error(transparent)]
|
||||||
|
Json(#[from] serde_json::Error),
|
||||||
|
#[error(transparent)]
|
||||||
Serialize(serde_json::Error),
|
Serialize(serde_json::Error),
|
||||||
Grenad(grenad::Error),
|
#[error(transparent)]
|
||||||
Io(io::Error),
|
Grenad(#[from] grenad::Error),
|
||||||
|
#[error(transparent)]
|
||||||
|
Io(#[from] io::Error),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<csv::Error> for Error {
|
|
||||||
fn from(e: csv::Error) -> Self {
|
|
||||||
Self::Csv(e)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<io::Error> for Error {
|
|
||||||
fn from(other: io::Error) -> Self {
|
|
||||||
Self::Io(other)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<serde_json::Error> for Error {
|
|
||||||
fn from(other: serde_json::Error) -> Self {
|
|
||||||
Self::Json(other)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<grenad::Error> for Error {
|
|
||||||
fn from(other: grenad::Error) -> Self {
|
|
||||||
Self::Grenad(other)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<Utf8Error> for Error {
|
|
||||||
fn from(other: Utf8Error) -> Self {
|
|
||||||
Self::InvalidUtf8(other)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for Error {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
match self {
|
|
||||||
Error::ParseFloat { error, line, value } => {
|
|
||||||
write!(f, "Error parsing number {:?} at line {}: {}", value, line, error)
|
|
||||||
}
|
|
||||||
Error::InvalidDocumentFormat => {
|
|
||||||
f.write_str("Invalid document addition format, missing the documents batch index.")
|
|
||||||
}
|
|
||||||
Error::InvalidEnrichedData => f.write_str("Invalid enriched data."),
|
|
||||||
Error::InvalidUtf8(e) => write!(f, "{}", e),
|
|
||||||
Error::Io(e) => write!(f, "{}", e),
|
|
||||||
Error::Serialize(e) => write!(f, "{}", e),
|
|
||||||
Error::Grenad(e) => write!(f, "{}", e),
|
|
||||||
Error::Csv(e) => write!(f, "{}", e),
|
|
||||||
Error::Json(e) => write!(f, "{}", e),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::error::Error for Error {}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub fn objects_from_json_value(json: serde_json::Value) -> Vec<crate::Object> {
|
pub fn objects_from_json_value(json: serde_json::Value) -> Vec<crate::Object> {
|
||||||
let documents = match json {
|
let documents = match json {
|
||||||
@ -274,6 +233,19 @@ mod test {
|
|||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn csv_types_dont_panic() {
|
||||||
|
let csv1_content =
|
||||||
|
"id:number,b:boolean,c,d:number\n1,,,\n2,true,doggo,2\n3,false,the best doggo,-2\n4,,\"Hello, World!\",2.5";
|
||||||
|
let csv1 = csv::Reader::from_reader(Cursor::new(csv1_content));
|
||||||
|
|
||||||
|
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
|
builder.append_csv(csv1).unwrap();
|
||||||
|
let vector = builder.into_inner().unwrap();
|
||||||
|
|
||||||
|
DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn out_of_order_csv_fields() {
|
fn out_of_order_csv_fields() {
|
||||||
let csv1_content = "id:number,b\n1,0";
|
let csv1_content = "id:number,b\n1,0";
|
||||||
|
Loading…
Reference in New Issue
Block a user