mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 11:57:07 +02:00
Merge #3505
3505: Csv delimiter r=irevoire a=irevoire Fixes https://github.com/meilisearch/meilisearch/issues/3442 Closes https://github.com/meilisearch/meilisearch/pull/2803 Specified in https://github.com/meilisearch/specifications/pull/221 This PR is a reimplementation of https://github.com/meilisearch/meilisearch/pull/2803, on the new engine. Thanks for your idea and initial PR `@MixusMinimax;` sorry I couldn’t update/merge your PR. Way too many changes happened on the engine in the meantime. **Attention to reviewer**; I had to update deserr to implement the support of deserializing `char`s ------- It introduces four new error messages; - Invalid value in parameter csvDelimiter: expected a string of one character, but found an empty string - Invalid value in parameter csvDelimiter: expected a string of one character, but found the following string of 5 characters: doggo - csv delimiter must be an ascii character. Found: 🍰 - The Content-Type application/json does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type text/csv. And one error code; - `invalid_index_csv_delimiter` The `invalid_content_type` error code is now also used when we encounter the `csvDelimiter` query parameter with a non-csv content type. Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
commit
1e9ac00800
12 changed files with 571 additions and 36 deletions
|
@ -15,7 +15,7 @@ actix-web = { version = "4.2.1", default-features = false }
|
|||
anyhow = "1.0.65"
|
||||
convert_case = "0.6.0"
|
||||
csv = "1.1.6"
|
||||
deserr = "0.4.1"
|
||||
deserr = "0.5.0"
|
||||
either = { version = "1.6.1", features = ["serde"] }
|
||||
enum-iterator = "1.1.3"
|
||||
file-store = { path = "../file-store" }
|
||||
|
|
|
@ -19,7 +19,7 @@ type Result<T> = std::result::Result<T, DocumentFormatError>;
|
|||
pub enum PayloadType {
|
||||
Ndjson,
|
||||
Json,
|
||||
Csv,
|
||||
Csv { delimiter: u8 },
|
||||
}
|
||||
|
||||
impl fmt::Display for PayloadType {
|
||||
|
@ -27,7 +27,7 @@ impl fmt::Display for PayloadType {
|
|||
match self {
|
||||
PayloadType::Ndjson => f.write_str("ndjson"),
|
||||
PayloadType::Json => f.write_str("json"),
|
||||
PayloadType::Csv => f.write_str("csv"),
|
||||
PayloadType::Csv { .. } => f.write_str("csv"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -105,11 +105,11 @@ impl ErrorCode for DocumentFormatError {
|
|||
}
|
||||
|
||||
/// Reads CSV from input and write an obkv batch to writer.
|
||||
pub fn read_csv(file: &File, writer: impl Write + Seek) -> Result<u64> {
|
||||
pub fn read_csv(file: &File, writer: impl Write + Seek, delimiter: u8) -> Result<u64> {
|
||||
let mut builder = DocumentsBatchBuilder::new(writer);
|
||||
let mmap = unsafe { MmapOptions::new().map(file)? };
|
||||
let csv = csv::Reader::from_reader(mmap.as_ref());
|
||||
builder.append_csv(csv).map_err(|e| (PayloadType::Csv, e))?;
|
||||
let csv = csv::ReaderBuilder::new().delimiter(delimiter).from_reader(mmap.as_ref());
|
||||
builder.append_csv(csv).map_err(|e| (PayloadType::Csv { delimiter }, e))?;
|
||||
|
||||
let count = builder.documents_count();
|
||||
let _ = builder.into_inner().map_err(DocumentFormatError::Io)?;
|
||||
|
|
|
@ -220,6 +220,7 @@ InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ;
|
|||
InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidIndexCsvDelimiter , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue