mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 11:57:07 +02:00
move csv parsing to document_formats
This commit is contained in:
parent
2a14948123
commit
6e8a3fe8de
4 changed files with 384 additions and 414 deletions
|
@ -1,8 +1,7 @@
|
|||
use std::{
|
||||
fmt,
|
||||
io::{Read, Seek, Write},
|
||||
};
|
||||
use std::io::{self, Read, Result as IoResult, Seek, Write};
|
||||
use std::fmt;
|
||||
|
||||
use csv::{Reader as CsvReader, StringRecordsIntoIter};
|
||||
use milli::documents::DocumentBatchBuilder;
|
||||
use serde_json::{Deserializer, Map, Value};
|
||||
|
||||
|
@ -12,6 +11,7 @@ type Result<T> = std::result::Result<T, DocumentFormatError>;
|
|||
pub enum PayloadType {
|
||||
Jsonl,
|
||||
Json,
|
||||
Csv,
|
||||
}
|
||||
|
||||
impl fmt::Display for PayloadType {
|
||||
|
@ -19,6 +19,7 @@ impl fmt::Display for PayloadType {
|
|||
match self {
|
||||
PayloadType::Jsonl => write!(f, "ndjson"),
|
||||
PayloadType::Json => write!(f, "json"),
|
||||
PayloadType::Csv => write!(f, "csv"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -34,7 +35,7 @@ pub enum DocumentFormatError {
|
|||
),
|
||||
}
|
||||
|
||||
internal_error!(DocumentFormatError: milli::documents::Error);
|
||||
internal_error!(DocumentFormatError: milli::documents::Error, io::Error);
|
||||
|
||||
macro_rules! malformed {
|
||||
($type:path, $e:expr) => {
|
||||
|
@ -42,6 +43,20 @@ macro_rules! malformed {
|
|||
};
|
||||
}
|
||||
|
||||
pub fn read_csv(input: impl Read, writer: impl Write + Seek) -> Result<()> {
|
||||
let mut builder = DocumentBatchBuilder::new(writer).unwrap();
|
||||
|
||||
let iter = CsvDocumentIter::from_reader(input)?;
|
||||
for doc in iter {
|
||||
let doc = doc?;
|
||||
builder.add_documents(doc).unwrap();
|
||||
}
|
||||
builder.finish().unwrap();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
/// read jsonl from input and write an obkv batch to writer.
|
||||
pub fn read_jsonl(input: impl Read, writer: impl Write + Seek) -> Result<()> {
|
||||
let mut builder = DocumentBatchBuilder::new(writer)?;
|
||||
|
@ -68,3 +83,281 @@ pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result<()> {
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
enum AllowedType {
|
||||
String,
|
||||
Number,
|
||||
}
|
||||
|
||||
fn parse_csv_header(header: &str) -> (String, AllowedType) {
|
||||
// if there are several separators we only split on the last one.
|
||||
match header.rsplit_once(':') {
|
||||
Some((field_name, field_type)) => match field_type {
|
||||
"string" => (field_name.to_string(), AllowedType::String),
|
||||
"number" => (field_name.to_string(), AllowedType::Number),
|
||||
// if the pattern isn't reconized, we keep the whole field.
|
||||
_otherwise => (header.to_string(), AllowedType::String),
|
||||
},
|
||||
None => (header.to_string(), AllowedType::String),
|
||||
}
|
||||
}
|
||||
|
||||
pub struct CsvDocumentIter<R>
|
||||
where
|
||||
R: Read,
|
||||
{
|
||||
documents: StringRecordsIntoIter<R>,
|
||||
headers: Vec<(String, AllowedType)>,
|
||||
}
|
||||
|
||||
impl<R: Read> CsvDocumentIter<R> {
|
||||
pub fn from_reader(reader: R) -> IoResult<Self> {
|
||||
let mut records = CsvReader::from_reader(reader);
|
||||
|
||||
let headers = records
|
||||
.headers()?
|
||||
.into_iter()
|
||||
.map(parse_csv_header)
|
||||
.collect();
|
||||
|
||||
Ok(Self {
|
||||
documents: records.into_records(),
|
||||
headers,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Read> Iterator for CsvDocumentIter<R> {
|
||||
type Item = Result<Map<String, Value>>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let csv_document = self.documents.next()?;
|
||||
|
||||
match csv_document {
|
||||
Ok(csv_document) => {
|
||||
let mut document = Map::new();
|
||||
|
||||
for ((field_name, field_type), value) in
|
||||
self.headers.iter().zip(csv_document.into_iter())
|
||||
{
|
||||
let parsed_value = (|| match field_type {
|
||||
AllowedType::Number => malformed!(PayloadType::Csv, value
|
||||
.parse::<f64>()
|
||||
.map(Value::from)),
|
||||
AllowedType::String => Ok(Value::String(value.to_string())),
|
||||
})();
|
||||
|
||||
match parsed_value {
|
||||
Ok(value) => drop(document.insert(field_name.to_string(), value)),
|
||||
Err(e) => return Some(Err(e)),
|
||||
}
|
||||
}
|
||||
|
||||
Some(Ok(document))
|
||||
}
|
||||
Err(e) => Some(Err(DocumentFormatError::MalformedPayload(Box::new(e), PayloadType::Csv))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use serde_json::json;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn simple_csv_document() {
|
||||
let documents = r#"city,country,pop
|
||||
"Boston","United States","4628910""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
Value::Object(csv_iter.next().unwrap().unwrap()),
|
||||
json!({
|
||||
"city": "Boston",
|
||||
"country": "United States",
|
||||
"pop": "4628910",
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn coma_in_field() {
|
||||
let documents = r#"city,country,pop
|
||||
"Boston","United, States","4628910""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
Value::Object(csv_iter.next().unwrap().unwrap()),
|
||||
json!({
|
||||
"city": "Boston",
|
||||
"country": "United, States",
|
||||
"pop": "4628910",
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn quote_in_field() {
|
||||
let documents = r#"city,country,pop
|
||||
"Boston","United"" States","4628910""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
Value::Object(csv_iter.next().unwrap().unwrap()),
|
||||
json!({
|
||||
"city": "Boston",
|
||||
"country": "United\" States",
|
||||
"pop": "4628910",
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn integer_in_field() {
|
||||
let documents = r#"city,country,pop:number
|
||||
"Boston","United States","4628910""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
Value::Object(csv_iter.next().unwrap().unwrap()),
|
||||
json!({
|
||||
"city": "Boston",
|
||||
"country": "United States",
|
||||
"pop": 4628910.0,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn float_in_field() {
|
||||
let documents = r#"city,country,pop:number
|
||||
"Boston","United States","4628910.01""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
Value::Object(csv_iter.next().unwrap().unwrap()),
|
||||
json!({
|
||||
"city": "Boston",
|
||||
"country": "United States",
|
||||
"pop": 4628910.01,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn several_colon_in_header() {
|
||||
let documents = r#"city:love:string,country:state,pop
|
||||
"Boston","United States","4628910""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
Value::Object(csv_iter.next().unwrap().unwrap()),
|
||||
json!({
|
||||
"city:love": "Boston",
|
||||
"country:state": "United States",
|
||||
"pop": "4628910",
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ending_by_colon_in_header() {
|
||||
let documents = r#"city:,country,pop
|
||||
"Boston","United States","4628910""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
Value::Object(csv_iter.next().unwrap().unwrap()),
|
||||
json!({
|
||||
"city:": "Boston",
|
||||
"country": "United States",
|
||||
"pop": "4628910",
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn starting_by_colon_in_header() {
|
||||
let documents = r#":city,country,pop
|
||||
"Boston","United States","4628910""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
Value::Object(csv_iter.next().unwrap().unwrap()),
|
||||
json!({
|
||||
":city": "Boston",
|
||||
"country": "United States",
|
||||
"pop": "4628910",
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[ignore]
|
||||
#[test]
|
||||
fn starting_by_colon_in_header2() {
|
||||
let documents = r#":string,country,pop
|
||||
"Boston","United States","4628910""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert!(dbg!(csv_iter.next().unwrap()).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn double_colon_in_header() {
|
||||
let documents = r#"city::string,country,pop
|
||||
"Boston","United States","4628910""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
Value::Object(csv_iter.next().unwrap().unwrap()),
|
||||
json!({
|
||||
"city:": "Boston",
|
||||
"country": "United States",
|
||||
"pop": "4628910",
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bad_type_in_header() {
|
||||
let documents = r#"city,country:number,pop
|
||||
"Boston","United States","4628910""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert!(csv_iter.next().unwrap().is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bad_column_count1() {
|
||||
let documents = r#"city,country,pop
|
||||
"Boston","United States","4628910", "too much""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert!(csv_iter.next().unwrap().is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bad_column_count2() {
|
||||
let documents = r#"city,country,pop
|
||||
"Boston","United States""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert!(csv_iter.next().unwrap().is_err());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,282 +0,0 @@
|
|||
use super::error::{Result, UpdateLoopError};
|
||||
use std::io::{Read, Result as IoResult};
|
||||
|
||||
use csv::{Reader as CsvReader, StringRecordsIntoIter};
|
||||
use serde_json::{Map, Value};
|
||||
|
||||
enum AllowedType {
|
||||
String,
|
||||
Number,
|
||||
}
|
||||
|
||||
fn parse_csv_header(header: &str) -> (String, AllowedType) {
|
||||
// if there are several separators we only split on the last one.
|
||||
match header.rsplit_once(':') {
|
||||
Some((field_name, field_type)) => match field_type {
|
||||
"string" => (field_name.to_string(), AllowedType::String),
|
||||
"number" => (field_name.to_string(), AllowedType::Number),
|
||||
// if the pattern isn't reconized, we keep the whole field.
|
||||
_otherwise => (header.to_string(), AllowedType::String),
|
||||
},
|
||||
None => (header.to_string(), AllowedType::String),
|
||||
}
|
||||
}
|
||||
|
||||
pub struct CsvDocumentIter<R>
|
||||
where
|
||||
R: Read,
|
||||
{
|
||||
documents: StringRecordsIntoIter<R>,
|
||||
headers: Vec<(String, AllowedType)>,
|
||||
}
|
||||
|
||||
impl<R: Read> CsvDocumentIter<R> {
|
||||
pub fn from_reader(reader: R) -> IoResult<Self> {
|
||||
let mut records = CsvReader::from_reader(reader);
|
||||
|
||||
let headers = records
|
||||
.headers()?
|
||||
.into_iter()
|
||||
.map(parse_csv_header)
|
||||
.collect();
|
||||
|
||||
Ok(Self {
|
||||
documents: records.into_records(),
|
||||
headers,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Read> Iterator for CsvDocumentIter<R> {
|
||||
type Item = Result<Map<String, Value>>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let csv_document = self.documents.next()?;
|
||||
|
||||
match csv_document {
|
||||
Ok(csv_document) => {
|
||||
let mut document = Map::new();
|
||||
|
||||
for ((field_name, field_type), value) in
|
||||
self.headers.iter().zip(csv_document.into_iter())
|
||||
{
|
||||
let parsed_value = (|| match field_type {
|
||||
AllowedType::Number => value
|
||||
.parse::<f64>()
|
||||
.map(Value::from)
|
||||
.map_err(|e| UpdateLoopError::MalformedPayload(Box::new(e))),
|
||||
AllowedType::String => Ok(Value::String(value.to_string())),
|
||||
})();
|
||||
|
||||
match parsed_value {
|
||||
Ok(value) => drop(document.insert(field_name.to_string(), value)),
|
||||
Err(e) => return Some(Err(e)),
|
||||
}
|
||||
}
|
||||
|
||||
Some(Ok(document))
|
||||
}
|
||||
Err(e) => Some(Err(UpdateLoopError::MalformedPayload(Box::new(e)))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use serde_json::json;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn simple_csv_document() {
|
||||
let documents = r#"city,country,pop
|
||||
"Boston","United States","4628910""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
Value::Object(csv_iter.next().unwrap().unwrap()),
|
||||
json!({
|
||||
"city": "Boston",
|
||||
"country": "United States",
|
||||
"pop": "4628910",
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn coma_in_field() {
|
||||
let documents = r#"city,country,pop
|
||||
"Boston","United, States","4628910""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
Value::Object(csv_iter.next().unwrap().unwrap()),
|
||||
json!({
|
||||
"city": "Boston",
|
||||
"country": "United, States",
|
||||
"pop": "4628910",
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn quote_in_field() {
|
||||
let documents = r#"city,country,pop
|
||||
"Boston","United"" States","4628910""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
Value::Object(csv_iter.next().unwrap().unwrap()),
|
||||
json!({
|
||||
"city": "Boston",
|
||||
"country": "United\" States",
|
||||
"pop": "4628910",
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn integer_in_field() {
|
||||
let documents = r#"city,country,pop:number
|
||||
"Boston","United States","4628910""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
Value::Object(csv_iter.next().unwrap().unwrap()),
|
||||
json!({
|
||||
"city": "Boston",
|
||||
"country": "United States",
|
||||
"pop": 4628910.0,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn float_in_field() {
|
||||
let documents = r#"city,country,pop:number
|
||||
"Boston","United States","4628910.01""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
Value::Object(csv_iter.next().unwrap().unwrap()),
|
||||
json!({
|
||||
"city": "Boston",
|
||||
"country": "United States",
|
||||
"pop": 4628910.01,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn several_double_dot_in_header() {
|
||||
let documents = r#"city:love:string,country:state,pop
|
||||
"Boston","United States","4628910""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
Value::Object(csv_iter.next().unwrap().unwrap()),
|
||||
json!({
|
||||
"city:love": "Boston",
|
||||
"country:state": "United States",
|
||||
"pop": "4628910",
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ending_by_double_dot_in_header() {
|
||||
let documents = r#"city:,country,pop
|
||||
"Boston","United States","4628910""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
Value::Object(csv_iter.next().unwrap().unwrap()),
|
||||
json!({
|
||||
"city:": "Boston",
|
||||
"country": "United States",
|
||||
"pop": "4628910",
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn starting_by_double_dot_in_header() {
|
||||
let documents = r#":city,country,pop
|
||||
"Boston","United States","4628910""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
Value::Object(csv_iter.next().unwrap().unwrap()),
|
||||
json!({
|
||||
":city": "Boston",
|
||||
"country": "United States",
|
||||
"pop": "4628910",
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn starting_by_double_dot_in_header2() {
|
||||
let documents = r#":string,country,pop
|
||||
"Boston","United States","4628910""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert!(csv_iter.next().unwrap().is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn double_double_dot_in_header() {
|
||||
let documents = r#"city::string,country,pop
|
||||
"Boston","United States","4628910""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
Value::Object(csv_iter.next().unwrap().unwrap()),
|
||||
json!({
|
||||
"city:": "Boston",
|
||||
"country": "United States",
|
||||
"pop": "4628910",
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bad_type_in_header() {
|
||||
let documents = r#"city,country:number,pop
|
||||
"Boston","United States","4628910""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert!(csv_iter.next().unwrap().is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bad_column_count1() {
|
||||
let documents = r#"city,country,pop
|
||||
"Boston","United States","4628910", "too much""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert!(csv_iter.next().unwrap().is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bad_column_count2() {
|
||||
let documents = r#"city,country,pop
|
||||
"Boston","United States""#;
|
||||
|
||||
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
|
||||
|
||||
assert!(csv_iter.next().unwrap().is_err());
|
||||
}
|
||||
}
|
|
@ -1,10 +1,8 @@
|
|||
mod csv_documents_iter;
|
||||
pub mod error;
|
||||
mod message;
|
||||
pub mod status;
|
||||
pub mod store;
|
||||
|
||||
use crate::index_controller::updates::csv_documents_iter::CsvDocumentIter;
|
||||
use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::AtomicBool;
|
||||
|
@ -15,7 +13,6 @@ use async_stream::stream;
|
|||
use bytes::Bytes;
|
||||
use futures::{Stream, StreamExt};
|
||||
use log::trace;
|
||||
use milli::documents::DocumentBatchBuilder;
|
||||
use milli::update::IndexDocumentsMethod;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::sync::mpsc;
|
||||
|
@ -24,13 +21,13 @@ use uuid::Uuid;
|
|||
use self::error::{Result, UpdateLoopError};
|
||||
pub use self::message::UpdateMsg;
|
||||
use self::store::{UpdateStore, UpdateStoreInfo};
|
||||
use crate::document_formats::read_json;
|
||||
use crate::document_formats::{read_csv, read_json};
|
||||
use crate::index::{Index, Settings, Unchecked};
|
||||
use crate::index_controller::update_file_store::UpdateFileStore;
|
||||
use status::UpdateStatus;
|
||||
|
||||
use super::index_resolver::HardStateIndexResolver;
|
||||
use super::{DocumentAdditionFormat, Payload, Update};
|
||||
use super::{DocumentAdditionFormat, Update};
|
||||
|
||||
pub type UpdateSender = mpsc::Sender<UpdateMsg>;
|
||||
|
||||
|
@ -198,6 +195,7 @@ impl UpdateLoop {
|
|||
tokio::task::spawn_blocking(move || -> Result<_> {
|
||||
match format {
|
||||
DocumentAdditionFormat::Json => read_json(reader, &mut *update_file)?,
|
||||
DocumentAdditionFormat::Csv => read_csv(reader, &mut *update_file)?,
|
||||
}
|
||||
|
||||
update_file.persist()?;
|
||||
|
@ -225,26 +223,6 @@ impl UpdateLoop {
|
|||
Ok(status.into())
|
||||
}
|
||||
|
||||
async fn documents_from_csv(&self, payload: Payload) -> Result<Uuid> {
|
||||
let file_store = self.update_file_store.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let (uuid, mut file) = file_store.new_update().unwrap();
|
||||
let mut builder = DocumentBatchBuilder::new(&mut *file).unwrap();
|
||||
|
||||
let iter = CsvDocumentIter::from_reader(StreamReader::new(payload))?;
|
||||
for doc in iter {
|
||||
let doc = doc?;
|
||||
builder.add_documents(doc).unwrap();
|
||||
}
|
||||
builder.finish().unwrap();
|
||||
|
||||
file.persist();
|
||||
|
||||
Ok(uuid)
|
||||
})
|
||||
.await?
|
||||
}
|
||||
|
||||
async fn handle_list_updates(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>> {
|
||||
let update_store = self.store.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue