From 53c79e85f2a0acd4559c11eb01f27914ca1c5ccb Mon Sep 17 00:00:00 2001 From: marin postma Date: Sun, 24 Oct 2021 15:39:56 +0200 Subject: [PATCH] document errors --- milli/src/documents/builder.rs | 20 +++++++------------ milli/src/documents/mod.rs | 30 +++++++++++++++++++++++++--- milli/src/documents/serde.rs | 36 ++++++++++++++++++++++------------ 3 files changed, 57 insertions(+), 29 deletions(-) diff --git a/milli/src/documents/builder.rs b/milli/src/documents/builder.rs index 8c70910b5..a7b0c90a4 100644 --- a/milli/src/documents/builder.rs +++ b/milli/src/documents/builder.rs @@ -87,18 +87,13 @@ impl DocumentBatchBuilder { count: &mut self.count, }; - de.deserialize_any(&mut visitor).map_err(Error::JsonError)?; - - Ok(()) + de.deserialize_any(&mut visitor).map_err(Error::JsonError)? } - /// Extends the builder with json documents from a reader. + /// Creates a builder from a reader of CSV documents. /// - /// This method can be only called once and is mutually exclusive with extend from json. This - /// is because the fields in a csv are always guaranteed to come in order, and permits some - /// optimizations. - /// - /// From csv takes care to call finish in the end. + /// Since all fields in a csv documents are guaranteed to be ordered, we are able to perform + /// optimisations, and extending from another CSV is not allowed. pub fn from_csv(reader: R, writer: W) -> Result { let mut this = Self::new(writer)?; @@ -108,8 +103,7 @@ impl DocumentBatchBuilder { let mut records = csv::Reader::from_reader(reader); let headers = records - .headers() - .unwrap() + .headers()? .into_iter() .map(parse_csv_header) .map(|(k, t)| (this.index.insert(&k), t)) @@ -123,11 +117,11 @@ impl DocumentBatchBuilder { let mut writer = obkv::KvWriter::new(Cursor::new(&mut this.obkv_buffer)); for (value, (fid, ty)) in record.into_iter().zip(headers.iter()) { let value = match ty { - AllowedType::Number => value.parse::().map(Value::from).unwrap(), + AllowedType::Number => value.parse::().map(Value::from)?, AllowedType::String => Value::String(value.to_string()), }; - serde_json::to_writer(Cursor::new(&mut this.value_buffer), &value).unwrap(); + serde_json::to_writer(Cursor::new(&mut this.value_buffer), &value)?; writer.insert(*fid, &this.value_buffer)?; this.value_buffer.clear(); } diff --git a/milli/src/documents/mod.rs b/milli/src/documents/mod.rs index 8a8b87794..bbc114480 100644 --- a/milli/src/documents/mod.rs +++ b/milli/src/documents/mod.rs @@ -7,7 +7,9 @@ mod builder; mod reader; mod serde; -use std::{fmt, io}; +use std::num::ParseFloatError; +use std::io; +use std::fmt::{self, Debug}; use ::serde::{Deserialize, Serialize}; use bimap::BiHashMap; @@ -81,14 +83,22 @@ impl io::Write for ByteCounter { #[derive(Debug)] pub enum Error { + ParseFloat(std::num::ParseFloatError), InvalidDocumentFormat, Custom(String), JsonError(serde_json::Error), + CsvError(csv::Error), Serialize(bincode::Error), Io(io::Error), DocumentTooLarge, } +impl From for Error { + fn from(e: csv::Error) -> Self { + Self::CsvError(e) + } +} + impl From for Error { fn from(other: io::Error) -> Self { Self::Io(other) @@ -101,15 +111,29 @@ impl From for Error { } } +impl From for Error { + fn from(other: serde_json::Error) -> Self { + Self::JsonError(other) + } +} + +impl From for Error { + fn from(other: ParseFloatError) -> Self { + Self::ParseFloat(other) + } +} + impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { + Error::ParseFloat(e) => write!(f, "{}", e), Error::Custom(s) => write!(f, "Unexpected serialization error: {}", s), Error::InvalidDocumentFormat => f.write_str("Invalid document addition format."), Error::JsonError(err) => write!(f, "Couldn't serialize document value: {}", err), - Error::Io(e) => e.fmt(f), + Error::Io(e) => write!(f, "{}", e), Error::DocumentTooLarge => f.write_str("Provided document is too large (>2Gib)"), - Error::Serialize(e) => e.fmt(f), + Error::Serialize(e) => write!(f, "{}", e), + Error::CsvError(e) => write!(f, "{}", e), } } } diff --git a/milli/src/documents/serde.rs b/milli/src/documents/serde.rs index 2466ed373..4dfdca2c7 100644 --- a/milli/src/documents/serde.rs +++ b/milli/src/documents/serde.rs @@ -11,9 +11,19 @@ use serde::de::SeqAccess; use serde::de::Visitor; use serde_json::Value; +use super::Error; use super::{ByteCounter, DocumentsBatchIndex}; use crate::FieldId; +macro_rules! tri { + ($e:expr) => { + match $e { + Ok(r) => r, + Err(e) => return Ok(Err(e.into())), + } + }; +} + struct FieldIdResolver<'a>(&'a mut DocumentsBatchIndex); impl<'a, 'de> DeserializeSeed<'de> for FieldIdResolver<'a> { @@ -36,8 +46,8 @@ impl<'a, 'de> Visitor<'de> for FieldIdResolver<'a> { Ok(self.0.insert(v)) } - fn expecting(&self, _formatter: &mut fmt::Formatter) -> fmt::Result { - todo!() + fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "a string") } } @@ -64,22 +74,22 @@ pub struct DocumentVisitor<'a, W> { impl<'a, 'de, W: Write> Visitor<'de> for &mut DocumentVisitor<'a, W> { /// This Visitor value is nothing, since it write the value to a file. - type Value = (); + type Value = Result<(), Error>; fn visit_seq(self, mut seq: A) -> Result where A: SeqAccess<'de>, { - while let Some(_) = seq.next_element_seed(&mut *self)? { } + while let Some(v) = seq.next_element_seed(&mut *self)? { tri!(v) } - Ok(()) + Ok(Ok(())) } fn visit_map(self, mut map: A) -> Result where A: MapAccess<'de>, { - while let Some((key, value)) = map.next_entry_seed(FieldIdResolver(&mut *self.index), ValueDeserializer).unwrap() { + while let Some((key, value)) = map.next_entry_seed(FieldIdResolver(&mut *self.index), ValueDeserializer)? { self.values.insert(key, value); } @@ -88,19 +98,19 @@ impl<'a, 'de, W: Write> Visitor<'de> for &mut DocumentVisitor<'a, W> { for (key, value) in self.values.iter() { self.value_buffer.clear(); // This is guaranteed to work - serde_json::to_writer(Cursor::new(&mut *self.value_buffer), value).unwrap(); - obkv.insert(*key, &self.value_buffer).unwrap(); + tri!(serde_json::to_writer(Cursor::new(&mut *self.value_buffer), value)); + tri!(obkv.insert(*key, &self.value_buffer)); } - let reader = obkv.into_inner().unwrap().into_inner(); + let reader = tri!(obkv.into_inner()).into_inner(); - self.inner.write_u32::(reader.len() as u32).unwrap(); - self.inner.write_all(reader).unwrap(); + tri!(self.inner.write_u32::(reader.len() as u32)); + tri!(self.inner.write_all(reader)); *self.count += 1; self.values.clear(); - Ok(()) + Ok(Ok(())) } fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { @@ -111,7 +121,7 @@ impl<'a, 'de, W: Write> Visitor<'de> for &mut DocumentVisitor<'a, W> { impl<'a, 'de, W> DeserializeSeed<'de> for &mut DocumentVisitor<'a, W> where W: Write, { - type Value = (); + type Value = Result<(), Error>; fn deserialize(self, deserializer: D) -> Result where