diff --git a/Cargo.lock b/Cargo.lock index b055caf18..7f778a0f1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2145,6 +2145,7 @@ name = "meilisearch-types" version = "0.28.0" dependencies = [ "actix-web", + "milli", "proptest", "proptest-derive", "serde", diff --git a/meilisearch-lib/src/document_formats.rs b/meilisearch-lib/src/document_formats.rs deleted file mode 100644 index 9cf51b0b8..000000000 --- a/meilisearch-lib/src/document_formats.rs +++ /dev/null @@ -1,138 +0,0 @@ -use std::borrow::Borrow; -use std::fmt::{self, Debug, Display}; -use std::io::{self, BufRead, Seek, Write}; - -use meilisearch_types::error::{Code, ErrorCode}; -use meilisearch_types::internal_error; -use milli::documents::{DocumentsBatchBuilder, Error}; - -type Result = std::result::Result; - -#[derive(Debug)] -pub enum PayloadType { - Ndjson, - Json, - Csv, -} - -impl fmt::Display for PayloadType { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - PayloadType::Ndjson => f.write_str("ndjson"), - PayloadType::Json => f.write_str("json"), - PayloadType::Csv => f.write_str("csv"), - } - } -} - -#[derive(Debug)] -pub enum DocumentFormatError { - Internal(Box), - MalformedPayload(Error, PayloadType), -} - -impl Display for DocumentFormatError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Internal(e) => write!(f, "An internal error has occurred: `{}`.", e), - Self::MalformedPayload(me, b) => match me.borrow() { - Error::Json(se) => { - // https://github.com/meilisearch/meilisearch/issues/2107 - // The user input maybe insanely long. We need to truncate it. - let mut serde_msg = se.to_string(); - let ellipsis = "..."; - if serde_msg.len() > 100 + ellipsis.len() { - serde_msg.replace_range(50..serde_msg.len() - 85, ellipsis); - } - - write!( - f, - "The `{}` payload provided is malformed. `Couldn't serialize document value: {}`.", - b, serde_msg - ) - } - _ => write!(f, "The `{}` payload provided is malformed: `{}`.", b, me), - }, - } - } -} - -impl std::error::Error for DocumentFormatError {} - -impl From<(PayloadType, Error)> for DocumentFormatError { - fn from((ty, error): (PayloadType, Error)) -> Self { - match error { - Error::Io(e) => Self::Internal(Box::new(e)), - e => Self::MalformedPayload(e, ty), - } - } -} - -impl ErrorCode for DocumentFormatError { - fn error_code(&self) -> Code { - match self { - DocumentFormatError::Internal(_) => Code::Internal, - DocumentFormatError::MalformedPayload(_, _) => Code::MalformedPayload, - } - } -} - -internal_error!(DocumentFormatError: io::Error); - -/// Reads CSV from input and write an obkv batch to writer. -pub fn read_csv(input: impl BufRead, writer: impl Write + Seek) -> Result { - let mut builder = DocumentsBatchBuilder::new(writer); - - let csv = csv::Reader::from_reader(input); - builder.append_csv(csv).map_err(|e| (PayloadType::Csv, e))?; - - let count = builder.documents_count(); - let _ = builder - .into_inner() - .map_err(Into::into) - .map_err(DocumentFormatError::Internal)?; - - Ok(count as usize) -} - -/// Reads JSON Lines from input and write an obkv batch to writer. -pub fn read_ndjson(mut input: impl BufRead, writer: impl Write + Seek) -> Result { - let mut builder = DocumentsBatchBuilder::new(writer); - let mut buf = String::with_capacity(1024); - while input.read_line(&mut buf)? > 0 { - if buf == "\n" { - buf.clear(); - continue; - } - builder - .append_unparsed_json_object(&buf) - .map_err(Into::into) - .map_err(DocumentFormatError::Internal)?; - buf.clear(); - } - - let count = builder.documents_count(); - let _ = builder - .into_inner() - .map_err(Into::into) - .map_err(DocumentFormatError::Internal)?; - - Ok(count as usize) -} - -/// Reads JSON from input and write an obkv batch to writer. -pub fn read_json(input: impl BufRead, writer: impl Write + Seek) -> Result { - let mut builder = DocumentsBatchBuilder::new(writer); - - builder - .append_json(input) - .map_err(|e| (PayloadType::Json, e))?; - - let count = builder.documents_count(); - let _ = builder - .into_inner() - .map_err(Into::into) - .map_err(DocumentFormatError::Internal)?; - - Ok(count as usize) -} diff --git a/meilisearch-lib/src/index/dump.rs b/meilisearch-lib/src/index/dump.rs index 6a41fa7a0..f3ff64816 100644 --- a/meilisearch-lib/src/index/dump.rs +++ b/meilisearch-lib/src/index/dump.rs @@ -9,8 +9,8 @@ use milli::heed::{EnvOpenOptions, RoTxn}; use milli::update::{IndexDocumentsConfig, IndexerConfig}; use serde::{Deserialize, Serialize}; -use crate::document_formats::read_ndjson; use crate::index::updates::apply_settings_to_builder; +use milli::documents::document_formats::read_ndjson; use super::error::Result; use super::{index::Index, Settings, Unchecked}; diff --git a/meilisearch-lib/src/index_controller/error.rs b/meilisearch-lib/src/index_controller/error.rs index ab2dd142d..b4ed08a3f 100644 --- a/meilisearch-lib/src/index_controller/error.rs +++ b/meilisearch-lib/src/index_controller/error.rs @@ -6,11 +6,11 @@ use meilisearch_types::internal_error; use tokio::task::JoinError; use super::DocumentAdditionFormat; -use crate::document_formats::DocumentFormatError; use crate::dump::error::DumpError; use crate::index::error::IndexError; use crate::tasks::error::TaskError; use crate::update_file_store::UpdateFileStoreError; +use milli::documents::document_formats::DocumentFormatError; use crate::index_resolver::error::IndexResolverError; diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index 8b1be1226..54309c148 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -20,7 +20,6 @@ use tokio::task::spawn_blocking; use tokio::time::sleep; use uuid::Uuid; -use crate::document_formats::{read_csv, read_json, read_ndjson}; use crate::dump::{self, load_dump, DumpHandler}; use crate::index::{ Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked, @@ -34,6 +33,7 @@ use crate::tasks::{ BatchHandler, EmptyBatchHandler, Scheduler, SnapshotHandler, TaskFilter, TaskStore, }; use error::Result; +use milli::documents::document_formats::{read_csv, read_json, read_ndjson}; use self::error::IndexControllerError; use crate::index_resolver::index_store::{IndexStore, MapIndexStore}; diff --git a/meilisearch-lib/src/lib.rs b/meilisearch-lib/src/lib.rs index 3d3d5e860..24d9df8fa 100644 --- a/meilisearch-lib/src/lib.rs +++ b/meilisearch-lib/src/lib.rs @@ -18,7 +18,6 @@ pub use milli; pub use milli::heed; mod compression; -pub mod document_formats; use walkdir::WalkDir; diff --git a/meilisearch-lib/src/update_file_store.rs b/meilisearch-lib/src/update_file_store.rs index c9c8eedbd..9d0cd1789 100644 --- a/meilisearch-lib/src/update_file_store.rs +++ b/meilisearch-lib/src/update_file_store.rs @@ -14,7 +14,7 @@ pub use test::MockUpdateFileStore as UpdateFileStore; const UPDATE_FILES_PATH: &str = "updates/updates_files"; -use crate::document_formats::read_ndjson; +use milli::documents::document_formats::read_ndjson; pub struct UpdateFile { path: PathBuf, diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index 6949722e7..c16cd5168 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -10,6 +10,7 @@ proptest = { version = "1.0.0", optional = true } proptest-derive = { version = "0.3.0", optional = true } serde = { version = "1.0.136", features = ["derive"] } serde_json = "1.0.79" +milli = { path = "../../milli/milli" } [features] test-traits = ["proptest", "proptest-derive"] diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 56ac65f9e..87e4e0a7d 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -94,6 +94,17 @@ pub trait ErrorCode: std::error::Error { } } +impl ErrorCode for milli::documents::document_formats::DocumentFormatError { + fn error_code(&self) -> Code { + match self { + milli::documents::document_formats::DocumentFormatError::Internal(_) => Code::Internal, + milli::documents::document_formats::DocumentFormatError::MalformedPayload(_, _) => { + Code::MalformedPayload + } + } + } +} + #[allow(clippy::enum_variant_names)] enum ErrorType { InternalError,