diff --git a/meilisearch-http/src/helpers/env.rs b/meilisearch-http/src/helpers/env.rs deleted file mode 100644 index b76c9c8a7..000000000 --- a/meilisearch-http/src/helpers/env.rs +++ /dev/null @@ -1,17 +0,0 @@ -use meilisearch_lib::heed::Env; -use walkdir::WalkDir; - -pub trait EnvSizer { - fn size(&self) -> u64; -} - -impl EnvSizer for Env { - fn size(&self) -> u64 { - WalkDir::new(self.path()) - .into_iter() - .filter_map(|entry| entry.ok()) - .filter_map(|entry| entry.metadata().ok()) - .filter(|metadata| metadata.is_file()) - .fold(0, |acc, m| acc + m.len()) - } -} diff --git a/meilisearch-http/src/helpers/mod.rs b/meilisearch-http/src/helpers/mod.rs deleted file mode 100644 index 3908c440c..000000000 --- a/meilisearch-http/src/helpers/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -mod env; - -pub use env::EnvSizer; diff --git a/meilisearch-http/src/lib.rs b/meilisearch-http/src/lib.rs index 6485784fc..9df66071e 100644 --- a/meilisearch-http/src/lib.rs +++ b/meilisearch-http/src/lib.rs @@ -5,7 +5,6 @@ pub mod analytics; pub mod task; #[macro_use] pub mod extractors; -pub mod helpers; pub mod option; pub mod routes; diff --git a/meilisearch-http/tests/documents/add_documents.rs b/meilisearch-http/tests/documents/add_documents.rs index 85b88ca36..cb7030051 100644 --- a/meilisearch-http/tests/documents/add_documents.rs +++ b/meilisearch-http/tests/documents/add_documents.rs @@ -326,7 +326,7 @@ async fn error_add_malformed_json_documents() { assert_eq!( response["message"], json!( - r#"The `json` payload provided is malformed. `Couldn't serialize document value: invalid type: string "0123456789012345678901234567...890123456789", expected a documents, or a sequence of documents. at line 1 column 102`."# + r#"The `json` payload provided is malformed. `Couldn't serialize document value: invalid type: string "0123456789012345678901234567...890123456789012345678901234567890123456789", expected a sequence at line 1 column 102`."# ) ); assert_eq!(response["code"], json!("malformed_payload")); @@ -349,9 +349,7 @@ async fn error_add_malformed_json_documents() { assert_eq!(status_code, 400); assert_eq!( response["message"], - json!( - r#"The `json` payload provided is malformed. `Couldn't serialize document value: invalid type: string "0123456789012345678901234567...90123456789m", expected a documents, or a sequence of documents. at line 1 column 103`."# - ) + json!("The `json` payload provided is malformed. `Couldn't serialize document value: invalid type: string \"0123456789012345678901234567...90123456789012345678901234567890123456789m\", expected a sequence at line 1 column 103`.") ); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); @@ -388,7 +386,7 @@ async fn error_add_malformed_ndjson_documents() { assert_eq!( response["message"], json!( - r#"The `ndjson` payload provided is malformed. `Couldn't serialize document value: key must be a string at line 1 column 2`."# + r#"The `ndjson` payload provided is malformed. `Couldn't serialize document value: key must be a string at line 2 column 2`."# ) ); assert_eq!(response["code"], json!("malformed_payload")); @@ -411,9 +409,7 @@ async fn error_add_malformed_ndjson_documents() { assert_eq!(status_code, 400); assert_eq!( response["message"], - json!( - r#"The `ndjson` payload provided is malformed. `Couldn't serialize document value: key must be a string at line 1 column 2`."# - ) + json!("The `ndjson` payload provided is malformed. `Couldn't serialize document value: key must be a string at line 2 column 2`.") ); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); @@ -1020,7 +1016,7 @@ async fn add_documents_invalid_geo_field() { index.wait_task(2).await; let (response, code) = index.get_task(2).await; assert_eq!(code, 200); - assert_eq!(response["status"], "succeeded"); + assert_eq!(response["status"], "failed"); } #[actix_rt::test] diff --git a/meilisearch-http/tests/search/mod.rs b/meilisearch-http/tests/search/mod.rs index 17f53fa2d..d5e916860 100644 --- a/meilisearch-http/tests/search/mod.rs +++ b/meilisearch-http/tests/search/mod.rs @@ -708,9 +708,7 @@ async fn faceting_max_values_per_facet() { }), |response, code| { assert_eq!(code, 200, "{}", response); - let numbers = dbg!(&response)["facetDistribution"]["number"] - .as_object() - .unwrap(); + let numbers = &response["facetDistribution"]["number"].as_object().unwrap(); assert_eq!(numbers.len(), 10_000); }, ) diff --git a/meilisearch-lib/src/document_formats.rs b/meilisearch-lib/src/document_formats.rs index 5b224cf49..72e899845 100644 --- a/meilisearch-lib/src/document_formats.rs +++ b/meilisearch-lib/src/document_formats.rs @@ -98,7 +98,7 @@ pub fn read_csv(input: impl Read, writer: impl Write + Seek) -> Result { /// Reads JSON Lines from input and write an obkv batch to writer. pub fn read_ndjson(input: impl Read, writer: impl Write + Seek) -> Result { let mut builder = DocumentsBatchBuilder::new(writer); - let mut reader = BufReader::new(input); + let reader = BufReader::new(input); for result in serde_json::Deserializer::from_reader(reader).into_iter() { let object = result @@ -122,7 +122,7 @@ pub fn read_ndjson(input: impl Read, writer: impl Write + Seek) -> Result /// Reads JSON from input and write an obkv batch to writer. pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result { let mut builder = DocumentsBatchBuilder::new(writer); - let mut reader = BufReader::new(input); + let reader = BufReader::new(input); let objects: Vec<_> = serde_json::from_reader(reader) .map_err(Error::Json) diff --git a/meilisearch-lib/src/error.rs b/meilisearch-lib/src/error.rs index 83e9263b4..168f2f88b 100644 --- a/meilisearch-lib/src/error.rs +++ b/meilisearch-lib/src/error.rs @@ -25,6 +25,7 @@ impl ErrorCode for MilliError<'_> { // TODO: wait for spec for new error codes. UserError::SerdeJson(_) | UserError::DocumentLimitReached + | UserError::AccessingSoftDeletedDocument { .. } | UserError::UnknownInternalDocumentId { .. } => Code::Internal, UserError::InvalidStoreFile => Code::InvalidStore, UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice, @@ -32,7 +33,9 @@ impl ErrorCode for MilliError<'_> { UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded, UserError::InvalidFilter(_) => Code::Filter, UserError::MissingDocumentId { .. } => Code::MissingDocumentId, - UserError::InvalidDocumentId { .. } => Code::InvalidDocumentId, + UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => { + Code::InvalidDocumentId + } UserError::MissingPrimaryKey => Code::MissingPrimaryKey, UserError::PrimaryKeyCannotBeChanged(_) => Code::PrimaryKeyAlreadyPresent, UserError::SortRankingRuleMissing => Code::Sort, diff --git a/meilisearch-lib/src/index/error.rs b/meilisearch-lib/src/index/error.rs index e31fcc4a0..f795ceaa4 100644 --- a/meilisearch-lib/src/index/error.rs +++ b/meilisearch-lib/src/index/error.rs @@ -40,6 +40,12 @@ impl ErrorCode for IndexError { } } +impl From for IndexError { + fn from(error: milli::UserError) -> IndexError { + IndexError::Milli(error.into()) + } +} + #[derive(Debug, thiserror::Error)] pub enum FacetError { #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))] diff --git a/meilisearch-lib/src/index/index.rs b/meilisearch-lib/src/index/index.rs index 518e9ce3e..02425d0bf 100644 --- a/meilisearch-lib/src/index/index.rs +++ b/meilisearch-lib/src/index/index.rs @@ -4,7 +4,6 @@ use std::marker::PhantomData; use std::ops::Deref; use std::path::Path; use std::sync::Arc; -use walkdir::WalkDir; use fst::IntoStreamer; use milli::heed::{CompactionOption, EnvOpenOptions, RoTxn}; @@ -14,6 +13,7 @@ use serde::{Deserialize, Serialize}; use serde_json::{Map, Value}; use time::OffsetDateTime; use uuid::Uuid; +use walkdir::WalkDir; use crate::index::search::DEFAULT_PAGINATION_MAX_TOTAL_HITS; @@ -245,11 +245,8 @@ impl Index { let fields_ids_map = self.fields_ids_map(&txn)?; let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); - let iter = self.all_documents(&txn)?.skip(offset).take(limit); - let mut documents = Vec::new(); - - for entry in iter { + for entry in self.all_documents(&txn)?.skip(offset).take(limit) { let (_id, obkv) = entry?; let document = obkv_to_json(&all_fields, &fields_ids_map, obkv)?; let document = match &attributes_to_retrieve { @@ -302,7 +299,7 @@ impl Index { } pub fn size(&self) -> u64 { - WalkDir::new(self.inner.path()) + WalkDir::new(self.path()) .into_iter() .filter_map(|entry| entry.ok()) .filter_map(|entry| entry.metadata().ok()) diff --git a/meilisearch-lib/src/index/mod.rs b/meilisearch-lib/src/index/mod.rs index e6c831a01..28d6e0222 100644 --- a/meilisearch-lib/src/index/mod.rs +++ b/meilisearch-lib/src/index/mod.rs @@ -24,12 +24,12 @@ pub use test::MockIndex as Index; /// code for unit testing, in places where an index would normally be used. #[cfg(test)] pub mod test { - use std::path::Path; - use std::path::PathBuf; + use std::path::{Path, PathBuf}; use std::sync::Arc; - use milli::update::IndexerConfig; - use milli::update::{DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsMethod}; + use milli::update::{ + DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsMethod, IndexerConfig, + }; use nelson::Mocker; use uuid::Uuid; @@ -162,7 +162,7 @@ pub mod test { primary_key: Option, file_store: UpdateFileStore, contents: impl Iterator, - ) -> Result { + ) -> Result>> { match self { MockIndex::Real(index) => { index.update_documents(method, primary_key, file_store, contents) diff --git a/meilisearch-lib/src/index/updates.rs b/meilisearch-lib/src/index/updates.rs index b3a4205b7..5e5a8e34b 100644 --- a/meilisearch-lib/src/index/updates.rs +++ b/meilisearch-lib/src/index/updates.rs @@ -11,7 +11,7 @@ use milli::update::{ use serde::{Deserialize, Serialize, Serializer}; use uuid::Uuid; -use super::error::Result; +use super::error::{IndexError, Result}; use super::index::{Index, IndexMeta}; use crate::update_file_store::UpdateFileStore; @@ -299,7 +299,7 @@ impl Index { primary_key: Option, file_store: UpdateFileStore, contents: impl IntoIterator, - ) -> Result { + ) -> Result>> { trace!("performing document addition"); let mut txn = self.write_txn()?; @@ -315,7 +315,7 @@ impl Index { }; let indexing_callback = |indexing_step| debug!("update: {:?}", indexing_step); - let builder = milli::update::IndexDocuments::new( + let mut builder = milli::update::IndexDocuments::new( &mut txn, self, self.indexer_config.as_ref(), @@ -323,20 +323,34 @@ impl Index { indexing_callback, )?; + let mut results = Vec::new(); for content_uuid in contents.into_iter() { let content_file = file_store.get_update(content_uuid)?; let reader = DocumentsBatchReader::from_reader(content_file)?; - let (builder, user_error) = builder.add_documents(reader)?; - todo!("use the user_error here"); + let (new_builder, user_result) = builder.add_documents(reader)?; + builder = new_builder; + + let user_result = match user_result { + Ok(count) => { + let addition = DocumentAdditionResult { + indexed_documents: count, + number_of_documents: count, + }; + info!("document addition done: {:?}", addition); + Ok(addition) + } + Err(e) => Err(IndexError::from(e)), + }; + + results.push(user_result); } - let addition = builder.execute()?; + if results.iter().any(Result::is_ok) { + let _addition = builder.execute()?; + txn.commit()?; + } - txn.commit()?; - - info!("document addition done: {:?}", addition); - - Ok(addition) + Ok(results) } pub fn update_settings(&self, settings: &Settings) -> Result<()> { diff --git a/meilisearch-lib/src/index_resolver/mod.rs b/meilisearch-lib/src/index_resolver/mod.rs index 686a549b9..284f64942 100644 --- a/meilisearch-lib/src/index_resolver/mod.rs +++ b/meilisearch-lib/src/index_resolver/mod.rs @@ -150,25 +150,34 @@ mod real { }) .await; - let event = match result { - Ok(Ok(result)) => TaskEvent::Succeeded { - timestamp: OffsetDateTime::now_utc(), - result: TaskResult::DocumentAddition { - indexed_documents: result.indexed_documents, - }, - }, - Ok(Err(e)) => TaskEvent::Failed { - timestamp: OffsetDateTime::now_utc(), - error: e.into(), - }, - Err(e) => TaskEvent::Failed { - timestamp: OffsetDateTime::now_utc(), - error: IndexResolverError::from(e).into(), - }, - }; - - for task in tasks.iter_mut() { - task.events.push(event.clone()); + match result { + Ok(Ok(results)) => { + for (task, result) in tasks.iter_mut().zip(results) { + let event = match result { + Ok(addition) => { + TaskEvent::succeeded(TaskResult::DocumentAddition { + indexed_documents: addition.indexed_documents, + }) + } + Err(error) => { + TaskEvent::failed(IndexResolverError::from(error)) + } + }; + task.events.push(event); + } + } + Ok(Err(e)) => { + let event = TaskEvent::failed(e); + for task in tasks.iter_mut() { + task.events.push(event.clone()); + } + } + Err(e) => { + let event = TaskEvent::failed(IndexResolverError::from(e)); + for task in tasks.iter_mut() { + task.events.push(event.clone()); + } + } } } _ => panic!("invalid batch!"), diff --git a/meilisearch-lib/src/update_file_store.rs b/meilisearch-lib/src/update_file_store.rs index e1be0dbd4..cb4eadf4d 100644 --- a/meilisearch-lib/src/update_file_store.rs +++ b/meilisearch-lib/src/update_file_store.rs @@ -150,8 +150,8 @@ mod store { let update_file = File::open(update_file_path)?; let mut dst_file = NamedTempFile::new_in(&dump_path)?; - let mut document_cursor = DocumentsBatchReader::from_reader(update_file)?.into_cursor(); - let index = document_cursor.documents_batch_index(); + let (mut document_cursor, index) = + DocumentsBatchReader::from_reader(update_file)?.into_cursor_and_fields_index(); let mut document_buffer = Map::new(); // TODO: we need to find a way to do this more efficiently. (create a custom serializer diff --git a/permissive-json-pointer/src/lib.rs b/permissive-json-pointer/src/lib.rs index 8f97ab2de..52f181980 100644 --- a/permissive-json-pointer/src/lib.rs +++ b/permissive-json-pointer/src/lib.rs @@ -49,7 +49,7 @@ fn contained_in(selector: &str, key: &str) -> bool { /// map_leaf_values( /// value.as_object_mut().unwrap(), /// ["jean.race.name"], -/// |key, value| match (value, dbg!(key)) { +/// |key, value| match (value, key) { /// (Value::String(name), "jean.race.name") => *name = "patou".to_string(), /// _ => unreachable!(), /// }, @@ -729,7 +729,7 @@ mod tests { map_leaf_values( value.as_object_mut().unwrap(), ["jean.race.name"], - |key, value| match (value, dbg!(key)) { + |key, value| match (value, key) { (Value::String(name), "jean.race.name") => *name = S("patou"), _ => unreachable!(), },