diff --git a/meilisearch-http/src/helpers/env.rs b/meilisearch-http/src/helpers/env.rs deleted file mode 100644 index b76c9c8a7..000000000 --- a/meilisearch-http/src/helpers/env.rs +++ /dev/null @@ -1,17 +0,0 @@ -use meilisearch_lib::heed::Env; -use walkdir::WalkDir; - -pub trait EnvSizer { - fn size(&self) -> u64; -} - -impl EnvSizer for Env { - fn size(&self) -> u64 { - WalkDir::new(self.path()) - .into_iter() - .filter_map(|entry| entry.ok()) - .filter_map(|entry| entry.metadata().ok()) - .filter(|metadata| metadata.is_file()) - .fold(0, |acc, m| acc + m.len()) - } -} diff --git a/meilisearch-http/src/helpers/mod.rs b/meilisearch-http/src/helpers/mod.rs deleted file mode 100644 index 3908c440c..000000000 --- a/meilisearch-http/src/helpers/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -mod env; - -pub use env::EnvSizer; diff --git a/meilisearch-http/src/lib.rs b/meilisearch-http/src/lib.rs index bfdb829d4..91a984796 100644 --- a/meilisearch-http/src/lib.rs +++ b/meilisearch-http/src/lib.rs @@ -5,7 +5,6 @@ pub mod analytics; pub mod task; #[macro_use] pub mod extractors; -pub mod helpers; pub mod option; pub mod routes; diff --git a/meilisearch-http/tests/documents/add_documents.rs b/meilisearch-http/tests/documents/add_documents.rs index c3baf0cb0..66aec798e 100644 --- a/meilisearch-http/tests/documents/add_documents.rs +++ b/meilisearch-http/tests/documents/add_documents.rs @@ -326,7 +326,7 @@ async fn error_add_malformed_json_documents() { assert_eq!( response["message"], json!( - r#"The `json` payload provided is malformed. `Couldn't serialize document value: invalid type: string "0123456789012345678901234567...890123456789", expected a documents, or a sequence of documents. at line 1 column 102`."# + r#"The `json` payload provided is malformed. `Couldn't serialize document value: invalid type: string "0123456789012345678901234567...890123456789012345678901234567890123456789", expected a sequence at line 1 column 102`."# ) ); assert_eq!(response["code"], json!("malformed_payload")); @@ -349,9 +349,7 @@ async fn error_add_malformed_json_documents() { assert_eq!(status_code, 400); assert_eq!( response["message"], - json!( - r#"The `json` payload provided is malformed. `Couldn't serialize document value: invalid type: string "0123456789012345678901234567...90123456789m", expected a documents, or a sequence of documents. at line 1 column 103`."# - ) + json!("The `json` payload provided is malformed. `Couldn't serialize document value: invalid type: string \"0123456789012345678901234567...90123456789012345678901234567890123456789m\", expected a sequence at line 1 column 103`.") ); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); @@ -388,7 +386,7 @@ async fn error_add_malformed_ndjson_documents() { assert_eq!( response["message"], json!( - r#"The `ndjson` payload provided is malformed. `Couldn't serialize document value: key must be a string at line 1 column 2`."# + r#"The `ndjson` payload provided is malformed. `Couldn't serialize document value: key must be a string at line 2 column 2`."# ) ); assert_eq!(response["code"], json!("malformed_payload")); @@ -411,9 +409,7 @@ async fn error_add_malformed_ndjson_documents() { assert_eq!(status_code, 400); assert_eq!( response["message"], - json!( - r#"The `ndjson` payload provided is malformed. `Couldn't serialize document value: key must be a string at line 1 column 2`."# - ) + json!("The `ndjson` payload provided is malformed. `Couldn't serialize document value: key must be a string at line 2 column 2`.") ); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); @@ -1020,7 +1016,7 @@ async fn add_documents_invalid_geo_field() { index.wait_task(2).await; let (response, code) = index.get_task(2).await; assert_eq!(code, 200); - assert_eq!(response["status"], "succeeded"); + assert_eq!(response["status"], "failed"); } #[actix_rt::test] diff --git a/meilisearch-http/tests/search/mod.rs b/meilisearch-http/tests/search/mod.rs index 02cdc751f..f3615465a 100644 --- a/meilisearch-http/tests/search/mod.rs +++ b/meilisearch-http/tests/search/mod.rs @@ -708,9 +708,7 @@ async fn faceting_max_values_per_facet() { }), |response, code| { assert_eq!(code, 200, "{}", response); - let numbers = dbg!(&response)["facetDistribution"]["number"] - .as_object() - .unwrap(); + let numbers = &response["facetDistribution"]["number"].as_object().unwrap(); assert_eq!(numbers.len(), 10_000); }, ) diff --git a/meilisearch-lib/src/document_formats.rs b/meilisearch-lib/src/document_formats.rs index 5b224cf49..72e899845 100644 --- a/meilisearch-lib/src/document_formats.rs +++ b/meilisearch-lib/src/document_formats.rs @@ -98,7 +98,7 @@ pub fn read_csv(input: impl Read, writer: impl Write + Seek) -> Result { /// Reads JSON Lines from input and write an obkv batch to writer. pub fn read_ndjson(input: impl Read, writer: impl Write + Seek) -> Result { let mut builder = DocumentsBatchBuilder::new(writer); - let mut reader = BufReader::new(input); + let reader = BufReader::new(input); for result in serde_json::Deserializer::from_reader(reader).into_iter() { let object = result @@ -122,7 +122,7 @@ pub fn read_ndjson(input: impl Read, writer: impl Write + Seek) -> Result /// Reads JSON from input and write an obkv batch to writer. pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result { let mut builder = DocumentsBatchBuilder::new(writer); - let mut reader = BufReader::new(input); + let reader = BufReader::new(input); let objects: Vec<_> = serde_json::from_reader(reader) .map_err(Error::Json) diff --git a/meilisearch-lib/src/dump/error.rs b/meilisearch-lib/src/dump/error.rs index 3f6e2aae5..5afbf9244 100644 --- a/meilisearch-lib/src/dump/error.rs +++ b/meilisearch-lib/src/dump/error.rs @@ -11,7 +11,7 @@ pub enum DumpError { #[error("An internal error has occurred. `{0}`.")] Internal(Box), #[error("{0}")] - IndexResolver(#[from] IndexResolverError), + IndexResolver(#[from] Box), } internal_error!( diff --git a/meilisearch-lib/src/error.rs b/meilisearch-lib/src/error.rs index 83e9263b4..0e77bd360 100644 --- a/meilisearch-lib/src/error.rs +++ b/meilisearch-lib/src/error.rs @@ -32,7 +32,9 @@ impl ErrorCode for MilliError<'_> { UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded, UserError::InvalidFilter(_) => Code::Filter, UserError::MissingDocumentId { .. } => Code::MissingDocumentId, - UserError::InvalidDocumentId { .. } => Code::InvalidDocumentId, + UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => { + Code::InvalidDocumentId + } UserError::MissingPrimaryKey => Code::MissingPrimaryKey, UserError::PrimaryKeyCannotBeChanged(_) => Code::PrimaryKeyAlreadyPresent, UserError::SortRankingRuleMissing => Code::Sort, diff --git a/meilisearch-lib/src/index/dump.rs b/meilisearch-lib/src/index/dump.rs index 8c7daba1f..6a41fa7a0 100644 --- a/meilisearch-lib/src/index/dump.rs +++ b/meilisearch-lib/src/index/dump.rs @@ -27,7 +27,7 @@ const DATA_FILE_NAME: &str = "documents.jsonl"; impl Index { pub fn dump(&self, path: impl AsRef) -> Result<()> { // acquire write txn make sure any ongoing write is finished before we start. - let txn = self.env.write_txn()?; + let txn = self.write_txn()?; let path = path.as_ref().join(format!("indexes/{}", self.uuid)); create_dir_all(&path)?; diff --git a/meilisearch-lib/src/index/error.rs b/meilisearch-lib/src/index/error.rs index e31fcc4a0..f795ceaa4 100644 --- a/meilisearch-lib/src/index/error.rs +++ b/meilisearch-lib/src/index/error.rs @@ -40,6 +40,12 @@ impl ErrorCode for IndexError { } } +impl From for IndexError { + fn from(error: milli::UserError) -> IndexError { + IndexError::Milli(error.into()) + } +} + #[derive(Debug, thiserror::Error)] pub enum FacetError { #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))] diff --git a/meilisearch-lib/src/index/index.rs b/meilisearch-lib/src/index/index.rs index d4772b73b..094c60760 100644 --- a/meilisearch-lib/src/index/index.rs +++ b/meilisearch-lib/src/index/index.rs @@ -6,16 +6,16 @@ use std::path::Path; use std::sync::Arc; use fst::IntoStreamer; -use milli::heed::{EnvOpenOptions, RoTxn}; +use milli::heed::{CompactionOption, EnvOpenOptions, RoTxn}; use milli::update::{IndexerConfig, Setting}; use milli::{obkv_to_json, FieldDistribution, DEFAULT_VALUES_PER_FACET}; use serde::{Deserialize, Serialize}; use serde_json::{Map, Value}; use time::OffsetDateTime; use uuid::Uuid; +use walkdir::WalkDir; use crate::index::search::DEFAULT_PAGINATION_LIMITED_TO; -use crate::EnvSizer; use super::error::IndexError; use super::error::Result; @@ -245,11 +245,8 @@ impl Index { let fields_ids_map = self.fields_ids_map(&txn)?; let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); - let iter = self.documents.range(&txn, &(..))?.skip(offset).take(limit); - let mut documents = Vec::new(); - - for entry in iter { + for entry in self.all_documents(&txn)?.skip(offset).take(limit) { let (_id, obkv) = entry?; let document = obkv_to_json(&all_fields, &fields_ids_map, obkv)?; let document = match &attributes_to_retrieve { @@ -302,7 +299,12 @@ impl Index { } pub fn size(&self) -> u64 { - self.env.size() + WalkDir::new(self.path()) + .into_iter() + .filter_map(|entry| entry.ok()) + .filter_map(|entry| entry.metadata().ok()) + .filter(|metadata| metadata.is_file()) + .fold(0, |acc, m| acc + m.len()) } pub fn snapshot(&self, path: impl AsRef) -> Result<()> { @@ -310,9 +312,8 @@ impl Index { create_dir_all(&dst)?; dst.push("data.mdb"); let _txn = self.write_txn()?; - self.inner - .env - .copy_to_path(dst, milli::heed::CompactionOption::Enabled)?; + self.inner.copy_to_path(dst, CompactionOption::Enabled)?; + Ok(()) } } diff --git a/meilisearch-lib/src/index/mod.rs b/meilisearch-lib/src/index/mod.rs index e6c831a01..28d6e0222 100644 --- a/meilisearch-lib/src/index/mod.rs +++ b/meilisearch-lib/src/index/mod.rs @@ -24,12 +24,12 @@ pub use test::MockIndex as Index; /// code for unit testing, in places where an index would normally be used. #[cfg(test)] pub mod test { - use std::path::Path; - use std::path::PathBuf; + use std::path::{Path, PathBuf}; use std::sync::Arc; - use milli::update::IndexerConfig; - use milli::update::{DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsMethod}; + use milli::update::{ + DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsMethod, IndexerConfig, + }; use nelson::Mocker; use uuid::Uuid; @@ -162,7 +162,7 @@ pub mod test { primary_key: Option, file_store: UpdateFileStore, contents: impl Iterator, - ) -> Result { + ) -> Result>> { match self { MockIndex::Real(index) => { index.update_documents(method, primary_key, file_store, contents) diff --git a/meilisearch-lib/src/index/updates.rs b/meilisearch-lib/src/index/updates.rs index 6316f8812..dffe23929 100644 --- a/meilisearch-lib/src/index/updates.rs +++ b/meilisearch-lib/src/index/updates.rs @@ -11,7 +11,7 @@ use milli::update::{ use serde::{Deserialize, Serialize, Serializer}; use uuid::Uuid; -use super::error::Result; +use super::error::{IndexError, Result}; use super::index::{Index, IndexMeta}; use crate::update_file_store::UpdateFileStore; @@ -299,7 +299,7 @@ impl Index { primary_key: Option, file_store: UpdateFileStore, contents: impl IntoIterator, - ) -> Result { + ) -> Result>> { trace!("performing document addition"); let mut txn = self.write_txn()?; @@ -315,7 +315,7 @@ impl Index { }; let indexing_callback = |indexing_step| debug!("update: {:?}", indexing_step); - let builder = milli::update::IndexDocuments::new( + let mut builder = milli::update::IndexDocuments::new( &mut txn, self, self.indexer_config.as_ref(), @@ -323,20 +323,34 @@ impl Index { indexing_callback, )?; + let mut results = Vec::new(); for content_uuid in contents.into_iter() { let content_file = file_store.get_update(content_uuid)?; let reader = DocumentsBatchReader::from_reader(content_file)?; - let (builder, user_error) = builder.add_documents(reader)?; - todo!("use the user_error here"); + let (new_builder, user_result) = builder.add_documents(reader)?; + builder = new_builder; + + let user_result = match user_result { + Ok(count) => { + let addition = DocumentAdditionResult { + indexed_documents: count, + number_of_documents: count, + }; + info!("document addition done: {:?}", addition); + Ok(addition) + } + Err(e) => Err(IndexError::from(e)), + }; + + results.push(user_result); } - let addition = builder.execute()?; + if results.iter().any(Result::is_ok) { + let _addition = builder.execute()?; + txn.commit()?; + } - txn.commit()?; - - info!("document addition done: {:?}", addition); - - Ok(addition) + Ok(results) } pub fn update_settings(&self, settings: &Settings) -> Result<()> { diff --git a/meilisearch-lib/src/index_resolver/mod.rs b/meilisearch-lib/src/index_resolver/mod.rs index 686a549b9..284f64942 100644 --- a/meilisearch-lib/src/index_resolver/mod.rs +++ b/meilisearch-lib/src/index_resolver/mod.rs @@ -150,25 +150,34 @@ mod real { }) .await; - let event = match result { - Ok(Ok(result)) => TaskEvent::Succeeded { - timestamp: OffsetDateTime::now_utc(), - result: TaskResult::DocumentAddition { - indexed_documents: result.indexed_documents, - }, - }, - Ok(Err(e)) => TaskEvent::Failed { - timestamp: OffsetDateTime::now_utc(), - error: e.into(), - }, - Err(e) => TaskEvent::Failed { - timestamp: OffsetDateTime::now_utc(), - error: IndexResolverError::from(e).into(), - }, - }; - - for task in tasks.iter_mut() { - task.events.push(event.clone()); + match result { + Ok(Ok(results)) => { + for (task, result) in tasks.iter_mut().zip(results) { + let event = match result { + Ok(addition) => { + TaskEvent::succeeded(TaskResult::DocumentAddition { + indexed_documents: addition.indexed_documents, + }) + } + Err(error) => { + TaskEvent::failed(IndexResolverError::from(error)) + } + }; + task.events.push(event); + } + } + Ok(Err(e)) => { + let event = TaskEvent::failed(e); + for task in tasks.iter_mut() { + task.events.push(event.clone()); + } + } + Err(e) => { + let event = TaskEvent::failed(IndexResolverError::from(e)); + for task in tasks.iter_mut() { + task.events.push(event.clone()); + } + } } } _ => panic!("invalid batch!"), diff --git a/meilisearch-lib/src/snapshot.rs b/meilisearch-lib/src/snapshot.rs index 527195729..630a4bb9d 100644 --- a/meilisearch-lib/src/snapshot.rs +++ b/meilisearch-lib/src/snapshot.rs @@ -181,9 +181,7 @@ impl SnapshotJob { let mut options = milli::heed::EnvOpenOptions::new(); options.map_size(self.index_size); let index = milli::Index::new(options, entry.path())?; - index - .env - .copy_to_path(dst, milli::heed::CompactionOption::Enabled)?; + index.copy_to_path(dst, milli::heed::CompactionOption::Enabled)?; } Ok(()) diff --git a/meilisearch-lib/src/update_file_store.rs b/meilisearch-lib/src/update_file_store.rs index e1be0dbd4..d4c50c447 100644 --- a/meilisearch-lib/src/update_file_store.rs +++ b/meilisearch-lib/src/update_file_store.rs @@ -151,7 +151,7 @@ mod store { let update_file = File::open(update_file_path)?; let mut dst_file = NamedTempFile::new_in(&dump_path)?; let mut document_cursor = DocumentsBatchReader::from_reader(update_file)?.into_cursor(); - let index = document_cursor.documents_batch_index(); + let index = document_cursor.documents_batch_index().clone(); let mut document_buffer = Map::new(); // TODO: we need to find a way to do this more efficiently. (create a custom serializer diff --git a/permissive-json-pointer/src/lib.rs b/permissive-json-pointer/src/lib.rs index 8f97ab2de..52f181980 100644 --- a/permissive-json-pointer/src/lib.rs +++ b/permissive-json-pointer/src/lib.rs @@ -49,7 +49,7 @@ fn contained_in(selector: &str, key: &str) -> bool { /// map_leaf_values( /// value.as_object_mut().unwrap(), /// ["jean.race.name"], -/// |key, value| match (value, dbg!(key)) { +/// |key, value| match (value, key) { /// (Value::String(name), "jean.race.name") => *name = "patou".to_string(), /// _ => unreachable!(), /// }, @@ -729,7 +729,7 @@ mod tests { map_leaf_values( value.as_object_mut().unwrap(), ["jean.race.name"], - |key, value| match (value, dbg!(key)) { + |key, value| match (value, key) { (Value::String(name), "jean.race.name") => *name = S("patou"), _ => unreachable!(), },