diff --git a/meilisearch-http/src/error.rs b/meilisearch-http/src/error.rs index fb44b9a49..bb18ad6f4 100644 --- a/meilisearch-http/src/error.rs +++ b/meilisearch-http/src/error.rs @@ -7,7 +7,6 @@ use actix_web::http::StatusCode; use actix_web::HttpResponseBuilder; use aweb::error::{JsonPayloadError, QueryPayloadError}; use meilisearch_error::{Code, ErrorCode}; -use meilisearch_lib::milli; use serde::{Deserialize, Serialize}; #[derive(Debug, Serialize, Deserialize, Clone)] @@ -55,53 +54,6 @@ impl aweb::error::ResponseError for ResponseError { } } -#[derive(Debug)] -pub struct MilliError<'a>(pub &'a milli::Error); - -impl Error for MilliError<'_> {} - -impl fmt::Display for MilliError<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.0.fmt(f) - } -} - -impl ErrorCode for MilliError<'_> { - fn error_code(&self) -> Code { - use milli::UserError; - - match self.0 { - milli::Error::InternalError(_) => Code::Internal, - milli::Error::IoError(_) => Code::Internal, - milli::Error::UserError(ref error) => { - match error { - // TODO: wait for spec for new error codes. - UserError::SerdeJson(_) - | UserError::MaxDatabaseSizeReached - | UserError::InvalidDocumentId { .. } - | UserError::InvalidStoreFile - | UserError::NoSpaceLeftOnDevice - | UserError::DocumentLimitReached => Code::Internal, - UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded, - UserError::InvalidFilter(_) => Code::Filter, - UserError::InvalidFilterAttribute(_) => Code::Filter, - UserError::MissingDocumentId { .. } => Code::MissingDocumentId, - UserError::MissingPrimaryKey => Code::MissingPrimaryKey, - UserError::PrimaryKeyCannotBeChanged => Code::PrimaryKeyAlreadyPresent, - UserError::PrimaryKeyCannotBeReset => Code::PrimaryKeyAlreadyPresent, - UserError::SortRankingRuleMissing => Code::Sort, - UserError::UnknownInternalDocumentId { .. } => Code::DocumentNotFound, - UserError::InvalidFacetsDistribution { .. } => Code::BadRequest, - UserError::InvalidGeoField { .. 
} => Code::InvalidGeoField, - UserError::InvalidSortableAttribute { .. } => Code::Sort, - UserError::SortError(_) => Code::Sort, - UserError::CriterionError(_) => Code::InvalidRankingRule, - } - } - } - } -} - impl fmt::Display for PayloadError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { diff --git a/meilisearch-http/src/lib.rs b/meilisearch-http/src/lib.rs index 0e479b122..219e8b1c8 100644 --- a/meilisearch-http/src/lib.rs +++ b/meilisearch-http/src/lib.rs @@ -96,8 +96,6 @@ pub fn configure_data(config: &mut web::ServiceConfig, data: MeiliSearch, opt: & let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize; config .app_data(data) - // TODO!: Why are we passing the data with two different things? - //.app_data(data) .app_data( web::JsonConfig::default() .limit(http_payload_size_limit) diff --git a/meilisearch-lib/src/compression.rs b/meilisearch-lib/src/compression.rs index a71a02a55..c4747cb21 100644 --- a/meilisearch-lib/src/compression.rs +++ b/meilisearch-lib/src/compression.rs @@ -1,9 +1,9 @@ -use std::fs::File; +use std::fs::{create_dir_all, File}; use std::io::Write; use std::path::Path; -use flate2::{write::GzEncoder, Compression}; -use tar::Builder; +use flate2::{read::GzDecoder, write::GzEncoder, Compression}; +use tar::{Archive, Builder}; pub fn to_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Result<()> { let mut f = File::create(dest)?; @@ -16,11 +16,11 @@ pub fn to_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Resul Ok(()) } -//pub fn from_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Result<()> { -//let f = File::open(&src)?; -//let gz = GzDecoder::new(f); -//let mut ar = Archive::new(gz); -//create_dir_all(&dest)?; -//ar.unpack(&dest)?; -//Ok(()) -//} +pub fn from_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Result<()> { + let f = File::open(&src)?; + let gz = GzDecoder::new(f); + let mut ar = Archive::new(gz); + create_dir_all(&dest)?; + ar.unpack(&dest)?; + Ok(()) 
+} diff --git a/meilisearch-lib/src/index/updates.rs b/meilisearch-lib/src/index/updates.rs index b5de21403..0ae0aefdb 100644 --- a/meilisearch-lib/src/index/updates.rs +++ b/meilisearch-lib/src/index/updates.rs @@ -206,6 +206,10 @@ impl Index { result })(); + if let Update::DocumentAddition { content_uuid, .. } = update.from.meta() { + let _ = self.update_file_store.delete(*content_uuid); + } + match result { Ok(result) => Ok(update.process(result)), Err(e) => Err(update.fail(e)), diff --git a/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs b/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs index 1ad92dd56..a41e18683 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs @@ -15,8 +15,7 @@ use crate::document_formats::read_ndjson; use crate::index::apply_settings_to_builder; use crate::index::update_handler::UpdateHandler; use crate::index_controller::index_resolver::uuid_store::HeedUuidStore; -use crate::index_controller::{self, IndexMetadata}; -use crate::index_controller::{asc_ranking_rule, desc_ranking_rule}; +use crate::index_controller::{self, asc_ranking_rule, desc_ranking_rule, IndexMetadata}; use crate::{index::Unchecked, options::IndexerOpts}; #[derive(Serialize, Deserialize, Debug)] diff --git a/meilisearch-lib/src/index_controller/dump_actor/mod.rs b/meilisearch-lib/src/index_controller/dump_actor/mod.rs index 72a83a505..3f9d33223 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/mod.rs @@ -16,6 +16,7 @@ pub use message::DumpMsg; use super::index_resolver::HardStateIndexResolver; use super::updates::UpdateSender; +use crate::compression::{from_tar_gz, to_tar_gz}; use crate::index_controller::dump_actor::error::DumpActorError; use crate::index_controller::updates::UpdateMsg; use crate::options::IndexerOpts; @@ -111,7 +112,7 @@ pub fn load_dump( let tmp_src = 
tempfile::tempdir()?; let tmp_src_path = tmp_src.path(); - crate::from_tar_gz(&src_path, tmp_src_path)?; + from_tar_gz(&src_path, tmp_src_path)?; let meta_path = tmp_src_path.join(META_FILE_NAME); let mut meta_file = File::open(&meta_path)?; @@ -172,7 +173,7 @@ impl DumpTask { let dump_path = tokio::task::spawn_blocking(move || -> Result { let temp_dump_file = tempfile::NamedTempFile::new()?; - crate::to_tar_gz(temp_dump_path, temp_dump_file.path()) + to_tar_gz(temp_dump_path, temp_dump_file.path()) .map_err(|e| DumpActorError::Internal(e.into()))?; let dump_path = self.path.join(self.uid).with_extension("dump"); diff --git a/meilisearch-lib/src/index_controller/index_resolver/mod.rs b/meilisearch-lib/src/index_controller/index_resolver/mod.rs index 9f86f7b08..008d0d219 100644 --- a/meilisearch-lib/src/index_controller/index_resolver/mod.rs +++ b/meilisearch-lib/src/index_controller/index_resolver/mod.rs @@ -143,7 +143,7 @@ where Some(index) => Ok(index), None => { // For some reason we got a uuid to an unexisting index, we return an error, - // and remove the uuid from th uuid store. + // and remove the uuid from the uuid store. let _ = self.index_uuid_store.delete(name.clone()).await; Err(IndexResolverError::UnexistingIndex(name)) } diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index 4938e7c8d..52b2b1d01 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -496,13 +496,9 @@ pub fn asc_ranking_rule(text: &str) -> Option<&str> { .map(|(field, _)| field) } -/// Parses the v1 version of the Desc ranking rules `asc(price)`and returns the field name. +/// Parses the v1 version of the Desc ranking rules `desc(price)` and returns the field name.
pub fn desc_ranking_rule(text: &str) -> Option<&str> { text.split_once("desc(") .and_then(|(_, tail)| tail.rsplit_once(")")) .map(|(field, _)| field) } - -fn update_files_path(path: impl AsRef) -> PathBuf { - path.as_ref().join("updates/updates_files") -} diff --git a/meilisearch-lib/src/index_controller/snapshot.rs b/meilisearch-lib/src/index_controller/snapshot.rs index 2d83a491c..36e45547e 100644 --- a/meilisearch-lib/src/index_controller/snapshot.rs +++ b/meilisearch-lib/src/index_controller/snapshot.rs @@ -8,6 +8,7 @@ use tokio::fs; use tokio::task::spawn_blocking; use tokio::time::sleep; +use crate::compression::from_tar_gz; use crate::index_controller::updates::UpdateMsg; use super::index_resolver::HardStateIndexResolver; @@ -95,7 +96,7 @@ pub fn load_snapshot( ignore_missing_snapshot: bool, ) -> anyhow::Result<()> { if !db_path.as_ref().exists() && snapshot_path.as_ref().exists() { - match crate::from_tar_gz(snapshot_path, &db_path) { + match from_tar_gz(snapshot_path, &db_path) { Ok(()) => Ok(()), Err(e) => { //clean created db folder diff --git a/meilisearch-lib/src/index_controller/update_file_store.rs b/meilisearch-lib/src/index_controller/update_file_store.rs index 483fa80f8..09ddc1d89 100644 --- a/meilisearch-lib/src/index_controller/update_file_store.rs +++ b/meilisearch-lib/src/index_controller/update_file_store.rs @@ -141,8 +141,7 @@ impl UpdateFileStore { let mut document_buffer = Map::new(); // TODO: we need to find a way to do this more efficiently. (create a custom serializer - // for - // jsonl for example...) + // for jsonl for example...) while let Some((index, document)) = document_reader.next_document_with_index()? 
{ for (field_id, content) in document.iter() { if let Some(field_name) = index.get_by_left(&field_id) { @@ -164,4 +163,10 @@ impl UpdateFileStore { pub fn get_size(&self, uuid: Uuid) -> Result { Ok(self.get_update(uuid)?.metadata()?.len()) } + + pub fn delete(&self, uuid: Uuid) -> Result<()> { + let path = self.path.join(uuid.to_string()); + std::fs::remove_file(path)?; + Ok(()) + } } diff --git a/meilisearch-lib/src/index_controller/updates/store/mod.rs b/meilisearch-lib/src/index_controller/updates/store/mod.rs index bb77250b5..df89d6ecc 100644 --- a/meilisearch-lib/src/index_controller/updates/store/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/store/mod.rs @@ -1,7 +1,7 @@ mod codec; pub mod dump; -use std::fs::{create_dir_all, remove_file}; +use std::fs::create_dir_all; use std::path::Path; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; @@ -29,7 +29,6 @@ use codec::*; use super::error::Result; use super::status::{Enqueued, Processing}; use crate::index::Index; -use crate::index_controller::update_files_path; use crate::index_controller::updates::*; use crate::EnvSizer; @@ -269,8 +268,8 @@ impl UpdateStore { Ok(meta) } - // /// Push already processed update in the UpdateStore without triggering the notification - // /// process. This is useful for the dumps. + /// Push already processed update in the UpdateStore without triggering the notification + /// process. This is useful for the dumps. pub fn register_raw_updates( &self, wtxn: &mut heed::RwTxn, @@ -436,19 +435,19 @@ impl UpdateStore { pub fn delete_all(&self, index_uuid: Uuid) -> Result<()> { let mut txn = self.env.write_txn()?; // Contains all the content file paths that we need to be removed if the deletion was successful. 
- let uuids_to_remove = Vec::new(); + let mut uuids_to_remove = Vec::new(); let mut pendings = self.pending_queue.iter_mut(&mut txn)?.lazily_decode_data(); while let Some(Ok(((_, uuid, _), pending))) = pendings.next() { if uuid == index_uuid { - let mut _pending = pending.decode()?; - //if let Some(update_uuid) = pending.content.take() { - //uuids_to_remove.push(update_uuid); - //} + let pending = pending.decode()?; + if let Update::DocumentAddition { content_uuid, .. } = pending.meta() { + uuids_to_remove.push(*content_uuid); + } - // Invariant check: we can only delete the current entry when we don't hold - // references to it anymore. This must be done after we have retrieved its content. + // Invariant check: we can only delete the current entry when we don't hold + // references to it anymore. This must be done after we have retrieved its content. unsafe { pendings.del_current()?; } @@ -485,12 +484,9 @@ impl UpdateStore { // Finally, remove any outstanding update files. This must be done after waiting for the // last update to ensure that the update files are not deleted before the update needs // them.
- uuids_to_remove - .iter() - .map(|uuid: &Uuid| update_files_path(&self.path).join(uuid.to_string())) - .for_each(|path| { - let _ = remove_file(path); - }); + uuids_to_remove.iter().for_each(|uuid| { + let _ = self.update_file_store.delete(*uuid); + }); Ok(()) } diff --git a/meilisearch-lib/src/lib.rs b/meilisearch-lib/src/lib.rs index 6eaaf431c..364a96dcf 100644 --- a/meilisearch-lib/src/lib.rs +++ b/meilisearch-lib/src/lib.rs @@ -28,30 +28,3 @@ impl EnvSizer for heed::Env { .fold(0, |acc, m| acc + m.len()) } } - -use std::fs::{create_dir_all, File}; -use std::io::Write; -use std::path::Path; - -use flate2::{read::GzDecoder, write::GzEncoder, Compression}; -use tar::{Archive, Builder}; - -pub fn to_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Result<()> { - let mut f = File::create(dest)?; - let gz_encoder = GzEncoder::new(&mut f, Compression::default()); - let mut tar_encoder = Builder::new(gz_encoder); - tar_encoder.append_dir_all(".", src)?; - let gz_encoder = tar_encoder.into_inner()?; - gz_encoder.finish()?; - f.flush()?; - Ok(()) -} - -pub fn from_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Result<()> { - let f = File::open(&src)?; - let gz = GzDecoder::new(f); - let mut ar = Archive::new(gz); - create_dir_all(&dest)?; - ar.unpack(&dest)?; - Ok(()) -}