diff --git a/meilisearch-http/src/lib.rs b/meilisearch-http/src/lib.rs index f8e2357fd..ded0aad52 100644 --- a/meilisearch-http/src/lib.rs +++ b/meilisearch-http/src/lib.rs @@ -30,11 +30,15 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result { meilisearch .set_max_index_size(opt.max_index_size.get_bytes() as usize) .set_max_task_store_size(opt.max_task_db_size.get_bytes() as usize) + // snapshot .set_ignore_missing_snapshot(opt.ignore_missing_snapshot) .set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists) - .set_dump_dst(opt.dumps_dir.clone()) .set_snapshot_interval(Duration::from_secs(opt.snapshot_interval_sec)) - .set_snapshot_dir(opt.snapshot_dir.clone()); + .set_snapshot_dir(opt.snapshot_dir.clone()) + // dump + .set_ignore_missing_dump(opt.ignore_missing_dump) + .set_ignore_dump_if_db_exists(opt.ignore_dump_if_db_exists) + .set_dump_dst(opt.dumps_dir.clone()); if let Some(ref path) = opt.import_snapshot { meilisearch.set_import_snapshot(path.clone()); diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 7abe9be55..520e659f0 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -112,14 +112,22 @@ pub struct Opt { #[clap(long, env = "MEILI_SNAPSHOT_INTERVAL_SEC", default_value = "86400")] // 24h pub snapshot_interval_sec: u64, - /// Folder where dumps are created when the dump route is called. - #[clap(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")] - pub dumps_dir: PathBuf, - /// Import a dump from the specified path, must be a `.dump` file. #[clap(long, conflicts_with = "import-snapshot")] pub import_dump: Option, + /// If the dump doesn't exist, load or create the database specified by `db-path` instead. + #[clap(long, requires = "import-dump")] + pub ignore_missing_dump: bool, + + /// Ignore the dump if a database already exists, and load that database instead. 
+ #[clap(long, requires = "import-dump")] + pub ignore_dump_if_db_exists: bool, + + /// Folder where dumps are created when the dump route is called. + #[clap(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")] + pub dumps_dir: PathBuf, + /// Set the log level #[clap(long, env = "MEILI_LOG_LEVEL", default_value = "info")] pub log_level: String, diff --git a/meilisearch-http/tests/common/server.rs b/meilisearch-http/tests/common/server.rs index 409cacd66..bdaf75ac1 100644 --- a/meilisearch-http/tests/common/server.rs +++ b/meilisearch-http/tests/common/server.rs @@ -148,6 +148,8 @@ pub fn default_settings(dir: impl AsRef) -> Opt { schedule_snapshot: false, snapshot_interval_sec: 0, import_dump: None, + ignore_missing_dump: false, + ignore_dump_if_db_exists: false, indexer_options: IndexerOpts { // memory has to be unlimited because several meilisearch are running in test context. max_memory: MaxMemory::unlimited(), diff --git a/meilisearch-lib/src/index_controller/dump_actor/mod.rs b/meilisearch-lib/src/index_controller/dump_actor/mod.rs index 9872d8254..2fcc34077 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/mod.rs @@ -1,14 +1,16 @@ use std::fs::File; use std::path::{Path, PathBuf}; +use anyhow::bail; use chrono::{DateTime, Utc}; -use log::{info, trace, warn}; +use log::{info, trace}; use serde::{Deserialize, Serialize}; pub use actor::DumpActor; pub use handle_impl::*; use meilisearch_auth::AuthController; pub use message::DumpMsg; +use tempfile::TempDir; use tokio::fs::create_dir_all; use tokio::sync::oneshot; @@ -79,6 +81,47 @@ pub enum MetadataVersion { } impl MetadataVersion { + pub fn load_dump( + self, + src: impl AsRef, + dst: impl AsRef, + index_db_size: usize, + meta_env_size: usize, + indexing_options: &IndexerOpts, + ) -> anyhow::Result<()> { + match self { + MetadataVersion::V1(_meta) => { + anyhow::bail!("The version 1 of the dumps is not supported anymore. 
You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.") + } + MetadataVersion::V2(meta) => v2::load_dump( + meta, + src, + dst, + index_db_size, + meta_env_size, + indexing_options, + )?, + MetadataVersion::V3(meta) => v3::load_dump( + meta, + src, + dst, + index_db_size, + meta_env_size, + indexing_options, + )?, + MetadataVersion::V4(meta) => v4::load_dump( + meta, + src, + dst, + index_db_size, + meta_env_size, + indexing_options, + )?, + } + + Ok(()) + } + pub fn new_v4(index_db_size: usize, update_db_size: usize) -> Self { let meta = Metadata::new(index_db_size, update_db_size); Self::V4(meta) @@ -160,10 +203,46 @@ impl DumpInfo { pub fn load_dump( dst_path: impl AsRef, src_path: impl AsRef, + ignore_dump_if_db_exists: bool, + ignore_missing_dump: bool, index_db_size: usize, update_db_size: usize, indexer_opts: &IndexerOpts, ) -> anyhow::Result<()> { + let empty_db = crate::is_empty_db(&dst_path); + let src_path_exists = src_path.as_ref().exists(); + + if empty_db && src_path_exists { + let (tmp_src, tmp_dst, meta) = extract_dump(&dst_path, &src_path)?; + meta.load_dump( + tmp_src.path(), + tmp_dst.path(), + index_db_size, + update_db_size, + indexer_opts, + )?; + persist_dump(&dst_path, tmp_dst)?; + Ok(()) + } else if !empty_db && !ignore_dump_if_db_exists { + bail!( + "database already exists at {:?}, try to delete it or rename it", + dst_path + .as_ref() + .canonicalize() + .unwrap_or_else(|_| dst_path.as_ref().to_owned()) + ) + } else if !src_path_exists && !ignore_missing_dump { + bail!("dump doesn't exist at {:?}", src_path.as_ref()) + } else { + // there is nothing to do + Ok(()) + } +} + +fn extract_dump( + dst_path: impl AsRef, + src_path: impl AsRef, +) -> anyhow::Result<(TempDir, TempDir, MetadataVersion)> { // Setup a temp directory path in the same path as the database, to prevent cross devices // references. 
let temp_path = dst_path @@ -201,40 +280,14 @@ pub fn load_dump( meta.version() ); - match meta { - MetadataVersion::V1(_meta) => { - anyhow::bail!("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.") - } - MetadataVersion::V2(meta) => v2::load_dump( - meta, - &tmp_src_path, - tmp_dst.path(), - index_db_size, - update_db_size, - indexer_opts, - )?, - MetadataVersion::V3(meta) => v3::load_dump( - meta, - &tmp_src_path, - tmp_dst.path(), - index_db_size, - update_db_size, - indexer_opts, - )?, - MetadataVersion::V4(meta) => v4::load_dump( - meta, - &tmp_src_path, - tmp_dst.path(), - index_db_size, - update_db_size, - indexer_opts, - )?, - } + Ok((tmp_src, tmp_dst, meta)) +} + +fn persist_dump(dst_path: impl AsRef, tmp_dst: TempDir) -> anyhow::Result<()> { let persisted_dump = tmp_dst.into_path(); // Delete everything in the `data.ms` except the tempdir. if dst_path.as_ref().exists() { - warn!("Overwriting database at {}", dst_path.as_ref().display()); for file in dst_path.as_ref().read_dir().unwrap() { let file = file.unwrap().path(); if file.file_name() == persisted_dump.file_name() { diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index 3e3952058..8e163517a 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -150,6 +150,8 @@ pub struct IndexControllerBuilder { schedule_snapshot: bool, dump_src: Option, dump_dst: Option, + ignore_dump_if_db_exists: bool, + ignore_missing_dump: bool, } impl IndexControllerBuilder { @@ -186,6 +188,8 @@ impl IndexControllerBuilder { load_dump( db_path.as_ref(), src_path, + self.ignore_dump_if_db_exists, + self.ignore_missing_dump, index_size, task_store_size, &indexer_options, @@ -296,18 +300,6 @@ impl IndexControllerBuilder { self } - /// Set the index controller builder's dump src. 
- pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self { - self.dump_src.replace(dump_src); - self - } - - /// Set the index controller builder's dump dst. - pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self { - self.dump_dst.replace(dump_dst); - self - } - /// Set the index controller builder's import snapshot. pub fn set_import_snapshot(&mut self, import_snapshot: PathBuf) -> &mut Self { self.import_snapshot.replace(import_snapshot); @@ -325,6 +317,30 @@ impl IndexControllerBuilder { self.schedule_snapshot = true; self } + + /// Set the index controller builder's dump src. + pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self { + self.dump_src.replace(dump_src); + self + } + + /// Set the index controller builder's dump dst. + pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self { + self.dump_dst.replace(dump_dst); + self + } + + /// Set the index controller builder's ignore dump if db exists. + pub fn set_ignore_dump_if_db_exists(&mut self, ignore_dump_if_db_exists: bool) -> &mut Self { + self.ignore_dump_if_db_exists = ignore_dump_if_db_exists; + self + } + + /// Set the index controller builder's ignore missing dump. + pub fn set_ignore_missing_dump(&mut self, ignore_missing_dump: bool) -> &mut Self { + self.ignore_missing_dump = ignore_missing_dump; + self + } } impl IndexController diff --git a/meilisearch-lib/src/lib.rs b/meilisearch-lib/src/lib.rs index ed1942c94..21b8eb8c3 100644 --- a/meilisearch-lib/src/lib.rs +++ b/meilisearch-lib/src/lib.rs @@ -10,6 +10,8 @@ mod snapshot; pub mod tasks; mod update_file_store; +use std::path::Path; + pub use index_controller::MeiliSearch; pub use milli; @@ -33,3 +35,19 @@ impl EnvSizer for heed::Env { .fold(0, |acc, m| acc + m.len()) } } + +/// Check if a db is empty. It does not provide any information on the +/// validity of the data in it. +/// We consider a database as non empty when it's a non empty directory. 
/// Reports whether the database location at `db_path` should be treated as empty.
///
/// This only inspects the filesystem entry itself; it does not provide any
/// information on the validity of the data stored in it.
///
/// Returns `true` when `db_path` does not exist, or when it is a directory
/// without any entry. Returns `false` when the directory holds at least one
/// entry, and also when the path cannot be listed (for instance it is a regular
/// file, or reading the directory fails): something already occupies the path,
/// so it must not be reported as an empty database.
pub fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
    let db_path = db_path.as_ref();

    if !db_path.exists() {
        return true;
    }

    match db_path.read_dir() {
        // A single entry is enough to know the directory is in use; there is no
        // need to walk the whole directory just to count it.
        Ok(mut entries) => entries.next().is_none(),
        // If we encounter an error or if the db is a file we consider the db non empty.
        Err(_) => false,
    }
}