mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-22 11:17:28 +01:00
Merge #2098
2098: feat(dump): Provide the same cli options as the snapshots r=MarinPostma a=irevoire Add two cli options for the dump: - `--ignore-missing-dump` - `--ignore-dump-if-db-exists` Fix #2087 Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
commit
c8bb54cd94
@ -30,11 +30,15 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<MeiliSearch> {
|
||||
meilisearch
|
||||
.set_max_index_size(opt.max_index_size.get_bytes() as usize)
|
||||
.set_max_task_store_size(opt.max_task_db_size.get_bytes() as usize)
|
||||
// snapshot
|
||||
.set_ignore_missing_snapshot(opt.ignore_missing_snapshot)
|
||||
.set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists)
|
||||
.set_dump_dst(opt.dumps_dir.clone())
|
||||
.set_snapshot_interval(Duration::from_secs(opt.snapshot_interval_sec))
|
||||
.set_snapshot_dir(opt.snapshot_dir.clone());
|
||||
.set_snapshot_dir(opt.snapshot_dir.clone())
|
||||
// dump
|
||||
.set_ignore_missing_dump(opt.ignore_missing_dump)
|
||||
.set_ignore_dump_if_db_exists(opt.ignore_dump_if_db_exists)
|
||||
.set_dump_dst(opt.dumps_dir.clone());
|
||||
|
||||
if let Some(ref path) = opt.import_snapshot {
|
||||
meilisearch.set_import_snapshot(path.clone());
|
||||
|
@ -124,14 +124,22 @@ pub struct Opt {
|
||||
#[clap(long, env = "MEILI_SNAPSHOT_INTERVAL_SEC", default_value = "86400")] // 24h
|
||||
pub snapshot_interval_sec: u64,
|
||||
|
||||
/// Folder where dumps are created when the dump route is called.
|
||||
#[clap(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")]
|
||||
pub dumps_dir: PathBuf,
|
||||
|
||||
/// Import a dump from the specified path, must be a `.dump` file.
|
||||
#[clap(long, conflicts_with = "import-snapshot")]
|
||||
pub import_dump: Option<PathBuf>,
|
||||
|
||||
/// If the dump doesn't exists, load or create the database specified by `db-path` instead.
|
||||
#[clap(long, requires = "import-dump")]
|
||||
pub ignore_missing_dump: bool,
|
||||
|
||||
/// Ignore the dump if a database already exists, and load that database instead.
|
||||
#[clap(long, requires = "import-dump")]
|
||||
pub ignore_dump_if_db_exists: bool,
|
||||
|
||||
/// Folder where dumps are created when the dump route is called.
|
||||
#[clap(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")]
|
||||
pub dumps_dir: PathBuf,
|
||||
|
||||
/// Set the log level
|
||||
#[clap(long, env = "MEILI_LOG_LEVEL", default_value = "info")]
|
||||
pub log_level: String,
|
||||
|
@ -148,6 +148,8 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
|
||||
schedule_snapshot: false,
|
||||
snapshot_interval_sec: 0,
|
||||
import_dump: None,
|
||||
ignore_missing_dump: false,
|
||||
ignore_dump_if_db_exists: false,
|
||||
indexer_options: IndexerOpts {
|
||||
// memory has to be unlimited because several meilisearch are running in test context.
|
||||
max_memory: MaxMemory::unlimited(),
|
||||
|
@ -1,14 +1,16 @@
|
||||
use std::fs::File;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use anyhow::bail;
|
||||
use chrono::{DateTime, Utc};
|
||||
use log::{info, trace, warn};
|
||||
use log::{info, trace};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub use actor::DumpActor;
|
||||
pub use handle_impl::*;
|
||||
use meilisearch_auth::AuthController;
|
||||
pub use message::DumpMsg;
|
||||
use tempfile::TempDir;
|
||||
use tokio::fs::create_dir_all;
|
||||
use tokio::sync::oneshot;
|
||||
|
||||
@ -79,6 +81,47 @@ pub enum MetadataVersion {
|
||||
}
|
||||
|
||||
impl MetadataVersion {
|
||||
pub fn load_dump(
|
||||
self,
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
index_db_size: usize,
|
||||
meta_env_size: usize,
|
||||
indexing_options: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
match self {
|
||||
MetadataVersion::V1(_meta) => {
|
||||
anyhow::bail!("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.")
|
||||
}
|
||||
MetadataVersion::V2(meta) => v2::load_dump(
|
||||
meta,
|
||||
src,
|
||||
dst,
|
||||
index_db_size,
|
||||
meta_env_size,
|
||||
indexing_options,
|
||||
)?,
|
||||
MetadataVersion::V3(meta) => v3::load_dump(
|
||||
meta,
|
||||
src,
|
||||
dst,
|
||||
index_db_size,
|
||||
meta_env_size,
|
||||
indexing_options,
|
||||
)?,
|
||||
MetadataVersion::V4(meta) => v4::load_dump(
|
||||
meta,
|
||||
src,
|
||||
dst,
|
||||
index_db_size,
|
||||
meta_env_size,
|
||||
indexing_options,
|
||||
)?,
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn new_v4(index_db_size: usize, update_db_size: usize) -> Self {
|
||||
let meta = Metadata::new(index_db_size, update_db_size);
|
||||
Self::V4(meta)
|
||||
@ -160,10 +203,46 @@ impl DumpInfo {
|
||||
pub fn load_dump(
|
||||
dst_path: impl AsRef<Path>,
|
||||
src_path: impl AsRef<Path>,
|
||||
ignore_dump_if_db_exists: bool,
|
||||
ignore_missing_dump: bool,
|
||||
index_db_size: usize,
|
||||
update_db_size: usize,
|
||||
indexer_opts: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
let empty_db = crate::is_empty_db(&dst_path);
|
||||
let src_path_exists = src_path.as_ref().exists();
|
||||
|
||||
if empty_db && src_path_exists {
|
||||
let (tmp_src, tmp_dst, meta) = extract_dump(&dst_path, &src_path)?;
|
||||
meta.load_dump(
|
||||
tmp_src.path(),
|
||||
tmp_dst.path(),
|
||||
index_db_size,
|
||||
update_db_size,
|
||||
indexer_opts,
|
||||
)?;
|
||||
persist_dump(&dst_path, tmp_dst)?;
|
||||
Ok(())
|
||||
} else if !empty_db && !ignore_dump_if_db_exists {
|
||||
bail!(
|
||||
"database already exists at {:?}, try to delete it or rename it",
|
||||
dst_path
|
||||
.as_ref()
|
||||
.canonicalize()
|
||||
.unwrap_or_else(|_| dst_path.as_ref().to_owned())
|
||||
)
|
||||
} else if !src_path_exists && !ignore_missing_dump {
|
||||
bail!("dump doesn't exist at {:?}", src_path.as_ref())
|
||||
} else {
|
||||
// there is nothing to do
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_dump(
|
||||
dst_path: impl AsRef<Path>,
|
||||
src_path: impl AsRef<Path>,
|
||||
) -> anyhow::Result<(TempDir, TempDir, MetadataVersion)> {
|
||||
// Setup a temp directory path in the same path as the database, to prevent cross devices
|
||||
// references.
|
||||
let temp_path = dst_path
|
||||
@ -201,40 +280,14 @@ pub fn load_dump(
|
||||
meta.version()
|
||||
);
|
||||
|
||||
match meta {
|
||||
MetadataVersion::V1(_meta) => {
|
||||
anyhow::bail!("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.")
|
||||
}
|
||||
MetadataVersion::V2(meta) => v2::load_dump(
|
||||
meta,
|
||||
&tmp_src_path,
|
||||
tmp_dst.path(),
|
||||
index_db_size,
|
||||
update_db_size,
|
||||
indexer_opts,
|
||||
)?,
|
||||
MetadataVersion::V3(meta) => v3::load_dump(
|
||||
meta,
|
||||
&tmp_src_path,
|
||||
tmp_dst.path(),
|
||||
index_db_size,
|
||||
update_db_size,
|
||||
indexer_opts,
|
||||
)?,
|
||||
MetadataVersion::V4(meta) => v4::load_dump(
|
||||
meta,
|
||||
&tmp_src_path,
|
||||
tmp_dst.path(),
|
||||
index_db_size,
|
||||
update_db_size,
|
||||
indexer_opts,
|
||||
)?,
|
||||
}
|
||||
Ok((tmp_src, tmp_dst, meta))
|
||||
}
|
||||
|
||||
fn persist_dump(dst_path: impl AsRef<Path>, tmp_dst: TempDir) -> anyhow::Result<()> {
|
||||
let persisted_dump = tmp_dst.into_path();
|
||||
|
||||
// Delete everything in the `data.ms` except the tempdir.
|
||||
if dst_path.as_ref().exists() {
|
||||
warn!("Overwriting database at {}", dst_path.as_ref().display());
|
||||
for file in dst_path.as_ref().read_dir().unwrap() {
|
||||
let file = file.unwrap().path();
|
||||
if file.file_name() == persisted_dump.file_name() {
|
||||
|
@ -150,6 +150,8 @@ pub struct IndexControllerBuilder {
|
||||
schedule_snapshot: bool,
|
||||
dump_src: Option<PathBuf>,
|
||||
dump_dst: Option<PathBuf>,
|
||||
ignore_dump_if_db_exists: bool,
|
||||
ignore_missing_dump: bool,
|
||||
}
|
||||
|
||||
impl IndexControllerBuilder {
|
||||
@ -186,6 +188,8 @@ impl IndexControllerBuilder {
|
||||
load_dump(
|
||||
db_path.as_ref(),
|
||||
src_path,
|
||||
self.ignore_dump_if_db_exists,
|
||||
self.ignore_missing_dump,
|
||||
index_size,
|
||||
task_store_size,
|
||||
&indexer_options,
|
||||
@ -296,18 +300,6 @@ impl IndexControllerBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the index controller builder's dump src.
|
||||
pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self {
|
||||
self.dump_src.replace(dump_src);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the index controller builder's dump dst.
|
||||
pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self {
|
||||
self.dump_dst.replace(dump_dst);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the index controller builder's import snapshot.
|
||||
pub fn set_import_snapshot(&mut self, import_snapshot: PathBuf) -> &mut Self {
|
||||
self.import_snapshot.replace(import_snapshot);
|
||||
@ -325,6 +317,30 @@ impl IndexControllerBuilder {
|
||||
self.schedule_snapshot = true;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the index controller builder's dump src.
|
||||
pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self {
|
||||
self.dump_src.replace(dump_src);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the index controller builder's dump dst.
|
||||
pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self {
|
||||
self.dump_dst.replace(dump_dst);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the index controller builder's ignore dump if db exists.
|
||||
pub fn set_ignore_dump_if_db_exists(&mut self, ignore_dump_if_db_exists: bool) -> &mut Self {
|
||||
self.ignore_dump_if_db_exists = ignore_dump_if_db_exists;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the index controller builder's ignore missing dump.
|
||||
pub fn set_ignore_missing_dump(&mut self, ignore_missing_dump: bool) -> &mut Self {
|
||||
self.ignore_missing_dump = ignore_missing_dump;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl<U, I> IndexController<U, I>
|
||||
|
@ -10,6 +10,8 @@ mod snapshot;
|
||||
pub mod tasks;
|
||||
mod update_file_store;
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
pub use index_controller::MeiliSearch;
|
||||
|
||||
pub use milli;
|
||||
@ -33,3 +35,19 @@ impl EnvSizer for heed::Env {
|
||||
.fold(0, |acc, m| acc + m.len())
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if a db is empty. It does not provide any information on the
|
||||
/// validity of the data in it.
|
||||
/// We consider a database as non empty when it's a non empty directory.
|
||||
pub fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
|
||||
let db_path = db_path.as_ref();
|
||||
|
||||
if !db_path.exists() {
|
||||
true
|
||||
// if we encounter an error or if the db is a file we consider the db non empty
|
||||
} else if let Ok(dir) = db_path.read_dir() {
|
||||
dir.count() == 0
|
||||
} else {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
@ -49,7 +49,10 @@ pub fn load_snapshot(
|
||||
ignore_snapshot_if_db_exists: bool,
|
||||
ignore_missing_snapshot: bool,
|
||||
) -> anyhow::Result<()> {
|
||||
if !db_path.as_ref().exists() && snapshot_path.as_ref().exists() {
|
||||
let empty_db = crate::is_empty_db(&db_path);
|
||||
let snapshot_path_exists = snapshot_path.as_ref().exists();
|
||||
|
||||
if empty_db && snapshot_path_exists {
|
||||
match from_tar_gz(snapshot_path, &db_path) {
|
||||
Ok(()) => Ok(()),
|
||||
Err(e) => {
|
||||
@ -58,7 +61,7 @@ pub fn load_snapshot(
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
} else if db_path.as_ref().exists() && !ignore_snapshot_if_db_exists {
|
||||
} else if !empty_db && !ignore_snapshot_if_db_exists {
|
||||
bail!(
|
||||
"database already exists at {:?}, try to delete it or rename it",
|
||||
db_path
|
||||
@ -66,14 +69,8 @@ pub fn load_snapshot(
|
||||
.canonicalize()
|
||||
.unwrap_or_else(|_| db_path.as_ref().to_owned())
|
||||
)
|
||||
} else if !snapshot_path.as_ref().exists() && !ignore_missing_snapshot {
|
||||
bail!(
|
||||
"snapshot doesn't exist at {:?}",
|
||||
snapshot_path
|
||||
.as_ref()
|
||||
.canonicalize()
|
||||
.unwrap_or_else(|_| snapshot_path.as_ref().to_owned())
|
||||
)
|
||||
} else if !snapshot_path_exists && !ignore_missing_snapshot {
|
||||
bail!("snapshot doesn't exist at {:?}", snapshot_path.as_ref())
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user