From 60c3bac108cff401512f81ff8972ca24a071ba45 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 14 Dec 2022 14:33:43 +0100 Subject: [PATCH 1/5] Bump milli to v0.37.3 --- Cargo.lock | 16 ++++++++-------- meilisearch-types/Cargo.toml | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dfc9818aa..bd24f7569 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1332,8 +1332,8 @@ dependencies = [ [[package]] name = "filter-parser" -version = "0.37.2" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.2#1582b96119fedad39c726a6d4aeda0f53e868a3b" +version = "0.37.3" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.3#2101e3c6d592f6ce6cc25b6e4585f3a8a6246457" dependencies = [ "nom", "nom_locate", @@ -1351,8 +1351,8 @@ dependencies = [ [[package]] name = "flatten-serde-json" -version = "0.37.2" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.2#1582b96119fedad39c726a6d4aeda0f53e868a3b" +version = "0.37.3" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.3#2101e3c6d592f6ce6cc25b6e4585f3a8a6246457" dependencies = [ "serde_json", ] @@ -1898,8 +1898,8 @@ dependencies = [ [[package]] name = "json-depth-checker" -version = "0.37.2" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.2#1582b96119fedad39c726a6d4aeda0f53e868a3b" +version = "0.37.3" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.3#2101e3c6d592f6ce6cc25b6e4585f3a8a6246457" dependencies = [ "serde_json", ] @@ -2418,8 +2418,8 @@ dependencies = [ [[package]] name = "milli" -version = "0.37.2" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.2#1582b96119fedad39c726a6d4aeda0f53e868a3b" +version = "0.37.3" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.3#2101e3c6d592f6ce6cc25b6e4585f3a8a6246457" dependencies = [ "bimap", "bincode", diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index 787737edb..f265d442b 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -13,7 +13,7 @@ enum-iterator = "1.1.3" flate2 = "1.0.24" fst = "0.4.7" memmap2 = "0.5.7" -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.37.2", default-features = false } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.37.3", default-features = false } proptest = { version = "1.0.0", optional = true } proptest-derive = { version = "0.3.0", optional = true } roaring = { version = "0.10.0", features = ["serde"] } From fbbc6eaecaf71b6909de96e6af538a77cdc547f8 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 13 Dec 2022 16:33:07 +0100 Subject: [PATCH 2/5] Fix the import of dumps and snapshot. Some flags were badly applied + the database wrongly deleted when they shouldn't --- meilisearch/src/lib.rs | 125 ++++++++++++++++++++++------------------- 1 file changed, 67 insertions(+), 58 deletions(-) diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index b11f063d2..89b944dde 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -108,75 +108,43 @@ pub fn create_app( .wrap(middleware::NormalizePath::new(middleware::TrailingSlash::Trim)) } -// TODO: TAMO: Finish setting up things +enum OnFailure { + RemoveDb, + KeepDb, +} + pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc, AuthController)> { - // we don't want to create anything in the data.ms yet, thus we - // wrap our two builders in a closure that'll be executed later. - let auth_controller_builder = || AuthController::new(&opt.db_path, &opt.master_key); - let index_scheduler_builder = || { - IndexScheduler::new(IndexSchedulerOptions { - version_file_path: opt.db_path.join(VERSION_FILE_NAME), - auth_path: opt.db_path.join("auth"), - tasks_path: opt.db_path.join("tasks"), - update_file_path: opt.db_path.join("update_files"), - indexes_path: opt.db_path.join("indexes"), - snapshots_path: opt.snapshot_dir.clone(), - dumps_path: opt.dump_dir.clone(), - task_db_size: opt.max_task_db_size.get_bytes() as usize, - index_size: opt.max_index_size.get_bytes() as usize, - indexer_config: (&opt.indexer_options).try_into()?, - autobatching_enabled: !opt.scheduler_options.disable_auto_batching, - }) - }; - - enum OnFailure { - RemoveDb, - KeepDb, - } - - let meilisearch_builder = |on_failure: OnFailure| -> anyhow::Result<_> { - // if anything wrong happens we delete the `data.ms` entirely. - match ( - index_scheduler_builder().map_err(anyhow::Error::from), - auth_controller_builder().map_err(anyhow::Error::from), - create_version_file(&opt.db_path).map_err(anyhow::Error::from), - ) { - (Ok(i), Ok(a), Ok(())) => Ok((i, a)), - (Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => { - if matches!(on_failure, OnFailure::RemoveDb) { - std::fs::remove_dir_all(&opt.db_path)?; - } - Err(e) - } - } - }; - let empty_db = is_empty_db(&opt.db_path); let (index_scheduler, auth_controller) = if let Some(ref snapshot_path) = opt.import_snapshot { let snapshot_path_exists = snapshot_path.exists(); + // the db is empty and the snapshot exists, import it if empty_db && snapshot_path_exists { match compression::from_tar_gz(snapshot_path, &opt.db_path) { - Ok(()) => meilisearch_builder(OnFailure::RemoveDb)?, + Ok(()) => start_new_database(opt, OnFailure::RemoveDb)?, Err(e) => { std::fs::remove_dir_all(&opt.db_path)?; return Err(e); } } + // the db already exists and we should not ignore the snapshot => throw an error } else if !empty_db && !opt.ignore_snapshot_if_db_exists { bail!( "database already exists at {:?}, try to delete it or rename it", opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned()) ) + // the snapshot doesn't exists and we can't ignore it => throw an error } else if !snapshot_path_exists && !opt.ignore_missing_snapshot { bail!("snapshot doesn't exist at {}", snapshot_path.display()) + // the snapshot and the db exists, and we can ignore the snapshot because of the ignore_snapshot_if_db_exists flag } else { - meilisearch_builder(OnFailure::RemoveDb)? + start_or_import_existing_database(opt, empty_db)? } } else if let Some(ref path) = opt.import_dump { let src_path_exists = path.exists(); + // the db is empty and the dump exists, import it if empty_db && src_path_exists { let (mut index_scheduler, mut auth_controller) = - meilisearch_builder(OnFailure::RemoveDb)?; + start_new_database(opt, OnFailure::RemoveDb)?; match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) { Ok(()) => (index_scheduler, auth_controller), Err(e) => { @@ -184,29 +152,21 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc, Auth return Err(e); } } + // the db already exists and we should not ignore the dump option => throw an error } else if !empty_db && !opt.ignore_dump_if_db_exists { bail!( "database already exists at {:?}, try to delete it or rename it", opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned()) ) + // the dump doesn't exists and we can't ignore it => throw an error } else if !src_path_exists && !opt.ignore_missing_dump { bail!("dump doesn't exist at {:?}", path) + // the dump and the db exists and we can ignore the dump because of the ignore_dump_if_db_exists flag } else { - let (mut index_scheduler, mut auth_controller) = - meilisearch_builder(OnFailure::RemoveDb)?; - match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) { - Ok(()) => (index_scheduler, auth_controller), - Err(e) => { - std::fs::remove_dir_all(&opt.db_path)?; - return Err(e); - } - } + start_or_import_existing_database(opt, empty_db)? } } else { - if !empty_db { - check_version_file(&opt.db_path)?; - } - meilisearch_builder(OnFailure::KeepDb)? + start_or_import_existing_database(opt, empty_db)? }; // We create a loop in a thread that registers snapshotCreation tasks @@ -228,6 +188,55 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc, Auth Ok((index_scheduler, auth_controller)) } +fn start_new_database( + opt: &Opt, + on_failure: OnFailure, +) -> anyhow::Result<(IndexScheduler, AuthController)> { + // we don't want to create anything in the data.ms yet, thus we + // wrap our two builders in a closure that'll be executed later. + let auth_controller = AuthController::new(&opt.db_path, &opt.master_key); + let index_scheduler_builder = || -> anyhow::Result<_> { + Ok(IndexScheduler::new(IndexSchedulerOptions { + version_file_path: opt.db_path.join(VERSION_FILE_NAME), + auth_path: opt.db_path.join("auth"), + tasks_path: opt.db_path.join("tasks"), + update_file_path: opt.db_path.join("update_files"), + indexes_path: opt.db_path.join("indexes"), + snapshots_path: opt.snapshot_dir.clone(), + dumps_path: opt.dumps_dir.clone(), + task_db_size: opt.max_task_db_size.get_bytes() as usize, + index_size: opt.max_index_size.get_bytes() as usize, + indexer_config: (&opt.indexer_options).try_into()?, + autobatching_enabled: !opt.scheduler_options.disable_auto_batching, + })?) + }; + + match ( + index_scheduler_builder(), + auth_controller.map_err(anyhow::Error::from), + create_version_file(&opt.db_path).map_err(anyhow::Error::from), + ) { + (Ok(i), Ok(a), Ok(())) => Ok((i, a)), + (Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => { + if matches!(on_failure, OnFailure::RemoveDb) { + std::fs::remove_dir_all(&opt.db_path)?; + } + Err(e) + } + } +} + +fn start_or_import_existing_database( + opt: &Opt, + empty_db: bool, +) -> anyhow::Result<(IndexScheduler, AuthController)> { + if !empty_db { + check_version_file(&opt.db_path)?; + } + + start_new_database(opt, OnFailure::KeepDb) +} + fn import_dump( db_path: &Path, dump_path: &Path, From 6c0b8edab5ec82360f1c01159203ed9124bb06fd Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 13 Dec 2022 17:02:07 +0100 Subject: [PATCH 3/5] Fix typos Co-authored-by: Louis Dureuil --- meilisearch/src/lib.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index 89b944dde..c2cd4bbf0 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -132,10 +132,10 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc, Auth "database already exists at {:?}, try to delete it or rename it", opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned()) ) - // the snapshot doesn't exists and we can't ignore it => throw an error + // the snapshot doesn't exist and we can't ignore it => throw an error } else if !snapshot_path_exists && !opt.ignore_missing_snapshot { bail!("snapshot doesn't exist at {}", snapshot_path.display()) - // the snapshot and the db exists, and we can ignore the snapshot because of the ignore_snapshot_if_db_exists flag + // the snapshot and the db exist, and we can ignore the snapshot because of the ignore_snapshot_if_db_exists flag } else { start_or_import_existing_database(opt, empty_db)? } @@ -158,10 +158,11 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc, Auth "database already exists at {:?}, try to delete it or rename it", opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned()) ) - // the dump doesn't exists and we can't ignore it => throw an error + // the dump doesn't exist and we can't ignore it => throw an error } else if !src_path_exists && !opt.ignore_missing_dump { bail!("dump doesn't exist at {:?}", path) - // the dump and the db exists and we can ignore the dump because of the ignore_dump_if_db_exists flag + // the dump and the db exist and we can ignore the dump because of the ignore_dump_if_db_exists flag + // or, the dump is missing but we can ignore that because of the ignore_missing_dump flag } else { start_or_import_existing_database(opt, empty_db)? } From d66bb3a53f5c07dfca1e44e7d077acee907c5330 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 13 Dec 2022 17:25:49 +0100 Subject: [PATCH 4/5] rename the two new functions --- meilisearch/src/lib.rs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index c2cd4bbf0..ee88c7f7d 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -120,7 +120,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc, Auth // the db is empty and the snapshot exists, import it if empty_db && snapshot_path_exists { match compression::from_tar_gz(snapshot_path, &opt.db_path) { - Ok(()) => start_new_database(opt, OnFailure::RemoveDb)?, + Ok(()) => open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?, Err(e) => { std::fs::remove_dir_all(&opt.db_path)?; return Err(e); @@ -137,14 +137,14 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc, Auth bail!("snapshot doesn't exist at {}", snapshot_path.display()) // the snapshot and the db exist, and we can ignore the snapshot because of the ignore_snapshot_if_db_exists flag } else { - start_or_import_existing_database(opt, empty_db)? + open_or_create_database(opt, empty_db)? } } else if let Some(ref path) = opt.import_dump { let src_path_exists = path.exists(); // the db is empty and the dump exists, import it if empty_db && src_path_exists { let (mut index_scheduler, mut auth_controller) = - start_new_database(opt, OnFailure::RemoveDb)?; + open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?; match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) { Ok(()) => (index_scheduler, auth_controller), Err(e) => { @@ -164,10 +164,10 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc, Auth // the dump and the db exist and we can ignore the dump because of the ignore_dump_if_db_exists flag // or, the dump is missing but we can ignore that because of the ignore_missing_dump flag } else { - start_or_import_existing_database(opt, empty_db)? + open_or_create_database(opt, empty_db)? } } else { - start_or_import_existing_database(opt, empty_db)? + open_or_create_database(opt, empty_db)? }; // We create a loop in a thread that registers snapshotCreation tasks @@ -189,7 +189,8 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc, Auth Ok((index_scheduler, auth_controller)) } -fn start_new_database( +/// Try to start the IndexScheduler and AuthController without checking the VERSION file or anything. +fn open_or_create_database_unchecked( opt: &Opt, on_failure: OnFailure, ) -> anyhow::Result<(IndexScheduler, AuthController)> { @@ -227,7 +228,8 @@ fn start_new_database( } } -fn start_or_import_existing_database( +/// Ensure you're in a valid state and open the IndexScheduler + AuthController for you. +fn open_or_create_database( opt: &Opt, empty_db: bool, ) -> anyhow::Result<(IndexScheduler, AuthController)> { @@ -235,7 +237,7 @@ fn start_or_import_existing_database( check_version_file(&opt.db_path)?; } - start_new_database(opt, OnFailure::KeepDb) + open_or_create_database_unchecked(opt, OnFailure::KeepDb) } fn import_dump( From ce84a598734599c26fdc84c8ad88148972b6aab1 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 14 Dec 2022 20:02:39 +0100 Subject: [PATCH 5/5] Re-apply some changes from #3132 --- meilisearch/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index ee88c7f7d..6d18942a4 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -205,7 +205,7 @@ fn open_or_create_database_unchecked( update_file_path: opt.db_path.join("update_files"), indexes_path: opt.db_path.join("indexes"), snapshots_path: opt.snapshot_dir.clone(), - dumps_path: opt.dumps_dir.clone(), + dumps_path: opt.dump_dir.clone(), task_db_size: opt.max_task_db_size.get_bytes() as usize, index_size: opt.max_index_size.get_bytes() as usize, indexer_config: (&opt.indexer_options).try_into()?,