Add an experimental cli flag to disable snapshot compaction

This commit is contained in:
Tamo 2025-05-14 15:50:05 +02:00
parent d9a527854a
commit 83e71cd7b9
7 changed files with 35 additions and 3 deletions

View File

@ -131,6 +131,8 @@ pub struct IndexSchedulerOptions {
///
/// 0 disables the cache.
pub embedding_cache_cap: usize,
/// Snapshot compaction status.
pub experimental_no_snapshot_compaction: bool,
}
/// Structure which holds meilisearch's indexes and schedules the tasks

View File

@ -83,6 +83,9 @@ pub struct Scheduler {
///
/// 0 disables the cache.
pub(crate) embedding_cache_cap: usize,
/// Snapshot compaction status.
pub(crate) experimental_no_snapshot_compaction: bool,
}
impl Scheduler {
@ -98,6 +101,7 @@ impl Scheduler {
auth_env: self.auth_env.clone(),
version_file_path: self.version_file_path.clone(),
embedding_cache_cap: self.embedding_cache_cap,
experimental_no_snapshot_compaction: self.experimental_no_snapshot_compaction,
}
}
@ -114,6 +118,7 @@ impl Scheduler {
auth_env,
version_file_path: options.version_file_path.clone(),
embedding_cache_cap: options.embedding_cache_cap,
experimental_no_snapshot_compaction: options.experimental_no_snapshot_compaction,
}
}
}

View File

@ -41,7 +41,12 @@ impl IndexScheduler {
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler);
let dst = temp_snapshot_dir.path().join("tasks");
fs::create_dir_all(&dst)?;
self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Disabled)?;
let compaction_option = if self.scheduler.experimental_no_snapshot_compaction {
CompactionOption::Disabled
} else {
CompactionOption::Enabled
};
self.env.copy_to_path(dst.join("data.mdb"), compaction_option)?;
// 2.2 Create a read transaction on the index-scheduler
let rtxn = self.env.read_txn()?;
@ -80,7 +85,7 @@ impl IndexScheduler {
let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
fs::create_dir_all(&dst)?;
index
.copy_to_path(dst.join("data.mdb"), CompactionOption::Disabled)
.copy_to_path(dst.join("data.mdb"), compaction_option)
.map_err(|e| Error::from_milli(e, Some(name.to_string())))?;
}
@ -90,7 +95,7 @@ impl IndexScheduler {
progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys);
let dst = temp_snapshot_dir.path().join("auth");
fs::create_dir_all(&dst)?;
self.scheduler.auth_env.copy_to_path(dst.join("data.mdb"), CompactionOption::Disabled)?;
self.scheduler.auth_env.copy_to_path(dst.join("data.mdb"), compaction_option)?;
// 5. Copy and tarball the flat snapshot
progress.update_progress(SnapshotCreationProgress::CreateTheTarball);

View File

@ -113,6 +113,7 @@ impl IndexScheduler {
instance_features: Default::default(),
auto_upgrade: true, // Don't cost much and will ensure the happy path works
embedding_cache_cap: 10,
experimental_no_snapshot_compaction: false,
};
let version = configuration(&mut options).unwrap_or({
(versioning::VERSION_MAJOR, versioning::VERSION_MINOR, versioning::VERSION_PATCH)

View File

@ -200,6 +200,7 @@ struct Infos {
experimental_get_task_documents_route: bool,
experimental_composite_embedders: bool,
experimental_embedding_cache_entries: usize,
experimental_no_snapshot_compaction: bool,
gpu_enabled: bool,
db_path: bool,
import_dump: bool,
@ -248,6 +249,7 @@ impl Infos {
experimental_max_number_of_batched_tasks,
experimental_limit_batched_tasks_total_size,
experimental_embedding_cache_entries,
experimental_no_snapshot_compaction,
http_addr,
master_key: _,
env,
@ -315,6 +317,7 @@ impl Infos {
experimental_get_task_documents_route: get_task_documents_route,
experimental_composite_embedders: composite_embedders,
experimental_embedding_cache_entries,
experimental_no_snapshot_compaction,
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(),

View File

@ -234,6 +234,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
instance_features: opt.to_instance_features(),
auto_upgrade: opt.experimental_dumpless_upgrade,
embedding_cache_cap: opt.experimental_embedding_cache_entries,
experimental_no_snapshot_compaction: opt.experimental_no_snapshot_compaction,
};
let binary_version = (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH);

View File

@ -65,6 +65,7 @@ const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE: &str =
"MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_SIZE";
const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str =
"MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES";
const MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION: &str = "MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION";
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
const DEFAULT_DB_PATH: &str = "./data.ms";
const DEFAULT_HTTP_ADDR: &str = "localhost:7700";
@ -455,6 +456,15 @@ pub struct Opt {
#[serde(default = "default_embedding_cache_entries")]
pub experimental_embedding_cache_entries: usize,
/// Experimental no snapshot compaction feature.
///
/// When enabled, Meilisearch will not compact snapshots during creation.
///
/// For more information, see <https://github.com/orgs/meilisearch/discussions/833>.
#[clap(long, env = MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION)]
#[serde(default)]
pub experimental_no_snapshot_compaction: bool,
#[serde(flatten)]
#[clap(flatten)]
pub indexer_options: IndexerOpts,
@ -559,6 +569,7 @@ impl Opt {
experimental_max_number_of_batched_tasks,
experimental_limit_batched_tasks_total_size,
experimental_embedding_cache_entries,
experimental_no_snapshot_compaction,
} = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@ -655,6 +666,10 @@ impl Opt {
MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES,
experimental_embedding_cache_entries.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION,
experimental_no_snapshot_compaction.to_string(),
);
indexer_options.export_to_env();
}