use std::convert::TryFrom; use std::env::VarError; use std::ffi::OsStr; use std::fmt::Display; use std::io::{BufReader, Read}; use std::num::ParseIntError; use std::ops::Deref; use std::path::PathBuf; use std::str::FromStr; use std::sync::Arc; use std::{env, fmt, fs}; use byte_unit::{Byte, ByteError}; use clap::Parser; use meilisearch_types::features::InstanceTogglableFeatures; use meilisearch_types::milli::update::IndexerConfig; use rustls::server::{ AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, ServerSessionMemoryCache, }; use rustls::RootCertStore; use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys}; use serde::{Deserialize, Serialize}; use sysinfo::{MemoryRefreshKind, RefreshKind, System}; use url::Url; const POSSIBLE_ENV: [&str; 2] = ["development", "production"]; const MEILI_DB_PATH: &str = "MEILI_DB_PATH"; const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR"; const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY"; const MEILI_ENV: &str = "MEILI_ENV"; const MEILI_TASK_WEBHOOK_URL: &str = "MEILI_TASK_WEBHOOK_URL"; const MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER: &str = "MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER"; #[cfg(feature = "analytics")] const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS"; const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT"; const MEILI_SSL_CERT_PATH: &str = "MEILI_SSL_CERT_PATH"; const MEILI_SSL_KEY_PATH: &str = "MEILI_SSL_KEY_PATH"; const MEILI_SSL_AUTH_PATH: &str = "MEILI_SSL_AUTH_PATH"; const MEILI_SSL_OCSP_PATH: &str = "MEILI_SSL_OCSP_PATH"; const MEILI_SSL_REQUIRE_AUTH: &str = "MEILI_SSL_REQUIRE_AUTH"; const MEILI_SSL_RESUMPTION: &str = "MEILI_SSL_RESUMPTION"; const MEILI_SSL_TICKETS: &str = "MEILI_SSL_TICKETS"; const MEILI_IMPORT_SNAPSHOT: &str = "MEILI_IMPORT_SNAPSHOT"; const MEILI_IGNORE_MISSING_SNAPSHOT: &str = "MEILI_IGNORE_MISSING_SNAPSHOT"; const MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS: &str = "MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS"; const MEILI_SNAPSHOT_DIR: &str = "MEILI_SNAPSHOT_DIR"; const MEILI_SCHEDULE_SNAPSHOT: &str = "MEILI_SCHEDULE_SNAPSHOT"; const MEILI_IMPORT_DUMP: &str = "MEILI_IMPORT_DUMP"; const MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP"; const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS"; const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR"; const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL"; const MEILI_EXPERIMENTAL_HA_PARAMETERS: &str = "MEILI_EXPERIMENTAL_HA_PARAMETERS"; const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE"; const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS"; const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str = "MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE"; const MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS: &str = "MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS"; const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml"; const DEFAULT_DB_PATH: &str = "./data.ms"; const DEFAULT_HTTP_ADDR: &str = "localhost:7700"; const DEFAULT_ENV: &str = "development"; const DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT: &str = "100 MB"; const DEFAULT_SNAPSHOT_DIR: &str = "snapshots/"; const DEFAULT_SNAPSHOT_INTERVAL_SEC: u64 = 86400; const DEFAULT_SNAPSHOT_INTERVAL_SEC_STR: &str = "86400"; const DEFAULT_DUMP_DIR: &str = "dumps/"; const MEILI_MAX_INDEXING_MEMORY: &str = "MEILI_MAX_INDEXING_MEMORY"; const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS"; const DEFAULT_LOG_EVERY_N: usize = 100_000; // Each environment (index and task-db) is taking space in the virtual address space. // Ideally, indexes can occupy 2TiB each to avoid having to manually resize them. // The actual size of the virtual address space is computed at startup to determine how many 2TiB indexes can be // opened simultaneously. pub const INDEX_SIZE: u64 = 2 * 1024 * 1024 * 1024 * 1024; // 2 TiB pub const TASK_DB_SIZE: u64 = 20 * 1024 * 1024 * 1024; // 20 GiB #[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)] #[serde(rename_all = "UPPERCASE")] pub enum LogLevel { Off, Error, Warn, #[default] Info, Debug, Trace, } #[derive(Debug)] pub struct LogLevelError { pub given_log_level: String, } impl Display for LogLevelError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { writeln!( f, "Log level '{}' is invalid. Accepted values are 'OFF', 'ERROR', 'WARN', 'INFO', 'DEBUG', and 'TRACE'.", self.given_log_level ) } } impl Display for LogLevel { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { LogLevel::Off => Display::fmt("OFF", f), LogLevel::Error => Display::fmt("ERROR", f), LogLevel::Warn => Display::fmt("WARN", f), LogLevel::Info => Display::fmt("INFO", f), LogLevel::Debug => Display::fmt("DEBUG", f), LogLevel::Trace => Display::fmt("TRACE", f), } } } impl std::error::Error for LogLevelError {} impl FromStr for LogLevel { type Err = LogLevelError; fn from_str(s: &str) -> Result { match s.trim().to_lowercase().as_str() { "off" => Ok(LogLevel::Off), "error" => Ok(LogLevel::Error), "warn" => Ok(LogLevel::Warn), "info" => Ok(LogLevel::Info), "debug" => Ok(LogLevel::Debug), "trace" => Ok(LogLevel::Trace), _ => Err(LogLevelError { given_log_level: s.to_owned() }), } } } #[derive(Debug, Clone, Parser, Deserialize)] #[clap(version, next_display_order = None)] #[serde(rename_all = "snake_case", deny_unknown_fields)] pub struct Opt { /// Designates the location where database files will be created and retrieved. #[clap(long, env = MEILI_DB_PATH, default_value_os_t = default_db_path())] #[serde(default = "default_db_path")] pub db_path: PathBuf, /// Sets the HTTP address and port Meilisearch will use. #[clap(long, env = MEILI_HTTP_ADDR, default_value_t = default_http_addr())] #[serde(default = "default_http_addr")] pub http_addr: String, /// Sets the instance's master key, automatically protecting all routes except `GET /health`. #[clap(long, env = MEILI_MASTER_KEY)] pub master_key: Option, /// Configures the instance's environment. Value must be either `production` or `development`. #[clap(long, env = MEILI_ENV, default_value_t = default_env(), value_parser = POSSIBLE_ENV)] #[serde(default = "default_env")] pub env: String, /// Called whenever a task finishes so a third party can be notified. #[clap(long, env = MEILI_TASK_WEBHOOK_URL)] pub task_webhook_url: Option, /// The Authorization header to send on the webhook URL whenever a task finishes so a third party can be notified. #[clap(long, env = MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER)] pub task_webhook_authorization_header: Option, /// Deactivates Meilisearch's built-in telemetry when provided. /// /// Meilisearch automatically collects data from all instances that do not opt out using this flag. /// All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted /// at any time. #[cfg(feature = "analytics")] #[serde(default)] // we can't send true #[clap(long, env = MEILI_NO_ANALYTICS)] pub no_analytics: bool, /// Sets the maximum size of the index. Value must be given in bytes or explicitly stating a base unit (for instance: 107374182400, '107.7Gb', or '107374 Mb'). #[clap(skip = default_max_index_size())] #[serde(skip, default = "default_max_index_size")] pub max_index_size: Byte, /// Sets the maximum size of the task database. Value must be given in bytes or explicitly stating a /// base unit (for instance: 107374182400, '107.7Gb', or '107374 Mb'). #[clap(skip = default_max_task_db_size())] #[serde(skip, default = "default_max_task_db_size")] pub max_task_db_size: Byte, /// Sets the maximum size of accepted payloads. Value must be given in bytes or explicitly stating a /// base unit (for instance: 107374182400, '107.7Gb', or '107374 Mb'). #[clap(long, env = MEILI_HTTP_PAYLOAD_SIZE_LIMIT, default_value_t = default_http_payload_size_limit())] #[serde(default = "default_http_payload_size_limit")] pub http_payload_size_limit: Byte, /// Sets the server's SSL certificates. #[clap(long, env = MEILI_SSL_CERT_PATH, value_parser)] pub ssl_cert_path: Option, /// Sets the server's SSL key files. #[clap(long, env = MEILI_SSL_KEY_PATH, value_parser)] pub ssl_key_path: Option, /// Enables client authentication in the specified path. #[clap(long, env = MEILI_SSL_AUTH_PATH, value_parser)] pub ssl_auth_path: Option, /// Sets the server's OCSP file. *Optional* /// /// Reads DER-encoded OCSP response from OCSPFILE and staple to certificate. #[clap(long, env = MEILI_SSL_OCSP_PATH, value_parser)] pub ssl_ocsp_path: Option, /// Makes SSL authentication mandatory. #[serde(default)] #[clap(long, env = MEILI_SSL_REQUIRE_AUTH)] pub ssl_require_auth: bool, /// Activates SSL session resumption. #[serde(default)] #[clap(long, env = MEILI_SSL_RESUMPTION)] pub ssl_resumption: bool, /// Activates SSL tickets. #[serde(default)] #[clap(long, env = MEILI_SSL_TICKETS)] pub ssl_tickets: bool, /// Launches Meilisearch after importing a previously-generated snapshot at the given filepath. #[clap(long, env = MEILI_IMPORT_SNAPSHOT)] pub import_snapshot: Option, /// Prevents a Meilisearch instance from throwing an error when `--import-snapshot` /// does not point to a valid snapshot file. /// /// This command will throw an error if `--import-snapshot` is not defined. #[clap( long, env = MEILI_IGNORE_MISSING_SNAPSHOT, requires = "import_snapshot" )] #[serde(default)] pub ignore_missing_snapshot: bool, /// Prevents a Meilisearch instance with an existing database from throwing an /// error when using `--import-snapshot`. Instead, the snapshot will be ignored /// and Meilisearch will launch using the existing database. /// /// This command will throw an error if `--import-snapshot` is not defined. #[clap( long, env = MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS, requires = "import_snapshot" )] #[serde(default)] pub ignore_snapshot_if_db_exists: bool, /// Sets the directory where Meilisearch will store snapshots. #[clap(long, env = MEILI_SNAPSHOT_DIR, default_value_os_t = default_snapshot_dir())] #[serde(default = "default_snapshot_dir")] pub snapshot_dir: PathBuf, /// Activates scheduled snapshots when provided. Snapshots are disabled by default. /// /// When provided with a value, defines the interval between each snapshot, in seconds. #[clap(long,env = MEILI_SCHEDULE_SNAPSHOT, num_args(0..=1), value_parser=parse_schedule_snapshot, default_value_t, default_missing_value=default_snapshot_interval_sec(), value_name = "SNAPSHOT_INTERVAL_SEC")] #[serde(default, deserialize_with = "schedule_snapshot_deserialize")] pub schedule_snapshot: ScheduleSnapshot, /// Imports the dump file located at the specified path. Path must point to a `.dump` file. /// If a database already exists, Meilisearch will throw an error and abort launch. #[clap(long, env = MEILI_IMPORT_DUMP, conflicts_with = "import_snapshot")] pub import_dump: Option, /// Prevents Meilisearch from throwing an error when `--import-dump` does not point to /// a valid dump file. Instead, Meilisearch will start normally without importing any dump. /// /// This option will trigger an error if `--import-dump` is not defined. #[clap(long, env = MEILI_IGNORE_MISSING_DUMP, requires = "import_dump")] #[serde(default)] pub ignore_missing_dump: bool, /// Prevents a Meilisearch instance with an existing database from throwing an error /// when using `--import-dump`. Instead, the dump will be ignored and Meilisearch will /// launch using the existing database. /// /// This option will trigger an error if `--import-dump` is not defined. #[clap(long, env = MEILI_IGNORE_DUMP_IF_DB_EXISTS, requires = "import_dump")] #[serde(default)] pub ignore_dump_if_db_exists: bool, /// Sets the directory where Meilisearch will create dump files. #[clap(long, env = MEILI_DUMP_DIR, default_value_os_t = default_dump_dir())] #[serde(default = "default_dump_dir")] pub dump_dir: PathBuf, /// Defines how much detail should be present in Meilisearch's logs. /// /// Meilisearch currently supports six log levels, listed in order of increasing verbosity: OFF, ERROR, WARN, INFO, DEBUG, TRACE. #[clap(long, env = MEILI_LOG_LEVEL, default_value_t)] #[serde(default)] pub log_level: LogLevel, /// Experimental metrics feature. For more information, see: /// /// Enables the Prometheus metrics on the `GET /metrics` endpoint. #[clap(long, env = MEILI_EXPERIMENTAL_ENABLE_METRICS)] #[serde(default)] pub experimental_enable_metrics: bool, /// Experimental logs route feature. For more information, see: /// /// Enables the log route on the `POST /logs/stream` endpoint and the `DELETE /logs/stream` to stop receiving logs. #[clap(long, env = MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE)] #[serde(default)] pub experimental_enable_logs_route: bool, /// Enable multiple features that helps you to run meilisearch in a high availability context. /// TODO: TAMO: Update the discussion link /// For more information, see: /// /// - /!\ Disable the automatic clean up of old processed tasks, you're in charge of that now /// - Lets you specify a custom task ID upon registering a task /// - Lets you execute dry-register a task (get an answer from the route but nothing is actually registered in meilisearch and it won't be processed) #[clap(long, env = MEILI_EXPERIMENTAL_HA_PARAMETERS)] #[serde(default)] pub experimental_ha_parameters: bool, /// Experimental RAM reduction during indexing, do not use in production, see: #[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)] #[serde(default)] pub experimental_reduce_indexing_memory_usage: bool, /// Experimentally reduces the maximum number of tasks that will be processed at once, see: #[clap(long, env = MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS, default_value_t = default_limit_batched_tasks())] #[serde(default = "default_limit_batched_tasks")] pub experimental_max_number_of_batched_tasks: usize, #[serde(flatten)] #[clap(flatten)] pub indexer_options: IndexerOpts, /// Set the path to a configuration file that should be used to setup the engine. /// Format must be TOML. #[clap(long)] pub config_file_path: Option, } impl Opt { /// Whether analytics should be enabled or not. #[cfg(all(not(debug_assertions), feature = "analytics"))] pub fn analytics(&self) -> bool { !self.no_analytics } /// Build a new Opt from config file, env vars and cli args. pub fn try_build() -> anyhow::Result<(Self, Option)> { // Parse the args to get the config_file_path. let mut opts = Opt::parse(); let mut config_read_from = None; let user_specified_config_file_path = opts .config_file_path .clone() .or_else(|| env::var("MEILI_CONFIG_FILE_PATH").map(PathBuf::from).ok()); let config_file_path = user_specified_config_file_path .clone() .unwrap_or_else(|| PathBuf::from(DEFAULT_CONFIG_FILE_PATH)); match std::fs::read_to_string(&config_file_path) { Ok(config) => { // If the file is successfully read, we deserialize it with `toml`. let opt_from_config = toml::from_str::(&config)?; // Return an error if config file contains 'config_file_path' // Using that key in the config file doesn't make sense bc it creates a logical loop (config file referencing itself) if opt_from_config.config_file_path.is_some() { anyhow::bail!("`config_file_path` is not supported in the configuration file") } // We inject the values from the toml in the corresponding env vars if needs be. Doing so, we respect the priority toml < env vars < cli args. opt_from_config.export_to_env(); // Once injected we parse the cli args once again to take the new env vars into scope. opts = Opt::parse(); config_read_from = Some(config_file_path); } Err(e) => { if let Some(path) = user_specified_config_file_path { // If we have an error while reading the file defined by the user. anyhow::bail!( "unable to open or read the {:?} configuration file: {}.", path, e, ) } } } Ok((opts, config_read_from)) } /// Exports the opts values to their corresponding env vars if they are not set. fn export_to_env(self) { let Opt { db_path, http_addr, master_key, env, task_webhook_url, task_webhook_authorization_header, max_index_size: _, max_task_db_size: _, http_payload_size_limit, experimental_max_number_of_batched_tasks, ssl_cert_path, ssl_key_path, ssl_auth_path, ssl_ocsp_path, ssl_require_auth, ssl_resumption, ssl_tickets, snapshot_dir, schedule_snapshot, dump_dir, log_level, indexer_options, import_snapshot: _, ignore_missing_snapshot: _, ignore_snapshot_if_db_exists: _, import_dump: _, ignore_missing_dump: _, ignore_dump_if_db_exists: _, config_file_path: _, #[cfg(feature = "analytics")] no_analytics, experimental_enable_metrics, experimental_enable_logs_route, experimental_ha_parameters, experimental_reduce_indexing_memory_usage, } = self; export_to_env_if_not_present(MEILI_DB_PATH, db_path); export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr); if let Some(master_key) = master_key { export_to_env_if_not_present(MEILI_MASTER_KEY, master_key); } export_to_env_if_not_present(MEILI_ENV, env); if let Some(task_webhook_url) = task_webhook_url { export_to_env_if_not_present(MEILI_TASK_WEBHOOK_URL, task_webhook_url.to_string()); } if let Some(task_webhook_authorization_header) = task_webhook_authorization_header { export_to_env_if_not_present( MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER, task_webhook_authorization_header, ); } #[cfg(feature = "analytics")] { export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string()); } export_to_env_if_not_present( MEILI_HTTP_PAYLOAD_SIZE_LIMIT, http_payload_size_limit.to_string(), ); export_to_env_if_not_present( MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS, experimental_max_number_of_batched_tasks.to_string(), ); if let Some(ssl_cert_path) = ssl_cert_path { export_to_env_if_not_present(MEILI_SSL_CERT_PATH, ssl_cert_path); } if let Some(ssl_key_path) = ssl_key_path { export_to_env_if_not_present(MEILI_SSL_KEY_PATH, ssl_key_path); } if let Some(ssl_auth_path) = ssl_auth_path { export_to_env_if_not_present(MEILI_SSL_AUTH_PATH, ssl_auth_path); } if let Some(ssl_ocsp_path) = ssl_ocsp_path { export_to_env_if_not_present(MEILI_SSL_OCSP_PATH, ssl_ocsp_path); } export_to_env_if_not_present(MEILI_SSL_REQUIRE_AUTH, ssl_require_auth.to_string()); export_to_env_if_not_present(MEILI_SSL_RESUMPTION, ssl_resumption.to_string()); export_to_env_if_not_present(MEILI_SSL_TICKETS, ssl_tickets.to_string()); export_to_env_if_not_present(MEILI_SNAPSHOT_DIR, snapshot_dir); if let Some(snapshot_interval) = schedule_snapshot_to_env(schedule_snapshot) { export_to_env_if_not_present(MEILI_SCHEDULE_SNAPSHOT, snapshot_interval) } export_to_env_if_not_present(MEILI_DUMP_DIR, dump_dir); export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level.to_string()); export_to_env_if_not_present( MEILI_EXPERIMENTAL_ENABLE_METRICS, experimental_enable_metrics.to_string(), ); export_to_env_if_not_present( MEILI_EXPERIMENTAL_HA_PARAMETERS, experimental_ha_parameters.to_string(), ); export_to_env_if_not_present( MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE, experimental_enable_logs_route.to_string(), ); export_to_env_if_not_present( MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE, experimental_reduce_indexing_memory_usage.to_string(), ); indexer_options.export_to_env(); } pub fn get_ssl_config(&self) -> anyhow::Result> { if let (Some(cert_path), Some(key_path)) = (&self.ssl_cert_path, &self.ssl_key_path) { let config = rustls::ServerConfig::builder().with_safe_defaults(); let config = match &self.ssl_auth_path { Some(auth_path) => { let roots = load_certs(auth_path.to_path_buf())?; let mut client_auth_roots = RootCertStore::empty(); for root in roots { client_auth_roots.add(&root).unwrap(); } if self.ssl_require_auth { let verifier = AllowAnyAuthenticatedClient::new(client_auth_roots); config.with_client_cert_verifier(verifier) } else { let verifier = AllowAnyAnonymousOrAuthenticatedClient::new(client_auth_roots); config.with_client_cert_verifier(verifier) } } None => config.with_no_client_auth(), }; let certs = load_certs(cert_path.to_path_buf())?; let privkey = load_private_key(key_path.to_path_buf())?; let ocsp = load_ocsp(&self.ssl_ocsp_path)?; let mut config = config .with_single_cert_with_ocsp_and_sct(certs, privkey, ocsp, vec![]) .map_err(|_| anyhow::anyhow!("bad certificates/private key"))?; config.key_log = Arc::new(rustls::KeyLogFile::new()); if self.ssl_resumption { config.session_storage = ServerSessionMemoryCache::new(256); } if self.ssl_tickets { config.ticketer = rustls::Ticketer::new().unwrap(); } Ok(Some(config)) } else { Ok(None) } } pub(crate) fn to_instance_features(&self) -> InstanceTogglableFeatures { InstanceTogglableFeatures { metrics: self.experimental_enable_metrics, logs_route: self.experimental_enable_logs_route, } } } #[derive(Debug, Default, Clone, Parser, Deserialize)] pub struct IndexerOpts { /// Sets the maximum amount of RAM Meilisearch can use when indexing. By default, Meilisearch /// uses no more than two thirds of available memory. #[clap(long, env = MEILI_MAX_INDEXING_MEMORY, default_value_t)] #[serde(default)] pub max_indexing_memory: MaxMemory, /// Sets the maximum number of threads Meilisearch can use during indexation. By default, the /// indexer avoids using more than half of a machine's total processing units. This ensures /// Meilisearch is always ready to perform searches, even while you are updating an index. #[clap(long, env = MEILI_MAX_INDEXING_THREADS, default_value_t)] #[serde(default)] pub max_indexing_threads: MaxThreads, /// Whether or not we want to determine the budget of virtual memory address space we have available dynamically /// (the default), or statically. /// /// Determining the budget of virtual memory address space dynamically takes some time on some systems (such as macOS) /// and may make tests non-deterministic, so we want to skip it in tests. #[clap(skip)] #[serde(skip)] pub skip_index_budget: bool, } impl IndexerOpts { /// Exports the values to their corresponding env vars if they are not set. pub fn export_to_env(self) { let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } = self; if let Some(max_indexing_memory) = max_indexing_memory.0 { export_to_env_if_not_present( MEILI_MAX_INDEXING_MEMORY, max_indexing_memory.to_string(), ); } export_to_env_if_not_present( MEILI_MAX_INDEXING_THREADS, max_indexing_threads.0.to_string(), ); } } impl TryFrom<&IndexerOpts> for IndexerConfig { type Error = anyhow::Error; fn try_from(other: &IndexerOpts) -> Result { let thread_pool = rayon::ThreadPoolBuilder::new() .thread_name(|index| format!("indexing-thread:{index}")) .num_threads(*other.max_indexing_threads) .build()?; Ok(Self { log_every_n: Some(DEFAULT_LOG_EVERY_N), max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize), thread_pool: Some(thread_pool), max_positions_per_attributes: None, skip_index_budget: other.skip_index_budget, ..Default::default() }) } } /// A type used to detect the max memory available and use 2/3 of it. #[derive(Debug, Clone, Copy, Deserialize, Serialize)] pub struct MaxMemory(Option); impl FromStr for MaxMemory { type Err = ByteError; fn from_str(s: &str) -> Result { Byte::from_str(s).map(Some).map(MaxMemory) } } impl Default for MaxMemory { fn default() -> MaxMemory { MaxMemory(total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_bytes)) } } impl fmt::Display for MaxMemory { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self.0 { Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)), None => f.write_str("unknown"), } } } impl Deref for MaxMemory { type Target = Option; fn deref(&self) -> &Self::Target { &self.0 } } impl MaxMemory { pub fn unlimited() -> Self { Self(None) } } /// Returns the total amount of bytes available or `None` if this system isn't supported. fn total_memory_bytes() -> Option { if sysinfo::IS_SUPPORTED_SYSTEM { let memory_kind = RefreshKind::new().with_memory(MemoryRefreshKind::new().with_ram()); let mut system = System::new_with_specifics(memory_kind); system.refresh_memory(); Some(system.total_memory()) } else { None } } #[derive(Debug, Clone, Copy, Deserialize, Serialize)] pub struct MaxThreads(usize); impl FromStr for MaxThreads { type Err = ParseIntError; fn from_str(s: &str) -> Result { usize::from_str(s).map(Self) } } impl Default for MaxThreads { fn default() -> Self { MaxThreads(num_cpus::get() / 2) } } impl fmt::Display for MaxThreads { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", self.0) } } impl Deref for MaxThreads { type Target = usize; fn deref(&self) -> &Self::Target { &self.0 } } fn load_certs(filename: PathBuf) -> anyhow::Result> { let certfile = fs::File::open(filename).map_err(|_| anyhow::anyhow!("cannot open certificate file"))?; let mut reader = BufReader::new(certfile); certs(&mut reader) .map(|certs| certs.into_iter().map(rustls::Certificate).collect()) .map_err(|_| anyhow::anyhow!("cannot read certificate file")) } fn load_private_key(filename: PathBuf) -> anyhow::Result { let rsa_keys = { let keyfile = fs::File::open(filename.clone()) .map_err(|_| anyhow::anyhow!("cannot open private key file"))?; let mut reader = BufReader::new(keyfile); rsa_private_keys(&mut reader) .map_err(|_| anyhow::anyhow!("file contains invalid rsa private key"))? }; let pkcs8_keys = { let keyfile = fs::File::open(filename) .map_err(|_| anyhow::anyhow!("cannot open private key file"))?; let mut reader = BufReader::new(keyfile); pkcs8_private_keys(&mut reader).map_err(|_| { anyhow::anyhow!( "file contains invalid pkcs8 private key (encrypted keys not supported)" ) })? }; // prefer to load pkcs8 keys if !pkcs8_keys.is_empty() { Ok(rustls::PrivateKey(pkcs8_keys[0].clone())) } else { assert!(!rsa_keys.is_empty()); Ok(rustls::PrivateKey(rsa_keys[0].clone())) } } fn load_ocsp(filename: &Option) -> anyhow::Result> { let mut ret = Vec::new(); if let Some(ref name) = filename { fs::File::open(name) .map_err(|_| anyhow::anyhow!("cannot open ocsp file"))? .read_to_end(&mut ret) .map_err(|_| anyhow::anyhow!("cannot read oscp file"))?; } Ok(ret) } /// Checks if the key is defined in the environment variables. /// If not, inserts it with the given value. pub fn export_to_env_if_not_present(key: &str, value: T) where T: AsRef, { if let Err(VarError::NotPresent) = std::env::var(key) { std::env::set_var(key, value); } } /// Functions used to get default value for `Opt` fields, needs to be function because of serde's default attribute. fn default_db_path() -> PathBuf { PathBuf::from(DEFAULT_DB_PATH) } pub fn default_http_addr() -> String { DEFAULT_HTTP_ADDR.to_string() } fn default_env() -> String { DEFAULT_ENV.to_string() } fn default_max_index_size() -> Byte { Byte::from_bytes(INDEX_SIZE) } fn default_max_task_db_size() -> Byte { Byte::from_bytes(TASK_DB_SIZE) } fn default_http_payload_size_limit() -> Byte { Byte::from_str(DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT).unwrap() } fn default_limit_batched_tasks() -> usize { usize::MAX } fn default_snapshot_dir() -> PathBuf { PathBuf::from(DEFAULT_SNAPSHOT_DIR) } fn default_snapshot_interval_sec() -> &'static str { DEFAULT_SNAPSHOT_INTERVAL_SEC_STR } fn default_dump_dir() -> PathBuf { PathBuf::from(DEFAULT_DUMP_DIR) } /// Indicates if a snapshot was scheduled, and if yes with which interval. #[derive(Debug, Default, Copy, Clone, Deserialize, Serialize)] pub enum ScheduleSnapshot { /// Scheduled snapshots are disabled. #[default] Disabled, /// Snapshots are scheduled at the specified interval, in seconds. Enabled(u64), } impl Display for ScheduleSnapshot { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { ScheduleSnapshot::Disabled => write!(f, ""), ScheduleSnapshot::Enabled(value) => write!(f, "{}", value), } } } impl FromStr for ScheduleSnapshot { type Err = ParseIntError; fn from_str(s: &str) -> Result { Ok(match s { "" => ScheduleSnapshot::Disabled, s => ScheduleSnapshot::Enabled(s.parse()?), }) } } fn parse_schedule_snapshot(s: &str) -> Result { Ok(if s.is_empty() { ScheduleSnapshot::Disabled } else { ScheduleSnapshot::from_str(s)? }) } fn schedule_snapshot_to_env(schedule_snapshot: ScheduleSnapshot) -> Option { match schedule_snapshot { ScheduleSnapshot::Enabled(snapshot_delay) => Some(snapshot_delay.to_string()), _ => None, } } fn schedule_snapshot_deserialize<'de, D>(deserializer: D) -> Result where D: serde::Deserializer<'de>, { struct BoolOrInt; impl<'de> serde::de::Visitor<'de> for BoolOrInt { type Value = ScheduleSnapshot; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("integer or boolean") } fn visit_bool(self, value: bool) -> Result where E: serde::de::Error, { Ok(if value { ScheduleSnapshot::Enabled(DEFAULT_SNAPSHOT_INTERVAL_SEC) } else { ScheduleSnapshot::Disabled }) } fn visit_i64(self, v: i64) -> Result where E: serde::de::Error, { Ok(ScheduleSnapshot::Enabled(v as u64)) } fn visit_u64(self, v: u64) -> Result where E: serde::de::Error, { Ok(ScheduleSnapshot::Enabled(v)) } fn visit_none(self) -> Result where E: serde::de::Error, { Ok(ScheduleSnapshot::Disabled) } fn visit_unit(self) -> Result where E: serde::de::Error, { Ok(ScheduleSnapshot::Disabled) } } deserializer.deserialize_any(BoolOrInt) } #[cfg(test)] mod test { use super::*; #[test] fn test_valid_opt() { assert!(Opt::try_parse_from(Some("")).is_ok()); } #[test] #[ignore] fn test_meilli_config_file_path_valid() { temp_env::with_vars( vec![("MEILI_CONFIG_FILE_PATH", Some("../config.toml"))], // Relative path in meilisearch package || { assert!(Opt::try_build().is_ok()); }, ); } #[test] #[ignore] fn test_meilli_config_file_path_invalid() { temp_env::with_vars(vec![("MEILI_CONFIG_FILE_PATH", Some("../configgg.toml"))], || { let possible_error_messages = [ "unable to open or read the \"../configgg.toml\" configuration file: No such file or directory (os error 2).", "unable to open or read the \"../configgg.toml\" configuration file: The system cannot find the file specified. (os error 2).", // Windows ]; let error_message = Opt::try_build().unwrap_err().to_string(); assert!( possible_error_messages.contains(&error_message.as_str()), "Expected onf of {:?}, got {:?}.", possible_error_messages, error_message ); }); } }