Add support for config file

This commit is contained in:
mlemesle 2022-09-06 09:23:16 +02:00
parent 3a62e97c9f
commit 403226a029
6 changed files with 196 additions and 46 deletions

1
Cargo.lock generated
View File

@ -2087,6 +2087,7 @@ dependencies = [
"time 0.3.9",
"tokio",
"tokio-stream",
"toml",
"urlencoding",
"uuid",
"vergen",

View File

@ -76,6 +76,7 @@ thiserror = "1.0.30"
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = { version = "1.17.0", features = ["full"] }
tokio-stream = "0.1.8"
toml = "0.5.9"
uuid = { version = "1.1.2", features = ["serde", "v4"] }
walkdir = "2.3.2"
prometheus = { version = "0.13.0", features = ["process"], optional = true }

View File

@ -3,7 +3,6 @@ use std::sync::Arc;
use actix_web::http::KeepAlive;
use actix_web::HttpServer;
use clap::Parser;
use meilisearch_auth::AuthController;
use meilisearch_http::analytics;
use meilisearch_http::analytics::Analytics;
@ -29,7 +28,9 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
#[actix_web::main]
async fn main() -> anyhow::Result<()> {
let opt = Opt::parse();
let opt = Opt::build();
println!("{:?}", opt);
setup(&opt)?;

View File

@ -4,8 +4,11 @@ use std::path::PathBuf;
use std::sync::Arc;
use byte_unit::Byte;
use clap::Parser;
use meilisearch_lib::options::{IndexerOpts, SchedulerConfig};
use clap::{Arg, Command, Parser};
use meilisearch_lib::{
export_to_env_if_not_present,
options::{IndexerOpts, SchedulerConfig},
};
use rustls::{
server::{
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient,
@ -14,90 +17,114 @@ use rustls::{
RootCertStore,
};
use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
use serde::Serialize;
use serde::{Deserialize, Serialize};
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
#[derive(Debug, Clone, Parser, Serialize)]
const MEILI_DB_PATH: &str = "MEILI_DB_PATH";
const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
const MEILI_ENV: &str = "MEILI_ENV";
#[cfg(all(not(debug_assertions), feature = "analytics"))]
const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
const MEILI_MAX_INDEX_SIZE: &str = "MEILI_MAX_INDEX_SIZE";
const MEILI_MAX_TASK_DB_SIZE: &str = "MEILI_MAX_TASK_DB_SIZE";
const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
const MEILI_SSL_CERT_PATH: &str = "MEILI_SSL_CERT_PATH";
const MEILI_SSL_KEY_PATH: &str = "MEILI_SSL_KEY_PATH";
const MEILI_SSL_AUTH_PATH: &str = "MEILI_SSL_AUTH_PATH";
const MEILI_SSL_OCSP_PATH: &str = "MEILI_SSL_OCSP_PATH";
const MEILI_SSL_REQUIRE_AUTH: &str = "MEILI_SSL_REQUIRE_AUTH";
const MEILI_SSL_RESUMPTION: &str = "MEILI_SSL_RESUMPTION";
const MEILI_SSL_TICKETS: &str = "MEILI_SSL_TICKETS";
const MEILI_SNAPSHOT_DIR: &str = "MEILI_SNAPSHOT_DIR";
const MEILI_SCHEDULE_SNAPSHOT: &str = "MEILI_SCHEDULE_SNAPSHOT";
const MEILI_SNAPSHOT_INTERVAL_SEC: &str = "MEILI_SNAPSHOT_INTERVAL_SEC";
const MEILI_DUMPS_DIR: &str = "MEILI_DUMPS_DIR";
const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
#[cfg(feature = "metrics")]
const MEILI_ENABLE_METRICS_ROUTE: &str = "MEILI_ENABLE_METRICS_ROUTE";
#[derive(Debug, Clone, Parser, Serialize, Deserialize)]
#[clap(version)]
pub struct Opt {
/// The destination where the database must be created.
#[clap(long, env = "MEILI_DB_PATH", default_value = "./data.ms")]
#[clap(long, env = MEILI_DB_PATH, default_value = "./data.ms")]
pub db_path: PathBuf,
/// The address on which the http server will listen.
#[clap(long, env = "MEILI_HTTP_ADDR", default_value = "127.0.0.1:7700")]
#[clap(long, env = MEILI_HTTP_ADDR, default_value = "127.0.0.1:7700")]
pub http_addr: String,
/// The master key allowing you to do everything on the server.
#[serde(skip)]
#[clap(long, env = "MEILI_MASTER_KEY")]
#[serde(skip_serializing)]
#[clap(long, env = MEILI_MASTER_KEY)]
pub master_key: Option<String>,
/// This environment variable must be set to `production` if you are running in production.
/// If the server is running in development mode more logs will be displayed,
/// and the master key can be avoided which implies that there is no security on the updates routes.
/// This is useful to debug when integrating the engine with another service.
#[clap(long, env = "MEILI_ENV", default_value = "development", possible_values = &POSSIBLE_ENV)]
#[clap(long, env = MEILI_ENV, default_value = "development", possible_values = &POSSIBLE_ENV)]
pub env: String,
/// Do not send analytics to Meili.
#[cfg(all(not(debug_assertions), feature = "analytics"))]
#[serde(skip)] // we can't send true
#[clap(long, env = "MEILI_NO_ANALYTICS")]
#[serde(skip_serializing)] // we can't send true
#[clap(long, env = MEILI_NO_ANALYTICS)]
pub no_analytics: bool,
/// The maximum size, in bytes, of the main lmdb database directory
#[clap(long, env = "MEILI_MAX_INDEX_SIZE", default_value = "100 GiB")]
#[clap(long, env = MEILI_MAX_INDEX_SIZE, default_value = "100 GiB")]
pub max_index_size: Byte,
/// The maximum size, in bytes, of the update lmdb database directory
#[clap(long, env = "MEILI_MAX_TASK_DB_SIZE", default_value = "100 GiB")]
#[clap(long, env = MEILI_MAX_TASK_DB_SIZE, default_value = "100 GiB")]
pub max_task_db_size: Byte,
/// The maximum size, in bytes, of accepted JSON payloads
#[clap(long, env = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT", default_value = "100 MB")]
#[clap(long, env = MEILI_HTTP_PAYLOAD_SIZE_LIMIT, default_value = "100 MB")]
pub http_payload_size_limit: Byte,
/// Read server certificates from CERTFILE.
/// This should contain PEM-format certificates
/// in the right order (the first certificate should
/// certify KEYFILE, the last should be a root CA).
#[serde(skip)]
#[clap(long, env = "MEILI_SSL_CERT_PATH", parse(from_os_str))]
#[serde(skip_serializing)]
#[clap(long, env = MEILI_SSL_CERT_PATH, parse(from_os_str))]
pub ssl_cert_path: Option<PathBuf>,
/// Read private key from KEYFILE. This should be a RSA
/// private key or PKCS8-encoded private key, in PEM format.
#[serde(skip)]
#[clap(long, env = "MEILI_SSL_KEY_PATH", parse(from_os_str))]
#[serde(skip_serializing)]
#[clap(long, env = MEILI_SSL_KEY_PATH, parse(from_os_str))]
pub ssl_key_path: Option<PathBuf>,
/// Enable client authentication, and accept certificates
/// signed by those roots provided in CERTFILE.
#[clap(long, env = "MEILI_SSL_AUTH_PATH", parse(from_os_str))]
#[serde(skip)]
#[clap(long, env = MEILI_SSL_AUTH_PATH, parse(from_os_str))]
#[serde(skip_serializing)]
pub ssl_auth_path: Option<PathBuf>,
/// Read DER-encoded OCSP response from OCSPFILE and staple to certificate.
/// Optional
#[serde(skip)]
#[clap(long, env = "MEILI_SSL_OCSP_PATH", parse(from_os_str))]
#[serde(skip_serializing)]
#[clap(long, env = MEILI_SSL_OCSP_PATH, parse(from_os_str))]
pub ssl_ocsp_path: Option<PathBuf>,
/// Send a fatal alert if the client does not complete client authentication.
#[serde(skip)]
#[clap(long, env = "MEILI_SSL_REQUIRE_AUTH")]
#[serde(skip_serializing)]
#[clap(long, env = MEILI_SSL_REQUIRE_AUTH)]
pub ssl_require_auth: bool,
/// SSL support session resumption
#[serde(skip)]
#[clap(long, env = "MEILI_SSL_RESUMPTION")]
#[serde(skip_serializing)]
#[clap(long, env = MEILI_SSL_RESUMPTION)]
pub ssl_resumption: bool,
/// SSL support tickets.
#[serde(skip)]
#[clap(long, env = "MEILI_SSL_TICKETS")]
#[serde(skip_serializing)]
#[clap(long, env = MEILI_SSL_TICKETS)]
pub ssl_tickets: bool,
/// Defines the path of the snapshot file to import.
@ -123,15 +150,15 @@ pub struct Opt {
pub ignore_snapshot_if_db_exists: bool,
/// Defines the directory path where meilisearch will create snapshot each snapshot_time_gap.
#[clap(long, env = "MEILI_SNAPSHOT_DIR", default_value = "snapshots/")]
#[clap(long, env = MEILI_SNAPSHOT_DIR, default_value = "snapshots/")]
pub snapshot_dir: PathBuf,
/// Activate snapshot scheduling.
#[clap(long, env = "MEILI_SCHEDULE_SNAPSHOT")]
#[clap(long, env = MEILI_SCHEDULE_SNAPSHOT)]
pub schedule_snapshot: bool,
/// Defines time interval, in seconds, between each snapshot creation.
#[clap(long, env = "MEILI_SNAPSHOT_INTERVAL_SEC", default_value = "86400")] // 24h
#[clap(long, env = MEILI_SNAPSHOT_INTERVAL_SEC, default_value = "86400")] // 24h
pub snapshot_interval_sec: u64,
/// Import a dump from the specified path, must be a `.dump` file.
@ -147,16 +174,16 @@ pub struct Opt {
pub ignore_dump_if_db_exists: bool,
/// Folder where dumps are created when the dump route is called.
#[clap(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")]
#[clap(long, env = MEILI_DUMPS_DIR, default_value = "dumps/")]
pub dumps_dir: PathBuf,
/// Set the log level
#[clap(long, env = "MEILI_LOG_LEVEL", default_value = "info")]
#[clap(long, env = MEILI_LOG_LEVEL, default_value = "info")]
pub log_level: String,
/// Enables Prometheus metrics and /metrics route.
#[cfg(feature = "metrics")]
#[clap(long, env = "MEILI_ENABLE_METRICS_ROUTE")]
#[clap(long, env = MEILI_ENABLE_METRICS_ROUTE)]
pub enable_metrics_route: bool,
#[serde(flatten)]
@ -175,6 +202,83 @@ impl Opt {
!self.no_analytics
}
pub fn build() -> Self {
let args = Command::new("config")
.arg(
Arg::new("config_file_path")
.long("config-file-path")
.takes_value(true)
.default_value("./config.toml")
.help("Path to a config file, must be TOML format"),
)
.get_matches();
let config_file_path = args
.value_of("config_file_path")
.expect("default value present");
if let Some(Ok(opts_from_file)) = match std::fs::read_to_string(config_file_path) {
Ok(config_str) => Some(toml::from_str::<Opt>(&config_str)),
Err(err) => {
log::debug!("can't read {} : {}", config_file_path, err);
None
}
} {
opts_from_file.export_to_env();
}
Opt::parse()
}
fn export_to_env(self) {
export_to_env_if_not_present(MEILI_DB_PATH, self.db_path);
export_to_env_if_not_present(MEILI_HTTP_ADDR, self.http_addr);
if let Some(master_key) = self.master_key {
export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
}
export_to_env_if_not_present(MEILI_ENV, self.env);
#[cfg(all(not(debug_assertions), feature = "analytics"))]
{
export_to_env_if_not_present(MEILI_NO_ANALYTICS, self.no_analytics);
}
export_to_env_if_not_present(MEILI_MAX_INDEX_SIZE, self.max_index_size.to_string());
export_to_env_if_not_present(MEILI_MAX_TASK_DB_SIZE, self.max_task_db_size.to_string());
export_to_env_if_not_present(
MEILI_HTTP_PAYLOAD_SIZE_LIMIT,
self.http_payload_size_limit.to_string(),
);
if let Some(ssl_cert_path) = self.ssl_cert_path {
export_to_env_if_not_present(MEILI_SSL_CERT_PATH, ssl_cert_path);
}
if let Some(ssl_key_path) = self.ssl_key_path {
export_to_env_if_not_present(MEILI_SSL_KEY_PATH, ssl_key_path);
}
if let Some(ssl_auth_path) = self.ssl_auth_path {
export_to_env_if_not_present(MEILI_SSL_AUTH_PATH, ssl_auth_path);
}
if let Some(ssl_ocsp_path) = self.ssl_ocsp_path {
export_to_env_if_not_present(MEILI_SSL_OCSP_PATH, ssl_ocsp_path);
}
export_to_env_if_not_present(MEILI_SSL_REQUIRE_AUTH, self.ssl_require_auth.to_string());
export_to_env_if_not_present(MEILI_SSL_RESUMPTION, self.ssl_resumption.to_string());
export_to_env_if_not_present(MEILI_SSL_TICKETS, self.ssl_tickets.to_string());
export_to_env_if_not_present(MEILI_SNAPSHOT_DIR, self.snapshot_dir);
export_to_env_if_not_present(MEILI_SCHEDULE_SNAPSHOT, self.schedule_snapshot.to_string());
export_to_env_if_not_present(
MEILI_SNAPSHOT_INTERVAL_SEC,
self.snapshot_interval_sec.to_string(),
);
export_to_env_if_not_present(MEILI_DUMPS_DIR, self.dumps_dir);
export_to_env_if_not_present(MEILI_LOG_LEVEL, self.log_level);
#[cfg(feature = "metrics")]
{
export_to_env_if_not_present(
MEILI_ENABLE_METRICS_ROUTE,
self.enable_metrics_route.to_string(),
);
}
self.indexer_options.export_to_env();
self.scheduler_options.export_to_env();
}
pub fn get_ssl_config(&self) -> anyhow::Result<Option<rustls::ServerConfig>> {
if let (Some(cert_path), Some(key_path)) = (&self.ssl_cert_path, &self.ssl_key_path) {
let config = rustls::ServerConfig::builder().with_safe_defaults();

View File

@ -11,6 +11,8 @@ mod snapshot;
pub mod tasks;
mod update_file_store;
use std::env::VarError;
use std::ffi::OsStr;
use std::path::Path;
pub use index_controller::MeiliSearch;
@ -35,3 +37,14 @@ pub fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
true
}
}
/// Checks if the key is defined in the environment variables.
/// If not, inserts it with the given value.
pub fn export_to_env_if_not_present<T>(key: &str, value: T)
where
T: AsRef<OsStr>,
{
if let Err(VarError::NotPresent) = std::env::var(key) {
std::env::set_var(key, value);
}
}

View File

@ -1,22 +1,28 @@
use crate::export_to_env_if_not_present;
use core::fmt;
use std::{convert::TryFrom, num::ParseIntError, ops::Deref, str::FromStr};
use byte_unit::{Byte, ByteError};
use clap::Parser;
use milli::update::IndexerConfig;
use serde::Serialize;
use serde::{Deserialize, Serialize};
use sysinfo::{RefreshKind, System, SystemExt};
#[derive(Debug, Clone, Parser, Serialize)]
const MEILI_MAX_INDEXING_MEMORY: &str = "MEILI_MAX_INDEXING_MEMORY";
const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS";
const DISABLE_AUTO_BATCHING: &str = "DISABLE_AUTO_BATCHING";
#[derive(Debug, Clone, Parser, Serialize, Deserialize)]
pub struct IndexerOpts {
/// The amount of documents to skip before printing
/// a log regarding the indexing advancement.
#[serde(skip)]
#[serde(skip_serializing)]
#[clap(long, default_value = "100000", hide = true)] // 100k
pub log_every_n: usize,
/// Grenad max number of chunks in bytes.
#[serde(skip)]
#[serde(skip_serializing)]
#[clap(long, hide = true)]
pub max_nb_chunks: Option<usize>,
@ -27,7 +33,7 @@ pub struct IndexerOpts {
/// In case the engine is unable to retrieve the available memory the engine will
/// try to use the memory it needs but without real limit, this can lead to
/// Out-Of-Memory issues and it is recommended to specify the amount of memory to use.
#[clap(long, env = "MEILI_MAX_INDEXING_MEMORY", default_value_t)]
#[clap(long, env = MEILI_MAX_INDEXING_MEMORY, default_value_t)]
pub max_indexing_memory: MaxMemory,
/// The maximum number of threads the indexer will use.
@ -35,18 +41,33 @@ pub struct IndexerOpts {
/// it will use the maximum number of available cores.
///
/// It defaults to half of the available threads.
#[clap(long, env = "MEILI_MAX_INDEXING_THREADS", default_value_t)]
#[clap(long, env = MEILI_MAX_INDEXING_THREADS, default_value_t)]
pub max_indexing_threads: MaxThreads,
}
#[derive(Debug, Clone, Parser, Default, Serialize)]
#[derive(Debug, Clone, Parser, Default, Serialize, Deserialize)]
pub struct SchedulerConfig {
/// The engine will disable task auto-batching,
/// and will sequencialy compute each task one by one.
#[clap(long, env = "DISABLE_AUTO_BATCHING")]
#[clap(long, env = DISABLE_AUTO_BATCHING)]
pub disable_auto_batching: bool,
}
impl IndexerOpts {
pub fn export_to_env(self) {
if let Some(max_indexing_memory) = self.max_indexing_memory.0 {
export_to_env_if_not_present(
MEILI_MAX_INDEXING_MEMORY,
max_indexing_memory.to_string(),
);
}
export_to_env_if_not_present(
MEILI_MAX_INDEXING_THREADS,
self.max_indexing_threads.0.to_string(),
);
}
}
impl TryFrom<&IndexerOpts> for IndexerConfig {
type Error = anyhow::Error;
@ -77,8 +98,17 @@ impl Default for IndexerOpts {
}
}
impl SchedulerConfig {
pub fn export_to_env(self) {
export_to_env_if_not_present(
DISABLE_AUTO_BATCHING,
self.disable_auto_batching.to_string(),
);
}
}
/// A type used to detect the max memory available and use 2/3 of it.
#[derive(Debug, Clone, Copy, Serialize)]
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct MaxMemory(Option<Byte>);
impl FromStr for MaxMemory {
@ -134,7 +164,7 @@ fn total_memory_bytes() -> Option<u64> {
}
}
#[derive(Debug, Clone, Copy, Serialize)]
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct MaxThreads(usize);
impl FromStr for MaxThreads {