diff --git a/.github/scripts/is-latest-release.sh b/.github/scripts/is-latest-release.sh index 81534a2f7..54f0a9d3a 100644 --- a/.github/scripts/is-latest-release.sh +++ b/.github/scripts/is-latest-release.sh @@ -85,7 +85,7 @@ get_latest() { latest="" current_tag="" for release_info in $releases; do - if [ $i -eq 0 ]; then # Cheking tag_name + if [ $i -eq 0 ]; then # Checking tag_name if echo "$release_info" | grep -q "$GREP_SEMVER_REGEXP"; then # If it's not an alpha or beta release current_tag=$release_info else diff --git a/.github/workflows/milestone-workflow.yml b/.github/workflows/milestone-workflow.yml index 4d0425f14..4cb87684d 100644 --- a/.github/workflows/milestone-workflow.yml +++ b/.github/workflows/milestone-workflow.yml @@ -62,12 +62,12 @@ jobs: - uses: actions/checkout@v3 - name: Download the issue template run: curl -s https://raw.githubusercontent.com/meilisearch/core-team/main/issue-templates/roadmap-issue.md > $ISSUE_TEMPLATE - - name: Replace all empty occurences in the templates + - name: Replace all empty occurrences in the templates run: | - # Replace all <> occurences + # Replace all <> occurrences sed -i "s/<>/$MILESTONE_VERSION/g" $ISSUE_TEMPLATE - # Replace all <> occurences + # Replace all <> occurrences milestone_id=$(echo $MILESTONE_URL | cut -d '/' -f 7) sed -i "s/<>/$milestone_id/g" $ISSUE_TEMPLATE @@ -95,12 +95,12 @@ jobs: - uses: actions/checkout@v3 - name: Download the issue template run: curl -s https://raw.githubusercontent.com/meilisearch/core-team/main/issue-templates/changelog-issue.md > $ISSUE_TEMPLATE - - name: Replace all empty occurences in the templates + - name: Replace all empty occurrences in the templates run: | - # Replace all <> occurences + # Replace all <> occurrences sed -i "s/<>/$MILESTONE_VERSION/g" $ISSUE_TEMPLATE - # Replace all <> occurences + # Replace all <> occurrences milestone_id=$(echo $MILESTONE_URL | cut -d '/' -f 7) sed -i "s/<>/$milestone_id/g" $ISSUE_TEMPLATE - name: Create the issue diff --git a/.github/workflows/publish-docker-images.yml b/.github/workflows/publish-docker-images.yml index 449aec020..f2e119a6d 100644 --- a/.github/workflows/publish-docker-images.yml +++ b/.github/workflows/publish-docker-images.yml @@ -53,7 +53,7 @@ jobs: uses: docker/metadata-action@v4 with: images: getmeili/meilisearch - # The lastest and `vX.Y` tags are only pushed for the official Meilisearch releases + # The latest and `vX.Y` tags are only pushed for the official Meilisearch releases # See https://github.com/docker/metadata-action#latest-tag flavor: latest=false tags: | diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 93d5a2136..8ac897e45 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -112,7 +112,7 @@ The full Meilisearch release process is described in [this guide](https://github ### Release assets For each release, the following assets are created: -- Binaries for differents platforms (Linux, MacOS, Windows and ARM architectures) are attached to the GitHub release +- Binaries for different platforms (Linux, MacOS, Windows and ARM architectures) are attached to the GitHub release - Binaries are pushed to HomeBrew and APT (not published for RC) - Docker tags are created/updated: - `vX.Y.Z` diff --git a/Cargo.lock b/Cargo.lock index 7a0802e3e..76296a764 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2087,6 +2087,7 @@ dependencies = [ "time 0.3.9", "tokio", "tokio-stream", + "toml", "urlencoding", "uuid", "vergen", diff --git a/config.toml b/config.toml new file mode 100644 index 000000000..8da71c70a --- /dev/null +++ 
b/config.toml @@ -0,0 +1,129 @@ +# This file shows the default configuration of Meilisearch. +# All variables are defined here https://docs.meilisearch.com/learn/configuration/instance_options.html#environment-variables + +db_path = "./data.ms" +# The destination where the database must be created. + +env = "development" # Possible values: [development, production] +# This environment variable must be set to `production` if you are running in production. +# More logs will be displayed if the server is running in development mode. Setting the master +# key is optional in this mode, which means there is no security on the update routes. This +# is useful to debug when integrating the engine with another service. + +http_addr = "127.0.0.1:7700" +# The address on which the HTTP server will listen. + +# master_key = "MASTER_KEY" +# Sets the instance's master key, automatically protecting all routes except GET /health. + +# no_analytics = false +# Do not send analytics to Meilisearch. + +disable_auto_batching = false +# The engine will disable task auto-batching, and will sequentially compute each task one by one. + + +### DUMP + +dumps_dir = "dumps/" +# Folder where dumps are created when the dump route is called. + +# import_dump = "./path/to/my/file.dump" +# Import a dump from the specified path, must be a `.dump` file. + +ignore_missing_dump = false +# If the dump doesn't exist, load or create the database specified by `db_path` instead. + +ignore_dump_if_db_exists = false +# Ignore the dump if a database already exists, and load that database instead. + +### + + +log_level = "INFO" # Possible values: [ERROR, WARN, INFO, DEBUG, TRACE] +# Set the log level. + + +### INDEX + +max_index_size = "100 GiB" +# The maximum size, in bytes, of the main LMDB database directory. + +# max_indexing_memory = "2 GiB" +# The maximum amount of memory the indexer will use. +# +# In case the engine is unable to retrieve the available memory the engine will try to use +# the memory it needs but without real limit, this can lead to Out-Of-Memory issues and it +# is recommended to specify the amount of memory to use. +# +# /!\ The default value is system dependent /!\ + +# max_indexing_threads = 4 +# The maximum number of threads the indexer will use. If the number set is higher than the +# real number of cores available in the machine, it will use the maximum number of +# available cores. +# +# It defaults to half of the available threads. + +### + + +max_task_db_size = "100 GiB" +# The maximum size, in bytes, of the update LMDB database directory. + +http_payload_size_limit = "100 MB" +# The maximum size, in bytes, of accepted JSON payloads. + + +### SNAPSHOT + +schedule_snapshot = false +# Activate snapshot scheduling. + +snapshot_dir = "snapshots/" +# Defines the directory path where Meilisearch will create a snapshot each snapshot_interval_sec. + +snapshot_interval_sec = 86400 +# Defines time interval, in seconds, between each snapshot creation. + +# import_snapshot = "./path/to/my/snapshot" +# Defines the path of the snapshot file to import. This option will, by default, stop the +# process if a database already exists, or if no snapshot exists at the given path. If this +# option is not specified, no snapshot is imported. + +ignore_missing_snapshot = false +# The engine will ignore a missing snapshot and not return an error in such a case. + +ignore_snapshot_if_db_exists = false +# The engine will skip snapshot importation and not return an error in such a case.
+ +### + + +### SSL + +# ssl_auth_path = "./path/to/root" +# Enable client authentication, and accept certificates signed by those roots provided in CERTFILE. + +# ssl_cert_path = "./path/to/CERTFILE" +# Read server certificates from CERTFILE. This should contain PEM-format certificates in +# the right order (the first certificate should certify KEYFILE, the last should be a root +# CA). + +# ssl_key_path = "./path/to/private-key" +# Read the private key from KEYFILE. This should be an RSA private key or PKCS8-encoded +# private key, in PEM format. + +# ssl_ocsp_path = "./path/to/OCSPFILE" +# Read DER-encoded OCSP response from OCSPFILE and staple to certificate. Optional. + +ssl_require_auth = false +# Send a fatal alert if the client does not complete client authentication. + +ssl_resumption = false +# SSL support session resumption. + +ssl_tickets = false +# SSL support tickets. + +### diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index baea8b578..bc57cbab2 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -76,6 +76,7 @@ thiserror = "1.0.30" time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] } tokio = { version = "1.17.0", features = ["full"] } tokio-stream = "0.1.8" +toml = "0.5.9" uuid = { version = "1.1.2", features = ["serde", "v4"] } walkdir = "2.3.2" prometheus = { version = "0.13.0", features = ["process"], optional = true } diff --git a/meilisearch-http/src/analytics/segment_analytics.rs b/meilisearch-http/src/analytics/segment_analytics.rs index f0dfd0fab..7b76cdd80 100644 --- a/meilisearch-http/src/analytics/segment_analytics.rs +++ b/meilisearch-http/src/analytics/segment_analytics.rs @@ -349,16 +349,16 @@ pub struct SearchAggregator { // sort sort_with_geo_point: bool, - // everytime a request has a filter, this field must be incremented by the number of terms it contains + // every time a request has a filter, this field must be incremented by the number of terms it contains sort_sum_of_criteria_terms: usize, - // everytime a request has a filter, this field must be incremented by one + // every time a request has a filter, this field must be incremented by one sort_total_number_of_criteria: usize, // filter filter_with_geo_radius: bool, - // everytime a request has a filter, this field must be incremented by the number of terms it contains + // every time a request has a filter, this field must be incremented by the number of terms it contains filter_sum_of_criteria_terms: usize, - // everytime a request has a filter, this field must be incremented by one + // every time a request has a filter, this field must be incremented by one filter_total_number_of_criteria: usize, used_syntax: HashMap, @@ -366,7 +366,7 @@ pub struct SearchAggregator { // The maximum number of terms in a q request max_terms_number: usize, - // everytime a search is done, we increment the counter linked to the used settings + // every time a search is done, we increment the counter linked to the used settings matching_strategy: HashMap, // pagination diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index 9627aeef8..b6f92ae28 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -1,9 +1,9 @@ use std::env; +use std::path::PathBuf; use std::sync::Arc; use actix_web::http::KeepAlive; use actix_web::HttpServer; -use clap::Parser; use meilisearch_auth::AuthController; use meilisearch_http::analytics; use meilisearch_http::analytics::Analytics; @@ -29,7 +29,7 @@ 
fn setup(opt: &Opt) -> anyhow::Result<()> { #[actix_web::main] async fn main() -> anyhow::Result<()> { - let opt = Opt::parse(); + let (opt, config_read_from) = Opt::try_build()?; setup(&opt)?; @@ -58,7 +58,7 @@ async fn main() -> anyhow::Result<()> { #[cfg(any(debug_assertions, not(feature = "analytics")))] let (analytics, user) = analytics::MockAnalytics::new(&opt); - print_launch_resume(&opt, &user); + print_launch_resume(&opt, &user, config_read_from); run_http(meilisearch, auth_controller, opt, analytics).await?; @@ -97,7 +97,7 @@ async fn run_http( Ok(()) } -pub fn print_launch_resume(opt: &Opt, user: &str) { +pub fn print_launch_resume(opt: &Opt, user: &str, config_read_from: Option) { let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown"); let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown"); let protocol = if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() { @@ -118,6 +118,12 @@ pub fn print_launch_resume(opt: &Opt, user: &str) { eprintln!("{}", ascii_name); + eprintln!( + "Config file path:\t{:?}", + config_read_from + .map(|config_file_path| config_file_path.display().to_string()) + .unwrap_or_else(|| "none".to_string()) + ); eprintln!("Database path:\t\t{:?}", opt.db_path); eprintln!("Server listening on:\t\"{}://{}\"", protocol, opt.http_addr); eprintln!("Environment:\t\t{:?}", opt.env); diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index bdfa283a6..ae12f0cc6 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -5,7 +5,10 @@ use std::sync::Arc; use byte_unit::Byte; use clap::Parser; -use meilisearch_lib::options::{IndexerOpts, SchedulerConfig}; +use meilisearch_lib::{ + export_to_env_if_not_present, + options::{IndexerOpts, SchedulerConfig}, +}; use rustls::{ server::{ AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, @@ -14,149 +17,208 @@ use rustls::{ RootCertStore, }; use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys}; -use serde::Serialize; +use serde::{Deserialize, Serialize}; const POSSIBLE_ENV: [&str; 2] = ["development", "production"]; -#[derive(Debug, Clone, Parser, Serialize)] +const MEILI_DB_PATH: &str = "MEILI_DB_PATH"; +const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR"; +const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY"; +const MEILI_ENV: &str = "MEILI_ENV"; +#[cfg(all(not(debug_assertions), feature = "analytics"))] +const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS"; +const MEILI_MAX_INDEX_SIZE: &str = "MEILI_MAX_INDEX_SIZE"; +const MEILI_MAX_TASK_DB_SIZE: &str = "MEILI_MAX_TASK_DB_SIZE"; +const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT"; +const MEILI_SSL_CERT_PATH: &str = "MEILI_SSL_CERT_PATH"; +const MEILI_SSL_KEY_PATH: &str = "MEILI_SSL_KEY_PATH"; +const MEILI_SSL_AUTH_PATH: &str = "MEILI_SSL_AUTH_PATH"; +const MEILI_SSL_OCSP_PATH: &str = "MEILI_SSL_OCSP_PATH"; +const MEILI_SSL_REQUIRE_AUTH: &str = "MEILI_SSL_REQUIRE_AUTH"; +const MEILI_SSL_RESUMPTION: &str = "MEILI_SSL_RESUMPTION"; +const MEILI_SSL_TICKETS: &str = "MEILI_SSL_TICKETS"; +const MEILI_IMPORT_SNAPSHOT: &str = "MEILI_IMPORT_SNAPSHOT"; +const MEILI_IGNORE_MISSING_SNAPSHOT: &str = "MEILI_IGNORE_MISSING_SNAPSHOT"; +const MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS: &str = "MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS"; +const MEILI_SNAPSHOT_DIR: &str = "MEILI_SNAPSHOT_DIR"; +const MEILI_SCHEDULE_SNAPSHOT: &str = "MEILI_SCHEDULE_SNAPSHOT"; +const MEILI_SNAPSHOT_INTERVAL_SEC: &str = "MEILI_SNAPSHOT_INTERVAL_SEC"; +const MEILI_IMPORT_DUMP: &str = 
"MEILI_IMPORT_DUMP"; +const MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP"; +const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS"; +const MEILI_DUMPS_DIR: &str = "MEILI_DUMPS_DIR"; +const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL"; +#[cfg(feature = "metrics")] +const MEILI_ENABLE_METRICS_ROUTE: &str = "MEILI_ENABLE_METRICS_ROUTE"; + +const DEFAULT_DB_PATH: &str = "./data.ms"; +const DEFAULT_HTTP_ADDR: &str = "127.0.0.1:7700"; +const DEFAULT_ENV: &str = "development"; +const DEFAULT_MAX_INDEX_SIZE: &str = "100 GiB"; +const DEFAULT_MAX_TASK_DB_SIZE: &str = "100 GiB"; +const DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT: &str = "100 MB"; +const DEFAULT_SNAPSHOT_DIR: &str = "snapshots/"; +const DEFAULT_SNAPSHOT_INTERVAL_SEC: u64 = 86400; +const DEFAULT_DUMPS_DIR: &str = "dumps/"; +const DEFAULT_LOG_LEVEL: &str = "INFO"; + +#[derive(Debug, Clone, Parser, Serialize, Deserialize)] #[clap(version)] +#[serde(rename_all = "snake_case", deny_unknown_fields)] pub struct Opt { /// The destination where the database must be created. - #[clap(long, env = "MEILI_DB_PATH", default_value = "./data.ms")] + #[clap(long, env = MEILI_DB_PATH, default_value_os_t = default_db_path())] + #[serde(default = "default_db_path")] pub db_path: PathBuf, /// The address on which the http server will listen. - #[clap(long, env = "MEILI_HTTP_ADDR", default_value = "127.0.0.1:7700")] + #[clap(long, env = MEILI_HTTP_ADDR, default_value_t = default_http_addr())] + #[serde(default = "default_http_addr")] pub http_addr: String, - /// The master key allowing you to do everything on the server. - #[serde(skip)] - #[clap(long, env = "MEILI_MASTER_KEY")] + /// Sets the instance's master key, automatically protecting all routes except GET /health + #[serde(skip_serializing)] + #[clap(long, env = MEILI_MASTER_KEY)] pub master_key: Option, /// This environment variable must be set to `production` if you are running in production. - /// If the server is running in development mode more logs will be displayed, - /// and the master key can be avoided which implies that there is no security on the updates routes. - /// This is useful to debug when integrating the engine with another service. - #[clap(long, env = "MEILI_ENV", default_value = "development", possible_values = &POSSIBLE_ENV)] + /// More logs wiil be displayed if the server is running in development mode. Setting the master + /// key is optional; hence no security on the updates routes. This + /// is useful to debug when integrating the engine with another service + #[clap(long, env = MEILI_ENV, default_value_t = default_env(), possible_values = &POSSIBLE_ENV)] + #[serde(default = "default_env")] pub env: String, /// Do not send analytics to Meili. 
#[cfg(all(not(debug_assertions), feature = "analytics"))] - #[serde(skip)] // we can't send true - #[clap(long, env = "MEILI_NO_ANALYTICS")] + #[serde(skip_serializing, default)] // we can't send true + #[clap(long, env = MEILI_NO_ANALYTICS)] pub no_analytics: bool, - /// The maximum size, in bytes, of the main lmdb database directory - #[clap(long, env = "MEILI_MAX_INDEX_SIZE", default_value = "100 GiB")] + /// The maximum size, in bytes, of the main LMDB database directory + #[clap(long, env = MEILI_MAX_INDEX_SIZE, default_value_t = default_max_index_size())] + #[serde(default = "default_max_index_size")] pub max_index_size: Byte, - /// The maximum size, in bytes, of the update lmdb database directory - #[clap(long, env = "MEILI_MAX_TASK_DB_SIZE", default_value = "100 GiB")] + /// The maximum size, in bytes, of the update LMDB database directory + #[clap(long, env = MEILI_MAX_TASK_DB_SIZE, default_value_t = default_max_task_db_size())] + #[serde(default = "default_max_task_db_size")] pub max_task_db_size: Byte, /// The maximum size, in bytes, of accepted JSON payloads - #[clap(long, env = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT", default_value = "100 MB")] + #[clap(long, env = MEILI_HTTP_PAYLOAD_SIZE_LIMIT, default_value_t = default_http_payload_size_limit())] + #[serde(default = "default_http_payload_size_limit")] pub http_payload_size_limit: Byte, /// Read server certificates from CERTFILE. /// This should contain PEM-format certificates /// in the right order (the first certificate should /// certify KEYFILE, the last should be a root CA). - #[serde(skip)] - #[clap(long, env = "MEILI_SSL_CERT_PATH", parse(from_os_str))] + #[serde(skip_serializing)] + #[clap(long, env = MEILI_SSL_CERT_PATH, parse(from_os_str))] pub ssl_cert_path: Option, - /// Read private key from KEYFILE. This should be a RSA + /// Read the private key from KEYFILE. This should be an RSA /// private key or PKCS8-encoded private key, in PEM format. - #[serde(skip)] - #[clap(long, env = "MEILI_SSL_KEY_PATH", parse(from_os_str))] + #[serde(skip_serializing)] + #[clap(long, env = MEILI_SSL_KEY_PATH, parse(from_os_str))] pub ssl_key_path: Option, /// Enable client authentication, and accept certificates /// signed by those roots provided in CERTFILE. - #[clap(long, env = "MEILI_SSL_AUTH_PATH", parse(from_os_str))] - #[serde(skip)] + #[serde(skip_serializing)] + #[clap(long, env = MEILI_SSL_AUTH_PATH, parse(from_os_str))] pub ssl_auth_path: Option, /// Read DER-encoded OCSP response from OCSPFILE and staple to certificate. /// Optional - #[serde(skip)] - #[clap(long, env = "MEILI_SSL_OCSP_PATH", parse(from_os_str))] + #[serde(skip_serializing)] + #[clap(long, env = MEILI_SSL_OCSP_PATH, parse(from_os_str))] pub ssl_ocsp_path: Option, /// Send a fatal alert if the client does not complete client authentication. - #[serde(skip)] - #[clap(long, env = "MEILI_SSL_REQUIRE_AUTH")] + #[serde(skip_serializing, default)] + #[clap(long, env = MEILI_SSL_REQUIRE_AUTH)] pub ssl_require_auth: bool, /// SSL support session resumption - #[serde(skip)] - #[clap(long, env = "MEILI_SSL_RESUMPTION")] + #[serde(skip_serializing, default)] + #[clap(long, env = MEILI_SSL_RESUMPTION)] pub ssl_resumption: bool, /// SSL support tickets. - #[serde(skip)] - #[clap(long, env = "MEILI_SSL_TICKETS")] + #[serde(skip_serializing, default)] + #[clap(long, env = MEILI_SSL_TICKETS)] pub ssl_tickets: bool, /// Defines the path of the snapshot file to import. 
- /// This option will, by default, stop the process if a database already exist or if no snapshot exists at - /// the given path. If this option is not specified no snapshot is imported. - #[clap(long, env = "MEILI_IMPORT_SNAPSHOT")] + /// This option will, by default, stop the process if a database already exists, or if no snapshot exists at + /// the given path. If this option is not specified, no snapshot is imported. + #[clap(long, env = MEILI_IMPORT_SNAPSHOT)] pub import_snapshot: Option, - /// The engine will ignore a missing snapshot and not return an error in such case. + /// The engine will ignore a missing snapshot and not return an error in such a case. #[clap( long, - env = "MEILI_IGNORE_MISSING_SNAPSHOT", + env = MEILI_IGNORE_MISSING_SNAPSHOT, requires = "import-snapshot" )] + #[serde(default)] pub ignore_missing_snapshot: bool, /// The engine will skip snapshot importation and not return an error in such case. #[clap( long, - env = "MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS", + env = MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS, requires = "import-snapshot" )] + #[serde(default)] pub ignore_snapshot_if_db_exists: bool, - /// Defines the directory path where meilisearch will create snapshot each snapshot_time_gap. - #[clap(long, env = "MEILI_SNAPSHOT_DIR", default_value = "snapshots/")] + /// Defines the directory path where Meilisearch will create a snapshot each snapshot-interval-sec. + #[clap(long, env = MEILI_SNAPSHOT_DIR, default_value_os_t = default_snapshot_dir())] + #[serde(default = "default_snapshot_dir")] pub snapshot_dir: PathBuf, /// Activate snapshot scheduling. - #[clap(long, env = "MEILI_SCHEDULE_SNAPSHOT")] + #[clap(long, env = MEILI_SCHEDULE_SNAPSHOT)] + #[serde(default)] pub schedule_snapshot: bool, /// Defines time interval, in seconds, between each snapshot creation. - #[clap(long, env = "MEILI_SNAPSHOT_INTERVAL_SEC", default_value = "86400")] // 24h + #[clap(long, env = MEILI_SNAPSHOT_INTERVAL_SEC, default_value_t = default_snapshot_interval_sec())] + #[serde(default = "default_snapshot_interval_sec")] + // 24h pub snapshot_interval_sec: u64, /// Import a dump from the specified path, must be a `.dump` file. - #[clap(long, env = "MEILI_IMPORT_DUMP", conflicts_with = "import-snapshot")] + #[clap(long, env = MEILI_IMPORT_DUMP, conflicts_with = "import-snapshot")] pub import_dump: Option, - /// If the dump doesn't exists, load or create the database specified by `db-path` instead. - #[clap(long, env = "MEILI_IGNORE_MISSING_DUMP", requires = "import-dump")] + /// If the dump doesn't exist, load or create the database specified by `db-path` instead. + #[clap(long, env = MEILI_IGNORE_MISSING_DUMP, requires = "import-dump")] + #[serde(default)] pub ignore_missing_dump: bool, /// Ignore the dump if a database already exists, and load that database instead. - #[clap(long, env = "MEILI_IGNORE_DUMP_IF_DB_EXISTS", requires = "import-dump")] + #[clap(long, env = MEILI_IGNORE_DUMP_IF_DB_EXISTS, requires = "import-dump")] + #[serde(default)] pub ignore_dump_if_db_exists: bool, /// Folder where dumps are created when the dump route is called. - #[clap(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")] + #[clap(long, env = MEILI_DUMPS_DIR, default_value_os_t = default_dumps_dir())] + #[serde(default = "default_dumps_dir")] pub dumps_dir: PathBuf, - /// Set the log level - #[clap(long, env = "MEILI_LOG_LEVEL", default_value = "info")] + /// Set the log level. 
# Possible values: [ERROR, WARN, INFO, DEBUG, TRACE] + #[clap(long, env = MEILI_LOG_LEVEL, default_value_t = default_log_level())] + #[serde(default = "default_log_level")] pub log_level: String, /// Enables Prometheus metrics and /metrics route. #[cfg(feature = "metrics")] - #[clap(long, env = "MEILI_ENABLE_METRICS_ROUTE")] + #[clap(long, env = MEILI_ENABLE_METRICS_ROUTE)] + #[serde(default)] pub enable_metrics_route: bool, #[serde(flatten)] @@ -166,15 +228,139 @@ pub struct Opt { #[serde(flatten)] #[clap(flatten)] pub scheduler_options: SchedulerConfig, + + /// The path to a configuration file that should be used to setup the engine. + /// Format must be TOML. + #[serde(skip_serializing)] + #[clap(long)] + pub config_file_path: Option, } impl Opt { - /// Wether analytics should be enabled or not. + /// Whether analytics should be enabled or not. #[cfg(all(not(debug_assertions), feature = "analytics"))] pub fn analytics(&self) -> bool { !self.no_analytics } + /// Build a new Opt from config file, env vars and cli args. + pub fn try_build() -> anyhow::Result<(Self, Option)> { + // Parse the args to get the config_file_path. + let mut opts = Opt::parse(); + let mut config_read_from = None; + if let Some(config_file_path) = opts + .config_file_path + .clone() + .or_else(|| Some(PathBuf::from("./config.toml"))) + { + match std::fs::read(&config_file_path) { + Ok(config) => { + // If the file is successfully read, we deserialize it with `toml`. + let opt_from_config = toml::from_slice::(&config)?; + // We inject the values from the toml in the corresponding env vars if needs be. Doing so, we respect the priority toml < env vars < cli args. + opt_from_config.export_to_env(); + // Once injected we parse the cli args once again to take the new env vars into scope. + opts = Opt::parse(); + config_read_from = Some(config_file_path); + } + // If we have an error while reading the file defined by the user. + Err(_) if opts.config_file_path.is_some() => anyhow::bail!( + "unable to open or read the {:?} configuration file.", + opts.config_file_path.unwrap().display().to_string() + ), + _ => (), + } + } + + Ok((opts, config_read_from)) + } + + /// Exports the opts values to their corresponding env vars if they are not set. 
+ fn export_to_env(self) { + let Opt { + db_path, + http_addr, + master_key, + env, + max_index_size, + max_task_db_size, + http_payload_size_limit, + ssl_cert_path, + ssl_key_path, + ssl_auth_path, + ssl_ocsp_path, + ssl_require_auth, + ssl_resumption, + ssl_tickets, + snapshot_dir, + schedule_snapshot, + snapshot_interval_sec, + dumps_dir, + log_level, + indexer_options, + scheduler_options, + import_snapshot: _, + ignore_missing_snapshot: _, + ignore_snapshot_if_db_exists: _, + import_dump: _, + ignore_missing_dump: _, + ignore_dump_if_db_exists: _, + config_file_path: _, + #[cfg(all(not(debug_assertions), feature = "analytics"))] + no_analytics, + #[cfg(feature = "metrics")] + enable_metrics_route, + } = self; + export_to_env_if_not_present(MEILI_DB_PATH, db_path); + export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr); + if let Some(master_key) = master_key { + export_to_env_if_not_present(MEILI_MASTER_KEY, master_key); + } + export_to_env_if_not_present(MEILI_ENV, env); + #[cfg(all(not(debug_assertions), feature = "analytics"))] + { + export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string()); + } + export_to_env_if_not_present(MEILI_MAX_INDEX_SIZE, max_index_size.to_string()); + export_to_env_if_not_present(MEILI_MAX_TASK_DB_SIZE, max_task_db_size.to_string()); + export_to_env_if_not_present( + MEILI_HTTP_PAYLOAD_SIZE_LIMIT, + http_payload_size_limit.to_string(), + ); + if let Some(ssl_cert_path) = ssl_cert_path { + export_to_env_if_not_present(MEILI_SSL_CERT_PATH, ssl_cert_path); + } + if let Some(ssl_key_path) = ssl_key_path { + export_to_env_if_not_present(MEILI_SSL_KEY_PATH, ssl_key_path); + } + if let Some(ssl_auth_path) = ssl_auth_path { + export_to_env_if_not_present(MEILI_SSL_AUTH_PATH, ssl_auth_path); + } + if let Some(ssl_ocsp_path) = ssl_ocsp_path { + export_to_env_if_not_present(MEILI_SSL_OCSP_PATH, ssl_ocsp_path); + } + export_to_env_if_not_present(MEILI_SSL_REQUIRE_AUTH, ssl_require_auth.to_string()); + export_to_env_if_not_present(MEILI_SSL_RESUMPTION, ssl_resumption.to_string()); + export_to_env_if_not_present(MEILI_SSL_TICKETS, ssl_tickets.to_string()); + export_to_env_if_not_present(MEILI_SNAPSHOT_DIR, snapshot_dir); + export_to_env_if_not_present(MEILI_SCHEDULE_SNAPSHOT, schedule_snapshot.to_string()); + export_to_env_if_not_present( + MEILI_SNAPSHOT_INTERVAL_SEC, + snapshot_interval_sec.to_string(), + ); + export_to_env_if_not_present(MEILI_DUMPS_DIR, dumps_dir); + export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level); + #[cfg(feature = "metrics")] + { + export_to_env_if_not_present( + MEILI_ENABLE_METRICS_ROUTE, + enable_metrics_route.to_string(), + ); + } + indexer_options.export_to_env(); + scheduler_options.export_to_env(); + } + pub fn get_ssl_config(&self) -> anyhow::Result> { if let (Some(cert_path), Some(key_path)) = (&self.ssl_cert_path, &self.ssl_key_path) { let config = rustls::ServerConfig::builder().with_safe_defaults(); @@ -273,6 +459,48 @@ fn load_ocsp(filename: &Option) -> anyhow::Result> { Ok(ret) } +/// Functions used to get default value for `Opt` fields, needs to be function because of serde's default attribute. 
+ +fn default_db_path() -> PathBuf { + PathBuf::from(DEFAULT_DB_PATH) +} + +fn default_http_addr() -> String { + DEFAULT_HTTP_ADDR.to_string() +} + +fn default_env() -> String { + DEFAULT_ENV.to_string() +} + +fn default_max_index_size() -> Byte { + Byte::from_str(DEFAULT_MAX_INDEX_SIZE).unwrap() +} + +fn default_max_task_db_size() -> Byte { + Byte::from_str(DEFAULT_MAX_TASK_DB_SIZE).unwrap() +} + +fn default_http_payload_size_limit() -> Byte { + Byte::from_str(DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT).unwrap() +} + +fn default_snapshot_dir() -> PathBuf { + PathBuf::from(DEFAULT_SNAPSHOT_DIR) +} + +fn default_snapshot_interval_sec() -> u64 { + DEFAULT_SNAPSHOT_INTERVAL_SEC +} + +fn default_dumps_dir() -> PathBuf { + PathBuf::from(DEFAULT_DUMPS_DIR) +} + +fn default_log_level() -> String { + DEFAULT_LOG_LEVEL.to_string() +} + #[cfg(test)] mod test { use super::*; diff --git a/meilisearch-http/src/task.rs b/meilisearch-http/src/task.rs index fe23720aa..786d318f8 100644 --- a/meilisearch-http/src/task.rs +++ b/meilisearch-http/src/task.rs @@ -147,7 +147,7 @@ enum TaskDetails { IndexInfo { primary_key: Option }, #[serde(rename_all = "camelCase")] DocumentDeletion { - received_document_ids: usize, + matched_documents: usize, deleted_documents: Option, }, #[serde(rename_all = "camelCase")] @@ -255,7 +255,7 @@ impl From for TaskView { } => ( TaskType::DocumentDeletion, Some(TaskDetails::DocumentDeletion { - received_document_ids: ids.len(), + matched_documents: ids.len(), deleted_documents: None, }), ), diff --git a/meilisearch-http/tests/dashboard/mod.rs b/meilisearch-http/tests/dashboard/mod.rs index d097cfd4b..2699cd16f 100644 --- a/meilisearch-http/tests/dashboard/mod.rs +++ b/meilisearch-http/tests/dashboard/mod.rs @@ -1,5 +1,6 @@ use crate::common::Server; +#[cfg(feature = "mini-dashboard")] #[actix_rt::test] async fn dashboard_assets_load() { let server = Server::new().await; diff --git a/meilisearch-lib/src/dump/compat/v2.rs b/meilisearch-lib/src/dump/compat/v2.rs index 364d894c4..ba3b8e3a6 100644 --- a/meilisearch-lib/src/dump/compat/v2.rs +++ b/meilisearch-lib/src/dump/compat/v2.rs @@ -145,7 +145,7 @@ pub fn error_code_from_str(s: &str) -> anyhow::Result { "unsupported_media_type" => Code::UnsupportedMediaType, "dump_already_in_progress" => Code::DumpAlreadyInProgress, "dump_process_failed" => Code::DumpProcessFailed, - _ => bail!("unknow error code."), + _ => bail!("unknown error code."), }; Ok(code) diff --git a/meilisearch-lib/src/dump/loaders/v4.rs b/meilisearch-lib/src/dump/loaders/v4.rs index 0744df7ea..44ec23517 100644 --- a/meilisearch-lib/src/dump/loaders/v4.rs +++ b/meilisearch-lib/src/dump/loaders/v4.rs @@ -57,10 +57,10 @@ fn patch_updates(src: impl AsRef, dst: impl AsRef) -> anyhow::Result let updates_path = src.as_ref().join("updates/data.jsonl"); let output_updates_path = dst.as_ref().join("updates/data.jsonl"); create_dir_all(output_updates_path.parent().unwrap())?; - let udpates_file = File::open(updates_path)?; + let updates_file = File::open(updates_path)?; let mut output_update_file = File::create(output_updates_path)?; - serde_json::Deserializer::from_reader(udpates_file) + serde_json::Deserializer::from_reader(updates_file) .into_iter::() .try_for_each(|task| -> anyhow::Result<()> { let task: Task = task?.into(); diff --git a/meilisearch-lib/src/index/search.rs b/meilisearch-lib/src/index/search.rs index 57171d529..1a9aa1d0d 100644 --- a/meilisearch-lib/src/index/search.rs +++ b/meilisearch-lib/src/index/search.rs @@ -27,7 +27,7 @@ pub const DEFAULT_CROP_MARKER: fn() -> 
String = || "…".to_string(); pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "".to_string(); pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "".to_string(); -/// The maximimum number of results that the engine +/// The maximum number of results that the engine /// will be able to return in one search call. pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000; diff --git a/meilisearch-lib/src/index_resolver/index_store.rs b/meilisearch-lib/src/index_resolver/index_store.rs index e4f58f130..ea3c7125a 100644 --- a/meilisearch-lib/src/index_resolver/index_store.rs +++ b/meilisearch-lib/src/index_resolver/index_store.rs @@ -51,7 +51,7 @@ impl MapIndexStore { #[async_trait::async_trait] impl IndexStore for MapIndexStore { async fn create(&self, uuid: Uuid) -> Result { - // We need to keep the lock until we are sure the db file has been opened correclty, to + // We need to keep the lock until we are sure the db file has been opened correctly, to // ensure that another db is not created at the same time. let mut lock = self.index_store.write().await; diff --git a/meilisearch-lib/src/lib.rs b/meilisearch-lib/src/lib.rs index 70fd2ba51..7fe0984dc 100644 --- a/meilisearch-lib/src/lib.rs +++ b/meilisearch-lib/src/lib.rs @@ -11,6 +11,8 @@ mod snapshot; pub mod tasks; mod update_file_store; +use std::env::VarError; +use std::ffi::OsStr; use std::path::Path; pub use index_controller::MeiliSearch; @@ -35,3 +37,14 @@ pub fn is_empty_db(db_path: impl AsRef) -> bool { true } } + +/// Checks if the key is defined in the environment variables. +/// If not, inserts it with the given value. +pub fn export_to_env_if_not_present(key: &str, value: T) +where + T: AsRef, +{ + if let Err(VarError::NotPresent) = std::env::var(key) { + std::env::set_var(key, value); + } +} diff --git a/meilisearch-lib/src/options.rs b/meilisearch-lib/src/options.rs index ea810b9b7..bd406fbdd 100644 --- a/meilisearch-lib/src/options.rs +++ b/meilisearch-lib/src/options.rs @@ -1,33 +1,40 @@ +use crate::export_to_env_if_not_present; + use core::fmt; use std::{convert::TryFrom, num::ParseIntError, ops::Deref, str::FromStr}; use byte_unit::{Byte, ByteError}; use clap::Parser; use milli::update::IndexerConfig; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use sysinfo::{RefreshKind, System, SystemExt}; -#[derive(Debug, Clone, Parser, Serialize)] +const MEILI_MAX_INDEXING_MEMORY: &str = "MEILI_MAX_INDEXING_MEMORY"; +const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS"; +const DISABLE_AUTO_BATCHING: &str = "DISABLE_AUTO_BATCHING"; +const DEFAULT_LOG_EVERY_N: usize = 100000; + +#[derive(Debug, Clone, Parser, Serialize, Deserialize)] +#[serde(rename_all = "snake_case", deny_unknown_fields)] pub struct IndexerOpts { /// The amount of documents to skip before printing /// a log regarding the indexing advancement. - #[serde(skip)] - #[clap(long, default_value = "100000", hide = true)] // 100k + #[serde(skip_serializing, default = "default_log_every_n")] + #[clap(long, default_value_t = default_log_every_n(), hide = true)] // 100k pub log_every_n: usize, /// Grenad max number of chunks in bytes. - #[serde(skip)] + #[serde(skip_serializing)] #[clap(long, hide = true)] pub max_nb_chunks: Option, - /// The maximum amount of memory the indexer will use. It defaults to 2/3 - /// of the available memory. It is recommended to use something like 80%-90% - /// of the available memory, no more. + /// The maximum amount of memory the indexer will use. 
/// /// In case the engine is unable to retrieve the available memory the engine will /// try to use the memory it needs but without real limit, this can lead to /// Out-Of-Memory issues and it is recommended to specify the amount of memory to use. - #[clap(long, env = "MEILI_MAX_INDEXING_MEMORY", default_value_t)] + #[clap(long, env = MEILI_MAX_INDEXING_MEMORY, default_value_t)] + #[serde(default)] pub max_indexing_memory: MaxMemory, /// The maximum number of threads the indexer will use. @@ -35,18 +42,43 @@ pub struct IndexerOpts { /// it will use the maximum number of available cores. /// /// It defaults to half of the available threads. - #[clap(long, env = "MEILI_MAX_INDEXING_THREADS", default_value_t)] + #[clap(long, env = MEILI_MAX_INDEXING_THREADS, default_value_t)] + #[serde(default)] pub max_indexing_threads: MaxThreads, } -#[derive(Debug, Clone, Parser, Default, Serialize)] +#[derive(Debug, Clone, Parser, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case", deny_unknown_fields)] pub struct SchedulerConfig { /// The engine will disable task auto-batching, /// and will sequencialy compute each task one by one. - #[clap(long, env = "DISABLE_AUTO_BATCHING")] + #[clap(long, env = DISABLE_AUTO_BATCHING)] + #[serde(default)] pub disable_auto_batching: bool, } +impl IndexerOpts { + /// Exports the values to their corresponding env vars if they are not set. + pub fn export_to_env(self) { + let IndexerOpts { + max_indexing_memory, + max_indexing_threads, + log_every_n: _, + max_nb_chunks: _, + } = self; + if let Some(max_indexing_memory) = max_indexing_memory.0 { + export_to_env_if_not_present( + MEILI_MAX_INDEXING_MEMORY, + max_indexing_memory.to_string(), + ); + } + export_to_env_if_not_present( + MEILI_MAX_INDEXING_THREADS, + max_indexing_threads.0.to_string(), + ); + } +} + impl TryFrom<&IndexerOpts> for IndexerConfig { type Error = anyhow::Error; @@ -77,8 +109,17 @@ impl Default for IndexerOpts { } } +impl SchedulerConfig { + pub fn export_to_env(self) { + let SchedulerConfig { + disable_auto_batching, + } = self; + export_to_env_if_not_present(DISABLE_AUTO_BATCHING, disable_auto_batching.to_string()); + } +} + /// A type used to detect the max memory available and use 2/3 of it. -#[derive(Debug, Clone, Copy, Serialize)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] pub struct MaxMemory(Option); impl FromStr for MaxMemory { @@ -134,7 +175,7 @@ fn total_memory_bytes() -> Option { } } -#[derive(Debug, Clone, Copy, Serialize)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] pub struct MaxThreads(usize); impl FromStr for MaxThreads { @@ -164,3 +205,7 @@ impl Deref for MaxThreads { &self.0 } } + +fn default_log_every_n() -> usize { + DEFAULT_LOG_EVERY_N +} diff --git a/meilisearch-lib/src/tasks/task_store/store.rs b/meilisearch-lib/src/tasks/task_store/store.rs index 24d0d3a65..32b20aeb8 100644 --- a/meilisearch-lib/src/tasks/task_store/store.rs +++ b/meilisearch-lib/src/tasks/task_store/store.rs @@ -63,7 +63,7 @@ impl Store { /// Returns the id for the next task. /// /// The required `mut txn` acts as a reservation system. It guarantees that as long as you commit - /// the task to the store in the same transaction, no one else will hav this task id. + /// the task to the store in the same transaction, no one else will have this task id. pub fn next_task_id(&self, txn: &mut RwTxn) -> Result { let id = self .tasks
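Note on using the new configuration file (a minimal sketch, not part of the patch): the loader introduced in `Opt::try_build` looks for `./config.toml` in the working directory by default and applies the priority config file < environment variables < command-line arguments. The commands below are illustrative only; the binary name, paths and values are assumptions, while the `--config-file-path` flag, the `MEILI_*` variables and the lookup/error behaviour are taken from this diff.

    # Write a config file in the working directory; values from it have the lowest priority.
    cat > ./config.toml <<'EOF'
    http_addr = "127.0.0.1:7700"
    max_index_size = "10 GiB"
    EOF

    # An environment variable overrides the file...
    MEILI_MAX_INDEX_SIZE="20 GiB" ./meilisearch

    # ...and a command-line argument overrides both.
    MEILI_MAX_INDEX_SIZE="20 GiB" ./meilisearch --max-index-size "30 GiB"

    # A file elsewhere can be passed explicitly; a missing user-supplied file is an error,
    # whereas a missing ./config.toml is silently ignored.
    ./meilisearch --config-file-path ./path/to/config.toml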