// MeiliSearch/meilisearch-http/src/option.rs

2020-12-12 13:32:06 +01:00
use std::io::{BufReader, Read};
use std::path::PathBuf;
use std::sync::Arc;
2021-03-15 18:11:10 +01:00
use std::{error, fs};
2020-12-12 13:32:06 +01:00
2020-12-22 14:02:41 +01:00
use byte_unit::Byte;
2021-03-15 18:11:10 +01:00
use grenad::CompressionType;
2020-12-12 13:32:06 +01:00
use rustls::internal::pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
use rustls::{
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, NoClientAuth,
RootCertStore,
};
use structopt::StructOpt;
#[derive(Debug, Clone, StructOpt)]
pub struct IndexerOpts {
/// The amount of documents to skip before printing
/// a log regarding the indexing advancement.
#[structopt(long, default_value = "100000")] // 100k
pub log_every_n: usize,
2021-02-04 13:21:15 +01:00
/// Grenad max number of chunks in bytes.
#[structopt(long)]
pub max_nb_chunks: Option<usize>,
2021-02-04 13:21:15 +01:00
/// The maximum amount of memory to use for the Grenad buffer. It is recommended
/// to use something like 80%-90% of the available memory.
///
/// It is automatically split by the number of jobs e.g. if you use 7 jobs
/// and 7 GB of max memory, each thread will use a maximum of 1 GB.
#[structopt(long, default_value = "7 GiB")]
pub max_memory: Byte,
/// Size of the linked hash map cache when indexing.
/// The bigger it is, the faster the indexing is but the more memory it takes.
#[structopt(long, default_value = "500")]
pub linked_hash_map_size: usize,
/// The name of the compression algorithm to use when compressing intermediate
2021-02-04 13:21:15 +01:00
/// Grenad chunks while indexing documents.
///
/// Choosing a fast algorithm will make the indexing faster but may consume more memory.
#[structopt(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])]
pub chunk_compression_type: CompressionType,
/// The level of compression of the chosen algorithm.
#[structopt(long, requires = "chunk-compression-type")]
pub chunk_compression_level: Option<u32>,
/// The number of bytes to remove from the begining of the chunks while reading/sorting
/// or merging them.
///
/// File fusing must only be enable on file systems that support the `FALLOC_FL_COLLAPSE_RANGE`,
/// (i.e. ext4 and XFS). File fusing will only work if the `enable-chunk-fusing` is set.
#[structopt(long, default_value = "4 GiB")]
pub chunk_fusing_shrink_size: Byte,
2021-02-04 13:21:15 +01:00
/// Enable the chunk fusing or not, this reduces the amount of disk space used.
#[structopt(long)]
pub enable_chunk_fusing: bool,
2020-12-22 17:13:50 +01:00
/// Number of parallel jobs for indexing, defaults to # of CPUs.
#[structopt(long)]
pub indexing_jobs: Option<usize>,
}
2021-01-29 19:14:23 +01:00
impl Default for IndexerOpts {
fn default() -> Self {
Self {
2021-02-04 13:21:15 +01:00
log_every_n: 100_000,
2021-01-29 19:14:23 +01:00
max_nb_chunks: None,
2021-02-04 13:21:15 +01:00
max_memory: Byte::from_str("1GiB").unwrap(),
linked_hash_map_size: 500,
2021-01-29 19:14:23 +01:00
chunk_compression_type: CompressionType::None,
chunk_compression_level: None,
2021-02-04 13:21:15 +01:00
chunk_fusing_shrink_size: Byte::from_str("4GiB").unwrap(),
2021-01-29 19:14:23 +01:00
enable_chunk_fusing: false,
indexing_jobs: None,
}
}
}
2020-12-12 13:32:06 +01:00
/// Values accepted by the `--env` option (`MEILI_ENV`) of `Opt`.
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
2020-12-22 14:02:41 +01:00
#[derive(Debug, Clone, StructOpt)]
2020-12-12 13:32:06 +01:00
pub struct Opt {
/// The destination where the database must be created.
#[structopt(long, env = "MEILI_DB_PATH", default_value = "./data.ms")]
2020-12-22 17:13:50 +01:00
pub db_path: PathBuf,
2020-12-12 13:32:06 +01:00
/// The address on which the http server will listen.
#[structopt(long, env = "MEILI_HTTP_ADDR", default_value = "127.0.0.1:7700")]
pub http_addr: String,
/// The master key allowing you to do everything on the server.
#[structopt(long, env = "MEILI_MASTER_KEY")]
pub master_key: Option<String>,
/// The Sentry DSN to use for error reporting. This defaults to the MeiliSearch Sentry project.
/// You can disable sentry all together using the `--no-sentry` flag or `MEILI_NO_SENTRY` environment variable.
#[cfg(all(not(debug_assertions), feature = "sentry"))]
2021-03-15 18:11:10 +01:00
#[structopt(
long,
env = "SENTRY_DSN",
default_value = "https://5ddfa22b95f241198be2271aaf028653@sentry.io/3060337"
)]
2020-12-12 13:32:06 +01:00
pub sentry_dsn: String,
/// Disable Sentry error reporting.
#[structopt(long, env = "MEILI_NO_SENTRY")]
2021-02-23 14:13:43 +01:00
#[cfg(all(not(debug_assertions), feature = "sentry"))]
2020-12-12 13:32:06 +01:00
pub no_sentry: bool,
/// This environment variable must be set to `production` if you are running in production.
/// If the server is running in development mode more logs will be displayed,
/// and the master key can be avoided which implies that there is no security on the updates routes.
/// This is useful to debug when integrating the engine with another service.
#[structopt(long, env = "MEILI_ENV", default_value = "development", possible_values = &POSSIBLE_ENV)]
pub env: String,
/// Do not send analytics to Meili.
#[structopt(long, env = "MEILI_NO_ANALYTICS")]
pub no_analytics: bool,
/// The maximum size, in bytes, of the main lmdb database directory
2020-12-22 14:02:41 +01:00
#[structopt(long, env = "MEILI_MAX_MDB_SIZE", default_value = "100 GiB")]
pub max_mdb_size: Byte,
2020-12-12 13:32:06 +01:00
/// The maximum size, in bytes, of the update lmdb database directory
2020-12-22 14:02:41 +01:00
#[structopt(long, env = "MEILI_MAX_UDB_SIZE", default_value = "10 GiB")]
pub max_udb_size: Byte,
2020-12-12 13:32:06 +01:00
/// The maximum size, in bytes, of accepted JSON payloads
2020-12-22 14:02:41 +01:00
#[structopt(long, env = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT", default_value = "10 MiB")]
pub http_payload_size_limit: Byte,
2020-12-12 13:32:06 +01:00
/// Read server certificates from CERTFILE.
/// This should contain PEM-format certificates
/// in the right order (the first certificate should
/// certify KEYFILE, the last should be a root CA).
#[structopt(long, env = "MEILI_SSL_CERT_PATH", parse(from_os_str))]
pub ssl_cert_path: Option<PathBuf>,
/// Read private key from KEYFILE. This should be a RSA
/// private key or PKCS8-encoded private key, in PEM format.
#[structopt(long, env = "MEILI_SSL_KEY_PATH", parse(from_os_str))]
pub ssl_key_path: Option<PathBuf>,
/// Enable client authentication, and accept certificates
/// signed by those roots provided in CERTFILE.
#[structopt(long, env = "MEILI_SSL_AUTH_PATH", parse(from_os_str))]
pub ssl_auth_path: Option<PathBuf>,
/// Read DER-encoded OCSP response from OCSPFILE and staple to certificate.
/// Optional
#[structopt(long, env = "MEILI_SSL_OCSP_PATH", parse(from_os_str))]
pub ssl_ocsp_path: Option<PathBuf>,
/// Send a fatal alert if the client does not complete client authentication.
#[structopt(long, env = "MEILI_SSL_REQUIRE_AUTH")]
pub ssl_require_auth: bool,
/// SSL support session resumption
#[structopt(long, env = "MEILI_SSL_RESUMPTION")]
pub ssl_resumption: bool,
/// SSL support tickets.
#[structopt(long, env = "MEILI_SSL_TICKETS")]
pub ssl_tickets: bool,
/// Defines the path of the snapshot file to import.
/// This option will, by default, stop the process if a database already exist or if no snapshot exists at
/// the given path. If this option is not specified no snapshot is imported.
#[structopt(long)]
pub import_snapshot: Option<PathBuf>,
/// The engine will ignore a missing snapshot and not return an error in such case.
#[structopt(long, requires = "import-snapshot")]
pub ignore_missing_snapshot: bool,
/// The engine will skip snapshot importation and not return an error in such case.
#[structopt(long, requires = "import-snapshot")]
pub ignore_snapshot_if_db_exists: bool,
/// Defines the directory path where meilisearch will create snapshot each snapshot_time_gap.
#[structopt(long, env = "MEILI_SNAPSHOT_DIR", default_value = "snapshots/")]
pub snapshot_dir: PathBuf,
/// Activate snapshot scheduling.
#[structopt(long, env = "MEILI_SCHEDULE_SNAPSHOT")]
pub schedule_snapshot: bool,
/// Defines time interval, in seconds, between each snapshot creation.
2021-03-17 12:01:56 +01:00
#[structopt(long, env = "MEILI_SNAPSHOT_INTERVAL_SEC", default_value = "86400")] // 24h
pub snapshot_interval_sec: u64,
2020-12-12 13:32:06 +01:00
/// Folder where dumps are created when the dump route is called.
#[structopt(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")]
pub dumps_dir: PathBuf,
/// Import a dump from the specified path, must be a `.tar.gz` file.
#[structopt(long, conflicts_with = "import-snapshot")]
pub import_dump: Option<PathBuf>,
/// The batch size used in the importation process, the bigger it is the faster the dump is created.
#[structopt(long, env = "MEILI_DUMP_BATCH_SIZE", default_value = "1024")]
pub dump_batch_size: usize,
2020-12-22 17:13:50 +01:00
#[structopt(flatten)]
pub indexer_options: IndexerOpts,
2020-12-12 13:32:06 +01:00
}
impl Opt {
    /// Builds the rustls server configuration described by the `ssl_*` options.
    ///
    /// Returns `Ok(None)` unless **both** `ssl_cert_path` and `ssl_key_path` are
    /// set; in particular, providing only one of them yields no TLS
    /// configuration and no error.
    ///
    /// # Errors
    ///
    /// Returns an error when a certificate, key or OCSP file cannot be opened or
    /// parsed, when a client-auth root certificate is rejected by the root
    /// store, or when rustls refuses the certificate/private key pair.
    pub fn get_ssl_config(&self) -> Result<Option<rustls::ServerConfig>, Box<dyn error::Error>> {
        if let (Some(cert_path), Some(key_path)) = (&self.ssl_cert_path, &self.ssl_key_path) {
            let client_auth = match &self.ssl_auth_path {
                Some(auth_path) => {
                    let roots = load_certs(auth_path.to_path_buf())?;
                    let mut client_auth_roots = RootCertStore::empty();
                    for root in roots {
                        // The roots come from a user-supplied file: report a
                        // malformed certificate instead of panicking.
                        client_auth_roots
                            .add(&root)
                            .map_err(|_| "cannot add client auth root certificate")?;
                    }
                    if self.ssl_require_auth {
                        AllowAnyAuthenticatedClient::new(client_auth_roots)
                    } else {
                        AllowAnyAnonymousOrAuthenticatedClient::new(client_auth_roots)
                    }
                }
                None => NoClientAuth::new(),
            };

            let mut config = rustls::ServerConfig::new(client_auth);
            // NOTE(review): `KeyLogFile` is a TLS debugging aid that logs session
            // secrets (presumably driven by `SSLKEYLOGFILE`) -- confirm it is
            // wanted in production builds.
            config.key_log = Arc::new(rustls::KeyLogFile::new());

            let certs = load_certs(cert_path.to_path_buf())?;
            let privkey = load_private_key(key_path.to_path_buf())?;
            let ocsp = load_ocsp(&self.ssl_ocsp_path)?;
            config
                .set_single_cert_with_ocsp_and_sct(certs, privkey, ocsp, vec![])
                .map_err(|_| "bad certificates/private key")?;

            if self.ssl_resumption {
                config.set_persistence(rustls::ServerSessionMemoryCache::new(256));
            }

            if self.ssl_tickets {
                config.ticketer = rustls::Ticketer::new();
            }

            Ok(Some(config))
        } else {
            Ok(None)
        }
    }
}
/// Parses every PEM certificate found in the file at `filename`.
///
/// # Errors
///
/// Fails with `"cannot open certificate file"` when the file cannot be opened
/// and with `"cannot read certificate file"` when the PEM parser rejects it.
fn load_certs(filename: PathBuf) -> Result<Vec<rustls::Certificate>, Box<dyn error::Error>> {
    let file = match fs::File::open(filename) {
        Ok(file) => file,
        Err(_) => return Err("cannot open certificate file".into()),
    };
    let mut pem = BufReader::new(file);
    match certs(&mut pem) {
        Ok(chain) => Ok(chain),
        Err(_) => Err("cannot read certificate file".into()),
    }
}
/// Loads the first private key found in the PEM file at `filename`.
///
/// The file is parsed twice, once for RSA keys and once for PKCS8 keys; a
/// PKCS8 key is preferred when both kinds are present.
///
/// # Errors
///
/// Fails when the file cannot be opened, when either parser rejects its
/// content, or when the file contains no private key at all.
fn load_private_key(filename: PathBuf) -> Result<rustls::PrivateKey, Box<dyn error::Error>> {
    let rsa_keys = {
        let keyfile = fs::File::open(&filename).map_err(|_| "cannot open private key file")?;
        let mut reader = BufReader::new(keyfile);
        rsa_private_keys(&mut reader).map_err(|_| "file contains invalid rsa private key")?
    };

    // Each parser consumes its reader, so the file is reopened for the second pass.
    let pkcs8_keys = {
        let keyfile = fs::File::open(&filename).map_err(|_| "cannot open private key file")?;
        let mut reader = BufReader::new(keyfile);
        pkcs8_private_keys(&mut reader)
            .map_err(|_| "file contains invalid pkcs8 private key (encrypted keys not supported)")?
    };

    // Prefer PKCS8 keys, fall back to RSA keys. The key file is user input, so
    // an empty file is reported as an error rather than tripping an assertion.
    pkcs8_keys
        .into_iter()
        .chain(rsa_keys)
        .next()
        .ok_or_else(|| "no private key found in private key file".into())
}
/// Reads the whole OCSP response file into memory.
///
/// Returns an empty buffer when `filename` is `None`.
///
/// # Errors
///
/// Fails with `"cannot open ocsp file"` when the file cannot be opened and
/// with `"cannot read ocsp file"` when reading its content fails.
fn load_ocsp(filename: &Option<PathBuf>) -> Result<Vec<u8>, Box<dyn error::Error>> {
    let mut ret = Vec::new();
    if let Some(name) = filename {
        fs::File::open(name)
            .map_err(|_| "cannot open ocsp file")?
            .read_to_end(&mut ret)
            .map_err(|_| "cannot read ocsp file")?;
    }
    Ok(ret)
}