From 705d10a96d995e20bcf6d63e5728dafb5890e9c1 Mon Sep 17 00:00:00 2001 From: 2shiori17 <98276492+2shiori17@users.noreply.github.com> Date: Thu, 24 Mar 2022 18:52:36 +0000 Subject: [PATCH 1/4] Add instance options for RAM and CPU usage --- meilisearch-http/src/option.rs | 4 +- meilisearch-http/tests/common/server.rs | 2 +- meilisearch-lib/src/options.rs | 68 ++++++++++++++++++++----- 3 files changed, 58 insertions(+), 16 deletions(-) diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 7284b4588..9fefb4231 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -145,8 +145,8 @@ pub struct Opt { #[clap(long, env = "MEILI_LOG_LEVEL", default_value = "info")] pub log_level: String, - #[serde(skip)] - #[clap(skip)] + #[serde(flatten)] + #[clap(flatten)] pub indexer_options: IndexerOpts, #[serde(flatten)] diff --git a/meilisearch-http/tests/common/server.rs b/meilisearch-http/tests/common/server.rs index dcb4b6266..8ed106dd3 100644 --- a/meilisearch-http/tests/common/server.rs +++ b/meilisearch-http/tests/common/server.rs @@ -152,7 +152,7 @@ pub fn default_settings(dir: impl AsRef) -> Opt { ignore_dump_if_db_exists: false, indexer_options: IndexerOpts { // memory has to be unlimited because several meilisearch are running in test context. - max_memory: MaxMemory::unlimited(), + max_indexing_memory: MaxMemory::unlimited(), ..Default::default() }, log_level: "off".into(), diff --git a/meilisearch-lib/src/options.rs b/meilisearch-lib/src/options.rs index 195576799..bb24d94b0 100644 --- a/meilisearch-lib/src/options.rs +++ b/meilisearch-lib/src/options.rs @@ -1,5 +1,5 @@ use core::fmt; -use std::{convert::TryFrom, ops::Deref, str::FromStr}; +use std::{convert::TryFrom, num::ParseIntError, ops::Deref, str::FromStr}; use byte_unit::{Byte, ByteError}; use clap::Parser; @@ -7,15 +7,17 @@ use milli::{update::IndexerConfig, CompressionType}; use serde::Serialize; use sysinfo::{RefreshKind, System, SystemExt}; -#[derive(Debug, Clone, Parser)] +#[derive(Debug, Clone, Parser, Serialize)] pub struct IndexerOpts { /// The amount of documents to skip before printing /// a log regarding the indexing advancement. - #[clap(long, default_value = "100000")] // 100k + #[serde(skip)] + #[clap(long, default_value = "100000", hide = true)] // 100k pub log_every_n: usize, /// Grenad max number of chunks in bytes. - #[clap(long)] + #[serde(skip)] + #[clap(long, hide = true)] pub max_nb_chunks: Option, /// The maximum amount of memory the indexer will use. It defaults to 2/3 @@ -25,22 +27,30 @@ pub struct IndexerOpts { /// In case the engine is unable to retrieve the available memory the engine will /// try to use the memory it needs but without real limit, this can lead to /// Out-Of-Memory issues and it is recommended to specify the amount of memory to use. - #[clap(long, default_value_t)] - pub max_memory: MaxMemory, + #[clap(long, env = "MEILI_MAX_INDEXING_MEMORY", default_value_t)] + pub max_indexing_memory: MaxMemory, + + /// The maximum number of threads the indexer will use. + /// It defaults to half of the available threads. + #[clap(long, env = "MEILI_MAX_INDEXING_THREADS", default_value_t)] + pub max_indexing_threads: MaxThreads, /// The name of the compression algorithm to use when compressing intermediate /// Grenad chunks while indexing documents. /// /// Choosing a fast algorithm will make the indexing faster but may consume more memory. - #[clap(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])] + #[serde(skip)] + #[clap(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"], hide = true)] pub chunk_compression_type: CompressionType, /// The level of compression of the chosen algorithm. - #[clap(long, requires = "chunk-compression-type")] + #[serde(skip)] + #[clap(long, requires = "chunk-compression-type", hide = true)] pub chunk_compression_level: Option, /// Number of parallel jobs for indexing, defaults to # of CPUs. - #[clap(long)] + #[serde(skip)] + #[clap(long, hide = true)] pub indexing_jobs: Option, } @@ -74,13 +84,13 @@ impl TryFrom<&IndexerOpts> for IndexerConfig { fn try_from(other: &IndexerOpts) -> Result { let thread_pool = rayon::ThreadPoolBuilder::new() - .num_threads(other.indexing_jobs.unwrap_or(num_cpus::get() / 2)) + .num_threads(other.indexing_jobs.unwrap_or(*other.max_indexing_threads)) .build()?; Ok(Self { log_every_n: Some(other.log_every_n), max_nb_chunks: other.max_nb_chunks, - max_memory: (*other.max_memory).map(|b| b.get_bytes() as usize), + max_memory: (*other.max_indexing_memory).map(|b| b.get_bytes() as usize), chunk_compression_type: other.chunk_compression_type, chunk_compression_level: other.chunk_compression_level, thread_pool: Some(thread_pool), @@ -95,7 +105,8 @@ impl Default for IndexerOpts { Self { log_every_n: 100_000, max_nb_chunks: None, - max_memory: MaxMemory::default(), + max_indexing_memory: MaxMemory::default(), + max_indexing_threads: MaxThreads::default(), chunk_compression_type: CompressionType::None, chunk_compression_level: None, indexing_jobs: None, @@ -104,7 +115,7 @@ impl Default for IndexerOpts { } /// A type used to detect the max memory available and use 2/3 of it. -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, Serialize)] pub struct MaxMemory(Option); impl FromStr for MaxMemory { @@ -159,3 +170,34 @@ fn total_memory_bytes() -> Option { None } } + +#[derive(Debug, Clone, Copy, Serialize)] +pub struct MaxThreads(usize); + +impl FromStr for MaxThreads { + type Err = ParseIntError; + + fn from_str(s: &str) -> Result { + usize::from_str(s).map(Self) + } +} + +impl Default for MaxThreads { + fn default() -> Self { + MaxThreads(num_cpus::get() / 2) + } +} + +impl fmt::Display for MaxThreads { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl Deref for MaxThreads { + type Target = usize; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} From 9f1c88680d228f12e90e20ff704fc03106215500 Mon Sep 17 00:00:00 2001 From: shiori <98276492+2shiori17@users.noreply.github.com> Date: Thu, 31 Mar 2022 02:48:41 +0900 Subject: [PATCH 2/4] Fix my mistake when resolving conflicts --- meilisearch-http/tests/common/server.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/meilisearch-http/tests/common/server.rs b/meilisearch-http/tests/common/server.rs index 6c2afa616..b439ec52e 100644 --- a/meilisearch-http/tests/common/server.rs +++ b/meilisearch-http/tests/common/server.rs @@ -138,7 +138,6 @@ pub fn default_settings(dir: impl AsRef) -> Opt { indexer_options: IndexerOpts { // memory has to be unlimited because several meilisearch are running in test context. max_indexing_memory: MaxMemory::unlimited(), - ..Default::default() ..Parser::parse_from(None as Option<&str>) }, ..Parser::parse_from(None as Option<&str>) From a2d7c16f91917139e16d2bd55368a067b57896ec Mon Sep 17 00:00:00 2001 From: 2shiori17 <98276492+2shiori17@users.noreply.github.com> Date: Thu, 31 Mar 2022 09:27:29 +0900 Subject: [PATCH 3/4] Remove indexing_jobs option --- meilisearch-lib/src/options.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/meilisearch-lib/src/options.rs b/meilisearch-lib/src/options.rs index 7fb29e2bd..d51c6ff35 100644 --- a/meilisearch-lib/src/options.rs +++ b/meilisearch-lib/src/options.rs @@ -34,11 +34,6 @@ pub struct IndexerOpts { /// It defaults to half of the available threads. #[clap(long, env = "MEILI_MAX_INDEXING_THREADS", default_value_t)] pub max_indexing_threads: MaxThreads, - - /// Number of parallel jobs for indexing, defaults to # of CPUs. - #[serde(skip)] - #[clap(long, hide = true)] - pub indexing_jobs: Option, } #[derive(Debug, Clone, Parser, Default, Serialize)] @@ -71,7 +66,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig { fn try_from(other: &IndexerOpts) -> Result { let thread_pool = rayon::ThreadPoolBuilder::new() - .num_threads(other.indexing_jobs.unwrap_or(*other.max_indexing_threads)) + .num_threads(*other.max_indexing_threads) .build()?; Ok(Self { @@ -92,7 +87,6 @@ impl Default for IndexerOpts { max_nb_chunks: None, max_indexing_memory: MaxMemory::default(), max_indexing_threads: MaxThreads::default(), - indexing_jobs: None, } } } From 98107565c0702294e8031d8b12b5ffb19e865203 Mon Sep 17 00:00:00 2001 From: 2shiori17 <98276492+2shiori17@users.noreply.github.com> Date: Thu, 31 Mar 2022 09:32:45 +0900 Subject: [PATCH 4/4] Add more detailed comments for max_indexing_threads --- meilisearch-lib/src/options.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/meilisearch-lib/src/options.rs b/meilisearch-lib/src/options.rs index d51c6ff35..c71f1cba6 100644 --- a/meilisearch-lib/src/options.rs +++ b/meilisearch-lib/src/options.rs @@ -31,6 +31,9 @@ pub struct IndexerOpts { pub max_indexing_memory: MaxMemory, /// The maximum number of threads the indexer will use. + /// If the number set is higher than the real number of cores available in the machine, + /// it will use the maximum number of available cores. + /// /// It defaults to half of the available threads. #[clap(long, env = "MEILI_MAX_INDEXING_THREADS", default_value_t)] pub max_indexing_threads: MaxThreads,