diff --git a/Cargo.lock b/Cargo.lock index e15c8af90..bd17cdc0b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -458,6 +458,20 @@ name = "bytemuck" version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9966d2ab714d0f785dbac0a0396251a35280aeb42413281617d0209ab4898435" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e215f8c2f9f79cb53c8335e687ffd07d5bfcb6fe5fc80723762d0be46e7cc54" +dependencies = [ + "proc-macro2 1.0.27", + "quote 1.0.9", + "syn 1.0.73", +] [[package]] name = "byteorder" @@ -630,6 +644,12 @@ dependencies = [ "version_check", ] +[[package]] +name = "core-foundation-sys" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea221b5284a47e40033bf9b66f35f984ec0ea2931eb03505246cd27a963f981b" + [[package]] name = "cow-utils" version = "0.1.2" @@ -1097,13 +1117,14 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" [[package]] name = "grenad" -version = "0.1.0" -source = "git+https://github.com/Kerollmops/grenad.git?rev=3adcb26#3adcb267dcbc590c7da10eb5f887a254865b3dbe" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7824d499230110f4e4a8d4fd3fd4dc15c1347fce5082e4bba82eef17f43e1ed8" dependencies = [ + "bytemuck", "byteorder", "flate2", - "log", - "nix", + "lz4_flex", "snap", "tempfile", "zstd", @@ -1161,8 +1182,8 @@ dependencies = [ [[package]] name = "heed" -version = "0.12.0" -source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#8e5dc6d71c8166a8d7d0db059e6e51478942b551" +version = "0.12.1" +source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#fc017cf3394af737f92fd71e16f0499a78b79d65" dependencies = [ "byteorder", "heed-traits", @@ -1180,12 +1201,12 @@ dependencies = [ [[package]] name = "heed-traits" version = "0.7.0" -source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#8e5dc6d71c8166a8d7d0db059e6e51478942b551" +source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#fc017cf3394af737f92fd71e16f0499a78b79d65" [[package]] name = "heed-types" version = "0.7.2" -source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#8e5dc6d71c8166a8d7d0db059e6e51478942b551" +source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#fc017cf3394af737f92fd71e16f0499a78b79d65" dependencies = [ "bincode", "heed-traits", @@ -1534,6 +1555,15 @@ dependencies = [ "syn 0.15.44", ] +[[package]] +name = "lz4_flex" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5827b976d911b5d2e42b2ccfc7c0d2461a1414e8280436885218762fc529b3f8" +dependencies = [ + "twox-hash", +] + [[package]] name = "main_error" version = "0.1.1" @@ -1619,6 +1649,7 @@ dependencies = [ "siphasher", "slice-group-by", "structopt", + "sysinfo", "tar", "tempdir", "tempfile", @@ -1675,13 +1706,14 @@ dependencies = [ [[package]] name = "milli" -version = "0.11.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.11.0#c51bb6789cb3fbb6511138374b3443f9116a445c" +version = "0.12.0" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.12.0#5cbe8793251bbf143434c8a4c4e7195ca6c5f2ac" dependencies = [ "bstr", "byteorder", "chrono", "concat-arrays", + "crossbeam-channel", "csv", "either", "flate2", @@ -1788,18 +1820,6 @@ dependencies = [ "syn 1.0.73", ] -[[package]] -name = "nix" -version = "0.19.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2ccba0cfe4fdf15982d1674c69b1fd80bad427d293849982668dfe454bd61f2" -dependencies = [ - "bitflags", - "cc", - "cfg-if 1.0.0", - "libc", -] - [[package]] name = "normalize-line-endings" version = "0.3.0" @@ -2690,6 +2710,12 @@ dependencies = [ "version_check", ] +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "stdweb" version = "0.4.20" @@ -2812,6 +2838,21 @@ dependencies = [ "unicode-xid 0.2.2", ] +[[package]] +name = "sysinfo" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0af066e6272f2175c1783cfc2ebf3e2d8dfe2c182b00677fdeccbf8291af83fb" +dependencies = [ + "cfg-if 1.0.0", + "core-foundation-sys", + "libc", + "ntapi", + "once_cell", + "rayon", + "winapi", +] + [[package]] name = "tar" version = "0.4.35" @@ -3052,6 +3093,16 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" +[[package]] +name = "twox-hash" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f559b464de2e2bdabcac6a210d12e9b5a5973c251e102c44c585c71d51bd78e" +dependencies = [ + "cfg-if 1.0.0", + "static_assertions", +] + [[package]] name = "typenum" version = "1.13.0" diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index f0732c2c3..aa56ad69f 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -50,7 +50,7 @@ main_error = "0.1.0" meilisearch-error = { path = "../meilisearch-error" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" } memmap = "0.7.0" -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.11.0" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.12.0" } mime = "0.3.16" num_cpus = "1.13.0" once_cell = "1.5.2" @@ -76,6 +76,7 @@ pin-project = "1.0.7" whoami = { version = "1.1.2", optional = true } reqwest = { version = "0.11.3", features = ["json", "rustls-tls"], default-features = false, optional = true } serdeval = "0.1.0" +sysinfo = "0.20.0" [dev-dependencies] actix-rt = "2.1.0" diff --git a/meilisearch-http/src/index/update_handler.rs b/meilisearch-http/src/index/update_handler.rs index 1426cca01..f3977a00d 100644 --- a/meilisearch-http/src/index/update_handler.rs +++ b/meilisearch-http/src/index/update_handler.rs @@ -14,10 +14,8 @@ pub struct UpdateHandler { chunk_compression_level: Option, thread_pool: ThreadPool, log_frequency: usize, - max_memory: usize, - linked_hash_map_size: usize, + max_memory: Option, chunk_compression_type: CompressionType, - chunk_fusing_shrink_size: u64, } impl UpdateHandler { @@ -25,15 +23,14 @@ impl UpdateHandler { let thread_pool = rayon::ThreadPoolBuilder::new() .num_threads(opt.indexing_jobs.unwrap_or(num_cpus::get() / 2)) .build()?; + Ok(Self { max_nb_chunks: opt.max_nb_chunks, chunk_compression_level: opt.chunk_compression_level, thread_pool, log_frequency: opt.log_every_n, - max_memory: opt.max_memory.get_bytes() as usize, - linked_hash_map_size: opt.linked_hash_map_size, + max_memory: opt.max_memory.map(|m| m.get_bytes() as usize), chunk_compression_type: opt.chunk_compression_type, - chunk_fusing_shrink_size: opt.chunk_fusing_shrink_size.get_bytes(), }) } @@ -48,10 +45,10 @@ impl UpdateHandler { } update_builder.thread_pool(&self.thread_pool); update_builder.log_every_n(self.log_frequency); - update_builder.max_memory(self.max_memory); - update_builder.linked_hash_map_size(self.linked_hash_map_size); + if let Some(max_memory) = self.max_memory { + update_builder.max_memory(max_memory); + } update_builder.chunk_compression_type(self.chunk_compression_type); - update_builder.chunk_fusing_shrink_size(self.chunk_fusing_shrink_size); update_builder } diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index d87b98adb..f3c077c05 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -1,5 +1,9 @@ +use byte_unit::ByteError; +use std::fmt; use std::io::{BufReader, Read}; +use std::ops::Deref; use std::path::PathBuf; +use std::str::FromStr; use std::sync::Arc; use std::{error, fs}; @@ -11,6 +15,7 @@ use rustls::{ RootCertStore, }; use structopt::StructOpt; +use sysinfo::{RefreshKind, System, SystemExt}; #[derive(Debug, Clone, StructOpt)] pub struct IndexerOpts { @@ -23,13 +28,15 @@ pub struct IndexerOpts { #[structopt(long)] pub max_nb_chunks: Option, - /// The maximum amount of memory to use for the Grenad buffer. It is recommended - /// to use something like 80%-90% of the available memory. + /// The maximum amount of memory the indexer will use. It defaults to 2/3 + /// of the available memory. It is recommended to use something like 80%-90% + /// of the available memory, no more. /// - /// It is automatically split by the number of jobs e.g. if you use 7 jobs - /// and 7 GB of max memory, each thread will use a maximum of 1 GB. - #[structopt(long, default_value = "7 GiB")] - pub max_memory: Byte, + /// In case the engine is unable to retrieve the available memory the engine will + /// try to use the memory it needs but without real limit, this can lead to + /// Out-Of-Memory issues and it is recommended to specify the amount of memory to use. + #[structopt(long, default_value)] + pub max_memory: MaxMemory, /// Size of the linked hash map cache when indexing. /// The bigger it is, the faster the indexing is but the more memory it takes. @@ -69,7 +76,7 @@ impl Default for IndexerOpts { Self { log_every_n: 100_000, max_nb_chunks: None, - max_memory: Byte::from_str("1GiB").unwrap(), + max_memory: MaxMemory::default(), linked_hash_map_size: 500, chunk_compression_type: CompressionType::None, chunk_compression_level: None, @@ -240,6 +247,57 @@ impl Opt { } } +/// A type used to detect the max memory available and use 2/3 of it. +#[derive(Debug, Clone, Copy)] +pub struct MaxMemory(Option); + +impl FromStr for MaxMemory { + type Err = ByteError; + + fn from_str(s: &str) -> Result { + Byte::from_str(s).map(Some).map(MaxMemory) + } +} + +impl Default for MaxMemory { + fn default() -> MaxMemory { + MaxMemory( + total_memory_bytes() + .map(|bytes| bytes * 2 / 3) + .map(Byte::from_bytes), + ) + } +} + +impl fmt::Display for MaxMemory { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self.0 { + Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)), + None => f.write_str("unknown"), + } + } +} + +impl Deref for MaxMemory { + type Target = Option; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +/// Returns the total amount of bytes available or `None` if this system isn't supported. +fn total_memory_bytes() -> Option { + if System::IS_SUPPORTED { + let memory_kind = RefreshKind::new().with_memory(); + let mut system = System::new_with_specifics(memory_kind); + system.refresh_memory(); + Some(system.total_memory() * 1024) // KiB into bytes + } else { + None + } +} + fn load_certs(filename: PathBuf) -> Result, Box> { let certfile = fs::File::open(filename).map_err(|_| "cannot open certificate file")?; let mut reader = BufReader::new(certfile);