From 9feba5028d3dad81fa721ad1a03f59031f339ef2 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 8 Jul 2024 20:58:27 +0200 Subject: [PATCH] update byte-unit --- Cargo.lock | 232 ++++++++++++++++++- meilisearch/Cargo.toml | 3 +- meilisearch/src/error.rs | 4 +- meilisearch/src/lib.rs | 9 +- meilisearch/src/option.rs | 18 +- meilisearch/tests/common/server.rs | 8 +- meilisearch/tests/documents/add_documents.rs | 2 +- tracing-trace/Cargo.toml | 3 +- tracing-trace/src/processor/fmt.rs | 4 +- 9 files changed, 253 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 62564200c..94ae6e541 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -45,7 +45,7 @@ dependencies = [ "actix-service", "actix-tls", "actix-utils", - "ahash", + "ahash 0.8.11", "base64 0.22.1", "bitflags 2.6.0", "brotli", @@ -181,7 +181,7 @@ dependencies = [ "actix-tls", "actix-utils", "actix-web-codegen", - "ahash", + "ahash 0.8.11", "bytes", "bytestring", "cfg-if", @@ -256,6 +256,17 @@ dependencies = [ "cpufeatures", ] +[[package]] +name = "ahash" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "ahash" version = "0.8.11" @@ -378,6 +389,12 @@ dependencies = [ "derive_arbitrary", ] +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + [[package]] name = "arroy" version = "0.4.0" @@ -583,6 +600,18 @@ dependencies = [ "serde", ] +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -592,6 +621,30 @@ 
dependencies = [ "generic-array", ] +[[package]] +name = "borsh" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6362ed55def622cddc70a4746a68554d7b687713770de539e59a739b249f8ed" +dependencies = [ + "borsh-derive", + "cfg_aliases", +] + +[[package]] +name = "borsh-derive" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3ef8005764f53cd4dca619f5bf64cafd4664dada50ece25e4d81de54c80cc0b" +dependencies = [ + "once_cell", + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.68", + "syn_derive", +] + [[package]] name = "brotli" version = "6.0.0" @@ -641,14 +694,37 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "byte-unit" -version = "4.0.19" +version = "5.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da78b32057b8fdfc352504708feeba7216dcd65a2c9ab02978cbd288d1279b6c" +checksum = "33ac19bdf0b2665407c39d82dbc937e951e7e2001609f0fb32edd0af45a2d63e" dependencies = [ + "rust_decimal", "serde", "utf8-width", ] +[[package]] +name = "bytecheck" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "bytecount" version = "0.6.3" @@ -866,6 +942,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "change-detection" version = "1.2.0" @@ -1882,6 +1964,12 @@ version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ab85b9b05e3978cc9a9cf8fea7f01b494e1a09ed3037e16ba39edc7a29eb61a" +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + [[package]] name = "futures" version = "0.3.30" @@ -2250,13 +2338,22 @@ dependencies = [ "byteorder", ] +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash 0.7.8", +] + [[package]] name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ - "ahash", + "ahash 0.8.11", "allocator-api2", ] @@ -2533,7 +2630,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.14.5", "serde", ] @@ -2667,7 +2764,7 @@ checksum = "93f0c1347cd3ac8d7c6e3a2dc33ac496d365cf09fc0831aa61111e1a6738983e" dependencies = [ "cedarwood", "fxhash", - "hashbrown", + "hashbrown 0.14.5", "lazy_static", "phf", "phf_codegen", @@ -4211,6 +4308,26 @@ version = "2.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "pulp" version = "0.18.9" @@ -4279,6 +4396,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + [[package]] name = "rand" version = "0.8.5" @@ -4448,6 +4571,15 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" +[[package]] +name = "rend" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" +dependencies = [ + "bytecheck", +] + [[package]] name = "reqwest" version = "0.12.5" @@ -4508,6 +4640,35 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rkyv" +version = "0.7.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cba464629b3394fc4dbc6f940ff8f5b4ff5c7aef40f29166fd4ad12acbc99c0" +dependencies = [ + "bitvec", + "bytecheck", + "bytes", + "hashbrown 0.12.3", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7dddfff8de25e6f62b9d64e6e432bf1c6736c57d20323e15ee10435fbda7c65" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "roaring" version = "0.10.6" @@ -4531,6 +4692,22 @@ dependencies = [ "smallvec", ] +[[package]] +name = 
"rust_decimal" +version = "1.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1790d1c4c0ca81211399e0e0af16333276f375209e71a37b67698a373db5b47a" +dependencies = [ + "arrayvec", + "borsh", + "bytes", + "num-traits", + "rand", + "rkyv", + "serde", + "serde_json", +] + [[package]] name = "rustc-demangle" version = "0.1.24" @@ -4685,6 +4862,12 @@ dependencies = [ "untrusted", ] +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + [[package]] name = "segment" version = "0.2.4" @@ -4848,6 +5031,12 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" +[[package]] +name = "simdutf8" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" + [[package]] name = "similar" version = "2.5.0" @@ -5044,6 +5233,18 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "syn_derive" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1329189c02ff984e9736652b1631330da25eaa6bc639089ed4915d25446cbe7b" +dependencies = [ + "proc-macro-error", + "proc-macro2", + "quote", + "syn 2.0.68", +] + [[package]] name = "sync_wrapper" version = "1.0.1" @@ -5099,6 +5300,12 @@ dependencies = [ "windows", ] +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "tar" version = "0.4.41" @@ -5889,7 +6096,7 @@ version = "0.16.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "471d1c1645d361eb782a1650b1786a8fb58dd625e681a04c09f5ff7c8764a7b0" dependencies = [ - "hashbrown", + 
"hashbrown 0.14.5", "once_cell", ] @@ -6110,6 +6317,15 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + [[package]] name = "xattr" version = "1.3.1" diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 52f0cf6b7..09b397929 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -32,8 +32,9 @@ anyhow = { version = "1.0.86", features = ["backtrace"] } async-stream = "0.3.5" async-trait = "0.1.81" bstr = "1.9.1" -byte-unit = { version = "4.0.19", default-features = false, features = [ +byte-unit = { version = "5.1.4", default-features = false, features = [ "std", + "byte", "serde", ] } bytes = "1.6.0" diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index 96496a33f..1d2475948 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -1,6 +1,6 @@ use actix_web as aweb; use aweb::error::{JsonPayloadError, QueryPayloadError}; -use byte_unit::Byte; +use byte_unit::{Byte, UnitType}; use meilisearch_types::document_formats::{DocumentFormatError, PayloadType}; use meilisearch_types::error::{Code, ErrorCode, ResponseError}; use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError}; @@ -33,7 +33,7 @@ pub enum MeilisearchHttpError { TooManySearchRequests(usize), #[error("Internal error: Search limiter is down.")] SearchLimiterIsDown, - #[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_bytes(*.0 as u64).get_appropriate_unit(true))] + #[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_u64(*.0 as u64).get_appropriate_unit(UnitType::Binary))] PayloadTooLarge(usize), #[error("Two indexes must be given for each swap. 
The list `[{}]` contains {} indexes.", .0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len() diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index 2af103593..b33826141 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -15,6 +15,7 @@ use std::fs::File; use std::io::{BufReader, BufWriter}; use std::num::NonZeroUsize; use std::path::Path; +use std::str::FromStr; use std::sync::Arc; use std::thread::{self, available_parallelism}; use std::time::Duration; @@ -300,15 +301,15 @@ fn open_or_create_database_unchecked( dumps_path: opt.dump_dir.clone(), webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()), webhook_authorization_header: opt.task_webhook_authorization_header.clone(), - task_db_size: opt.max_task_db_size.get_bytes() as usize, - index_base_map_size: opt.max_index_size.get_bytes() as usize, + task_db_size: opt.max_task_db_size.as_u64() as usize, + index_base_map_size: opt.max_index_size.as_u64() as usize, enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage, indexer_config: (&opt.indexer_options).try_into()?, autobatching_enabled: true, cleanup_enabled: !opt.experimental_replication_parameters, max_number_of_tasks: 1_000_000, max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks, - index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize, + index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize, index_count: DEFAULT_INDEX_COUNT, instance_features, })?) 
@@ -476,7 +477,7 @@ pub fn configure_data( opt.experimental_search_queue_size, available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap()), ); - let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize; + let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize; config .app_data(index_scheduler) .app_data(auth) diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index fed824079..af4da1113 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -9,7 +9,7 @@ use std::str::FromStr; use std::sync::Arc; use std::{env, fmt, fs}; -use byte_unit::{Byte, ByteError}; +use byte_unit::{Byte, ParseError, UnitType}; use clap::Parser; use meilisearch_types::features::InstanceTogglableFeatures; use meilisearch_types::milli::update::IndexerConfig; @@ -674,7 +674,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig { Ok(Self { log_every_n: Some(DEFAULT_LOG_EVERY_N), - max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize), + max_memory: other.max_indexing_memory.map(|b| b.as_u64() as usize), thread_pool: Some(thread_pool), max_positions_per_attributes: None, skip_index_budget: other.skip_index_budget, @@ -688,23 +688,25 @@ impl TryFrom<&IndexerOpts> for IndexerConfig { pub struct MaxMemory(Option<Byte>); impl FromStr for MaxMemory { - type Err = ByteError; + type Err = ParseError; - fn from_str(s: &str) -> Result<MaxMemory, ByteError> { + fn from_str(s: &str) -> Result<MaxMemory, ParseError> { Byte::from_str(s).map(Some).map(MaxMemory) } } impl Default for MaxMemory { fn default() -> MaxMemory { - MaxMemory(total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_bytes)) + MaxMemory(total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_u64)) } } impl fmt::Display for MaxMemory { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self.0 { - Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)), + Some(memory) => { + write!(f, "{}", 
memory.get_appropriate_unit(UnitType::Binary)) + } None => 
f.write_str("unknown"), } } @@ -844,11 +846,11 @@ fn default_env() -> String { } fn default_max_index_size() -> Byte { - Byte::from_bytes(INDEX_SIZE) + Byte::from_u64(INDEX_SIZE) } fn default_max_task_db_size() -> Byte { - Byte::from_bytes(TASK_DB_SIZE) + Byte::from_u64(TASK_DB_SIZE) } fn default_http_payload_size_limit() -> Byte { diff --git a/meilisearch/tests/common/server.rs b/meilisearch/tests/common/server.rs index 41607f76d..e2c25efc6 100644 --- a/meilisearch/tests/common/server.rs +++ b/meilisearch/tests/common/server.rs @@ -6,7 +6,7 @@ use std::time::Duration; use actix_http::body::MessageBody; use actix_web::dev::ServiceResponse; use actix_web::http::StatusCode; -use byte_unit::{Byte, ByteUnit}; +use byte_unit::{Byte, Unit}; use clap::Parser; use meilisearch::option::{IndexerOpts, MaxMemory, Opt}; use meilisearch::{analytics, create_app, setup_meilisearch, SubscriberForSecondLayer}; @@ -231,9 +231,9 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt { env: "development".to_owned(), #[cfg(feature = "analytics")] no_analytics: true, - max_index_size: Byte::from_unit(100.0, ByteUnit::MiB).unwrap(), - max_task_db_size: Byte::from_unit(1.0, ByteUnit::GiB).unwrap(), - http_payload_size_limit: Byte::from_unit(10.0, ByteUnit::MiB).unwrap(), + max_index_size: Byte::from_u64_with_unit(100, Unit::MiB).unwrap(), + max_task_db_size: Byte::from_u64_with_unit(1, Unit::GiB).unwrap(), + http_payload_size_limit: Byte::from_u64_with_unit(10, Unit::MiB).unwrap(), snapshot_dir: ".".into(), indexer_options: IndexerOpts { // memory has to be unlimited because several meilisearch are running in test context. 
diff --git a/meilisearch/tests/documents/add_documents.rs b/meilisearch/tests/documents/add_documents.rs index 5e32564c7..7c5f3efd3 100644 --- a/meilisearch/tests/documents/add_documents.rs +++ b/meilisearch/tests/documents/add_documents.rs @@ -2274,7 +2274,7 @@ async fn error_add_documents_payload_size() { snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" { - "message": "The provided payload reached the size limit. The maximum accepted payload size is 10.00 MiB.", + "message": "The provided payload reached the size limit. The maximum accepted payload size is 10 MiB.", "code": "payload_too_large", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#payload_too_large" diff --git a/tracing-trace/Cargo.toml b/tracing-trace/Cargo.toml index 401ea8f37..ca9c07fe4 100644 --- a/tracing-trace/Cargo.toml +++ b/tracing-trace/Cargo.toml @@ -13,8 +13,9 @@ serde_json = "1.0.120" tracing = "0.1.40" tracing-error = "0.2.0" tracing-subscriber = "0.3.18" -byte-unit = { version = "4.0.19", default-features = false, features = [ +byte-unit = { version = "5.1.4", default-features = false, features = [ "std", + "byte", "serde", ] } tokio = { version = "1.38.0", features = ["sync"] } diff --git a/tracing-trace/src/processor/fmt.rs b/tracing-trace/src/processor/fmt.rs index 68e95c00c..41406df6a 100644 --- a/tracing-trace/src/processor/fmt.rs +++ b/tracing-trace/src/processor/fmt.rs @@ -1,6 +1,8 @@ use std::collections::HashMap; use std::io::Read; +use byte_unit::UnitType; + use crate::entry::{ Entry, Event, MemoryStats, NewCallsite, NewSpan, NewThread, ResourceId, SpanClose, SpanEnter, SpanExit, SpanId, @@ -190,6 +192,6 @@ fn print_duration(duration: std::time::Duration) -> String { /// Format only the allocated bytes, deallocated bytes and reallocated bytes in GiB, MiB, KiB, Bytes. 
fn print_memory(MemoryStats { resident }: MemoryStats) -> String { use byte_unit::Byte; - let rss_bytes = Byte::from_bytes(resident).get_appropriate_unit(true); + let rss_bytes = Byte::from_u64(resident).get_appropriate_unit(UnitType::Binary); format!("RSS {rss_bytes:.2}") }