Merge pull request #59 from meilisearch/improve-bytes-structopt

Use the byte-unit crate to ease library usage
This commit is contained in:
Clément Renault 2020-12-20 14:52:39 +01:00 committed by GitHub
commit 5039528b56
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 57 additions and 19 deletions

16
Cargo.lock generated
View File

@ -99,6 +99,15 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7" checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7"
[[package]]
name = "byte-unit"
version = "4.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c8758c32833faaae35b24a73d332e62d0528e89076ae841c63940e37008b153"
dependencies = [
"utf8-width",
]
[[package]] [[package]]
name = "byteorder" name = "byteorder"
version = "1.3.4" version = "1.3.4"
@ -669,6 +678,7 @@ version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bstr", "bstr",
"byte-unit",
"byteorder", "byteorder",
"criterion", "criterion",
"crossbeam-channel", "crossbeam-channel",
@ -1440,6 +1450,12 @@ dependencies = [
"percent-encoding", "percent-encoding",
] ]
[[package]]
name = "utf8-width"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9071ac216321a4470a69fb2b28cfc68dcd1a39acd877c8be8e014df6772d8efa"
[[package]] [[package]]
name = "uuid" name = "uuid"
version = "0.8.1" version = "0.8.1"

View File

@ -7,6 +7,7 @@ edition = "2018"
[dependencies] [dependencies]
anyhow = "1.0.28" anyhow = "1.0.28"
bstr = "0.2.13" bstr = "0.2.13"
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
byteorder = "1.3.4" byteorder = "1.3.4"
crossbeam-channel = "0.5.0" crossbeam-channel = "0.5.0"
csv = "1.1.3" csv = "1.1.3"

17
http-ui/Cargo.lock generated
View File

@ -196,6 +196,15 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7" checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7"
[[package]]
name = "byte-unit"
version = "4.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c8758c32833faaae35b24a73d332e62d0528e89076ae841c63940e37008b153"
dependencies = [
"utf8-width",
]
[[package]] [[package]]
name = "byteorder" name = "byteorder"
version = "1.3.4" version = "1.3.4"
@ -759,6 +768,7 @@ dependencies = [
"askama", "askama",
"askama_warp", "askama_warp",
"async-compression", "async-compression",
"byte-unit",
"bytes", "bytes",
"flate2", "flate2",
"futures", "futures",
@ -1010,6 +1020,7 @@ version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bstr", "bstr",
"byte-unit",
"byteorder", "byteorder",
"crossbeam-channel", "crossbeam-channel",
"csv", "csv",
@ -2212,6 +2223,12 @@ version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05e42f7c18b8f902290b009cde6d651262f956c98bc51bca4cd1d511c9cd85c7" checksum = "05e42f7c18b8f902290b009cde6d651262f956c98bc51bca4cd1d511c9cd85c7"
[[package]]
name = "utf8-width"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9071ac216321a4470a69fb2b28cfc68dcd1a39acd877c8be8e014df6772d8efa"
[[package]] [[package]]
name = "uuid" name = "uuid"
version = "0.8.1" version = "0.8.1"

View File

@ -8,6 +8,7 @@ edition = "2018"
[dependencies] [dependencies]
anyhow = "1.0.28" anyhow = "1.0.28"
async-compression = { version = "0.3.6", features = ["gzip", "tokio-02"] } async-compression = { version = "0.3.6", features = ["gzip", "tokio-02"] }
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" } grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" }
heed = "0.10.5" heed = "0.10.5"
memmap = "0.7.0" memmap = "0.7.0"

View File

@ -11,6 +11,7 @@ use std::{mem, io};
use askama_warp::Template; use askama_warp::Template;
use async_compression::tokio_02::write::GzipEncoder; use async_compression::tokio_02::write::GzipEncoder;
use byte_unit::Byte;
use flate2::read::GzDecoder; use flate2::read::GzDecoder;
use futures::stream; use futures::stream;
use futures::{FutureExt, StreamExt}; use futures::{FutureExt, StreamExt};
@ -44,13 +45,13 @@ pub struct Opt {
/// The maximum size the database can take on disk. It is recommended to specify /// The maximum size the database can take on disk. It is recommended to specify
/// the whole disk space (value must be a multiple of a page size). /// the whole disk space (value must be a multiple of a page size).
#[structopt(long = "db-size", default_value = "107374182400")] // 100 GB #[structopt(long = "db-size", default_value = "100 GiB")]
database_size: usize, database_size: Byte,
/// The maximum size the database that stores the updates can take on disk. It is recommended /// The maximum size the database that stores the updates can take on disk. It is recommended
/// to specify the whole disk space (value must be a multiple of a page size). /// to specify the whole disk space (value must be a multiple of a page size).
#[structopt(long = "udb-size", default_value = "10737418240")] // 10 GB #[structopt(long = "udb-size", default_value = "10 GiB")]
update_database_size: usize, update_database_size: Byte,
/// Disable document highlighting on the dashboard. /// Disable document highlighting on the dashboard.
#[structopt(long)] #[structopt(long)]
@ -84,8 +85,8 @@ pub struct IndexerOpt {
/// ///
/// It is automatically split by the number of jobs e.g. if you use 7 jobs /// It is automatically split by the number of jobs e.g. if you use 7 jobs
/// and 7 GB of max memory, each thread will use a maximum of 1 GB. /// and 7 GB of max memory, each thread will use a maximum of 1 GB.
#[structopt(long, default_value = "7516192768")] // 7 GB #[structopt(long, default_value = "7 GiB")]
pub max_memory: usize, pub max_memory: Byte,
/// Size of the linked hash map cache when indexing. /// Size of the linked hash map cache when indexing.
/// The bigger it is, the faster the indexing is but the more memory it takes. /// The bigger it is, the faster the indexing is but the more memory it takes.
@ -108,8 +109,8 @@ pub struct IndexerOpt {
/// ///
/// File fusing must only be enabled on file systems that support the `FALLOC_FL_COLLAPSE_RANGE`, /// File fusing must only be enabled on file systems that support the `FALLOC_FL_COLLAPSE_RANGE`,
/// (i.e. ext4 and XFS). File fusing will only work if the `enable-chunk-fusing` is set. /// (i.e. ext4 and XFS). File fusing will only work if the `enable-chunk-fusing` is set.
#[structopt(long, default_value = "4294967296")] // 4 GB #[structopt(long, default_value = "4 GiB")]
pub chunk_fusing_shrink_size: u64, pub chunk_fusing_shrink_size: Byte,
/// Enable the chunk fusing or not, this reduces the amount of disk used by a factor of 2. /// Enable the chunk fusing or not, this reduces the amount of disk used by a factor of 2.
#[structopt(long)] #[structopt(long)]
@ -281,7 +282,7 @@ async fn main() -> anyhow::Result<()> {
create_dir_all(&opt.database)?; create_dir_all(&opt.database)?;
let mut options = EnvOpenOptions::new(); let mut options = EnvOpenOptions::new();
options.map_size(opt.database_size); options.map_size(opt.database_size.get_bytes() as usize);
// Setup the global thread pool // Setup the global thread pool
let jobs = opt.indexer.indexing_jobs.unwrap_or(0); let jobs = opt.indexer.indexing_jobs.unwrap_or(0);
@ -293,7 +294,7 @@ async fn main() -> anyhow::Result<()> {
// Setup the LMDB based update database. // Setup the LMDB based update database.
let mut update_store_options = EnvOpenOptions::new(); let mut update_store_options = EnvOpenOptions::new();
update_store_options.map_size(opt.update_database_size); update_store_options.map_size(opt.update_database_size.get_bytes() as usize);
let update_store_path = opt.database.join("updates.mdb"); let update_store_path = opt.database.join("updates.mdb");
create_dir_all(&update_store_path)?; create_dir_all(&update_store_path)?;
@ -316,10 +317,10 @@ async fn main() -> anyhow::Result<()> {
} }
update_builder.thread_pool(GLOBAL_THREAD_POOL.get().unwrap()); update_builder.thread_pool(GLOBAL_THREAD_POOL.get().unwrap());
update_builder.log_every_n(indexer_opt_cloned.log_every_n); update_builder.log_every_n(indexer_opt_cloned.log_every_n);
update_builder.max_memory(indexer_opt_cloned.max_memory); update_builder.max_memory(indexer_opt_cloned.max_memory.get_bytes() as usize);
update_builder.linked_hash_map_size(indexer_opt_cloned.linked_hash_map_size); update_builder.linked_hash_map_size(indexer_opt_cloned.linked_hash_map_size);
update_builder.chunk_compression_type(indexer_opt_cloned.chunk_compression_type); update_builder.chunk_compression_type(indexer_opt_cloned.chunk_compression_type);
update_builder.chunk_fusing_shrink_size(indexer_opt_cloned.chunk_fusing_shrink_size); update_builder.chunk_fusing_shrink_size(indexer_opt_cloned.chunk_fusing_shrink_size.get_bytes());
// we extract the update type and execute the update itself. // we extract the update type and execute the update itself.
let result: anyhow::Result<()> = match meta { let result: anyhow::Result<()> = match meta {

View File

@ -2,9 +2,10 @@ use std::path::PathBuf;
use std::{str, io, fmt}; use std::{str, io, fmt};
use anyhow::Context; use anyhow::Context;
use byte_unit::Byte;
use crate::Index;
use heed::EnvOpenOptions; use heed::EnvOpenOptions;
use structopt::StructOpt; use structopt::StructOpt;
use crate::Index;
use Command::*; use Command::*;
@ -39,8 +40,8 @@ pub struct Opt {
/// The maximum size the database can take on disk. It is recommended to specify /// The maximum size the database can take on disk. It is recommended to specify
/// the whole disk space (value must be a multiple of a page size). /// the whole disk space (value must be a multiple of a page size).
#[structopt(long = "db-size", default_value = "107374182400")] // 100 GB #[structopt(long = "db-size", default_value = "100 GiB")]
database_size: usize, database_size: Byte,
/// Verbose mode (-v, -vv, -vvv, etc.) /// Verbose mode (-v, -vv, -vvv, etc.)
#[structopt(short, long, parse(from_occurrences))] #[structopt(short, long, parse(from_occurrences))]
@ -159,7 +160,7 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
.init()?; .init()?;
let mut options = EnvOpenOptions::new(); let mut options = EnvOpenOptions::new();
options.map_size(opt.database_size); options.map_size(opt.database_size.get_bytes() as usize);
// Open the LMDB database. // Open the LMDB database.
let index = Index::new(options, opt.database)?; let index = Index::new(options, opt.database)?;

View File

@ -5,6 +5,7 @@ use std::path::PathBuf;
use std::time::Instant; use std::time::Instant;
use anyhow::Context; use anyhow::Context;
use byte_unit::Byte;
use heed::EnvOpenOptions; use heed::EnvOpenOptions;
use log::debug; use log::debug;
use structopt::StructOpt; use structopt::StructOpt;
@ -21,8 +22,8 @@ pub struct Opt {
/// The maximum size the database can take on disk. It is recommended to specify /// The maximum size the database can take on disk. It is recommended to specify
/// the whole disk space (value must be a multiple of a page size). /// the whole disk space (value must be a multiple of a page size).
#[structopt(long = "db-size", default_value = "107374182400")] // 100 GB #[structopt(long = "db-size", default_value = "100 GiB")]
database_size: usize, database_size: Byte,
/// Verbose mode (-v, -vv, -vvv, etc.) /// Verbose mode (-v, -vv, -vvv, etc.)
#[structopt(short, long, parse(from_occurrences))] #[structopt(short, long, parse(from_occurrences))]
@ -41,7 +42,7 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
std::fs::create_dir_all(&opt.database)?; std::fs::create_dir_all(&opt.database)?;
let mut options = EnvOpenOptions::new(); let mut options = EnvOpenOptions::new();
options.map_size(opt.database_size); options.map_size(opt.database_size.get_bytes() as usize);
// Open the LMDB database. // Open the LMDB database.
let index = Index::new(options, &opt.database)?; let index = Index::new(options, &opt.database)?;