WIP: refactor IndexController

change the architecture of the index controller to allow it to own an
index store.
This commit is contained in:
mpostma 2021-01-16 15:09:48 +01:00
parent 686f987180
commit 6a3f625e11
No known key found for this signature in database
GPG key ID: CBC8A7C1D7A28C3A
15 changed files with 1197 additions and 287 deletions

View file

@ -9,10 +9,60 @@ use rustls::{
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, NoClientAuth,
RootCertStore,
};
use grenad::CompressionType;
use structopt::StructOpt;
use crate::updates::IndexerOpts;
#[derive(Debug, Clone, StructOpt)]
pub struct IndexerOpts {
/// The amount of documents to skip before printing
/// a log regarding the indexing advancement.
#[structopt(long, default_value = "100000")] // 100k
pub log_every_n: usize,
/// MTBL max number of chunks in bytes.
#[structopt(long)]
pub max_nb_chunks: Option<usize>,
/// The maximum amount of memory to use for the MTBL buffer. It is recommended
/// to use something like 80%-90% of the available memory.
///
/// It is automatically split by the number of jobs e.g. if you use 7 jobs
/// and 7 GB of max memory, each thread will use a maximum of 1 GB.
#[structopt(long, default_value = "7 GiB")]
pub max_memory: Byte,
/// Size of the linked hash map cache when indexing.
/// The bigger it is, the faster the indexing is but the more memory it takes.
#[structopt(long, default_value = "500")]
pub linked_hash_map_size: usize,
/// The name of the compression algorithm to use when compressing intermediate
/// chunks during indexing documents.
///
/// Choosing a fast algorithm will make the indexing faster but may consume more memory.
#[structopt(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])]
pub chunk_compression_type: CompressionType,
/// The level of compression of the chosen algorithm.
#[structopt(long, requires = "chunk-compression-type")]
pub chunk_compression_level: Option<u32>,
/// The number of bytes to remove from the begining of the chunks while reading/sorting
/// or merging them.
///
/// File fusing must only be enable on file systems that support the `FALLOC_FL_COLLAPSE_RANGE`,
/// (i.e. ext4 and XFS). File fusing will only work if the `enable-chunk-fusing` is set.
#[structopt(long, default_value = "4 GiB")]
pub chunk_fusing_shrink_size: Byte,
/// Enable the chunk fusing or not, this reduces the amount of disk used by a factor of 2.
#[structopt(long)]
pub enable_chunk_fusing: bool,
/// Number of parallel jobs for indexing, defaults to # of CPUs.
#[structopt(long)]
pub indexing_jobs: Option<usize>,
}
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
#[derive(Debug, Clone, StructOpt)]