Add a jobs parameter to set the number of threads the indexer uses

This commit is contained in:
Kerollmops 2020-06-28 12:13:10 +02:00
parent 7e16afbdce
commit 6a2834f2b0
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4

View File

@@ -40,6 +40,10 @@ struct Opt {
#[structopt(long = "db", parse(from_os_str))] #[structopt(long = "db", parse(from_os_str))]
database: PathBuf, database: PathBuf,
/// Number of parallel jobs, defaults to # of CPUs.
#[structopt(short, long)]
jobs: Option<usize>,
/// Files to index in parallel. /// Files to index in parallel.
files_to_index: Vec<PathBuf>, files_to_index: Vec<PathBuf>,
} }
@@ -388,6 +392,10 @@ fn compute_words_attributes_docids(wtxn: &mut heed::RwTxn, index: &Index) -> any
fn main() -> anyhow::Result<()> { fn main() -> anyhow::Result<()> {
let opt = Opt::from_args(); let opt = Opt::from_args();
if let Some(jobs) = opt.jobs {
rayon::ThreadPoolBuilder::new().num_threads(jobs).build_global()?;
}
std::fs::create_dir_all(&opt.database)?; std::fs::create_dir_all(&opt.database)?;
let env = EnvOpenOptions::new() let env = EnvOpenOptions::new()
.map_size(100 * 1024 * 1024 * 1024) // 100 GB .map_size(100 * 1024 * 1024 * 1024) // 100 GB