feat(lib): auto-batching

This commit is contained in:
mpostma 2022-01-19 11:21:19 +01:00 committed by ad hoc
parent 622c15e825
commit c9a236b0af
No known key found for this signature in database
GPG key ID: 4F00A782990CC643
28 changed files with 1181 additions and 777 deletions

View file

@ -1,9 +1,10 @@
use core::fmt;
use std::{ops::Deref, str::FromStr};
use std::{convert::TryFrom, ops::Deref, str::FromStr};
use byte_unit::{Byte, ByteError};
use clap::Parser;
use milli::CompressionType;
use milli::{update::IndexerConfig, CompressionType};
use serde::Serialize;
use sysinfo::{RefreshKind, System, SystemExt};
#[derive(Debug, Clone, Parser)]
@ -43,6 +44,52 @@ pub struct IndexerOpts {
pub indexing_jobs: Option<usize>,
}
#[derive(Debug, Clone, Parser, Default, Serialize)]
pub struct SchedulerConfig {
/// enable the autobatching experimental feature
#[clap(long, hide = true)]
pub enable_autobatching: bool,
// The maximum number of updates of the same type that can be batched together.
// If unspecified, this is unlimited. A value of 0 is interpreted as 1.
#[clap(long, requires = "enable-autobatching", hide = true)]
pub max_batch_size: Option<usize>,
// The maximum number of documents in a document batch. Since batches must contain at least one
// update for the scheduler to make progress, the number of documents in a batch will be at
// least the number of documents of its first update.
#[clap(long, requires = "enable-autobatching", hide = true)]
pub max_documents_per_batch: Option<usize>,
/// Debounce duration in seconds
///
/// When a new task is enqueued, the scheduler waits for `debounce_duration_sec` seconds for new updates before
/// starting to process a batch of updates.
#[clap(long, requires = "enable-autobatching", hide = true)]
pub debounce_duration_sec: Option<u64>,
}
impl TryFrom<&IndexerOpts> for IndexerConfig {
type Error = anyhow::Error;
fn try_from(other: &IndexerOpts) -> Result<Self, Self::Error> {
let thread_pool = rayon::ThreadPoolBuilder::new()
.num_threads(other.indexing_jobs.unwrap_or(num_cpus::get() / 2))
.build()?;
Ok(Self {
log_every_n: Some(other.log_every_n),
max_nb_chunks: other.max_nb_chunks,
max_memory: (*other.max_memory).map(|b| b.get_bytes() as usize),
chunk_compression_type: other.chunk_compression_type,
chunk_compression_level: other.chunk_compression_level,
thread_pool: Some(thread_pool),
max_positions_per_attributes: None,
..Default::default()
})
}
}
impl Default for IndexerOpts {
fn default() -> Self {
Self {