Make the batched tasks size limit effectively work

This commit is contained in:
Clément Renault 2025-01-09 11:59:35 +01:00
parent 8650ee66c1
commit d0bdff7b7b
No known key found for this signature in database
GPG key ID: F250A4C4E3AE5F5F
7 changed files with 34 additions and 15 deletions

View file

@ -115,6 +115,9 @@ pub struct IndexSchedulerOptions {
/// If the autobatcher is allowed to automatically batch tasks
/// it will only batch this defined number of tasks at once.
pub max_number_of_batched_tasks: usize,
/// If the autobatcher is allowed to automatically batch tasks
/// it will only batch this defined maximum size (in bytes) of tasks at once.
pub batched_tasks_size_limit: u64,
/// The experimental features enabled for this instance.
pub instance_features: InstanceTogglableFeatures,
}

View file

@ -497,17 +497,26 @@ impl IndexScheduler {
1
};
let enqueued = index_tasks
.into_iter()
.take(tasks_limit)
.map(|task_id| {
self.queue
.tasks
.get_task(rtxn, task_id)
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))
.map(|task| (task.uid, task.kind))
})
.collect::<Result<Vec<_>>>()?;
let mut enqueued = Vec::new();
let mut total_size: u64 = 0;
for task_id in index_tasks.into_iter().take(tasks_limit) {
let task = self
.queue
.tasks
.get_task(rtxn, task_id)
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))?;
if let Some(uuid) = task.content_uuid() {
let content_size = self.queue.file_store.compute_size(uuid)?;
total_size = total_size.saturating_add(content_size);
}
if total_size > self.scheduler.batched_tasks_size_limit && !enqueued.is_empty() {
break;
}
enqueued.push((task.uid, task.kind));
}
if let Some((batchkind, create_index)) =
autobatcher::autobatch(enqueued, index_already_exists, primary_key.as_deref())

View file

@ -60,6 +60,9 @@ pub struct Scheduler {
/// The maximum number of tasks that will be batched together.
pub(crate) max_number_of_batched_tasks: usize,
/// The maximum size, in bytes, of tasks in a batch.
pub(crate) batched_tasks_size_limit: u64,
/// The path used to create the dumps.
pub(crate) dumps_path: PathBuf,
@ -80,6 +83,7 @@ impl Scheduler {
wake_up: self.wake_up.clone(),
autobatching_enabled: self.autobatching_enabled,
max_number_of_batched_tasks: self.max_number_of_batched_tasks,
batched_tasks_size_limit: self.batched_tasks_size_limit,
dumps_path: self.dumps_path.clone(),
snapshots_path: self.snapshots_path.clone(),
auth_path: self.auth_path.clone(),
@ -94,6 +98,7 @@ impl Scheduler {
wake_up: Arc::new(SignalEvent::auto(true)),
autobatching_enabled: options.autobatching_enabled,
max_number_of_batched_tasks: options.max_number_of_batched_tasks,
batched_tasks_size_limit: options.batched_tasks_size_limit,
dumps_path: options.dumps_path.clone(),
snapshots_path: options.snapshots_path.clone(),
auth_path: options.auth_path.clone(),

View file

@ -107,6 +107,7 @@ impl IndexScheduler {
cleanup_enabled: true,
max_number_of_tasks: 1_000_000,
max_number_of_batched_tasks: usize::MAX,
batched_tasks_size_limit: u64::MAX,
instance_features: Default::default(),
};
configuration(&mut options);