Fix the test and simplify types

This commit is contained in:
Mubelotix 2025-06-23 18:55:23 +02:00
parent 4925b30196
commit 2f82d94502
No known key found for this signature in database
GPG key ID: 89F391DBCC8CE7F0
9 changed files with 87 additions and 51 deletions

View file

@ -174,7 +174,7 @@ impl BatchQueue {
pub(crate) fn write_batch(&self, wtxn: &mut RwTxn, batch: ProcessingBatch) -> Result<()> {
let old_batch = self.all_batches.get(wtxn, &batch.uid)?;
println!("Saving batch: {}", batch.embedder_stats.is_some());
println!("Saving batch: {:?}", batch.embedder_stats);
self.all_batches.put(
wtxn,
@ -184,7 +184,7 @@ impl BatchQueue {
progress: None,
details: batch.details,
stats: batch.stats,
embedder_stats: batch.embedder_stats.as_ref().map(|s| BatchEmbeddingStats::from(s.as_ref())),
embedder_stats: batch.embedder_stats.as_ref().into(),
started_at: batch.started_at,
finished_at: batch.finished_at,
enqueued_at: batch.enqueued_at,

View file

@ -437,8 +437,10 @@ impl IndexScheduler {
#[cfg(test)]
self.maybe_fail(crate::test_utils::FailureLocation::InsideCreateBatch)?;
println!("create next batch");
let batch_id = self.queue.batches.next_batch_id(rtxn)?;
let mut current_batch = ProcessingBatch::new(batch_id);
println!("over");
let enqueued = &self.queue.tasks.get_status(rtxn, Status::Enqueued)?;
let count_total_enqueued = enqueued.len();
@ -454,6 +456,7 @@ impl IndexScheduler {
kind: Kind::TaskCancelation,
id: task_id,
});
println!("task cancelled");
return Ok(Some((Batch::TaskCancelation { task }, current_batch)));
}
@ -524,7 +527,7 @@ impl IndexScheduler {
}
// 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { println!("return"); return Ok(None) };
let mut task =
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
@ -602,6 +605,7 @@ impl IndexScheduler {
autobatcher::autobatch(enqueued, index_already_exists, primary_key.as_deref())
{
current_batch.reason(autobatch_stop_reason.unwrap_or(stop_reason));
println!("autobatch");
return Ok(self
.create_next_batch_index(
rtxn,
@ -615,6 +619,7 @@ impl IndexScheduler {
// If we found no tasks then we were notified for something that got autobatched
// somehow and there is nothing to do.
println!("nothing to do");
Ok(None)
}
}

View file

@ -164,7 +164,7 @@ impl IndexScheduler {
let pre_commit_dabases_sizes = index.database_sizes(&index_wtxn)?;
let (tasks, congestion) =
self.apply_index_operation(&mut index_wtxn, &index, op, &progress, current_batch.clone_embedder_stats())?;
self.apply_index_operation(&mut index_wtxn, &index, op, &progress, current_batch.embedder_stats.clone())?;
{
progress.update_progress(FinalizingIndexStep::Committing);
@ -240,20 +240,11 @@ impl IndexScheduler {
builder.set_primary_key(primary_key);
let must_stop_processing = self.scheduler.must_stop_processing.clone();
let embedder_stats = match current_batch.embedder_stats {
Some(ref stats) => stats.clone(),
None => {
let embedder_stats: Arc<EmbedderStats> = Default::default();
current_batch.embedder_stats = Some(embedder_stats.clone());
embedder_stats
},
};
builder
.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.get(),
embedder_stats,
current_batch.embedder_stats.clone(),
)
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
index_wtxn.commit()?;

View file

@ -29,7 +29,7 @@ pub struct ProcessingBatch {
pub uid: BatchId,
pub details: DetailsView,
pub stats: BatchStats,
pub embedder_stats: Option<Arc<EmbedderStats>>,
pub embedder_stats: Arc<EmbedderStats>,
pub statuses: HashSet<Status>,
pub kinds: HashSet<Kind>,
@ -47,11 +47,13 @@ impl ProcessingBatch {
let mut statuses = HashSet::default();
statuses.insert(Status::Processing);
println!("Processing batch created: {}", uid);
Self {
uid,
details: DetailsView::default(),
stats: BatchStats::default(),
embedder_stats: None,
embedder_stats: Default::default(),
statuses,
kinds: HashSet::default(),
@ -64,17 +66,6 @@ impl ProcessingBatch {
}
}
pub fn clone_embedder_stats(&mut self) -> Arc<EmbedderStats> {
match self.embedder_stats {
Some(ref stats) => stats.clone(),
None => {
let embedder_stats: Arc<EmbedderStats> = Default::default();
self.embedder_stats = Some(embedder_stats.clone());
embedder_stats
},
}
}
/// Update itself with the content of the task and update the batch id in the task.
pub fn processing<'a>(&mut self, tasks: impl IntoIterator<Item = &'a mut Task>) {
for task in tasks.into_iter() {
@ -113,11 +104,14 @@ impl ProcessingBatch {
}
/// Stores `reason` as this batch's stop reason, replacing any previous value.
pub fn reason(&mut self, reason: BatchStopReason) {
    // Deliberately silent: this is library code, and `println!` would write
    // to the process's stdout on every call and panic if that write failed.
    self.reason = reason;
}
/// Must be called once the batch has finished processing.
pub fn finished(&mut self) {
println!("Batch finished: {}", self.uid);
self.details = DetailsView::default();
self.stats = BatchStats::default();
self.finished_at = Some(OffsetDateTime::now_utc());
@ -132,6 +126,8 @@ impl ProcessingBatch {
/// Update the timestamp of the tasks and the inner structure of this structure.
pub fn update(&mut self, task: &mut Task) {
println!("Updating task: {} in batch: {}", task.uid, self.uid);
// We must re-set this value in case we're dealing with a task that has been added between
// the `processing` and `finished` state
// We must re-set this value in case we're dealing with a task that has been added between
@ -156,13 +152,13 @@ impl ProcessingBatch {
}
pub fn to_batch(&self) -> Batch {
println!("Converting to batch: {:?}", self.embedder_stats);
println!("Converting to batch: {:?} {:?}", self.uid, self.embedder_stats);
Batch {
uid: self.uid,
progress: None,
details: self.details.clone(),
stats: self.stats.clone(),
embedder_stats: self.embedder_stats.as_ref().map(|s| BatchEmbeddingStats::from(s.as_ref())),
embedder_stats: self.embedder_stats.as_ref().into(),
started_at: self.started_at,
finished_at: self.finished_at,
enqueued_at: self.enqueued_at,