mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 12:27:13 +02:00
Fix the test and simplify types
This commit is contained in:
parent
4925b30196
commit
2f82d94502
9 changed files with 87 additions and 51 deletions
@@ -174,7 +174,7 @@ impl BatchQueue {
     pub(crate) fn write_batch(&self, wtxn: &mut RwTxn, batch: ProcessingBatch) -> Result<()> {
         let old_batch = self.all_batches.get(wtxn, &batch.uid)?;

-        println!("Saving batch: {}", batch.embedder_stats.is_some());
+        println!("Saving batch: {:?}", batch.embedder_stats);

         self.all_batches.put(
             wtxn,
@@ -184,7 +184,7 @@ impl BatchQueue {
                 progress: None,
                 details: batch.details,
                 stats: batch.stats,
-                embedder_stats: batch.embedder_stats.as_ref().map(|s| BatchEmbeddingStats::from(s.as_ref())),
+                embedder_stats: batch.embedder_stats.as_ref().into(),
                 started_at: batch.started_at,
                 finished_at: batch.finished_at,
                 enqueued_at: batch.enqueued_at,
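Both `write_batch` here and `to_batch` further down now build the stored value with `embedder_stats.as_ref().into()` instead of mapping over an `Option`. For that call to compile there has to be a `From<&EmbedderStats> for BatchEmbeddingStats` conversion, which the removed line already invoked explicitly. A minimal sketch of that shape, assuming invented counter fields rather than the real ones:

use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};

// Hypothetical, simplified stand-ins for the real EmbedderStats / BatchEmbeddingStats.
#[derive(Default, Debug)]
struct EmbedderStats {
    total_count: AtomicUsize,
    error_count: AtomicUsize,
}

#[derive(Debug)]
struct BatchEmbeddingStats {
    total_count: usize,
    error_count: usize,
}

// A `From<&EmbedderStats>` impl is what lets `batch.embedder_stats.as_ref().into()` compile:
// `as_ref` on the `Arc` yields `&EmbedderStats`, and `.into()` resolves to this conversion.
impl From<&EmbedderStats> for BatchEmbeddingStats {
    fn from(stats: &EmbedderStats) -> Self {
        BatchEmbeddingStats {
            total_count: stats.total_count.load(Ordering::Relaxed),
            error_count: stats.error_count.load(Ordering::Relaxed),
        }
    }
}

fn main() {
    let live: Arc<EmbedderStats> = Default::default();
    live.total_count.fetch_add(3, Ordering::Relaxed);
    // Same call shape as the new line in the diff: &Arc -> &T -> owned snapshot.
    let snapshot: BatchEmbeddingStats = live.as_ref().into();
    println!("{snapshot:?}");
}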
@@ -437,8 +437,10 @@ impl IndexScheduler {
         #[cfg(test)]
         self.maybe_fail(crate::test_utils::FailureLocation::InsideCreateBatch)?;

+        println!("create next batch");
         let batch_id = self.queue.batches.next_batch_id(rtxn)?;
         let mut current_batch = ProcessingBatch::new(batch_id);
+        println!("over");

         let enqueued = &self.queue.tasks.get_status(rtxn, Status::Enqueued)?;
         let count_total_enqueued = enqueued.len();
@@ -454,6 +456,7 @@ impl IndexScheduler {
                 kind: Kind::TaskCancelation,
                 id: task_id,
             });
+            println!("task cancelled");
             return Ok(Some((Batch::TaskCancelation { task }, current_batch)));
         }

@@ -524,7 +527,7 @@ impl IndexScheduler {
         }

         // 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
-        let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
+        let task_id = if let Some(task_id) = enqueued.min() { task_id } else { println!("return"); return Ok(None) };
         let mut task =
             self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;

@@ -602,6 +605,7 @@ impl IndexScheduler {
             autobatcher::autobatch(enqueued, index_already_exists, primary_key.as_deref())
         {
             current_batch.reason(autobatch_stop_reason.unwrap_or(stop_reason));
+            println!("autobatch");
             return Ok(self
                 .create_next_batch_index(
                     rtxn,
@@ -615,6 +619,7 @@ impl IndexScheduler {

         // If we found no tasks then we were notified for something that got autobatched
         // somehow and there is nothing to do.
+        println!("nothing to do");
         Ok(None)
     }
 }
@@ -164,7 +164,7 @@ impl IndexScheduler {

                 let pre_commit_dabases_sizes = index.database_sizes(&index_wtxn)?;
                 let (tasks, congestion) =
-                    self.apply_index_operation(&mut index_wtxn, &index, op, &progress, current_batch.clone_embedder_stats())?;
+                    self.apply_index_operation(&mut index_wtxn, &index, op, &progress, current_batch.embedder_stats.clone())?;

                 {
                     progress.update_progress(FinalizingIndexStep::Committing);
@@ -240,20 +240,11 @@ impl IndexScheduler {
                 builder.set_primary_key(primary_key);
                 let must_stop_processing = self.scheduler.must_stop_processing.clone();

-                let embedder_stats = match current_batch.embedder_stats {
-                    Some(ref stats) => stats.clone(),
-                    None => {
-                        let embedder_stats: Arc<EmbedderStats> = Default::default();
-                        current_batch.embedder_stats = Some(embedder_stats.clone());
-                        embedder_stats
-                    },
-                };
-
                 builder
                     .execute(
                         |indexing_step| tracing::debug!(update = ?indexing_step),
                         || must_stop_processing.get(),
-                        embedder_stats,
+                        current_batch.embedder_stats.clone(),
                     )
                     .map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
                 index_wtxn.commit()?;
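This hunk is the core of the simplification: because `ProcessingBatch::embedder_stats` is no longer an `Option`, the lazy "create the Arc on first use" match disappears and the call site just clones the `Arc`, so the scheduler and the indexing operation accumulate into the same counters. A self-contained sketch of the pattern, with a stand-in `apply_index_operation` and invented fields:

use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};

// Hypothetical, minimal stand-in for EmbedderStats: interior mutability via atomics,
// so a shared, immutable Arc can still accumulate counts.
#[derive(Default, Debug)]
struct EmbedderStats {
    total_count: AtomicUsize,
}

// Stand-in for the indexing operation: it receives its own Arc handle and records progress.
fn apply_index_operation(stats: Arc<EmbedderStats>) {
    stats.total_count.fetch_add(42, Ordering::Relaxed);
}

fn main() {
    // With the non-optional field, the batch owns a stats object from the start...
    let embedder_stats: Arc<EmbedderStats> = Default::default();

    // ...and handing it to the operation is a cheap refcount bump, not a lazy init.
    apply_index_operation(embedder_stats.clone());

    // The caller sees the counts recorded by the operation: same allocation, two handles.
    assert_eq!(embedder_stats.total_count.load(Ordering::Relaxed), 42);
}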
@@ -29,7 +29,7 @@ pub struct ProcessingBatch {
     pub uid: BatchId,
     pub details: DetailsView,
     pub stats: BatchStats,
-    pub embedder_stats: Option<Arc<EmbedderStats>>,
+    pub embedder_stats: Arc<EmbedderStats>,

     pub statuses: HashSet<Status>,
     pub kinds: HashSet<Kind>,
@@ -47,11 +47,13 @@ impl ProcessingBatch {
         let mut statuses = HashSet::default();
         statuses.insert(Status::Processing);

+        println!("Processing batch created: {}", uid);
+
         Self {
             uid,
             details: DetailsView::default(),
             stats: BatchStats::default(),
-            embedder_stats: None,
+            embedder_stats: Default::default(),

             statuses,
             kinds: HashSet::default(),
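`embedder_stats: Default::default()` works here because `Arc<T>` is `Default` whenever `T: Default`, so every `ProcessingBatch` now starts with a fresh, zeroed stats object instead of `None`. An illustrative sketch with a trimmed-down struct (field names invented):

use std::sync::Arc;

// Illustrative only: a trimmed-down ProcessingBatch with just the field this commit changes.
#[derive(Default, Debug)]
struct EmbedderStats {
    total_count: u32, // placeholder; the real struct would use atomic counters
}

struct ProcessingBatch {
    uid: u32,
    embedder_stats: Arc<EmbedderStats>,
}

impl ProcessingBatch {
    fn new(uid: u32) -> Self {
        Self {
            uid,
            // `Arc<T>` is `Default` whenever `T: Default`, so this allocates a fresh,
            // zeroed stats object per batch; there is no `None` state to check later.
            embedder_stats: Default::default(),
        }
    }
}

fn main() {
    let batch = ProcessingBatch::new(1);
    println!("batch {} stats: {:?}", batch.uid, batch.embedder_stats);
}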
@@ -64,17 +66,6 @@ impl ProcessingBatch {
         }
     }

-    pub fn clone_embedder_stats(&mut self) -> Arc<EmbedderStats> {
-        match self.embedder_stats {
-            Some(ref stats) => stats.clone(),
-            None => {
-                let embedder_stats: Arc<EmbedderStats> = Default::default();
-                self.embedder_stats = Some(embedder_stats.clone());
-                embedder_stats
-            },
-        }
-    }
-
     /// Update itself with the content of the task and update the batch id in the task.
     pub fn processing<'a>(&mut self, tasks: impl IntoIterator<Item = &'a mut Task>) {
         for task in tasks.into_iter() {
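With the field made non-optional, the deleted `clone_embedder_stats` helper would have collapsed to a one-line `Arc::clone`, so callers such as the `process_batch` hunk above clone the field directly instead. A hypothetical sketch of what the helper would have reduced to had it been kept:

use std::sync::Arc;

#[derive(Default)]
struct EmbedderStats;

struct ProcessingBatch {
    embedder_stats: Arc<EmbedderStats>,
}

impl ProcessingBatch {
    // If kept after the type change, the helper is just an Arc refcount bump and no longer
    // needs `&mut self`; `batch.embedder_stats.clone()` at the call site does the same thing.
    fn clone_embedder_stats(&self) -> Arc<EmbedderStats> {
        Arc::clone(&self.embedder_stats)
    }
}

fn main() {
    let batch = ProcessingBatch { embedder_stats: Default::default() };
    let shared = batch.clone_embedder_stats();
    // Both handles point at the same allocation.
    assert!(Arc::ptr_eq(&shared, &batch.embedder_stats));
}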
@@ -113,11 +104,14 @@ impl ProcessingBatch {
     }

     pub fn reason(&mut self, reason: BatchStopReason) {
+        println!("batch stopped: {:?}", reason);
         self.reason = reason;
     }

     /// Must be called once the batch has finished processing.
     pub fn finished(&mut self) {
+        println!("Batch finished: {}", self.uid);
+
         self.details = DetailsView::default();
         self.stats = BatchStats::default();
         self.finished_at = Some(OffsetDateTime::now_utc());
@@ -132,6 +126,8 @@ impl ProcessingBatch {

     /// Update the timestamp of the tasks and the inner structure of this structure.
     pub fn update(&mut self, task: &mut Task) {
+        println!("Updating task: {} in batch: {}", task.uid, self.uid);
+
         // We must re-set this value in case we're dealing with a task that has been added between
         // the `processing` and `finished` state
         // We must re-set this value in case we're dealing with a task that has been added between
@@ -156,13 +152,13 @@ impl ProcessingBatch {
     }

     pub fn to_batch(&self) -> Batch {
-        println!("Converting to batch: {:?}", self.embedder_stats);
+        println!("Converting to batch: {:?} {:?}", self.uid, self.embedder_stats);
         Batch {
             uid: self.uid,
             progress: None,
             details: self.details.clone(),
             stats: self.stats.clone(),
-            embedder_stats: self.embedder_stats.as_ref().map(|s| BatchEmbeddingStats::from(s.as_ref())),
+            embedder_stats: self.embedder_stats.as_ref().into(),
             started_at: self.started_at,
             finished_at: self.finished_at,
             enqueued_at: self.enqueued_at,