Move embedder stats out of progress

This commit is contained in:
Mubelotix 2025-06-23 15:24:14 +02:00
parent 4cadc8113b
commit 4925b30196
No known key found for this signature in database
GPG key ID: 89F391DBCC8CE7F0
30 changed files with 255 additions and 69 deletions

View file

@ -1,10 +1,11 @@
use std::collections::{BTreeSet, HashMap, HashSet};
use std::panic::{catch_unwind, AssertUnwindSafe};
use std::sync::atomic::Ordering;
use std::sync::Arc;
use meilisearch_types::batches::{BatchEnqueuedAt, BatchId};
use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::progress::{EmbedderStats, Progress, VariableNameStep};
use meilisearch_types::milli::{self, ChannelCongestion};
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
@ -163,7 +164,7 @@ impl IndexScheduler {
let pre_commit_dabases_sizes = index.database_sizes(&index_wtxn)?;
let (tasks, congestion) =
self.apply_index_operation(&mut index_wtxn, &index, op, &progress)?;
self.apply_index_operation(&mut index_wtxn, &index, op, &progress, current_batch.clone_embedder_stats())?;
{
progress.update_progress(FinalizingIndexStep::Committing);
@ -238,11 +239,21 @@ impl IndexScheduler {
);
builder.set_primary_key(primary_key);
let must_stop_processing = self.scheduler.must_stop_processing.clone();
let embedder_stats = match current_batch.embedder_stats {
Some(ref stats) => stats.clone(),
None => {
let embedder_stats: Arc<EmbedderStats> = Default::default();
current_batch.embedder_stats = Some(embedder_stats.clone());
embedder_stats
},
};
builder
.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.get(),
Some(progress.embedder_stats),
embedder_stats,
)
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
index_wtxn.commit()?;

View file

@ -4,7 +4,7 @@ use bumpalo::collections::CollectIn;
use bumpalo::Bump;
use meilisearch_types::heed::RwTxn;
use meilisearch_types::milli::documents::PrimaryKey;
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::progress::{EmbedderStats, Progress};
use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
use meilisearch_types::milli::update::DocumentAdditionResult;
use meilisearch_types::milli::{self, ChannelCongestion, Filter};
@ -26,7 +26,7 @@ impl IndexScheduler {
/// The list of processed tasks.
#[tracing::instrument(
level = "trace",
skip(self, index_wtxn, index, progress),
skip(self, index_wtxn, index, progress, embedder_stats),
target = "indexing::scheduler"
)]
pub(crate) fn apply_index_operation<'i>(
@ -35,6 +35,7 @@ impl IndexScheduler {
index: &'i Index,
operation: IndexOperation,
progress: &Progress,
embedder_stats: Arc<EmbedderStats>,
) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
let indexer_alloc = Bump::new();
let started_processing_at = std::time::Instant::now();
@ -179,6 +180,7 @@ impl IndexScheduler {
embedders,
&|| must_stop_processing.get(),
progress,
embedder_stats,
)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?,
);
@ -290,6 +292,7 @@ impl IndexScheduler {
embedders,
&|| must_stop_processing.get(),
progress,
embedder_stats,
)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
);
@ -438,6 +441,7 @@ impl IndexScheduler {
embedders,
&|| must_stop_processing.get(),
progress,
embedder_stats,
)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
);
@ -474,7 +478,7 @@ impl IndexScheduler {
.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.get(),
Some(Arc::clone(&progress.embedder_stats))
embedder_stats,
)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
@ -494,6 +498,7 @@ impl IndexScheduler {
tasks: cleared_tasks,
},
progress,
embedder_stats.clone(),
)?;
let (settings_tasks, _congestion) = self.apply_index_operation(
@ -501,6 +506,7 @@ impl IndexScheduler {
index,
IndexOperation::Settings { index_uid, settings, tasks: settings_tasks },
progress,
embedder_stats,
)?;
let mut tasks = settings_tasks;