mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 20:07:09 +02:00
Compute and store the database sizes
This commit is contained in:
parent
fd079c6757
commit
637bea0370
6 changed files with 77 additions and 24 deletions
|
@ -24,6 +24,7 @@ use meilisearch_types::error::ResponseError;
|
|||
use meilisearch_types::heed::{Env, WithoutTls};
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::tasks::Status;
|
||||
use process_batch::ProcessBatchInfo;
|
||||
use rayon::current_num_threads;
|
||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||
use roaring::RoaringBitmap;
|
||||
|
@ -223,16 +224,16 @@ impl IndexScheduler {
|
|||
let mut stop_scheduler_forever = false;
|
||||
let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
|
||||
let mut canceled = RoaringBitmap::new();
|
||||
let mut congestion = None;
|
||||
let mut process_batch_info = ProcessBatchInfo::default();
|
||||
|
||||
match res {
|
||||
Ok((tasks, cong)) => {
|
||||
Ok((tasks, info)) => {
|
||||
#[cfg(test)]
|
||||
self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchSucceeded);
|
||||
|
||||
let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32);
|
||||
progress.update_progress(task_progress_obj);
|
||||
congestion = cong;
|
||||
process_batch_info = info;
|
||||
let mut success = 0;
|
||||
let mut failure = 0;
|
||||
let mut canceled_by = None;
|
||||
|
@ -350,6 +351,9 @@ impl IndexScheduler {
|
|||
// We must re-add the canceled task so they're part of the same batch.
|
||||
ids |= canceled;
|
||||
|
||||
let ProcessBatchInfo { congestion, pre_commit_dabases_sizes, post_commit_dabases_sizes } =
|
||||
process_batch_info;
|
||||
|
||||
processing_batch.stats.progress_trace =
|
||||
progress.accumulated_durations().into_iter().map(|(k, v)| (k, v.into())).collect();
|
||||
processing_batch.stats.write_channel_congestion = congestion.map(|congestion| {
|
||||
|
@ -359,6 +363,30 @@ impl IndexScheduler {
|
|||
congestion_info.insert("blocking_ratio".into(), congestion.congestion_ratio().into());
|
||||
congestion_info
|
||||
});
|
||||
processing_batch.stats.internal_database_sizes = pre_commit_dabases_sizes
|
||||
.iter()
|
||||
.flat_map(|(dbname, pre_size)| {
|
||||
post_commit_dabases_sizes
|
||||
.get(dbname)
|
||||
.map(|post_size| {
|
||||
use byte_unit::{Byte, UnitType::Binary};
|
||||
use std::cmp::Ordering::{Equal, Greater, Less};
|
||||
|
||||
let post = Byte::from_u64(*post_size as u64).get_appropriate_unit(Binary);
|
||||
let diff_size = post_size.abs_diff(*pre_size) as u64;
|
||||
let diff = Byte::from_u64(diff_size).get_appropriate_unit(Binary);
|
||||
let sign = match post_size.cmp(pre_size) {
|
||||
Equal => return None,
|
||||
Greater => "+",
|
||||
Less => "-",
|
||||
};
|
||||
|
||||
Some((dbname.to_string(), format!("{post:#.2} ({sign}{diff:#.2})").into()))
|
||||
})
|
||||
.into_iter()
|
||||
.flatten()
|
||||
})
|
||||
.collect();
|
||||
|
||||
if let Some(congestion) = congestion {
|
||||
tracing::debug!(
|
||||
|
|
|
@ -22,6 +22,16 @@ use crate::utils::{
|
|||
};
|
||||
use crate::{Error, IndexScheduler, Result, TaskId};
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct ProcessBatchInfo {
|
||||
/// The write channel congestion. None when unavailable: settings update.
|
||||
pub congestion: Option<ChannelCongestion>,
|
||||
/// The sizes of the different databases before starting the indexation.
|
||||
pub pre_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
|
||||
/// The sizes of the different databases after commiting the indexation.
|
||||
pub post_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
|
||||
}
|
||||
|
||||
impl IndexScheduler {
|
||||
/// Apply the operation associated with the given batch.
|
||||
///
|
||||
|
@ -35,7 +45,7 @@ impl IndexScheduler {
|
|||
batch: Batch,
|
||||
current_batch: &mut ProcessingBatch,
|
||||
progress: Progress,
|
||||
) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
|
||||
) -> Result<(Vec<Task>, ProcessBatchInfo)> {
|
||||
#[cfg(test)]
|
||||
{
|
||||
self.maybe_fail(crate::test_utils::FailureLocation::InsideProcessBatch)?;
|
||||
|
@ -76,7 +86,7 @@ impl IndexScheduler {
|
|||
|
||||
canceled_tasks.push(task);
|
||||
|
||||
Ok((canceled_tasks, None))
|
||||
Ok((canceled_tasks, ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::TaskDeletions(mut tasks) => {
|
||||
// 1. Retrieve the tasks that matched the query at enqueue-time.
|
||||
|
@ -115,14 +125,14 @@ impl IndexScheduler {
|
|||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
Ok((tasks, None))
|
||||
}
|
||||
Batch::SnapshotCreation(tasks) => {
|
||||
self.process_snapshot(progress, tasks).map(|tasks| (tasks, None))
|
||||
}
|
||||
Batch::Dump(task) => {
|
||||
self.process_dump_creation(progress, task).map(|tasks| (tasks, None))
|
||||
Ok((tasks, ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::SnapshotCreation(tasks) => self
|
||||
.process_snapshot(progress, tasks)
|
||||
.map(|tasks| (tasks, ProcessBatchInfo::default())),
|
||||
Batch::Dump(task) => self
|
||||
.process_dump_creation(progress, task)
|
||||
.map(|tasks| (tasks, ProcessBatchInfo::default())),
|
||||
Batch::IndexOperation { op, must_create_index } => {
|
||||
let index_uid = op.index_uid().to_string();
|
||||
let index = if must_create_index {
|
||||
|
@ -139,6 +149,7 @@ impl IndexScheduler {
|
|||
.set_currently_updating_index(Some((index_uid.clone(), index.clone())));
|
||||
|
||||
let mut index_wtxn = index.write_txn()?;
|
||||
let pre_commit_dabases_sizes = index.database_sizes(&index_wtxn)?;
|
||||
let (tasks, congestion) =
|
||||
self.apply_index_operation(&mut index_wtxn, &index, op, progress)?;
|
||||
|
||||
|
@ -153,12 +164,14 @@ impl IndexScheduler {
|
|||
// stats of the index. Since the tasks have already been processed and
|
||||
// this is a non-critical operation. If it fails, we should not fail
|
||||
// the entire batch.
|
||||
let mut post_commit_dabases_sizes = None;
|
||||
let res = || -> Result<()> {
|
||||
let index_rtxn = index.read_txn()?;
|
||||
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?;
|
||||
post_commit_dabases_sizes = Some(index.database_sizes(&index_rtxn)?);
|
||||
wtxn.commit()?;
|
||||
Ok(())
|
||||
}();
|
||||
|
@ -171,7 +184,16 @@ impl IndexScheduler {
|
|||
),
|
||||
}
|
||||
|
||||
Ok((tasks, congestion))
|
||||
let info = ProcessBatchInfo {
|
||||
congestion,
|
||||
// In case we fail to the get post-commit sizes we decide
|
||||
// that nothing changed and use the pre-commit sizes.
|
||||
post_commit_dabases_sizes: post_commit_dabases_sizes
|
||||
.unwrap_or_else(|| pre_commit_dabases_sizes.clone()),
|
||||
pre_commit_dabases_sizes,
|
||||
};
|
||||
|
||||
Ok((tasks, info))
|
||||
}
|
||||
Batch::IndexCreation { index_uid, primary_key, task } => {
|
||||
progress.update_progress(CreateIndexProgress::CreatingTheIndex);
|
||||
|
@ -239,7 +261,7 @@ impl IndexScheduler {
|
|||
),
|
||||
}
|
||||
|
||||
Ok((vec![task], None))
|
||||
Ok((vec![task], ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => {
|
||||
progress.update_progress(DeleteIndexProgress::DeletingTheIndex);
|
||||
|
@ -273,7 +295,9 @@ impl IndexScheduler {
|
|||
};
|
||||
}
|
||||
|
||||
Ok((tasks, None))
|
||||
// Here we could also show that all the internal database sizes goes to 0
|
||||
// but it would mean opening the index and that's costly.
|
||||
Ok((tasks, ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::IndexSwap { mut task } => {
|
||||
progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap);
|
||||
|
@ -321,7 +345,7 @@ impl IndexScheduler {
|
|||
}
|
||||
wtxn.commit()?;
|
||||
task.status = Status::Succeeded;
|
||||
Ok((vec![task], None))
|
||||
Ok((vec![task], ProcessBatchInfo::default()))
|
||||
}
|
||||
Batch::UpgradeDatabase { mut tasks } => {
|
||||
let KindWithContent::UpgradeDatabase { from } = tasks.last().unwrap().kind else {
|
||||
|
@ -351,7 +375,7 @@ impl IndexScheduler {
|
|||
task.error = None;
|
||||
}
|
||||
|
||||
Ok((tasks, None))
|
||||
Ok((tasks, ProcessBatchInfo::default()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue