mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-09 13:04:30 +01:00
Progress, in the task queue
This commit is contained in:
parent
3658f57f93
commit
33b1f54b41
@ -22,7 +22,8 @@ use std::ffi::OsStr;
|
|||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::fs::{self, File};
|
use std::fs::{self, File};
|
||||||
use std::io::BufWriter;
|
use std::io::BufWriter;
|
||||||
use std::sync::atomic::{self, AtomicU16, AtomicU32};
|
use std::sync::atomic::{self, AtomicU64};
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
use bumpalo::collections::CollectIn;
|
use bumpalo::collections::CollectIn;
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
@ -31,7 +32,6 @@ use meilisearch_types::error::Code;
|
|||||||
use meilisearch_types::heed::{RoTxn, RwTxn};
|
use meilisearch_types::heed::{RoTxn, RwTxn};
|
||||||
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader, PrimaryKey};
|
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader, PrimaryKey};
|
||||||
use meilisearch_types::milli::heed::CompactionOption;
|
use meilisearch_types::milli::heed::CompactionOption;
|
||||||
use meilisearch_types::milli::update::new::indexer::document_changes::Progress;
|
|
||||||
use meilisearch_types::milli::update::new::indexer::{
|
use meilisearch_types::milli::update::new::indexer::{
|
||||||
self, retrieve_or_guess_primary_key, UpdateByFunction,
|
self, retrieve_or_guess_primary_key, UpdateByFunction,
|
||||||
};
|
};
|
||||||
@ -531,7 +531,7 @@ impl IndexScheduler {
|
|||||||
if let Some(task_id) = to_cancel.max() {
|
if let Some(task_id) = to_cancel.max() {
|
||||||
// We retrieve the tasks that were processing before this tasks cancelation started.
|
// We retrieve the tasks that were processing before this tasks cancelation started.
|
||||||
// We must *not* reset the processing tasks before calling this method.
|
// We must *not* reset the processing tasks before calling this method.
|
||||||
let ProcessingTasks { started_at, processing } =
|
let ProcessingTasks { started_at, processing, progress: _ } =
|
||||||
&*self.processing_tasks.read().unwrap();
|
&*self.processing_tasks.read().unwrap();
|
||||||
return Ok(Some(Batch::TaskCancelation {
|
return Ok(Some(Batch::TaskCancelation {
|
||||||
task: self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?,
|
task: self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?,
|
||||||
@ -1223,39 +1223,29 @@ impl IndexScheduler {
|
|||||||
) -> Result<Vec<Task>> {
|
) -> Result<Vec<Task>> {
|
||||||
let indexer_alloc = Bump::new();
|
let indexer_alloc = Bump::new();
|
||||||
|
|
||||||
let last_finished_steps = AtomicU16::new(0);
|
let started_processing_at = std::time::Instant::now();
|
||||||
let last_finished_documents = AtomicU32::new(0);
|
let secs_since_started_processing_at = AtomicU64::new(0);
|
||||||
|
const PRINT_SECS_DELTA: u64 = 1;
|
||||||
|
|
||||||
let send_progress =
|
let processing_tasks = self.processing_tasks.clone();
|
||||||
|Progress { finished_steps, total_steps, step_name, finished_total_documents }| {
|
|
||||||
/*
|
|
||||||
let current = rayon::current_thread_index();
|
|
||||||
|
|
||||||
let last_finished_steps =
|
let must_stop_processing = self.must_stop_processing.clone();
|
||||||
last_finished_steps.fetch_max(finished_steps, atomic::Ordering::Relaxed);
|
|
||||||
|
|
||||||
if last_finished_steps > finished_steps {
|
let send_progress = |progress| {
|
||||||
return;
|
let now = std::time::Instant::now();
|
||||||
}
|
let elapsed = secs_since_started_processing_at.load(atomic::Ordering::Relaxed);
|
||||||
|
let previous = started_processing_at + Duration::from_secs(elapsed);
|
||||||
|
let elapsed = now - previous;
|
||||||
|
|
||||||
if let Some((finished_documents, total_documents)) = finished_total_documents {
|
if elapsed.as_secs() < PRINT_SECS_DELTA {
|
||||||
if last_finished_steps < finished_steps {
|
return;
|
||||||
last_finished_documents.store(finished_documents, atomic::Ordering::Relaxed);
|
}
|
||||||
} else {
|
|
||||||
let last_finished_documents = last_finished_documents
|
secs_since_started_processing_at
|
||||||
.fetch_max(finished_documents, atomic::Ordering::Relaxed);
|
.store((now - started_processing_at).as_secs(), atomic::Ordering::Relaxed);
|
||||||
if last_finished_documents > finished_documents {
|
|
||||||
return;
|
processing_tasks.write().unwrap().update_progress(progress);
|
||||||
}
|
};
|
||||||
}
|
|
||||||
tracing::warn!("Progress from {current:?}: {step_name} ({finished_steps}/{total_steps}), document {finished_documents}/{total_documents}")
|
|
||||||
} else {
|
|
||||||
tracing::warn!(
|
|
||||||
"Progress from {current:?}: {step_name} ({finished_steps}/{total_steps})"
|
|
||||||
)
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
};
|
|
||||||
|
|
||||||
match operation {
|
match operation {
|
||||||
IndexOperation::DocumentClear { mut tasks, .. } => {
|
IndexOperation::DocumentClear { mut tasks, .. } => {
|
||||||
@ -1286,8 +1276,6 @@ impl IndexScheduler {
|
|||||||
operations,
|
operations,
|
||||||
mut tasks,
|
mut tasks,
|
||||||
} => {
|
} => {
|
||||||
let started_processing_at = std::time::Instant::now();
|
|
||||||
let must_stop_processing = self.must_stop_processing.clone();
|
|
||||||
let indexer_config = self.index_mapper.indexer_config();
|
let indexer_config = self.index_mapper.indexer_config();
|
||||||
// TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches.
|
// TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches.
|
||||||
// this is made difficult by the fact we're doing private clones of the index scheduler and sending it
|
// this is made difficult by the fact we're doing private clones of the index scheduler and sending it
|
||||||
@ -1503,7 +1491,6 @@ impl IndexScheduler {
|
|||||||
let document_changes = indexer.into_changes(&primary_key)?;
|
let document_changes = indexer.into_changes(&primary_key)?;
|
||||||
let embedders = index.embedding_configs(index_wtxn)?;
|
let embedders = index.embedding_configs(index_wtxn)?;
|
||||||
let embedders = self.embedders(embedders)?;
|
let embedders = self.embedders(embedders)?;
|
||||||
let must_stop_processing = &self.must_stop_processing;
|
|
||||||
|
|
||||||
indexer::index(
|
indexer::index(
|
||||||
index_wtxn,
|
index_wtxn,
|
||||||
@ -1645,7 +1632,6 @@ impl IndexScheduler {
|
|||||||
let document_changes = indexer.into_changes(&indexer_alloc, primary_key);
|
let document_changes = indexer.into_changes(&indexer_alloc, primary_key);
|
||||||
let embedders = index.embedding_configs(index_wtxn)?;
|
let embedders = index.embedding_configs(index_wtxn)?;
|
||||||
let embedders = self.embedders(embedders)?;
|
let embedders = self.embedders(embedders)?;
|
||||||
let must_stop_processing = &self.must_stop_processing;
|
|
||||||
|
|
||||||
indexer::index(
|
indexer::index(
|
||||||
index_wtxn,
|
index_wtxn,
|
||||||
@ -1679,7 +1665,6 @@ impl IndexScheduler {
|
|||||||
task.status = Status::Succeeded;
|
task.status = Status::Succeeded;
|
||||||
}
|
}
|
||||||
|
|
||||||
let must_stop_processing = self.must_stop_processing.clone();
|
|
||||||
builder.execute(
|
builder.execute(
|
||||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||||
|| must_stop_processing.get(),
|
|| must_stop_processing.get(),
|
||||||
|
@ -148,6 +148,7 @@ pub fn snapshot_task(task: &Task) -> String {
|
|||||||
enqueued_at: _,
|
enqueued_at: _,
|
||||||
started_at: _,
|
started_at: _,
|
||||||
finished_at: _,
|
finished_at: _,
|
||||||
|
progress: _,
|
||||||
error,
|
error,
|
||||||
canceled_by,
|
canceled_by,
|
||||||
details,
|
details,
|
||||||
|
@ -55,11 +55,12 @@ use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128};
|
|||||||
use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
|
use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
|
||||||
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
|
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
|
||||||
use meilisearch_types::milli::index::IndexEmbeddingConfig;
|
use meilisearch_types::milli::index::IndexEmbeddingConfig;
|
||||||
|
use meilisearch_types::milli::update::new::indexer::document_changes::Progress;
|
||||||
use meilisearch_types::milli::update::IndexerConfig;
|
use meilisearch_types::milli::update::IndexerConfig;
|
||||||
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
|
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
|
||||||
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
|
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
|
||||||
use meilisearch_types::task_view::TaskView;
|
use meilisearch_types::task_view::TaskView;
|
||||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task, TaskProgress};
|
||||||
use rayon::current_num_threads;
|
use rayon::current_num_threads;
|
||||||
use rayon::prelude::{IntoParallelIterator, ParallelIterator};
|
use rayon::prelude::{IntoParallelIterator, ParallelIterator};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
@ -161,12 +162,18 @@ struct ProcessingTasks {
|
|||||||
started_at: OffsetDateTime,
|
started_at: OffsetDateTime,
|
||||||
/// The list of tasks ids that are currently running.
|
/// The list of tasks ids that are currently running.
|
||||||
processing: RoaringBitmap,
|
processing: RoaringBitmap,
|
||||||
|
/// The progress on processing tasks
|
||||||
|
progress: Option<TaskProgress>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ProcessingTasks {
|
impl ProcessingTasks {
|
||||||
/// Creates an empty `ProcessingAt` struct.
|
/// Creates an empty `ProcessingAt` struct.
|
||||||
fn new() -> ProcessingTasks {
|
fn new() -> ProcessingTasks {
|
||||||
ProcessingTasks { started_at: OffsetDateTime::now_utc(), processing: RoaringBitmap::new() }
|
ProcessingTasks {
|
||||||
|
started_at: OffsetDateTime::now_utc(),
|
||||||
|
processing: RoaringBitmap::new(),
|
||||||
|
progress: None,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Stores the currently processing tasks, and the date time at which it started.
|
/// Stores the currently processing tasks, and the date time at which it started.
|
||||||
@ -175,8 +182,13 @@ impl ProcessingTasks {
|
|||||||
self.processing = processing;
|
self.processing = processing;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn update_progress(&mut self, progress: Progress) {
|
||||||
|
self.progress.get_or_insert_with(TaskProgress::default).update(progress);
|
||||||
|
}
|
||||||
|
|
||||||
/// Set the processing tasks to an empty list
|
/// Set the processing tasks to an empty list
|
||||||
fn stop_processing(&mut self) -> RoaringBitmap {
|
fn stop_processing(&mut self) -> RoaringBitmap {
|
||||||
|
self.progress = None;
|
||||||
std::mem::take(&mut self.processing)
|
std::mem::take(&mut self.processing)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -956,7 +968,7 @@ impl IndexScheduler {
|
|||||||
tasks.into_iter().rev().take(query.limit.unwrap_or(u32::MAX) as usize),
|
tasks.into_iter().rev().take(query.limit.unwrap_or(u32::MAX) as usize),
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let ProcessingTasks { started_at, processing, .. } =
|
let ProcessingTasks { started_at, processing, progress, .. } =
|
||||||
self.processing_tasks.read().map_err(|_| Error::CorruptedTaskQueue)?.clone();
|
self.processing_tasks.read().map_err(|_| Error::CorruptedTaskQueue)?.clone();
|
||||||
|
|
||||||
let ret = tasks.into_iter();
|
let ret = tasks.into_iter();
|
||||||
@ -966,7 +978,12 @@ impl IndexScheduler {
|
|||||||
Ok((
|
Ok((
|
||||||
ret.map(|task| {
|
ret.map(|task| {
|
||||||
if processing.contains(task.uid) {
|
if processing.contains(task.uid) {
|
||||||
Task { status: Status::Processing, started_at: Some(started_at), ..task }
|
Task {
|
||||||
|
status: Status::Processing,
|
||||||
|
progress: progress.clone(),
|
||||||
|
started_at: Some(started_at),
|
||||||
|
..task
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
task
|
task
|
||||||
}
|
}
|
||||||
@ -1008,6 +1025,7 @@ impl IndexScheduler {
|
|||||||
enqueued_at: OffsetDateTime::now_utc(),
|
enqueued_at: OffsetDateTime::now_utc(),
|
||||||
started_at: None,
|
started_at: None,
|
||||||
finished_at: None,
|
finished_at: None,
|
||||||
|
progress: None,
|
||||||
error: None,
|
error: None,
|
||||||
canceled_by: None,
|
canceled_by: None,
|
||||||
details: kind.default_details(),
|
details: kind.default_details(),
|
||||||
@ -1588,6 +1606,8 @@ impl<'a> Dump<'a> {
|
|||||||
enqueued_at: task.enqueued_at,
|
enqueued_at: task.enqueued_at,
|
||||||
started_at: task.started_at,
|
started_at: task.started_at,
|
||||||
finished_at: task.finished_at,
|
finished_at: task.finished_at,
|
||||||
|
/// FIXME: should we update dump to contain progress information? 🤔
|
||||||
|
progress: None,
|
||||||
error: task.error,
|
error: task.error,
|
||||||
canceled_by: task.canceled_by,
|
canceled_by: task.canceled_by,
|
||||||
details: task.details,
|
details: task.details,
|
||||||
|
@ -345,6 +345,8 @@ impl IndexScheduler {
|
|||||||
enqueued_at,
|
enqueued_at,
|
||||||
started_at,
|
started_at,
|
||||||
finished_at,
|
finished_at,
|
||||||
|
/// FIXME: assert something here? ask tamo 🤔
|
||||||
|
progress: _,
|
||||||
error: _,
|
error: _,
|
||||||
canceled_by,
|
canceled_by,
|
||||||
details,
|
details,
|
||||||
|
@ -4,7 +4,9 @@ use time::{Duration, OffsetDateTime};
|
|||||||
|
|
||||||
use crate::error::ResponseError;
|
use crate::error::ResponseError;
|
||||||
use crate::settings::{Settings, Unchecked};
|
use crate::settings::{Settings, Unchecked};
|
||||||
use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId};
|
use crate::tasks::{
|
||||||
|
serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId, TaskProgress,
|
||||||
|
};
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
|
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
|
||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
@ -27,6 +29,8 @@ pub struct TaskView {
|
|||||||
pub started_at: Option<OffsetDateTime>,
|
pub started_at: Option<OffsetDateTime>,
|
||||||
#[serde(with = "time::serde::rfc3339::option", default)]
|
#[serde(with = "time::serde::rfc3339::option", default)]
|
||||||
pub finished_at: Option<OffsetDateTime>,
|
pub finished_at: Option<OffsetDateTime>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub progress: Option<TaskProgress>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TaskView {
|
impl TaskView {
|
||||||
@ -43,6 +47,7 @@ impl TaskView {
|
|||||||
enqueued_at: task.enqueued_at,
|
enqueued_at: task.enqueued_at,
|
||||||
started_at: task.started_at,
|
started_at: task.started_at,
|
||||||
finished_at: task.finished_at,
|
finished_at: task.finished_at,
|
||||||
|
progress: task.progress.clone(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,6 +4,7 @@ use std::fmt::{Display, Write};
|
|||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
|
|
||||||
use enum_iterator::Sequence;
|
use enum_iterator::Sequence;
|
||||||
|
use milli::update::new::indexer::document_changes::Progress;
|
||||||
use milli::update::IndexDocumentsMethod;
|
use milli::update::IndexDocumentsMethod;
|
||||||
use milli::Object;
|
use milli::Object;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
@ -30,6 +31,8 @@ pub struct Task {
|
|||||||
#[serde(with = "time::serde::rfc3339::option")]
|
#[serde(with = "time::serde::rfc3339::option")]
|
||||||
pub finished_at: Option<OffsetDateTime>,
|
pub finished_at: Option<OffsetDateTime>,
|
||||||
|
|
||||||
|
pub progress: Option<TaskProgress>,
|
||||||
|
|
||||||
pub error: Option<ResponseError>,
|
pub error: Option<ResponseError>,
|
||||||
pub canceled_by: Option<TaskId>,
|
pub canceled_by: Option<TaskId>,
|
||||||
pub details: Option<Details>,
|
pub details: Option<Details>,
|
||||||
@ -38,6 +41,59 @@ pub struct Task {
|
|||||||
pub kind: KindWithContent,
|
pub kind: KindWithContent,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct TaskProgress {
|
||||||
|
pub current_step: String,
|
||||||
|
pub finished_steps: u16,
|
||||||
|
pub total_steps: u16,
|
||||||
|
pub finished_documents: Option<u32>,
|
||||||
|
pub total_documents: Option<u32>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for TaskProgress {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TaskProgress {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
current_step: String::new(),
|
||||||
|
finished_steps: 0,
|
||||||
|
total_steps: 1,
|
||||||
|
finished_documents: None,
|
||||||
|
total_documents: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn update(&mut self, progress: Progress) {
|
||||||
|
if self.current_step != progress.step_name {
|
||||||
|
self.current_step.clear();
|
||||||
|
self.current_step.push_str(progress.step_name);
|
||||||
|
}
|
||||||
|
self.total_steps = progress.total_steps;
|
||||||
|
if self.finished_steps > progress.finished_steps {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if self.finished_steps < progress.finished_steps {
|
||||||
|
self.finished_documents = None;
|
||||||
|
self.total_documents = None;
|
||||||
|
}
|
||||||
|
self.finished_steps = progress.finished_steps;
|
||||||
|
if let Some((finished_documents, total_documents)) = progress.finished_total_documents {
|
||||||
|
if let Some(task_finished_documents) = self.finished_documents {
|
||||||
|
if task_finished_documents > finished_documents {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self.finished_documents = Some(finished_documents);
|
||||||
|
self.total_documents = Some(total_documents);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Task {
|
impl Task {
|
||||||
pub fn index_uid(&self) -> Option<&str> {
|
pub fn index_uid(&self) -> Option<&str> {
|
||||||
use KindWithContent::*;
|
use KindWithContent::*;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user