diff --git a/crates/index-scheduler/src/batch.rs b/crates/index-scheduler/src/batch.rs deleted file mode 100644 index a8b67436e..000000000 --- a/crates/index-scheduler/src/batch.rs +++ /dev/null @@ -1,1950 +0,0 @@ -/*! -This module handles the creation and processing of batch operations. - -A batch is a combination of multiple tasks that can be processed at once. -Executing a batch operation should always be functionally equivalent to -executing each of its tasks' operations individually and in order. - -For example, if the user sends two tasks: -1. import documents X -2. import documents Y - -We can combine the two tasks in a single batch: -1. import documents X and Y - -Processing this batch is functionally equivalent to processing the two -tasks individually, but should be much faster since we are only performing -one indexing operation. -*/ - -use std::collections::{BTreeSet, HashMap, HashSet}; -use std::ffi::OsStr; -use std::fmt; -use std::fs::{self, File}; -use std::io::BufWriter; -use std::sync::atomic::Ordering; - -use bumpalo::collections::CollectIn; -use bumpalo::Bump; -use dump::IndexMetadata; -use meilisearch_types::batches::BatchId; -use meilisearch_types::heed::{RoTxn, RwTxn}; -use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME; -use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader, PrimaryKey}; -use meilisearch_types::milli::heed::CompactionOption; -use meilisearch_types::milli::progress::Progress; -use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction}; -use meilisearch_types::milli::update::{ - DocumentAdditionResult, IndexDocumentsMethod, Settings as MilliSettings, -}; -use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; -use meilisearch_types::milli::{self, Filter, ThreadPoolNoAbortBuilder}; -use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked}; -use meilisearch_types::tasks::{Details, IndexSwap, Kind, 
KindWithContent, Status, Task}; -use meilisearch_types::{compression, Index, VERSION_FILE_NAME}; -use roaring::RoaringBitmap; -use time::macros::format_description; -use time::OffsetDateTime; -use uuid::Uuid; - -use crate::autobatcher::{self, BatchKind}; -use crate::processing::{ - AtomicBatchStep, AtomicDocumentStep, AtomicTaskStep, AtomicUpdateFileStep, CreateIndexProgress, - DeleteIndexProgress, DocumentDeletionProgress, DocumentEditionProgress, - DocumentOperationProgress, DumpCreationProgress, InnerSwappingTwoIndexes, SettingsProgress, - SnapshotCreationProgress, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress, - UpdateIndexProgress, VariableNameStep, -}; -use crate::utils::{self, swap_index_uid_in_task, ProcessingBatch}; -use crate::{Error, IndexScheduler, Result, TaskId}; - -/// Represents a combination of tasks that can all be processed at the same time. -/// -/// A batch contains the set of tasks that it represents (accessible through -/// [`self.ids()`](Batch::ids)), as well as additional information on how to -/// be processed. -#[derive(Debug)] -pub(crate) enum Batch { - TaskCancelation { - /// The task cancelation itself. - task: Task, - }, - TaskDeletions(Vec), - SnapshotCreation(Vec), - Dump(Task), - IndexOperation { - op: IndexOperation, - must_create_index: bool, - }, - IndexCreation { - index_uid: String, - primary_key: Option, - task: Task, - }, - IndexUpdate { - index_uid: String, - primary_key: Option, - task: Task, - }, - IndexDeletion { - index_uid: String, - tasks: Vec, - index_has_been_created: bool, - }, - IndexSwap { - task: Task, - }, -} - -#[derive(Debug)] -pub(crate) enum DocumentOperation { - Add(Uuid), - Delete(Vec), -} - -/// A [batch](Batch) that combines multiple tasks operating on an index. 
-#[derive(Debug)] -pub(crate) enum IndexOperation { - DocumentOperation { - index_uid: String, - primary_key: Option, - method: IndexDocumentsMethod, - operations: Vec, - tasks: Vec, - }, - DocumentEdition { - index_uid: String, - task: Task, - }, - DocumentDeletion { - index_uid: String, - tasks: Vec, - }, - DocumentClear { - index_uid: String, - tasks: Vec, - }, - Settings { - index_uid: String, - // The boolean indicates if it's a settings deletion or creation. - settings: Vec<(bool, Settings)>, - tasks: Vec, - }, - DocumentClearAndSetting { - index_uid: String, - cleared_tasks: Vec, - - // The boolean indicates if it's a settings deletion or creation. - settings: Vec<(bool, Settings)>, - settings_tasks: Vec, - }, -} - -impl Batch { - /// Return the task ids associated with this batch. - pub fn ids(&self) -> RoaringBitmap { - match self { - Batch::TaskCancelation { task, .. } - | Batch::Dump(task) - | Batch::IndexCreation { task, .. } - | Batch::IndexUpdate { task, .. } => { - RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap() - } - Batch::SnapshotCreation(tasks) - | Batch::TaskDeletions(tasks) - | Batch::IndexDeletion { tasks, .. } => { - RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid)) - } - Batch::IndexOperation { op, .. } => match op { - IndexOperation::DocumentOperation { tasks, .. } - | IndexOperation::Settings { tasks, .. } - | IndexOperation::DocumentDeletion { tasks, .. } - | IndexOperation::DocumentClear { tasks, .. } => { - RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid)) - } - IndexOperation::DocumentEdition { task, .. } => { - RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap() - } - IndexOperation::DocumentClearAndSetting { - cleared_tasks: tasks, - settings_tasks: other, - .. 
- } => RoaringBitmap::from_iter(tasks.iter().chain(other).map(|task| task.uid)), - }, - Batch::IndexSwap { task } => { - RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap() - } - } - } - - /// Return the index UID associated with this batch - pub fn index_uid(&self) -> Option<&str> { - use Batch::*; - match self { - TaskCancelation { .. } - | TaskDeletions(_) - | SnapshotCreation(_) - | Dump(_) - | IndexSwap { .. } => None, - IndexOperation { op, .. } => Some(op.index_uid()), - IndexCreation { index_uid, .. } - | IndexUpdate { index_uid, .. } - | IndexDeletion { index_uid, .. } => Some(index_uid), - } - } -} - -impl fmt::Display for Batch { - /// A text used when we debug the profiling reports. - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let index_uid = self.index_uid(); - let tasks = self.ids(); - match self { - Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?, - Batch::TaskDeletions(_) => f.write_str("TaskDeletion")?, - Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?, - Batch::Dump(_) => f.write_str("Dump")?, - Batch::IndexOperation { op, .. } => write!(f, "{op}")?, - Batch::IndexCreation { .. } => f.write_str("IndexCreation")?, - Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?, - Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?, - Batch::IndexSwap { .. } => f.write_str("IndexSwap")?, - }; - match index_uid { - Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")), - None => f.write_fmt(format_args!(" from tasks: {tasks:?}")), - } - } -} - -impl IndexOperation { - pub fn index_uid(&self) -> &str { - match self { - IndexOperation::DocumentOperation { index_uid, .. } - | IndexOperation::DocumentEdition { index_uid, .. } - | IndexOperation::DocumentDeletion { index_uid, .. } - | IndexOperation::DocumentClear { index_uid, .. } - | IndexOperation::Settings { index_uid, .. } - | IndexOperation::DocumentClearAndSetting { index_uid, .. 
} => index_uid, - } - } -} - -impl fmt::Display for IndexOperation { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - IndexOperation::DocumentOperation { .. } => { - f.write_str("IndexOperation::DocumentOperation") - } - IndexOperation::DocumentEdition { .. } => { - f.write_str("IndexOperation::DocumentEdition") - } - IndexOperation::DocumentDeletion { .. } => { - f.write_str("IndexOperation::DocumentDeletion") - } - IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"), - IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"), - IndexOperation::DocumentClearAndSetting { .. } => { - f.write_str("IndexOperation::DocumentClearAndSetting") - } - } - } -} - -impl IndexScheduler { - /// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`]. - /// - /// ## Arguments - /// - `rtxn`: read transaction - /// - `index_uid`: name of the index affected by the operations of the autobatch - /// - `batch`: the result of the autobatcher - pub(crate) fn create_next_batch_index( - &self, - rtxn: &RoTxn, - index_uid: String, - batch: BatchKind, - current_batch: &mut ProcessingBatch, - must_create_index: bool, - ) -> Result> { - match batch { - BatchKind::DocumentClear { ids } => Ok(Some(Batch::IndexOperation { - op: IndexOperation::DocumentClear { - tasks: self.get_existing_tasks_for_processing_batch( - rtxn, - current_batch, - ids, - )?, - index_uid, - }, - must_create_index, - })), - BatchKind::DocumentEdition { id } => { - let mut task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; - current_batch.processing(Some(&mut task)); - match &task.kind { - KindWithContent::DocumentEdition { index_uid, .. } => { - Ok(Some(Batch::IndexOperation { - op: IndexOperation::DocumentEdition { - index_uid: index_uid.clone(), - task, - }, - must_create_index: false, - })) - } - _ => unreachable!(), - } - } - BatchKind::DocumentOperation { method, operation_ids, .. 
} => { - let tasks = self.get_existing_tasks_for_processing_batch( - rtxn, - current_batch, - operation_ids, - )?; - let primary_key = tasks - .iter() - .find_map(|task| match task.kind { - KindWithContent::DocumentAdditionOrUpdate { ref primary_key, .. } => { - // we want to stop on the first document addition - Some(primary_key.clone()) - } - KindWithContent::DocumentDeletion { .. } => None, - _ => unreachable!(), - }) - .flatten(); - - let mut operations = Vec::new(); - - for task in tasks.iter() { - match task.kind { - KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => { - operations.push(DocumentOperation::Add(content_file)); - } - KindWithContent::DocumentDeletion { ref documents_ids, .. } => { - operations.push(DocumentOperation::Delete(documents_ids.clone())); - } - _ => unreachable!(), - } - } - - Ok(Some(Batch::IndexOperation { - op: IndexOperation::DocumentOperation { - index_uid, - primary_key, - method, - operations, - tasks, - }, - must_create_index, - })) - } - BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: _ } => { - let tasks = self.get_existing_tasks_for_processing_batch( - rtxn, - current_batch, - deletion_ids, - )?; - - Ok(Some(Batch::IndexOperation { - op: IndexOperation::DocumentDeletion { index_uid, tasks }, - must_create_index, - })) - } - BatchKind::Settings { settings_ids, .. } => { - let tasks = self.get_existing_tasks_for_processing_batch( - rtxn, - current_batch, - settings_ids, - )?; - - let mut settings = Vec::new(); - for task in &tasks { - match task.kind { - KindWithContent::SettingsUpdate { - ref new_settings, is_deletion, .. 
- } => settings.push((is_deletion, *new_settings.clone())), - _ => unreachable!(), - } - } - - Ok(Some(Batch::IndexOperation { - op: IndexOperation::Settings { index_uid, settings, tasks }, - must_create_index, - })) - } - BatchKind::ClearAndSettings { other, settings_ids, allow_index_creation } => { - let (index_uid, settings, settings_tasks) = match self - .create_next_batch_index( - rtxn, - index_uid, - BatchKind::Settings { settings_ids, allow_index_creation }, - current_batch, - must_create_index, - )? - .unwrap() - { - Batch::IndexOperation { - op: IndexOperation::Settings { index_uid, settings, tasks, .. }, - .. - } => (index_uid, settings, tasks), - _ => unreachable!(), - }; - let (index_uid, cleared_tasks) = match self - .create_next_batch_index( - rtxn, - index_uid, - BatchKind::DocumentClear { ids: other }, - current_batch, - must_create_index, - )? - .unwrap() - { - Batch::IndexOperation { - op: IndexOperation::DocumentClear { index_uid, tasks }, - .. - } => (index_uid, tasks), - _ => unreachable!(), - }; - - Ok(Some(Batch::IndexOperation { - op: IndexOperation::DocumentClearAndSetting { - index_uid, - cleared_tasks, - settings, - settings_tasks, - }, - must_create_index, - })) - } - BatchKind::IndexCreation { id } => { - let mut task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; - current_batch.processing(Some(&mut task)); - let (index_uid, primary_key) = match &task.kind { - KindWithContent::IndexCreation { index_uid, primary_key } => { - (index_uid.clone(), primary_key.clone()) - } - _ => unreachable!(), - }; - Ok(Some(Batch::IndexCreation { index_uid, primary_key, task })) - } - BatchKind::IndexUpdate { id } => { - let mut task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; - current_batch.processing(Some(&mut task)); - let primary_key = match &task.kind { - KindWithContent::IndexUpdate { primary_key, .. 
} => primary_key.clone(), - _ => unreachable!(), - }; - Ok(Some(Batch::IndexUpdate { index_uid, primary_key, task })) - } - BatchKind::IndexDeletion { ids } => Ok(Some(Batch::IndexDeletion { - index_uid, - index_has_been_created: must_create_index, - tasks: self.get_existing_tasks_for_processing_batch(rtxn, current_batch, ids)?, - })), - BatchKind::IndexSwap { id } => { - let mut task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; - current_batch.processing(Some(&mut task)); - Ok(Some(Batch::IndexSwap { task })) - } - } - } - - /// Create the next batch to be processed; - /// 1. We get the *last* task to cancel. - /// 2. We get the *next* task to delete. - /// 3. We get the *next* snapshot to process. - /// 4. We get the *next* dump to process. - /// 5. We get the *next* tasks to process for a specific index. - #[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")] - pub(crate) fn create_next_batch( - &self, - rtxn: &RoTxn, - ) -> Result> { - #[cfg(test)] - self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?; - - let batch_id = self.next_batch_id(rtxn)?; - let mut current_batch = ProcessingBatch::new(batch_id); - - let enqueued = &self.get_status(rtxn, Status::Enqueued)?; - let to_cancel = self.get_kind(rtxn, Kind::TaskCancelation)? & enqueued; - - // 1. we get the last task to cancel. - if let Some(task_id) = to_cancel.max() { - let mut task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; - current_batch.processing(Some(&mut task)); - return Ok(Some((Batch::TaskCancelation { task }, current_batch))); - } - - // 2. we get the next task to delete - let to_delete = self.get_kind(rtxn, Kind::TaskDeletion)? & enqueued; - if !to_delete.is_empty() { - let mut tasks = self.get_existing_tasks(rtxn, to_delete)?; - current_batch.processing(&mut tasks); - return Ok(Some((Batch::TaskDeletions(tasks), current_batch))); - } - - // 3. we batch the snapshot. 
- let to_snapshot = self.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued; - if !to_snapshot.is_empty() { - let mut tasks = self.get_existing_tasks(rtxn, to_snapshot)?; - current_batch.processing(&mut tasks); - return Ok(Some((Batch::SnapshotCreation(tasks), current_batch))); - } - - // 4. we batch the dumps. - let to_dump = self.get_kind(rtxn, Kind::DumpCreation)? & enqueued; - if let Some(to_dump) = to_dump.min() { - let mut task = self.get_task(rtxn, to_dump)?.ok_or(Error::CorruptedTaskQueue)?; - current_batch.processing(Some(&mut task)); - return Ok(Some((Batch::Dump(task), current_batch))); - } - - // 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task. - let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) }; - let mut task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; - - // If the task is not associated with any index, verify that it is an index swap and - // create the batch directly. Otherwise, get the index name associated with the task - // and use the autobatcher to batch the enqueued tasks associated with it - - let index_name = if let Some(&index_name) = task.indexes().first() { - index_name - } else { - assert!(matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty())); - current_batch.processing(Some(&mut task)); - return Ok(Some((Batch::IndexSwap { task }, current_batch))); - }; - - let index_already_exists = self.index_mapper.exists(rtxn, index_name)?; - let mut primary_key = None; - if index_already_exists { - let index = self.index_mapper.index(rtxn, index_name)?; - let rtxn = index.read_txn()?; - primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string()); - } - - let index_tasks = self.index_tasks(rtxn, index_name)? & enqueued; - - // If autobatching is disabled we only take one task at a time. - // Otherwise, we take only a maximum of tasks to create batches. 
- let tasks_limit = - if self.autobatching_enabled { self.max_number_of_batched_tasks } else { 1 }; - - let enqueued = index_tasks - .into_iter() - .take(tasks_limit) - .map(|task_id| { - self.get_task(rtxn, task_id) - .and_then(|task| task.ok_or(Error::CorruptedTaskQueue)) - .map(|task| (task.uid, task.kind)) - }) - .collect::>>()?; - - if let Some((batchkind, create_index)) = - autobatcher::autobatch(enqueued, index_already_exists, primary_key.as_deref()) - { - return Ok(self - .create_next_batch_index( - rtxn, - index_name.to_string(), - batchkind, - &mut current_batch, - create_index, - )? - .map(|batch| (batch, current_batch))); - } - - // If we found no tasks then we were notified for something that got autobatched - // somehow and there is nothing to do. - Ok(None) - } - - /// Apply the operation associated with the given batch. - /// - /// ## Return - /// The list of tasks that were processed. The metadata of each task in the returned - /// list is updated accordingly, with the exception of the its date fields - /// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at). - #[tracing::instrument(level = "trace", skip(self, batch, progress), target = "indexing::scheduler", fields(batch=batch.to_string()))] - pub(crate) fn process_batch( - &self, - batch: Batch, - current_batch: &mut ProcessingBatch, - progress: Progress, - ) -> Result> { - #[cfg(test)] - { - self.maybe_fail(crate::tests::FailureLocation::InsideProcessBatch)?; - self.maybe_fail(crate::tests::FailureLocation::PanicInsideProcessBatch)?; - self.breakpoint(crate::Breakpoint::InsideProcessBatch); - } - - match batch { - Batch::TaskCancelation { mut task } => { - // 1. Retrieve the tasks that matched the query at enqueue-time. 
- let matched_tasks = - if let KindWithContent::TaskCancelation { tasks, query: _ } = &task.kind { - tasks - } else { - unreachable!() - }; - - let rtxn = self.env.read_txn()?; - let mut canceled_tasks = self.cancel_matched_tasks( - &rtxn, - task.uid, - current_batch, - matched_tasks, - &progress, - )?; - - task.status = Status::Succeeded; - match &mut task.details { - Some(Details::TaskCancelation { - matched_tasks: _, - canceled_tasks: canceled_tasks_details, - original_filter: _, - }) => { - *canceled_tasks_details = Some(canceled_tasks.len() as u64); - } - _ => unreachable!(), - } - - canceled_tasks.push(task); - - Ok(canceled_tasks) - } - Batch::TaskDeletions(mut tasks) => { - // 1. Retrieve the tasks that matched the query at enqueue-time. - let mut matched_tasks = RoaringBitmap::new(); - - for task in tasks.iter() { - if let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind { - matched_tasks |= tasks; - } else { - unreachable!() - } - } - - let mut wtxn = self.env.write_txn()?; - let mut deleted_tasks = - self.delete_matched_tasks(&mut wtxn, &matched_tasks, &progress)?; - wtxn.commit()?; - - for task in tasks.iter_mut() { - task.status = Status::Succeeded; - let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind else { - unreachable!() - }; - - let deleted_tasks_count = deleted_tasks.intersection_len(tasks); - deleted_tasks -= tasks; - - match &mut task.details { - Some(Details::TaskDeletion { - matched_tasks: _, - deleted_tasks, - original_filter: _, - }) => { - *deleted_tasks = Some(deleted_tasks_count); - } - _ => unreachable!(), - } - } - Ok(tasks) - } - Batch::SnapshotCreation(mut tasks) => { - progress.update_progress(SnapshotCreationProgress::StartTheSnapshotCreation); - - fs::create_dir_all(&self.snapshots_path)?; - let temp_snapshot_dir = tempfile::tempdir()?; - - // 1. Snapshot the version file. - let dst = temp_snapshot_dir.path().join(VERSION_FILE_NAME); - fs::copy(&self.version_file_path, dst)?; - - // 2. 
Snapshot the index-scheduler LMDB env - // - // When we call copy_to_file, LMDB opens a read transaction by itself, - // we can't provide our own. It is an issue as we would like to know - // the update files to copy but new ones can be enqueued between the copy - // of the env and the new transaction we open to retrieve the enqueued tasks. - // So we prefer opening a new transaction after copying the env and copy more - // update files than not enough. - // - // Note that there cannot be any update files deleted between those - // two read operations as the task processing is synchronous. - - // 2.1 First copy the LMDB env of the index-scheduler - progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler); - let dst = temp_snapshot_dir.path().join("tasks"); - fs::create_dir_all(&dst)?; - self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; - - // 2.2 Create a read transaction on the index-scheduler - let rtxn = self.env.read_txn()?; - - // 2.3 Create the update files directory - let update_files_dir = temp_snapshot_dir.path().join("update_files"); - fs::create_dir_all(&update_files_dir)?; - - // 2.4 Only copy the update files of the enqueued tasks - progress.update_progress(SnapshotCreationProgress::SnapshotTheUpdateFiles); - let enqueued = self.get_status(&rtxn, Status::Enqueued)?; - let (atomic, update_file_progress) = - AtomicUpdateFileStep::new(enqueued.len() as u32); - progress.update_progress(update_file_progress); - for task_id in enqueued { - let task = self.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; - if let Some(content_uuid) = task.content_uuid() { - let src = self.file_store.get_update_path(content_uuid); - let dst = update_files_dir.join(content_uuid.to_string()); - fs::copy(src, dst)?; - } - atomic.fetch_add(1, Ordering::Relaxed); - } - - // 3. 
Snapshot every indexes - progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexes); - let index_mapping = self.index_mapper.index_mapping; - let nb_indexes = index_mapping.len(&rtxn)? as u32; - - for (i, result) in index_mapping.iter(&rtxn)?.enumerate() { - let (name, uuid) = result?; - progress.update_progress(VariableNameStep::new(name, i as u32, nb_indexes)); - let index = self.index_mapper.index(&rtxn, name)?; - let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string()); - fs::create_dir_all(&dst)?; - index - .copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled) - .map_err(|e| Error::from_milli(e, Some(name.to_string())))?; - } - - drop(rtxn); - - // 4. Snapshot the auth LMDB env - progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys); - let dst = temp_snapshot_dir.path().join("auth"); - fs::create_dir_all(&dst)?; - // TODO We can't use the open_auth_store_env function here but we should - let auth = unsafe { - milli::heed::EnvOpenOptions::new() - .map_size(1024 * 1024 * 1024) // 1 GiB - .max_dbs(2) - .open(&self.auth_path) - }?; - auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; - - // 5. 
Copy and tarball the flat snapshot - progress.update_progress(SnapshotCreationProgress::CreateTheTarball); - // 5.1 Find the original name of the database - // TODO find a better way to get this path - let mut base_path = self.env.path().to_owned(); - base_path.pop(); - let db_name = base_path.file_name().and_then(OsStr::to_str).unwrap_or("data.ms"); - - // 5.2 Tarball the content of the snapshot in a tempfile with a .snapshot extension - let snapshot_path = self.snapshots_path.join(format!("{}.snapshot", db_name)); - let temp_snapshot_file = tempfile::NamedTempFile::new_in(&self.snapshots_path)?; - compression::to_tar_gz(temp_snapshot_dir.path(), temp_snapshot_file.path())?; - let file = temp_snapshot_file.persist(snapshot_path)?; - - // 5.3 Change the permission to make the snapshot readonly - let mut permissions = file.metadata()?.permissions(); - permissions.set_readonly(true); - #[cfg(unix)] - { - use std::os::unix::fs::PermissionsExt; - #[allow(clippy::non_octal_unix_permissions)] - // rwxrwxrwx - permissions.set_mode(0b100100100); - } - - file.set_permissions(permissions)?; - - for task in &mut tasks { - task.status = Status::Succeeded; - } - - Ok(tasks) - } - Batch::Dump(mut task) => { - progress.update_progress(DumpCreationProgress::StartTheDumpCreation); - let started_at = OffsetDateTime::now_utc(); - let (keys, instance_uid) = - if let KindWithContent::DumpCreation { keys, instance_uid } = &task.kind { - (keys, instance_uid) - } else { - unreachable!(); - }; - let dump = dump::DumpWriter::new(*instance_uid)?; - - // 1. dump the keys - progress.update_progress(DumpCreationProgress::DumpTheApiKeys); - let mut dump_keys = dump.create_keys()?; - for key in keys { - dump_keys.push_key(key)?; - } - dump_keys.flush()?; - - let rtxn = self.env.read_txn()?; - - // 2. 
dump the tasks - progress.update_progress(DumpCreationProgress::DumpTheTasks); - let mut dump_tasks = dump.create_tasks_queue()?; - - let (atomic, update_task_progress) = - AtomicTaskStep::new(self.all_tasks.len(&rtxn)? as u32); - progress.update_progress(update_task_progress); - - for ret in self.all_tasks.iter(&rtxn)? { - if self.must_stop_processing.get() { - return Err(Error::AbortedTask); - } - - let (_, mut t) = ret?; - let status = t.status; - let content_file = t.content_uuid(); - - // In the case we're dumping ourselves we want to be marked as finished - // to not loop over ourselves indefinitely. - if t.uid == task.uid { - let finished_at = OffsetDateTime::now_utc(); - - // We're going to fake the date because we don't know if everything is going to go well. - // But we need to dump the task as finished and successful. - // If something fail everything will be set appropriately in the end. - t.status = Status::Succeeded; - t.started_at = Some(started_at); - t.finished_at = Some(finished_at); - } - let mut dump_content_file = dump_tasks.push_task(&t.into())?; - - // 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet. - if let Some(content_file) = content_file { - if self.must_stop_processing.get() { - return Err(Error::AbortedTask); - } - if status == Status::Enqueued { - let content_file = self.file_store.get_update(content_file)?; - - let reader = DocumentsBatchReader::from_reader(content_file) - .map_err(|e| Error::from_milli(e.into(), None))?; - - let (mut cursor, documents_batch_index) = - reader.into_cursor_and_fields_index(); - - while let Some(doc) = cursor - .next_document() - .map_err(|e| Error::from_milli(e.into(), None))? - { - dump_content_file.push_document( - &obkv_to_object(doc, &documents_batch_index) - .map_err(|e| Error::from_milli(e, None))?, - )?; - } - dump_content_file.flush()?; - } - } - atomic.fetch_add(1, Ordering::Relaxed); - } - dump_tasks.flush()?; - - // 3. 
Dump the indexes - progress.update_progress(DumpCreationProgress::DumpTheIndexes); - let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32; - let mut count = 0; - self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> { - progress.update_progress(VariableNameStep::new( - uid.to_string(), - count, - nb_indexes, - )); - count += 1; - - let rtxn = index.read_txn()?; - let metadata = IndexMetadata { - uid: uid.to_owned(), - primary_key: index.primary_key(&rtxn)?.map(String::from), - created_at: index - .created_at(&rtxn) - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?, - updated_at: index - .updated_at(&rtxn) - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?, - }; - let mut index_dumper = dump.create_index(uid, &metadata)?; - - let fields_ids_map = index.fields_ids_map(&rtxn)?; - let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); - let embedding_configs = index - .embedding_configs(&rtxn) - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; - - let nb_documents = index - .number_of_documents(&rtxn) - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))? - as u32; - let (atomic, update_document_progress) = AtomicDocumentStep::new(nb_documents); - progress.update_progress(update_document_progress); - let documents = index - .all_documents(&rtxn) - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; - // 3.1. 
Dump the documents - for ret in documents { - if self.must_stop_processing.get() { - return Err(Error::AbortedTask); - } - - let (id, doc) = - ret.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; - - let mut document = - milli::obkv_to_json(&all_fields, &fields_ids_map, doc) - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; - - 'inject_vectors: { - let embeddings = index - .embeddings(&rtxn, id) - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; - - if embeddings.is_empty() { - break 'inject_vectors; - } - - let vectors = document - .entry(RESERVED_VECTORS_FIELD_NAME.to_owned()) - .or_insert(serde_json::Value::Object(Default::default())); - - let serde_json::Value::Object(vectors) = vectors else { - let user_err = milli::Error::UserError( - milli::UserError::InvalidVectorsMapType { - document_id: { - if let Ok(Some(Ok(index))) = index - .external_id_of(&rtxn, std::iter::once(id)) - .map(|it| it.into_iter().next()) - { - index - } else { - format!("internal docid={id}") - } - }, - value: vectors.clone(), - }, - ); - - return Err(Error::from_milli(user_err, Some(uid.to_string()))); - }; - - for (embedder_name, embeddings) in embeddings { - let user_provided = embedding_configs - .iter() - .find(|conf| conf.name == embedder_name) - .is_some_and(|conf| conf.user_provided.contains(id)); - - let embeddings = ExplicitVectors { - embeddings: Some( - VectorOrArrayOfVectors::from_array_of_vectors(embeddings), - ), - regenerate: !user_provided, - }; - vectors.insert( - embedder_name, - serde_json::to_value(embeddings).unwrap(), - ); - } - } - - index_dumper.push_document(&document)?; - atomic.fetch_add(1, Ordering::Relaxed); - } - - // 3.2. Dump the settings - let settings = meilisearch_types::settings::settings( - index, - &rtxn, - meilisearch_types::settings::SecretPolicy::RevealSecrets, - ) - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; - index_dumper.settings(&settings)?; - Ok(()) - })?; - - // 4. 
Dump experimental feature settings - progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures); - let features = self.features().runtime_features(); - dump.create_experimental_features(features)?; - - let dump_uid = started_at.format(format_description!( - "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" - )).unwrap(); - - if self.must_stop_processing.get() { - return Err(Error::AbortedTask); - } - progress.update_progress(DumpCreationProgress::CompressTheDump); - let path = self.dumps_path.join(format!("{}.dump", dump_uid)); - let file = File::create(path)?; - dump.persist_to(BufWriter::new(file))?; - - // if we reached this step we can tell the scheduler we succeeded to dump ourselves. - task.status = Status::Succeeded; - task.details = Some(Details::Dump { dump_uid: Some(dump_uid) }); - Ok(vec![task]) - } - Batch::IndexOperation { op, must_create_index } => { - let index_uid = op.index_uid().to_string(); - let index = if must_create_index { - // create the index if it doesn't already exist - let wtxn = self.env.write_txn()?; - self.index_mapper.create_index(wtxn, &index_uid, None)? - } else { - let rtxn = self.env.read_txn()?; - self.index_mapper.index(&rtxn, &index_uid)? - }; - - // the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick - self.index_mapper - .set_currently_updating_index(Some((index_uid.clone(), index.clone()))); - - let mut index_wtxn = index.write_txn()?; - let tasks = self.apply_index_operation(&mut index_wtxn, &index, op, progress)?; - - { - let span = tracing::trace_span!(target: "indexing::scheduler", "commit"); - let _entered = span.enter(); - - index_wtxn.commit()?; - } - - // if the update processed successfully, we're going to store the new - // stats of the index. Since the tasks have already been processed and - // this is a non-critical operation. 
If it fails, we should not fail - // the entire batch. - let res = || -> Result<()> { - let index_rtxn = index.read_txn()?; - let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn) - .map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?; - let mut wtxn = self.env.write_txn()?; - self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?; - wtxn.commit()?; - Ok(()) - }(); - - match res { - Ok(_) => (), - Err(e) => tracing::error!( - error = &e as &dyn std::error::Error, - "Could not write the stats of the index" - ), - } - - Ok(tasks) - } - Batch::IndexCreation { index_uid, primary_key, task } => { - progress.update_progress(CreateIndexProgress::CreatingTheIndex); - - let wtxn = self.env.write_txn()?; - if self.index_mapper.exists(&wtxn, &index_uid)? { - return Err(Error::IndexAlreadyExists(index_uid)); - } - self.index_mapper.create_index(wtxn, &index_uid, None)?; - - self.process_batch( - Batch::IndexUpdate { index_uid, primary_key, task }, - current_batch, - progress, - ) - } - Batch::IndexUpdate { index_uid, primary_key, mut task } => { - progress.update_progress(UpdateIndexProgress::UpdatingTheIndex); - let rtxn = self.env.read_txn()?; - let index = self.index_mapper.index(&rtxn, &index_uid)?; - - if let Some(primary_key) = primary_key.clone() { - let mut index_wtxn = index.write_txn()?; - let mut builder = MilliSettings::new( - &mut index_wtxn, - &index, - self.index_mapper.indexer_config(), - ); - builder.set_primary_key(primary_key); - let must_stop_processing = self.must_stop_processing.clone(); - builder - .execute( - |indexing_step| tracing::debug!(update = ?indexing_step), - || must_stop_processing.get(), - ) - .map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?; - index_wtxn.commit()?; - } - - // drop rtxn before starting a new wtxn on the same db - rtxn.commit()?; - - task.status = Status::Succeeded; - task.details = Some(Details::IndexInfo { primary_key }); - - // if the update processed successfully, we're 
going to store the new - // stats of the index. Since the tasks have already been processed and - // this is a non-critical operation. If it fails, we should not fail - // the entire batch. - let res = || -> Result<()> { - let mut wtxn = self.env.write_txn()?; - let index_rtxn = index.read_txn()?; - let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn) - .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; - self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?; - wtxn.commit()?; - Ok(()) - }(); - - match res { - Ok(_) => (), - Err(e) => tracing::error!( - error = &e as &dyn std::error::Error, - "Could not write the stats of the index" - ), - } - - Ok(vec![task]) - } - Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => { - progress.update_progress(DeleteIndexProgress::DeletingTheIndex); - let wtxn = self.env.write_txn()?; - - // it's possible that the index doesn't exist - let number_of_documents = || -> Result { - let index = self.index_mapper.index(&wtxn, &index_uid)?; - let index_rtxn = index.read_txn()?; - index - .number_of_documents(&index_rtxn) - .map_err(|e| Error::from_milli(e, Some(index_uid.to_string()))) - }() - .unwrap_or_default(); - - // The write transaction is directly owned and committed inside. - match self.index_mapper.delete_index(wtxn, &index_uid) { - Ok(()) => (), - Err(Error::IndexNotFound(_)) if index_has_been_created => (), - Err(e) => return Err(e), - } - - // We set all the tasks details to the default value. - for task in &mut tasks { - task.status = Status::Succeeded; - task.details = match &task.kind { - KindWithContent::IndexDeletion { .. 
} => { - Some(Details::ClearAll { deleted_documents: Some(number_of_documents) }) - } - otherwise => otherwise.default_finished_details(), - }; - } - - Ok(tasks) - } - Batch::IndexSwap { mut task } => { - progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap); - - let mut wtxn = self.env.write_txn()?; - let swaps = if let KindWithContent::IndexSwap { swaps } = &task.kind { - swaps - } else { - unreachable!() - }; - let mut not_found_indexes = BTreeSet::new(); - for IndexSwap { indexes: (lhs, rhs) } in swaps { - for index in [lhs, rhs] { - let index_exists = self.index_mapper.index_exists(&wtxn, index)?; - if !index_exists { - not_found_indexes.insert(index); - } - } - } - if !not_found_indexes.is_empty() { - if not_found_indexes.len() == 1 { - return Err(Error::SwapIndexNotFound( - not_found_indexes.into_iter().next().unwrap().clone(), - )); - } else { - return Err(Error::SwapIndexesNotFound( - not_found_indexes.into_iter().cloned().collect(), - )); - } - } - progress.update_progress(SwappingTheIndexes::SwappingTheIndexes); - for (step, swap) in swaps.iter().enumerate() { - progress.update_progress(VariableNameStep::new( - format!("swapping index {} and {}", swap.indexes.0, swap.indexes.1), - step as u32, - swaps.len() as u32, - )); - self.apply_index_swap( - &mut wtxn, - &progress, - task.uid, - &swap.indexes.0, - &swap.indexes.1, - )?; - } - wtxn.commit()?; - task.status = Status::Succeeded; - Ok(vec![task]) - } - } - } - - /// Swap the index `lhs` with the index `rhs`. - fn apply_index_swap( - &self, - wtxn: &mut RwTxn, - progress: &Progress, - task_id: u32, - lhs: &str, - rhs: &str, - ) -> Result<()> { - progress.update_progress(InnerSwappingTwoIndexes::RetrieveTheTasks); - // 1. 
Verify that both lhs and rhs are existing indexes - let index_lhs_exists = self.index_mapper.index_exists(wtxn, lhs)?; - if !index_lhs_exists { - return Err(Error::IndexNotFound(lhs.to_owned())); - } - let index_rhs_exists = self.index_mapper.index_exists(wtxn, rhs)?; - if !index_rhs_exists { - return Err(Error::IndexNotFound(rhs.to_owned())); - } - - // 2. Get the task set for index = name that appeared before the index swap task - let mut index_lhs_task_ids = self.index_tasks(wtxn, lhs)?; - index_lhs_task_ids.remove_range(task_id..); - let mut index_rhs_task_ids = self.index_tasks(wtxn, rhs)?; - index_rhs_task_ids.remove_range(task_id..); - - // 3. before_name -> new_name in the task's KindWithContent - progress.update_progress(InnerSwappingTwoIndexes::UpdateTheTasks); - let tasks_to_update = &index_lhs_task_ids | &index_rhs_task_ids; - let (atomic, task_progress) = AtomicTaskStep::new(tasks_to_update.len() as u32); - progress.update_progress(task_progress); - - for task_id in tasks_to_update { - let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; - swap_index_uid_in_task(&mut task, (lhs, rhs)); - self.all_tasks.put(wtxn, &task_id, &task)?; - atomic.fetch_add(1, Ordering::Relaxed); - } - - // 4. remove the task from indexuid = before_name - // 5. add the task to indexuid = after_name - progress.update_progress(InnerSwappingTwoIndexes::UpdateTheIndexesMetadata); - self.update_index(wtxn, lhs, |lhs_tasks| { - *lhs_tasks -= &index_lhs_task_ids; - *lhs_tasks |= &index_rhs_task_ids; - })?; - self.update_index(wtxn, rhs, |rhs_tasks| { - *rhs_tasks -= &index_rhs_task_ids; - *rhs_tasks |= &index_lhs_task_ids; - })?; - - // 6. Swap in the index mapper - self.index_mapper.swap(wtxn, lhs, rhs)?; - - Ok(()) - } - - /// Process the index operation on the given index. - /// - /// ## Return - /// The list of processed tasks. 
- #[tracing::instrument( - level = "trace", - skip(self, index_wtxn, index, progress), - target = "indexing::scheduler" - )] - fn apply_index_operation<'i>( - &self, - index_wtxn: &mut RwTxn<'i>, - index: &'i Index, - operation: IndexOperation, - progress: Progress, - ) -> Result> { - let indexer_alloc = Bump::new(); - - let started_processing_at = std::time::Instant::now(); - let must_stop_processing = self.must_stop_processing.clone(); - - match operation { - IndexOperation::DocumentClear { index_uid, mut tasks } => { - let count = milli::update::ClearDocuments::new(index_wtxn, index) - .execute() - .map_err(|e| Error::from_milli(e, Some(index_uid)))?; - - let mut first_clear_found = false; - for task in &mut tasks { - task.status = Status::Succeeded; - // The first document clear will effectively delete every documents - // in the database but the next ones will clear 0 documents. - task.details = match &task.kind { - KindWithContent::DocumentClear { .. } => { - let count = if first_clear_found { 0 } else { count }; - first_clear_found = true; - Some(Details::ClearAll { deleted_documents: Some(count) }) - } - otherwise => otherwise.default_details(), - }; - } - - Ok(tasks) - } - IndexOperation::DocumentOperation { - index_uid, - primary_key, - method, - operations, - mut tasks, - } => { - progress.update_progress(DocumentOperationProgress::RetrievingConfig); - // TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches. - // this is made difficult by the fact we're doing private clones of the index scheduler and sending it - // to a fresh thread. - let mut content_files = Vec::new(); - for operation in &operations { - if let DocumentOperation::Add(content_uuid) = operation { - let content_file = self.file_store.get_update(*content_uuid)?; - let mmap = unsafe { memmap2::Mmap::map(&content_file)? 
}; - if !mmap.is_empty() { - content_files.push(mmap); - } - } - } - - let rtxn = index.read_txn()?; - let db_fields_ids_map = index.fields_ids_map(&rtxn)?; - let mut new_fields_ids_map = db_fields_ids_map.clone(); - - let mut content_files_iter = content_files.iter(); - let mut indexer = indexer::DocumentOperation::new(method); - let embedders = index - .embedding_configs(index_wtxn) - .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; - let embedders = self.embedders(index_uid.clone(), embedders)?; - for operation in operations { - match operation { - DocumentOperation::Add(_content_uuid) => { - let mmap = content_files_iter.next().unwrap(); - indexer - .add_documents(mmap) - .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; - } - DocumentOperation::Delete(document_ids) => { - let document_ids: bumpalo::collections::vec::Vec<_> = document_ids - .iter() - .map(|s| &*indexer_alloc.alloc_str(s)) - .collect_in(&indexer_alloc); - indexer.delete_documents(document_ids.into_bump_slice()); - } - } - } - - let local_pool; - let indexer_config = self.index_mapper.indexer_config(); - let pool = match &indexer_config.thread_pool { - Some(pool) => pool, - None => { - local_pool = ThreadPoolNoAbortBuilder::new() - .thread_name(|i| format!("indexing-thread-{i}")) - .build() - .unwrap(); - &local_pool - } - }; - - progress.update_progress(DocumentOperationProgress::ComputingDocumentChanges); - let (document_changes, operation_stats, primary_key) = indexer - .into_changes( - &indexer_alloc, - index, - &rtxn, - primary_key.as_deref(), - &mut new_fields_ids_map, - &|| must_stop_processing.get(), - progress.clone(), - ) - .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; - - let mut candidates_count = 0; - for (stats, task) in operation_stats.into_iter().zip(&mut tasks) { - candidates_count += stats.document_count; - match stats.error { - Some(error) => { - task.status = Status::Failed; - task.error = Some(milli::Error::UserError(error).into()); - 
} - None => task.status = Status::Succeeded, - } - - task.details = match task.details { - Some(Details::DocumentAdditionOrUpdate { received_documents, .. }) => { - Some(Details::DocumentAdditionOrUpdate { - received_documents, - indexed_documents: Some(stats.document_count), - }) - } - Some(Details::DocumentDeletion { provided_ids, .. }) => { - Some(Details::DocumentDeletion { - provided_ids, - deleted_documents: Some(stats.document_count), - }) - } - _ => { - // In the case of a `documentAdditionOrUpdate` or `DocumentDeletion` - // the details MUST be set to either addition or deletion - unreachable!(); - } - } - } - - progress.update_progress(DocumentOperationProgress::Indexing); - if tasks.iter().any(|res| res.error.is_none()) { - indexer::index( - index_wtxn, - index, - pool, - indexer_config.grenad_parameters(), - &db_fields_ids_map, - new_fields_ids_map, - primary_key, - &document_changes, - embedders, - &|| must_stop_processing.get(), - &progress, - ) - .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; - - let addition = DocumentAdditionResult { - indexed_documents: candidates_count, - number_of_documents: index - .number_of_documents(index_wtxn) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, - }; - - tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); - } - - Ok(tasks) - } - IndexOperation::DocumentEdition { index_uid, mut task } => { - progress.update_progress(DocumentEditionProgress::RetrievingConfig); - - let (filter, code) = if let KindWithContent::DocumentEdition { - filter_expr, - context: _, - function, - .. 
- } = &task.kind - { - (filter_expr, function) - } else { - unreachable!() - }; - - let candidates = match filter.as_ref().map(Filter::from_json) { - Some(Ok(Some(filter))) => filter - .evaluate(index_wtxn, index) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, - None | Some(Ok(None)) => index.documents_ids(index_wtxn)?, - Some(Err(e)) => return Err(Error::from_milli(e, Some(index_uid.clone()))), - }; - - let (original_filter, context, function) = if let Some(Details::DocumentEdition { - original_filter, - context, - function, - .. - }) = task.details - { - (original_filter, context, function) - } else { - // In the case of a `documentEdition` the details MUST be set - unreachable!(); - }; - - if candidates.is_empty() { - task.status = Status::Succeeded; - task.details = Some(Details::DocumentEdition { - original_filter, - context, - function, - deleted_documents: Some(0), - edited_documents: Some(0), - }); - - return Ok(vec![task]); - } - - let rtxn = index.read_txn()?; - let db_fields_ids_map = index.fields_ids_map(&rtxn)?; - let mut new_fields_ids_map = db_fields_ids_map.clone(); - // candidates not empty => index not empty => a primary key is set - let primary_key = index.primary_key(&rtxn)?.unwrap(); - - let primary_key = - PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map) - .map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?; - - let result_count = Ok((candidates.len(), candidates.len())) as Result<_>; - - if task.error.is_none() { - let local_pool; - let indexer_config = self.index_mapper.indexer_config(); - let pool = match &indexer_config.thread_pool { - Some(pool) => pool, - None => { - local_pool = ThreadPoolNoAbortBuilder::new() - .thread_name(|i| format!("indexing-thread-{i}")) - .build() - .unwrap(); - &local_pool - } - }; - - let candidates_count = candidates.len(); - progress.update_progress(DocumentEditionProgress::ComputingDocumentChanges); - let indexer = UpdateByFunction::new(candidates, 
context.clone(), code.clone()); - let document_changes = pool - .install(|| { - indexer - .into_changes(&primary_key) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone()))) - }) - .unwrap()?; - let embedders = index - .embedding_configs(index_wtxn) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; - let embedders = self.embedders(index_uid.clone(), embedders)?; - - progress.update_progress(DocumentEditionProgress::Indexing); - indexer::index( - index_wtxn, - index, - pool, - indexer_config.grenad_parameters(), - &db_fields_ids_map, - new_fields_ids_map, - None, // cannot change primary key in DocumentEdition - &document_changes, - embedders, - &|| must_stop_processing.get(), - &progress, - ) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; - - let addition = DocumentAdditionResult { - indexed_documents: candidates_count, - number_of_documents: index - .number_of_documents(index_wtxn) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, - }; - - tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); - } - - match result_count { - Ok((deleted_documents, edited_documents)) => { - task.status = Status::Succeeded; - task.details = Some(Details::DocumentEdition { - original_filter, - context, - function, - deleted_documents: Some(deleted_documents), - edited_documents: Some(edited_documents), - }); - } - Err(e) => { - task.status = Status::Failed; - task.details = Some(Details::DocumentEdition { - original_filter, - context, - function, - deleted_documents: Some(0), - edited_documents: Some(0), - }); - task.error = Some(e.into()); - } - } - - Ok(vec![task]) - } - IndexOperation::DocumentDeletion { mut tasks, index_uid } => { - progress.update_progress(DocumentDeletionProgress::RetrievingConfig); - - let mut to_delete = RoaringBitmap::new(); - let external_documents_ids = index.external_documents_ids(); - - for task in tasks.iter_mut() { - let 
before = to_delete.len(); - task.status = Status::Succeeded; - - match &task.kind { - KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => { - for id in documents_ids { - if let Some(id) = external_documents_ids.get(index_wtxn, id)? { - to_delete.insert(id); - } - } - let will_be_removed = to_delete.len() - before; - task.details = Some(Details::DocumentDeletion { - provided_ids: documents_ids.len(), - deleted_documents: Some(will_be_removed), - }); - } - KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } => { - let before = to_delete.len(); - let filter = match Filter::from_json(filter_expr) { - Ok(filter) => filter, - Err(err) => { - // theorically, this should be catched by deserr before reaching the index-scheduler and cannot happens - task.status = Status::Failed; - task.error = Some( - Error::from_milli(err, Some(index_uid.clone())).into(), - ); - None - } - }; - if let Some(filter) = filter { - let candidates = filter - .evaluate(index_wtxn, index) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone()))); - match candidates { - Ok(candidates) => to_delete |= candidates, - Err(err) => { - task.status = Status::Failed; - task.error = Some(err.into()); - } - }; - } - let will_be_removed = to_delete.len() - before; - if let Some(Details::DocumentDeletionByFilter { - original_filter: _, - deleted_documents, - }) = &mut task.details - { - *deleted_documents = Some(will_be_removed); - } else { - // In the case of a `documentDeleteByFilter` the details MUST be set - unreachable!() - } - } - _ => unreachable!(), - } - } - - if to_delete.is_empty() { - return Ok(tasks); - } - - let rtxn = index.read_txn()?; - let db_fields_ids_map = index.fields_ids_map(&rtxn)?; - let mut new_fields_ids_map = db_fields_ids_map.clone(); - - // to_delete not empty => index not empty => primary key set - let primary_key = index.primary_key(&rtxn)?.unwrap(); - - let primary_key = - PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map) - 
.map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?; - - if !tasks.iter().all(|res| res.error.is_some()) { - let local_pool; - let indexer_config = self.index_mapper.indexer_config(); - let pool = match &indexer_config.thread_pool { - Some(pool) => pool, - None => { - local_pool = ThreadPoolNoAbortBuilder::new() - .thread_name(|i| format!("indexing-thread-{i}")) - .build() - .unwrap(); - &local_pool - } - }; - - progress.update_progress(DocumentDeletionProgress::DeleteDocuments); - let mut indexer = indexer::DocumentDeletion::new(); - let candidates_count = to_delete.len(); - indexer.delete_documents_by_docids(to_delete); - let document_changes = indexer.into_changes(&indexer_alloc, primary_key); - let embedders = index - .embedding_configs(index_wtxn) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; - let embedders = self.embedders(index_uid.clone(), embedders)?; - - progress.update_progress(DocumentDeletionProgress::Indexing); - indexer::index( - index_wtxn, - index, - pool, - indexer_config.grenad_parameters(), - &db_fields_ids_map, - new_fields_ids_map, - None, // document deletion never changes primary key - &document_changes, - embedders, - &|| must_stop_processing.get(), - &progress, - ) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; - - let addition = DocumentAdditionResult { - indexed_documents: candidates_count, - number_of_documents: index - .number_of_documents(index_wtxn) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, - }; - - tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); - } - - Ok(tasks) - } - IndexOperation::Settings { index_uid, settings, mut tasks } => { - progress.update_progress(SettingsProgress::RetrievingAndMergingTheSettings); - let indexer_config = self.index_mapper.indexer_config(); - let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config); - - for (task, (_, 
settings)) in tasks.iter_mut().zip(settings) { - let checked_settings = settings.clone().check(); - task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) }); - apply_settings_to_builder(&checked_settings, &mut builder); - - // We can apply the status right now and if an update fail later - // the whole batch will be marked as failed. - task.status = Status::Succeeded; - } - - progress.update_progress(SettingsProgress::ApplyTheSettings); - builder - .execute( - |indexing_step| tracing::debug!(update = ?indexing_step), - || must_stop_processing.get(), - ) - .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; - - Ok(tasks) - } - IndexOperation::DocumentClearAndSetting { - index_uid, - cleared_tasks, - settings, - settings_tasks, - } => { - let mut import_tasks = self.apply_index_operation( - index_wtxn, - index, - IndexOperation::DocumentClear { - index_uid: index_uid.clone(), - tasks: cleared_tasks, - }, - progress.clone(), - )?; - - let settings_tasks = self.apply_index_operation( - index_wtxn, - index, - IndexOperation::Settings { index_uid, settings, tasks: settings_tasks }, - progress, - )?; - - let mut tasks = settings_tasks; - tasks.append(&mut import_tasks); - Ok(tasks) - } - } - } - - /// Delete each given task from all the databases (if it is deleteable). - /// - /// Return the number of tasks that were actually deleted. - fn delete_matched_tasks( - &self, - wtxn: &mut RwTxn, - matched_tasks: &RoaringBitmap, - progress: &Progress, - ) -> Result { - progress.update_progress(TaskDeletionProgress::DeletingTasksDateTime); - - // 1. 
Remove from this list the tasks that we are not allowed to delete - let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?; - let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone(); - - let all_task_ids = self.all_task_ids(wtxn)?; - let mut to_delete_tasks = all_task_ids & matched_tasks; - to_delete_tasks -= &**processing_tasks; - to_delete_tasks -= &enqueued_tasks; - - // 2. We now have a list of tasks to delete, delete them - - let mut affected_indexes = HashSet::new(); - let mut affected_statuses = HashSet::new(); - let mut affected_kinds = HashSet::new(); - let mut affected_canceled_by = RoaringBitmap::new(); - // The tasks that have been removed *per batches*. - let mut affected_batches: HashMap = HashMap::new(); - - let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32); - progress.update_progress(task_progress); - for task_id in to_delete_tasks.iter() { - let task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; - - affected_indexes.extend(task.indexes().into_iter().map(|x| x.to_owned())); - affected_statuses.insert(task.status); - affected_kinds.insert(task.kind.as_kind()); - // Note: don't delete the persisted task data since - // we can only delete succeeded, failed, and canceled tasks. - // In each of those cases, the persisted data is supposed to - // have been deleted already. 
- utils::remove_task_datetime(wtxn, self.enqueued_at, task.enqueued_at, task.uid)?; - if let Some(started_at) = task.started_at { - utils::remove_task_datetime(wtxn, self.started_at, started_at, task.uid)?; - } - if let Some(finished_at) = task.finished_at { - utils::remove_task_datetime(wtxn, self.finished_at, finished_at, task.uid)?; - } - if let Some(canceled_by) = task.canceled_by { - affected_canceled_by.insert(canceled_by); - } - if let Some(batch_uid) = task.batch_uid { - affected_batches.entry(batch_uid).or_default().insert(task_id); - } - atomic_progress.fetch_add(1, Ordering::Relaxed); - } - - progress.update_progress(TaskDeletionProgress::DeletingTasksMetadata); - let (atomic_progress, task_progress) = AtomicTaskStep::new( - (affected_indexes.len() + affected_statuses.len() + affected_kinds.len()) as u32, - ); - progress.update_progress(task_progress); - for index in affected_indexes.iter() { - self.update_index(wtxn, index, |bitmap| *bitmap -= &to_delete_tasks)?; - atomic_progress.fetch_add(1, Ordering::Relaxed); - } - - for status in affected_statuses.iter() { - self.update_status(wtxn, *status, |bitmap| *bitmap -= &to_delete_tasks)?; - atomic_progress.fetch_add(1, Ordering::Relaxed); - } - - for kind in affected_kinds.iter() { - self.update_kind(wtxn, *kind, |bitmap| *bitmap -= &to_delete_tasks)?; - atomic_progress.fetch_add(1, Ordering::Relaxed); - } - - progress.update_progress(TaskDeletionProgress::DeletingTasks); - let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32); - progress.update_progress(task_progress); - for task in to_delete_tasks.iter() { - self.all_tasks.delete(wtxn, &task)?; - atomic_progress.fetch_add(1, Ordering::Relaxed); - } - for canceled_by in affected_canceled_by { - if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? 
{ - tasks -= &to_delete_tasks; - if tasks.is_empty() { - self.canceled_by.delete(wtxn, &canceled_by)?; - } else { - self.canceled_by.put(wtxn, &canceled_by, &tasks)?; - } - } - } - progress.update_progress(TaskDeletionProgress::DeletingBatches); - let (atomic_progress, batch_progress) = AtomicBatchStep::new(affected_batches.len() as u32); - progress.update_progress(batch_progress); - for (batch_id, to_delete_tasks) in affected_batches { - if let Some(mut tasks) = self.batch_to_tasks_mapping.get(wtxn, &batch_id)? { - tasks -= &to_delete_tasks; - // We must remove the batch entirely - if tasks.is_empty() { - self.all_batches.delete(wtxn, &batch_id)?; - self.batch_to_tasks_mapping.delete(wtxn, &batch_id)?; - } - // Anyway, we must remove the batch from all its reverse indexes. - // The only way to do that is to check - - for index in affected_indexes.iter() { - let index_tasks = self.index_tasks(wtxn, index)?; - let remaining_index_tasks = index_tasks & &tasks; - if remaining_index_tasks.is_empty() { - self.update_batch_index(wtxn, index, |bitmap| { - bitmap.remove(batch_id); - })?; - } - } - - for status in affected_statuses.iter() { - let status_tasks = self.get_status(wtxn, *status)?; - let remaining_status_tasks = status_tasks & &tasks; - if remaining_status_tasks.is_empty() { - self.update_batch_status(wtxn, *status, |bitmap| { - bitmap.remove(batch_id); - })?; - } - } - - for kind in affected_kinds.iter() { - let kind_tasks = self.get_kind(wtxn, *kind)?; - let remaining_kind_tasks = kind_tasks & &tasks; - if remaining_kind_tasks.is_empty() { - self.update_batch_kind(wtxn, *kind, |bitmap| { - bitmap.remove(batch_id); - })?; - } - } - } - atomic_progress.fetch_add(1, Ordering::Relaxed); - } - - Ok(to_delete_tasks) - } - - /// Cancel each given task from all the databases (if it is cancelable). - /// - /// Returns the list of tasks that matched the filter and must be written in the database. 
- fn cancel_matched_tasks( - &self, - rtxn: &RoTxn, - cancel_task_id: TaskId, - current_batch: &mut ProcessingBatch, - matched_tasks: &RoaringBitmap, - progress: &Progress, - ) -> Result> { - progress.update_progress(TaskCancelationProgress::RetrievingTasks); - - // 1. Remove from this list the tasks that we are not allowed to cancel - // Notice that only the _enqueued_ ones are cancelable and we should - // have already aborted the indexation of the _processing_ ones - let cancelable_tasks = self.get_status(rtxn, Status::Enqueued)?; - let tasks_to_cancel = cancelable_tasks & matched_tasks; - - let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32); - progress.update_progress(progress_obj); - - // 2. We now have a list of tasks to cancel, cancel them - let mut tasks = self.get_existing_tasks( - rtxn, - tasks_to_cancel.iter().inspect(|_| { - task_progress.fetch_add(1, Ordering::Relaxed); - }), - )?; - - progress.update_progress(TaskCancelationProgress::UpdatingTasks); - let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32); - progress.update_progress(progress_obj); - for task in tasks.iter_mut() { - task.status = Status::Canceled; - task.canceled_by = Some(cancel_task_id); - task.details = task.details.as_ref().map(|d| d.to_failed()); - current_batch.processing(Some(task)); - task_progress.fetch_add(1, Ordering::Relaxed); - } - - Ok(tasks) - } -} diff --git a/crates/index-scheduler/src/dump.rs b/crates/index-scheduler/src/dump.rs new file mode 100644 index 000000000..643255ac2 --- /dev/null +++ b/crates/index-scheduler/src/dump.rs @@ -0,0 +1,203 @@ +use std::collections::HashMap; + +use dump::{KindDump, TaskDump, UpdateFile}; +use meilisearch_types::heed::RwTxn; +use meilisearch_types::milli::documents::DocumentsBatchBuilder; +use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; +use roaring::RoaringBitmap; +use uuid::Uuid; + +use crate::{utils, Error, IndexScheduler, Result}; + +pub 
struct Dump<'a> { + index_scheduler: &'a IndexScheduler, + wtxn: RwTxn<'a>, + + indexes: HashMap, + statuses: HashMap, + kinds: HashMap, +} + +impl<'a> Dump<'a> { + pub(crate) fn new(index_scheduler: &'a mut IndexScheduler) -> Result { + // While loading a dump no one should be able to access the scheduler thus I can block everything. + let wtxn = index_scheduler.env.write_txn()?; + + Ok(Dump { + index_scheduler, + wtxn, + indexes: HashMap::new(), + statuses: HashMap::new(), + kinds: HashMap::new(), + }) + } + + /// Register a new task coming from a dump in the scheduler. + /// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running. + pub fn register_dumped_task( + &mut self, + task: TaskDump, + content_file: Option>, + ) -> Result { + let content_uuid = match content_file { + Some(content_file) if task.status == Status::Enqueued => { + let (uuid, mut file) = self.index_scheduler.queue.create_update_file(false)?; + let mut builder = DocumentsBatchBuilder::new(&mut file); + for doc in content_file { + builder.append_json_object(&doc?)?; + } + builder.into_inner()?; + file.persist()?; + + Some(uuid) + } + // If the task isn't `Enqueued` then just generate a recognisable `Uuid` + // in case we try to open it later. 
+ _ if task.status != Status::Enqueued => Some(Uuid::nil()), + _ => None, + }; + + let task = Task { + uid: task.uid, + batch_uid: task.batch_uid, + enqueued_at: task.enqueued_at, + started_at: task.started_at, + finished_at: task.finished_at, + error: task.error, + canceled_by: task.canceled_by, + details: task.details, + status: task.status, + kind: match task.kind { + KindDump::DocumentImport { + primary_key, + method, + documents_count, + allow_index_creation, + } => KindWithContent::DocumentAdditionOrUpdate { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + primary_key, + method, + content_file: content_uuid.ok_or(Error::CorruptedDump)?, + documents_count, + allow_index_creation, + }, + KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion { + documents_ids, + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + }, + KindDump::DocumentDeletionByFilter { filter } => { + KindWithContent::DocumentDeletionByFilter { + filter_expr: filter, + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + } + } + KindDump::DocumentEdition { filter, context, function } => { + KindWithContent::DocumentEdition { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + filter_expr: filter, + context, + function, + } + } + KindDump::DocumentClear => KindWithContent::DocumentClear { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + }, + KindDump::Settings { settings, is_deletion, allow_index_creation } => { + KindWithContent::SettingsUpdate { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + new_settings: settings, + is_deletion, + allow_index_creation, + } + } + KindDump::IndexDeletion => KindWithContent::IndexDeletion { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + }, + KindDump::IndexCreation { primary_key } => KindWithContent::IndexCreation { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + primary_key, + }, + KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate { + 
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + primary_key, + }, + KindDump::IndexSwap { swaps } => KindWithContent::IndexSwap { swaps }, + KindDump::TaskCancelation { query, tasks } => { + KindWithContent::TaskCancelation { query, tasks } + } + KindDump::TasksDeletion { query, tasks } => { + KindWithContent::TaskDeletion { query, tasks } + } + KindDump::DumpCreation { keys, instance_uid } => { + KindWithContent::DumpCreation { keys, instance_uid } + } + KindDump::SnapshotCreation => KindWithContent::SnapshotCreation, + }, + }; + + self.index_scheduler.queue.tasks.all_tasks.put(&mut self.wtxn, &task.uid, &task)?; + + for index in task.indexes() { + match self.indexes.get_mut(index) { + Some(bitmap) => { + bitmap.insert(task.uid); + } + None => { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert(task.uid); + self.indexes.insert(index.to_string(), bitmap); + } + }; + } + + utils::insert_task_datetime( + &mut self.wtxn, + self.index_scheduler.queue.tasks.enqueued_at, + task.enqueued_at, + task.uid, + )?; + + // we can't override the started_at & finished_at, so we must only set them if the task is finished and won't change + if matches!(task.status, Status::Succeeded | Status::Failed | Status::Canceled) { + if let Some(started_at) = task.started_at { + utils::insert_task_datetime( + &mut self.wtxn, + self.index_scheduler.queue.tasks.started_at, + started_at, + task.uid, + )?; + } + if let Some(finished_at) = task.finished_at { + utils::insert_task_datetime( + &mut self.wtxn, + self.index_scheduler.queue.tasks.finished_at, + finished_at, + task.uid, + )?; + } + } + + self.statuses.entry(task.status).or_default().insert(task.uid); + self.kinds.entry(task.kind.as_kind()).or_default().insert(task.uid); + + Ok(task) + } + + /// Commit all the changes and exit the importing dump state + pub fn finish(mut self) -> Result<()> { + for (index, bitmap) in self.indexes { + self.index_scheduler.queue.tasks.index_tasks.put(&mut self.wtxn, &index, &bitmap)?; + } +
for (status, bitmap) in self.statuses { + self.index_scheduler.queue.tasks.put_status(&mut self.wtxn, status, &bitmap)?; + } + for (kind, bitmap) in self.kinds { + self.index_scheduler.queue.tasks.put_kind(&mut self.wtxn, kind, &bitmap)?; + } + + self.wtxn.commit()?; + self.index_scheduler.scheduler.wake_up.signal(); + + Ok(()) + } +} diff --git a/crates/index-scheduler/src/index_mapper/index_map.rs b/crates/index-scheduler/src/index_mapper/index_map.rs index 480dafa7c..947f558aa 100644 --- a/crates/index-scheduler/src/index_mapper/index_map.rs +++ b/crates/index-scheduler/src/index_mapper/index_map.rs @@ -323,7 +323,7 @@ mod tests { use uuid::Uuid; use super::super::IndexMapper; - use crate::tests::IndexSchedulerHandle; + use crate::test_utils::IndexSchedulerHandle; use crate::utils::clamp_to_page_size; use crate::IndexScheduler; diff --git a/crates/index-scheduler/src/index_mapper/mod.rs b/crates/index-scheduler/src/index_mapper/mod.rs index 98272542b..d8624d7b9 100644 --- a/crates/index-scheduler/src/index_mapper/mod.rs +++ b/crates/index-scheduler/src/index_mapper/mod.rs @@ -16,7 +16,7 @@ use uuid::Uuid; use self::index_map::IndexMap; use self::IndexStatus::{Available, BeingDeleted, Closing, Missing}; use crate::uuid_codec::UuidCodec; -use crate::{Error, Result}; +use crate::{Error, IndexBudget, IndexSchedulerOptions, Result}; mod index_map; @@ -140,27 +140,19 @@ impl IndexStats { impl IndexMapper { pub fn new( env: &Env, - base_path: PathBuf, - index_base_map_size: usize, - index_growth_amount: usize, - index_count: usize, - enable_mdb_writemap: bool, - indexer_config: IndexerConfig, + wtxn: &mut RwTxn, + options: &IndexSchedulerOptions, + budget: IndexBudget, ) -> Result { - let mut wtxn = env.write_txn()?; - let index_mapping = env.create_database(&mut wtxn, Some(INDEX_MAPPING))?; - let index_stats = env.create_database(&mut wtxn, Some(INDEX_STATS))?; - wtxn.commit()?; - Ok(Self { - index_map: Arc::new(RwLock::new(IndexMap::new(index_count))), - 
index_mapping, - index_stats, - base_path, - index_base_map_size, - index_growth_amount, - enable_mdb_writemap, - indexer_config: Arc::new(indexer_config), + index_map: Arc::new(RwLock::new(IndexMap::new(budget.index_count))), + index_mapping: env.create_database(wtxn, Some(INDEX_MAPPING))?, + index_stats: env.create_database(wtxn, Some(INDEX_STATS))?, + base_path: options.indexes_path.clone(), + index_base_map_size: budget.map_size, + index_growth_amount: options.index_growth_amount, + enable_mdb_writemap: options.enable_mdb_writemap, + indexer_config: options.indexer_config.clone(), currently_updating_index: Default::default(), }) } diff --git a/crates/index-scheduler/src/insta_snapshot.rs b/crates/index-scheduler/src/insta_snapshot.rs index 67627d8c1..de79cd7c0 100644 --- a/crates/index-scheduler/src/insta_snapshot.rs +++ b/crates/index-scheduler/src/insta_snapshot.rs @@ -5,11 +5,11 @@ use meilisearch_types::batches::Batch; use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str}; use meilisearch_types::heed::{Database, RoTxn}; use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; -use meilisearch_types::tasks::{Details, Task}; +use meilisearch_types::tasks::{Details, Kind, Status, Task}; use roaring::RoaringBitmap; use crate::index_mapper::IndexMapper; -use crate::{IndexScheduler, Kind, Status, BEI128}; +use crate::{IndexScheduler, BEI128}; pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { // Since we'll snapshot the index right afterward, we don't need to ensure it's internally consistent for every run. 
@@ -18,41 +18,14 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { scheduler.assert_internally_consistent(); let IndexScheduler { - autobatching_enabled, cleanup_enabled: _, - must_stop_processing: _, processing_tasks, - file_store, env, - all_tasks, - all_batches, - batch_to_tasks_mapping, - // task reverse index - status, - kind, - index_tasks, - canceled_by, - enqueued_at, - started_at, - finished_at, - - // batch reverse index - batch_status, - batch_kind, - batch_index_tasks, - batch_enqueued_at, - batch_started_at, - batch_finished_at, + queue, + scheduler, index_mapper, features: _, - max_number_of_tasks: _, - max_number_of_batched_tasks: _, - wake_up: _, - dumps_path: _, - snapshots_path: _, - auth_path: _, - version_file_path: _, webhook_url: _, webhook_authorization_header: _, test_breakpoint_sdr: _, @@ -66,7 +39,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { let mut snap = String::new(); let processing = processing_tasks.read().unwrap().clone(); - snap.push_str(&format!("### Autobatching Enabled = {autobatching_enabled}\n")); + snap.push_str(&format!("### Autobatching Enabled = {}\n", scheduler.autobatching_enabled)); snap.push_str(&format!( "### Processing batch {:?}:\n", processing.batch.as_ref().map(|batch| batch.uid) @@ -79,19 +52,19 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { snap.push_str("\n----------------------------------------------------------------------\n"); snap.push_str("### All Tasks:\n"); - snap.push_str(&snapshot_all_tasks(&rtxn, *all_tasks)); + snap.push_str(&snapshot_all_tasks(&rtxn, queue.tasks.all_tasks)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Status:\n"); - snap.push_str(&snapshot_status(&rtxn, *status)); + snap.push_str(&snapshot_status(&rtxn, queue.tasks.status)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### 
Kind:\n"); - snap.push_str(&snapshot_kind(&rtxn, *kind)); + snap.push_str(&snapshot_kind(&rtxn, queue.tasks.kind)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Index Tasks:\n"); - snap.push_str(&snapshot_index_tasks(&rtxn, *index_tasks)); + snap.push_str(&snapshot_index_tasks(&rtxn, queue.tasks.index_tasks)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Index Mapper:\n"); @@ -99,55 +72,55 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { snap.push_str("\n----------------------------------------------------------------------\n"); snap.push_str("### Canceled By:\n"); - snap.push_str(&snapshot_canceled_by(&rtxn, *canceled_by)); + snap.push_str(&snapshot_canceled_by(&rtxn, queue.tasks.canceled_by)); snap.push_str("\n----------------------------------------------------------------------\n"); snap.push_str("### Enqueued At:\n"); - snap.push_str(&snapshot_date_db(&rtxn, *enqueued_at)); + snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.enqueued_at)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Started At:\n"); - snap.push_str(&snapshot_date_db(&rtxn, *started_at)); + snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.started_at)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Finished At:\n"); - snap.push_str(&snapshot_date_db(&rtxn, *finished_at)); + snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.finished_at)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### All Batches:\n"); - snap.push_str(&snapshot_all_batches(&rtxn, *all_batches)); + snap.push_str(&snapshot_all_batches(&rtxn, queue.batches.all_batches)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Batch 
to tasks mapping:\n"); - snap.push_str(&snapshot_batches_to_tasks_mappings(&rtxn, *batch_to_tasks_mapping)); + snap.push_str(&snapshot_batches_to_tasks_mappings(&rtxn, queue.batch_to_tasks_mapping)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Batches Status:\n"); - snap.push_str(&snapshot_status(&rtxn, *batch_status)); + snap.push_str(&snapshot_status(&rtxn, queue.batches.status)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Batches Kind:\n"); - snap.push_str(&snapshot_kind(&rtxn, *batch_kind)); + snap.push_str(&snapshot_kind(&rtxn, queue.batches.kind)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Batches Index Tasks:\n"); - snap.push_str(&snapshot_index_tasks(&rtxn, *batch_index_tasks)); + snap.push_str(&snapshot_index_tasks(&rtxn, queue.batches.index_tasks)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Batches Enqueued At:\n"); - snap.push_str(&snapshot_date_db(&rtxn, *batch_enqueued_at)); + snap.push_str(&snapshot_date_db(&rtxn, queue.batches.enqueued_at)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Batches Started At:\n"); - snap.push_str(&snapshot_date_db(&rtxn, *batch_started_at)); + snap.push_str(&snapshot_date_db(&rtxn, queue.batches.started_at)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### Batches Finished At:\n"); - snap.push_str(&snapshot_date_db(&rtxn, *batch_finished_at)); + snap.push_str(&snapshot_date_db(&rtxn, queue.batches.finished_at)); snap.push_str("----------------------------------------------------------------------\n"); snap.push_str("### File Store:\n"); - snap.push_str(&snapshot_file_store(file_store)); + 
snap.push_str(&snapshot_file_store(&queue.file_store)); snap.push_str("\n----------------------------------------------------------------------\n"); snap diff --git a/crates/index-scheduler/src/lib.rs b/crates/index-scheduler/src/lib.rs index 8bceaddf6..d5b12e99f 100644 --- a/crates/index-scheduler/src/lib.rs +++ b/crates/index-scheduler/src/lib.rs @@ -18,8 +18,7 @@ called asynchronously from any thread. These methods can either query the content of the scheduler or enqueue new tasks. */ -mod autobatcher; -mod batch; +mod dump; pub mod error; mod features; mod index_mapper; @@ -27,6 +26,10 @@ mod index_mapper; mod insta_snapshot; mod lru; mod processing; +mod queue; +mod scheduler; +#[cfg(test)] +mod test_utils; mod utils; pub mod uuid_codec; @@ -35,190 +38,39 @@ pub type TaskId = u32; use std::collections::{BTreeMap, HashMap}; use std::io::{self, BufReader, Read}; -use std::ops::{Bound, RangeBounds}; use std::panic::{catch_unwind, AssertUnwindSafe}; use std::path::{Path, PathBuf}; -use std::sync::atomic::Ordering::{self, Relaxed}; -use std::sync::atomic::{AtomicBool, AtomicU32}; use std::sync::{Arc, RwLock}; use std::time::Duration; -use dump::{KindDump, TaskDump, UpdateFile}; +use dump::Dump; pub use error::Error; pub use features::RoFeatures; -use file_store::FileStore; use flate2::bufread::GzEncoder; use flate2::Compression; -use meilisearch_types::batches::{Batch, BatchId}; -use meilisearch_types::error::ResponseError; +use meilisearch_types::batches::Batch; use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures}; use meilisearch_types::heed::byteorder::BE; -use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128}; -use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn}; -use meilisearch_types::milli::documents::DocumentsBatchBuilder; +use meilisearch_types::heed::types::I128; +use meilisearch_types::heed::{self, Env, RoTxn}; use meilisearch_types::milli::index::IndexEmbeddingConfig; use 
meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs}; -use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32}; +use meilisearch_types::milli::{self, Index}; use meilisearch_types::task_view::TaskView; -use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; +use meilisearch_types::tasks::{KindWithContent, Task}; use processing::ProcessingTasks; -use rayon::current_num_threads; -use rayon::prelude::{IntoParallelIterator, ParallelIterator}; +pub use queue::Query; +use queue::Queue; use roaring::RoaringBitmap; -use synchronoise::SignalEvent; -use time::format_description::well_known::Rfc3339; +use scheduler::Scheduler; use time::OffsetDateTime; -use utils::{filter_out_references_to_newer_tasks, keep_ids_within_datetimes, map_bound}; -use uuid::Uuid; use crate::index_mapper::IndexMapper; -use crate::processing::{AtomicTaskStep, BatchProgress}; -use crate::utils::{check_index_swap_validity, clamp_to_page_size}; +use crate::utils::clamp_to_page_size; pub(crate) type BEI128 = I128; -/// Defines a subset of tasks to be retrieved from the [`IndexScheduler`]. -/// -/// An empty/default query (where each field is set to `None`) matches all tasks. -/// Each non-null field restricts the set of tasks further. -#[derive(Default, Debug, Clone, PartialEq, Eq)] -pub struct Query { - /// The maximum number of tasks to be matched - pub limit: Option, - /// The minimum [task id](`meilisearch_types::tasks::Task::uid`) to be matched - pub from: Option, - /// The order used to return the tasks. By default the newest tasks are returned first and the boolean is `false`. 
- pub reverse: Option, - /// The [task ids](`meilisearch_types::tasks::Task::uid`) to be matched - pub uids: Option>, - /// The [batch ids](`meilisearch_types::batches::Batch::uid`) to be matched - pub batch_uids: Option>, - /// The allowed [statuses](`meilisearch_types::tasks::Task::status`) of the matched tasls - pub statuses: Option>, - /// The allowed [kinds](meilisearch_types::tasks::Kind) of the matched tasks. - /// - /// The kind of a task is given by: - /// ``` - /// # use meilisearch_types::tasks::{Task, Kind}; - /// # fn doc_func(task: Task) -> Kind { - /// task.kind.as_kind() - /// # } - /// ``` - pub types: Option>, - /// The allowed [index ids](meilisearch_types::tasks::Task::index_uid) of the matched tasks - pub index_uids: Option>, - /// The [task ids](`meilisearch_types::tasks::Task::uid`) of the [`TaskCancelation`](meilisearch_types::tasks::Task::Kind::TaskCancelation) tasks - /// that canceled the matched tasks. - pub canceled_by: Option>, - /// Exclusive upper bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field. - pub before_enqueued_at: Option, - /// Exclusive lower bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field. - pub after_enqueued_at: Option, - /// Exclusive upper bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field. - pub before_started_at: Option, - /// Exclusive lower bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field. - pub after_started_at: Option, - /// Exclusive upper bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field. - pub before_finished_at: Option, - /// Exclusive lower bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field. 
- pub after_finished_at: Option, -} - -impl Query { - /// Return `true` if every field of the query is set to `None`, such that the query - /// matches all tasks. - pub fn is_empty(&self) -> bool { - matches!( - self, - Query { - limit: None, - from: None, - reverse: None, - uids: None, - batch_uids: None, - statuses: None, - types: None, - index_uids: None, - canceled_by: None, - before_enqueued_at: None, - after_enqueued_at: None, - before_started_at: None, - after_started_at: None, - before_finished_at: None, - after_finished_at: None, - } - ) - } - - /// Add an [index id](meilisearch_types::tasks::Task::index_uid) to the list of permitted indexes. - pub fn with_index(self, index_uid: String) -> Self { - let mut index_vec = self.index_uids.unwrap_or_default(); - index_vec.push(index_uid); - Self { index_uids: Some(index_vec), ..self } - } - - // Removes the `from` and `limit` restrictions from the query. - // Useful to get the total number of tasks matching a filter. - pub fn without_limits(self) -> Self { - Query { limit: None, from: None, ..self } - } -} - -#[derive(Default, Clone, Debug)] -struct MustStopProcessing(Arc); - -impl MustStopProcessing { - fn get(&self) -> bool { - self.0.load(Relaxed) - } - - fn must_stop(&self) { - self.0.store(true, Relaxed); - } - - fn reset(&self) { - self.0.store(false, Relaxed); - } -} - -/// Database const names for the `IndexScheduler`. 
-mod db_name { - pub const ALL_TASKS: &str = "all-tasks"; - pub const ALL_BATCHES: &str = "all-batches"; - pub const BATCH_TO_TASKS_MAPPING: &str = "batch-to-tasks-mapping"; - pub const STATUS: &str = "status"; - pub const KIND: &str = "kind"; - pub const INDEX_TASKS: &str = "index-tasks"; - pub const CANCELED_BY: &str = "canceled_by"; - pub const ENQUEUED_AT: &str = "enqueued-at"; - pub const STARTED_AT: &str = "started-at"; - pub const FINISHED_AT: &str = "finished-at"; - - pub const BATCH_STATUS: &str = "batch-status"; - pub const BATCH_KIND: &str = "batch-kind"; - pub const BATCH_INDEX_TASKS: &str = "batch-index-tasks"; - pub const BATCH_ENQUEUED_AT: &str = "batch-enqueued-at"; - pub const BATCH_STARTED_AT: &str = "batch-started-at"; - pub const BATCH_FINISHED_AT: &str = "batch-finished-at"; -} - -#[cfg(test)] -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Breakpoint { - // this state is only encountered while creating the scheduler in the test suite. - Init, - - Start, - BatchCreated, - BeforeProcessing, - AfterProcessing, - AbortedIndexation, - ProcessBatchSucceeded, - ProcessBatchFailed, - InsideProcessBatch, -} - #[derive(Debug)] pub struct IndexSchedulerOptions { /// The path to the version file of Meilisearch. @@ -250,7 +102,7 @@ pub struct IndexSchedulerOptions { /// The number of indexes that can be concurrently opened in memory. pub index_count: usize, /// Configuration used during indexing for each meilisearch index. - pub indexer_config: IndexerConfig, + pub indexer_config: Arc, /// Set to `true` iff the index scheduler is allowed to automatically /// batch tasks together, to process multiple tasks at once. pub autobatching_enabled: bool, @@ -273,52 +125,13 @@ pub struct IndexScheduler { /// The LMDB environment which the DBs are associated with. pub(crate) env: Env, - /// A boolean that can be set to true to stop the currently processing tasks. 
- pub(crate) must_stop_processing: MustStopProcessing, - /// The list of tasks currently processing pub(crate) processing_tasks: Arc>, - /// The list of files referenced by the tasks - pub(crate) file_store: FileStore, + /// The queue containing both the tasks and the batches. + pub queue: queue::Queue, - /// The main database, it contains all the tasks accessible by their Id. - pub(crate) all_tasks: Database>, - - /// Contains all the batches accessible by their Id. - pub(crate) all_batches: Database>, - - /// Matches a batch id with the associated task ids. - pub(crate) batch_to_tasks_mapping: Database, - - /// All the tasks ids grouped by their status. - // TODO we should not be able to serialize a `Status::Processing` in this database. - pub(crate) status: Database, RoaringBitmapCodec>, - /// All the tasks ids grouped by their kind. - pub(crate) kind: Database, RoaringBitmapCodec>, - /// Store the tasks associated to an index. - pub(crate) index_tasks: Database, - /// Store the tasks that were canceled by a task uid - pub(crate) canceled_by: Database, - /// Store the task ids of tasks which were enqueued at a specific date - pub(crate) enqueued_at: Database, - /// Store the task ids of finished tasks which started being processed at a specific date - pub(crate) started_at: Database, - /// Store the task ids of tasks which finished at a specific date - pub(crate) finished_at: Database, - - /// All the batches containing a task matching the selected status. - pub(crate) batch_status: Database, RoaringBitmapCodec>, - /// All the batches ids grouped by the kind of their task. - pub(crate) batch_kind: Database, RoaringBitmapCodec>, - /// Store the batches associated to an index. 
- pub(crate) batch_index_tasks: Database, - /// Store the batches containing tasks which were enqueued at a specific date - pub(crate) batch_enqueued_at: Database, - /// Store the batches containing finished tasks started at a specific date - pub(crate) batch_started_at: Database, - /// Store the batches containing tasks finished at a specific date - pub(crate) batch_finished_at: Database, + pub scheduler: scheduler::Scheduler, /// In charge of creating, opening, storing and returning indexes. pub(crate) index_mapper: IndexMapper, @@ -326,39 +139,14 @@ pub struct IndexScheduler { /// In charge of fetching and setting the status of experimental features. features: features::FeatureData, - /// Get a signal when a batch needs to be processed. - pub(crate) wake_up: Arc, - - /// Whether auto-batching is enabled or not. - pub(crate) autobatching_enabled: bool, - /// Whether we should automatically cleanup the task queue or not. pub(crate) cleanup_enabled: bool, - /// The max number of tasks allowed before the scheduler starts to delete - /// the finished tasks automatically. - pub(crate) max_number_of_tasks: usize, - - /// The maximum number of tasks that will be batched together. - pub(crate) max_number_of_batched_tasks: usize, - /// The webhook url we should send tasks to after processing every batches. pub(crate) webhook_url: Option, /// The Authorization header to send to the webhook URL. pub(crate) webhook_authorization_header: Option, - /// The path used to create the dumps. - pub(crate) dumps_path: PathBuf, - - /// The path used to create the snapshots. - pub(crate) snapshots_path: PathBuf, - - /// The path to the folder containing the auth LMDB env. - pub(crate) auth_path: PathBuf, - - /// The path to the version file of Meilisearch. - pub(crate) version_file_path: PathBuf, - embedders: Arc>>>, // ================= test @@ -367,13 +155,13 @@ pub struct IndexScheduler { /// /// See [self.breakpoint()](`IndexScheduler::breakpoint`) for an explanation. 
#[cfg(test)] - test_breakpoint_sdr: crossbeam_channel::Sender<(Breakpoint, bool)>, + test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>, /// A list of planned failures within the [`tick`](IndexScheduler::tick) method of the index scheduler. /// /// The first field is the iteration index and the second field identifies a location in the code. #[cfg(test)] - planned_failures: Vec<(usize, tests::FailureLocation)>, + planned_failures: Vec<(usize, test_utils::FailureLocation)>, /// A counter that is incremented before every call to [`tick`](IndexScheduler::tick) #[cfg(test)] @@ -384,40 +172,12 @@ impl IndexScheduler { fn private_clone(&self) -> IndexScheduler { IndexScheduler { env: self.env.clone(), - must_stop_processing: self.must_stop_processing.clone(), processing_tasks: self.processing_tasks.clone(), - file_store: self.file_store.clone(), - all_tasks: self.all_tasks, - all_batches: self.all_batches, - batch_to_tasks_mapping: self.batch_to_tasks_mapping, - - // Tasks reverse index - status: self.status, - kind: self.kind, - index_tasks: self.index_tasks, - canceled_by: self.canceled_by, - enqueued_at: self.enqueued_at, - started_at: self.started_at, - finished_at: self.finished_at, - - // Batches reverse index - batch_status: self.batch_status, - batch_kind: self.batch_kind, - batch_index_tasks: self.batch_index_tasks, - batch_enqueued_at: self.batch_enqueued_at, - batch_started_at: self.batch_started_at, - batch_finished_at: self.batch_finished_at, + queue: self.queue.private_clone(), + scheduler: self.scheduler.private_clone(), index_mapper: self.index_mapper.clone(), - wake_up: self.wake_up.clone(), - autobatching_enabled: self.autobatching_enabled, cleanup_enabled: self.cleanup_enabled, - max_number_of_tasks: self.max_number_of_tasks, - max_number_of_batched_tasks: self.max_number_of_batched_tasks, - snapshots_path: self.snapshots_path.clone(), - dumps_path: self.dumps_path.clone(), - auth_path: self.auth_path.clone(), - 
version_file_path: self.version_file_path.clone(), webhook_url: self.webhook_url.clone(), webhook_authorization_header: self.webhook_authorization_header.clone(), embedders: self.embedders.clone(), @@ -430,14 +190,13 @@ impl IndexScheduler { features: self.features.clone(), } } -} -impl IndexScheduler { /// Create an index scheduler and start its run loop. + #[allow(private_interfaces)] // because test_utils is private pub fn new( options: IndexSchedulerOptions, - #[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(Breakpoint, bool)>, - #[cfg(test)] planned_failures: Vec<(usize, tests::FailureLocation)>, + #[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>, + #[cfg(test)] planned_failures: Vec<(usize, test_utils::FailureLocation)>, ) -> Result { std::fs::create_dir_all(&options.tasks_path)?; std::fs::create_dir_all(&options.update_file_path)?; @@ -469,80 +228,25 @@ impl IndexScheduler { heed::EnvOpenOptions::new() .max_dbs(19) .map_size(budget.task_db_size) - .open(options.tasks_path) + .open(&options.tasks_path) }?; let features = features::FeatureData::new(&env, options.instance_features)?; - let file_store = FileStore::new(&options.update_file_path)?; - let mut wtxn = env.write_txn()?; - let all_tasks = env.create_database(&mut wtxn, Some(db_name::ALL_TASKS))?; - let all_batches = env.create_database(&mut wtxn, Some(db_name::ALL_BATCHES))?; - let batch_to_tasks_mapping = - env.create_database(&mut wtxn, Some(db_name::BATCH_TO_TASKS_MAPPING))?; - - let status = env.create_database(&mut wtxn, Some(db_name::STATUS))?; - let kind = env.create_database(&mut wtxn, Some(db_name::KIND))?; - let index_tasks = env.create_database(&mut wtxn, Some(db_name::INDEX_TASKS))?; - let canceled_by = env.create_database(&mut wtxn, Some(db_name::CANCELED_BY))?; - let enqueued_at = env.create_database(&mut wtxn, Some(db_name::ENQUEUED_AT))?; - let started_at = env.create_database(&mut wtxn, Some(db_name::STARTED_AT))?; - let 
finished_at = env.create_database(&mut wtxn, Some(db_name::FINISHED_AT))?; - - let batch_status = env.create_database(&mut wtxn, Some(db_name::BATCH_STATUS))?; - let batch_kind = env.create_database(&mut wtxn, Some(db_name::BATCH_KIND))?; - let batch_index_tasks = env.create_database(&mut wtxn, Some(db_name::BATCH_INDEX_TASKS))?; - let batch_enqueued_at = env.create_database(&mut wtxn, Some(db_name::BATCH_ENQUEUED_AT))?; - let batch_started_at = env.create_database(&mut wtxn, Some(db_name::BATCH_STARTED_AT))?; - let batch_finished_at = env.create_database(&mut wtxn, Some(db_name::BATCH_FINISHED_AT))?; + let queue = Queue::new(&env, &mut wtxn, &options)?; + let index_mapper = IndexMapper::new(&env, &mut wtxn, &options, budget)?; wtxn.commit()?; // allow unreachable_code to get rids of the warning in the case of a test build. let this = Self { - must_stop_processing: MustStopProcessing::default(), processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())), - file_store, - all_tasks, - all_batches, - batch_to_tasks_mapping, - // Task reverse indexes - status, - kind, - index_tasks, - canceled_by, - enqueued_at, - started_at, - finished_at, + queue, + scheduler: Scheduler::new(&options), - // Batch reverse indexes - batch_status, - batch_kind, - batch_index_tasks, - batch_enqueued_at, - batch_started_at, - batch_finished_at, - - index_mapper: IndexMapper::new( - &env, - options.indexes_path, - budget.map_size, - options.index_growth_amount, - budget.index_count, - options.enable_mdb_writemap, - options.indexer_config, - )?, + index_mapper, env, - // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things - wake_up: Arc::new(SignalEvent::auto(true)), - autobatching_enabled: options.autobatching_enabled, cleanup_enabled: options.cleanup_enabled, - max_number_of_tasks: options.max_number_of_tasks, - max_number_of_batched_tasks: options.max_number_of_batched_tasks, - dumps_path: options.dumps_path, - snapshots_path: 
options.snapshots_path, - auth_path: options.auth_path, - version_file_path: options.version_file_path, webhook_url: options.webhook_url, webhook_authorization_header: options.webhook_authorization_header, embedders: Default::default(), @@ -563,7 +267,7 @@ impl IndexScheduler { /// Return `Ok(())` if the index scheduler is able to access one of its database. pub fn health(&self) -> Result<()> { let rtxn = self.env.read_txn()?; - self.all_tasks.first(&rtxn)?; + self.queue.batch_to_tasks_mapping.first(&rtxn)?; Ok(()) } @@ -650,15 +354,15 @@ impl IndexScheduler { .name(String::from("scheduler")) .spawn(move || { #[cfg(test)] - run.breakpoint(Breakpoint::Init); + run.breakpoint(test_utils::Breakpoint::Init); - run.wake_up.wait_timeout(std::time::Duration::from_secs(60)); + run.scheduler.wake_up.wait_timeout(std::time::Duration::from_secs(60)); loop { let ret = catch_unwind(AssertUnwindSafe(|| run.tick())); match ret { Ok(Ok(TickOutcome::TickAgain(_))) => (), - Ok(Ok(TickOutcome::WaitForSignal)) => run.wake_up.wait(), + Ok(Ok(TickOutcome::WaitForSignal)) => run.scheduler.wake_up.wait(), Ok(Err(e)) => { tracing::error!("{e}"); // Wait one second when an irrecoverable error occurs. @@ -704,14 +408,14 @@ impl IndexScheduler { /// If you need to fetch information from or perform an action on all indexes, /// see the `try_for_each_index` function. pub fn index(&self, name: &str) -> Result { - let rtxn = self.env.read_txn()?; - self.index_mapper.index(&rtxn, name) + self.index_mapper.index(&self.env.read_txn()?, name) } + /// Return the boolean referring if index exists. pub fn index_exists(&self, name: &str) -> Result { - let rtxn = self.env.read_txn()?; - self.index_mapper.index_exists(&rtxn, name) + self.index_mapper.index_exists(&self.env.read_txn()?, name) } + /// Return the name of all indexes without opening them. 
pub fn index_names(&self) -> Result> { let rtxn = self.env.read_txn()?; @@ -736,391 +440,6 @@ impl IndexScheduler { self.index_mapper.try_for_each_index(&rtxn, f) } - /// Return the task ids matched by the given query from the index scheduler's point of view. - pub(crate) fn get_task_ids(&self, rtxn: &RoTxn, query: &Query) -> Result { - let ProcessingTasks { batch: processing_batch, processing: processing_tasks, progress: _ } = - self.processing_tasks.read().unwrap().clone(); - let Query { - limit, - from, - reverse, - uids, - batch_uids, - statuses, - types, - index_uids, - canceled_by, - before_enqueued_at, - after_enqueued_at, - before_started_at, - after_started_at, - before_finished_at, - after_finished_at, - } = query; - - let mut tasks = self.all_task_ids(rtxn)?; - - if let Some(from) = from { - let range = if reverse.unwrap_or_default() { - u32::MIN..*from - } else { - from.saturating_add(1)..u32::MAX - }; - tasks.remove_range(range); - } - - if let Some(batch_uids) = batch_uids { - let mut batch_tasks = RoaringBitmap::new(); - for batch_uid in batch_uids { - if processing_batch.as_ref().map_or(false, |batch| batch.uid == *batch_uid) { - batch_tasks |= &*processing_tasks; - } else { - batch_tasks |= self.tasks_in_batch(rtxn, *batch_uid)?; - } - } - tasks &= batch_tasks; - } - - if let Some(status) = statuses { - let mut status_tasks = RoaringBitmap::new(); - for status in status { - match status { - // special case for Processing tasks - Status::Processing => { - status_tasks |= &*processing_tasks; - } - status => status_tasks |= &self.get_status(rtxn, *status)?, - }; - } - if !status.contains(&Status::Processing) { - tasks -= &*processing_tasks; - } - tasks &= status_tasks; - } - - if let Some(uids) = uids { - let uids = RoaringBitmap::from_iter(uids); - tasks &= &uids; - } - - if let Some(canceled_by) = canceled_by { - let mut all_canceled_tasks = RoaringBitmap::new(); - for cancel_task_uid in canceled_by { - if let Some(canceled_by_uid) = 
self.canceled_by.get(rtxn, cancel_task_uid)? { - all_canceled_tasks |= canceled_by_uid; - } - } - - // if the canceled_by has been specified but no task - // matches then we prefer matching zero than all tasks. - if all_canceled_tasks.is_empty() { - return Ok(RoaringBitmap::new()); - } else { - tasks &= all_canceled_tasks; - } - } - - if let Some(kind) = types { - let mut kind_tasks = RoaringBitmap::new(); - for kind in kind { - kind_tasks |= self.get_kind(rtxn, *kind)?; - } - tasks &= &kind_tasks; - } - - if let Some(index) = index_uids { - let mut index_tasks = RoaringBitmap::new(); - for index in index { - index_tasks |= self.index_tasks(rtxn, index)?; - } - tasks &= &index_tasks; - } - - // For the started_at filter, we need to treat the part of the tasks that are processing from the part of the - // tasks that are not processing. The non-processing ones are filtered normally while the processing ones - // are entirely removed unless the in-memory startedAt variable falls within the date filter. - // Once we have filtered the two subsets, we put them back together and assign it back to `tasks`. 
- tasks = { - let (mut filtered_non_processing_tasks, mut filtered_processing_tasks) = - (&tasks - &*processing_tasks, &tasks & &*processing_tasks); - - // special case for Processing tasks - // A closure that clears the filtered_processing_tasks if their started_at date falls outside the given bounds - let clear_filtered_processing_tasks = - |start: Bound, end: Bound| { - let start = map_bound(start, |b| b.unix_timestamp_nanos()); - let end = map_bound(end, |b| b.unix_timestamp_nanos()); - let is_within_dates = RangeBounds::contains( - &(start, end), - &processing_batch - .map_or_else(OffsetDateTime::now_utc, |batch| batch.started_at) - .unix_timestamp_nanos(), - ); - if !is_within_dates { - filtered_processing_tasks.clear(); - } - }; - match (after_started_at, before_started_at) { - (None, None) => (), - (None, Some(before)) => { - clear_filtered_processing_tasks(Bound::Unbounded, Bound::Excluded(*before)) - } - (Some(after), None) => { - clear_filtered_processing_tasks(Bound::Excluded(*after), Bound::Unbounded) - } - (Some(after), Some(before)) => clear_filtered_processing_tasks( - Bound::Excluded(*after), - Bound::Excluded(*before), - ), - }; - - keep_ids_within_datetimes( - rtxn, - &mut filtered_non_processing_tasks, - self.started_at, - *after_started_at, - *before_started_at, - )?; - filtered_non_processing_tasks | filtered_processing_tasks - }; - - keep_ids_within_datetimes( - rtxn, - &mut tasks, - self.enqueued_at, - *after_enqueued_at, - *before_enqueued_at, - )?; - - keep_ids_within_datetimes( - rtxn, - &mut tasks, - self.finished_at, - *after_finished_at, - *before_finished_at, - )?; - - if let Some(limit) = limit { - tasks = if query.reverse.unwrap_or_default() { - tasks.into_iter().take(*limit as usize).collect() - } else { - tasks.into_iter().rev().take(*limit as usize).collect() - }; - } - - Ok(tasks) - } - - /// Return the batch ids matched by the given query from the index scheduler's point of view. 
- pub(crate) fn get_batch_ids( - &self, - rtxn: &RoTxn, - processing: &ProcessingTasks, - query: &Query, - ) -> Result { - let Query { - limit, - from, - reverse, - uids, - batch_uids, - statuses, - types, - index_uids, - canceled_by, - before_enqueued_at, - after_enqueued_at, - before_started_at, - after_started_at, - before_finished_at, - after_finished_at, - } = query; - - let mut batches = self.all_batch_ids(rtxn)?; - if let Some(batch_id) = processing.batch.as_ref().map(|batch| batch.uid) { - batches.insert(batch_id); - } - - if let Some(from) = from { - let range = if reverse.unwrap_or_default() { - u32::MIN..*from - } else { - from.saturating_add(1)..u32::MAX - }; - batches.remove_range(range); - } - - if let Some(batch_uids) = &batch_uids { - let batches_uids = RoaringBitmap::from_iter(batch_uids); - batches &= batches_uids; - } - - if let Some(status) = &statuses { - let mut status_batches = RoaringBitmap::new(); - for status in status { - match status { - // special case for Processing batches - Status::Processing => { - if let Some(batch_id) = processing.batch.as_ref().map(|batch| batch.uid) { - status_batches.insert(batch_id); - } - } - // Enqueued tasks are not stored in batches - Status::Enqueued => (), - status => status_batches |= &self.get_batch_status(rtxn, *status)?, - }; - } - if !status.contains(&Status::Processing) { - if let Some(ref batch) = processing.batch { - batches.remove(batch.uid); - } - } - batches &= status_batches; - } - - if let Some(task_uids) = &uids { - let mut batches_by_task_uids = RoaringBitmap::new(); - for task_uid in task_uids { - if let Some(task) = self.get_task(rtxn, *task_uid)? 
{ - if let Some(batch_uid) = task.batch_uid { - batches_by_task_uids.insert(batch_uid); - } - } - } - batches &= batches_by_task_uids; - } - - // There is no database for this query, we must retrieve the task queried by the client and ensure it's valid - if let Some(canceled_by) = &canceled_by { - let mut all_canceled_batches = RoaringBitmap::new(); - for cancel_uid in canceled_by { - if let Some(task) = self.get_task(rtxn, *cancel_uid)? { - if task.kind.as_kind() == Kind::TaskCancelation - && task.status == Status::Succeeded - { - if let Some(batch_uid) = task.batch_uid { - all_canceled_batches.insert(batch_uid); - } - } - } - } - - // if the canceled_by has been specified but no batch - // matches then we prefer matching zero than all batches. - if all_canceled_batches.is_empty() { - return Ok(RoaringBitmap::new()); - } else { - batches &= all_canceled_batches; - } - } - - if let Some(kind) = &types { - let mut kind_batches = RoaringBitmap::new(); - for kind in kind { - kind_batches |= self.get_batch_kind(rtxn, *kind)?; - if let Some(uid) = processing - .batch - .as_ref() - .and_then(|batch| batch.kinds.contains(kind).then_some(batch.uid)) - { - kind_batches.insert(uid); - } - } - batches &= &kind_batches; - } - - if let Some(index) = &index_uids { - let mut index_batches = RoaringBitmap::new(); - for index in index { - index_batches |= self.index_batches(rtxn, index)?; - if let Some(uid) = processing - .batch - .as_ref() - .and_then(|batch| batch.indexes.contains(index).then_some(batch.uid)) - { - index_batches.insert(uid); - } - } - batches &= &index_batches; - } - - // For the started_at filter, we need to treat the part of the batches that are processing from the part of the - // batches that are not processing. The non-processing ones are filtered normally while the processing ones - // are entirely removed unless the in-memory startedAt variable falls within the date filter. 
- // Once we have filtered the two subsets, we put them back together and assign it back to `batches`. - batches = { - let (mut filtered_non_processing_batches, mut filtered_processing_batches) = - (&batches - &*processing.processing, &batches & &*processing.processing); - - // special case for Processing batches - // A closure that clears the filtered_processing_batches if their started_at date falls outside the given bounds - let mut clear_filtered_processing_batches = - |start: Bound, end: Bound| { - let start = map_bound(start, |b| b.unix_timestamp_nanos()); - let end = map_bound(end, |b| b.unix_timestamp_nanos()); - let is_within_dates = RangeBounds::contains( - &(start, end), - &processing - .batch - .as_ref() - .map_or_else(OffsetDateTime::now_utc, |batch| batch.started_at) - .unix_timestamp_nanos(), - ); - if !is_within_dates { - filtered_processing_batches.clear(); - } - }; - match (after_started_at, before_started_at) { - (None, None) => (), - (None, Some(before)) => { - clear_filtered_processing_batches(Bound::Unbounded, Bound::Excluded(*before)) - } - (Some(after), None) => { - clear_filtered_processing_batches(Bound::Excluded(*after), Bound::Unbounded) - } - (Some(after), Some(before)) => clear_filtered_processing_batches( - Bound::Excluded(*after), - Bound::Excluded(*before), - ), - }; - - keep_ids_within_datetimes( - rtxn, - &mut filtered_non_processing_batches, - self.batch_started_at, - *after_started_at, - *before_started_at, - )?; - filtered_non_processing_batches | filtered_processing_batches - }; - - keep_ids_within_datetimes( - rtxn, - &mut batches, - self.batch_enqueued_at, - *after_enqueued_at, - *before_enqueued_at, - )?; - - keep_ids_within_datetimes( - rtxn, - &mut batches, - self.batch_finished_at, - *after_finished_at, - *before_finished_at, - )?; - - if let Some(limit) = limit { - batches = if query.reverse.unwrap_or_default() { - batches.into_iter().take(*limit as usize).collect() - } else { - batches.into_iter().rev().take(*limit as 
usize).collect() - }; - } - - Ok(batches) - } - /// Returns the total number of indexes available for the specified filter. /// And a `Vec` of the index_uid + its stats pub fn get_paginated_indexes_stats( @@ -1169,40 +488,7 @@ impl IndexScheduler { /// 2. The name of the specific data related to the property can be `enqueued` for the `statuses`, `settingsUpdate` for the `types`, or the name of the index for the `indexes`, for example. /// 3. The number of times the properties appeared. pub fn get_stats(&self) -> Result>> { - let rtxn = self.read_txn()?; - - let mut res = BTreeMap::new(); - - let processing_tasks = { self.processing_tasks.read().unwrap().processing.len() }; - - res.insert( - "statuses".to_string(), - enum_iterator::all::() - .map(|s| { - let tasks = self.get_status(&rtxn, s)?.len(); - match s { - Status::Enqueued => Ok((s.to_string(), tasks - processing_tasks)), - Status::Processing => Ok((s.to_string(), processing_tasks)), - s => Ok((s.to_string(), tasks)), - } - }) - .collect::>>()?, - ); - res.insert( - "types".to_string(), - enum_iterator::all::() - .map(|s| Ok((s.to_string(), self.get_kind(&rtxn, s)?.len()))) - .collect::>>()?, - ); - res.insert( - "indexes".to_string(), - self.index_tasks - .iter(&rtxn)? - .map(|res| Ok(res.map(|(name, bitmap)| (name.to_string(), bitmap.len()))?)) - .collect::>>()?, - ); - - Ok(res) + self.queue.get_stats(&self.read_txn()?, &self.processing_tasks.read().unwrap()) } // Return true if there is at least one task that is processing. 
@@ -1215,131 +501,11 @@ impl IndexScheduler { pub fn is_index_processing(&self, index: &str) -> Result { let rtxn = self.env.read_txn()?; let processing_tasks = self.processing_tasks.read().unwrap().processing.clone(); - let index_tasks = self.index_tasks(&rtxn, index)?; + let index_tasks = self.queue.tasks.index_tasks(&rtxn, index)?; let nbr_index_processing_tasks = processing_tasks.intersection_len(&index_tasks); Ok(nbr_index_processing_tasks > 0) } - /// Return the task ids matching the query along with the total number of tasks - /// by ignoring the from and limit parameters from the user's point of view. - /// - /// There are two differences between an internal query and a query executed by - /// the user. - /// - /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated - /// with many indexes internally. - /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes. - pub fn get_task_ids_from_authorized_indexes( - &self, - rtxn: &RoTxn, - query: &Query, - filters: &meilisearch_auth::AuthFilter, - ) -> Result<(RoaringBitmap, u64)> { - // compute all tasks matching the filter by ignoring the limits, to find the number of tasks matching - // the filter. - // As this causes us to compute the filter twice it is slightly inefficient, but doing it this way spares - // us from modifying the underlying implementation, and the performance remains sufficient. - // Should this change, we would modify `get_task_ids` to directly return the number of matching tasks. - let total_tasks = self.get_task_ids(rtxn, &query.clone().without_limits())?; - let mut tasks = self.get_task_ids(rtxn, query)?; - - // If the query contains a list of index uid or there is a finite list of authorized indexes, - // then we must exclude all the kinds that aren't associated to one and only one index. 
- if query.index_uids.is_some() || !filters.all_indexes_authorized() { - for kind in enum_iterator::all::().filter(|kind| !kind.related_to_one_index()) { - tasks -= self.get_kind(rtxn, kind)?; - } - } - - // Any task that is internally associated with a non-authorized index - // must be discarded. - if !filters.all_indexes_authorized() { - let all_indexes_iter = self.index_tasks.iter(rtxn)?; - for result in all_indexes_iter { - let (index, index_tasks) = result?; - if !filters.is_index_authorized(index) { - tasks -= index_tasks; - } - } - } - - Ok((tasks, total_tasks.len())) - } - - /// Return the batch ids matching the query along with the total number of batches - /// by ignoring the from and limit parameters from the user's point of view. - /// - /// There are two differences between an internal query and a query executed by - /// the user. - /// - /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated - /// with many indexes internally. - /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes. - fn get_batch_ids_from_authorized_indexes( - &self, - rtxn: &RoTxn, - processing: &ProcessingTasks, - query: &Query, - filters: &meilisearch_auth::AuthFilter, - ) -> Result<(RoaringBitmap, u64)> { - // compute all batches matching the filter by ignoring the limits, to find the number of batches matching - // the filter. - // As this causes us to compute the filter twice it is slightly inefficient, but doing it this way spares - // us from modifying the underlying implementation, and the performance remains sufficient. - // Should this change, we would modify `get_batch_ids` to directly return the number of matching batches. 
- let total_batches = - self.get_batch_ids(rtxn, processing, &query.clone().without_limits())?; - let mut batches = self.get_batch_ids(rtxn, processing, query)?; - - // If the query contains a list of index uid or there is a finite list of authorized indexes, - // then we must exclude all the batches that only contains tasks associated to multiple indexes. - // This works because we don't autobatch tasks associated to multiple indexes with tasks associated - // to a single index. e.g: IndexSwap cannot be batched with IndexCreation. - if query.index_uids.is_some() || !filters.all_indexes_authorized() { - for kind in enum_iterator::all::().filter(|kind| !kind.related_to_one_index()) { - batches -= self.get_kind(rtxn, kind)?; - if let Some(batch) = processing.batch.as_ref() { - if batch.kinds.contains(&kind) { - batches.remove(batch.uid); - } - } - } - } - - // Any batch that is internally associated with at least one authorized index - // must be returned. - if !filters.all_indexes_authorized() { - let mut valid_indexes = RoaringBitmap::new(); - let mut forbidden_indexes = RoaringBitmap::new(); - - let all_indexes_iter = self.batch_index_tasks.iter(rtxn)?; - for result in all_indexes_iter { - let (index, index_tasks) = result?; - if filters.is_index_authorized(index) { - valid_indexes |= index_tasks; - } else { - forbidden_indexes |= index_tasks; - } - } - if let Some(batch) = processing.batch.as_ref() { - for index in &batch.indexes { - if filters.is_index_authorized(index) { - valid_indexes.insert(batch.uid); - } else { - forbidden_indexes.insert(batch.uid); - } - } - } - - // If a batch had ONE valid task then it should be returned - let invalid_batches = forbidden_indexes - valid_indexes; - - batches -= invalid_batches; - } - - Ok((batches, total_batches.len())) - } - /// Return the tasks matching the query from the user's point of view along /// with the total number of tasks matching the query, ignoring from and limit. 
/// @@ -1351,49 +517,31 @@ impl IndexScheduler { /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes. pub fn get_tasks_from_authorized_indexes( &self, - query: Query, + query: &Query, filters: &meilisearch_auth::AuthFilter, ) -> Result<(Vec, u64)> { - let rtxn = self.env.read_txn()?; + let rtxn = self.read_txn()?; + let processing = self.processing_tasks.read().unwrap(); + self.queue.get_tasks_from_authorized_indexes(&rtxn, query, filters, &processing) + } - let (tasks, total) = self.get_task_ids_from_authorized_indexes(&rtxn, &query, filters)?; - let tasks = if query.reverse.unwrap_or_default() { - Box::new(tasks.into_iter()) as Box> - } else { - Box::new(tasks.into_iter().rev()) as Box> - }; - let tasks = - self.get_existing_tasks(&rtxn, tasks.take(query.limit.unwrap_or(u32::MAX) as usize))?; - - let ProcessingTasks { batch, processing, progress } = - self.processing_tasks.read().map_err(|_| Error::CorruptedTaskQueue)?.clone(); - - // ignored for now, might be added to batch details later - let _ = progress; - - let ret = tasks.into_iter(); - if processing.is_empty() || batch.is_none() { - Ok((ret.collect(), total)) - } else { - // Safe because we ensured there was a batch in the previous branch - let batch = batch.unwrap(); - Ok(( - ret.map(|task| { - if processing.contains(task.uid) { - Task { - status: Status::Processing, - batch_uid: Some(batch.uid), - started_at: Some(batch.started_at), - ..task - } - } else { - task - } - }) - .collect(), - total, - )) - } + /// Return the task ids matching the query along with the total number of tasks + /// by ignoring the from and limit parameters from the user's point of view. + /// + /// There are two differences between an internal query and a query executed by + /// the user. + /// + /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated + /// with many indexes internally. + /// 2. 
The user may not have the rights to access the tasks (internally) associated with all indexes. + pub fn get_task_ids_from_authorized_indexes( + &self, + query: &Query, + filters: &meilisearch_auth::AuthFilter, + ) -> Result<(RoaringBitmap, u64)> { + let rtxn = self.read_txn()?; + let processing = self.processing_tasks.read().unwrap(); + self.queue.get_task_ids_from_authorized_indexes(&rtxn, query, filters, &processing) } /// Return the batches matching the query from the user's point of view along @@ -1407,27 +555,31 @@ impl IndexScheduler { /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes. pub fn get_batches_from_authorized_indexes( &self, - query: Query, + query: &Query, filters: &meilisearch_auth::AuthFilter, ) -> Result<(Vec, u64)> { - let rtxn = self.env.read_txn()?; - let processing = self.processing_tasks.read().unwrap().clone(); + let rtxn = self.read_txn()?; + let processing = self.processing_tasks.read().unwrap(); + self.queue.get_batches_from_authorized_indexes(&rtxn, query, filters, &processing) + } - let (batches, total) = - self.get_batch_ids_from_authorized_indexes(&rtxn, &processing, &query, filters)?; - let batches = if query.reverse.unwrap_or_default() { - Box::new(batches.into_iter()) as Box> - } else { - Box::new(batches.into_iter().rev()) as Box> - }; - - let batches = self.get_existing_batches( - &rtxn, - &processing, - batches.take(query.limit.unwrap_or(u32::MAX) as usize), - )?; - - Ok((batches, total)) + /// Return the batch ids matching the query along with the total number of batches + /// by ignoring the from and limit parameters from the user's point of view. + /// + /// There are two differences between an internal query and a query executed by + /// the user. + /// + /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated + /// with many indexes internally. + /// 2. 
The user may not have the rights to access the tasks (internally) associated with all indexes. + pub fn get_batch_ids_from_authorized_indexes( + &self, + query: &Query, + filters: &meilisearch_auth::AuthFilter, + ) -> Result<(RoaringBitmap, u64)> { + let rtxn = self.read_txn()?; + let processing = self.processing_tasks.read().unwrap(); + self.queue.get_batch_ids_from_authorized_indexes(&rtxn, query, filters, &processing) } /// Register a new task in the scheduler. @@ -1439,8 +591,6 @@ impl IndexScheduler { task_id: Option, dry_run: bool, ) -> Result { - let mut wtxn = self.env.write_txn()?; - // if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incomming task if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty()) && (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 40 @@ -1448,64 +598,8 @@ impl IndexScheduler { return Err(Error::NoSpaceLeftInTaskQueue); } - let next_task_id = self.next_task_id(&wtxn)?; - - if let Some(uid) = task_id { - if uid < next_task_id { - return Err(Error::BadTaskId { received: uid, expected: next_task_id }); - } - } - - let mut task = Task { - uid: task_id.unwrap_or(next_task_id), - // The batch is defined once we starts processing the task - batch_uid: None, - enqueued_at: OffsetDateTime::now_utc(), - started_at: None, - finished_at: None, - error: None, - canceled_by: None, - details: kind.default_details(), - status: Status::Enqueued, - kind: kind.clone(), - }; - // For deletion and cancelation tasks, we want to make extra sure that they - // don't attempt to delete/cancel tasks that are newer than themselves. - filter_out_references_to_newer_tasks(&mut task); - // If the register task is an index swap task, verify that it is well-formed - // (that it does not contain duplicate indexes). 
- check_index_swap_validity(&task)?; - - // At this point the task is going to be registered and no further checks will be done - if dry_run { - return Ok(task); - } - - // Get rid of the mutability. - let task = task; - - self.all_tasks.put_with_flags(&mut wtxn, PutFlags::APPEND, &task.uid, &task)?; - - for index in task.indexes() { - self.update_index(&mut wtxn, index, |bitmap| { - bitmap.insert(task.uid); - })?; - } - - self.update_status(&mut wtxn, Status::Enqueued, |bitmap| { - bitmap.insert(task.uid); - })?; - - self.update_kind(&mut wtxn, task.kind.as_kind(), |bitmap| { - bitmap.insert(task.uid); - })?; - - utils::insert_task_datetime(&mut wtxn, self.enqueued_at, task.enqueued_at, task.uid)?; - - if let Err(e) = wtxn.commit() { - self.delete_persisted_task_data(&task)?; - return Err(e.into()); - } + let mut wtxn = self.env.write_txn()?; + let task = self.queue.register(&mut wtxn, &kind, task_id, dry_run)?; // If the registered task is a task cancelation // we inform the processing tasks to stop (if necessary). @@ -1513,13 +607,17 @@ impl IndexScheduler { let tasks_to_cancel = RoaringBitmap::from_iter(tasks); if self.processing_tasks.read().unwrap().must_cancel_processing_tasks(&tasks_to_cancel) { - self.must_stop_processing.must_stop(); + self.scheduler.must_stop_processing.must_stop(); } } - // notify the scheduler loop to execute a new tick - self.wake_up.signal(); + if let Err(e) = wtxn.commit() { + self.queue.delete_persisted_task_data(&task)?; + return Err(e.into()); + } + // notify the scheduler loop to execute a new tick + self.scheduler.wake_up.signal(); Ok(task) } @@ -1553,263 +651,6 @@ impl IndexScheduler { Ok(()) } - /// Create a file and register it in the index scheduler. - /// - /// The returned file and uuid can be used to associate - /// some data to a task. The file will be kept until - /// the task has been fully processed. 
- pub fn create_update_file(&self, dry_run: bool) -> Result<(Uuid, file_store::File)> { - if dry_run { - Ok((Uuid::nil(), file_store::File::dry_file()?)) - } else { - Ok(self.file_store.new_update()?) - } - } - - #[cfg(test)] - pub fn create_update_file_with_uuid(&self, uuid: u128) -> Result<(Uuid, file_store::File)> { - Ok(self.file_store.new_update_with_uuid(uuid)?) - } - - /// The size on disk taken by all the updates files contained in the `IndexScheduler`, in bytes. - pub fn compute_update_file_size(&self) -> Result { - Ok(self.file_store.compute_total_size()?) - } - - /// Delete a file from the index scheduler. - /// - /// Counterpart to the [`create_update_file`](IndexScheduler::create_update_file) method. - pub fn delete_update_file(&self, uuid: Uuid) -> Result<()> { - Ok(self.file_store.delete(uuid)?) - } - - /// Perform one iteration of the run loop. - /// - /// 1. See if we need to cleanup the task queue - /// 2. Find the next batch of tasks to be processed. - /// 3. Update the information of these tasks following the start of their processing. - /// 4. Update the in-memory list of processed tasks accordingly. - /// 5. Process the batch: - /// - perform the actions of each batched task - /// - update the information of each batched task following the end - /// of their processing. - /// 6. Reset the in-memory list of processed tasks. - /// - /// Returns the number of processed tasks. - fn tick(&self) -> Result { - #[cfg(test)] - { - *self.run_loop_iteration.write().unwrap() += 1; - self.breakpoint(Breakpoint::Start); - } - - if self.cleanup_enabled { - self.cleanup_task_queue()?; - } - - let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?; - let (batch, mut processing_batch) = - match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? { - Some(batch) => batch, - None => return Ok(TickOutcome::WaitForSignal), - }; - let index_uid = batch.index_uid().map(ToOwned::to_owned); - drop(rtxn); - - // 1. 
store the starting date with the bitmap of processing tasks. - let mut ids = batch.ids(); - let processed_tasks = ids.len(); - - // We reset the must_stop flag to be sure that we don't stop processing tasks - self.must_stop_processing.reset(); - let progress = self - .processing_tasks - .write() - .unwrap() - // We can clone the processing batch here because we don't want its modification to affect the view of the processing batches - .start_processing(processing_batch.clone(), ids.clone()); - - #[cfg(test)] - self.breakpoint(Breakpoint::BatchCreated); - - // 2. Process the tasks - let res = { - let cloned_index_scheduler = self.private_clone(); - let processing_batch = &mut processing_batch; - let progress = progress.clone(); - std::thread::scope(|s| { - let handle = std::thread::Builder::new() - .name(String::from("batch-operation")) - .spawn_scoped(s, move || { - cloned_index_scheduler.process_batch(batch, processing_batch, progress) - }) - .unwrap(); - handle.join().unwrap_or(Err(Error::ProcessBatchPanicked)) - }) - }; - - // Reset the currently updating index to relinquish the index handle - self.index_mapper.set_currently_updating_index(None); - - #[cfg(test)] - self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?; - - progress.update_progress(BatchProgress::WritingTasksToDisk); - processing_batch.finished(); - let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?; - let mut canceled = RoaringBitmap::new(); - - match res { - Ok(tasks) => { - #[cfg(test)] - self.breakpoint(Breakpoint::ProcessBatchSucceeded); - - let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32); - progress.update_progress(task_progress_obj); - let mut success = 0; - let mut failure = 0; - let mut canceled_by = None; - - #[allow(unused_variables)] - for (i, mut task) in tasks.into_iter().enumerate() { - task_progress.fetch_add(1, Ordering::Relaxed); - processing_batch.update(&mut task); - if task.status == Status::Canceled { - 
canceled.insert(task.uid); - canceled_by = task.canceled_by; - } - - #[cfg(test)] - self.maybe_fail( - tests::FailureLocation::UpdatingTaskAfterProcessBatchSuccess { - task_uid: i as u32, - }, - )?; - - match task.error { - Some(_) => failure += 1, - None => success += 1, - } - - self.update_task(&mut wtxn, &task) - .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; - } - if let Some(canceled_by) = canceled_by { - self.canceled_by.put(&mut wtxn, &canceled_by, &canceled)?; - } - tracing::info!("A batch of tasks was successfully completed with {success} successful tasks and {failure} failed tasks."); - } - // If we have an abortion error we must stop the tick here and re-schedule tasks. - Err(Error::Milli { - error: milli::Error::InternalError(milli::InternalError::AbortedIndexation), - .. - }) - | Err(Error::AbortedTask) => { - #[cfg(test)] - self.breakpoint(Breakpoint::AbortedIndexation); - wtxn.abort(); - - tracing::info!("A batch of tasks was aborted."); - // We make sure that we don't call `stop_processing` on the `processing_tasks`, - // this is because we want to let the next tick call `create_next_batch` and keep - // the `started_at` date times and `processings` of the current processing tasks. - // This date time is used by the task cancelation to store the right `started_at` - // date in the task on disk. - return Ok(TickOutcome::TickAgain(0)); - } - // If an index said it was full, we need to: - // 1. identify which index is full - // 2. close the associated environment - // 3. resize it - // 4. re-schedule tasks - Err(Error::Milli { - error: milli::Error::UserError(milli::UserError::MaxDatabaseSizeReached), - .. - }) if index_uid.is_some() => { - // fixme: add index_uid to match to avoid the unwrap - let index_uid = index_uid.unwrap(); - // fixme: handle error more gracefully? not sure when this could happen - self.index_mapper.resize_index(&wtxn, &index_uid)?; - wtxn.abort(); - - tracing::info!("The max database size was reached. 
Resizing the index."); - - return Ok(TickOutcome::TickAgain(0)); - } - // In case of a failure we must get back and patch all the tasks with the error. - Err(err) => { - #[cfg(test)] - self.breakpoint(Breakpoint::ProcessBatchFailed); - let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32); - progress.update_progress(task_progress_obj); - - let error: ResponseError = err.into(); - for id in ids.iter() { - task_progress.fetch_add(1, Ordering::Relaxed); - let mut task = self - .get_task(&wtxn, id) - .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))? - .ok_or(Error::CorruptedTaskQueue)?; - task.status = Status::Failed; - task.error = Some(error.clone()); - task.details = task.details.map(|d| d.to_failed()); - processing_batch.update(&mut task); - - #[cfg(test)] - self.maybe_fail(tests::FailureLocation::UpdatingTaskAfterProcessBatchFailure)?; - - tracing::error!("Batch failed {}", error); - - self.update_task(&mut wtxn, &task) - .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; - } - } - } - - // We must re-add the canceled task so they're part of the same batch. - ids |= canceled; - self.write_batch(&mut wtxn, processing_batch, &ids)?; - - #[cfg(test)] - self.maybe_fail(tests::FailureLocation::CommittingWtxn)?; - - wtxn.commit().map_err(Error::HeedTransaction)?; - - // We should stop processing AFTER everything is processed and written to disk otherwise, a batch (which only lives in RAM) may appear in the processing task - // and then become « not found » for some time until the commit everything is written and the final commit is made. - self.processing_tasks.write().unwrap().stop_processing(); - - // Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart - tracing::debug!("Deleting the update files"); - - //We take one read transaction **per thread**. 
Then, every thread is going to pull out new IDs from the roaring bitmap with the help of an atomic shared index into the bitmap - let idx = AtomicU32::new(0); - (0..current_num_threads()).into_par_iter().try_for_each(|_| -> Result<()> { - let rtxn = self.read_txn()?; - while let Some(id) = ids.select(idx.fetch_add(1, Ordering::Relaxed)) { - let task = self - .get_task(&rtxn, id) - .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))? - .ok_or(Error::CorruptedTaskQueue)?; - if let Err(e) = self.delete_persisted_task_data(&task) { - tracing::error!( - "Failure to delete the content files associated with task {}. Error: {e}", - task.uid - ); - } - } - Ok(()) - })?; - - // We shouldn't crash the tick function if we can't send data to the webhook. - let _ = self.notify_webhook(&ids); - - #[cfg(test)] - self.breakpoint(Breakpoint::AfterProcessing); - - Ok(TickOutcome::TickAgain(processed_tasks)) - } - /// Once the tasks changes have been committed we must send all the tasks that were updated to our webhook if there is one. fn notify_webhook(&self, updated: &RoaringBitmap) -> Result<()> { if let Some(ref url) = self.webhook_url { @@ -1829,6 +670,8 @@ impl IndexScheduler { Some(task_id) => { let task = self .index_scheduler + .queue + .tasks .get_task(self.rtxn, task_id) .map_err(|err| io::Error::new(io::ErrorKind::Other, err))? 
.ok_or_else(|| { @@ -1890,59 +733,6 @@ impl IndexScheduler { Ok(()) } - /// Register a task to cleanup the task queue if needed - fn cleanup_task_queue(&self) -> Result<()> { - let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?; - - let nb_tasks = self.all_task_ids(&rtxn)?.len(); - // if we have less than 1M tasks everything is fine - if nb_tasks < self.max_number_of_tasks as u64 { - return Ok(()); - } - - let finished = self.status.get(&rtxn, &Status::Succeeded)?.unwrap_or_default() - | self.status.get(&rtxn, &Status::Failed)?.unwrap_or_default() - | self.status.get(&rtxn, &Status::Canceled)?.unwrap_or_default(); - - let to_delete = RoaringBitmap::from_iter(finished.into_iter().rev().take(100_000)); - - // /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete - // the deletion tasks we enqueued ourselves. - if to_delete.len() < 2 { - tracing::warn!("The task queue is almost full, but no task can be deleted yet."); - // the only thing we can do is hope that the user tasks are going to finish - return Ok(()); - } - - tracing::info!( - "The task queue is almost full. Deleting the oldest {} finished tasks.", - to_delete.len() - ); - - // it's safe to unwrap here because we checked the len above - let newest_task_id = to_delete.iter().last().unwrap(); - let last_task_to_delete = - self.get_task(&rtxn, newest_task_id)?.ok_or(Error::CorruptedTaskQueue)?; - drop(rtxn); - - // increase time by one nanosecond so that the enqueuedAt of the last task to delete is also lower than that date. 
- let delete_before = last_task_to_delete.enqueued_at + Duration::from_nanos(1); - - self.register( - KindWithContent::TaskDeletion { - query: format!( - "?beforeEnqueuedAt={}&statuses=succeeded,failed,canceled", - delete_before.format(&Rfc3339).map_err(|_| Error::CorruptedTaskQueue)?, - ), - tasks: to_delete, - }, - None, - false, - )?; - - Ok(()) - } - pub fn index_stats(&self, index_uid: &str) -> Result { let is_indexing = self.is_index_processing(index_uid)?; let rtxn = self.read_txn()?; @@ -1961,13 +751,6 @@ impl IndexScheduler { Ok(()) } - pub(crate) fn delete_persisted_task_data(&self, task: &Task) -> Result<()> { - match task.content_uuid() { - Some(content_file) => self.delete_update_file(content_file), - None => Ok(()), - } - } - // TODO: consider using a type alias or a struct embedder/template pub fn embedders( &self, @@ -2017,223 +800,6 @@ impl IndexScheduler { .collect(); res.map(EmbeddingConfigs::new) } - - /// Blocks the thread until the test handle asks to progress to/through this breakpoint. - /// - /// Two messages are sent through the channel for each breakpoint. - /// The first message is `(b, false)` and the second message is `(b, true)`. - /// - /// Since the channel has a capacity of zero, the `send` and `recv` calls wait for each other. - /// So when the index scheduler calls `test_breakpoint_sdr.send(b, false)`, it blocks - /// the thread until the test catches up by calling `test_breakpoint_rcv.recv()` enough. - /// From the test side, we call `recv()` repeatedly until we find the message `(breakpoint, false)`. - /// As soon as we find it, the index scheduler is unblocked but then wait again on the call to - /// `test_breakpoint_sdr.send(b, true)`. This message will only be able to send once the - /// test asks to progress to the next `(b2, false)`. - #[cfg(test)] - fn breakpoint(&self, b: Breakpoint) { - // We send two messages. The first one will sync with the call - // to `handle.wait_until(b)`. 
The second one will block until the - // the next call to `handle.wait_until(..)`. - self.test_breakpoint_sdr.send((b, false)).unwrap(); - // This one will only be able to be sent if the test handle stays alive. - // If it fails, then it means that we have exited the test. - // By crashing with `unwrap`, we kill the run loop. - self.test_breakpoint_sdr.send((b, true)).unwrap(); - } -} - -pub struct Dump<'a> { - index_scheduler: &'a IndexScheduler, - wtxn: RwTxn<'a>, - - indexes: HashMap, - statuses: HashMap, - kinds: HashMap, -} - -impl<'a> Dump<'a> { - pub(crate) fn new(index_scheduler: &'a mut IndexScheduler) -> Result { - // While loading a dump no one should be able to access the scheduler thus I can block everything. - let wtxn = index_scheduler.env.write_txn()?; - - Ok(Dump { - index_scheduler, - wtxn, - indexes: HashMap::new(), - statuses: HashMap::new(), - kinds: HashMap::new(), - }) - } - - /// Register a new task coming from a dump in the scheduler. - /// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running. - pub fn register_dumped_task( - &mut self, - task: TaskDump, - content_file: Option>, - ) -> Result { - let content_uuid = match content_file { - Some(content_file) if task.status == Status::Enqueued => { - let (uuid, mut file) = self.index_scheduler.create_update_file(false)?; - let mut builder = DocumentsBatchBuilder::new(&mut file); - for doc in content_file { - builder.append_json_object(&doc?)?; - } - builder.into_inner()?; - file.persist()?; - - Some(uuid) - } - // If the task isn't `Enqueued` then just generate a recognisable `Uuid` - // in case we try to open it later. 
- _ if task.status != Status::Enqueued => Some(Uuid::nil()), - _ => None, - }; - - let task = Task { - uid: task.uid, - batch_uid: task.batch_uid, - enqueued_at: task.enqueued_at, - started_at: task.started_at, - finished_at: task.finished_at, - error: task.error, - canceled_by: task.canceled_by, - details: task.details, - status: task.status, - kind: match task.kind { - KindDump::DocumentImport { - primary_key, - method, - documents_count, - allow_index_creation, - } => KindWithContent::DocumentAdditionOrUpdate { - index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - primary_key, - method, - content_file: content_uuid.ok_or(Error::CorruptedDump)?, - documents_count, - allow_index_creation, - }, - KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion { - documents_ids, - index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - }, - KindDump::DocumentDeletionByFilter { filter } => { - KindWithContent::DocumentDeletionByFilter { - filter_expr: filter, - index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - } - } - KindDump::DocumentEdition { filter, context, function } => { - KindWithContent::DocumentEdition { - index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - filter_expr: filter, - context, - function, - } - } - KindDump::DocumentClear => KindWithContent::DocumentClear { - index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - }, - KindDump::Settings { settings, is_deletion, allow_index_creation } => { - KindWithContent::SettingsUpdate { - index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - new_settings: settings, - is_deletion, - allow_index_creation, - } - } - KindDump::IndexDeletion => KindWithContent::IndexDeletion { - index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - }, - KindDump::IndexCreation { primary_key } => KindWithContent::IndexCreation { - index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - primary_key, - }, - KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate { - 
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - primary_key, - }, - KindDump::IndexSwap { swaps } => KindWithContent::IndexSwap { swaps }, - KindDump::TaskCancelation { query, tasks } => { - KindWithContent::TaskCancelation { query, tasks } - } - KindDump::TasksDeletion { query, tasks } => { - KindWithContent::TaskDeletion { query, tasks } - } - KindDump::DumpCreation { keys, instance_uid } => { - KindWithContent::DumpCreation { keys, instance_uid } - } - KindDump::SnapshotCreation => KindWithContent::SnapshotCreation, - }, - }; - - self.index_scheduler.all_tasks.put(&mut self.wtxn, &task.uid, &task)?; - - for index in task.indexes() { - match self.indexes.get_mut(index) { - Some(bitmap) => { - bitmap.insert(task.uid); - } - None => { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(task.uid); - self.indexes.insert(index.to_string(), bitmap); - } - }; - } - - utils::insert_task_datetime( - &mut self.wtxn, - self.index_scheduler.enqueued_at, - task.enqueued_at, - task.uid, - )?; - - // we can't override the started_at & finished_at, so we must only set it if the tasks is finished and won't change - if matches!(task.status, Status::Succeeded | Status::Failed | Status::Canceled) { - if let Some(started_at) = task.started_at { - utils::insert_task_datetime( - &mut self.wtxn, - self.index_scheduler.started_at, - started_at, - task.uid, - )?; - } - if let Some(finished_at) = task.finished_at { - utils::insert_task_datetime( - &mut self.wtxn, - self.index_scheduler.finished_at, - finished_at, - task.uid, - )?; - } - } - - self.statuses.entry(task.status).or_default().insert(task.uid); - self.kinds.entry(task.kind.as_kind()).or_default().insert(task.uid); - - Ok(task) - } - - /// Commit all the changes and exit the importing dump state - pub fn finish(mut self) -> Result<()> { - for (index, bitmap) in self.indexes { - self.index_scheduler.index_tasks.put(&mut self.wtxn, &index, &bitmap)?; - } - for (status, bitmap) in self.statuses { - 
self.index_scheduler.put_status(&mut self.wtxn, status, &bitmap)?; - } - for (kind, bitmap) in self.kinds { - self.index_scheduler.put_kind(&mut self.wtxn, kind, &bitmap)?; - } - - self.wtxn.commit()?; - self.index_scheduler.wake_up.signal(); - - Ok(()) - } } /// The outcome of calling the [`IndexScheduler::tick`] function. @@ -2266,4685 +832,3 @@ pub struct IndexStats { /// Internal stats computed from the index. pub inner_stats: index_mapper::IndexStats, } - -#[cfg(test)] -mod tests { - use std::io::{BufWriter, Write}; - use std::time::Instant; - - use big_s::S; - use crossbeam_channel::RecvTimeoutError; - use file_store::File; - use insta::assert_json_snapshot; - use maplit::btreeset; - use meili_snap::{json_string, snapshot}; - use meilisearch_auth::AuthFilter; - use meilisearch_types::document_formats::DocumentFormatError; - use meilisearch_types::error::ErrorCode; - use meilisearch_types::index_uid_pattern::IndexUidPattern; - use meilisearch_types::milli::obkv_to_json; - use meilisearch_types::milli::update::IndexDocumentsMethod::{ - ReplaceDocuments, UpdateDocuments, - }; - use meilisearch_types::milli::update::Setting; - use meilisearch_types::milli::vector::settings::EmbeddingSettings; - use meilisearch_types::settings::Unchecked; - use meilisearch_types::tasks::IndexSwap; - use meilisearch_types::VERSION_FILE_NAME; - use tempfile::{NamedTempFile, TempDir}; - use time::Duration; - use uuid::Uuid; - use Breakpoint::*; - - use super::*; - use crate::insta_snapshot::{snapshot_bitmap, snapshot_index_scheduler}; - - #[derive(Debug, Clone, Copy, PartialEq, Eq)] - pub enum FailureLocation { - InsideCreateBatch, - InsideProcessBatch, - PanicInsideProcessBatch, - AcquiringWtxn, - UpdatingTaskAfterProcessBatchSuccess { task_uid: u32 }, - UpdatingTaskAfterProcessBatchFailure, - CommittingWtxn, - } - - impl IndexScheduler { - pub fn test( - autobatching_enabled: bool, - planned_failures: Vec<(usize, FailureLocation)>, - ) -> (Self, IndexSchedulerHandle) { - 
Self::test_with_custom_config(planned_failures, |config| { - config.autobatching_enabled = autobatching_enabled; - }) - } - - pub fn test_with_custom_config( - planned_failures: Vec<(usize, FailureLocation)>, - configuration: impl Fn(&mut IndexSchedulerOptions), - ) -> (Self, IndexSchedulerHandle) { - let tempdir = TempDir::new().unwrap(); - let (sender, receiver) = crossbeam_channel::bounded(0); - - let indexer_config = IndexerConfig { skip_index_budget: true, ..Default::default() }; - - let mut options = IndexSchedulerOptions { - version_file_path: tempdir.path().join(VERSION_FILE_NAME), - auth_path: tempdir.path().join("auth"), - tasks_path: tempdir.path().join("db_path"), - update_file_path: tempdir.path().join("file_store"), - indexes_path: tempdir.path().join("indexes"), - snapshots_path: tempdir.path().join("snapshots"), - dumps_path: tempdir.path().join("dumps"), - webhook_url: None, - webhook_authorization_header: None, - task_db_size: 1000 * 1000 * 10, // 10 MB, we don't use MiB on purpose. - index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose. - enable_mdb_writemap: false, - index_growth_amount: 1000 * 1000 * 1000 * 1000, // 1 TB - index_count: 5, - indexer_config, - autobatching_enabled: true, - cleanup_enabled: true, - max_number_of_tasks: 1_000_000, - max_number_of_batched_tasks: usize::MAX, - instance_features: Default::default(), - }; - configuration(&mut options); - - let index_scheduler = Self::new(options, sender, planned_failures).unwrap(); - - // To be 100% consistent between all test we're going to start the scheduler right now - // and ensure it's in the expected starting state. 
- let breakpoint = match receiver.recv_timeout(std::time::Duration::from_secs(10)) { - Ok(b) => b, - Err(RecvTimeoutError::Timeout) => { - panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.") - } - Err(RecvTimeoutError::Disconnected) => panic!("The scheduler crashed."), - }; - assert_eq!(breakpoint, (Init, false)); - let index_scheduler_handle = IndexSchedulerHandle { - _tempdir: tempdir, - index_scheduler: index_scheduler.private_clone(), - test_breakpoint_rcv: receiver, - last_breakpoint: breakpoint.0, - }; - - (index_scheduler, index_scheduler_handle) - } - - /// Return a [`PlannedFailure`](Error::PlannedFailure) error if a failure is planned - /// for the given location and current run loop iteration. - pub fn maybe_fail(&self, location: FailureLocation) -> Result<()> { - if self.planned_failures.contains(&(*self.run_loop_iteration.read().unwrap(), location)) - { - match location { - FailureLocation::PanicInsideProcessBatch => { - panic!("simulated panic") - } - _ => Err(Error::PlannedFailure), - } - } else { - Ok(()) - } - } - } - - /// Return a `KindWithContent::IndexCreation` task - fn index_creation_task(index: &'static str, primary_key: &'static str) -> KindWithContent { - KindWithContent::IndexCreation { index_uid: S(index), primary_key: Some(S(primary_key)) } - } - /// Create a `KindWithContent::DocumentImport` task that imports documents. 
- /// - /// - `index_uid` is given as parameter - /// - `primary_key` is given as parameter - /// - `method` is set to `ReplaceDocuments` - /// - `content_file` is given as parameter - /// - `documents_count` is given as parameter - /// - `allow_index_creation` is set to `true` - fn replace_document_import_task( - index: &'static str, - primary_key: Option<&'static str>, - content_file_uuid: u128, - documents_count: u64, - ) -> KindWithContent { - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S(index), - primary_key: primary_key.map(ToOwned::to_owned), - method: ReplaceDocuments, - content_file: Uuid::from_u128(content_file_uuid), - documents_count, - allow_index_creation: true, - } - } - - /// Adapting to the new json reading interface - pub fn read_json( - bytes: &[u8], - write: impl Write, - ) -> std::result::Result { - let temp_file = NamedTempFile::new().unwrap(); - let mut buffer = BufWriter::new(temp_file.reopen().unwrap()); - buffer.write_all(bytes).unwrap(); - buffer.flush().unwrap(); - meilisearch_types::document_formats::read_json(temp_file.as_file(), write) - } - - /// Create an update file with the given file uuid. - /// - /// The update file contains just one simple document whose id is given by `document_id`. - /// - /// The uuid of the file and its documents count is returned. - fn sample_documents( - index_scheduler: &IndexScheduler, - file_uuid: u128, - document_id: usize, - ) -> (File, u64) { - let content = format!( - r#" - {{ - "id" : "{document_id}" - }}"# - ); - - let (_uuid, mut file) = index_scheduler.create_update_file_with_uuid(file_uuid).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - (file, documents_count) - } - - pub struct IndexSchedulerHandle { - _tempdir: TempDir, - index_scheduler: IndexScheduler, - test_breakpoint_rcv: crossbeam_channel::Receiver<(Breakpoint, bool)>, - last_breakpoint: Breakpoint, - } - - impl IndexSchedulerHandle { - /// Advance the scheduler to the next tick. 
- /// Panic - /// * If the scheduler is waiting for a task to be registered. - /// * If the breakpoint queue is in a bad state. - #[track_caller] - fn advance(&mut self) -> Breakpoint { - let (breakpoint_1, b) = match self - .test_breakpoint_rcv - .recv_timeout(std::time::Duration::from_secs(50)) - { - Ok(b) => b, - Err(RecvTimeoutError::Timeout) => { - let state = snapshot_index_scheduler(&self.index_scheduler); - panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.\n{state}") - } - Err(RecvTimeoutError::Disconnected) => { - let state = snapshot_index_scheduler(&self.index_scheduler); - panic!("The scheduler crashed.\n{state}") - } - }; - // if we've already encountered a breakpoint we're supposed to be stuck on the false - // and we expect the same variant with the true to come now. - assert_eq!( - (breakpoint_1, b), - (self.last_breakpoint, true), - "Internal error in the test suite. In the previous iteration I got `({:?}, false)` and now I got `({:?}, {:?})`.", - self.last_breakpoint, - breakpoint_1, - b, - ); - - let (breakpoint_2, b) = match self - .test_breakpoint_rcv - .recv_timeout(std::time::Duration::from_secs(50)) - { - Ok(b) => b, - Err(RecvTimeoutError::Timeout) => { - let state = snapshot_index_scheduler(&self.index_scheduler); - panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.\n{state}") - } - Err(RecvTimeoutError::Disconnected) => { - let state = snapshot_index_scheduler(&self.index_scheduler); - panic!("The scheduler crashed.\n{state}") - } - }; - assert!(!b, "Found the breakpoint handle in a bad state. Check your test suite"); - - self.last_breakpoint = breakpoint_2; - - breakpoint_2 - } - - /// Advance the scheduler until all the provided breakpoints are reached in order. 
- #[track_caller] - fn advance_till(&mut self, breakpoints: impl IntoIterator) { - for breakpoint in breakpoints { - let b = self.advance(); - assert_eq!( - b, - breakpoint, - "Was expecting the breakpoint `{:?}` but instead got `{:?}`.\n{}", - breakpoint, - b, - snapshot_index_scheduler(&self.index_scheduler) - ); - } - } - - /// Wait for `n` successful batches. - #[track_caller] - fn advance_n_successful_batches(&mut self, n: usize) { - for _ in 0..n { - self.advance_one_successful_batch(); - } - } - - /// Wait for `n` failed batches. - #[track_caller] - fn advance_n_failed_batches(&mut self, n: usize) { - for _ in 0..n { - self.advance_one_failed_batch(); - } - } - - // Wait for one successful batch. - #[track_caller] - fn advance_one_successful_batch(&mut self) { - self.advance_till([Start, BatchCreated]); - loop { - match self.advance() { - // the process_batch function can call itself recursively, thus we need to - // accept as may InsideProcessBatch as possible before moving to the next state. - InsideProcessBatch => (), - // the batch went successfully, we can stop the loop and go on with the next states. - ProcessBatchSucceeded => break, - AbortedIndexation => panic!("The batch was aborted.\n{}", snapshot_index_scheduler(&self.index_scheduler)), - ProcessBatchFailed => { - while self.advance() != Start {} - panic!("The batch failed.\n{}", snapshot_index_scheduler(&self.index_scheduler)) - }, - breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint), - } - } - - self.advance_till([AfterProcessing]); - } - - // Wait for one failed batch. - #[track_caller] - fn advance_one_failed_batch(&mut self) { - self.advance_till([Start, BatchCreated]); - loop { - match self.advance() { - // the process_batch function can call itself recursively, thus we need to - // accept as may InsideProcessBatch as possible before moving to the next state. 
- InsideProcessBatch => (), - // the batch went failed, we can stop the loop and go on with the next states. - ProcessBatchFailed => break, - ProcessBatchSucceeded => panic!("The batch succeeded. (and it wasn't supposed to sorry)\n{}", snapshot_index_scheduler(&self.index_scheduler)), - AbortedIndexation => panic!("The batch was aborted.\n{}", snapshot_index_scheduler(&self.index_scheduler)), - breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint), - } - } - self.advance_till([AfterProcessing]); - } - } - - #[test] - fn register() { - // In this test, the handle doesn't make any progress, we only check that the tasks are registered - let (index_scheduler, mut _handle) = IndexScheduler::test(true, vec![]); - - let kinds = [ - index_creation_task("catto", "mouse"), - replace_document_import_task("catto", None, 0, 12), - replace_document_import_task("catto", None, 1, 50), - replace_document_import_task("doggo", Some("bone"), 2, 5000), - ]; - let (_, file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - file.persist().unwrap(); - let (_, file) = index_scheduler.create_update_file_with_uuid(1).unwrap(); - file.persist().unwrap(); - let (_, file) = index_scheduler.create_update_file_with_uuid(2).unwrap(); - file.persist().unwrap(); - - for (idx, kind) in kinds.into_iter().enumerate() { - let k = kind.as_kind(); - let task = index_scheduler.register(kind, None, false).unwrap(); - index_scheduler.assert_internally_consistent(); - - assert_eq!(task.uid, idx as u32); - assert_eq!(task.status, Status::Enqueued); - assert_eq!(task.kind.as_kind(), k); - } - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "everything_is_successfully_registered"); - } - - #[test] - fn insert_task_while_another_task_is_processing() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - index_scheduler.register(index_creation_task("index_a", "id"), None, false).unwrap(); - 
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - - handle.advance_till([Start, BatchCreated]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_batch_creation"); - - // while the task is processing can we register another task? - index_scheduler.register(index_creation_task("index_b", "id"), None, false).unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); - - index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("index_a") }, None, false) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); - } - - #[test] - fn test_task_is_processing() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - index_scheduler.register(index_creation_task("index_a", "id"), None, false).unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_a_task"); - - handle.advance_till([Start, BatchCreated]); - assert!(index_scheduler.is_task_processing().unwrap()); - } - - /// We send a lot of tasks but notify the tasks scheduler only once as - /// we send them very fast, we must make sure that they are all processed. 
- #[test] - fn process_tasks_inserted_without_new_signal() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("cattos"), primary_key: None }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); - - index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_first_task"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_second_task"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_third_task"); - } - - #[test] - fn process_tasks_without_autobatching() { - let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); - - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - - index_scheduler - .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); - - index_scheduler - .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: 
"registered_the_third_task"); - - index_scheduler - .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_fourth_task"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "fourth"); - } - - #[test] - fn task_deletion_undeleteable() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); - let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); - file0.persist().unwrap(); - file1.persist().unwrap(); - - let to_enqueue = [ - index_creation_task("catto", "mouse"), - replace_document_import_task("catto", None, 0, documents_count0), - replace_document_import_task("doggo", Some("bone"), 1, documents_count1), - ]; - - for task in to_enqueue { - let _ = index_scheduler.register(task, None, false).unwrap(); - index_scheduler.assert_internally_consistent(); - } - - // here we have registered all the tasks, but the index scheduler - // has not progressed at all - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); - - index_scheduler - .register( - KindWithContent::TaskDeletion { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0, 1]), - }, - None, - false, - ) - .unwrap(); - // again, no progress made at all, but one more task is registered - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_enqueued"); - - // now we create the first batch - handle.advance_till([Start, 
BatchCreated]); - - // the task deletion should now be "processing" - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processing"); - - handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]); - // after the task deletion is processed, no task should actually have been deleted, - // because the tasks with ids 0 and 1 were still "enqueued", and thus undeleteable - // the "task deletion" task should be marked as "succeeded" and, in its details, the - // number of deleted tasks should be 0 - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_done"); - } - - #[test] - fn task_deletion_deleteable() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); - let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); - file0.persist().unwrap(); - file1.persist().unwrap(); - - let to_enqueue = [ - replace_document_import_task("catto", None, 0, documents_count0), - replace_document_import_task("doggo", Some("bone"), 1, documents_count1), - ]; - - for task in to_enqueue { - let _ = index_scheduler.register(task, None, false).unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); - - handle.advance_one_successful_batch(); - // first addition of documents should be successful - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_processed"); - - // Now we delete the first task - index_scheduler - .register( - KindWithContent::TaskDeletion { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_task_deletion"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: 
"task_deletion_processed"); - } - - #[test] - fn task_deletion_delete_same_task_twice() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); - let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); - file0.persist().unwrap(); - file1.persist().unwrap(); - - let to_enqueue = [ - replace_document_import_task("catto", None, 0, documents_count0), - replace_document_import_task("doggo", Some("bone"), 1, documents_count1), - ]; - - for task in to_enqueue { - let _ = index_scheduler.register(task, None, false).unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); - - handle.advance_one_successful_batch(); - // first addition of documents should be successful - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_processed"); - - // Now we delete the first task multiple times in a row - for _ in 0..2 { - index_scheduler - .register( - KindWithContent::TaskDeletion { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - handle.advance_one_successful_batch(); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processed"); - } - - #[test] - fn document_addition() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let content = r#" - { - "id": 1, - "doggo": "bob" - }"#; - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - 
allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_register"); - - handle.advance_till([Start, BatchCreated]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_the_batch_creation"); - - handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "once_everything_is_processed"); - } - - #[test] - fn document_addition_and_index_deletion() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let content = r#" - { - "id": 1, - "doggo": "bob" - }"#; - - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); - - index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); - - handle.advance_one_successful_batch(); // The index creation. - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "before_index_creation"); - handle.advance_one_successful_batch(); // // after the execution of the two tasks in a single batch. 
- snapshot!(snapshot_index_scheduler(&index_scheduler), name: "both_task_succeeded"); - } - - #[test] - fn document_addition_and_document_deletion() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let content = r#"[ - { "id": 1, "doggo": "jean bob" }, - { "id": 2, "catto": "jorts" }, - { "id": 3, "doggo": "bork" } - ]"#; - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - index_scheduler - .register( - KindWithContent::DocumentDeletion { - index_uid: S("doggos"), - documents_ids: vec![S("1"), S("2")], - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); - - handle.advance_one_successful_batch(); // The addition AND deletion should've been batched together - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_batch"); - - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn document_deletion_and_document_addition() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - index_scheduler - .register( - 
KindWithContent::DocumentDeletion { - index_uid: S("doggos"), - documents_ids: vec![S("1"), S("2")], - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - - let content = r#"[ - { "id": 1, "doggo": "jean bob" }, - { "id": 2, "catto": "jorts" }, - { "id": 3, "doggo": "bork" } - ]"#; - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); - - // The deletion should have failed because it can't create an index - handle.advance_one_failed_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_failing_the_deletion"); - - // The addition should work - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_last_successful_addition"); - - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn fail_in_process_batch_for_document_deletion() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - use meilisearch_types::settings::{Settings, Unchecked}; - let mut new_settings: Box> = Box::default(); - 
new_settings.filterable_attributes = Setting::Set(btreeset!(S("catto"))); - - index_scheduler - .register( - KindWithContent::SettingsUpdate { - index_uid: S("doggos"), - new_settings, - is_deletion: false, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - - let content = r#"[ - { "id": 1, "doggo": "jean bob" }, - { "id": 2, "catto": "jorts" }, - { "id": 3, "doggo": "bork" } - ]"#; - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_setting_and_document_addition"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_adding_the_settings"); - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_adding_the_documents"); - - index_scheduler - .register( - KindWithContent::DocumentDeletion { - index_uid: S("doggos"), - documents_ids: vec![S("1")], - }, - None, - false, - ) - .unwrap(); - // This one should not be caught by Meilisearch, but it's still nice to handle it because if one day we break the filters it could happen - index_scheduler - .register( - KindWithContent::DocumentDeletionByFilter { - index_uid: S("doggos"), - filter_expr: serde_json::json!(true), - }, - None, - false, - ) - .unwrap(); - // Should fail because the ids are not filterable - index_scheduler - .register( - KindWithContent::DocumentDeletionByFilter { - index_uid: S("doggos"), - filter_expr: serde_json::json!("id = 2"), - }, - None, - false, - ) - .unwrap(); - index_scheduler - 
.register( - KindWithContent::DocumentDeletionByFilter { - index_uid: S("doggos"), - filter_expr: serde_json::json!("catto EXISTS"), - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_document_deletions"); - - // Everything should be batched together - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_removing_the_documents"); - - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents_remaining_should_only_be_bork"); - } - - #[test] - fn do_not_batch_task_of_different_indexes() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - let index_names = ["doggos", "cattos", "girafos"]; - - for name in index_names { - index_scheduler - .register( - KindWithContent::IndexCreation { - index_uid: name.to_string(), - primary_key: None, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - - for name in index_names { - index_scheduler - .register( - KindWithContent::DocumentClear { index_uid: name.to_string() }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - - for _ in 0..(index_names.len() * 2) { - handle.advance_one_successful_batch(); - index_scheduler.assert_internally_consistent(); - } - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); - } - - #[test] - fn swap_indexes() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let to_enqueue = [ - index_creation_task("a", "id"), - 
index_creation_task("b", "id"), - index_creation_task("c", "id"), - index_creation_task("d", "id"), - ]; - - for task in to_enqueue { - let _ = index_scheduler.register(task, None, false).unwrap(); - index_scheduler.assert_internally_consistent(); - } - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_a"); - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_b"); - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_c"); - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_d"); - - index_scheduler - .register( - KindWithContent::IndexSwap { - swaps: vec![ - IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, - IndexSwap { indexes: ("c".to_owned(), "d".to_owned()) }, - ], - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_registered"); - index_scheduler - .register( - KindWithContent::IndexSwap { - swaps: vec![IndexSwap { indexes: ("a".to_owned(), "c".to_owned()) }], - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "two_swaps_registered"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_processed"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_swap_processed"); - - index_scheduler - .register(KindWithContent::IndexSwap { swaps: vec![] }, None, false) - .unwrap(); - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_empty_swap_processed"); - } - - #[test] - fn swap_indexes_errors() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let to_enqueue = [ - index_creation_task("a", "id"), - 
index_creation_task("b", "id"), - index_creation_task("c", "id"), - index_creation_task("d", "id"), - ]; - - for task in to_enqueue { - let _ = index_scheduler.register(task, None, false).unwrap(); - index_scheduler.assert_internally_consistent(); - } - handle.advance_n_successful_batches(4); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_the_index_creation"); - - let first_snap = snapshot_index_scheduler(&index_scheduler); - snapshot!(first_snap, name: "initial_tasks_processed"); - - let err = index_scheduler - .register( - KindWithContent::IndexSwap { - swaps: vec![ - IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, - IndexSwap { indexes: ("b".to_owned(), "a".to_owned()) }, - ], - }, - None, - false, - ) - .unwrap_err(); - snapshot!(format!("{err}"), @"Indexes must be declared only once during a swap. `a`, `b` were specified several times."); - - let second_snap = snapshot_index_scheduler(&index_scheduler); - assert_eq!(first_snap, second_snap); - - // Index `e` does not exist, but we don't check its existence yet - index_scheduler - .register( - KindWithContent::IndexSwap { - swaps: vec![ - IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, - IndexSwap { indexes: ("c".to_owned(), "e".to_owned()) }, - IndexSwap { indexes: ("d".to_owned(), "f".to_owned()) }, - ], - }, - None, - false, - ) - .unwrap(); - handle.advance_one_failed_batch(); - // Now the first swap should have an error message saying `e` and `f` do not exist - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_failed"); - } - - #[test] - fn document_addition_and_index_deletion_on_unexisting_index() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let content = r#" - { - "id": 1, - "doggo": "bob" - }"#; - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - 
.register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false) - .unwrap(); - - snapshot!(snapshot_index_scheduler(&index_scheduler)); - - handle.advance_n_successful_batches(1); - - snapshot!(snapshot_index_scheduler(&index_scheduler)); - } - - #[test] - fn cancel_enqueued_task() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); - file0.persist().unwrap(); - - let to_enqueue = [ - replace_document_import_task("catto", None, 0, documents_count0), - KindWithContent::TaskCancelation { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }, - ]; - for task in to_enqueue { - let _ = index_scheduler.register(task, None, false).unwrap(); - index_scheduler.assert_internally_consistent(); - } - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); - } - - #[test] - fn cancel_succeeded_task() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); - file0.persist().unwrap(); - - let _ = index_scheduler - .register(replace_document_import_task("catto", None, 0, documents_count0), None, false) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_task_processed"); - - index_scheduler - .register( - KindWithContent::TaskCancelation { - query: 
"test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }, - None, - false, - ) - .unwrap(); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); - } - - #[test] - fn cancel_processing_task() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); - file0.persist().unwrap(); - - let _ = index_scheduler - .register(replace_document_import_task("catto", None, 0, documents_count0), None, false) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - - handle.advance_till([Start, BatchCreated, InsideProcessBatch]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_task_processing"); - - index_scheduler - .register( - KindWithContent::TaskCancelation { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }, - None, - false, - ) - .unwrap(); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_task_registered"); - // Now we check that we can reach the AbortedIndexation error handling - handle.advance_till([AbortedIndexation]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "aborted_indexation"); - - // handle.advance_till([Start, BatchCreated, BeforeProcessing, AfterProcessing]); - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); - } - - #[test] - fn cancel_mix_of_tasks() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); - file0.persist().unwrap(); - let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); - file1.persist().unwrap(); - let (file2, documents_count2) = sample_documents(&index_scheduler, 2, 2); - file2.persist().unwrap(); - - let to_enqueue = [ - 
replace_document_import_task("catto", None, 0, documents_count0), - replace_document_import_task("beavero", None, 1, documents_count1), - replace_document_import_task("wolfo", None, 2, documents_count2), - ]; - for task in to_enqueue { - let _ = index_scheduler.register(task, None, false).unwrap(); - index_scheduler.assert_internally_consistent(); - } - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_task_processed"); - - handle.advance_till([Start, BatchCreated, InsideProcessBatch]); - index_scheduler - .register( - KindWithContent::TaskCancelation { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0, 1, 2]), - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processing_second_task_cancel_enqueued"); - - handle.advance_till([AbortedIndexation]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "aborted_indexation"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); - } - - #[test] - fn test_document_replace() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - for i in 0..10 { - let content = format!( - r#"{{ - "id": {}, - "doggo": "bob {}" - }}"#, - i, i - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler)); - - // everything should be batched together. 
- handle.advance_n_successful_batches(1); - snapshot!(snapshot_index_scheduler(&index_scheduler)); - - // has everything been pushed successfully in milli? - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_document_update() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - for i in 0..10 { - let content = format!( - r#"{{ - "id": {}, - "doggo": "bob {}" - }}"#, - i, i - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: UpdateDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler)); - - // everything should be batched together. - handle.advance_n_successful_batches(1); - snapshot!(snapshot_index_scheduler(&index_scheduler)); - - // has everything been pushed successfully in milli? 
- let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_mixed_document_addition() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - for i in 0..10 { - let method = if i % 2 == 0 { UpdateDocuments } else { ReplaceDocuments }; - - let content = format!( - r#"{{ - "id": {}, - "doggo": "bob {}" - }}"#, - i, i - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); - - // Only half of the tasks should've been processed since we can't autobatch replace and update together. - handle.advance_n_successful_batches(5); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed"); - - handle.advance_n_successful_batches(5); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); - - // has everything been pushed successfully in milli? 
- let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_settings_update() { - use meilisearch_types::settings::{Settings, Unchecked}; - use milli::update::Setting; - - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let mut new_settings: Box> = Box::default(); - let mut embedders = BTreeMap::default(); - let embedding_settings = milli::vector::settings::EmbeddingSettings { - source: Setting::Set(milli::vector::settings::EmbedderSource::Rest), - api_key: Setting::Set(S("My super secret")), - url: Setting::Set(S("http://localhost:7777")), - dimensions: Setting::Set(4), - request: Setting::Set(serde_json::json!("{{text}}")), - response: Setting::Set(serde_json::json!("{{embedding}}")), - ..Default::default() - }; - embedders.insert(S("default"), Setting::Set(embedding_settings)); - new_settings.embedders = Setting::Set(embedders); - - index_scheduler - .register( - KindWithContent::SettingsUpdate { - index_uid: S("doggos"), - new_settings, - is_deletion: false, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_settings_task"); - - { - let rtxn = index_scheduler.read_txn().unwrap(); - let task = index_scheduler.get_task(&rtxn, 0).unwrap().unwrap(); - let task = meilisearch_types::task_view::TaskView::from_task(&task); - insta::assert_json_snapshot!(task.details); - } - - handle.advance_n_successful_batches(1); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: 
"settings_update_processed"); - - { - let rtxn = index_scheduler.read_txn().unwrap(); - let task = index_scheduler.get_task(&rtxn, 0).unwrap().unwrap(); - let task = meilisearch_types::task_view::TaskView::from_task(&task); - insta::assert_json_snapshot!(task.details); - } - - // has everything being pushed successfully in milli? - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - - let configs = index.embedding_configs(&rtxn).unwrap(); - let IndexEmbeddingConfig { name, config, user_provided } = configs.first().unwrap(); - insta::assert_snapshot!(name, @"default"); - insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); - insta::assert_json_snapshot!(config.embedder_options); - } - - #[test] - fn test_document_replace_without_autobatching() { - let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); - - for i in 0..10 { - let content = format!( - r#"{{ - "id": {}, - "doggo": "bob {}" - }}"#, - i, i - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); - - // Nothing should be batched thus half of the tasks are processed. - handle.advance_n_successful_batches(5); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed"); - - // Everything is processed. 
- handle.advance_n_successful_batches(5); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); - - // has everything been pushed successfully in milli? - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_document_update_without_autobatching() { - let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); - - for i in 0..10 { - let content = format!( - r#"{{ - "id": {}, - "doggo": "bob {}" - }}"#, - i, i - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: UpdateDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); - - // Nothing should be batched thus half of the tasks are processed. - handle.advance_n_successful_batches(5); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed"); - - // Everything is processed. - handle.advance_n_successful_batches(5); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); - - // has everything been pushed successfully in milli? 
- let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[macro_export] - macro_rules! debug_snapshot { - ($value:expr, @$snapshot:literal) => {{ - let value = format!("{:?}", $value); - meili_snap::snapshot!(value, @$snapshot); - }}; - } - - #[test] - fn simple_new() { - crate::IndexScheduler::test(true, vec![]); - } - - #[test] - fn query_tasks_from_and_limit() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let kind = index_creation_task("doggo", "bone"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - let kind = index_creation_task("whalo", "plankton"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); - let kind = index_creation_task("catto", "his_own_vomit"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); - - handle.advance_n_successful_batches(3); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_all_tasks"); - - let rtxn = index_scheduler.env.read_txn().unwrap(); - let query = Query { limit: Some(0), ..Default::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&tasks), @"[]"); - - let query = Query { limit: Some(1), ..Default::default() }; - let (tasks, _) = 
index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&tasks), @"[2,]"); - - let query = Query { limit: Some(2), ..Default::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); - - let query = Query { from: Some(1), ..Default::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&tasks), @"[0,1,]"); - - let query = Query { from: Some(2), ..Default::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]"); - - let query = Query { from: Some(1), limit: Some(1), ..Default::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&tasks), @"[1,]"); - - let query = Query { from: Some(1), limit: Some(2), ..Default::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&tasks), @"[0,1,]"); - } - - #[test] - fn query_tasks_simple() { - let start_time = OffsetDateTime::now_utc(); - - let (index_scheduler, mut handle) = - IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); - - let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = index_creation_task("whalo", "fish"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - - 
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); - - handle.advance_till([Start, BatchCreated]); - - let rtxn = index_scheduler.env.read_txn().unwrap(); - - let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&tasks), @"[0,]"); // only the processing tasks in the first tick - - let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); // only the enqueued tasks in the first tick - - let query = Query { - statuses: Some(vec![Status::Enqueued, Status::Processing]), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]"); // both enqueued and processing tasks in the first tick - - let query = Query { - statuses: Some(vec![Status::Enqueued, Status::Processing]), - after_started_at: Some(start_time), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // both enqueued and processing tasks in the first tick, but limited to those with a started_at - // that comes after the start of the test, which should excludes the enqueued tasks - snapshot!(snapshot_bitmap(&tasks), @"[0,]"); - - let query = Query { - statuses: Some(vec![Status::Enqueued, Status::Processing]), - before_started_at: Some(start_time), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // both enqueued and processing tasks in the first tick, but 
limited to those with a started_at - // that comes before the start of the test, which should excludes all of them - snapshot!(snapshot_bitmap(&tasks), @"[]"); - - let query = Query { - statuses: Some(vec![Status::Enqueued, Status::Processing]), - after_started_at: Some(start_time), - before_started_at: Some(start_time + Duration::minutes(1)), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // both enqueued and processing tasks in the first tick, but limited to those with a started_at - // that comes after the start of the test and before one minute after the start of the test, - // which should exclude the enqueued tasks and include the only processing task - snapshot!(snapshot_bitmap(&tasks), @"[0,]"); - - handle.advance_till([ - InsideProcessBatch, - InsideProcessBatch, - ProcessBatchSucceeded, - AfterProcessing, - Start, - BatchCreated, - ]); - - let rtxn = index_scheduler.env.read_txn().unwrap(); - - let second_start_time = OffsetDateTime::now_utc(); - - let query = Query { - statuses: Some(vec![Status::Succeeded, Status::Processing]), - after_started_at: Some(start_time), - before_started_at: Some(start_time + Duration::minutes(1)), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // both succeeded and processing tasks in the first tick, but limited to those with a started_at - // that comes after the start of the test and before one minute after the start of the test, - // which should include all tasks - snapshot!(snapshot_bitmap(&tasks), @"[0,1,]"); - - let query = Query { - statuses: Some(vec![Status::Succeeded, Status::Processing]), - before_started_at: Some(start_time), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // both 
succeeded and processing tasks in the first tick, but limited to those with a started_at - // that comes before the start of the test, which should exclude all tasks - snapshot!(snapshot_bitmap(&tasks), @"[]"); - - let query = Query { - statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + Duration::minutes(1)), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // both succeeded and processing tasks in the first tick, but limited to those with a started_at - // that comes after the start of the second part of the test and before one minute after the - // second start of the test, which should exclude all tasks - snapshot!(snapshot_bitmap(&tasks), @"[]"); - - // now we make one more batch, the started_at field of the new tasks will be past `second_start_time` - handle.advance_till([ - InsideProcessBatch, - InsideProcessBatch, - ProcessBatchSucceeded, - AfterProcessing, - Start, - BatchCreated, - ]); - - let rtxn = index_scheduler.env.read_txn().unwrap(); - - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // we run the same query to verify that, and indeed find that the last task is matched - snapshot!(snapshot_bitmap(&tasks), @"[2,]"); - - let query = Query { - statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + Duration::minutes(1)), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // enqueued, succeeded, or processing tasks started after the second part of the test, should - // again only return the last task - snapshot!(snapshot_bitmap(&tasks), 
@"[2,]"); - - handle.advance_till([ProcessBatchFailed, AfterProcessing]); - let rtxn = index_scheduler.read_txn().unwrap(); - - // now the last task should have failed - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end"); - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // so running the last query should return nothing - snapshot!(snapshot_bitmap(&tasks), @"[]"); - - let query = Query { - statuses: Some(vec![Status::Failed]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + Duration::minutes(1)), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // but the same query on failed tasks should return the last task - snapshot!(snapshot_bitmap(&tasks), @"[2,]"); - - let query = Query { - statuses: Some(vec![Status::Failed]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + Duration::minutes(1)), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // but the same query on failed tasks should return the last task - snapshot!(snapshot_bitmap(&tasks), @"[2,]"); - - let query = Query { - statuses: Some(vec![Status::Failed]), - uids: Some(vec![1]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + Duration::minutes(1)), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // same query but with an invalid uid - snapshot!(snapshot_bitmap(&tasks), @"[]"); - - let query = Query { - statuses: Some(vec![Status::Failed]), - uids: Some(vec![2]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + 
Duration::minutes(1)), - ..Default::default() - }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // same query but with a valid uid - snapshot!(snapshot_bitmap(&tasks), @"[2,]"); - } - - #[test] - fn query_tasks_special_rules() { - let (index_scheduler, mut handle) = - IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); - - let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = KindWithContent::IndexSwap { - swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], - }; - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = KindWithContent::IndexSwap { - swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }], - }; - let _task = index_scheduler.register(kind, None, false).unwrap(); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); - - handle.advance_till([Start, BatchCreated]); - - let rtxn = index_scheduler.env.read_txn().unwrap(); - - let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // only the first task associated with catto is returned, the indexSwap tasks are excluded! 
- snapshot!(snapshot_bitmap(&tasks), @"[0,]"); - - let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes( - &rtxn, - &query, - &AuthFilter::with_allowed_indexes( - vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), - ), - ) - .unwrap(); - // we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks - // associated with doggo -> empty result - snapshot!(snapshot_bitmap(&tasks), @"[]"); - - let query = Query::default(); - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes( - &rtxn, - &query, - &AuthFilter::with_allowed_indexes( - vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), - ), - ) - .unwrap(); - // we asked for all the tasks, but we are only authorized to retrieve the doggo tasks - // -> only the index creation of doggo should be returned - snapshot!(snapshot_bitmap(&tasks), @"[1,]"); - - let query = Query::default(); - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes( - &rtxn, - &query, - &AuthFilter::with_allowed_indexes( - vec![ - IndexUidPattern::new_unchecked("catto"), - IndexUidPattern::new_unchecked("doggo"), - ] - .into_iter() - .collect(), - ), - ) - .unwrap(); - // we asked for all the tasks, but we are only authorized to retrieve the doggo and catto tasks - // -> all tasks except the swap of catto with whalo are returned - snapshot!(snapshot_bitmap(&tasks), @"[0,1,]"); - - let query = Query::default(); - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // we asked for all the tasks with all index authorized -> all tasks returned - snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,3,]"); - } - - #[test] - fn query_tasks_canceled_by() { - let (index_scheduler, mut handle) = - IndexScheduler::test(true, vec![(3, 
FailureLocation::InsideProcessBatch)]); - - let kind = index_creation_task("catto", "mouse"); - let _ = index_scheduler.register(kind, None, false).unwrap(); - let kind = index_creation_task("doggo", "sheep"); - let _ = index_scheduler.register(kind, None, false).unwrap(); - let kind = KindWithContent::IndexSwap { - swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], - }; - let _task = index_scheduler.register(kind, None, false).unwrap(); - - handle.advance_n_successful_batches(1); - let kind = KindWithContent::TaskCancelation { - query: "test_query".to_string(), - tasks: [0, 1, 2, 3].into_iter().collect(), - }; - let task_cancelation = index_scheduler.register(kind, None, false).unwrap(); - handle.advance_n_successful_batches(1); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); - - let rtxn = index_scheduler.read_txn().unwrap(); - let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) - .unwrap(); - // 0 is not returned because it was not canceled, 3 is not returned because it is the uid of the - // taskCancelation itself - snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); - - let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() }; - let (tasks, _) = index_scheduler - .get_task_ids_from_authorized_indexes( - &rtxn, - &query, - &AuthFilter::with_allowed_indexes( - vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), - ), - ) - .unwrap(); - // Return only 1 because the user is not authorized to see task 2 - snapshot!(snapshot_bitmap(&tasks), @"[1,]"); - } - - #[test] - fn query_batches_from_and_limit() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let kind = index_creation_task("doggo", "bone"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - 
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - let kind = index_creation_task("whalo", "plankton"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); - let kind = index_creation_task("catto", "his_own_vomit"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); - - handle.advance_n_successful_batches(3); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_all_tasks"); - - let proc = index_scheduler.processing_tasks.read().unwrap().clone(); - let rtxn = index_scheduler.env.read_txn().unwrap(); - let query = Query { limit: Some(0), ..Default::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&batches), @"[]"); - - let query = Query { limit: Some(1), ..Default::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&batches), @"[2,]"); - - let query = Query { limit: Some(2), ..Default::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&batches), @"[1,2,]"); - - let query = Query { from: Some(1), ..Default::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&batches), @"[0,1,]"); - - let query = Query { from: Some(2), ..Default::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - 
snapshot!(snapshot_bitmap(&batches), @"[0,1,2,]"); - - let query = Query { from: Some(1), limit: Some(1), ..Default::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&batches), @"[1,]"); - - let query = Query { from: Some(1), limit: Some(2), ..Default::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&batches), @"[0,1,]"); - } - - #[test] - fn query_batches_simple() { - let start_time = OffsetDateTime::now_utc(); - - let (index_scheduler, mut handle) = - IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); - - let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = index_creation_task("whalo", "fish"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); - - handle.advance_till([Start, BatchCreated]); - - let rtxn = index_scheduler.env.read_txn().unwrap(); - let proc = index_scheduler.processing_tasks.read().unwrap().clone(); - - let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() }; - let (mut batches, _) = index_scheduler - .get_batches_from_authorized_indexes(query.clone(), &AuthFilter::default()) - .unwrap(); - assert_eq!(batches.len(), 1); - batches[0].started_at = OffsetDateTime::UNIX_EPOCH; - // Insta cannot snapshot our batches because the batch stats contains an enum as key: https://github.com/mitsuhiko/insta/issues/689 - let batch = serde_json::to_string_pretty(&batches[0]).unwrap(); - snapshot!(batch, @r#" - { - "uid": 0, - "details": { - "primaryKey": "mouse" - }, - 
"stats": { - "totalNbTasks": 1, - "status": { - "processing": 1 - }, - "types": { - "indexCreation": 1 - }, - "indexUids": { - "catto": 1 - } - }, - "startedAt": "1970-01-01T00:00:00Z", - "finishedAt": null - } - "#); - - let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&batches), @"[]"); // The batches don't contains any enqueued tasks - - let query = Query { - statuses: Some(vec![Status::Enqueued, Status::Processing]), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - snapshot!(snapshot_bitmap(&batches), @"[0,]"); // both enqueued and processing tasks in the first tick - - let query = Query { - statuses: Some(vec![Status::Enqueued, Status::Processing]), - after_started_at: Some(start_time), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // both enqueued and processing tasks in the first tick, but limited to those with a started_at - // that comes after the start of the test, which should excludes the enqueued tasks - snapshot!(snapshot_bitmap(&batches), @"[0,]"); - - let query = Query { - statuses: Some(vec![Status::Enqueued, Status::Processing]), - before_started_at: Some(start_time), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // both enqueued and processing tasks in the first tick, but limited to those with a started_at - // that comes before the start of the test, which should excludes all of them - snapshot!(snapshot_bitmap(&batches), @"[]"); - - let query = Query { - statuses: Some(vec![Status::Enqueued, 
Status::Processing]), - after_started_at: Some(start_time), - before_started_at: Some(start_time + Duration::minutes(1)), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // both enqueued and processing tasks in the first tick, but limited to those with a started_at - // that comes after the start of the test and before one minute after the start of the test, - // which should exclude the enqueued tasks and include the only processing task - snapshot!(snapshot_bitmap(&batches), @"[0,]"); - - handle.advance_till([ - InsideProcessBatch, - InsideProcessBatch, - ProcessBatchSucceeded, - AfterProcessing, - Start, - BatchCreated, - ]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after-advancing-a-bit"); - - let rtxn = index_scheduler.env.read_txn().unwrap(); - let proc = index_scheduler.processing_tasks.read().unwrap().clone(); - - let second_start_time = OffsetDateTime::now_utc(); - - let query = Query { - statuses: Some(vec![Status::Succeeded, Status::Processing]), - after_started_at: Some(start_time), - before_started_at: Some(start_time + Duration::minutes(1)), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // both succeeded and processing tasks in the first tick, but limited to those with a started_at - // that comes after the start of the test and before one minute after the start of the test, - // which should include all tasks - snapshot!(snapshot_bitmap(&batches), @"[0,1,]"); - - let query = Query { - statuses: Some(vec![Status::Succeeded, Status::Processing]), - before_started_at: Some(start_time), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // both succeeded and processing tasks in the 
first tick, but limited to those with a started_at - // that comes before the start of the test, which should exclude all tasks - snapshot!(snapshot_bitmap(&batches), @"[]"); - - let query = Query { - statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + Duration::minutes(1)), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // both succeeded and processing tasks in the first tick, but limited to those with a started_at - // that comes after the start of the second part of the test and before one minute after the - // second start of the test, which should exclude all tasks - snapshot!(snapshot_bitmap(&batches), @"[]"); - - // now we make one more batch, the started_at field of the new tasks will be past `second_start_time` - handle.advance_till([ - InsideProcessBatch, - InsideProcessBatch, - ProcessBatchSucceeded, - AfterProcessing, - Start, - BatchCreated, - ]); - - let rtxn = index_scheduler.env.read_txn().unwrap(); - let proc = index_scheduler.processing_tasks.read().unwrap().clone(); - - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // we run the same query to verify that, and indeed find that the last task is matched - snapshot!(snapshot_bitmap(&batches), @"[2,]"); - - let query = Query { - statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + Duration::minutes(1)), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // enqueued, succeeded, or processing tasks started after the second part of the test, should - // 
again only return the last task - snapshot!(snapshot_bitmap(&batches), @"[2,]"); - - handle.advance_till([ProcessBatchFailed, AfterProcessing]); - let rtxn = index_scheduler.read_txn().unwrap(); - let proc = index_scheduler.processing_tasks.read().unwrap().clone(); - - // now the last task should have failed - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end"); - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // so running the last query should return nothing - snapshot!(snapshot_bitmap(&batches), @"[]"); - - let query = Query { - statuses: Some(vec![Status::Failed]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + Duration::minutes(1)), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // but the same query on failed tasks should return the last task - snapshot!(snapshot_bitmap(&batches), @"[2,]"); - - let query = Query { - statuses: Some(vec![Status::Failed]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + Duration::minutes(1)), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // but the same query on failed tasks should return the last task - snapshot!(snapshot_bitmap(&batches), @"[2,]"); - - let query = Query { - statuses: Some(vec![Status::Failed]), - uids: Some(vec![1]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + Duration::minutes(1)), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // same query but with an invalid uid - snapshot!(snapshot_bitmap(&batches), @"[]"); - - let 
query = Query { - statuses: Some(vec![Status::Failed]), - uids: Some(vec![2]), - after_started_at: Some(second_start_time), - before_started_at: Some(second_start_time + Duration::minutes(1)), - ..Default::default() - }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // same query but with a valid uid - snapshot!(snapshot_bitmap(&batches), @"[2,]"); - } - - #[test] - fn query_batches_special_rules() { - let (index_scheduler, mut handle) = - IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); - - let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = KindWithContent::IndexSwap { - swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], - }; - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = KindWithContent::IndexSwap { - swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }], - }; - let _task = index_scheduler.register(kind, None, false).unwrap(); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); - - handle.advance_till([Start, BatchCreated]); - - let rtxn = index_scheduler.env.read_txn().unwrap(); - let proc = index_scheduler.processing_tasks.read().unwrap().clone(); - - let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // only the first task associated with catto is returned, the indexSwap tasks are excluded! 
- snapshot!(snapshot_bitmap(&batches), @"[0,]"); - - let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes( - &rtxn, - &proc, - &query, - &AuthFilter::with_allowed_indexes( - vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), - ), - ) - .unwrap(); - // we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks - // associated with doggo -> empty result - snapshot!(snapshot_bitmap(&batches), @"[]"); - - drop(rtxn); - // We're going to advance and process all the batches for the next query to actually hit the db - handle.advance_till([ - InsideProcessBatch, - InsideProcessBatch, - ProcessBatchSucceeded, - AfterProcessing, - ]); - handle.advance_one_successful_batch(); - handle.advance_n_failed_batches(2); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after-processing-everything"); - let rtxn = index_scheduler.env.read_txn().unwrap(); - - let query = Query::default(); - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes( - &rtxn, - &proc, - &query, - &AuthFilter::with_allowed_indexes( - vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), - ), - ) - .unwrap(); - // we asked for all the tasks, but we are only authorized to retrieve the doggo tasks - // -> only the index creation of doggo should be returned - snapshot!(snapshot_bitmap(&batches), @"[1,]"); - - let query = Query::default(); - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes( - &rtxn, - &proc, - &query, - &AuthFilter::with_allowed_indexes( - vec![ - IndexUidPattern::new_unchecked("catto"), - IndexUidPattern::new_unchecked("doggo"), - ] - .into_iter() - .collect(), - ), - ) - .unwrap(); - // we asked for all the tasks, but we are only authorized to retrieve the doggo and catto tasks - // -> all tasks except the swap of catto with whalo are returned - 
snapshot!(snapshot_bitmap(&batches), @"[0,1,]"); - - let query = Query::default(); - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // we asked for all the tasks with all index authorized -> all tasks returned - snapshot!(snapshot_bitmap(&batches), @"[0,1,2,3,]"); - } - - #[test] - fn query_batches_canceled_by() { - let (index_scheduler, mut handle) = - IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); - - let kind = index_creation_task("catto", "mouse"); - let _ = index_scheduler.register(kind, None, false).unwrap(); - let kind = index_creation_task("doggo", "sheep"); - let _ = index_scheduler.register(kind, None, false).unwrap(); - let kind = KindWithContent::IndexSwap { - swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], - }; - let _task = index_scheduler.register(kind, None, false).unwrap(); - - handle.advance_n_successful_batches(1); - let kind = KindWithContent::TaskCancelation { - query: "test_query".to_string(), - tasks: [0, 1, 2, 3].into_iter().collect(), - }; - let task_cancelation = index_scheduler.register(kind, None, false).unwrap(); - handle.advance_n_successful_batches(1); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); - - let rtxn = index_scheduler.read_txn().unwrap(); - let proc = index_scheduler.processing_tasks.read().unwrap().clone(); - let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) - .unwrap(); - // The batch zero was the index creation task, the 1 is the task cancellation - snapshot!(snapshot_bitmap(&batches), @"[1,]"); - - let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes( - &rtxn, - &proc, - 
&query, - &AuthFilter::with_allowed_indexes( - vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), - ), - ) - .unwrap(); - // Return only 1 because the user is not authorized to see task 2 - snapshot!(snapshot_bitmap(&batches), @"[1,]"); - } - - #[test] - fn fail_in_process_batch_for_index_creation() { - let (index_scheduler, mut handle) = - IndexScheduler::test(true, vec![(1, FailureLocation::InsideProcessBatch)]); - - let kind = index_creation_task("catto", "mouse"); - - let _task = index_scheduler.register(kind, None, false).unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_register"); - - handle.advance_one_failed_batch(); - - // Still in the first iteration - assert_eq!(*index_scheduler.run_loop_iteration.read().unwrap(), 1); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "index_creation_failed"); - } - - #[test] - fn fail_in_process_batch_for_document_addition() { - let (index_scheduler, mut handle) = - IndexScheduler::test(true, vec![(1, FailureLocation::InsideProcessBatch)]); - - let content = r#" - { - "id": 1, - "doggo": "bob" - }"#; - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - handle.advance_till([Start, BatchCreated]); - - snapshot!( - snapshot_index_scheduler(&index_scheduler), - name: "document_addition_batch_created" - ); - - handle.advance_till([ProcessBatchFailed, AfterProcessing]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "document_addition_failed"); - } - - #[test] - 
fn fail_in_update_task_after_process_batch_success_for_document_addition() { - let (index_scheduler, mut handle) = IndexScheduler::test( - true, - vec![(1, FailureLocation::UpdatingTaskAfterProcessBatchSuccess { task_uid: 0 })], - ); - - let content = r#" - { - "id": 1, - "doggo": "bob" - }"#; - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - - handle.advance_till([Start]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "document_addition_succeeded_but_index_scheduler_not_updated"); - - handle.advance_till([BatchCreated, InsideProcessBatch, ProcessBatchSucceeded]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_batch_succeeded"); - - // At this point the next time the scheduler will try to progress it should encounter - // a critical failure and have to wait for 1s before retrying anything. 
- - let before_failure = Instant::now(); - handle.advance_till([Start]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_failing_to_commit"); - let failure_duration = before_failure.elapsed(); - assert!(failure_duration.as_millis() >= 1000); - - handle.advance_till([ - BatchCreated, - InsideProcessBatch, - ProcessBatchSucceeded, - AfterProcessing, - ]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_successfully_processed"); - } - - #[test] - fn test_document_addition_cant_create_index_without_index() { - // We're going to autobatch multiple document additions that don't have - // the right to create an index while there is no index currently. - // Thus, everything should be batched together and an IndexDoesNotExists - // error should be thrown. - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - for i in 0..10 { - let content = format!( - r#"{{ - "id": {}, - "doggo": "bob {}" - }}"#, - i, i - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: false, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); - - // Everything should be batched together. - handle.advance_till([ - Start, - BatchCreated, - InsideProcessBatch, - ProcessBatchFailed, - AfterProcessing, - ]); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_10_tasks"); - - // The index should not exist.
- snapshot!(matches!(index_scheduler.index_exists("doggos"), Ok(true)), @"false"); - } - - #[test] - fn test_document_addition_cant_create_index_without_index_without_autobatching() { - // We're going to execute multiple document addition that don't have - // the right to create an index while there is no index currently. - // Since the auto-batching is disabled, every task should be processed - // sequentially and throw an IndexDoesNotExists. - let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); - - for i in 0..10 { - let content = format!( - r#"{{ - "id": {}, - "doggo": "bob {}" - }}"#, - i, i - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: false, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); - - // Nothing should be batched thus half of the tasks are processed. - handle.advance_n_failed_batches(5); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed"); - - // Everything is processed. - handle.advance_n_failed_batches(5); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); - - // The index should not exist. - snapshot!(matches!(index_scheduler.index_exists("doggos"), Ok(true)), @"false"); - } - - #[test] - fn test_document_addition_cant_create_index_with_index() { - // We're going to autobatch multiple document addition that don't have - // the right to create an index while there is already an index. 
- // Thus, everything should be batched together and no error should be - // thrown. - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - // Create the index. - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_first_task"); - - for i in 0..10 { - let content = format!( - r#"{{ - "id": {}, - "doggo": "bob {}" - }}"#, - i, i - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: false, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); - - // Everything should be batched together. - handle.advance_n_successful_batches(1); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_10_tasks"); - - // Has everything been pushed successfully in milli?
- let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_document_addition_cant_create_index_with_index_without_autobatching() { - // We're going to execute multiple document additions that don't have - // the right to create an index while the index already exists. - // Since the autobatching is disabled, every task should be processed - // sequentially and succeed. - let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); - - // Create the index. - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_first_task"); - - for i in 0..10 { - let content = format!( - r#"{{ - "id": {}, - "doggo": "bob {}" - }}"#, - i, i - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: false, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); - -
// Nothing should be batched thus half of the tasks are processed. - handle.advance_n_successful_batches(5); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed"); - - // Everything is processed. - handle.advance_n_successful_batches(5); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); - - // Has everything being pushed successfully in milli? - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_document_addition_mixed_rights_with_index() { - // We're going to autobatch multiple document addition. - // - The index already exists - // - The first document addition don't have the right to create an index - // can it batch with the other one? - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - // Create the index. 
- index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, - None, - false, - ) - .unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_first_task"); - - for i in 0..10 { - let content = format!( - r#"{{ - "id": {}, - "doggo": "bob {}" - }}"#, - i, i - ); - let allow_index_creation = i % 2 != 0; - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); - - // Everything should be batched together. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); - - // Has everything being pushed successfully in milli? - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_document_addition_mixed_right_without_index_starts_with_cant_create() { - // We're going to autobatch multiple document addition. 
- // - The index does not exists - // - The first document addition don't have the right to create an index - // - The second do. They should not batch together. - // - The second should batch with everything else as it's going to create an index. - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - for i in 0..10 { - let content = format!( - r#"{{ - "id": {}, - "doggo": "bob {}" - }}"#, - i, i - ); - let allow_index_creation = i % 2 != 0; - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - file.persist().unwrap(); - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); - - // A first batch should be processed with only the first documentAddition that's going to fail. - handle.advance_one_failed_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "only_first_task_failed"); - - // Everything else should be batched together. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); - - // Has everything being pushed successfully in milli? 
- let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_document_addition_with_multiple_primary_key() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - for (id, primary_key) in ["id", "bork", "bloup"].iter().enumerate() { - let content = format!( - r#"{{ - "id": {id}, - "doggo": "jean bob" - }}"#, - ); - let (uuid, mut file) = - index_scheduler.create_update_file_with_uuid(id as u128).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - assert_eq!(documents_count, 1); - file.persist().unwrap(); - - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S(primary_key)), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_3_tasks"); - - // A first batch should be processed with only the first documentAddition. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "only_first_task_succeed"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_task_fails"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_task_fails"); - - // Is the primary key still what we expect? 
- let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); - snapshot!(primary_key, @"id"); - - // Is the document still the one we expect?. - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_document_addition_with_multiple_primary_key_batch_wrong_key() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - for (id, primary_key) in ["id", "bork", "bork"].iter().enumerate() { - let content = format!( - r#"{{ - "id": {id}, - "doggo": "jean bob" - }}"#, - ); - let (uuid, mut file) = - index_scheduler.create_update_file_with_uuid(id as u128).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - assert_eq!(documents_count, 1); - file.persist().unwrap(); - - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S(primary_key)), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_3_tasks"); - - // A first batch should be processed with only the first documentAddition. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "only_first_task_succeed"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_and_third_tasks_fails"); - - // Is the primary key still what we expect? 
- let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); - snapshot!(primary_key, @"id"); - - // Is the document still the one we expect?. - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_document_addition_with_bad_primary_key() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - for (id, primary_key) in ["bork", "bork", "id", "bork", "id"].iter().enumerate() { - let content = format!( - r#"{{ - "id": {id}, - "doggo": "jean bob" - }}"#, - ); - let (uuid, mut file) = - index_scheduler.create_update_file_with_uuid(id as u128).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - assert_eq!(documents_count, 1); - file.persist().unwrap(); - - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S(primary_key)), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_5_tasks"); - - // A first batch should be processed with only the first two documentAddition. - // it should fails because the documents don't contains any `bork` field. - // NOTE: it's marked as successful because the batch didn't fails, it's the individual tasks that failed. 
- handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_and_second_task_fails"); - - // No primary key should be set since both tasks of the first batch failed. - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let primary_key = index.primary_key(&rtxn).unwrap(); - snapshot!(primary_key.is_none(), @"true"); - - // The second batch should succeed and only contain one task. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_task_succeeds"); - - // The primary key should be set to `id` since this batch succeeded. - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); - snapshot!(primary_key, @"id"); - - // We're trying to `bork` again, but now there is already a primary key set for this index. - // NOTE: it's marked as successful because the batch didn't fail, it's the individual tasks that failed. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "fourth_task_fails"); - - // Finally the last task should succeed since its primary key is the same as the valid one. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "fifth_task_succeeds"); - - // Is the primary key still what we expect? - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); - snapshot!(primary_key, @"id"); - - // Is the document still the one we expect?
- let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_document_addition_with_set_and_null_primary_key() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - for (id, primary_key) in - [None, Some("bork"), Some("paw"), None, None, Some("paw")].into_iter().enumerate() - { - let content = format!( - r#"{{ - "paw": {id}, - "doggo": "jean bob" - }}"#, - ); - let (uuid, mut file) = - index_scheduler.create_update_file_with_uuid(id as u128).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - assert_eq!(documents_count, 1); - file.persist().unwrap(); - - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: primary_key.map(|pk| pk.to_string()), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_6_tasks"); - - // A first batch should contain only one task that fails because we can't infer the primary key. - // NOTE: it's marked as successful because the batch didn't fail, it's the individual tasks that failed. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_task_fails"); - - // The second batch should contain only one task that fails because `bork` is not a valid primary key. - // NOTE: it's marked as successful because the batch didn't fail, it's the individual tasks that failed.
- handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_task_fails"); - - // No primary key should be set at this point. - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let primary_key = index.primary_key(&rtxn).unwrap(); - snapshot!(primary_key.is_none(), @"true"); - - // The third batch should succeed and only contain one task. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_task_succeeds"); - - // The primary key should be set to `paw` since this batch succeeded. - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); - snapshot!(primary_key, @"paw"); - - // We should be able to batch together the next two tasks that don't specify any primary key - // + the last task that matches the current primary-key. Everything should succeed. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_other_tasks_succeeds"); - - // Is the primary key still what we expect? - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); - snapshot!(primary_key, @"paw"); - - // Is the document still the one we expect?
- let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn test_document_addition_with_set_and_null_primary_key_inference_works() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - for (id, primary_key) in [None, Some("bork"), Some("doggoid"), None, None, Some("doggoid")] - .into_iter() - .enumerate() - { - let content = format!( - r#"{{ - "doggoid": {id}, - "doggo": "jean bob" - }}"#, - ); - let (uuid, mut file) = - index_scheduler.create_update_file_with_uuid(id as u128).unwrap(); - let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); - assert_eq!(documents_count, 1); - file.persist().unwrap(); - - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: primary_key.map(|pk| pk.to_string()), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - } - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_6_tasks"); - - // A first batch should contains only one task that succeed and sets the primary key to `doggoid`. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_task_succeed"); - - // Checking the primary key. - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let primary_key = index.primary_key(&rtxn).unwrap(); - snapshot!(primary_key.is_none(), @"false"); - - // The second batch should contains only one task that fails because it tries to update the primary key to `bork`. 
- handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_task_fails"); - - // The third batch should succeed and only contains one task. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_task_succeeds"); - - // We should be able to batch together the next two tasks that don't specify any primary key - // + the last task that matches the current primary-key. Everything should succeed. - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_other_tasks_succeeds"); - - // Is the primary key still what we expect? - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); - snapshot!(primary_key, @"doggoid"); - - // Is the document still the one we expect?. - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); - } - - #[test] - fn panic_in_process_batch_for_index_creation() { - let (index_scheduler, mut handle) = - IndexScheduler::test(true, vec![(1, FailureLocation::PanicInsideProcessBatch)]); - - let kind = index_creation_task("catto", "mouse"); - - let _task = index_scheduler.register(kind, None, false).unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); - - handle.advance_till([Start, BatchCreated, ProcessBatchFailed, AfterProcessing]); - - // Still in the first iteration - assert_eq!(*index_scheduler.run_loop_iteration.read().unwrap(), 1); - // No matter what happens in process_batch, the index_scheduler should be internally consistent - 
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "index_creation_failed"); - } - - #[test] - fn test_task_queue_is_full() { - let (index_scheduler, mut handle) = - IndexScheduler::test_with_custom_config(vec![], |config| { - // that's the minimum map size possible (1048576 bytes = 1 MiB) - config.task_db_size = 1048576; - }); - - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - // on average this task takes ~600 bytes - loop { - let result = index_scheduler.register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ); - if result.is_err() { - break; - } - handle.advance_one_failed_batch(); - } - index_scheduler.assert_internally_consistent(); - - // at this point the task DB should have reached its limit and we should not be able to register new tasks - let result = index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ) - .unwrap_err(); - snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations."); - // we won't be able to test this error in an integration test thus as a best effort test I still ensure the error returns the expected error code - snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice"); - - // Even the task deletion that doesn't delete anything shouldn't be accepted - let result = index_scheduler - .register( - KindWithContent::TaskDeletion { query: S("test"), tasks: RoaringBitmap::new() }, - None, - false, - ) - .unwrap_err(); - snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached.
Please delete tasks to continue performing write operations."); - // we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code - snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice"); - - // But a task deletion that delete something should works - index_scheduler - .register( - KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - - // Now we should be able to enqueue a few tasks again - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ) - .unwrap(); - handle.advance_one_failed_batch(); - } - - #[test] - fn test_auto_deletion_of_tasks() { - let (index_scheduler, mut handle) = - IndexScheduler::test_with_custom_config(vec![], |config| { - config.max_number_of_tasks = 2; - }); - - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ) - .unwrap(); - handle.advance_one_failed_batch(); - - // at this point the max number of tasks is reached - // we can still enqueue multiple tasks - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ) - .unwrap(); - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ) - .unwrap(); - - let rtxn = index_scheduler.env.read_txn().unwrap(); - let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap(); - let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap(); - snapshot!(json_string!(tasks, { "[].enqueuedAt" => 
"[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full"); - drop(rtxn); - - // now we're above the max number of tasks - // and if we try to advance in the tick function a new task deletion should be enqueued - handle.advance_till([Start, BatchCreated]); - let rtxn = index_scheduler.env.read_txn().unwrap(); - let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap(); - let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap(); - snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued"); - drop(rtxn); - - handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]); - let rtxn = index_scheduler.env.read_txn().unwrap(); - let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap(); - let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap(); - snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed"); - drop(rtxn); - - handle.advance_one_failed_batch(); - // a new task deletion has been enqueued - handle.advance_one_successful_batch(); - let rtxn = index_scheduler.env.read_txn().unwrap(); - let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap(); - let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap(); - snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion"); - drop(rtxn); - - handle.advance_one_failed_batch(); - handle.advance_one_successful_batch(); - let rtxn = 
index_scheduler.env.read_txn().unwrap(); - let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap(); - let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap(); - snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed"); - drop(rtxn); - } - - #[test] - fn test_disable_auto_deletion_of_tasks() { - let (index_scheduler, mut handle) = - IndexScheduler::test_with_custom_config(vec![], |config| { - config.cleanup_enabled = false; - config.max_number_of_tasks = 2; - }); - - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ) - .unwrap(); - handle.advance_one_failed_batch(); - - // at this point the max number of tasks is reached - // we can still enqueue multiple tasks - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ) - .unwrap(); - index_scheduler - .register( - KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, - None, - false, - ) - .unwrap(); - - let rtxn = index_scheduler.env.read_txn().unwrap(); - let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap(); - let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap(); - snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full"); - drop(rtxn); - - // now we're above the max number of tasks - // and if we try to advance in the tick function no new task deletion should be enqueued - handle.advance_till([Start, 
BatchCreated]); - let rtxn = index_scheduler.env.read_txn().unwrap(); - let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap(); - let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap(); - snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued"); - drop(rtxn); - } - - #[test] - fn basic_get_stats() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - let kind = index_creation_task("whalo", "fish"); - let _task = index_scheduler.register(kind, None, false).unwrap(); - - snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###" - { - "indexes": { - "catto": 1, - "doggo": 1, - "whalo": 1 - }, - "statuses": { - "canceled": 0, - "enqueued": 3, - "failed": 0, - "processing": 0, - "succeeded": 0 - }, - "types": { - "documentAdditionOrUpdate": 0, - "documentDeletion": 0, - "documentEdition": 0, - "dumpCreation": 0, - "indexCreation": 3, - "indexDeletion": 0, - "indexSwap": 0, - "indexUpdate": 0, - "settingsUpdate": 0, - "snapshotCreation": 0, - "taskCancelation": 0, - "taskDeletion": 0 - } - } - "###); - - handle.advance_till([Start, BatchCreated]); - snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###" - { - "indexes": { - "catto": 1, - "doggo": 1, - "whalo": 1 - }, - "statuses": { - "canceled": 0, - "enqueued": 2, - "failed": 0, - "processing": 1, - "succeeded": 0 - }, - "types": { - "documentAdditionOrUpdate": 0, - "documentDeletion": 0, - "documentEdition": 0, - "dumpCreation": 0, - "indexCreation": 3, - "indexDeletion": 0, - "indexSwap": 0, - "indexUpdate": 0, - 
"settingsUpdate": 0, - "snapshotCreation": 0, - "taskCancelation": 0, - "taskDeletion": 0 - } - } - "###); - - handle.advance_till([ - InsideProcessBatch, - InsideProcessBatch, - ProcessBatchSucceeded, - AfterProcessing, - Start, - BatchCreated, - ]); - snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###" - { - "indexes": { - "catto": 1, - "doggo": 1, - "whalo": 1 - }, - "statuses": { - "canceled": 0, - "enqueued": 1, - "failed": 0, - "processing": 1, - "succeeded": 1 - }, - "types": { - "documentAdditionOrUpdate": 0, - "documentDeletion": 0, - "documentEdition": 0, - "dumpCreation": 0, - "indexCreation": 3, - "indexDeletion": 0, - "indexSwap": 0, - "indexUpdate": 0, - "settingsUpdate": 0, - "snapshotCreation": 0, - "taskCancelation": 0, - "taskDeletion": 0 - } - } - "###); - - // now we make one more batch, the started_at field of the new tasks will be past `second_start_time` - handle.advance_till([ - InsideProcessBatch, - InsideProcessBatch, - ProcessBatchSucceeded, - AfterProcessing, - Start, - BatchCreated, - ]); - snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###" - { - "indexes": { - "catto": 1, - "doggo": 1, - "whalo": 1 - }, - "statuses": { - "canceled": 0, - "enqueued": 0, - "failed": 0, - "processing": 1, - "succeeded": 2 - }, - "types": { - "documentAdditionOrUpdate": 0, - "documentDeletion": 0, - "documentEdition": 0, - "dumpCreation": 0, - "indexCreation": 3, - "indexDeletion": 0, - "indexSwap": 0, - "indexUpdate": 0, - "settingsUpdate": 0, - "snapshotCreation": 0, - "taskCancelation": 0, - "taskDeletion": 0 - } - } - "###); - } - - #[test] - fn cancel_processing_dump() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let dump_creation = KindWithContent::DumpCreation { keys: Vec::new(), instance_uid: None }; - let dump_cancellation = KindWithContent::TaskCancelation { - query: "cancel dump".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }; - let _ = 
index_scheduler.register(dump_creation, None, false).unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_dump_register"); - handle.advance_till([Start, BatchCreated, InsideProcessBatch]); - - let _ = index_scheduler.register(dump_cancellation, None, false).unwrap(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_registered"); - - snapshot!(format!("{:?}", handle.advance()), @"AbortedIndexation"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); - } - - #[test] - fn basic_set_taskid() { - let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]); - - let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; - let task = index_scheduler.register(kind, None, false).unwrap(); - snapshot!(task.uid, @"0"); - - let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; - let task = index_scheduler.register(kind, Some(12), false).unwrap(); - snapshot!(task.uid, @"12"); - - let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; - let error = index_scheduler.register(kind, Some(5), false).unwrap_err(); - snapshot!(error, @"Received bad task id: 5 should be >= to 13."); - } - - #[test] - fn dry_run() { - let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]); - - let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; - let task = index_scheduler.register(kind, None, true).unwrap(); - snapshot!(task.uid, @"0"); - snapshot!(snapshot_index_scheduler(&index_scheduler), @r" - ### Autobatching Enabled = true - ### Processing batch None: - [] - ---------------------------------------------------------------------- - ### All Tasks: - ---------------------------------------------------------------------- - ### Status: - ---------------------------------------------------------------------- - ### Kind: - 
---------------------------------------------------------------------- - ### Index Tasks: - ---------------------------------------------------------------------- - ### Index Mapper: - - ---------------------------------------------------------------------- - ### Canceled By: - - ---------------------------------------------------------------------- - ### Enqueued At: - ---------------------------------------------------------------------- - ### Started At: - ---------------------------------------------------------------------- - ### Finished At: - ---------------------------------------------------------------------- - ### All Batches: - ---------------------------------------------------------------------- - ### Batch to tasks mapping: - ---------------------------------------------------------------------- - ### Batches Status: - ---------------------------------------------------------------------- - ### Batches Kind: - ---------------------------------------------------------------------- - ### Batches Index Tasks: - ---------------------------------------------------------------------- - ### Batches Enqueued At: - ---------------------------------------------------------------------- - ### Batches Started At: - ---------------------------------------------------------------------- - ### Batches Finished At: - ---------------------------------------------------------------------- - ### File Store: - - ---------------------------------------------------------------------- - "); - - let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; - let task = index_scheduler.register(kind, Some(12), true).unwrap(); - snapshot!(task.uid, @"12"); - snapshot!(snapshot_index_scheduler(&index_scheduler), @r" - ### Autobatching Enabled = true - ### Processing batch None: - [] - ---------------------------------------------------------------------- - ### All Tasks: - ---------------------------------------------------------------------- - ### 
Status: - ---------------------------------------------------------------------- - ### Kind: - ---------------------------------------------------------------------- - ### Index Tasks: - ---------------------------------------------------------------------- - ### Index Mapper: - - ---------------------------------------------------------------------- - ### Canceled By: - - ---------------------------------------------------------------------- - ### Enqueued At: - ---------------------------------------------------------------------- - ### Started At: - ---------------------------------------------------------------------- - ### Finished At: - ---------------------------------------------------------------------- - ### All Batches: - ---------------------------------------------------------------------- - ### Batch to tasks mapping: - ---------------------------------------------------------------------- - ### Batches Status: - ---------------------------------------------------------------------- - ### Batches Kind: - ---------------------------------------------------------------------- - ### Batches Index Tasks: - ---------------------------------------------------------------------- - ### Batches Enqueued At: - ---------------------------------------------------------------------- - ### Batches Started At: - ---------------------------------------------------------------------- - ### Batches Finished At: - ---------------------------------------------------------------------- - ### File Store: - - ---------------------------------------------------------------------- - "); - } - - #[test] - fn import_vectors() { - use meilisearch_types::settings::{Settings, Unchecked}; - use milli::update::Setting; - - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let mut new_settings: Box> = Box::default(); - let mut embedders = BTreeMap::default(); - let embedding_settings = milli::vector::settings::EmbeddingSettings { - source: 
Setting::Set(milli::vector::settings::EmbedderSource::Rest), - api_key: Setting::Set(S("My super secret")), - url: Setting::Set(S("http://localhost:7777")), - dimensions: Setting::Set(384), - request: Setting::Set(serde_json::json!("{{text}}")), - response: Setting::Set(serde_json::json!("{{embedding}}")), - ..Default::default() - }; - embedders.insert(S("A_fakerest"), Setting::Set(embedding_settings)); - - let embedding_settings = milli::vector::settings::EmbeddingSettings { - source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace), - model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")), - revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")), - document_template: Setting::Set(S("{{doc.doggo}} the {{doc.breed}} best doggo")), - ..Default::default() - }; - embedders.insert(S("B_small_hf"), Setting::Set(embedding_settings)); - - new_settings.embedders = Setting::Set(embedders); - - index_scheduler - .register( - KindWithContent::SettingsUpdate { - index_uid: S("doggos"), - new_settings, - is_deletion: false, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_settings_task_vectors"); - - { - let rtxn = index_scheduler.read_txn().unwrap(); - let task = index_scheduler.get_task(&rtxn, 0).unwrap().unwrap(); - let task = meilisearch_types::task_view::TaskView::from_task(&task); - insta::assert_json_snapshot!(task.details); - } - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "settings_update_processed_vectors"); - - { - let rtxn = index_scheduler.read_txn().unwrap(); - let task = index_scheduler.get_task(&rtxn, 0).unwrap().unwrap(); - let task = meilisearch_types::task_view::TaskView::from_task(&task); - insta::assert_json_snapshot!(task.details); - } - - let (fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed) = { - let index = 
index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - - let configs = index.embedding_configs(&rtxn).unwrap(); - // for consistency with the below - #[allow(clippy::get_first)] - let IndexEmbeddingConfig { name, config: fakerest_config, user_provided } = - configs.get(0).unwrap(); - insta::assert_snapshot!(name, @"A_fakerest"); - insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); - insta::assert_json_snapshot!(fakerest_config.embedder_options); - let fakerest_name = name.clone(); - - let IndexEmbeddingConfig { name, config: simple_hf_config, user_provided } = - configs.get(1).unwrap(); - insta::assert_snapshot!(name, @"B_small_hf"); - insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); - insta::assert_json_snapshot!(simple_hf_config.embedder_options); - let simple_hf_name = name.clone(); - - let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap(); - let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap(); - let beagle_embed = - hf_embedder.embed_one(S("Intel the beagle best doggo"), None).unwrap(); - let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo"), None).unwrap(); - let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo"), None).unwrap(); - (fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed) - }; - - // add one doc, specifying vectors - - let doc = serde_json::json!( - { - "id": 0, - "doggo": "Intel", - "breed": "beagle", - "_vectors": { - &fakerest_name: { - // this will never trigger regeneration, which is good because we can't actually generate with - // this embedder - "regenerate": false, - "embeddings": beagle_embed, - }, - &simple_hf_name: { - // this will be regenerated on updates - "regenerate": true, - "embeddings": lab_embed, - }, - "noise": [0.1, 0.2, 0.3] - } - } - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0u128).unwrap(); - let documents_count = 
read_json(doc.to_string().as_bytes(), &mut file).unwrap(); - assert_eq!(documents_count, 1); - file.persist().unwrap(); - - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: UpdateDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after adding Intel"); - - handle.advance_one_successful_batch(); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "adding Intel succeeds"); - - // check embeddings - { - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - - // Ensure the document has been inserted into the relevant bitmap - let configs = index.embedding_configs(&rtxn).unwrap(); - // for consistency with the below - #[allow(clippy::get_first)] - let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } = - configs.get(0).unwrap(); - insta::assert_snapshot!(name, @"A_fakerest"); - insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>"); - - let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap(); - insta::assert_snapshot!(name, @"B_small_hf"); - insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); - - let embeddings = index.embeddings(&rtxn, 0).unwrap(); - - assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true"); - assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true"); - - let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1; - let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let doc = obkv_to_json( - &[ - fields_ids_map.id("doggo").unwrap(), - fields_ids_map.id("breed").unwrap(), - fields_ids_map.id("_vectors").unwrap(), - ], - &fields_ids_map, - doc, - ) - .unwrap(); - assert_json_snapshot!(doc, {"._vectors.A_fakerest.embeddings" => "[vector]"}); - } 
- - // update the doc without specifying vectors - - let doc = serde_json::json!( - { - "id": 0, - "doggo": "kefir", - "breed": "patou", - } - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(1u128).unwrap(); - let documents_count = read_json(doc.to_string().as_bytes(), &mut file).unwrap(); - assert_eq!(documents_count, 1); - file.persist().unwrap(); - - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: None, - method: UpdateDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir"); - - handle.advance_one_successful_batch(); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir succeeds"); - - { - // check embeddings - { - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - - // Ensure the document has been inserted into the relevant bitmap - let configs = index.embedding_configs(&rtxn).unwrap(); - // for consistency with the below - #[allow(clippy::get_first)] - let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } = - configs.get(0).unwrap(); - insta::assert_snapshot!(name, @"A_fakerest"); - insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>"); - - let IndexEmbeddingConfig { name, config: _, user_provided } = - configs.get(1).unwrap(); - insta::assert_snapshot!(name, @"B_small_hf"); - insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); - - let embeddings = index.embeddings(&rtxn, 0).unwrap(); - - // automatically changed to patou because set to regenerate - assert_json_snapshot!(embeddings[&simple_hf_name][0] == patou_embed, @"true"); - // remained beagle - assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true"); - - let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1; - let 
fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let doc = obkv_to_json( - &[ - fields_ids_map.id("doggo").unwrap(), - fields_ids_map.id("breed").unwrap(), - fields_ids_map.id("_vectors").unwrap(), - ], - &fields_ids_map, - doc, - ) - .unwrap(); - assert_json_snapshot!(doc, {"._vectors.A_fakerest.embeddings" => "[vector]"}); - } - } - } - - #[test] - fn import_vectors_first_and_embedder_later() { - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let content = serde_json::json!( - [ - { - "id": 0, - "doggo": "kefir", - }, - { - "id": 1, - "doggo": "intel", - "_vectors": { - "my_doggo_embedder": vec![1; 384], - "unknown embedder": vec![1, 2, 3], - } - }, - { - "id": 2, - "doggo": "max", - "_vectors": { - "my_doggo_embedder": { - "regenerate": false, - "embeddings": vec![2; 384], - }, - "unknown embedder": vec![4, 5], - }, - }, - { - "id": 3, - "doggo": "marcel", - "_vectors": { - "my_doggo_embedder": { - "regenerate": true, - "embeddings": vec![3; 384], - }, - }, - }, - { - "id": 4, - "doggo": "sora", - "_vectors": { - "my_doggo_embedder": { - "regenerate": true, - }, - }, - }, - ] - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0_u128).unwrap(); - let documents_count = - read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file) - .unwrap(); - snapshot!(documents_count, @"5"); - file.persist().unwrap(); - - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: None, - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| 
obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string(&documents).unwrap(), name: "documents after initial push"); - - let setting = meilisearch_types::settings::Settings:: { - embedders: Setting::Set(maplit::btreemap! { - S("my_doggo_embedder") => Setting::Set(EmbeddingSettings { - source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace), - model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")), - revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")), - document_template: Setting::Set(S("{{doc.doggo}}")), - ..Default::default() - }) - }), - ..Default::default() - }; - index_scheduler - .register( - KindWithContent::SettingsUpdate { - index_uid: S("doggos"), - new_settings: Box::new(setting), - is_deletion: false, - allow_index_creation: false, - }, - None, - false, - ) - .unwrap(); - index_scheduler.assert_internally_consistent(); - handle.advance_one_successful_batch(); - index_scheduler.assert_internally_consistent(); - - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - // the all the vectors linked to the new specified embedder have been removed - // Only the unknown embedders stays in the document DB - snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1.0,2.0,3.0]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4.0,5.0]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]"###); - let conf = index.embedding_configs(&rtxn).unwrap(); - // even though we specified the vector for the ID 3, it shouldn't be marked - // as user provided since we 
explicitely marked it as NOT user provided. - snapshot!(format!("{conf:#?}"), @r###" - [ - IndexEmbeddingConfig { - name: "my_doggo_embedder", - config: EmbeddingConfig { - embedder_options: HuggingFace( - EmbedderOptions { - model: "sentence-transformers/all-MiniLM-L6-v2", - revision: Some( - "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", - ), - distribution: None, - }, - ), - prompt: PromptData { - template: "{{doc.doggo}}", - max_bytes: Some( - 400, - ), - }, - quantized: None, - }, - user_provided: RoaringBitmap<[1, 2]>, - }, - ] - "###); - let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap(); - let embeddings = index.embeddings(&rtxn, docid).unwrap(); - let embedding = &embeddings["my_doggo_embedder"]; - assert!(!embedding.is_empty(), "{embedding:?}"); - - // the document with the id 3 should keep its original embedding - let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap(); - let embeddings = index.embeddings(&rtxn, docid).unwrap(); - let embeddings = &embeddings["my_doggo_embedder"]; - - snapshot!(embeddings.len(), @"1"); - assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]); - - // If we update marcel it should regenerate its embedding automatically - - let content = serde_json::json!( - [ - { - "id": 3, - "doggo": "marvel", - }, - { - "id": 4, - "doggo": "sorry", - }, - ] - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(1_u128).unwrap(); - let documents_count = - read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file) - .unwrap(); - snapshot!(documents_count, @"2"); - file.persist().unwrap(); - - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: None, - method: UpdateDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - - // the document with the id 3 should have its 
original embedding updated - let rtxn = index.read_txn().unwrap(); - let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap(); - let doc = index.documents(&rtxn, Some(docid)).unwrap()[0]; - let doc = obkv_to_json(&field_ids, &field_ids_map, doc.1).unwrap(); - snapshot!(json_string!(doc), @r###" - { - "id": 3, - "doggo": "marvel" - } - "###); - - let embeddings = index.embeddings(&rtxn, docid).unwrap(); - let embedding = &embeddings["my_doggo_embedder"]; - - assert!(!embedding.is_empty()); - assert!(!embedding[0].iter().all(|i| *i == 3.0), "{:?}", embedding[0]); - - // the document with the id 4 should generate an embedding - let docid = index.external_documents_ids.get(&rtxn, "4").unwrap().unwrap(); - let embeddings = index.embeddings(&rtxn, docid).unwrap(); - let embedding = &embeddings["my_doggo_embedder"]; - - assert!(!embedding.is_empty()); - } - - #[test] - fn delete_document_containing_vector() { - // 1. Add an embedder - // 2. Push two documents containing a simple vector - // 3. Delete the second document - // 4. The user defined roaring bitmap shouldn't contain the id of the deleted document anymore - // 5. Clear the index - // 6. The user defined roaring bitmap shouldn't contain the id of the remaining document - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let setting = meilisearch_types::settings::Settings:: { - embedders: Setting::Set(maplit::btreemap! 
{ - S("manual") => Setting::Set(EmbeddingSettings { - source: Setting::Set(milli::vector::settings::EmbedderSource::UserProvided), - dimensions: Setting::Set(3), - ..Default::default() - }) - }), - ..Default::default() - }; - index_scheduler - .register( - KindWithContent::SettingsUpdate { - index_uid: S("doggos"), - new_settings: Box::new(setting), - is_deletion: false, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - - let content = serde_json::json!( - [ - { - "id": 0, - "doggo": "kefir", - "_vectors": { - "manual": vec![0, 0, 0], - } - }, - { - "id": 1, - "doggo": "intel", - "_vectors": { - "manual": vec![1, 1, 1], - } - }, - ] - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0_u128).unwrap(); - let documents_count = - read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file) - .unwrap(); - snapshot!(documents_count, @"2"); - file.persist().unwrap(); - - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: None, - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: false, - }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - - index_scheduler - .register( - KindWithContent::DocumentDeletion { - index_uid: S("doggos"), - documents_ids: vec![S("1")], - }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"}]"###); - let conf = 
index.embedding_configs(&rtxn).unwrap(); - snapshot!(format!("{conf:#?}"), @r###" - [ - IndexEmbeddingConfig { - name: "manual", - config: EmbeddingConfig { - embedder_options: UserProvided( - EmbedderOptions { - dimensions: 3, - distribution: None, - }, - ), - prompt: PromptData { - template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", - max_bytes: Some( - 400, - ), - }, - quantized: None, - }, - user_provided: RoaringBitmap<[0]>, - }, - ] - "###); - let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap(); - let embeddings = index.embeddings(&rtxn, docid).unwrap(); - let embedding = &embeddings["manual"]; - assert!(!embedding.is_empty(), "{embedding:?}"); - - index_scheduler - .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) - .unwrap(); - handle.advance_one_successful_batch(); - - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string(&documents).unwrap(), @"[]"); - let conf = index.embedding_configs(&rtxn).unwrap(); - snapshot!(format!("{conf:#?}"), @r###" - [ - IndexEmbeddingConfig { - name: "manual", - config: EmbeddingConfig { - embedder_options: UserProvided( - EmbedderOptions { - dimensions: 3, - distribution: None, - }, - ), - prompt: PromptData { - template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", - max_bytes: Some( - 400, - ), - }, - quantized: None, - }, - user_provided: RoaringBitmap<[]>, - }, - ] - "###); - } - - #[test] - fn delete_embedder_with_user_provided_vectors() { 
- // 1. Add two embedders - // 2. Push two documents containing a simple vector - // 3. The documents must not contain the vectors after the update as they are in the vectors db - // 3. Delete the embedders - // 4. The documents contain the vectors again - let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - - let setting = meilisearch_types::settings::Settings:: { - embedders: Setting::Set(maplit::btreemap! { - S("manual") => Setting::Set(EmbeddingSettings { - source: Setting::Set(milli::vector::settings::EmbedderSource::UserProvided), - dimensions: Setting::Set(3), - ..Default::default() - }), - S("my_doggo_embedder") => Setting::Set(EmbeddingSettings { - source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace), - model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")), - revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")), - document_template: Setting::Set(S("{{doc.doggo}}")), - ..Default::default() - }), - }), - ..Default::default() - }; - index_scheduler - .register( - KindWithContent::SettingsUpdate { - index_uid: S("doggos"), - new_settings: Box::new(setting), - is_deletion: false, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - - let content = serde_json::json!( - [ - { - "id": 0, - "doggo": "kefir", - "_vectors": { - "manual": vec![0, 0, 0], - "my_doggo_embedder": vec![1; 384], - } - }, - { - "id": 1, - "doggo": "intel", - "_vectors": { - "manual": vec![1, 1, 1], - } - }, - ] - ); - - let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0_u128).unwrap(); - let documents_count = - read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file) - .unwrap(); - snapshot!(documents_count, @"2"); - file.persist().unwrap(); - - index_scheduler - .register( - KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: None, - method: ReplaceDocuments, - content_file: uuid, - 
documents_count, - allow_index_creation: false, - }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - - { - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel"}]"###); - } - - { - let setting = meilisearch_types::settings::Settings:: { - embedders: Setting::Set(maplit::btreemap! { - S("manual") => Setting::Reset, - }), - ..Default::default() - }; - index_scheduler - .register( - KindWithContent::SettingsUpdate { - index_uid: S("doggos"), - new_settings: Box::new(setting), - is_deletion: false, - allow_index_creation: true, - }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - } - - { - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"embeddings":[[0.0,0.0,0.0]],"regenerate":false}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"embeddings":[[1.0,1.0,1.0]],"regenerate":false}}}]"###); - } - - { - let setting = meilisearch_types::settings::Settings:: { - embedders: Setting::Reset, - ..Default::default() - }; - index_scheduler - .register( - KindWithContent::SettingsUpdate { - index_uid: S("doggos"), - new_settings: Box::new(setting), - is_deletion: false, - allow_index_creation: 
true, - }, - None, - false, - ) - .unwrap(); - handle.advance_one_successful_batch(); - } - - { - let index = index_scheduler.index("doggos").unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); - let field_ids = field_ids_map.ids().collect::>(); - let documents = index - .all_documents(&rtxn) - .unwrap() - .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) - .collect::>(); - - // FIXME: redaction - snapshot!(json_string!(serde_json::to_string(&documents).unwrap(), { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), @r###""[{\"id\":0,\"doggo\":\"kefir\",\"_vectors\":{\"manual\":{\"embeddings\":[[0.0,0.0,0.0]],\"regenerate\":false},\"my_doggo_embedder\":{\"embeddings\":[[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]],\"regenerate\":false}}},{\"id\":1,\"doggo\":\"intel\",\"_vectors\":{\"manual\":{\"embeddings\":[[1.0,1.0,1.0]],\"regenerate\":false}}}]""###); - } - } -} diff --git a/crates/index-scheduler/src/processing.rs b/crates/index-scheduler/src/processing.rs index aca654de9..d0382a81b 100644 --- a/crates/index-scheduler/src/processing.rs +++ b/crates/index-scheduler/src/processing.rs @@ -8,7 +8,7 @@ use roaring::RoaringBitmap; use crate::utils::ProcessingBatch; -#[derive(Clone)] +#[derive(Clone, Default)] pub struct ProcessingTasks { pub batch: Option>, /// The list of tasks ids that are currently running. @@ -20,7 +20,7 @@ pub struct ProcessingTasks { impl ProcessingTasks { /// Creates an empty `ProcessingAt` struct. 
pub fn new() -> ProcessingTasks {
-        ProcessingTasks { batch: None, processing: Arc::new(RoaringBitmap::new()), progress: None }
+        ProcessingTasks::default()
     }
 
     pub fn get_progress_view(&self) -> Option {
diff --git a/crates/index-scheduler/src/queue/batches.rs b/crates/index-scheduler/src/queue/batches.rs
new file mode 100644
index 000000000..6abfb42a0
--- /dev/null
+++ b/crates/index-scheduler/src/queue/batches.rs
@@ -0,0 +1,537 @@
+use std::ops::{Bound, RangeBounds};
+
+use meilisearch_types::batches::{Batch, BatchId};
+use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
+use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
+use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
+use meilisearch_types::tasks::{Kind, Status};
+use roaring::{MultiOps, RoaringBitmap};
+use time::OffsetDateTime;
+
+use super::{Query, Queue};
+use crate::processing::ProcessingTasks;
+use crate::utils::{insert_task_datetime, keep_ids_within_datetimes, map_bound, ProcessingBatch};
+use crate::{Error, Result, BEI128};
+
+/// Database const names for the `IndexScheduler`.
+mod db_name {
+    pub const ALL_BATCHES: &str = "all-batches";
+
+    pub const BATCH_STATUS: &str = "batch-status";
+    pub const BATCH_KIND: &str = "batch-kind";
+    pub const BATCH_INDEX_TASKS: &str = "batch-index-tasks";
+    pub const BATCH_ENQUEUED_AT: &str = "batch-enqueued-at";
+    pub const BATCH_STARTED_AT: &str = "batch-started-at";
+    pub const BATCH_FINISHED_AT: &str = "batch-finished-at";
+}
+
+/// Handles to the heed databases that store the batches and the secondary
+/// indexes (status, kind, index uid and dates) used to query them.
+pub struct BatchQueue {
+    /// Contains all the batches accessible by their Id.
+    pub(crate) all_batches: Database<BEU32, SerdeJson<Batch>>,
+
+    /// All the batches containing a task matching the selected status.
+    pub(crate) status: Database<SerdeBincode<Status>, RoaringBitmapCodec>,
+    /// All the batches ids grouped by the kind of their task.
+    pub(crate) kind: Database<SerdeBincode<Kind>, RoaringBitmapCodec>,
+    /// Store the batches associated to an index.
+    pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
+    /// Store the batches containing tasks which were enqueued at a specific date
+    pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
+    /// Store the batches containing finished tasks started at a specific date
+    pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
+    /// Store the batches containing tasks finished at a specific date
+    pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
+}
+
+impl BatchQueue {
+    /// Returns a new `BatchQueue` holding copies of the same database handles.
+    pub(crate) fn private_clone(&self) -> BatchQueue {
+        BatchQueue {
+            all_batches: self.all_batches,
+            status: self.status,
+            kind: self.kind,
+            index_tasks: self.index_tasks,
+            enqueued_at: self.enqueued_at,
+            started_at: self.started_at,
+            finished_at: self.finished_at,
+        }
+    }
+
+    /// Calls `create_database` for each of the named batch databases of `db_name`
+    /// inside the given write transaction and returns the resulting handles.
+    pub(super) fn new(env: &Env, wtxn: &mut RwTxn) -> Result<Self> {
+        Ok(Self {
+            all_batches: env.create_database(wtxn, Some(db_name::ALL_BATCHES))?,
+            status: env.create_database(wtxn, Some(db_name::BATCH_STATUS))?,
+            kind: env.create_database(wtxn, Some(db_name::BATCH_KIND))?,
+            index_tasks: env.create_database(wtxn, Some(db_name::BATCH_INDEX_TASKS))?,
+            enqueued_at: env.create_database(wtxn, Some(db_name::BATCH_ENQUEUED_AT))?,
+            started_at: env.create_database(wtxn, Some(db_name::BATCH_STARTED_AT))?,
+            finished_at: env.create_database(wtxn, Some(db_name::BATCH_FINISHED_AT))?,
+        })
+    }
+
+    /// Returns the union of the batch ids stored under every `Status`.
+    pub(crate) fn all_batch_ids(&self, rtxn: &RoTxn) -> Result<RoaringBitmap> {
+        enum_iterator::all().map(|s| self.get_status(rtxn, s)).union()
+    }
+
+    /// Returns the id following the highest key stored in `all_batches`,
+    /// or the default `BatchId` when the database is empty.
+    pub(crate) fn next_batch_id(&self, rtxn: &RoTxn) -> Result<BatchId> {
+        Ok(self
+            .all_batches
+            // Only the key is needed, so the stored batch is not decoded.
+            .remap_data_type::<DecodeIgnore>()
+            .last(rtxn)?
+            .map(|(k, _)| k + 1)
+            .unwrap_or_default())
+    }
+
+    /// Returns the batch stored under `batch_id`, or `None` if there is none.
+    pub(crate) fn get_batch(&self, rtxn: &RoTxn, batch_id: BatchId) -> Result<Option<Batch>> {
+        Ok(self.all_batches.get(rtxn, &batch_id)?)
+    }
+
+    /// Returns the whole set of batches that belongs to this index.
+    pub(crate) fn index_batches(&self, rtxn: &RoTxn, index: &str) -> Result<RoaringBitmap> {
+        Ok(self.index_tasks.get(rtxn, index)?.unwrap_or_default())
+    }
+
+    /// Loads the batch ids associated with `index`, lets `f` mutate them and
+    /// stores the result. The entry is deleted when the bitmap ends up empty.
+    pub(crate) fn update_index(
+        &self,
+        wtxn: &mut RwTxn,
+        index: &str,
+        f: impl Fn(&mut RoaringBitmap),
+    ) -> Result<()> {
+        let mut batches = self.index_batches(wtxn, index)?;
+        f(&mut batches);
+        if batches.is_empty() {
+            self.index_tasks.delete(wtxn, index)?;
+        } else {
+            self.index_tasks.put(wtxn, index, &batches)?;
+        }
+
+        Ok(())
+    }
+
+    /// Returns the ids of the batches stored under `status`, or an empty bitmap
+    /// when there is no entry for it.
+    pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result<RoaringBitmap> {
+        Ok(self.status.get(rtxn, &status)?.unwrap_or_default())
+    }
+
+    /// Overwrites the set of batch ids stored under `status`.
+    pub(crate) fn put_status(
+        &self,
+        wtxn: &mut RwTxn,
+        status: Status,
+        bitmap: &RoaringBitmap,
+    ) -> Result<()> {
+        Ok(self.status.put(wtxn, &status, bitmap)?)
+    }
+
+    /// Loads the batch ids stored under `status`, lets `f` mutate them and
+    /// writes the result back. Unlike `update_index`, an empty bitmap is
+    /// still written instead of deleting the entry.
+    pub(crate) fn update_status(
+        &self,
+        wtxn: &mut RwTxn,
+        status: Status,
+        f: impl Fn(&mut RoaringBitmap),
+    ) -> Result<()> {
+        let mut tasks = self.get_status(wtxn, status)?;
+        f(&mut tasks);
+        self.put_status(wtxn, status, &tasks)?;
+
+        Ok(())
+    }
+
+    /// Returns the ids of the batches stored under `kind`, or an empty bitmap
+    /// when there is no entry for it.
+    pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result<RoaringBitmap> {
+        Ok(self.kind.get(rtxn, &kind)?.unwrap_or_default())
+    }
+
+    /// Overwrites the set of batch ids stored under `kind`.
+    pub(crate) fn put_kind(
+        &self,
+        wtxn: &mut RwTxn,
+        kind: Kind,
+        bitmap: &RoaringBitmap,
+    ) -> Result<()> {
+        Ok(self.kind.put(wtxn, &kind, bitmap)?)
+    }
+
+    /// Loads the batch ids stored under `kind`, lets `f` mutate them and
+    /// writes the result back. An empty bitmap is still written.
+    pub(crate) fn update_kind(
+        &self,
+        wtxn: &mut RwTxn,
+        kind: Kind,
+        f: impl Fn(&mut RoaringBitmap),
+    ) -> Result<()> {
+        let mut tasks = self.get_kind(wtxn, kind)?;
+        f(&mut tasks);
+        self.put_kind(wtxn, kind, &tasks)?;
+        Ok(())
+    }
+
+    /// Persists `batch`: writes the `Batch` record under its uid, then adds the
+    /// uid to the status, kind and index bitmaps and to the enqueued-at,
+    /// started-at and finished-at date databases.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `batch.finished_at` is `None`.
+    pub(crate) fn write_batch(&self, wtxn: &mut RwTxn, batch: ProcessingBatch) -> Result<()> {
+        self.all_batches.put(
+            wtxn,
+            &batch.uid,
+            &Batch {
+                uid: batch.uid,
+                // The progress is never stored; it is set to `None` on the persisted record.
+                progress: None,
+                details: batch.details,
+                stats: batch.stats,
+                started_at: batch.started_at,
+                finished_at: batch.finished_at,
+            },
+        )?;
+
+        for status in batch.statuses {
+            self.update_status(wtxn, status, |bitmap| {
+                bitmap.insert(batch.uid);
+            })?;
+        }
+
+        for kind in batch.kinds {
+            self.update_kind(wtxn, kind, |bitmap| {
+                bitmap.insert(batch.uid);
+            })?;
+        }
+
+        for index in batch.indexes {
+            self.update_index(wtxn, &index, |bitmap| {
+                bitmap.insert(batch.uid);
+            })?;
+        }
+
+        // Both ends of the enqueued-at span are recorded in the same database.
+        if let Some(enqueued_at) = batch.oldest_enqueued_at {
+            insert_task_datetime(wtxn, self.enqueued_at, enqueued_at, batch.uid)?;
+        }
+        if let Some(enqueued_at) = batch.earliest_enqueued_at {
+            insert_task_datetime(wtxn, self.enqueued_at, enqueued_at, batch.uid)?;
+        }
+        insert_task_datetime(wtxn, self.started_at, batch.started_at, batch.uid)?;
+        // NOTE(review): presumably only finished batches are ever written, which is
+        // what makes this `unwrap` safe; confirm against the callers.
+        insert_task_datetime(wtxn, self.finished_at, batch.finished_at.unwrap(), batch.uid)?;
+
+        Ok(())
+    }
+
+    /// Convert an iterator to a `Vec` of batches. The batches MUST exist or a
+    /// `CorruptedTaskQueue` error will be thrown.
+    pub(crate) fn get_existing_batches(
+        &self,
+        rtxn: &RoTxn,
+        tasks: impl IntoIterator<Item = BatchId>,
+        processing: &ProcessingTasks,
+    ) -> Result<Vec<Batch>> {
+        tasks
+            .into_iter()
+            .map(|batch_id| {
+                // The batch being processed is built from the in-memory state
+                // (with its current progress) instead of being read from the database.
+                if Some(batch_id) == processing.batch.as_ref().map(|batch| batch.uid) {
+                    let mut batch = processing.batch.as_ref().unwrap().to_batch();
+                    batch.progress = processing.get_progress_view();
+                    Ok(batch)
+                } else {
+                    self.get_batch(rtxn, batch_id)
+                        .and_then(|task| task.ok_or(Error::CorruptedTaskQueue))
+                }
+            })
+            .collect::<Result<_>>()
+    }
+}
+
+impl Queue {
+    /// Return the batch ids matched by the given query from the index scheduler's point of view.
+    ///
+    /// Starts from every stored batch id plus the batch currently being processed,
+    /// then narrows the set with each filter of the query that is set. The `limit`
+    /// is applied last: it keeps the highest ids, or the lowest ones when `reverse` is set.
+    pub(crate) fn get_batch_ids(
+        &self,
+        rtxn: &RoTxn,
+        query: &Query,
+        processing: &ProcessingTasks,
+    ) -> Result<RoaringBitmap> {
+        let Query {
+            limit,
+            from,
+            reverse,
+            uids,
+            batch_uids,
+            statuses,
+            types,
+            index_uids,
+            canceled_by,
+            before_enqueued_at,
+            after_enqueued_at,
+            before_started_at,
+            after_started_at,
+            before_finished_at,
+            after_finished_at,
+        } = query;
+
+        let mut batches = self.batches.all_batch_ids(rtxn)?;
+        if let Some(batch_id) = processing.batch.as_ref().map(|batch| batch.uid) {
+            batches.insert(batch_id);
+        }
+
+        if let Some(from) = from {
+            let range = if reverse.unwrap_or_default() {
+                u32::MIN..*from
+            } else {
+                from.saturating_add(1)..u32::MAX
+            };
+            batches.remove_range(range);
+        }
+
+        if let Some(batch_uids) = &batch_uids {
+            let batches_uids = RoaringBitmap::from_iter(batch_uids);
+            batches &= batches_uids;
+        }
+
+        if let Some(status) = &statuses {
+            let mut status_batches = RoaringBitmap::new();
+            for status in status {
+                match status {
+                    // special case for Processing batches
+                    Status::Processing => {
+                        if let Some(batch_id) = processing.batch.as_ref().map(|batch| batch.uid) {
+                            status_batches.insert(batch_id);
+                        }
+                    }
+                    // Enqueued tasks are not stored in batches
+                    Status::Enqueued => (),
+                    status => status_batches |= &self.batches.get_status(rtxn, *status)?,
+                };
+            }
+            if !status.contains(&Status::Processing) {
+                if let Some(ref batch) = processing.batch {
+                    batches.remove(batch.uid);
+                }
+            }
+            batches &= status_batches;
+        }
+
+        if let Some(task_uids) = &uids {
+            let mut batches_by_task_uids = RoaringBitmap::new();
+            for task_uid in task_uids {
+                if let Some(task) = self.tasks.get_task(rtxn, *task_uid)? {
+                    if let Some(batch_uid) = task.batch_uid {
+                        batches_by_task_uids.insert(batch_uid);
+                    }
+                }
+            }
+            batches &= batches_by_task_uids;
+        }
+
+        // There is no database for this query, we must retrieve the task queried by the client and ensure it's valid
+        if let Some(canceled_by) = &canceled_by {
+            let mut all_canceled_batches = RoaringBitmap::new();
+            for cancel_uid in canceled_by {
+                if let Some(task) = self.tasks.get_task(rtxn, *cancel_uid)? {
+                    if task.kind.as_kind() == Kind::TaskCancelation
+                        && task.status == Status::Succeeded
+                    {
+                        if let Some(batch_uid) = task.batch_uid {
+                            all_canceled_batches.insert(batch_uid);
+                        }
+                    }
+                }
+            }
+
+            // if the canceled_by has been specified but no batch
+            // matches then we prefer matching zero than all batches.
+            if all_canceled_batches.is_empty() {
+                return Ok(RoaringBitmap::new());
+            } else {
+                batches &= all_canceled_batches;
+            }
+        }
+
+        if let Some(kind) = &types {
+            let mut kind_batches = RoaringBitmap::new();
+            for kind in kind {
+                kind_batches |= self.batches.get_kind(rtxn, *kind)?;
+                if let Some(uid) = processing
+                    .batch
+                    .as_ref()
+                    .and_then(|batch| batch.kinds.contains(kind).then_some(batch.uid))
+                {
+                    kind_batches.insert(uid);
+                }
+            }
+            batches &= &kind_batches;
+        }
+
+        if let Some(index) = &index_uids {
+            let mut index_batches = RoaringBitmap::new();
+            for index in index {
+                index_batches |= self.batches.index_batches(rtxn, index)?;
+                if let Some(uid) = processing
+                    .batch
+                    .as_ref()
+                    .and_then(|batch| batch.indexes.contains(index).then_some(batch.uid))
+                {
+                    index_batches.insert(uid);
+                }
+            }
+            batches &= &index_batches;
+        }
+
+        // For the started_at filter, we need to treat the batch that is processing separately from the
+        // batches that are not processing. The non-processing ones are filtered normally while the processing one
+        // is entirely removed unless the in-memory startedAt variable falls within the date filter.
+        // Once we have filtered the two subsets, we put them back together and assign it back to `batches`.
+        batches = {
+            // `processing.processing` holds *task* ids, while `batches` holds *batch* ids, so the
+            // split must be made on the uid of the batch being processed, not on the running tasks.
+            let mut processing_batch_ids = RoaringBitmap::new();
+            if let Some(batch) = processing.batch.as_ref() {
+                processing_batch_ids.insert(batch.uid);
+            }
+            let (mut filtered_non_processing_batches, mut filtered_processing_batches) =
+                (&batches - &processing_batch_ids, &batches & &processing_batch_ids);
+
+            // special case for Processing batches
+            // A closure that clears the filtered_processing_batches if their started_at date falls outside the given bounds
+            let mut clear_filtered_processing_batches =
+                |start: Bound<OffsetDateTime>, end: Bound<OffsetDateTime>| {
+                    let start = map_bound(start, |b| b.unix_timestamp_nanos());
+                    let end = map_bound(end, |b| b.unix_timestamp_nanos());
+                    let is_within_dates = RangeBounds::contains(
+                        &(start, end),
+                        &processing
+                            .batch
+                            .as_ref()
+                            .map_or_else(OffsetDateTime::now_utc, |batch| batch.started_at)
+                            .unix_timestamp_nanos(),
+                    );
+                    if !is_within_dates {
+                        filtered_processing_batches.clear();
+                    }
+                };
+            match (after_started_at, before_started_at) {
+                (None, None) => (),
+                (None, Some(before)) => {
+                    clear_filtered_processing_batches(Bound::Unbounded, Bound::Excluded(*before))
+                }
+                (Some(after), None) => {
+                    clear_filtered_processing_batches(Bound::Excluded(*after), Bound::Unbounded)
+                }
+                (Some(after), Some(before)) => clear_filtered_processing_batches(
+                    Bound::Excluded(*after),
+                    Bound::Excluded(*before),
+                ),
+            };
+
+            keep_ids_within_datetimes(
+                rtxn,
+                &mut filtered_non_processing_batches,
+                self.batches.started_at,
+                *after_started_at,
+                *before_started_at,
+            )?;
+            filtered_non_processing_batches | filtered_processing_batches
+        };
+
+        keep_ids_within_datetimes(
+            rtxn,
+            &mut batches,
+            self.batches.enqueued_at,
+            *after_enqueued_at,
+            *before_enqueued_at,
+        )?;
+
+        keep_ids_within_datetimes(
+            rtxn,
+            &mut batches,
+            self.batches.finished_at,
+            *after_finished_at,
+            *before_finished_at,
+        )?;
+
+        if let Some(limit) = limit {
+            // Bitmaps iterate in ascending order: the default order keeps the highest ids.
+            batches = if query.reverse.unwrap_or_default() {
+                batches.into_iter().take(*limit as usize).collect()
+            } else {
+                batches.into_iter().rev().take(*limit as usize).collect()
+            };
+        }
+
+        Ok(batches)
+    }
+
+    /// Return the batch ids matching the query
along with the total number of batches + /// by ignoring the from and limit parameters from the user's point of view. + /// + /// There are two differences between an internal query and a query executed by + /// the user. + /// + /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated + /// with many indexes internally. + /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes. + pub(crate) fn get_batch_ids_from_authorized_indexes( + &self, + rtxn: &RoTxn, + query: &Query, + filters: &meilisearch_auth::AuthFilter, + processing: &ProcessingTasks, + ) -> Result<(RoaringBitmap, u64)> { + // compute all batches matching the filter by ignoring the limits, to find the number of batches matching + // the filter. + // As this causes us to compute the filter twice it is slightly inefficient, but doing it this way spares + // us from modifying the underlying implementation, and the performance remains sufficient. + // Should this change, we would modify `get_batch_ids` to directly return the number of matching batches. + let total_batches = + self.get_batch_ids(rtxn, &query.clone().without_limits(), processing)?; + let mut batches = self.get_batch_ids(rtxn, query, processing)?; + + // If the query contains a list of index uid or there is a finite list of authorized indexes, + // then we must exclude all the batches that only contains tasks associated to multiple indexes. + // This works because we don't autobatch tasks associated to multiple indexes with tasks associated + // to a single index. e.g: IndexSwap cannot be batched with IndexCreation. 
+ if query.index_uids.is_some() || !filters.all_indexes_authorized() { + for kind in enum_iterator::all::().filter(|kind| !kind.related_to_one_index()) { + batches -= self.tasks.get_kind(rtxn, kind)?; + if let Some(batch) = processing.batch.as_ref() { + if batch.kinds.contains(&kind) { + batches.remove(batch.uid); + } + } + } + } + + // Any batch that is internally associated with at least one authorized index + // must be returned. + if !filters.all_indexes_authorized() { + let mut valid_indexes = RoaringBitmap::new(); + let mut forbidden_indexes = RoaringBitmap::new(); + + let all_indexes_iter = self.batches.index_tasks.iter(rtxn)?; + for result in all_indexes_iter { + let (index, index_tasks) = result?; + if filters.is_index_authorized(index) { + valid_indexes |= index_tasks; + } else { + forbidden_indexes |= index_tasks; + } + } + if let Some(batch) = processing.batch.as_ref() { + for index in &batch.indexes { + if filters.is_index_authorized(index) { + valid_indexes.insert(batch.uid); + } else { + forbidden_indexes.insert(batch.uid); + } + } + } + + // If a batch had ONE valid task then it should be returned + let invalid_batches = forbidden_indexes - valid_indexes; + + batches -= invalid_batches; + } + + Ok((batches, total_batches.len())) + } + + pub(crate) fn get_batches_from_authorized_indexes( + &self, + rtxn: &RoTxn, + query: &Query, + filters: &meilisearch_auth::AuthFilter, + processing: &ProcessingTasks, + ) -> Result<(Vec, u64)> { + let (batches, total) = + self.get_batch_ids_from_authorized_indexes(rtxn, query, filters, processing)?; + let batches = if query.reverse.unwrap_or_default() { + Box::new(batches.into_iter()) as Box> + } else { + Box::new(batches.into_iter().rev()) as Box> + }; + + let batches = self.batches.get_existing_batches( + rtxn, + batches.take(query.limit.unwrap_or(u32::MAX) as usize), + processing, + )?; + + Ok((batches, total)) + } +} diff --git a/crates/index-scheduler/src/queue/batches_test.rs 
b/crates/index-scheduler/src/queue/batches_test.rs new file mode 100644 index 000000000..aa84cdaf0 --- /dev/null +++ b/crates/index-scheduler/src/queue/batches_test.rs @@ -0,0 +1,473 @@ +use meili_snap::snapshot; +use meilisearch_auth::AuthFilter; +use meilisearch_types::index_uid_pattern::IndexUidPattern; +use meilisearch_types::tasks::{IndexSwap, KindWithContent, Status}; +use time::{Duration, OffsetDateTime}; + +use crate::insta_snapshot::{snapshot_bitmap, snapshot_index_scheduler}; +use crate::test_utils::Breakpoint::*; +use crate::test_utils::{index_creation_task, FailureLocation}; +use crate::{IndexScheduler, Query}; + +#[test] +fn query_batches_from_and_limit() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let kind = index_creation_task("doggo", "bone"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + let kind = index_creation_task("whalo", "plankton"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + let kind = index_creation_task("catto", "his_own_vomit"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); + + handle.advance_n_successful_batches(3); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_all_tasks"); + + let proc = index_scheduler.processing_tasks.read().unwrap().clone(); + let rtxn = index_scheduler.env.read_txn().unwrap(); + let query = Query { limit: Some(0), ..Default::default() }; + let (batches, _) = index_scheduler + .queue + .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + snapshot!(snapshot_bitmap(&batches), @"[]"); + + let query = Query { limit: Some(1), ..Default::default() }; + let (batches, _) = 
index_scheduler + .queue + .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + snapshot!(snapshot_bitmap(&batches), @"[2,]"); + + let query = Query { limit: Some(2), ..Default::default() }; + let (batches, _) = index_scheduler + .queue + .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + snapshot!(snapshot_bitmap(&batches), @"[1,2,]"); + + let query = Query { from: Some(1), ..Default::default() }; + let (batches, _) = index_scheduler + .queue + .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + snapshot!(snapshot_bitmap(&batches), @"[0,1,]"); + + let query = Query { from: Some(2), ..Default::default() }; + let (batches, _) = index_scheduler + .queue + .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + snapshot!(snapshot_bitmap(&batches), @"[0,1,2,]"); + + let query = Query { from: Some(1), limit: Some(1), ..Default::default() }; + let (batches, _) = index_scheduler + .queue + .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + snapshot!(snapshot_bitmap(&batches), @"[1,]"); + + let query = Query { from: Some(1), limit: Some(2), ..Default::default() }; + let (batches, _) = index_scheduler + .queue + .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + snapshot!(snapshot_bitmap(&batches), @"[0,1,]"); +} + +#[test] +fn query_batches_simple() { + let start_time = OffsetDateTime::now_utc(); + + let (index_scheduler, mut handle) = + IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); + + let kind = index_creation_task("catto", "mouse"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = index_creation_task("doggo", "sheep"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = index_creation_task("whalo", 
"fish"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); + + handle.advance_till([Start, BatchCreated]); + + let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() }; + let (mut batches, _) = index_scheduler + .get_batches_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + assert_eq!(batches.len(), 1); + batches[0].started_at = OffsetDateTime::UNIX_EPOCH; + // Insta cannot snapshot our batches because the batch stats contains an enum as key: https://github.com/mitsuhiko/insta/issues/689 + let batch = serde_json::to_string_pretty(&batches[0]).unwrap(); + snapshot!(batch, @r#" + { + "uid": 0, + "details": { + "primaryKey": "mouse" + }, + "stats": { + "totalNbTasks": 1, + "status": { + "processing": 1 + }, + "types": { + "indexCreation": 1 + }, + "indexUids": { + "catto": 1 + } + }, + "startedAt": "1970-01-01T00:00:00Z", + "finishedAt": null + } + "#); + + let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + snapshot!(snapshot_bitmap(&batches), @"[]"); // The batches don't contains any enqueued tasks + + let query = + Query { statuses: Some(vec![Status::Enqueued, Status::Processing]), ..Default::default() }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + snapshot!(snapshot_bitmap(&batches), @"[0,]"); // both enqueued and processing tasks in the first tick + + let query = Query { + statuses: Some(vec![Status::Enqueued, Status::Processing]), + after_started_at: Some(start_time), + ..Default::default() + }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both enqueued and processing tasks in the first tick, but limited to 
those with a started_at + // that comes after the start of the test, which should excludes the enqueued tasks + snapshot!(snapshot_bitmap(&batches), @"[0,]"); + + let query = Query { + statuses: Some(vec![Status::Enqueued, Status::Processing]), + before_started_at: Some(start_time), + ..Default::default() + }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both enqueued and processing tasks in the first tick, but limited to those with a started_at + // that comes before the start of the test, which should excludes all of them + snapshot!(snapshot_bitmap(&batches), @"[]"); + + let query = Query { + statuses: Some(vec![Status::Enqueued, Status::Processing]), + after_started_at: Some(start_time), + before_started_at: Some(start_time + Duration::minutes(1)), + ..Default::default() + }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both enqueued and processing tasks in the first tick, but limited to those with a started_at + // that comes after the start of the test and before one minute after the start of the test, + // which should exclude the enqueued tasks and include the only processing task + snapshot!(snapshot_bitmap(&batches), @"[0,]"); + + handle.advance_till([ + InsideProcessBatch, + InsideProcessBatch, + ProcessBatchSucceeded, + AfterProcessing, + Start, + BatchCreated, + ]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after-advancing-a-bit"); + + let second_start_time = OffsetDateTime::now_utc(); + + let query = Query { + statuses: Some(vec![Status::Succeeded, Status::Processing]), + after_started_at: Some(start_time), + before_started_at: Some(start_time + Duration::minutes(1)), + ..Default::default() + }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both succeeded and processing tasks in the 
first tick, but limited to those with a started_at + // that comes after the start of the test and before one minute after the start of the test, + // which should include all tasks + snapshot!(snapshot_bitmap(&batches), @"[0,1,]"); + + let query = Query { + statuses: Some(vec![Status::Succeeded, Status::Processing]), + before_started_at: Some(start_time), + ..Default::default() + }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both succeeded and processing tasks in the first tick, but limited to those with a started_at + // that comes before the start of the test, which should exclude all tasks + snapshot!(snapshot_bitmap(&batches), @"[]"); + + let query = Query { + statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both succeeded and processing tasks in the first tick, but limited to those with a started_at + // that comes after the start of the second part of the test and before one minute after the + // second start of the test, which should exclude all tasks + snapshot!(snapshot_bitmap(&batches), @"[]"); + + // now we make one more batch, the started_at field of the new tasks will be past `second_start_time` + handle.advance_till([ + InsideProcessBatch, + InsideProcessBatch, + ProcessBatchSucceeded, + AfterProcessing, + Start, + BatchCreated, + ]); + + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // we run the same query to verify that, and indeed find that the last task is matched + snapshot!(snapshot_bitmap(&batches), @"[2,]"); + + let query = Query { + statuses: Some(vec![Status::Enqueued, 
Status::Succeeded, Status::Processing]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // enqueued, succeeded, or processing tasks started after the second part of the test, should + // again only return the last task + snapshot!(snapshot_bitmap(&batches), @"[2,]"); + + handle.advance_till([ProcessBatchFailed, AfterProcessing]); + + // now the last task should have failed + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end"); + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // so running the last query should return nothing + snapshot!(snapshot_bitmap(&batches), @"[]"); + + let query = Query { + statuses: Some(vec![Status::Failed]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // but the same query on failed tasks should return the last task + snapshot!(snapshot_bitmap(&batches), @"[2,]"); + + let query = Query { + statuses: Some(vec![Status::Failed]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // but the same query on failed tasks should return the last task + snapshot!(snapshot_bitmap(&batches), @"[2,]"); + + let query = Query { + statuses: Some(vec![Status::Failed]), + uids: Some(vec![1]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + 
..Default::default() + }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // same query but with an invalid uid + snapshot!(snapshot_bitmap(&batches), @"[]"); + + let query = Query { + statuses: Some(vec![Status::Failed]), + uids: Some(vec![2]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // same query but with a valid uid + snapshot!(snapshot_bitmap(&batches), @"[2,]"); +} + +#[test] +fn query_batches_special_rules() { + let (index_scheduler, mut handle) = + IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); + + let kind = index_creation_task("catto", "mouse"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = index_creation_task("doggo", "sheep"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], + }; + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }], + }; + let _task = index_scheduler.register(kind, None, false).unwrap(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); + + handle.advance_till([Start, BatchCreated]); + + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap().clone(); + + let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() }; + let (batches, _) = index_scheduler + .queue + .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + // only the first task associated with 
catto is returned, the indexSwap tasks are excluded! + snapshot!(snapshot_bitmap(&batches), @"[0,]"); + + let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() }; + let (batches, _) = index_scheduler + .queue + .get_batch_ids_from_authorized_indexes( + &rtxn, + &query, + &AuthFilter::with_allowed_indexes( + vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), + ), + &proc, + ) + .unwrap(); + // we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks + // associated with doggo -> empty result + snapshot!(snapshot_bitmap(&batches), @"[]"); + + drop(rtxn); + // We're going to advance and process all the batches for the next query to actually hit the db + handle.advance_till([ + InsideProcessBatch, + InsideProcessBatch, + ProcessBatchSucceeded, + AfterProcessing, + ]); + handle.advance_one_successful_batch(); + handle.advance_n_failed_batches(2); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after-processing-everything"); + let rtxn = index_scheduler.env.read_txn().unwrap(); + + let query = Query::default(); + let (batches, _) = index_scheduler + .queue + .get_batch_ids_from_authorized_indexes( + &rtxn, + &query, + &AuthFilter::with_allowed_indexes( + vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), + ), + &proc, + ) + .unwrap(); + // we asked for all the tasks, but we are only authorized to retrieve the doggo tasks + // -> only the index creation of doggo should be returned + snapshot!(snapshot_bitmap(&batches), @"[1,]"); + + let query = Query::default(); + let (batches, _) = index_scheduler + .queue + .get_batch_ids_from_authorized_indexes( + &rtxn, + &query, + &AuthFilter::with_allowed_indexes( + vec![ + IndexUidPattern::new_unchecked("catto"), + IndexUidPattern::new_unchecked("doggo"), + ] + .into_iter() + .collect(), + ), + &proc, + ) + .unwrap(); + // we asked for all the tasks, but we are only authorized to retrieve the doggo and 
catto tasks + // -> all tasks except the swap of catto with whalo are returned + snapshot!(snapshot_bitmap(&batches), @"[0,1,]"); + + let query = Query::default(); + let (batches, _) = index_scheduler + .queue + .get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + // we asked for all the tasks with all index authorized -> all tasks returned + snapshot!(snapshot_bitmap(&batches), @"[0,1,2,3,]"); +} + +#[test] +fn query_batches_canceled_by() { + let (index_scheduler, mut handle) = + IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); + + let kind = index_creation_task("catto", "mouse"); + let _ = index_scheduler.register(kind, None, false).unwrap(); + let kind = index_creation_task("doggo", "sheep"); + let _ = index_scheduler.register(kind, None, false).unwrap(); + let kind = KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], + }; + let _task = index_scheduler.register(kind, None, false).unwrap(); + + handle.advance_n_successful_batches(1); + let kind = KindWithContent::TaskCancelation { + query: "test_query".to_string(), + tasks: [0, 1, 2, 3].into_iter().collect(), + }; + let task_cancelation = index_scheduler.register(kind, None, false).unwrap(); + handle.advance_n_successful_batches(1); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); + + let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // The batch zero was the index creation task, the 1 is the task cancellation + snapshot!(snapshot_bitmap(&batches), @"[1,]"); + + let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() }; + let (batches, _) = index_scheduler + .get_batch_ids_from_authorized_indexes( + &query, + &AuthFilter::with_allowed_indexes( + 
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), + ), + ) + .unwrap(); + // Return only 1 because the user is not authorized to see task 2 + snapshot!(snapshot_bitmap(&batches), @"[1,]"); +} diff --git a/crates/index-scheduler/src/queue/mod.rs b/crates/index-scheduler/src/queue/mod.rs new file mode 100644 index 000000000..4921d05e6 --- /dev/null +++ b/crates/index-scheduler/src/queue/mod.rs @@ -0,0 +1,379 @@ +mod batches; +#[cfg(test)] +mod batches_test; +mod tasks; +#[cfg(test)] +mod tasks_test; +#[cfg(test)] +mod test; + +use std::collections::BTreeMap; +use std::time::Duration; + +use file_store::FileStore; +use meilisearch_types::batches::BatchId; +use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; +use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32}; +use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; +use roaring::RoaringBitmap; +use time::format_description::well_known::Rfc3339; +use time::OffsetDateTime; +use uuid::Uuid; + +use self::batches::BatchQueue; +use self::tasks::TaskQueue; +use crate::processing::ProcessingTasks; +use crate::utils::{ + check_index_swap_validity, filter_out_references_to_newer_tasks, ProcessingBatch, +}; +use crate::{Error, IndexSchedulerOptions, Result, TaskId}; + +/// Database const names for the `IndexScheduler`. +mod db_name { + pub const BATCH_TO_TASKS_MAPPING: &str = "batch-to-tasks-mapping"; +} + +/// Defines a subset of tasks to be retrieved from the [`IndexScheduler`]. +/// +/// An empty/default query (where each field is set to `None`) matches all tasks. +/// Each non-null field restricts the set of tasks further. +#[derive(Default, Debug, Clone, PartialEq, Eq)] +pub struct Query { + /// The maximum number of tasks to be matched + pub limit: Option, + /// The minimum [task id](`meilisearch_types::tasks::Task::uid`) to be matched + pub from: Option, + /// The order used to return the tasks. By default the newest tasks are returned first and the boolean is `false`. 
+ pub reverse: Option, + /// The [task ids](`meilisearch_types::tasks::Task::uid`) to be matched + pub uids: Option>, + /// The [batch ids](`meilisearch_types::batches::Batch::uid`) to be matched + pub batch_uids: Option>, + /// The allowed [statuses](`meilisearch_types::tasks::Task::status`) of the matched tasks + pub statuses: Option>, + /// The allowed [kinds](meilisearch_types::tasks::Kind) of the matched tasks. + /// + /// The kind of a task is given by: + /// ``` + /// # use meilisearch_types::tasks::{Task, Kind}; + /// # fn doc_func(task: Task) -> Kind { + /// task.kind.as_kind() + /// # } + /// ``` + pub types: Option>, + /// The allowed [index ids](meilisearch_types::tasks::Task::index_uid) of the matched tasks + pub index_uids: Option>, + /// The [task ids](`meilisearch_types::tasks::Task::uid`) of the [`TaskCancelation`](meilisearch_types::tasks::Task::Kind::TaskCancelation) tasks + /// that canceled the matched tasks. + pub canceled_by: Option>, + /// Exclusive upper bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field. + pub before_enqueued_at: Option, + /// Exclusive lower bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field. + pub after_enqueued_at: Option, + /// Exclusive upper bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field. + pub before_started_at: Option, + /// Exclusive lower bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field. + pub after_started_at: Option, + /// Exclusive upper bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field. + pub before_finished_at: Option, + /// Exclusive lower bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field.
+ pub after_finished_at: Option, +} + +impl Query { + /// Return `true` if every field of the query is set to `None`, such that the query + /// matches all tasks. + pub fn is_empty(&self) -> bool { + matches!( + self, + Query { + limit: None, + from: None, + reverse: None, + uids: None, + batch_uids: None, + statuses: None, + types: None, + index_uids: None, + canceled_by: None, + before_enqueued_at: None, + after_enqueued_at: None, + before_started_at: None, + after_started_at: None, + before_finished_at: None, + after_finished_at: None, + } + ) + } + + /// Add an [index id](meilisearch_types::tasks::Task::index_uid) to the list of permitted indexes. + pub fn with_index(self, index_uid: String) -> Self { + let mut index_vec = self.index_uids.unwrap_or_default(); + index_vec.push(index_uid); + Self { index_uids: Some(index_vec), ..self } + } + + // Removes the `from` and `limit` restrictions from the query. + // Useful to get the total number of tasks matching a filter. + pub fn without_limits(self) -> Self { + Query { limit: None, from: None, ..self } + } +} + +/// Structure which holds meilisearch's indexes and schedules the tasks +/// to be performed on them. +pub struct Queue { + pub(crate) tasks: tasks::TaskQueue, + pub(crate) batches: batches::BatchQueue, + + /// Matches a batch id with the associated task ids. + pub(crate) batch_to_tasks_mapping: Database, + + /// The list of files referenced by the tasks. + pub(crate) file_store: FileStore, + + /// The max number of tasks allowed before the scheduler starts to delete + /// the finished tasks automatically. + pub(crate) max_number_of_tasks: usize, +} + +impl Queue { + pub(crate) fn private_clone(&self) -> Queue { + Queue { + tasks: self.tasks.private_clone(), + batches: self.batches.private_clone(), + batch_to_tasks_mapping: self.batch_to_tasks_mapping, + file_store: self.file_store.clone(), + max_number_of_tasks: self.max_number_of_tasks, + } + } + + /// Create an index scheduler and start its run loop. 
+ pub(crate) fn new( + env: &Env, + wtxn: &mut RwTxn, + options: &IndexSchedulerOptions, + ) -> Result { + // allow unreachable_code to get rid of the warning in the case of a test build. + Ok(Self { + file_store: FileStore::new(&options.update_file_path)?, + batch_to_tasks_mapping: env + .create_database(wtxn, Some(db_name::BATCH_TO_TASKS_MAPPING))?, + tasks: TaskQueue::new(env, wtxn)?, + batches: BatchQueue::new(env, wtxn)?, + max_number_of_tasks: options.max_number_of_tasks, + }) + } + + /// Returns the whole set of tasks that belongs to this batch. + pub(crate) fn tasks_in_batch(&self, rtxn: &RoTxn, batch_id: BatchId) -> Result { + Ok(self.batch_to_tasks_mapping.get(rtxn, &batch_id)?.unwrap_or_default()) + } + + /// Convert an iterator to a `Vec` of tasks and edit the `ProcessingBatch` to add the given tasks. + /// + /// The tasks MUST exist, or a `CorruptedTaskQueue` error will be returned. + pub(crate) fn get_existing_tasks_for_processing_batch( + &self, + rtxn: &RoTxn, + processing_batch: &mut ProcessingBatch, + tasks: impl IntoIterator, + ) -> Result> { + tasks + .into_iter() + .map(|task_id| { + let mut task = self + .tasks + .get_task(rtxn, task_id) + .and_then(|task| task.ok_or(Error::CorruptedTaskQueue)); + processing_batch.processing(&mut task); + task + }) + .collect::>() + } + + pub(crate) fn write_batch( + &self, + wtxn: &mut RwTxn, + batch: ProcessingBatch, + tasks: &RoaringBitmap, + ) -> Result<()> { + self.batch_to_tasks_mapping.put(wtxn, &batch.uid, tasks)?; + self.batches.write_batch(wtxn, batch)?; + Ok(()) + } + + pub(crate) fn delete_persisted_task_data(&self, task: &Task) -> Result<()> { + match task.content_uuid() { + Some(content_file) => self.delete_update_file(content_file), + None => Ok(()), + } + } + + /// Delete a file from the index scheduler. + /// + /// Counterpart to the [`create_update_file`](IndexScheduler::create_update_file) method.
+ pub fn delete_update_file(&self, uuid: Uuid) -> Result<()> { + Ok(self.file_store.delete(uuid)?) + } + + /// Create a file and register it in the index scheduler. + /// + /// The returned file and uuid can be used to associate + /// some data to a task. The file will be kept until + /// the task has been fully processed. + pub fn create_update_file(&self, dry_run: bool) -> Result<(Uuid, file_store::File)> { + if dry_run { + Ok((Uuid::nil(), file_store::File::dry_file()?)) + } else { + Ok(self.file_store.new_update()?) + } + } + + #[cfg(test)] + pub fn create_update_file_with_uuid(&self, uuid: u128) -> Result<(Uuid, file_store::File)> { + Ok(self.file_store.new_update_with_uuid(uuid)?) + } + + /// The size on disk taken by all the update files contained in the `IndexScheduler`, in bytes. + pub fn compute_update_file_size(&self) -> Result { + Ok(self.file_store.compute_total_size()?) + } + + pub fn register( + &self, + wtxn: &mut RwTxn, + kind: &KindWithContent, + task_id: Option, + dry_run: bool, + ) -> Result { + let next_task_id = self.tasks.next_task_id(wtxn)?; + + if let Some(uid) = task_id { + if uid < next_task_id { + return Err(Error::BadTaskId { received: uid, expected: next_task_id }); + } + } + + let mut task = Task { + uid: task_id.unwrap_or(next_task_id), + // The batch is defined once we start processing the task + batch_uid: None, + enqueued_at: OffsetDateTime::now_utc(), + started_at: None, + finished_at: None, + error: None, + canceled_by: None, + details: kind.default_details(), + status: Status::Enqueued, + kind: kind.clone(), + }; + // For deletion and cancelation tasks, we want to make extra sure that they + // don't attempt to delete/cancel tasks that are newer than themselves. + filter_out_references_to_newer_tasks(&mut task); + // If the registered task is an index swap task, verify that it is well-formed + // (that it does not contain duplicate indexes).
+ check_index_swap_validity(&task)?; + + // At this point the task is going to be registered and no further checks will be done + if dry_run { + return Ok(task); + } + + // Get rid of the mutability. + let task = task; + self.tasks.register(wtxn, &task)?; + + Ok(task) + } + + /// Register a task to clean up the task queue if needed + pub fn cleanup_task_queue(&self, wtxn: &mut RwTxn) -> Result<()> { + let nb_tasks = self.tasks.all_task_ids(wtxn)?.len(); + // if we have fewer than `max_number_of_tasks` tasks everything is fine + if nb_tasks < self.max_number_of_tasks as u64 { + return Ok(()); + } + + let finished = self.tasks.status.get(wtxn, &Status::Succeeded)?.unwrap_or_default() + | self.tasks.status.get(wtxn, &Status::Failed)?.unwrap_or_default() + | self.tasks.status.get(wtxn, &Status::Canceled)?.unwrap_or_default(); + + let to_delete = RoaringBitmap::from_iter(finished.into_iter().rev().take(100_000)); + + // /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete + // the deletion tasks we enqueued ourselves. + if to_delete.len() < 2 { + tracing::warn!("The task queue is almost full, but no task can be deleted yet."); + // the only thing we can do is hope that the user tasks are going to finish + return Ok(()); + } + + tracing::info!( + "The task queue is almost full. Deleting the oldest {} finished tasks.", + to_delete.len() + ); + + // it's safe to unwrap here because we checked the len above + let newest_task_id = to_delete.iter().last().unwrap(); + let last_task_to_delete = + self.tasks.get_task(wtxn, newest_task_id)?.ok_or(Error::CorruptedTaskQueue)?; + + // increase time by one nanosecond so that the enqueuedAt of the last task to delete is also lower than that date.
+ let delete_before = last_task_to_delete.enqueued_at + Duration::from_nanos(1); + + self.register( + wtxn, + &KindWithContent::TaskDeletion { + query: format!( + "?beforeEnqueuedAt={}&statuses=succeeded,failed,canceled", + delete_before.format(&Rfc3339).map_err(|_| Error::CorruptedTaskQueue)?, + ), + tasks: to_delete, + }, + None, + false, + )?; + + Ok(()) + } + + pub fn get_stats( + &self, + rtxn: &RoTxn, + processing: &ProcessingTasks, + ) -> Result>> { + let mut res = BTreeMap::new(); + let processing_tasks = processing.processing.len(); + + res.insert( + "statuses".to_string(), + enum_iterator::all::() + .map(|s| { + let tasks = self.tasks.get_status(rtxn, s)?.len(); + match s { + Status::Enqueued => Ok((s.to_string(), tasks - processing_tasks)), + Status::Processing => Ok((s.to_string(), processing_tasks)), + s => Ok((s.to_string(), tasks)), + } + }) + .collect::>>()?, + ); + res.insert( + "types".to_string(), + enum_iterator::all::() + .map(|s| Ok((s.to_string(), self.tasks.get_kind(rtxn, s)?.len()))) + .collect::>>()?, + ); + res.insert( + "indexes".to_string(), + self.tasks + .index_tasks + .iter(rtxn)? 
+ .map(|res| Ok(res.map(|(name, bitmap)| (name.to_string(), bitmap.len()))?)) + .collect::>>()?, + ); + + Ok(res) + } +} diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_canceled_by/start.snap b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_canceled_by/start.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/query_tasks_canceled_by/start.snap rename to crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_canceled_by/start.snap index ea3a75e8f..410f46929 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_canceled_by/start.snap +++ b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_canceled_by/start.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/batches_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/processed_all_tasks.snap b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/processed_all_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/processed_all_tasks.snap rename to crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/processed_all_tasks.snap index 9f5c7e4ad..27a641b59 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/processed_all_tasks.snap +++ b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/processed_all_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/batches_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_first_task.snap 
b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/registered_the_first_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_first_task.snap rename to crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/registered_the_first_task.snap index 64503a754..74c4c4a33 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_first_task.snap +++ b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/batches_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/registered_the_second_task.snap b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/registered_the_second_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/registered_the_second_task.snap rename to crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/registered_the_second_task.snap index 171f6dab4..411e82ea0 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/registered_the_second_task.snap +++ b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/registered_the_second_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/batches_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_third_task.snap 
b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/registered_the_third_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_third_task.snap rename to crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/registered_the_third_task.snap index f811b99a6..4c76db95e 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_third_task.snap +++ b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_from_and_limit/registered_the_third_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/batches_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_simple/after-advancing-a-bit.snap b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/after-advancing-a-bit.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/query_batches_simple/after-advancing-a-bit.snap rename to crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/after-advancing-a-bit.snap index bf5d0528c..6d899b270 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_simple/after-advancing-a-bit.snap +++ b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/after-advancing-a-bit.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/batches_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/end.snap b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/end.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/end.snap rename to 
crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/end.snap index cbb780494..314f5b067 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/end.snap +++ b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/end.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/batches_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/start.snap b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/start.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/start.snap rename to crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/start.snap index 78a6c4228..6dc897dfa 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/start.snap +++ b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_simple/start.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/batches_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_special_rules/after-processing-everything.snap b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_special_rules/after-processing-everything.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/query_batches_special_rules/after-processing-everything.snap rename to crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_special_rules/after-processing-everything.snap index 31a08e88b..f40322ac0 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_special_rules/after-processing-everything.snap +++ 
b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_special_rules/after-processing-everything.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/batches_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_special_rules/start.snap b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_special_rules/start.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/query_batches_special_rules/start.snap rename to crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_special_rules/start.snap index 30f62c526..1184c197f 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_special_rules/start.snap +++ b/crates/index-scheduler/src/queue/snapshots/batches_test.rs/query_batches_special_rules/start.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/batches_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_canceled_by/start.snap b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_canceled_by/start.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/query_batches_canceled_by/start.snap rename to crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_canceled_by/start.snap index ea3a75e8f..165d7c4fe 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_canceled_by/start.snap +++ b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_canceled_by/start.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/tasks_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/processed_all_tasks.snap b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/processed_all_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/processed_all_tasks.snap rename to crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/processed_all_tasks.snap index 9f5c7e4ad..079972755 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/processed_all_tasks.snap +++ b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/processed_all_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/tasks_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/registered_the_first_task.snap b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/registered_the_first_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/registered_the_first_task.snap rename to crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/registered_the_first_task.snap index 64503a754..4f9ffb209 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/registered_the_first_task.snap +++ b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/tasks_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_second_task.snap 
b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/registered_the_second_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_second_task.snap rename to crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/registered_the_second_task.snap index 171f6dab4..eb6b0e7ec 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_second_task.snap +++ b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/registered_the_second_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/tasks_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/registered_the_third_task.snap b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/registered_the_third_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/registered_the_third_task.snap rename to crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/registered_the_third_task.snap index f811b99a6..181f0308c 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_from_and_limit/registered_the_third_task.snap +++ b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_from_and_limit/registered_the_third_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/tasks_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_simple/end.snap b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_simple/end.snap similarity index 98% rename from 
crates/index-scheduler/src/snapshots/lib.rs/query_batches_simple/end.snap rename to crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_simple/end.snap index cbb780494..3ed017700 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_simple/end.snap +++ b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_simple/end.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/tasks_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_simple/start.snap b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_simple/start.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/query_batches_simple/start.snap rename to crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_simple/start.snap index 78a6c4228..268f463aa 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_simple/start.snap +++ b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_simple/start.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/tasks_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_special_rules/start.snap b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_special_rules/start.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/query_tasks_special_rules/start.snap rename to crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_special_rules/start.snap index 30f62c526..60c041c05 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_special_rules/start.snap +++ b/crates/index-scheduler/src/queue/snapshots/tasks_test.rs/query_tasks_special_rules/start.snap @@ -1,5 +1,5 @@ --- -source: 
crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/tasks_test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/register/everything_is_successfully_registered.snap b/crates/index-scheduler/src/queue/snapshots/test.rs/register/everything_is_successfully_registered.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/register/everything_is_successfully_registered.snap rename to crates/index-scheduler/src/queue/snapshots/test.rs/register/everything_is_successfully_registered.snap index 8341d947d..e4d9af541 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/register/everything_is_successfully_registered.snap +++ b/crates/index-scheduler/src/queue/snapshots/test.rs/register/everything_is_successfully_registered.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/after_the_second_task_deletion.snap b/crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/after_the_second_task_deletion.snap similarity index 94% rename from crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/after_the_second_task_deletion.snap rename to crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/after_the_second_task_deletion.snap index 03213fbb0..30e8e17a8 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/after_the_second_task_deletion.snap +++ b/crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/after_the_second_task_deletion.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/test.rs +snapshot_kind: text --- [ { diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/everything_has_been_processed.snap b/crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/everything_has_been_processed.snap similarity index 91% rename from crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/everything_has_been_processed.snap rename to crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/everything_has_been_processed.snap index cc38f69a0..76f88a13f 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/everything_has_been_processed.snap +++ b/crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/everything_has_been_processed.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/test.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_enqueued.snap b/crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/task_deletion_have_been_enqueued.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_enqueued.snap rename to crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/task_deletion_have_been_enqueued.snap index 3400d8950..4e3fb5439 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_enqueued.snap +++ b/crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/task_deletion_have_been_enqueued.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/test.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_processed.snap 
b/crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/task_deletion_have_been_processed.snap similarity index 95% rename from crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_processed.snap rename to crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/task_deletion_have_been_processed.snap index ab4210bed..4cabce94b 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_processed.snap +++ b/crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/task_deletion_have_been_processed.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/test.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap b/crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/task_queue_is_full.snap similarity index 96% rename from crates/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap rename to crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/task_queue_is_full.snap index 8b69b1cc2..5565994cb 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap +++ b/crates/index-scheduler/src/queue/snapshots/test.rs/test_auto_deletion_of_tasks/task_queue_is_full.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/test.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap b/crates/index-scheduler/src/queue/snapshots/test.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap similarity index 96% rename from 
crates/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap rename to crates/index-scheduler/src/queue/snapshots/test.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap index 8b69b1cc2..5565994cb 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap +++ b/crates/index-scheduler/src/queue/snapshots/test.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/test.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_queue_is_full.snap b/crates/index-scheduler/src/queue/snapshots/test.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap similarity index 96% rename from crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_queue_is_full.snap rename to crates/index-scheduler/src/queue/snapshots/test.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap index 8b69b1cc2..5565994cb 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_queue_is_full.snap +++ b/crates/index-scheduler/src/queue/snapshots/test.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/queue/test.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/queue/tasks.rs b/crates/index-scheduler/src/queue/tasks.rs new file mode 100644 index 000000000..bb6930b4b --- /dev/null +++ b/crates/index-scheduler/src/queue/tasks.rs @@ -0,0 +1,514 @@ +use std::ops::{Bound, RangeBounds}; + +use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str}; +use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; +use 
meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; +use meilisearch_types::tasks::{Kind, Status, Task}; +use roaring::{MultiOps, RoaringBitmap}; +use time::OffsetDateTime; + +use super::{Query, Queue}; +use crate::processing::ProcessingTasks; +use crate::utils::{self, insert_task_datetime, keep_ids_within_datetimes, map_bound}; +use crate::{Error, Result, TaskId, BEI128}; + +/// Database const names for the `IndexScheduler`. +mod db_name { + pub const ALL_TASKS: &str = "all-tasks"; + pub const STATUS: &str = "status"; + pub const KIND: &str = "kind"; + pub const INDEX_TASKS: &str = "index-tasks"; + pub const CANCELED_BY: &str = "canceled_by"; + pub const ENQUEUED_AT: &str = "enqueued-at"; + pub const STARTED_AT: &str = "started-at"; + pub const FINISHED_AT: &str = "finished-at"; +} + +pub struct TaskQueue { + /// The main database, it contains all the tasks accessible by their Id. + pub(crate) all_tasks: Database>, + + /// All the tasks ids grouped by their status. + // TODO we should not be able to serialize a `Status::Processing` in this database. + pub(crate) status: Database, RoaringBitmapCodec>, + /// All the tasks ids grouped by their kind. + pub(crate) kind: Database, RoaringBitmapCodec>, + /// Store the tasks associated to an index. 
+ pub(crate) index_tasks: Database, + /// Store the tasks that were canceled by a task uid + pub(crate) canceled_by: Database, + /// Store the task ids of tasks which were enqueued at a specific date + pub(crate) enqueued_at: Database, + /// Store the task ids of finished tasks which started being processed at a specific date + pub(crate) started_at: Database, + /// Store the task ids of tasks which finished at a specific date + pub(crate) finished_at: Database, +} + +impl TaskQueue { + pub(crate) fn private_clone(&self) -> TaskQueue { + TaskQueue { + all_tasks: self.all_tasks, + status: self.status, + kind: self.kind, + index_tasks: self.index_tasks, + canceled_by: self.canceled_by, + enqueued_at: self.enqueued_at, + started_at: self.started_at, + finished_at: self.finished_at, + } + } + + pub(super) fn new(env: &Env, wtxn: &mut RwTxn) -> Result { + Ok(Self { + all_tasks: env.create_database(wtxn, Some(db_name::ALL_TASKS))?, + status: env.create_database(wtxn, Some(db_name::STATUS))?, + kind: env.create_database(wtxn, Some(db_name::KIND))?, + index_tasks: env.create_database(wtxn, Some(db_name::INDEX_TASKS))?, + canceled_by: env.create_database(wtxn, Some(db_name::CANCELED_BY))?, + enqueued_at: env.create_database(wtxn, Some(db_name::ENQUEUED_AT))?, + started_at: env.create_database(wtxn, Some(db_name::STARTED_AT))?, + finished_at: env.create_database(wtxn, Some(db_name::FINISHED_AT))?, + }) + } + + pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result> { + Ok(self.all_tasks.remap_data_type::().last(rtxn)?.map(|(k, _)| k + 1)) + } + + pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result { + Ok(self.last_task_id(rtxn)?.unwrap_or_default()) + } + + pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result { + enum_iterator::all().map(|s| self.get_status(rtxn, s)).union() + } + + pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result> { + Ok(self.all_tasks.get(rtxn, &task_id)?) 
+ } + + pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> { + let old_task = self.get_task(wtxn, task.uid)?.ok_or(Error::CorruptedTaskQueue)?; + + debug_assert!(old_task != *task); + debug_assert_eq!(old_task.uid, task.uid); + debug_assert!(old_task.batch_uid.is_none() && task.batch_uid.is_some()); + + if old_task.status != task.status { + self.update_status(wtxn, old_task.status, |bitmap| { + bitmap.remove(task.uid); + })?; + self.update_status(wtxn, task.status, |bitmap| { + bitmap.insert(task.uid); + })?; + } + + if old_task.kind.as_kind() != task.kind.as_kind() { + self.update_kind(wtxn, old_task.kind.as_kind(), |bitmap| { + bitmap.remove(task.uid); + })?; + self.update_kind(wtxn, task.kind.as_kind(), |bitmap| { + bitmap.insert(task.uid); + })?; + } + + assert_eq!( + old_task.enqueued_at, task.enqueued_at, + "Cannot update a task's enqueued_at time" + ); + if old_task.started_at != task.started_at { + assert!(old_task.started_at.is_none(), "Cannot update a task's started_at time"); + if let Some(started_at) = task.started_at { + insert_task_datetime(wtxn, self.started_at, started_at, task.uid)?; + } + } + if old_task.finished_at != task.finished_at { + assert!(old_task.finished_at.is_none(), "Cannot update a task's finished_at time"); + if let Some(finished_at) = task.finished_at { + insert_task_datetime(wtxn, self.finished_at, finished_at, task.uid)?; + } + } + + self.all_tasks.put(wtxn, &task.uid, task)?; + Ok(()) + } + + /// Returns the whole set of tasks that belongs to this index. 
+ pub(crate) fn index_tasks(&self, rtxn: &RoTxn, index: &str) -> Result { + Ok(self.index_tasks.get(rtxn, index)?.unwrap_or_default()) + } + + pub(crate) fn update_index( + &self, + wtxn: &mut RwTxn, + index: &str, + f: impl Fn(&mut RoaringBitmap), + ) -> Result<()> { + let mut tasks = self.index_tasks(wtxn, index)?; + f(&mut tasks); + if tasks.is_empty() { + self.index_tasks.delete(wtxn, index)?; + } else { + self.index_tasks.put(wtxn, index, &tasks)?; + } + + Ok(()) + } + + pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result { + Ok(self.status.get(rtxn, &status)?.unwrap_or_default()) + } + + pub(crate) fn put_status( + &self, + wtxn: &mut RwTxn, + status: Status, + bitmap: &RoaringBitmap, + ) -> Result<()> { + Ok(self.status.put(wtxn, &status, bitmap)?) + } + + pub(crate) fn update_status( + &self, + wtxn: &mut RwTxn, + status: Status, + f: impl Fn(&mut RoaringBitmap), + ) -> Result<()> { + let mut tasks = self.get_status(wtxn, status)?; + f(&mut tasks); + self.put_status(wtxn, status, &tasks)?; + + Ok(()) + } + + pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result { + Ok(self.kind.get(rtxn, &kind)?.unwrap_or_default()) + } + + pub(crate) fn put_kind( + &self, + wtxn: &mut RwTxn, + kind: Kind, + bitmap: &RoaringBitmap, + ) -> Result<()> { + Ok(self.kind.put(wtxn, &kind, bitmap)?) + } + + pub(crate) fn update_kind( + &self, + wtxn: &mut RwTxn, + kind: Kind, + f: impl Fn(&mut RoaringBitmap), + ) -> Result<()> { + let mut tasks = self.get_kind(wtxn, kind)?; + f(&mut tasks); + self.put_kind(wtxn, kind, &tasks)?; + + Ok(()) + } + + /// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a + /// `CorruptedTaskQueue` error will be thrown. 
+ pub(crate) fn get_existing_tasks( + &self, + rtxn: &RoTxn, + tasks: impl IntoIterator, + ) -> Result> { + tasks + .into_iter() + .map(|task_id| { + self.get_task(rtxn, task_id).and_then(|task| task.ok_or(Error::CorruptedTaskQueue)) + }) + .collect::>() + } + + pub(crate) fn register(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> { + self.all_tasks.put(wtxn, &task.uid, task)?; + + for index in task.indexes() { + self.update_index(wtxn, index, |bitmap| { + bitmap.insert(task.uid); + })?; + } + + self.update_status(wtxn, Status::Enqueued, |bitmap| { + bitmap.insert(task.uid); + })?; + + self.update_kind(wtxn, task.kind.as_kind(), |bitmap| { + bitmap.insert(task.uid); + })?; + + utils::insert_task_datetime(wtxn, self.enqueued_at, task.enqueued_at, task.uid)?; + + Ok(()) + } +} + +impl Queue { + /// Return the task ids matched by the given query from the index scheduler's point of view. + pub(crate) fn get_task_ids( + &self, + rtxn: &RoTxn, + query: &Query, + processing_tasks: &ProcessingTasks, + ) -> Result { + let ProcessingTasks { batch: processing_batch, processing: processing_tasks, progress: _ } = + processing_tasks; + let Query { + limit, + from, + reverse, + uids, + batch_uids, + statuses, + types, + index_uids, + canceled_by, + before_enqueued_at, + after_enqueued_at, + before_started_at, + after_started_at, + before_finished_at, + after_finished_at, + } = query; + + let mut tasks = self.tasks.all_task_ids(rtxn)?; + + if let Some(from) = from { + let range = if reverse.unwrap_or_default() { + u32::MIN..*from + } else { + from.saturating_add(1)..u32::MAX + }; + tasks.remove_range(range); + } + + if let Some(batch_uids) = batch_uids { + let mut batch_tasks = RoaringBitmap::new(); + for batch_uid in batch_uids { + if processing_batch.as_ref().map_or(false, |batch| batch.uid == *batch_uid) { + batch_tasks |= &**processing_tasks; + } else { + batch_tasks |= self.tasks_in_batch(rtxn, *batch_uid)?; + } + } + tasks &= batch_tasks; + } + + if let Some(status) = 
statuses { + let mut status_tasks = RoaringBitmap::new(); + for status in status { + match status { + // special case for Processing tasks + Status::Processing => { + status_tasks |= &**processing_tasks; + } + status => status_tasks |= &self.tasks.get_status(rtxn, *status)?, + }; + } + if !status.contains(&Status::Processing) { + tasks -= &**processing_tasks; + } + tasks &= status_tasks; + } + + if let Some(uids) = uids { + let uids = RoaringBitmap::from_iter(uids); + tasks &= &uids; + } + + if let Some(canceled_by) = canceled_by { + let mut all_canceled_tasks = RoaringBitmap::new(); + for cancel_task_uid in canceled_by { + if let Some(canceled_by_uid) = self.tasks.canceled_by.get(rtxn, cancel_task_uid)? { + all_canceled_tasks |= canceled_by_uid; + } + } + + // if the canceled_by has been specified but no task + // matches then we prefer matching zero than all tasks. + if all_canceled_tasks.is_empty() { + return Ok(RoaringBitmap::new()); + } else { + tasks &= all_canceled_tasks; + } + } + + if let Some(kind) = types { + let mut kind_tasks = RoaringBitmap::new(); + for kind in kind { + kind_tasks |= self.tasks.get_kind(rtxn, *kind)?; + } + tasks &= &kind_tasks; + } + + if let Some(index) = index_uids { + let mut index_tasks = RoaringBitmap::new(); + for index in index { + index_tasks |= self.tasks.index_tasks(rtxn, index)?; + } + tasks &= &index_tasks; + } + + // For the started_at filter, we need to treat the part of the tasks that are processing from the part of the + // tasks that are not processing. The non-processing ones are filtered normally while the processing ones + // are entirely removed unless the in-memory startedAt variable falls within the date filter. + // Once we have filtered the two subsets, we put them back together and assign it back to `tasks`. 
+ tasks = { + let (mut filtered_non_processing_tasks, mut filtered_processing_tasks) = + (&tasks - &**processing_tasks, &tasks & &**processing_tasks); + + // special case for Processing tasks + // A closure that clears the filtered_processing_tasks if their started_at date falls outside the given bounds + let mut clear_filtered_processing_tasks = + |start: Bound, end: Bound| { + let start = map_bound(start, |b| b.unix_timestamp_nanos()); + let end = map_bound(end, |b| b.unix_timestamp_nanos()); + let is_within_dates = RangeBounds::contains( + &(start, end), + &processing_batch + .as_ref() + .map_or_else(OffsetDateTime::now_utc, |batch| batch.started_at) + .unix_timestamp_nanos(), + ); + if !is_within_dates { + filtered_processing_tasks.clear(); + } + }; + match (after_started_at, before_started_at) { + (None, None) => (), + (None, Some(before)) => { + clear_filtered_processing_tasks(Bound::Unbounded, Bound::Excluded(*before)) + } + (Some(after), None) => { + clear_filtered_processing_tasks(Bound::Excluded(*after), Bound::Unbounded) + } + (Some(after), Some(before)) => clear_filtered_processing_tasks( + Bound::Excluded(*after), + Bound::Excluded(*before), + ), + }; + + keep_ids_within_datetimes( + rtxn, + &mut filtered_non_processing_tasks, + self.tasks.started_at, + *after_started_at, + *before_started_at, + )?; + filtered_non_processing_tasks | filtered_processing_tasks + }; + + keep_ids_within_datetimes( + rtxn, + &mut tasks, + self.tasks.enqueued_at, + *after_enqueued_at, + *before_enqueued_at, + )?; + + keep_ids_within_datetimes( + rtxn, + &mut tasks, + self.tasks.finished_at, + *after_finished_at, + *before_finished_at, + )?; + + if let Some(limit) = limit { + tasks = if query.reverse.unwrap_or_default() { + tasks.into_iter().take(*limit as usize).collect() + } else { + tasks.into_iter().rev().take(*limit as usize).collect() + }; + } + + Ok(tasks) + } + + pub(crate) fn get_task_ids_from_authorized_indexes( + &self, + rtxn: &RoTxn, + query: &Query, + filters: 
&meilisearch_auth::AuthFilter, + processing_tasks: &ProcessingTasks, + ) -> Result<(RoaringBitmap, u64)> { + // compute all tasks matching the filter by ignoring the limits, to find the number of tasks matching + // the filter. + // As this causes us to compute the filter twice it is slightly inefficient, but doing it this way spares + // us from modifying the underlying implementation, and the performance remains sufficient. + // Should this change, we would modify `get_task_ids` to directly return the number of matching tasks. + let total_tasks = + self.get_task_ids(rtxn, &query.clone().without_limits(), processing_tasks)?; + let mut tasks = self.get_task_ids(rtxn, query, processing_tasks)?; + + // If the query contains a list of index uid or there is a finite list of authorized indexes, + // then we must exclude all the kinds that aren't associated to one and only one index. + if query.index_uids.is_some() || !filters.all_indexes_authorized() { + for kind in enum_iterator::all::().filter(|kind| !kind.related_to_one_index()) { + tasks -= self.tasks.get_kind(rtxn, kind)?; + } + } + + // Any task that is internally associated with a non-authorized index + // must be discarded. 
+ if !filters.all_indexes_authorized() { + let all_indexes_iter = self.tasks.index_tasks.iter(rtxn)?; + for result in all_indexes_iter { + let (index, index_tasks) = result?; + if !filters.is_index_authorized(index) { + tasks -= index_tasks; + } + } + } + + Ok((tasks, total_tasks.len())) + } + + pub(crate) fn get_tasks_from_authorized_indexes( + &self, + rtxn: &RoTxn, + query: &Query, + filters: &meilisearch_auth::AuthFilter, + processing_tasks: &ProcessingTasks, + ) -> Result<(Vec, u64)> { + let (tasks, total) = + self.get_task_ids_from_authorized_indexes(rtxn, query, filters, processing_tasks)?; + let tasks = if query.reverse.unwrap_or_default() { + Box::new(tasks.into_iter()) as Box> + } else { + Box::new(tasks.into_iter().rev()) as Box> + }; + let tasks = self + .tasks + .get_existing_tasks(rtxn, tasks.take(query.limit.unwrap_or(u32::MAX) as usize))?; + + let ProcessingTasks { batch, processing, progress: _ } = processing_tasks; + + let ret = tasks.into_iter(); + if processing.is_empty() || batch.is_none() { + Ok((ret.collect(), total)) + } else { + // Safe because we ensured there was a batch in the previous branch + let batch = batch.as_ref().unwrap(); + Ok(( + ret.map(|task| { + if processing.contains(task.uid) { + Task { + status: Status::Processing, + batch_uid: Some(batch.uid), + started_at: Some(batch.started_at), + ..task + } + } else { + task + } + }) + .collect(), + total, + )) + } + } +} diff --git a/crates/index-scheduler/src/queue/tasks_test.rs b/crates/index-scheduler/src/queue/tasks_test.rs new file mode 100644 index 000000000..d60d621d1 --- /dev/null +++ b/crates/index-scheduler/src/queue/tasks_test.rs @@ -0,0 +1,441 @@ +use meili_snap::snapshot; +use meilisearch_auth::AuthFilter; +use meilisearch_types::index_uid_pattern::IndexUidPattern; +use meilisearch_types::tasks::{IndexSwap, KindWithContent, Status}; +use time::{Duration, OffsetDateTime}; + +use crate::insta_snapshot::{snapshot_bitmap, snapshot_index_scheduler}; +use 
crate::test_utils::Breakpoint::*; +use crate::test_utils::{index_creation_task, FailureLocation}; +use crate::{IndexScheduler, Query}; + +#[test] +fn query_tasks_from_and_limit() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let kind = index_creation_task("doggo", "bone"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + let kind = index_creation_task("whalo", "plankton"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + let kind = index_creation_task("catto", "his_own_vomit"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); + + handle.advance_n_successful_batches(3); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_all_tasks"); + + let rtxn = index_scheduler.env.read_txn().unwrap(); + let processing = index_scheduler.processing_tasks.read().unwrap(); + let query = Query { limit: Some(0), ..Default::default() }; + let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing) + .unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + let query = Query { limit: Some(1), ..Default::default() }; + let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing) + .unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[2,]"); + + let query = Query { limit: Some(2), ..Default::default() }; + let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing) + .unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); + + let query = Query { from: Some(1), ..Default::default() }; 
+ let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing) + .unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[0,1,]"); + + let query = Query { from: Some(2), ..Default::default() }; + let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing) + .unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]"); + + let query = Query { from: Some(1), limit: Some(1), ..Default::default() }; + let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing) + .unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[1,]"); + + let query = Query { from: Some(1), limit: Some(2), ..Default::default() }; + let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing) + .unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[0,1,]"); +} + +#[test] +fn query_tasks_simple() { + let start_time = OffsetDateTime::now_utc(); + + let (index_scheduler, mut handle) = + IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); + + let kind = index_creation_task("catto", "mouse"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = index_creation_task("doggo", "sheep"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = index_creation_task("whalo", "fish"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); + + handle.advance_till([Start, BatchCreated]); + + let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[0,]"); // only the processing tasks in the first 
tick + + let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); // only the enqueued tasks in the first tick + + let query = + Query { statuses: Some(vec![Status::Enqueued, Status::Processing]), ..Default::default() }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]"); // both enqueued and processing tasks in the first tick + + let query = Query { + statuses: Some(vec![Status::Enqueued, Status::Processing]), + after_started_at: Some(start_time), + ..Default::default() + }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both enqueued and processing tasks in the first tick, but limited to those with a started_at + // that comes after the start of the test, which should exclude the enqueued tasks + snapshot!(snapshot_bitmap(&tasks), @"[0,]"); + + let query = Query { + statuses: Some(vec![Status::Enqueued, Status::Processing]), + before_started_at: Some(start_time), + ..Default::default() + }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both enqueued and processing tasks in the first tick, but limited to those with a started_at + // that comes before the start of the test, which should exclude all of them + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + let query = Query { + statuses: Some(vec![Status::Enqueued, Status::Processing]), + after_started_at: Some(start_time), + before_started_at: Some(start_time + Duration::minutes(1)), + ..Default::default() + }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both enqueued and 
processing tasks in the first tick, but limited to those with a started_at + // that comes after the start of the test and before one minute after the start of the test, + // which should exclude the enqueued tasks and include the only processing task + snapshot!(snapshot_bitmap(&tasks), @"[0,]"); + + handle.advance_till([ + InsideProcessBatch, + InsideProcessBatch, + ProcessBatchSucceeded, + AfterProcessing, + Start, + BatchCreated, + ]); + + let second_start_time = OffsetDateTime::now_utc(); + + let query = Query { + statuses: Some(vec![Status::Succeeded, Status::Processing]), + after_started_at: Some(start_time), + before_started_at: Some(start_time + Duration::minutes(1)), + ..Default::default() + }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both succeeded and processing tasks in the first tick, but limited to those with a started_at + // that comes after the start of the test and before one minute after the start of the test, + // which should include all tasks + snapshot!(snapshot_bitmap(&tasks), @"[0,1,]"); + + let query = Query { + statuses: Some(vec![Status::Succeeded, Status::Processing]), + before_started_at: Some(start_time), + ..Default::default() + }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both succeeded and processing tasks in the first tick, but limited to those with a started_at + // that comes before the start of the test, which should exclude all tasks + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + let query = Query { + statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // both succeeded and 
processing tasks in the first tick, but limited to those with a started_at + // that comes after the start of the second part of the test and before one minute after the + // second start of the test, which should exclude all tasks + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + // now we make one more batch, the started_at field of the new tasks will be past `second_start_time` + handle.advance_till([ + InsideProcessBatch, + InsideProcessBatch, + ProcessBatchSucceeded, + AfterProcessing, + Start, + BatchCreated, + ]); + + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // we run the same query to verify that, and indeed find that the last task is matched + snapshot!(snapshot_bitmap(&tasks), @"[2,]"); + + let query = Query { + statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // enqueued, succeeded, or processing tasks started after the second part of the test, should + // again only return the last task + snapshot!(snapshot_bitmap(&tasks), @"[2,]"); + + handle.advance_till([ProcessBatchFailed, AfterProcessing]); + + // now the last task should have failed + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end"); + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // so running the last query should return nothing + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + let query = Query { + statuses: Some(vec![Status::Failed]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let (tasks, _) = index_scheduler + 
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // but the same query on failed tasks should return the last task + snapshot!(snapshot_bitmap(&tasks), @"[2,]"); + + let query = Query { + statuses: Some(vec![Status::Failed]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // but the same query on failed tasks should return the last task + snapshot!(snapshot_bitmap(&tasks), @"[2,]"); + + let query = Query { + statuses: Some(vec![Status::Failed]), + uids: Some(vec![1]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // same query but with an invalid uid + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + let query = Query { + statuses: Some(vec![Status::Failed]), + uids: Some(vec![2]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let (tasks, _) = index_scheduler + .get_task_ids_from_authorized_indexes(&query, &AuthFilter::default()) + .unwrap(); + // same query but with a valid uid + snapshot!(snapshot_bitmap(&tasks), @"[2,]"); +} + +#[test] +fn query_tasks_special_rules() { + let (index_scheduler, mut handle) = + IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); + + let kind = index_creation_task("catto", "mouse"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = index_creation_task("doggo", "sheep"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: 
("catto".to_owned(), "doggo".to_owned()) }], + }; + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }], + }; + let _task = index_scheduler.register(kind, None, false).unwrap(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); + + handle.advance_till([Start, BatchCreated]); + + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + + let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() }; + let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + // only the first task associated with catto is returned, the indexSwap tasks are excluded! + snapshot!(snapshot_bitmap(&tasks), @"[0,]"); + + let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() }; + let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes( + &rtxn, + &query, + &AuthFilter::with_allowed_indexes( + vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), + ), + &proc, + ) + .unwrap(); + // we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks + // associated with doggo -> empty result + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + let query = Query::default(); + let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes( + &rtxn, + &query, + &AuthFilter::with_allowed_indexes( + vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), + ), + &proc, + ) + .unwrap(); + // we asked for all the tasks, but we are only authorized to retrieve the doggo tasks + // -> only the index creation of doggo should be returned + snapshot!(snapshot_bitmap(&tasks), @"[1,]"); + + let query = Query::default(); + let (tasks, _) = 
index_scheduler + .queue + .get_task_ids_from_authorized_indexes( + &rtxn, + &query, + &AuthFilter::with_allowed_indexes( + vec![ + IndexUidPattern::new_unchecked("catto"), + IndexUidPattern::new_unchecked("doggo"), + ] + .into_iter() + .collect(), + ), + &proc, + ) + .unwrap(); + // we asked for all the tasks, but we are only authorized to retrieve the doggo and catto tasks + // -> all tasks except the swap of catto with whalo are returned + snapshot!(snapshot_bitmap(&tasks), @"[0,1,]"); + + let query = Query::default(); + let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + // we asked for all the tasks with all index authorized -> all tasks returned + snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,3,]"); +} + +#[test] +fn query_tasks_canceled_by() { + let (index_scheduler, mut handle) = + IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); + + let kind = index_creation_task("catto", "mouse"); + let _ = index_scheduler.register(kind, None, false).unwrap(); + let kind = index_creation_task("doggo", "sheep"); + let _ = index_scheduler.register(kind, None, false).unwrap(); + let kind = KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], + }; + let _task = index_scheduler.register(kind, None, false).unwrap(); + + handle.advance_n_successful_batches(1); + let kind = KindWithContent::TaskCancelation { + query: "test_query".to_string(), + tasks: [0, 1, 2, 3].into_iter().collect(), + }; + let task_cancelation = index_scheduler.register(kind, None, false).unwrap(); + handle.advance_n_successful_batches(1); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); + + let rtxn = index_scheduler.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() }; + let (tasks, _) = 
index_scheduler + .queue + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc) + .unwrap(); + // 0 is not returned because it was not canceled, 3 is not returned because it is the uid of the + // taskCancelation itself + snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); + + let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() }; + let (tasks, _) = index_scheduler + .queue + .get_task_ids_from_authorized_indexes( + &rtxn, + &query, + &AuthFilter::with_allowed_indexes( + vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), + ), + &proc, + ) + .unwrap(); + // Return only 1 because the user is not authorized to see task 2 + snapshot!(snapshot_bitmap(&tasks), @"[1,]"); +} diff --git a/crates/index-scheduler/src/queue/test.rs b/crates/index-scheduler/src/queue/test.rs new file mode 100644 index 000000000..5a886b088 --- /dev/null +++ b/crates/index-scheduler/src/queue/test.rs @@ -0,0 +1,395 @@ +use big_s::S; +use meili_snap::{json_string, snapshot}; +use meilisearch_types::error::ErrorCode; +use meilisearch_types::tasks::{KindWithContent, Status}; +use roaring::RoaringBitmap; + +use crate::insta_snapshot::snapshot_index_scheduler; +use crate::test_utils::Breakpoint::*; +use crate::test_utils::{index_creation_task, replace_document_import_task}; +use crate::{IndexScheduler, Query}; + +#[test] +fn register() { + // In this test, the handle doesn't make any progress, we only check that the tasks are registered + let (index_scheduler, mut _handle) = IndexScheduler::test(true, vec![]); + + let kinds = [ + index_creation_task("catto", "mouse"), + replace_document_import_task("catto", None, 0, 12), + replace_document_import_task("catto", None, 1, 50), + replace_document_import_task("doggo", Some("bone"), 2, 5000), + ]; + let (_, file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap(); + file.persist().unwrap(); + let (_, file) = 
index_scheduler.queue.create_update_file_with_uuid(1).unwrap(); + file.persist().unwrap(); + let (_, file) = index_scheduler.queue.create_update_file_with_uuid(2).unwrap(); + file.persist().unwrap(); + + for (idx, kind) in kinds.into_iter().enumerate() { + let k = kind.as_kind(); + let task = index_scheduler.register(kind, None, false).unwrap(); + index_scheduler.assert_internally_consistent(); + + assert_eq!(task.uid, idx as u32); + assert_eq!(task.status, Status::Enqueued); + assert_eq!(task.kind.as_kind(), k); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "everything_is_successfully_registered"); +} + +#[test] +fn dry_run() { + let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let task = index_scheduler.register(kind, None, true).unwrap(); + snapshot!(task.uid, @"0"); + snapshot!(snapshot_index_scheduler(&index_scheduler), @r" + ### Autobatching Enabled = true + ### Processing batch None: + [] + ---------------------------------------------------------------------- + ### All Tasks: + ---------------------------------------------------------------------- + ### Status: + ---------------------------------------------------------------------- + ### Kind: + ---------------------------------------------------------------------- + ### Index Tasks: + ---------------------------------------------------------------------- + ### Index Mapper: + + ---------------------------------------------------------------------- + ### Canceled By: + + ---------------------------------------------------------------------- + ### Enqueued At: + ---------------------------------------------------------------------- + ### Started At: + ---------------------------------------------------------------------- + ### Finished At: + ---------------------------------------------------------------------- + ### All Batches: + 
---------------------------------------------------------------------- + ### Batch to tasks mapping: + ---------------------------------------------------------------------- + ### Batches Status: + ---------------------------------------------------------------------- + ### Batches Kind: + ---------------------------------------------------------------------- + ### Batches Index Tasks: + ---------------------------------------------------------------------- + ### Batches Enqueued At: + ---------------------------------------------------------------------- + ### Batches Started At: + ---------------------------------------------------------------------- + ### Batches Finished At: + ---------------------------------------------------------------------- + ### File Store: + + ---------------------------------------------------------------------- + "); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let task = index_scheduler.register(kind, Some(12), true).unwrap(); + snapshot!(task.uid, @"12"); + snapshot!(snapshot_index_scheduler(&index_scheduler), @r" + ### Autobatching Enabled = true + ### Processing batch None: + [] + ---------------------------------------------------------------------- + ### All Tasks: + ---------------------------------------------------------------------- + ### Status: + ---------------------------------------------------------------------- + ### Kind: + ---------------------------------------------------------------------- + ### Index Tasks: + ---------------------------------------------------------------------- + ### Index Mapper: + + ---------------------------------------------------------------------- + ### Canceled By: + + ---------------------------------------------------------------------- + ### Enqueued At: + ---------------------------------------------------------------------- + ### Started At: + ---------------------------------------------------------------------- + ### Finished At: + 
---------------------------------------------------------------------- + ### All Batches: + ---------------------------------------------------------------------- + ### Batch to tasks mapping: + ---------------------------------------------------------------------- + ### Batches Status: + ---------------------------------------------------------------------- + ### Batches Kind: + ---------------------------------------------------------------------- + ### Batches Index Tasks: + ---------------------------------------------------------------------- + ### Batches Enqueued At: + ---------------------------------------------------------------------- + ### Batches Started At: + ---------------------------------------------------------------------- + ### Batches Finished At: + ---------------------------------------------------------------------- + ### File Store: + + ---------------------------------------------------------------------- + "); +} + +#[test] +fn basic_set_taskid() { + let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let task = index_scheduler.register(kind, None, false).unwrap(); + snapshot!(task.uid, @"0"); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let task = index_scheduler.register(kind, Some(12), false).unwrap(); + snapshot!(task.uid, @"12"); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let error = index_scheduler.register(kind, Some(5), false).unwrap_err(); + snapshot!(error, @"Received bad task id: 5 should be >= to 13."); +} + +#[test] +fn test_disable_auto_deletion_of_tasks() { + let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| { + config.cleanup_enabled = false; + config.max_number_of_tasks = 2; + }); + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), 
primary_key: None }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap(); + handle.advance_one_failed_batch(); + + // at this point the max number of tasks is reached + // we can still enqueue multiple tasks + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap(); + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap(); + + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + let tasks = + index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); + let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full"); + drop(rtxn); + drop(proc); + + // now we're above the max number of tasks + // and if we try to advance in the tick function no new task deletion should be enqueued + handle.advance_till([Start, BatchCreated]); + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + let tasks = + index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); + let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued"); + drop(rtxn); + drop(proc); +} + +#[test] +fn test_auto_deletion_of_tasks() { + let (index_scheduler, mut handle) = 
IndexScheduler::test_with_custom_config(vec![], |config| { + config.max_number_of_tasks = 2; + }); + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap(); + handle.advance_one_failed_batch(); + + // at this point the max number of tasks is reached + // we can still enqueue multiple tasks + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap(); + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap(); + + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + let tasks = + index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); + let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full"); + drop(rtxn); + drop(proc); + + // now we're above the max number of tasks + // and if we try to advance in the tick function a new task deletion should be enqueued + handle.advance_till([Start, BatchCreated]); + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + let tasks = + index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); + let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", 
".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued"); + drop(rtxn); + drop(proc); + + handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]); + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + let tasks = + index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); + let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed"); + drop(rtxn); + drop(proc); + + handle.advance_one_failed_batch(); + // a new task deletion has been enqueued + handle.advance_one_successful_batch(); + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + let tasks = + index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); + let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion"); + drop(rtxn); + drop(proc); + + handle.advance_one_failed_batch(); + handle.advance_one_successful_batch(); + let rtxn = index_scheduler.env.read_txn().unwrap(); + let proc = index_scheduler.processing_tasks.read().unwrap(); + let tasks = + index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap(); + let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", 
".**.query" => "[query]" }), name: "everything_has_been_processed"); + drop(rtxn); + drop(proc); +} + +#[test] +fn test_task_queue_is_full() { + let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| { + // that's the minimum map size possible + config.task_db_size = 1048576; + }); + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + // on average this task takes ~600 bytes + loop { + let result = index_scheduler.register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ); + if result.is_err() { + break; + } + handle.advance_one_failed_batch(); + } + index_scheduler.assert_internally_consistent(); + + // at this point the task DB should have reached its limit and we should not be able to register new tasks + let result = index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap_err(); + snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations."); + // we won't be able to test this error in an integration test thus as a best effort test I still ensure the error returns the expected error code + snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice"); + + // Even the task deletion that doesn't delete anything shouldn't be accepted + let result = index_scheduler + .register( + KindWithContent::TaskDeletion { query: S("test"), tasks: RoaringBitmap::new() }, + None, + false, + ) + .unwrap_err(); + snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached.
Please delete tasks to continue performing write operations."); + // we won't be able to test this error in an integration test thus as a best effort test I still ensure the error returns the expected error code + snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice"); + + // But a task deletion that deletes something should work + index_scheduler + .register( + KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + + // Now we should be able to enqueue a few tasks again + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap(); + handle.advance_one_failed_batch(); +} diff --git a/crates/index-scheduler/src/autobatcher.rs b/crates/index-scheduler/src/scheduler/autobatcher.rs similarity index 99% rename from crates/index-scheduler/src/autobatcher.rs rename to crates/index-scheduler/src/scheduler/autobatcher.rs index 5950e2b13..6e05d4dda 100644 --- a/crates/index-scheduler/src/autobatcher.rs +++ b/crates/index-scheduler/src/scheduler/autobatcher.rs @@ -519,7 +519,14 @@ mod tests { use uuid::Uuid; use super::*; - use crate::debug_snapshot; + + #[macro_export] + macro_rules!
debug_snapshot { + ($value:expr, @$snapshot:literal) => {{ + let value = format!("{:?}", $value); + meili_snap::snapshot!(value, @$snapshot); + }}; + } fn autobatch_from( index_already_exists: bool, diff --git a/crates/index-scheduler/src/scheduler/create_batch.rs b/crates/index-scheduler/src/scheduler/create_batch.rs new file mode 100644 index 000000000..e9755c1a7 --- /dev/null +++ b/crates/index-scheduler/src/scheduler/create_batch.rs @@ -0,0 +1,530 @@ +use std::fmt; + +use meilisearch_types::heed::RoTxn; +use meilisearch_types::milli::update::IndexDocumentsMethod; +use meilisearch_types::settings::{Settings, Unchecked}; +use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; +use roaring::RoaringBitmap; +use uuid::Uuid; + +use super::autobatcher::{self, BatchKind}; +use crate::utils::ProcessingBatch; +use crate::{Error, IndexScheduler, Result}; + +/// Represents a combination of tasks that can all be processed at the same time. +/// +/// A batch contains the set of tasks that it represents (accessible through +/// [`self.ids()`](Batch::ids)), as well as additional information on how to +/// be processed. +#[derive(Debug)] +pub(crate) enum Batch { + TaskCancelation { + /// The task cancelation itself. + task: Task, + }, + TaskDeletions(Vec), + SnapshotCreation(Vec), + Dump(Task), + IndexOperation { + op: IndexOperation, + must_create_index: bool, + }, + IndexCreation { + index_uid: String, + primary_key: Option, + task: Task, + }, + IndexUpdate { + index_uid: String, + primary_key: Option, + task: Task, + }, + IndexDeletion { + index_uid: String, + tasks: Vec, + index_has_been_created: bool, + }, + IndexSwap { + task: Task, + }, +} + +#[derive(Debug)] +pub(crate) enum DocumentOperation { + Add(Uuid), + Delete(Vec), +} + +/// A [batch](Batch) that combines multiple tasks operating on an index. 
+#[derive(Debug)] +pub(crate) enum IndexOperation { + DocumentOperation { + index_uid: String, + primary_key: Option, + method: IndexDocumentsMethod, + operations: Vec, + tasks: Vec, + }, + DocumentEdition { + index_uid: String, + task: Task, + }, + DocumentDeletion { + index_uid: String, + tasks: Vec, + }, + DocumentClear { + index_uid: String, + tasks: Vec, + }, + Settings { + index_uid: String, + // The boolean indicates if it's a settings deletion or creation. + settings: Vec<(bool, Settings)>, + tasks: Vec, + }, + DocumentClearAndSetting { + index_uid: String, + cleared_tasks: Vec, + + // The boolean indicates if it's a settings deletion or creation. + settings: Vec<(bool, Settings)>, + settings_tasks: Vec, + }, +} + +impl Batch { + /// Return the task ids associated with this batch. + pub fn ids(&self) -> RoaringBitmap { + match self { + Batch::TaskCancelation { task, .. } + | Batch::Dump(task) + | Batch::IndexCreation { task, .. } + | Batch::IndexUpdate { task, .. } => { + RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap() + } + Batch::SnapshotCreation(tasks) + | Batch::TaskDeletions(tasks) + | Batch::IndexDeletion { tasks, .. } => { + RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid)) + } + Batch::IndexOperation { op, .. } => match op { + IndexOperation::DocumentOperation { tasks, .. } + | IndexOperation::Settings { tasks, .. } + | IndexOperation::DocumentDeletion { tasks, .. } + | IndexOperation::DocumentClear { tasks, .. } => { + RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid)) + } + IndexOperation::DocumentEdition { task, .. } => { + RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap() + } + IndexOperation::DocumentClearAndSetting { + cleared_tasks: tasks, + settings_tasks: other, + .. 
+ } => RoaringBitmap::from_iter(tasks.iter().chain(other).map(|task| task.uid)), + }, + Batch::IndexSwap { task } => { + RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap() + } + } + } + + /// Return the index UID associated with this batch + pub fn index_uid(&self) -> Option<&str> { + use Batch::*; + match self { + TaskCancelation { .. } + | TaskDeletions(_) + | SnapshotCreation(_) + | Dump(_) + | IndexSwap { .. } => None, + IndexOperation { op, .. } => Some(op.index_uid()), + IndexCreation { index_uid, .. } + | IndexUpdate { index_uid, .. } + | IndexDeletion { index_uid, .. } => Some(index_uid), + } + } +} + +impl fmt::Display for Batch { + /// A text used when we debug the profiling reports. + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let index_uid = self.index_uid(); + let tasks = self.ids(); + match self { + Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?, + Batch::TaskDeletions(_) => f.write_str("TaskDeletion")?, + Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?, + Batch::Dump(_) => f.write_str("Dump")?, + Batch::IndexOperation { op, .. } => write!(f, "{op}")?, + Batch::IndexCreation { .. } => f.write_str("IndexCreation")?, + Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?, + Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?, + Batch::IndexSwap { .. } => f.write_str("IndexSwap")?, + }; + match index_uid { + Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")), + None => f.write_fmt(format_args!(" from tasks: {tasks:?}")), + } + } +} + +impl IndexOperation { + pub fn index_uid(&self) -> &str { + match self { + IndexOperation::DocumentOperation { index_uid, .. } + | IndexOperation::DocumentEdition { index_uid, .. } + | IndexOperation::DocumentDeletion { index_uid, .. } + | IndexOperation::DocumentClear { index_uid, .. } + | IndexOperation::Settings { index_uid, .. } + | IndexOperation::DocumentClearAndSetting { index_uid, .. 
} => index_uid, + } + } +} + +impl fmt::Display for IndexOperation { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + IndexOperation::DocumentOperation { .. } => { + f.write_str("IndexOperation::DocumentOperation") + } + IndexOperation::DocumentEdition { .. } => { + f.write_str("IndexOperation::DocumentEdition") + } + IndexOperation::DocumentDeletion { .. } => { + f.write_str("IndexOperation::DocumentDeletion") + } + IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"), + IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"), + IndexOperation::DocumentClearAndSetting { .. } => { + f.write_str("IndexOperation::DocumentClearAndSetting") + } + } + } +} + +impl IndexScheduler { + /// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`]. + /// + /// ## Arguments + /// - `rtxn`: read transaction + /// - `index_uid`: name of the index affected by the operations of the autobatch + /// - `batch`: the result of the autobatcher + pub(crate) fn create_next_batch_index( + &self, + rtxn: &RoTxn, + index_uid: String, + batch: BatchKind, + current_batch: &mut ProcessingBatch, + must_create_index: bool, + ) -> Result> { + match batch { + BatchKind::DocumentClear { ids } => Ok(Some(Batch::IndexOperation { + op: IndexOperation::DocumentClear { + tasks: self.queue.get_existing_tasks_for_processing_batch( + rtxn, + current_batch, + ids, + )?, + index_uid, + }, + must_create_index, + })), + BatchKind::DocumentEdition { id } => { + let mut task = + self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; + current_batch.processing(Some(&mut task)); + match &task.kind { + KindWithContent::DocumentEdition { index_uid, .. } => { + Ok(Some(Batch::IndexOperation { + op: IndexOperation::DocumentEdition { + index_uid: index_uid.clone(), + task, + }, + must_create_index: false, + })) + } + _ => unreachable!(), + } + } + BatchKind::DocumentOperation { method, operation_ids, .. 
} => { + let tasks = self.queue.get_existing_tasks_for_processing_batch( + rtxn, + current_batch, + operation_ids, + )?; + let primary_key = tasks + .iter() + .find_map(|task| match task.kind { + KindWithContent::DocumentAdditionOrUpdate { ref primary_key, .. } => { + // we want to stop on the first document addition + Some(primary_key.clone()) + } + KindWithContent::DocumentDeletion { .. } => None, + _ => unreachable!(), + }) + .flatten(); + + let mut operations = Vec::new(); + + for task in tasks.iter() { + match task.kind { + KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => { + operations.push(DocumentOperation::Add(content_file)); + } + KindWithContent::DocumentDeletion { ref documents_ids, .. } => { + operations.push(DocumentOperation::Delete(documents_ids.clone())); + } + _ => unreachable!(), + } + } + + Ok(Some(Batch::IndexOperation { + op: IndexOperation::DocumentOperation { + index_uid, + primary_key, + method, + operations, + tasks, + }, + must_create_index, + })) + } + BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: _ } => { + let tasks = self.queue.get_existing_tasks_for_processing_batch( + rtxn, + current_batch, + deletion_ids, + )?; + + Ok(Some(Batch::IndexOperation { + op: IndexOperation::DocumentDeletion { index_uid, tasks }, + must_create_index, + })) + } + BatchKind::Settings { settings_ids, .. } => { + let tasks = self.queue.get_existing_tasks_for_processing_batch( + rtxn, + current_batch, + settings_ids, + )?; + + let mut settings = Vec::new(); + for task in &tasks { + match task.kind { + KindWithContent::SettingsUpdate { + ref new_settings, is_deletion, .. 
+ } => settings.push((is_deletion, *new_settings.clone())), + _ => unreachable!(), + } + } + + Ok(Some(Batch::IndexOperation { + op: IndexOperation::Settings { index_uid, settings, tasks }, + must_create_index, + })) + } + BatchKind::ClearAndSettings { other, settings_ids, allow_index_creation } => { + let (index_uid, settings, settings_tasks) = match self + .create_next_batch_index( + rtxn, + index_uid, + BatchKind::Settings { settings_ids, allow_index_creation }, + current_batch, + must_create_index, + )? + .unwrap() + { + Batch::IndexOperation { + op: IndexOperation::Settings { index_uid, settings, tasks, .. }, + .. + } => (index_uid, settings, tasks), + _ => unreachable!(), + }; + let (index_uid, cleared_tasks) = match self + .create_next_batch_index( + rtxn, + index_uid, + BatchKind::DocumentClear { ids: other }, + current_batch, + must_create_index, + )? + .unwrap() + { + Batch::IndexOperation { + op: IndexOperation::DocumentClear { index_uid, tasks }, + .. + } => (index_uid, tasks), + _ => unreachable!(), + }; + + Ok(Some(Batch::IndexOperation { + op: IndexOperation::DocumentClearAndSetting { + index_uid, + cleared_tasks, + settings, + settings_tasks, + }, + must_create_index, + })) + } + BatchKind::IndexCreation { id } => { + let mut task = + self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; + current_batch.processing(Some(&mut task)); + let (index_uid, primary_key) = match &task.kind { + KindWithContent::IndexCreation { index_uid, primary_key } => { + (index_uid.clone(), primary_key.clone()) + } + _ => unreachable!(), + }; + Ok(Some(Batch::IndexCreation { index_uid, primary_key, task })) + } + BatchKind::IndexUpdate { id } => { + let mut task = + self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; + current_batch.processing(Some(&mut task)); + let primary_key = match &task.kind { + KindWithContent::IndexUpdate { primary_key, .. 
} => primary_key.clone(), + _ => unreachable!(), + }; + Ok(Some(Batch::IndexUpdate { index_uid, primary_key, task })) + } + BatchKind::IndexDeletion { ids } => Ok(Some(Batch::IndexDeletion { + index_uid, + index_has_been_created: must_create_index, + tasks: self.queue.get_existing_tasks_for_processing_batch( + rtxn, + current_batch, + ids, + )?, + })), + BatchKind::IndexSwap { id } => { + let mut task = + self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; + current_batch.processing(Some(&mut task)); + Ok(Some(Batch::IndexSwap { task })) + } + } + } + + /// Create the next batch to be processed; + /// 1. We get the *last* task to cancel. + /// 2. We get the *next* task to delete. + /// 3. We get the *next* snapshot to process. + /// 4. We get the *next* dump to process. + /// 5. We get the *next* tasks to process for a specific index. + #[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")] + pub(crate) fn create_next_batch( + &self, + rtxn: &RoTxn, + ) -> Result> { + #[cfg(test)] + self.maybe_fail(crate::test_utils::FailureLocation::InsideCreateBatch)?; + + let batch_id = self.queue.batches.next_batch_id(rtxn)?; + let mut current_batch = ProcessingBatch::new(batch_id); + + let enqueued = &self.queue.tasks.get_status(rtxn, Status::Enqueued)?; + let to_cancel = self.queue.tasks.get_kind(rtxn, Kind::TaskCancelation)? & enqueued; + + // 1. we get the last task to cancel. + if let Some(task_id) = to_cancel.max() { + let mut task = + self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; + current_batch.processing(Some(&mut task)); + return Ok(Some((Batch::TaskCancelation { task }, current_batch))); + } + + // 2. we get the next task to delete + let to_delete = self.queue.tasks.get_kind(rtxn, Kind::TaskDeletion)? 
& enqueued; + if !to_delete.is_empty() { + let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_delete)?; + current_batch.processing(&mut tasks); + return Ok(Some((Batch::TaskDeletions(tasks), current_batch))); + } + + // 3. we batch the snapshot. + let to_snapshot = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued; + if !to_snapshot.is_empty() { + let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_snapshot)?; + current_batch.processing(&mut tasks); + return Ok(Some((Batch::SnapshotCreation(tasks), current_batch))); + } + + // 4. we batch the dumps. + let to_dump = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & enqueued; + if let Some(to_dump) = to_dump.min() { + let mut task = + self.queue.tasks.get_task(rtxn, to_dump)?.ok_or(Error::CorruptedTaskQueue)?; + current_batch.processing(Some(&mut task)); + return Ok(Some((Batch::Dump(task), current_batch))); + } + + // 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task. + let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) }; + let mut task = + self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; + + // If the task is not associated with any index, verify that it is an index swap and + // create the batch directly. 
Otherwise, get the index name associated with the task + // and use the autobatcher to batch the enqueued tasks associated with it + + let index_name = if let Some(&index_name) = task.indexes().first() { + index_name + } else { + assert!(matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty())); + current_batch.processing(Some(&mut task)); + return Ok(Some((Batch::IndexSwap { task }, current_batch))); + }; + + let index_already_exists = self.index_mapper.exists(rtxn, index_name)?; + let mut primary_key = None; + if index_already_exists { + let index = self.index_mapper.index(rtxn, index_name)?; + let rtxn = index.read_txn()?; + primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string()); + } + + let index_tasks = self.queue.tasks.index_tasks(rtxn, index_name)? & enqueued; + + // If autobatching is disabled we only take one task at a time. + // Otherwise, we take only a maximum of tasks to create batches. + let tasks_limit = if self.scheduler.autobatching_enabled { + self.scheduler.max_number_of_batched_tasks + } else { + 1 + }; + + let enqueued = index_tasks + .into_iter() + .take(tasks_limit) + .map(|task_id| { + self.queue + .tasks + .get_task(rtxn, task_id) + .and_then(|task| task.ok_or(Error::CorruptedTaskQueue)) + .map(|task| (task.uid, task.kind)) + }) + .collect::>>()?; + + if let Some((batchkind, create_index)) = + autobatcher::autobatch(enqueued, index_already_exists, primary_key.as_deref()) + { + return Ok(self + .create_next_batch_index( + rtxn, + index_name.to_string(), + batchkind, + &mut current_batch, + create_index, + )? + .map(|batch| (batch, current_batch))); + } + + // If we found no tasks then we were notified for something that got autobatched + // somehow and there is nothing to do. 
/// A shared flag used to ask the currently processing tasks to stop.
///
/// Cloning the value yields a handle to the same underlying flag: a change made through
/// one clone is visible through all the others.
#[derive(Default, Clone, Debug)]
pub struct MustStopProcessing(Arc<AtomicBool>);

impl MustStopProcessing {
    /// Return `true` if a stop has been requested and not reset since.
    pub fn get(&self) -> bool {
        self.0.load(Ordering::Relaxed)
    }

    /// Request the processing to stop.
    pub fn must_stop(&self) {
        self.0.store(true, Ordering::Relaxed);
    }

    /// Clear a previous stop request.
    pub fn reset(&self) {
        self.0.store(false, Ordering::Relaxed);
    }
}
+ pub(crate) snapshots_path: PathBuf, + + /// The path to the folder containing the auth LMDB env. + pub(crate) auth_path: PathBuf, + + /// The path to the version file of Meilisearch. + pub(crate) version_file_path: PathBuf, +} + +impl Scheduler { + pub(crate) fn private_clone(&self) -> Scheduler { + Scheduler { + must_stop_processing: self.must_stop_processing.clone(), + wake_up: self.wake_up.clone(), + autobatching_enabled: self.autobatching_enabled, + max_number_of_batched_tasks: self.max_number_of_batched_tasks, + dumps_path: self.dumps_path.clone(), + snapshots_path: self.snapshots_path.clone(), + auth_path: self.auth_path.clone(), + version_file_path: self.version_file_path.clone(), + } + } + + pub fn new(options: &IndexSchedulerOptions) -> Scheduler { + Scheduler { + must_stop_processing: MustStopProcessing::default(), + // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things + wake_up: Arc::new(SignalEvent::auto(true)), + autobatching_enabled: options.autobatching_enabled, + max_number_of_batched_tasks: options.max_number_of_batched_tasks, + dumps_path: options.dumps_path.clone(), + snapshots_path: options.snapshots_path.clone(), + auth_path: options.auth_path.clone(), + version_file_path: options.version_file_path.clone(), + } + } +} + +impl IndexScheduler { + /// Perform one iteration of the run loop. + /// + /// 1. See if we need to cleanup the task queue + /// 2. Find the next batch of tasks to be processed. + /// 3. Update the information of these tasks following the start of their processing. + /// 4. Update the in-memory list of processed tasks accordingly. + /// 5. Process the batch: + /// - perform the actions of each batched task + /// - update the information of each batched task following the end + /// of their processing. + /// 6. Reset the in-memory list of processed tasks. + /// + /// Returns the number of processed tasks. 
+ pub(crate) fn tick(&self) -> Result { + #[cfg(test)] + { + *self.run_loop_iteration.write().unwrap() += 1; + self.breakpoint(crate::test_utils::Breakpoint::Start); + } + + if self.cleanup_enabled { + let mut wtxn = self.env.write_txn()?; + self.queue.cleanup_task_queue(&mut wtxn)?; + wtxn.commit()?; + } + + let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?; + let (batch, mut processing_batch) = + match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? { + Some(batch) => batch, + None => return Ok(TickOutcome::WaitForSignal), + }; + let index_uid = batch.index_uid().map(ToOwned::to_owned); + drop(rtxn); + + // 1. store the starting date with the bitmap of processing tasks. + let mut ids = batch.ids(); + let processed_tasks = ids.len(); + + // We reset the must_stop flag to be sure that we don't stop processing tasks + self.scheduler.must_stop_processing.reset(); + let progress = self + .processing_tasks + .write() + .unwrap() + // We can clone the processing batch here because we don't want its modification to affect the view of the processing batches + .start_processing(processing_batch.clone(), ids.clone()); + + #[cfg(test)] + self.breakpoint(crate::test_utils::Breakpoint::BatchCreated); + + // 2. 
Process the tasks + let res = { + let cloned_index_scheduler = self.private_clone(); + let processing_batch = &mut processing_batch; + let progress = progress.clone(); + std::thread::scope(|s| { + let handle = std::thread::Builder::new() + .name(String::from("batch-operation")) + .spawn_scoped(s, move || { + cloned_index_scheduler.process_batch(batch, processing_batch, progress) + }) + .unwrap(); + handle.join().unwrap_or(Err(Error::ProcessBatchPanicked)) + }) + }; + + // Reset the currently updating index to relinquish the index handle + self.index_mapper.set_currently_updating_index(None); + + #[cfg(test)] + self.maybe_fail(crate::test_utils::FailureLocation::AcquiringWtxn)?; + + progress.update_progress(BatchProgress::WritingTasksToDisk); + processing_batch.finished(); + let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?; + let mut canceled = RoaringBitmap::new(); + + match res { + Ok(tasks) => { + #[cfg(test)] + self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchSucceeded); + + let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32); + progress.update_progress(task_progress_obj); + let mut success = 0; + let mut failure = 0; + let mut canceled_by = None; + + #[allow(unused_variables)] + for (i, mut task) in tasks.into_iter().enumerate() { + task_progress.fetch_add(1, Ordering::Relaxed); + processing_batch.update(&mut task); + if task.status == Status::Canceled { + canceled.insert(task.uid); + canceled_by = task.canceled_by; + } + + #[cfg(test)] + self.maybe_fail( + crate::test_utils::FailureLocation::UpdatingTaskAfterProcessBatchSuccess { + task_uid: i as u32, + }, + )?; + + match task.error { + Some(_) => failure += 1, + None => success += 1, + } + + self.queue + .tasks + .update_task(&mut wtxn, &task) + .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; + } + if let Some(canceled_by) = canceled_by { + self.queue.tasks.canceled_by.put(&mut wtxn, &canceled_by, &canceled)?; + } + tracing::info!("A batch of 
tasks was successfully completed with {success} successful tasks and {failure} failed tasks."); + } + // If we have an abortion error we must stop the tick here and re-schedule tasks. + Err(Error::Milli { + error: milli::Error::InternalError(milli::InternalError::AbortedIndexation), + .. + }) + | Err(Error::AbortedTask) => { + #[cfg(test)] + self.breakpoint(crate::test_utils::Breakpoint::AbortedIndexation); + wtxn.abort(); + + tracing::info!("A batch of tasks was aborted."); + // We make sure that we don't call `stop_processing` on the `processing_tasks`, + // this is because we want to let the next tick call `create_next_batch` and keep + // the `started_at` date times and `processings` of the current processing tasks. + // This date time is used by the task cancelation to store the right `started_at` + // date in the task on disk. + return Ok(TickOutcome::TickAgain(0)); + } + // If an index said it was full, we need to: + // 1. identify which index is full + // 2. close the associated environment + // 3. resize it + // 4. re-schedule tasks + Err(Error::Milli { + error: milli::Error::UserError(milli::UserError::MaxDatabaseSizeReached), + .. + }) if index_uid.is_some() => { + // fixme: add index_uid to match to avoid the unwrap + let index_uid = index_uid.unwrap(); + // fixme: handle error more gracefully? not sure when this could happen + self.index_mapper.resize_index(&wtxn, &index_uid)?; + wtxn.abort(); + + tracing::info!("The max database size was reached. Resizing the index."); + + return Ok(TickOutcome::TickAgain(0)); + } + // In case of a failure we must get back and patch all the tasks with the error. 
+ Err(err) => { + #[cfg(test)] + self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchFailed); + let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32); + progress.update_progress(task_progress_obj); + + let error: ResponseError = err.into(); + for id in ids.iter() { + task_progress.fetch_add(1, Ordering::Relaxed); + let mut task = self + .queue + .tasks + .get_task(&wtxn, id) + .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))? + .ok_or(Error::CorruptedTaskQueue)?; + task.status = Status::Failed; + task.error = Some(error.clone()); + task.details = task.details.map(|d| d.to_failed()); + processing_batch.update(&mut task); + + #[cfg(test)] + self.maybe_fail( + crate::test_utils::FailureLocation::UpdatingTaskAfterProcessBatchFailure, + )?; + + tracing::error!("Batch failed {}", error); + + self.queue + .tasks + .update_task(&mut wtxn, &task) + .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; + } + } + } + + // We must re-add the canceled task so they're part of the same batch. + ids |= canceled; + self.queue.write_batch(&mut wtxn, processing_batch, &ids)?; + + #[cfg(test)] + self.maybe_fail(crate::test_utils::FailureLocation::CommittingWtxn)?; + + wtxn.commit().map_err(Error::HeedTransaction)?; + + // We should stop processing AFTER everything is processed and written to disk otherwise, a batch (which only lives in RAM) may appear in the processing task + // and then become « not found » for some time until the commit everything is written and the final commit is made. + self.processing_tasks.write().unwrap().stop_processing(); + + // Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart + tracing::debug!("Deleting the update files"); + + //We take one read transaction **per thread**. 
Then, every thread is going to pull out new IDs from the roaring bitmap with the help of an atomic shared index into the bitmap + let idx = AtomicU32::new(0); + (0..current_num_threads()).into_par_iter().try_for_each(|_| -> Result<()> { + let rtxn = self.read_txn()?; + while let Some(id) = ids.select(idx.fetch_add(1, Ordering::Relaxed)) { + let task = self + .queue + .tasks + .get_task(&rtxn, id) + .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))? + .ok_or(Error::CorruptedTaskQueue)?; + if let Err(e) = self.queue.delete_persisted_task_data(&task) { + tracing::error!( + "Failure to delete the content files associated with task {}. Error: {e}", + task.uid + ); + } + } + Ok(()) + })?; + + // We shouldn't crash the tick function if we can't send data to the webhook. + let _ = self.notify_webhook(&ids); + + #[cfg(test)] + self.breakpoint(crate::test_utils::Breakpoint::AfterProcessing); + + Ok(TickOutcome::TickAgain(processed_tasks)) + } +} diff --git a/crates/index-scheduler/src/scheduler/process_batch.rs b/crates/index-scheduler/src/scheduler/process_batch.rs new file mode 100644 index 000000000..9a86939a4 --- /dev/null +++ b/crates/index-scheduler/src/scheduler/process_batch.rs @@ -0,0 +1,581 @@ +use std::collections::{BTreeSet, HashMap, HashSet}; +use std::sync::atomic::Ordering; + +use meilisearch_types::batches::BatchId; +use meilisearch_types::heed::{RoTxn, RwTxn}; +use meilisearch_types::milli::progress::Progress; +use meilisearch_types::milli::{self}; +use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task}; +use milli::update::Settings as MilliSettings; +use roaring::RoaringBitmap; + +use super::create_batch::Batch; +use crate::processing::{ + AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress, + InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress, + UpdateIndexProgress, VariableNameStep, +}; +use crate::utils::{self, swap_index_uid_in_task, ProcessingBatch}; +use 
crate::{Error, IndexScheduler, Result, TaskId}; + +impl IndexScheduler { + /// Apply the operation associated with the given batch. + /// + /// ## Return + /// The list of tasks that were processed. The metadata of each task in the returned + /// list is updated accordingly, with the exception of the its date fields + /// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at). + #[tracing::instrument(level = "trace", skip(self, batch, progress), target = "indexing::scheduler", fields(batch=batch.to_string()))] + pub(crate) fn process_batch( + &self, + batch: Batch, + current_batch: &mut ProcessingBatch, + progress: Progress, + ) -> Result> { + #[cfg(test)] + { + self.maybe_fail(crate::test_utils::FailureLocation::InsideProcessBatch)?; + self.maybe_fail(crate::test_utils::FailureLocation::PanicInsideProcessBatch)?; + self.breakpoint(crate::test_utils::Breakpoint::InsideProcessBatch); + } + + match batch { + Batch::TaskCancelation { mut task } => { + // 1. Retrieve the tasks that matched the query at enqueue-time. + let matched_tasks = + if let KindWithContent::TaskCancelation { tasks, query: _ } = &task.kind { + tasks + } else { + unreachable!() + }; + + let rtxn = self.env.read_txn()?; + let mut canceled_tasks = self.cancel_matched_tasks( + &rtxn, + task.uid, + current_batch, + matched_tasks, + &progress, + )?; + + task.status = Status::Succeeded; + match &mut task.details { + Some(Details::TaskCancelation { + matched_tasks: _, + canceled_tasks: canceled_tasks_details, + original_filter: _, + }) => { + *canceled_tasks_details = Some(canceled_tasks.len() as u64); + } + _ => unreachable!(), + } + + canceled_tasks.push(task); + + Ok(canceled_tasks) + } + Batch::TaskDeletions(mut tasks) => { + // 1. Retrieve the tasks that matched the query at enqueue-time. 
+ let mut matched_tasks = RoaringBitmap::new(); + + for task in tasks.iter() { + if let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind { + matched_tasks |= tasks; + } else { + unreachable!() + } + } + + let mut wtxn = self.env.write_txn()?; + let mut deleted_tasks = + self.delete_matched_tasks(&mut wtxn, &matched_tasks, &progress)?; + wtxn.commit()?; + + for task in tasks.iter_mut() { + task.status = Status::Succeeded; + let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind else { + unreachable!() + }; + + let deleted_tasks_count = deleted_tasks.intersection_len(tasks); + deleted_tasks -= tasks; + + match &mut task.details { + Some(Details::TaskDeletion { + matched_tasks: _, + deleted_tasks, + original_filter: _, + }) => { + *deleted_tasks = Some(deleted_tasks_count); + } + _ => unreachable!(), + } + } + Ok(tasks) + } + Batch::SnapshotCreation(tasks) => self.process_snapshot(progress, tasks), + Batch::Dump(task) => self.process_dump_creation(progress, task), + Batch::IndexOperation { op, must_create_index } => { + let index_uid = op.index_uid().to_string(); + let index = if must_create_index { + // create the index if it doesn't already exist + let wtxn = self.env.write_txn()?; + self.index_mapper.create_index(wtxn, &index_uid, None)? + } else { + let rtxn = self.env.read_txn()?; + self.index_mapper.index(&rtxn, &index_uid)? + }; + + // the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick + self.index_mapper + .set_currently_updating_index(Some((index_uid.clone(), index.clone()))); + + let mut index_wtxn = index.write_txn()?; + let tasks = self.apply_index_operation(&mut index_wtxn, &index, op, progress)?; + + { + let span = tracing::trace_span!(target: "indexing::scheduler", "commit"); + let _entered = span.enter(); + + index_wtxn.commit()?; + } + + // if the update processed successfully, we're going to store the new + // stats of the index. 
Since the tasks have already been processed and + // this is a non-critical operation. If it fails, we should not fail + // the entire batch. + let res = || -> Result<()> { + let index_rtxn = index.read_txn()?; + let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn) + .map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?; + let mut wtxn = self.env.write_txn()?; + self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?; + wtxn.commit()?; + Ok(()) + }(); + + match res { + Ok(_) => (), + Err(e) => tracing::error!( + error = &e as &dyn std::error::Error, + "Could not write the stats of the index" + ), + } + + Ok(tasks) + } + Batch::IndexCreation { index_uid, primary_key, task } => { + progress.update_progress(CreateIndexProgress::CreatingTheIndex); + + let wtxn = self.env.write_txn()?; + if self.index_mapper.exists(&wtxn, &index_uid)? { + return Err(Error::IndexAlreadyExists(index_uid)); + } + self.index_mapper.create_index(wtxn, &index_uid, None)?; + + self.process_batch( + Batch::IndexUpdate { index_uid, primary_key, task }, + current_batch, + progress, + ) + } + Batch::IndexUpdate { index_uid, primary_key, mut task } => { + progress.update_progress(UpdateIndexProgress::UpdatingTheIndex); + let rtxn = self.env.read_txn()?; + let index = self.index_mapper.index(&rtxn, &index_uid)?; + + if let Some(primary_key) = primary_key.clone() { + let mut index_wtxn = index.write_txn()?; + let mut builder = MilliSettings::new( + &mut index_wtxn, + &index, + self.index_mapper.indexer_config(), + ); + builder.set_primary_key(primary_key); + let must_stop_processing = self.scheduler.must_stop_processing.clone(); + builder + .execute( + |indexing_step| tracing::debug!(update = ?indexing_step), + || must_stop_processing.get(), + ) + .map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?; + index_wtxn.commit()?; + } + + // drop rtxn before starting a new wtxn on the same db + rtxn.commit()?; + + task.status = Status::Succeeded; + 
task.details = Some(Details::IndexInfo { primary_key }); + + // if the update processed successfully, we're going to store the new + // stats of the index. Since the tasks have already been processed and + // this is a non-critical operation. If it fails, we should not fail + // the entire batch. + let res = || -> Result<()> { + let mut wtxn = self.env.write_txn()?; + let index_rtxn = index.read_txn()?; + let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn) + .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; + self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?; + wtxn.commit()?; + Ok(()) + }(); + + match res { + Ok(_) => (), + Err(e) => tracing::error!( + error = &e as &dyn std::error::Error, + "Could not write the stats of the index" + ), + } + + Ok(vec![task]) + } + Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => { + progress.update_progress(DeleteIndexProgress::DeletingTheIndex); + let wtxn = self.env.write_txn()?; + + // it's possible that the index doesn't exist + let number_of_documents = || -> Result { + let index = self.index_mapper.index(&wtxn, &index_uid)?; + let index_rtxn = index.read_txn()?; + index + .number_of_documents(&index_rtxn) + .map_err(|e| Error::from_milli(e, Some(index_uid.to_string()))) + }() + .unwrap_or_default(); + + // The write transaction is directly owned and committed inside. + match self.index_mapper.delete_index(wtxn, &index_uid) { + Ok(()) => (), + Err(Error::IndexNotFound(_)) if index_has_been_created => (), + Err(e) => return Err(e), + } + + // We set all the tasks details to the default value. + for task in &mut tasks { + task.status = Status::Succeeded; + task.details = match &task.kind { + KindWithContent::IndexDeletion { .. 
} => {
                            Some(Details::ClearAll { deleted_documents: Some(number_of_documents) })
                        }
                        otherwise => otherwise.default_finished_details(),
                    };
                }

                Ok(tasks)
            }
            Batch::IndexSwap { mut task } => {
                progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap);

                let mut wtxn = self.env.write_txn()?;
                let swaps = if let KindWithContent::IndexSwap { swaps } = &task.kind {
                    swaps
                } else {
                    unreachable!()
                };
                let mut not_found_indexes = BTreeSet::new();
                for IndexSwap { indexes: (lhs, rhs) } in swaps {
                    for index in [lhs, rhs] {
                        let index_exists = self.index_mapper.index_exists(&wtxn, index)?;
                        if !index_exists {
                            not_found_indexes.insert(index);
                        }
                    }
                }
                if !not_found_indexes.is_empty() {
                    if not_found_indexes.len() == 1 {
                        return Err(Error::SwapIndexNotFound(
                            not_found_indexes.into_iter().next().unwrap().clone(),
                        ));
                    } else {
                        return Err(Error::SwapIndexesNotFound(
                            not_found_indexes.into_iter().cloned().collect(),
                        ));
                    }
                }
                progress.update_progress(SwappingTheIndexes::SwappingTheIndexes);
                for (step, swap) in swaps.iter().enumerate() {
                    progress.update_progress(VariableNameStep::new(
                        format!("swapping index {} and {}", swap.indexes.0, swap.indexes.1),
                        step as u32,
                        swaps.len() as u32,
                    ));
                    self.apply_index_swap(
                        &mut wtxn,
                        &progress,
                        task.uid,
                        &swap.indexes.0,
                        &swap.indexes.1,
                    )?;
                }
                wtxn.commit()?;
                task.status = Status::Succeeded;
                Ok(vec![task])
            }
        }
    }

    /// Exchange the index `lhs` with the index `rhs`.
    ///
    /// Every task registered for either index *before* `task_id` (the swap task itself)
    /// gets its index uid rewritten, the per-index task bitmaps are exchanged
    /// accordingly, and finally the two indexes are swapped in the index mapper.
    ///
    /// Fails with [`Error::IndexNotFound`] when one of the two indexes does not exist
    /// (`lhs` is checked first) and with [`Error::CorruptedTaskQueue`] when a task id
    /// referenced by an index cannot be found in the task database.
    fn apply_index_swap(
        &self,
        wtxn: &mut RwTxn,
        progress: &Progress,
        task_id: u32,
        lhs: &str,
        rhs: &str,
    ) -> Result<()> {
        progress.update_progress(InnerSwappingTwoIndexes::RetrieveTheTasks);
        // 1. Both sides of the swap must be existing indexes.
        for uid in [lhs, rhs] {
            if !self.index_mapper.index_exists(wtxn, uid)? {
                return Err(Error::IndexNotFound(uid.to_owned()));
            }
        }

        // 2. For each index, keep only the tasks that were registered before the swap task.
        let mut tasks_of_lhs = self.queue.tasks.index_tasks(wtxn, lhs)?;
        tasks_of_lhs.remove_range(task_id..);
        let mut tasks_of_rhs = self.queue.tasks.index_tasks(wtxn, rhs)?;
        tasks_of_rhs.remove_range(task_id..);

        // 3. Rewrite the index uid stored in the `KindWithContent` of every such task.
        progress.update_progress(InnerSwappingTwoIndexes::UpdateTheTasks);
        let swapped_tasks = &tasks_of_lhs | &tasks_of_rhs;
        let (done, step) = AtomicTaskStep::new(swapped_tasks.len() as u32);
        progress.update_progress(step);

        for id in swapped_tasks {
            let fetched = self.queue.tasks.get_task(wtxn, id)?;
            let mut task = fetched.ok_or(Error::CorruptedTaskQueue)?;
            swap_index_uid_in_task(&mut task, (lhs, rhs));
            self.queue.tasks.all_tasks.put(wtxn, &id, &task)?;
            done.fetch_add(1, Ordering::Relaxed);
        }

        // 4. Each index loses its own old tasks...
        // 5. ...and receives the old tasks of the other index.
        progress.update_progress(InnerSwappingTwoIndexes::UpdateTheIndexesMetadata);
        self.queue.tasks.update_index(wtxn, lhs, |bitmap| {
            *bitmap -= &tasks_of_lhs;
            *bitmap |= &tasks_of_rhs;
        })?;
        self.queue.tasks.update_index(wtxn, rhs, |bitmap| {
            *bitmap -= &tasks_of_rhs;
            *bitmap |= &tasks_of_lhs;
        })?;

        // 6. Finally exchange the two entries in the index mapper.
        self.index_mapper.swap(wtxn, lhs, rhs)?;

        Ok(())
    }

    /// Delete each given task from all the databases (if it is deleteable).
    ///
    /// Return the ids of the tasks that were actually deleted.
+ fn delete_matched_tasks( + &self, + wtxn: &mut RwTxn, + matched_tasks: &RoaringBitmap, + progress: &Progress, + ) -> Result { + progress.update_progress(TaskDeletionProgress::DeletingTasksDateTime); + + // 1. Remove from this list the tasks that we are not allowed to delete + let enqueued_tasks = self.queue.tasks.get_status(wtxn, Status::Enqueued)?; + let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone(); + + let all_task_ids = self.queue.tasks.all_task_ids(wtxn)?; + let mut to_delete_tasks = all_task_ids & matched_tasks; + to_delete_tasks -= &**processing_tasks; + to_delete_tasks -= &enqueued_tasks; + + // 2. We now have a list of tasks to delete, delete them + + let mut affected_indexes = HashSet::new(); + let mut affected_statuses = HashSet::new(); + let mut affected_kinds = HashSet::new(); + let mut affected_canceled_by = RoaringBitmap::new(); + // The tasks that have been removed *per batches*. + let mut affected_batches: HashMap = HashMap::new(); + + let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32); + progress.update_progress(task_progress); + for task_id in to_delete_tasks.iter() { + let task = + self.queue.tasks.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; + + affected_indexes.extend(task.indexes().into_iter().map(|x| x.to_owned())); + affected_statuses.insert(task.status); + affected_kinds.insert(task.kind.as_kind()); + // Note: don't delete the persisted task data since + // we can only delete succeeded, failed, and canceled tasks. + // In each of those cases, the persisted data is supposed to + // have been deleted already. 
+ utils::remove_task_datetime( + wtxn, + self.queue.tasks.enqueued_at, + task.enqueued_at, + task.uid, + )?; + if let Some(started_at) = task.started_at { + utils::remove_task_datetime( + wtxn, + self.queue.tasks.started_at, + started_at, + task.uid, + )?; + } + if let Some(finished_at) = task.finished_at { + utils::remove_task_datetime( + wtxn, + self.queue.tasks.finished_at, + finished_at, + task.uid, + )?; + } + if let Some(canceled_by) = task.canceled_by { + affected_canceled_by.insert(canceled_by); + } + if let Some(batch_uid) = task.batch_uid { + affected_batches.entry(batch_uid).or_default().insert(task_id); + } + atomic_progress.fetch_add(1, Ordering::Relaxed); + } + + progress.update_progress(TaskDeletionProgress::DeletingTasksMetadata); + let (atomic_progress, task_progress) = AtomicTaskStep::new( + (affected_indexes.len() + affected_statuses.len() + affected_kinds.len()) as u32, + ); + progress.update_progress(task_progress); + for index in affected_indexes.iter() { + self.queue.tasks.update_index(wtxn, index, |bitmap| *bitmap -= &to_delete_tasks)?; + atomic_progress.fetch_add(1, Ordering::Relaxed); + } + + for status in affected_statuses.iter() { + self.queue.tasks.update_status(wtxn, *status, |bitmap| *bitmap -= &to_delete_tasks)?; + atomic_progress.fetch_add(1, Ordering::Relaxed); + } + + for kind in affected_kinds.iter() { + self.queue.tasks.update_kind(wtxn, *kind, |bitmap| *bitmap -= &to_delete_tasks)?; + atomic_progress.fetch_add(1, Ordering::Relaxed); + } + + progress.update_progress(TaskDeletionProgress::DeletingTasks); + let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32); + progress.update_progress(task_progress); + for task in to_delete_tasks.iter() { + self.queue.tasks.all_tasks.delete(wtxn, &task)?; + atomic_progress.fetch_add(1, Ordering::Relaxed); + } + for canceled_by in affected_canceled_by { + if let Some(mut tasks) = self.queue.tasks.canceled_by.get(wtxn, &canceled_by)? 
{ + tasks -= &to_delete_tasks; + if tasks.is_empty() { + self.queue.tasks.canceled_by.delete(wtxn, &canceled_by)?; + } else { + self.queue.tasks.canceled_by.put(wtxn, &canceled_by, &tasks)?; + } + } + } + progress.update_progress(TaskDeletionProgress::DeletingBatches); + let (atomic_progress, batch_progress) = AtomicBatchStep::new(affected_batches.len() as u32); + progress.update_progress(batch_progress); + for (batch_id, to_delete_tasks) in affected_batches { + if let Some(mut tasks) = self.queue.batch_to_tasks_mapping.get(wtxn, &batch_id)? { + tasks -= &to_delete_tasks; + // We must remove the batch entirely + if tasks.is_empty() { + self.queue.batches.all_batches.delete(wtxn, &batch_id)?; + self.queue.batch_to_tasks_mapping.delete(wtxn, &batch_id)?; + } + // Anyway, we must remove the batch from all its reverse indexes. + // The only way to do that is to check + + for index in affected_indexes.iter() { + let index_tasks = self.queue.tasks.index_tasks(wtxn, index)?; + let remaining_index_tasks = index_tasks & &tasks; + if remaining_index_tasks.is_empty() { + self.queue.batches.update_index(wtxn, index, |bitmap| { + bitmap.remove(batch_id); + })?; + } + } + + for status in affected_statuses.iter() { + let status_tasks = self.queue.tasks.get_status(wtxn, *status)?; + let remaining_status_tasks = status_tasks & &tasks; + if remaining_status_tasks.is_empty() { + self.queue.batches.update_status(wtxn, *status, |bitmap| { + bitmap.remove(batch_id); + })?; + } + } + + for kind in affected_kinds.iter() { + let kind_tasks = self.queue.tasks.get_kind(wtxn, *kind)?; + let remaining_kind_tasks = kind_tasks & &tasks; + if remaining_kind_tasks.is_empty() { + self.queue.batches.update_kind(wtxn, *kind, |bitmap| { + bitmap.remove(batch_id); + })?; + } + } + } + atomic_progress.fetch_add(1, Ordering::Relaxed); + } + + Ok(to_delete_tasks) + } + + /// Cancel each given task from all the databases (if it is cancelable). 
+ /// + /// Returns the list of tasks that matched the filter and must be written in the database. + fn cancel_matched_tasks( + &self, + rtxn: &RoTxn, + cancel_task_id: TaskId, + current_batch: &mut ProcessingBatch, + matched_tasks: &RoaringBitmap, + progress: &Progress, + ) -> Result> { + progress.update_progress(TaskCancelationProgress::RetrievingTasks); + + // 1. Remove from this list the tasks that we are not allowed to cancel + // Notice that only the _enqueued_ ones are cancelable and we should + // have already aborted the indexation of the _processing_ ones + let cancelable_tasks = self.queue.tasks.get_status(rtxn, Status::Enqueued)?; + let tasks_to_cancel = cancelable_tasks & matched_tasks; + + let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32); + progress.update_progress(progress_obj); + + // 2. We now have a list of tasks to cancel, cancel them + let mut tasks = self.queue.tasks.get_existing_tasks( + rtxn, + tasks_to_cancel.iter().inspect(|_| { + task_progress.fetch_add(1, Ordering::Relaxed); + }), + )?; + + progress.update_progress(TaskCancelationProgress::UpdatingTasks); + let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32); + progress.update_progress(progress_obj); + for task in tasks.iter_mut() { + task.status = Status::Canceled; + task.canceled_by = Some(cancel_task_id); + task.details = task.details.as_ref().map(|d| d.to_failed()); + current_batch.processing(Some(task)); + task_progress.fetch_add(1, Ordering::Relaxed); + } + + Ok(tasks) + } +} diff --git a/crates/index-scheduler/src/scheduler/process_dump_creation.rs b/crates/index-scheduler/src/scheduler/process_dump_creation.rs new file mode 100644 index 000000000..3770303da --- /dev/null +++ b/crates/index-scheduler/src/scheduler/process_dump_creation.rs @@ -0,0 +1,236 @@ +use std::fs::File; +use std::io::BufWriter; +use std::sync::atomic::Ordering; + +use dump::IndexMetadata; +use 
meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME; +use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; +use meilisearch_types::milli::progress::Progress; +use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; +use meilisearch_types::milli::{self}; +use meilisearch_types::tasks::{Details, KindWithContent, Status, Task}; +use time::macros::format_description; +use time::OffsetDateTime; + +use crate::processing::{ + AtomicDocumentStep, AtomicTaskStep, DumpCreationProgress, VariableNameStep, +}; +use crate::{Error, IndexScheduler, Result}; + +impl IndexScheduler { + pub(super) fn process_dump_creation( + &self, + progress: Progress, + mut task: Task, + ) -> Result> { + progress.update_progress(DumpCreationProgress::StartTheDumpCreation); + let started_at = OffsetDateTime::now_utc(); + let (keys, instance_uid) = + if let KindWithContent::DumpCreation { keys, instance_uid } = &task.kind { + (keys, instance_uid) + } else { + unreachable!(); + }; + let dump = dump::DumpWriter::new(*instance_uid)?; + + // 1. dump the keys + progress.update_progress(DumpCreationProgress::DumpTheApiKeys); + let mut dump_keys = dump.create_keys()?; + for key in keys { + dump_keys.push_key(key)?; + } + dump_keys.flush()?; + + let rtxn = self.env.read_txn()?; + + // 2. dump the tasks + progress.update_progress(DumpCreationProgress::DumpTheTasks); + let mut dump_tasks = dump.create_tasks_queue()?; + + let (atomic, update_task_progress) = + AtomicTaskStep::new(self.queue.tasks.all_tasks.len(&rtxn)? as u32); + progress.update_progress(update_task_progress); + + for ret in self.queue.tasks.all_tasks.iter(&rtxn)? { + if self.scheduler.must_stop_processing.get() { + return Err(Error::AbortedTask); + } + + let (_, mut t) = ret?; + let status = t.status; + let content_file = t.content_uuid(); + + // In the case we're dumping ourselves we want to be marked as finished + // to not loop over ourselves indefinitely. 
+ if t.uid == task.uid { + let finished_at = OffsetDateTime::now_utc(); + + // We're going to fake the date because we don't know if everything is going to go well. + // But we need to dump the task as finished and successful. + // If something fail everything will be set appropriately in the end. + t.status = Status::Succeeded; + t.started_at = Some(started_at); + t.finished_at = Some(finished_at); + } + let mut dump_content_file = dump_tasks.push_task(&t.into())?; + + // 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet. + if let Some(content_file) = content_file { + if self.scheduler.must_stop_processing.get() { + return Err(Error::AbortedTask); + } + if status == Status::Enqueued { + let content_file = self.queue.file_store.get_update(content_file)?; + + let reader = DocumentsBatchReader::from_reader(content_file) + .map_err(|e| Error::from_milli(e.into(), None))?; + + let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index(); + + while let Some(doc) = + cursor.next_document().map_err(|e| Error::from_milli(e.into(), None))? + { + dump_content_file.push_document( + &obkv_to_object(doc, &documents_batch_index) + .map_err(|e| Error::from_milli(e, None))?, + )?; + } + dump_content_file.flush()?; + } + } + atomic.fetch_add(1, Ordering::Relaxed); + } + dump_tasks.flush()?; + + // 3. Dump the indexes + progress.update_progress(DumpCreationProgress::DumpTheIndexes); + let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? 
as u32; + let mut count = 0; + self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> { + progress.update_progress(VariableNameStep::new(uid.to_string(), count, nb_indexes)); + count += 1; + + let rtxn = index.read_txn()?; + let metadata = IndexMetadata { + uid: uid.to_owned(), + primary_key: index.primary_key(&rtxn)?.map(String::from), + created_at: index + .created_at(&rtxn) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?, + updated_at: index + .updated_at(&rtxn) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?, + }; + let mut index_dumper = dump.create_index(uid, &metadata)?; + + let fields_ids_map = index.fields_ids_map(&rtxn)?; + let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); + let embedding_configs = index + .embedding_configs(&rtxn) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; + + let nb_documents = index + .number_of_documents(&rtxn) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))? + as u32; + let (atomic, update_document_progress) = AtomicDocumentStep::new(nb_documents); + progress.update_progress(update_document_progress); + let documents = index + .all_documents(&rtxn) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; + // 3.1. 
Dump the documents + for ret in documents { + if self.scheduler.must_stop_processing.get() { + return Err(Error::AbortedTask); + } + + let (id, doc) = ret.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; + + let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; + + 'inject_vectors: { + let embeddings = index + .embeddings(&rtxn, id) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; + + if embeddings.is_empty() { + break 'inject_vectors; + } + + let vectors = document + .entry(RESERVED_VECTORS_FIELD_NAME.to_owned()) + .or_insert(serde_json::Value::Object(Default::default())); + + let serde_json::Value::Object(vectors) = vectors else { + let user_err = + milli::Error::UserError(milli::UserError::InvalidVectorsMapType { + document_id: { + if let Ok(Some(Ok(index))) = index + .external_id_of(&rtxn, std::iter::once(id)) + .map(|it| it.into_iter().next()) + { + index + } else { + format!("internal docid={id}") + } + }, + value: vectors.clone(), + }); + + return Err(Error::from_milli(user_err, Some(uid.to_string()))); + }; + + for (embedder_name, embeddings) in embeddings { + let user_provided = embedding_configs + .iter() + .find(|conf| conf.name == embedder_name) + .is_some_and(|conf| conf.user_provided.contains(id)); + let embeddings = ExplicitVectors { + embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors( + embeddings, + )), + regenerate: !user_provided, + }; + vectors.insert(embedder_name, serde_json::to_value(embeddings).unwrap()); + } + } + + index_dumper.push_document(&document)?; + atomic.fetch_add(1, Ordering::Relaxed); + } + + // 3.2. Dump the settings + let settings = meilisearch_types::settings::settings( + index, + &rtxn, + meilisearch_types::settings::SecretPolicy::RevealSecrets, + ) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; + index_dumper.settings(&settings)?; + Ok(()) + })?; + + // 4. 
Dump experimental feature settings + progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures); + let features = self.features().runtime_features(); + dump.create_experimental_features(features)?; + + let dump_uid = started_at.format(format_description!( + "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" + )).unwrap(); + + if self.scheduler.must_stop_processing.get() { + return Err(Error::AbortedTask); + } + progress.update_progress(DumpCreationProgress::CompressTheDump); + let path = self.scheduler.dumps_path.join(format!("{}.dump", dump_uid)); + let file = File::create(path)?; + dump.persist_to(BufWriter::new(file))?; + + // if we reached this step we can tell the scheduler we succeeded to dump ourselves. + task.status = Status::Succeeded; + task.details = Some(Details::Dump { dump_uid: Some(dump_uid) }); + Ok(vec![task]) + } +} diff --git a/crates/index-scheduler/src/scheduler/process_index_operation.rs b/crates/index-scheduler/src/scheduler/process_index_operation.rs new file mode 100644 index 000000000..365f7acd4 --- /dev/null +++ b/crates/index-scheduler/src/scheduler/process_index_operation.rs @@ -0,0 +1,529 @@ +use bumpalo::collections::CollectIn; +use bumpalo::Bump; +use meilisearch_types::heed::RwTxn; +use meilisearch_types::milli::documents::PrimaryKey; +use meilisearch_types::milli::progress::Progress; +use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction}; +use meilisearch_types::milli::update::DocumentAdditionResult; +use meilisearch_types::milli::{self, Filter, ThreadPoolNoAbortBuilder}; +use meilisearch_types::settings::apply_settings_to_builder; +use meilisearch_types::tasks::{Details, KindWithContent, Status, Task}; +use meilisearch_types::Index; +use roaring::RoaringBitmap; + +use super::create_batch::{DocumentOperation, IndexOperation}; +use crate::processing::{ + DocumentDeletionProgress, DocumentEditionProgress, 
DocumentOperationProgress, SettingsProgress, +}; +use crate::{Error, IndexScheduler, Result}; + +impl IndexScheduler { + /// Process the index operation on the given index. + /// + /// ## Return + /// The list of processed tasks. + #[tracing::instrument( + level = "trace", + skip(self, index_wtxn, index, progress), + target = "indexing::scheduler" + )] + pub(crate) fn apply_index_operation<'i>( + &self, + index_wtxn: &mut RwTxn<'i>, + index: &'i Index, + operation: IndexOperation, + progress: Progress, + ) -> Result> { + let indexer_alloc = Bump::new(); + + let started_processing_at = std::time::Instant::now(); + let must_stop_processing = self.scheduler.must_stop_processing.clone(); + + match operation { + IndexOperation::DocumentClear { index_uid, mut tasks } => { + let count = milli::update::ClearDocuments::new(index_wtxn, index) + .execute() + .map_err(|e| Error::from_milli(e, Some(index_uid)))?; + + let mut first_clear_found = false; + for task in &mut tasks { + task.status = Status::Succeeded; + // The first document clear will effectively delete every documents + // in the database but the next ones will clear 0 documents. + task.details = match &task.kind { + KindWithContent::DocumentClear { .. } => { + let count = if first_clear_found { 0 } else { count }; + first_clear_found = true; + Some(Details::ClearAll { deleted_documents: Some(count) }) + } + otherwise => otherwise.default_details(), + }; + } + + Ok(tasks) + } + IndexOperation::DocumentOperation { + index_uid, + primary_key, + method, + operations, + mut tasks, + } => { + progress.update_progress(DocumentOperationProgress::RetrievingConfig); + // TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches. + // this is made difficult by the fact we're doing private clones of the index scheduler and sending it + // to a fresh thread. 
+ let mut content_files = Vec::new(); + for operation in &operations { + if let DocumentOperation::Add(content_uuid) = operation { + let content_file = self.queue.file_store.get_update(*content_uuid)?; + let mmap = unsafe { memmap2::Mmap::map(&content_file)? }; + if !mmap.is_empty() { + content_files.push(mmap); + } + } + } + + let rtxn = index.read_txn()?; + let db_fields_ids_map = index.fields_ids_map(&rtxn)?; + let mut new_fields_ids_map = db_fields_ids_map.clone(); + + let mut content_files_iter = content_files.iter(); + let mut indexer = indexer::DocumentOperation::new(method); + let embedders = index + .embedding_configs(index_wtxn) + .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; + let embedders = self.embedders(index_uid.clone(), embedders)?; + for operation in operations { + match operation { + DocumentOperation::Add(_content_uuid) => { + let mmap = content_files_iter.next().unwrap(); + indexer + .add_documents(mmap) + .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; + } + DocumentOperation::Delete(document_ids) => { + let document_ids: bumpalo::collections::vec::Vec<_> = document_ids + .iter() + .map(|s| &*indexer_alloc.alloc_str(s)) + .collect_in(&indexer_alloc); + indexer.delete_documents(document_ids.into_bump_slice()); + } + } + } + + let local_pool; + let indexer_config = self.index_mapper.indexer_config(); + let pool = match &indexer_config.thread_pool { + Some(pool) => pool, + None => { + local_pool = ThreadPoolNoAbortBuilder::new() + .thread_name(|i| format!("indexing-thread-{i}")) + .build() + .unwrap(); + &local_pool + } + }; + + progress.update_progress(DocumentOperationProgress::ComputingDocumentChanges); + let (document_changes, operation_stats, primary_key) = indexer + .into_changes( + &indexer_alloc, + index, + &rtxn, + primary_key.as_deref(), + &mut new_fields_ids_map, + &|| must_stop_processing.get(), + progress.clone(), + ) + .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; + + let mut 
candidates_count = 0; + for (stats, task) in operation_stats.into_iter().zip(&mut tasks) { + candidates_count += stats.document_count; + match stats.error { + Some(error) => { + task.status = Status::Failed; + task.error = Some(milli::Error::UserError(error).into()); + } + None => task.status = Status::Succeeded, + } + + task.details = match task.details { + Some(Details::DocumentAdditionOrUpdate { received_documents, .. }) => { + Some(Details::DocumentAdditionOrUpdate { + received_documents, + indexed_documents: Some(stats.document_count), + }) + } + Some(Details::DocumentDeletion { provided_ids, .. }) => { + Some(Details::DocumentDeletion { + provided_ids, + deleted_documents: Some(stats.document_count), + }) + } + _ => { + // In the case of a `documentAdditionOrUpdate` or `DocumentDeletion` + // the details MUST be set to either addition or deletion + unreachable!(); + } + } + } + + progress.update_progress(DocumentOperationProgress::Indexing); + if tasks.iter().any(|res| res.error.is_none()) { + indexer::index( + index_wtxn, + index, + pool, + indexer_config.grenad_parameters(), + &db_fields_ids_map, + new_fields_ids_map, + primary_key, + &document_changes, + embedders, + &|| must_stop_processing.get(), + &progress, + ) + .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; + + let addition = DocumentAdditionResult { + indexed_documents: candidates_count, + number_of_documents: index + .number_of_documents(index_wtxn) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, + }; + + tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); + } + + Ok(tasks) + } + IndexOperation::DocumentEdition { index_uid, mut task } => { + progress.update_progress(DocumentEditionProgress::RetrievingConfig); + + let (filter, code) = if let KindWithContent::DocumentEdition { + filter_expr, + context: _, + function, + .. 
+ } = &task.kind + { + (filter_expr, function) + } else { + unreachable!() + }; + + let candidates = match filter.as_ref().map(Filter::from_json) { + Some(Ok(Some(filter))) => filter + .evaluate(index_wtxn, index) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, + None | Some(Ok(None)) => index.documents_ids(index_wtxn)?, + Some(Err(e)) => return Err(Error::from_milli(e, Some(index_uid.clone()))), + }; + + let (original_filter, context, function) = if let Some(Details::DocumentEdition { + original_filter, + context, + function, + .. + }) = task.details + { + (original_filter, context, function) + } else { + // In the case of a `documentEdition` the details MUST be set + unreachable!(); + }; + + if candidates.is_empty() { + task.status = Status::Succeeded; + task.details = Some(Details::DocumentEdition { + original_filter, + context, + function, + deleted_documents: Some(0), + edited_documents: Some(0), + }); + + return Ok(vec![task]); + } + + let rtxn = index.read_txn()?; + let db_fields_ids_map = index.fields_ids_map(&rtxn)?; + let mut new_fields_ids_map = db_fields_ids_map.clone(); + // candidates not empty => index not empty => a primary key is set + let primary_key = index.primary_key(&rtxn)?.unwrap(); + + let primary_key = + PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map) + .map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?; + + let result_count = Ok((candidates.len(), candidates.len())) as Result<_>; + + if task.error.is_none() { + let local_pool; + let indexer_config = self.index_mapper.indexer_config(); + let pool = match &indexer_config.thread_pool { + Some(pool) => pool, + None => { + local_pool = ThreadPoolNoAbortBuilder::new() + .thread_name(|i| format!("indexing-thread-{i}")) + .build() + .unwrap(); + &local_pool + } + }; + + let candidates_count = candidates.len(); + progress.update_progress(DocumentEditionProgress::ComputingDocumentChanges); + let indexer = UpdateByFunction::new(candidates, 
context.clone(), code.clone()); + let document_changes = pool + .install(|| { + indexer + .into_changes(&primary_key) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone()))) + }) + .unwrap()?; + let embedders = index + .embedding_configs(index_wtxn) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; + let embedders = self.embedders(index_uid.clone(), embedders)?; + + progress.update_progress(DocumentEditionProgress::Indexing); + indexer::index( + index_wtxn, + index, + pool, + indexer_config.grenad_parameters(), + &db_fields_ids_map, + new_fields_ids_map, + None, // cannot change primary key in DocumentEdition + &document_changes, + embedders, + &|| must_stop_processing.get(), + &progress, + ) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; + + let addition = DocumentAdditionResult { + indexed_documents: candidates_count, + number_of_documents: index + .number_of_documents(index_wtxn) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, + }; + + tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); + } + + match result_count { + Ok((deleted_documents, edited_documents)) => { + task.status = Status::Succeeded; + task.details = Some(Details::DocumentEdition { + original_filter, + context, + function, + deleted_documents: Some(deleted_documents), + edited_documents: Some(edited_documents), + }); + } + Err(e) => { + task.status = Status::Failed; + task.details = Some(Details::DocumentEdition { + original_filter, + context, + function, + deleted_documents: Some(0), + edited_documents: Some(0), + }); + task.error = Some(e.into()); + } + } + + Ok(vec![task]) + } + IndexOperation::DocumentDeletion { mut tasks, index_uid } => { + progress.update_progress(DocumentDeletionProgress::RetrievingConfig); + + let mut to_delete = RoaringBitmap::new(); + let external_documents_ids = index.external_documents_ids(); + + for task in tasks.iter_mut() { + let 
before = to_delete.len(); + task.status = Status::Succeeded; + + match &task.kind { + KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => { + for id in documents_ids { + if let Some(id) = external_documents_ids.get(index_wtxn, id)? { + to_delete.insert(id); + } + } + let will_be_removed = to_delete.len() - before; + task.details = Some(Details::DocumentDeletion { + provided_ids: documents_ids.len(), + deleted_documents: Some(will_be_removed), + }); + } + KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } => { + let before = to_delete.len(); + let filter = match Filter::from_json(filter_expr) { + Ok(filter) => filter, + Err(err) => { + // theorically, this should be catched by deserr before reaching the index-scheduler and cannot happens + task.status = Status::Failed; + task.error = Some( + Error::from_milli(err, Some(index_uid.clone())).into(), + ); + None + } + }; + if let Some(filter) = filter { + let candidates = filter + .evaluate(index_wtxn, index) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone()))); + match candidates { + Ok(candidates) => to_delete |= candidates, + Err(err) => { + task.status = Status::Failed; + task.error = Some(err.into()); + } + }; + } + let will_be_removed = to_delete.len() - before; + if let Some(Details::DocumentDeletionByFilter { + original_filter: _, + deleted_documents, + }) = &mut task.details + { + *deleted_documents = Some(will_be_removed); + } else { + // In the case of a `documentDeleteByFilter` the details MUST be set + unreachable!() + } + } + _ => unreachable!(), + } + } + + if to_delete.is_empty() { + return Ok(tasks); + } + + let rtxn = index.read_txn()?; + let db_fields_ids_map = index.fields_ids_map(&rtxn)?; + let mut new_fields_ids_map = db_fields_ids_map.clone(); + + // to_delete not empty => index not empty => primary key set + let primary_key = index.primary_key(&rtxn)?.unwrap(); + + let primary_key = + PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map) + 
.map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?; + + if !tasks.iter().all(|res| res.error.is_some()) { + let local_pool; + let indexer_config = self.index_mapper.indexer_config(); + let pool = match &indexer_config.thread_pool { + Some(pool) => pool, + None => { + local_pool = ThreadPoolNoAbortBuilder::new() + .thread_name(|i| format!("indexing-thread-{i}")) + .build() + .unwrap(); + &local_pool + } + }; + + progress.update_progress(DocumentDeletionProgress::DeleteDocuments); + let mut indexer = indexer::DocumentDeletion::new(); + let candidates_count = to_delete.len(); + indexer.delete_documents_by_docids(to_delete); + let document_changes = indexer.into_changes(&indexer_alloc, primary_key); + let embedders = index + .embedding_configs(index_wtxn) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; + let embedders = self.embedders(index_uid.clone(), embedders)?; + + progress.update_progress(DocumentDeletionProgress::Indexing); + indexer::index( + index_wtxn, + index, + pool, + indexer_config.grenad_parameters(), + &db_fields_ids_map, + new_fields_ids_map, + None, // document deletion never changes primary key + &document_changes, + embedders, + &|| must_stop_processing.get(), + &progress, + ) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; + + let addition = DocumentAdditionResult { + indexed_documents: candidates_count, + number_of_documents: index + .number_of_documents(index_wtxn) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, + }; + + tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); + } + + Ok(tasks) + } + IndexOperation::Settings { index_uid, settings, mut tasks } => { + progress.update_progress(SettingsProgress::RetrievingAndMergingTheSettings); + let indexer_config = self.index_mapper.indexer_config(); + let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config); + + for (task, (_, 
settings)) in tasks.iter_mut().zip(settings) { + let checked_settings = settings.clone().check(); + task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) }); + apply_settings_to_builder(&checked_settings, &mut builder); + + // We can apply the status right now and if an update fail later + // the whole batch will be marked as failed. + task.status = Status::Succeeded; + } + + progress.update_progress(SettingsProgress::ApplyTheSettings); + builder + .execute( + |indexing_step| tracing::debug!(update = ?indexing_step), + || must_stop_processing.get(), + ) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; + + Ok(tasks) + } + IndexOperation::DocumentClearAndSetting { + index_uid, + cleared_tasks, + settings, + settings_tasks, + } => { + let mut import_tasks = self.apply_index_operation( + index_wtxn, + index, + IndexOperation::DocumentClear { + index_uid: index_uid.clone(), + tasks: cleared_tasks, + }, + progress.clone(), + )?; + + let settings_tasks = self.apply_index_operation( + index_wtxn, + index, + IndexOperation::Settings { index_uid, settings, tasks: settings_tasks }, + progress, + )?; + + let mut tasks = settings_tasks; + tasks.append(&mut import_tasks); + Ok(tasks) + } + } + } +} diff --git a/crates/index-scheduler/src/scheduler/process_snapshot_creation.rs b/crates/index-scheduler/src/scheduler/process_snapshot_creation.rs new file mode 100644 index 000000000..c6d6e2dc8 --- /dev/null +++ b/crates/index-scheduler/src/scheduler/process_snapshot_creation.rs @@ -0,0 +1,134 @@ +use std::ffi::OsStr; +use std::fs; +use std::sync::atomic::Ordering; + +use meilisearch_types::heed::CompactionOption; +use meilisearch_types::milli::progress::Progress; +use meilisearch_types::milli::{self}; +use meilisearch_types::tasks::{Status, Task}; +use meilisearch_types::{compression, VERSION_FILE_NAME}; + +use crate::processing::{AtomicUpdateFileStep, SnapshotCreationProgress, VariableNameStep}; +use crate::{Error, IndexScheduler, Result}; + 
+impl IndexScheduler { + pub(super) fn process_snapshot( + &self, + progress: Progress, + mut tasks: Vec, + ) -> Result> { + progress.update_progress(SnapshotCreationProgress::StartTheSnapshotCreation); + + fs::create_dir_all(&self.scheduler.snapshots_path)?; + let temp_snapshot_dir = tempfile::tempdir()?; + + // 1. Snapshot the version file. + let dst = temp_snapshot_dir.path().join(VERSION_FILE_NAME); + fs::copy(&self.scheduler.version_file_path, dst)?; + + // 2. Snapshot the index-scheduler LMDB env + // + // When we call copy_to_file, LMDB opens a read transaction by itself, + // we can't provide our own. It is an issue as we would like to know + // the update files to copy but new ones can be enqueued between the copy + // of the env and the new transaction we open to retrieve the enqueued tasks. + // So we prefer opening a new transaction after copying the env and copy more + // update files than not enough. + // + // Note that there cannot be any update files deleted between those + // two read operations as the task processing is synchronous. 
+ + // 2.1 First copy the LMDB env of the index-scheduler + progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler); + let dst = temp_snapshot_dir.path().join("tasks"); + fs::create_dir_all(&dst)?; + self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; + + // 2.2 Create a read transaction on the index-scheduler + let rtxn = self.env.read_txn()?; + + // 2.3 Create the update files directory + let update_files_dir = temp_snapshot_dir.path().join("update_files"); + fs::create_dir_all(&update_files_dir)?; + + // 2.4 Only copy the update files of the enqueued tasks + progress.update_progress(SnapshotCreationProgress::SnapshotTheUpdateFiles); + let enqueued = self.queue.tasks.get_status(&rtxn, Status::Enqueued)?; + let (atomic, update_file_progress) = AtomicUpdateFileStep::new(enqueued.len() as u32); + progress.update_progress(update_file_progress); + for task_id in enqueued { + let task = + self.queue.tasks.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; + if let Some(content_uuid) = task.content_uuid() { + let src = self.queue.file_store.get_update_path(content_uuid); + let dst = update_files_dir.join(content_uuid.to_string()); + fs::copy(src, dst)?; + } + atomic.fetch_add(1, Ordering::Relaxed); + } + + // 3. Snapshot every indexes + progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexes); + let index_mapping = self.index_mapper.index_mapping; + let nb_indexes = index_mapping.len(&rtxn)? as u32; + + for (i, result) in index_mapping.iter(&rtxn)?.enumerate() { + let (name, uuid) = result?; + progress.update_progress(VariableNameStep::new(name, i as u32, nb_indexes)); + let index = self.index_mapper.index(&rtxn, name)?; + let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string()); + fs::create_dir_all(&dst)?; + index + .copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled) + .map_err(|e| Error::from_milli(e, Some(name.to_string())))?; + } + + drop(rtxn); + + // 4. 
Snapshot the auth LMDB env + progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys); + let dst = temp_snapshot_dir.path().join("auth"); + fs::create_dir_all(&dst)?; + // TODO We can't use the open_auth_store_env function here but we should + let auth = unsafe { + milli::heed::EnvOpenOptions::new() + .map_size(1024 * 1024 * 1024) // 1 GiB + .max_dbs(2) + .open(&self.scheduler.auth_path) + }?; + auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; + + // 5. Copy and tarball the flat snapshot + progress.update_progress(SnapshotCreationProgress::CreateTheTarball); + // 5.1 Find the original name of the database + // TODO find a better way to get this path + let mut base_path = self.env.path().to_owned(); + base_path.pop(); + let db_name = base_path.file_name().and_then(OsStr::to_str).unwrap_or("data.ms"); + + // 5.2 Tarball the content of the snapshot in a tempfile with a .snapshot extension + let snapshot_path = self.scheduler.snapshots_path.join(format!("{}.snapshot", db_name)); + let temp_snapshot_file = tempfile::NamedTempFile::new_in(&self.scheduler.snapshots_path)?; + compression::to_tar_gz(temp_snapshot_dir.path(), temp_snapshot_file.path())?; + let file = temp_snapshot_file.persist(snapshot_path)?; + + // 5.3 Change the permission to make the snapshot readonly + let mut permissions = file.metadata()?.permissions(); + permissions.set_readonly(true); + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + #[allow(clippy::non_octal_unix_permissions)] + // rwxrwxrwx + permissions.set_mode(0b100100100); + } + + file.set_permissions(permissions)?; + + for task in &mut tasks { + task.status = Status::Succeeded; + } + + Ok(tasks) + } +} diff --git a/crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-2.snap b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test__settings_update-2.snap similarity index 77% rename from 
crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-2.snap rename to crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test__settings_update-2.snap index 2b76f46a6..01a8429c4 100644 --- a/crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-2.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test__settings_update-2.snap @@ -1,6 +1,7 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs expression: task.details +snapshot_kind: text --- { "embedders": { diff --git a/crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-5.snap b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test__settings_update-5.snap similarity index 78% rename from crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-5.snap rename to crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test__settings_update-5.snap index 061de75a5..7b576aa24 100644 --- a/crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-5.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test__settings_update-5.snap @@ -1,6 +1,7 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs expression: config.embedder_options +snapshot_kind: text --- { "Rest": { diff --git a/crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update.snap b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test__settings_update.snap similarity index 77% rename from crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update.snap rename to crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test__settings_update.snap index 2b76f46a6..01a8429c4 100644 --- 
a/crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test__settings_update.snap @@ -1,6 +1,7 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs expression: task.details +snapshot_kind: text --- { "embedders": { diff --git a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-15.snap b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-15.snap similarity index 63% rename from crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-15.snap rename to crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-15.snap index 540835dfb..ece33e3b4 100644 --- a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-15.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-15.snap @@ -1,6 +1,7 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs expression: doc +snapshot_kind: text --- { "doggo": "Intel", diff --git a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors.snap b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-2.snap similarity index 86% rename from crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors.snap rename to crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-2.snap index 629ea87dc..025ea4a5e 100644 --- a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-2.snap @@ -1,6 +1,7 @@ --- -source: index-scheduler/src/lib.rs +source: 
crates/index-scheduler/src/scheduler/test_embedders.rs expression: task.details +snapshot_kind: text --- { "embedders": { diff --git a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-22.snap b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-22.snap similarity index 62% rename from crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-22.snap rename to crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-22.snap index bc35d84f6..49c5403d4 100644 --- a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-22.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-22.snap @@ -1,6 +1,7 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs expression: doc +snapshot_kind: text --- { "doggo": "kefir", diff --git a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-5.snap b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-5.snap similarity index 76% rename from crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-5.snap rename to crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-5.snap index c08aa8116..14fcb3ee9 100644 --- a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-5.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-5.snap @@ -1,6 +1,7 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs expression: fakerest_config.embedder_options +snapshot_kind: text --- { "Rest": { diff --git a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-8.snap 
b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-8.snap similarity index 72% rename from crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-8.snap rename to crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-8.snap index 712a62c77..76828ad7a 100644 --- a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-8.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors-8.snap @@ -1,6 +1,7 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs expression: simple_hf_config.embedder_options +snapshot_kind: text --- { "HuggingFace": { diff --git a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-2.snap b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors.snap similarity index 86% rename from crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-2.snap rename to crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors.snap index 629ea87dc..025ea4a5e 100644 --- a/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-2.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/index_scheduler__scheduler__test_embedders__import_vectors.snap @@ -1,6 +1,7 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs expression: task.details +snapshot_kind: text --- { "embedders": { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/cancel_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_enqueued_task/cancel_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/cancel_processed.snap 
rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_enqueued_task/cancel_processed.snap index f0c382d86..e3da7bd06 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/cancel_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_enqueued_task/cancel_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/initial_tasks_enqueued.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_enqueued_task/initial_tasks_enqueued.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/initial_tasks_enqueued.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_enqueued_task/initial_tasks_enqueued.snap index b895bbc7c..51fb88025 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/initial_tasks_enqueued.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_enqueued_task/initial_tasks_enqueued.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/aborted_indexation.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/aborted_indexation.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/aborted_indexation.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/aborted_indexation.snap index b73714e36..408980c05 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/aborted_indexation.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/aborted_indexation.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/cancel_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/cancel_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/cancel_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/cancel_processed.snap index 444b171dd..2a9de78ab 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/cancel_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/cancel_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/first_task_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/first_task_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/first_task_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/first_task_processed.snap index 17265263c..e85755e98 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/first_task_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/first_task_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap index c24c36313..957df00ea 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/after_dump_register.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/after_dump_register.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/after_dump_register.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/after_dump_register.snap index 8821af805..a7c5e5c09 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/after_dump_register.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/after_dump_register.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_processed.snap 
b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/cancel_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/cancel_processed.snap index dbae3a082..426a649c8 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/cancel_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_registered.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/cancel_registered.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_registered.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/cancel_registered.snap index b9f33e598..fe128e3d3 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_registered.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_dump/cancel_registered.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/aborted_indexation.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/aborted_indexation.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/aborted_indexation.snap rename to 
crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/aborted_indexation.snap index 0b9a0d709..26ded73d7 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/aborted_indexation.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/aborted_indexation.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/cancel_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/cancel_processed.snap index ef6845b05..9a0d8cc88 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/cancel_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_task_registered.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/cancel_task_registered.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_task_registered.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/cancel_task_registered.snap index fef6c20f6..6ac809600 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_task_registered.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/cancel_task_registered.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/initial_task_processing.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/initial_task_processing.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/initial_task_processing.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/initial_task_processing.snap index 3f45be007..72f0c39eb 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/initial_task_processing.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/initial_task_processing.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/registered_the_first_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/registered_the_first_task.snap index 087257e18..63919487a 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_processing_task/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: 
crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/cancel_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_succeeded_task/cancel_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/cancel_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_succeeded_task/cancel_processed.snap index de94da936..56dea5b08 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/cancel_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_succeeded_task/cancel_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/initial_task_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_succeeded_task/initial_task_processed.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/initial_task_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_succeeded_task/initial_task_processed.snap index 78b62979b..9d83368b1 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/initial_task_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_succeeded_task/initial_task_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/registered_the_first_task.snap 
b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_succeeded_task/registered_the_first_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_succeeded_task/registered_the_first_task.snap index 087257e18..63919487a 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/cancel_succeeded_task/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/do_not_batch_task_of_different_indexes/all_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/do_not_batch_task_of_different_indexes/all_tasks_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/do_not_batch_task_of_different_indexes/all_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/do_not_batch_task_of_different_indexes/all_tasks_processed.snap index 3fe1a7d01..58b2a831d 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/do_not_batch_task_of_different_indexes/all_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/do_not_batch_task_of_different_indexes/all_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/before_index_creation.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/before_index_creation.snap similarity index 
98% rename from crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/before_index_creation.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/before_index_creation.snap index 0234a5057..6dc95e1d1 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/before_index_creation.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/before_index_creation.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/both_task_succeeded.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/both_task_succeeded.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/both_task_succeeded.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/both_task_succeeded.snap index 8203e81f4..7de83b538 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/both_task_succeeded.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/both_task_succeeded.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/registered_the_first_task.snap similarity index 97% rename from 
crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/registered_the_first_task.snap index 230b5e195..92f24508c 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_second_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/registered_the_second_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_second_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/registered_the_second_task.snap index 9b22afff0..5f25c2964 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_second_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/registered_the_second_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_third_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/registered_the_third_task.snap similarity index 97% rename from 
crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_third_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/registered_the_third_task.snap index 914660746..0006ee8c0 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_third_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion/registered_the_third_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/1.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion_on_unexisting_index/1.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/1.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion_on_unexisting_index/1.snap index c252d35c9..d7e2f3b07 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/1.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion_on_unexisting_index/1.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap similarity index 98% rename from 
crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap index 830afd854..83604f393 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap index 9d3f29c48..11ec76348 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_first_task.snap similarity index 97% rename from 
crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_first_task.snap index e3627bbd3..cf2b2b691 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap index 322bcf4ab..e1e36fffc 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap 
b/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap index aa047e3ff..0b16ffadd 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_first_task.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_first_task.snap index 8d499b59c..4e5651deb 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_second_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_second_task.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_second_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_second_task.snap index 423dfb37c..5b829d27e 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_second_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_second_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_third_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_third_task.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_third_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_third_task.snap index e5878246d..d4113041a 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_third_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/processed_the_third_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled 
= true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/registered_the_first_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/registered_the_first_task.snap index 230b5e195..92f24508c 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_second_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/registered_the_second_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_second_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/registered_the_second_task.snap index a0148db63..21a6a59f7 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_second_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/registered_the_second_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: 
text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_third_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/registered_the_third_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_third_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/registered_the_third_task.snap index bee90a73b..adf9a76fe 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_third_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_inserted_without_new_signal/registered_the_third_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/first.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/first.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/first.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/first.snap index ac18e924d..809273a20 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/first.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/first.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/fourth.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/fourth.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/fourth.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/fourth.snap index 06e63e00e..a871c2baa 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/fourth.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/fourth.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_first_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_first_task.snap index 33cea7854..5c8082f72 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_fourth_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_fourth_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_fourth_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_fourth_task.snap index ebd130966..a22004697 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_fourth_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_fourth_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_second_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_second_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_second_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_second_task.snap index c53aec0c9..635491dc1 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_second_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_second_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_third_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_third_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_third_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_third_task.snap index 7679999ce..1d190baca 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_third_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/registered_the_third_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/second.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/second.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/second.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/second.snap index 632b7a54a..208aa100b 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/second.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/second.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/third.snap 
b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/third.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/third.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/third.snap index 3a2963654..8977e4cf0 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/third.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/process_tasks_without_autobatching/third.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_a.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_a.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_a.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_a.snap index b1c6fde36..dc73ddb0d 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_a.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_a.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_b.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_b.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_b.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_b.snap index 065023214..25827aa96 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_b.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_b.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_c.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_c.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_c.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_c.snap index 03b09b928..7b1ad4b9b 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_c.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_c.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_d.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_d.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_d.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_d.snap index 08ecfddc2..aa4b71d67 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_d.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/create_d.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/first_swap_processed.snap similarity index 98% rename from 
crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/first_swap_processed.snap index bca858559..68934003d 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/first_swap_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_registered.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/first_swap_registered.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_registered.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/first_swap_registered.snap index 234915267..2296dc9f2 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_registered.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/first_swap_registered.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/second_swap_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/second_swap_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/second_swap_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/second_swap_processed.snap index 7b5ab6e4b..abc2f2954 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/second_swap_processed.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/second_swap_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/third_empty_swap_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/third_empty_swap_processed.snap index 77b1193a5..f75caa10c 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/third_empty_swap_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/two_swaps_registered.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/two_swaps_registered.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/two_swaps_registered.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/two_swaps_registered.snap index ccab86904..cb5fd822d 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/two_swaps_registered.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes/two_swaps_registered.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/after_the_index_creation.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/after_the_index_creation.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/after_the_index_creation.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/after_the_index_creation.snap index 08ecfddc2..aa4b71d67 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/after_the_index_creation.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/after_the_index_creation.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/first_swap_failed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/first_swap_failed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/first_swap_failed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/first_swap_failed.snap index e8e74d0e3..f7eb4e1e7 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/first_swap_failed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/first_swap_failed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/initial_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/initial_tasks_processed.snap similarity index 98% rename from 
crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/initial_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/initial_tasks_processed.snap index 08ecfddc2..aa4b71d67 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/initial_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/swap_indexes_errors/initial_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap index 0d51e242c..2c33bd04a 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_processed.snap rename to 
crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap index f63c498a5..83c43339f 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap index 95d615b1e..dd3ed4c8a 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/after_registering_the_task_deletion.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/after_registering_the_task_deletion.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/after_registering_the_task_deletion.snap rename to 
crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/after_registering_the_task_deletion.snap index 6402982ee..3c4b35d9f 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/after_registering_the_task_deletion.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/after_registering_the_task_deletion.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/initial_tasks_enqueued.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/initial_tasks_enqueued.snap index 0d51e242c..2c33bd04a 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/initial_tasks_enqueued.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/initial_tasks_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/initial_tasks_processed.snap 
index f63c498a5..83c43339f 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/initial_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/task_deletion_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/task_deletion_processed.snap index 3f4ae56d8..9512a8d8d 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_deleteable/task_deletion_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap index aed0a818a..46cbaefc2 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_done.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_done.snap index ae910d44b..b35bcdf1b 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_done.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_enqueued.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_enqueued.snap index 746caa1de..a861fea12 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_enqueued.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: 
crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_processing.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_processing.snap index 85a0afc46..b3500b8a5 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/task_deletion_undeleteable/task_deletion_processing.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/after_registering_settings_task.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/after_registering_settings_task.snap index e2668fcea..92e37550a 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/after_registering_settings_task.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/settings_update_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/settings_update_processed.snap index 7f08c0575..bdd654672 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/settings_update_processed.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_task_is_processing/registered_a_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_task_is_processing/registered_a_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/test_task_is_processing/registered_a_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test.rs/test_task_is_processing/registered_a_task.snap index e3627bbd3..cf2b2b691 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_task_is_processing/registered_a_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_task_is_processing/registered_a_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition/after_register.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition/after_register.snap similarity index 96% rename from 
crates/index-scheduler/src/snapshots/lib.rs/document_addition/after_register.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition/after_register.snap index d8a689669..42df87d17 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition/after_register.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition/after_register.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition/after_the_batch_creation.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition/after_the_batch_creation.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/document_addition/after_the_batch_creation.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition/after_the_batch_creation.snap index 8beb49145..a3d3deade 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition/after_the_batch_creation.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition/after_the_batch_creation.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition/once_everything_is_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition/once_everything_is_processed.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/document_addition/once_everything_is_processed.snap rename to 
crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition/once_everything_is_processed.snap index 2357a404f..83e23e8b0 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition/once_everything_is_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition/once_everything_is_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/after_processing_the_batch.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/after_processing_the_batch.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/after_processing_the_batch.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/after_processing_the_batch.snap index 1fce684f5..2bc94c1f9 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/after_processing_the_batch.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/after_processing_the_batch.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/documents.snap new file mode 100644 index 000000000..bef7fca61 --- /dev/null +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/documents.snap @@ -0,0 +1,10 @@ +--- +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text +--- +[ + { + "id": 3, + "doggo": "bork" + } +] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/registered_the_first_task.snap similarity index 96% rename from crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/registered_the_first_task.snap index b1337b287..c1629dc02 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_second_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/registered_the_second_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_second_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/registered_the_second_task.snap index 60e2d22be..c8c117b2a 100644 --- 
a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_second_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_addition_and_document_deletion/registered_the_second_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap index ee42e932a..825d74562 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_last_successful_addition.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/after_last_successful_addition.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_last_successful_addition.snap rename to 
crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/after_last_successful_addition.snap index 0e9e47574..4ffdf8958 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_last_successful_addition.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/after_last_successful_addition.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/documents.snap similarity index 60% rename from crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/documents.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/documents.snap index 8204d059b..3619a50b5 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/documents.snap @@ -1,5 +1,6 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/registered_the_first_task.snap similarity index 96% rename from 
crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/registered_the_first_task.snap index 2e96a4614..6faba461a 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_second_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/registered_the_second_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_second_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/registered_the_second_task.snap index d4f8b47b9..257c66390 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_second_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/document_deletion_and_document_addition/registered_the_second_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_processing_the_10_tasks.snap 
b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/after_processing_the_10_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_processing_the_10_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/after_processing_the_10_tasks.snap index 5efac0653..2c29d9da7 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_processing_the_10_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/after_processing_the_10_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/after_registering_the_10_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/after_registering_the_10_tasks.snap index cdc1f98b7..ce66dc4d1 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_registering_the_10_tasks.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/after_registering_the_10_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/documents.snap similarity index 82% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/documents.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/documents.snap index 5a839838d..bf6495c9c 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/documents.snap @@ -1,5 +1,6 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/processed_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/processed_the_first_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/processed_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/processed_the_first_task.snap index 
24ace66bf..68f4b6701 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/processed_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/processed_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/registered_the_first_task.snap similarity index 96% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/registered_the_first_task.snap index 230b5e195..03d4e5b16 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/after_registering_the_10_tasks.snap 
similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/after_registering_the_10_tasks.snap index f937c6805..9c3711061 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/after_registering_the_10_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/after_registering_the_10_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/all_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/all_tasks_processed.snap similarity index 99% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/all_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/all_tasks_processed.snap index 28a2a65a5..ed9d02ae2 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/all_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/all_tasks_processed.snap @@ 
-1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/documents.snap similarity index 82% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/documents.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/documents.snap index 5a839838d..bf6495c9c 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/documents.snap @@ -1,5 +1,6 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/five_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/five_tasks_processed.snap similarity index 99% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/five_tasks_processed.snap rename to 
crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/five_tasks_processed.snap index 519646fcb..343c1f77d 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/five_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/five_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/processed_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/processed_the_first_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/processed_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/processed_the_first_task.snap index f842a275e..9aa284128 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/processed_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/processed_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff 
--git a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/registered_the_first_task.snap similarity index 96% rename from crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/registered_the_first_task.snap index 33cea7854..6f0f9c782 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_with_index_without_autobatching/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_processing_the_10_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index/after_processing_the_10_tasks.snap similarity index 99% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_processing_the_10_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index/after_processing_the_10_tasks.snap index aa27500a7..d87a73a81 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_processing_the_10_tasks.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index/after_processing_the_10_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index/after_registering_the_10_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index/after_registering_the_10_tasks.snap index 8b463b588..2fbcc3dc6 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_registering_the_10_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index/after_registering_the_10_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/after_registering_the_10_tasks.snap similarity index 98% rename from 
crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/after_registering_the_10_tasks.snap index 537980795..e8f8d85d3 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/after_registering_the_10_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/after_registering_the_10_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/all_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/all_tasks_processed.snap similarity index 99% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/all_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/all_tasks_processed.snap index e342fb2f3..a5e55b95f 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/all_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/all_tasks_processed.snap @@ -1,5 
+1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/five_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/five_tasks_processed.snap similarity index 99% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/five_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/five_tasks_processed.snap index 9531dd0bf..231352493 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/five_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_cant_create_index_without_index_without_autobatching/five_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/after_registering_the_10_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/after_registering_the_10_tasks.snap rename to 
crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/after_registering_the_10_tasks.snap index 3cdee6f23..aa6956c5f 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/after_registering_the_10_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/after_registering_the_10_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/all_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/all_tasks_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/all_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/all_tasks_processed.snap index a3609fb1b..e4b176513 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/all_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/all_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/documents.snap similarity index 81% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/documents.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/documents.snap index cbd8d175a..ae77cfa9d 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/documents.snap @@ -1,5 +1,6 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/only_first_task_failed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/only_first_task_failed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/only_first_task_failed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/only_first_task_failed.snap index d73d749f6..4b737c1e6 100644 --- 
a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/only_first_task_failed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/only_first_task_failed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/after_registering_the_10_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/after_registering_the_10_tasks.snap index 00c911dae..79c0071ff 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/after_registering_the_10_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/after_registering_the_10_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/all_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/all_tasks_processed.snap similarity index 98% rename from 
crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/all_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/all_tasks_processed.snap index 99922b9a0..95466395e 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/all_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/all_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/documents.snap similarity index 82% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/documents.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/documents.snap index 5a839838d..bf6495c9c 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/documents.snap @@ -1,5 +1,6 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/processed_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/processed_the_first_task.snap similarity index 97% rename from 
crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/processed_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/processed_the_first_task.snap index 24ace66bf..68f4b6701 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/processed_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/processed_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/registered_the_first_task.snap similarity index 96% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/registered_the_first_task.snap index 230b5e195..03d4e5b16 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_mixed_rights_with_index/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/after_registering_the_5_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/after_registering_the_5_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/after_registering_the_5_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/after_registering_the_5_tasks.snap index e8ee841ae..9878d5283 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/after_registering_the_5_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/after_registering_the_5_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/documents.snap similarity index 53% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/documents.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/documents.snap index dd1bbf8b0..d8f74d472 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/documents.snap @@ -1,5 +1,6 @@ --- -source: index-scheduler/src/lib.rs +source: 
crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fifth_task_succeeds.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/fifth_task_succeeds.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fifth_task_succeeds.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/fifth_task_succeeds.snap index 1713c0ac2..bd87c1981 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fifth_task_succeeds.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/fifth_task_succeeds.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/first_and_second_task_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/first_and_second_task_fails.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/first_and_second_task_fails.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/first_and_second_task_fails.snap index 96e83ac9b..ac50daec7 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/first_and_second_task_fails.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/first_and_second_task_fails.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fourth_task_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/fourth_task_fails.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fourth_task_fails.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/fourth_task_fails.snap index f54713081..7785e0cb0 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fourth_task_fails.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/fourth_task_fails.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/third_task_succeeds.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/third_task_succeeds.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/third_task_succeeds.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/third_task_succeeds.snap index 0f24a6715..73c8fcf17 100644 --- 
a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/third_task_succeeds.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_bad_primary_key/third_task_succeeds.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/after_registering_the_3_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/after_registering_the_3_tasks.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/after_registering_the_3_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/after_registering_the_3_tasks.snap index a3a481855..86f301ba8 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/after_registering_the_3_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/after_registering_the_3_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/documents.snap new file mode 100644 index 000000000..5022049f1 --- /dev/null +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/documents.snap @@ -0,0 +1,10 @@ +--- +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text +--- +[ + { + "id": 0, + "doggo": "jean bob" + } +] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/only_first_task_succeed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/only_first_task_succeed.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/only_first_task_succeed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/only_first_task_succeed.snap index f12ac555b..4d5028d60 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/only_first_task_succeed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/only_first_task_succeed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/second_task_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/second_task_fails.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/second_task_fails.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/second_task_fails.snap 
index b49d3ea64..4350f68ae 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/second_task_fails.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/second_task_fails.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/third_task_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/third_task_fails.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/third_task_fails.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/third_task_fails.snap index 35783d84f..226a1d509 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/third_task_fails.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key/third_task_fails.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/after_registering_the_3_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/after_registering_the_3_tasks.snap similarity index 97% rename from 
crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/after_registering_the_3_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/after_registering_the_3_tasks.snap index 3eb5c7a4d..c744a7f18 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/after_registering_the_3_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/after_registering_the_3_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/documents.snap new file mode 100644 index 000000000..5022049f1 --- /dev/null +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/documents.snap @@ -0,0 +1,10 @@ +--- +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text +--- +[ + { + "id": 0, + "doggo": "jean bob" + } +] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/only_first_task_succeed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/only_first_task_succeed.snap similarity index 97% rename from 
crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/only_first_task_succeed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/only_first_task_succeed.snap index d01799fc2..86ab07c3c 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/only_first_task_succeed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/only_first_task_succeed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/second_and_third_tasks_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/second_and_third_tasks_fails.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/second_and_third_tasks_fails.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/second_and_third_tasks_fails.snap index 2c6d29a18..889d65ff9 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/second_and_third_tasks_fails.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/second_and_third_tasks_fails.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: 
crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/after_registering_the_6_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/after_registering_the_6_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/after_registering_the_6_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/after_registering_the_6_tasks.snap index 3306009aa..4111cb60e 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/after_registering_the_6_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/after_registering_the_6_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/all_other_tasks_succeeds.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/all_other_tasks_succeeds.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/all_other_tasks_succeeds.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/all_other_tasks_succeeds.snap index 6d3fabe77..7ef550fd8 100644 --- 
a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/all_other_tasks_succeeds.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/all_other_tasks_succeeds.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/documents.snap similarity index 68% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/documents.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/documents.snap index a73c52da5..0a8f2e4e8 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/documents.snap @@ -1,5 +1,6 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/first_task_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/first_task_fails.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/first_task_fails.snap rename to 
crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/first_task_fails.snap index 5b304aa24..8bda924d3 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/first_task_fails.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/first_task_fails.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/second_task_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/second_task_fails.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/second_task_fails.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/second_task_fails.snap index b5e113599..f153e2d44 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/second_task_fails.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/second_task_fails.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/third_task_succeeds.snap 
b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/third_task_succeeds.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/third_task_succeeds.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/third_task_succeeds.snap index 0f3730932..cdd51b199 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/third_task_succeeds.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key/third_task_succeeds.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/after_registering_the_6_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/after_registering_the_6_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/after_registering_the_6_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/after_registering_the_6_tasks.snap index 2876c7681..f18858451 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/after_registering_the_6_tasks.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/after_registering_the_6_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/all_other_tasks_succeeds.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/all_other_tasks_succeeds.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/all_other_tasks_succeeds.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/all_other_tasks_succeeds.snap index 46408e0a7..2fb5363e8 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/all_other_tasks_succeeds.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/all_other_tasks_succeeds.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/documents.snap similarity index 74% rename from 
crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/documents.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/documents.snap index 9c79853fa..1ff106b5b 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/documents.snap @@ -1,5 +1,6 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/first_task_succeed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/first_task_succeed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/first_task_succeed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/first_task_succeed.snap index 4acc5342b..6a7d3617a 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/first_task_succeed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/first_task_succeed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/second_task_fails.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/second_task_fails.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/second_task_fails.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/second_task_fails.snap index 828c28298..7a98a0e37 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/second_task_fails.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/second_task_fails.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/third_task_succeeds.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/third_task_succeeds.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/third_task_succeeds.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/third_task_succeeds.snap index 7e9a02288..7603decab 100644 --- 
a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/third_task_succeeds.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_addition_with_set_and_null_primary_key_inference_works/third_task_succeeds.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/1.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace/1.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/1.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace/1.snap index a5fbc024c..85b137b45 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/1.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace/1.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/2.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace/2.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/2.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace/2.snap index d2cfc793b..8bd563e6e 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/2.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace/2.snap @@ -1,5 +1,5 @@ --- -source: 
crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace/documents.snap similarity index 82% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/documents.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace/documents.snap index 5a839838d..bf6495c9c 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace/documents.snap @@ -1,5 +1,6 @@ --- -source: index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text --- [ { diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/after_registering_the_10_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/after_registering_the_10_tasks.snap index ba16d64f1..122136e08 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/after_registering_the_10_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/after_registering_the_10_tasks.snap 
@@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/all_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/all_tasks_processed.snap similarity index 99% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/all_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/all_tasks_processed.snap index 5c6c711a0..7ecc0e7a9 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/all_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/all_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/documents.snap new file mode 100644 index 000000000..bf6495c9c --- /dev/null +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/documents.snap @@ -0,0 +1,46 @@ +--- +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + 
}, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/five_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/five_tasks_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/five_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/five_tasks_processed.snap index e03da1332..7b3a9db02 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/five_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_replace_without_autobatching/five_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update/1.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update/1.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_update/1.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update/1.snap index 5444d0a3e..bb4fb66df 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update/1.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update/1.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: 
crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update/2.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update/2.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_update/2.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update/2.snap index 9e742df7b..0911eb631 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update/2.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update/2.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update/documents.snap new file mode 100644 index 000000000..bf6495c9c --- /dev/null +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update/documents.snap @@ -0,0 +1,46 @@ +--- +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/after_registering_the_10_tasks.snap 
b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/after_registering_the_10_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/after_registering_the_10_tasks.snap index 35368e4b3..916b44f96 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/after_registering_the_10_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/after_registering_the_10_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/all_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/all_tasks_processed.snap similarity index 99% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/all_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/all_tasks_processed.snap index ef6e2d0e1..4b005f38e 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/all_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/all_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### 
Autobatching Enabled = false diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/documents.snap new file mode 100644 index 000000000..bf6495c9c --- /dev/null +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/documents.snap @@ -0,0 +1,46 @@ +--- +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/five_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/five_tasks_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/five_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/five_tasks_processed.snap index bfc2e9f42..bf73f9593 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/five_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_document_update_without_autobatching/five_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: 
crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = false diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/after_registering_the_10_tasks.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/after_registering_the_10_tasks.snap index 773f43c2c..7f08b5d0d 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/after_registering_the_10_tasks.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/after_registering_the_10_tasks.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/all_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/all_tasks_processed.snap similarity index 99% rename from crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/all_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/all_tasks_processed.snap index a4649c1eb..ff617008c 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/all_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/all_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs 
+source: crates/index-scheduler/src/scheduler/test_document_addition.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/documents.snap new file mode 100644 index 000000000..bf6495c9c --- /dev/null +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/documents.snap @@ -0,0 +1,46 @@ +--- +source: crates/index-scheduler/src/scheduler/test_document_addition.rs +snapshot_kind: text +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/five_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/five_tasks_processed.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/five_tasks_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/five_tasks_processed.snap index 8aba4bd5c..ff492e75e 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/five_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/five_tasks_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_document_addition.rs 
snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir succeeds.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir succeeds.snap index f581defa8..7de6af6b7 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir succeeds.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir.snap index 27522376f..68872a141 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap 
b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/adding Intel succeeds.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/adding Intel succeeds.snap index 28504ffea..1732eee6b 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/adding Intel succeeds.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after adding Intel.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after adding Intel.snap index 288f2bc88..3777a7bc8 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after adding Intel.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after_registering_settings_task_vectors.snap similarity index 98% rename from 
crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after_registering_settings_task_vectors.snap index ff63c0caf..33bd5c0d2 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after_registering_settings_task_vectors.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/settings_update_processed_vectors.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/settings_update_processed_vectors.snap index 77367f06b..e5baae150 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/settings_update_processed_vectors.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors_first_and_embedder_later/documents after initial push.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors_first_and_embedder_later/documents after initial push.snap similarity index 97% rename from 
crates/index-scheduler/src/snapshots/lib.rs/import_vectors_first_and_embedder_later/documents after initial push.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors_first_and_embedder_later/documents after initial push.snap index e06d09464..3eaf58e17 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors_first_and_embedder_later/documents after initial push.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors_first_and_embedder_later/documents after initial push.snap @@ -1,4 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_embedders.rs +snapshot_kind: text --- [{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"my_doggo_embedder":[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1],"unknown 
embedder":[1,2,3]}},{"id":2,"doggo":"max","_vectors":{"my_doggo_embedder":{"regenerate":false,"embeddings":[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]},"unknown embedder":[4,5]}},{"id":3,"doggo":"marcel","_vectors":{"my_doggo_embedder":{"regenerate":true,"embeddings":[3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]}}},{"id":4,"doggo":"sora","_vectors":{"my_doggo_embedder":{"regenerate":true,"embeddings":null}}}] diff --git 
a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap index 8beb49145..fcbaaace3 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap index 875ae06c6..5b38f28b5 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap @@ -1,5 +1,5 @@ --- -source: 
crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_addition/registered_the_first_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_addition/registered_the_first_task.snap index d8a689669..c5b21621f 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_addition/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap index bda90680f..1b9018726 100644 --- 
a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap index be79abf21..5bbc89c44 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap rename 
to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap index 0ee4d91e5..7149d5f97 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/documents_remaining_should_only_be_bork.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/documents_remaining_should_only_be_bork.snap new file mode 100644 index 000000000..18071608b --- /dev/null +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/documents_remaining_should_only_be_bork.snap @@ -0,0 +1,10 @@ +--- +source: crates/index-scheduler/src/scheduler/test_failure.rs +snapshot_kind: text +--- +[ + { + "id": 3, + "doggo": "bork" + } +] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap similarity index 98% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap index 43be57779..b13a63738 100644 --- 
a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap index ca1866473..9e10d3052 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap @@ -1,5 +1,6 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs +snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/after_register.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_index_creation/after_register.snap similarity index 97% rename from 
crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/after_register.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_index_creation/after_register.snap index 5129662eb..4ece15b13 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/after_register.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_index_creation/after_register.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap index b24d0be1e..24589fc66 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_batch_succeeded.snap 
b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_batch_succeeded.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_batch_succeeded.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_batch_succeeded.snap index 8ab4d84dd..f698eff0a 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_batch_succeeded.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_batch_succeeded.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_failing_to_commit.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_failing_to_commit.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_failing_to_commit.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_failing_to_commit.snap index 8ab4d84dd..f698eff0a 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_failing_to_commit.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_failing_to_commit.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/document_addition_succeeded_but_index_scheduler_not_updated.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/document_addition_succeeded_but_index_scheduler_not_updated.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/document_addition_succeeded_but_index_scheduler_not_updated.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/document_addition_succeeded_but_index_scheduler_not_updated.snap index d8a689669..c5b21621f 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/document_addition_succeeded_but_index_scheduler_not_updated.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/document_addition_succeeded_but_index_scheduler_not_updated.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/registered_the_first_task.snap 
b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/registered_the_first_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/registered_the_first_task.snap index d8a689669..c5b21621f 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/task_successfully_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/task_successfully_processed.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/task_successfully_processed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/task_successfully_processed.snap index 2357a404f..1a678e46b 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/task_successfully_processed.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_update_task_after_process_batch_success_for_document_addition/task_successfully_processed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap index c776baab7..3f3a6f769 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/registered_the_first_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/panic_in_process_batch_for_index_creation/registered_the_first_task.snap similarity index 97% rename from crates/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/registered_the_first_task.snap rename to crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/panic_in_process_batch_for_index_creation/registered_the_first_task.snap index 5129662eb..4ece15b13 100644 --- 
a/crates/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/registered_the_first_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/panic_in_process_batch_for_index_creation/registered_the_first_task.snap @@ -1,5 +1,5 @@ --- -source: crates/index-scheduler/src/lib.rs +source: crates/index-scheduler/src/scheduler/test_failure.rs snapshot_kind: text --- ### Autobatching Enabled = true diff --git a/crates/index-scheduler/src/scheduler/test.rs b/crates/index-scheduler/src/scheduler/test.rs new file mode 100644 index 000000000..a2276107d --- /dev/null +++ b/crates/index-scheduler/src/scheduler/test.rs @@ -0,0 +1,876 @@ +use std::collections::BTreeMap; + +use big_s::S; +use meili_snap::{json_string, snapshot}; +use meilisearch_types::milli::index::IndexEmbeddingConfig; +use meilisearch_types::milli::update::IndexDocumentsMethod::*; +use meilisearch_types::milli::{self}; +use meilisearch_types::tasks::{IndexSwap, KindWithContent}; +use roaring::RoaringBitmap; + +use crate::insta_snapshot::snapshot_index_scheduler; +use crate::test_utils::Breakpoint::*; +use crate::test_utils::{ + index_creation_task, read_json, replace_document_import_task, sample_documents, +}; +use crate::IndexScheduler; + +#[test] +fn insert_task_while_another_task_is_processing() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + index_scheduler.register(index_creation_task("index_a", "id"), None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + handle.advance_till([Start, BatchCreated]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_batch_creation"); + + // while the task is processing can we register another task? 
+ index_scheduler.register(index_creation_task("index_b", "id"), None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + + index_scheduler + .register(KindWithContent::IndexDeletion { index_uid: S("index_a") }, None, false) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); +} + +#[test] +fn test_task_is_processing() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + index_scheduler.register(index_creation_task("index_a", "id"), None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_a_task"); + + handle.advance_till([Start, BatchCreated]); + assert!(index_scheduler.is_task_processing().unwrap()); +} + +/// We send a lot of tasks but notify the tasks scheduler only once as +/// we send them very fast, we must make sure that they are all processed. +#[test] +fn process_tasks_inserted_without_new_signal() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("cattos"), primary_key: None }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + + index_scheduler + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_first_task"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), 
name: "processed_the_second_task"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_third_task"); +} + +#[test] +fn process_tasks_without_autobatching() { + let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + index_scheduler + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + + index_scheduler + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); + + index_scheduler + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_fourth_task"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "fourth"); +} + +#[test] +fn task_deletion_undeleteable() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); + file0.persist().unwrap(); + file1.persist().unwrap(); + + let to_enqueue = [ + 
index_creation_task("catto", "mouse"), + replace_document_import_task("catto", None, 0, documents_count0), + replace_document_import_task("doggo", Some("bone"), 1, documents_count1), + ]; + + for task in to_enqueue { + let _ = index_scheduler.register(task, None, false).unwrap(); + index_scheduler.assert_internally_consistent(); + } + + // here we have registered all the tasks, but the index scheduler + // has not progressed at all + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); + + index_scheduler + .register( + KindWithContent::TaskDeletion { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0, 1]), + }, + None, + false, + ) + .unwrap(); + // again, no progress made at all, but one more task is registered + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_enqueued"); + + // now we create the first batch + handle.advance_till([Start, BatchCreated]); + + // the task deletion should now be "processing" + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processing"); + + handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]); + // after the task deletion is processed, no task should actually have been deleted, + // because the tasks with ids 0 and 1 were still "enqueued", and thus undeleteable + // the "task deletion" task should be marked as "succeeded" and, in its details, the + // number of deleted tasks should be 0 + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_done"); +} + +#[test] +fn task_deletion_deleteable() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); + file0.persist().unwrap(); + file1.persist().unwrap(); + + let to_enqueue = [ + replace_document_import_task("catto", None, 0, documents_count0), + 
replace_document_import_task("doggo", Some("bone"), 1, documents_count1), + ]; + + for task in to_enqueue { + let _ = index_scheduler.register(task, None, false).unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); + + handle.advance_one_successful_batch(); + // first addition of documents should be successful + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_processed"); + + // Now we delete the first task + index_scheduler + .register( + KindWithContent::TaskDeletion { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_task_deletion"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processed"); +} + +#[test] +fn task_deletion_delete_same_task_twice() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); + file0.persist().unwrap(); + file1.persist().unwrap(); + + let to_enqueue = [ + replace_document_import_task("catto", None, 0, documents_count0), + replace_document_import_task("doggo", Some("bone"), 1, documents_count1), + ]; + + for task in to_enqueue { + let _ = index_scheduler.register(task, None, false).unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); + + handle.advance_one_successful_batch(); + // first addition of documents should be successful + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_processed"); + + // Now we delete the first task multiple times in a row + for _ in 0..2 { + index_scheduler + .register( + 
KindWithContent::TaskDeletion { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + handle.advance_one_successful_batch(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processed"); +} + +#[test] +fn document_addition_and_index_deletion() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let content = r#" + { + "id": 1, + "doggo": "bob" + }"#; + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + + index_scheduler + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); + + handle.advance_one_successful_batch(); // The index creation. + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "before_index_creation"); + handle.advance_one_successful_batch(); // // after the execution of the two tasks in a single batch. 
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "both_task_succeeded"); +} + +#[test] +fn do_not_batch_task_of_different_indexes() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + let index_names = ["doggos", "cattos", "girafos"]; + + for name in index_names { + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: name.to_string(), primary_key: None }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + for name in index_names { + index_scheduler + .register(KindWithContent::DocumentClear { index_uid: name.to_string() }, None, false) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + for _ in 0..(index_names.len() * 2) { + handle.advance_one_successful_batch(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); +} + +#[test] +fn swap_indexes() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let to_enqueue = [ + index_creation_task("a", "id"), + index_creation_task("b", "id"), + index_creation_task("c", "id"), + index_creation_task("d", "id"), + ]; + + for task in to_enqueue { + let _ = index_scheduler.register(task, None, false).unwrap(); + index_scheduler.assert_internally_consistent(); + } + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_a"); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_b"); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_c"); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_d"); + + index_scheduler + .register( + KindWithContent::IndexSwap { + swaps: vec![ + IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, + IndexSwap { indexes: 
("c".to_owned(), "d".to_owned()) }, + ], + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_registered"); + index_scheduler + .register( + KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: ("a".to_owned(), "c".to_owned()) }], + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "two_swaps_registered"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_processed"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_swap_processed"); + + index_scheduler.register(KindWithContent::IndexSwap { swaps: vec![] }, None, false).unwrap(); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_empty_swap_processed"); +} + +#[test] +fn swap_indexes_errors() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let to_enqueue = [ + index_creation_task("a", "id"), + index_creation_task("b", "id"), + index_creation_task("c", "id"), + index_creation_task("d", "id"), + ]; + + for task in to_enqueue { + let _ = index_scheduler.register(task, None, false).unwrap(); + index_scheduler.assert_internally_consistent(); + } + handle.advance_n_successful_batches(4); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_the_index_creation"); + + let first_snap = snapshot_index_scheduler(&index_scheduler); + snapshot!(first_snap, name: "initial_tasks_processed"); + + let err = index_scheduler + .register( + KindWithContent::IndexSwap { + swaps: vec![ + IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, + IndexSwap { indexes: ("b".to_owned(), "a".to_owned()) }, + ], + }, + None, + false, + ) + .unwrap_err(); + snapshot!(format!("{err}"), @"Indexes must be declared only once during a swap. 
`a`, `b` were specified several times."); + + let second_snap = snapshot_index_scheduler(&index_scheduler); + assert_eq!(first_snap, second_snap); + + // Index `e` does not exist, but we don't check its existence yet + index_scheduler + .register( + KindWithContent::IndexSwap { + swaps: vec![ + IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, + IndexSwap { indexes: ("c".to_owned(), "e".to_owned()) }, + IndexSwap { indexes: ("d".to_owned(), "f".to_owned()) }, + ], + }, + None, + false, + ) + .unwrap(); + handle.advance_one_failed_batch(); + // Now the first swap should have an error message saying `e` and `f` do not exist + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_failed"); +} + +#[test] +fn document_addition_and_index_deletion_on_unexisting_index() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let content = r#" + { + "id": 1, + "doggo": "bob" + }"#; + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false) + .unwrap(); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + handle.advance_n_successful_batches(1); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); +} + +#[test] +fn cancel_enqueued_task() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + file0.persist().unwrap(); + + let to_enqueue = [ + replace_document_import_task("catto", None, 0, documents_count0), + 
KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + ]; + for task in to_enqueue { + let _ = index_scheduler.register(task, None, false).unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); +} + +#[test] +fn cancel_succeeded_task() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + file0.persist().unwrap(); + + let _ = index_scheduler + .register(replace_document_import_task("catto", None, 0, documents_count0), None, false) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_task_processed"); + + index_scheduler + .register( + KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + None, + false, + ) + .unwrap(); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); +} + +#[test] +fn cancel_processing_task() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + file0.persist().unwrap(); + + let _ = index_scheduler + .register(replace_document_import_task("catto", None, 0, documents_count0), None, false) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + handle.advance_till([Start, BatchCreated, InsideProcessBatch]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_task_processing"); + + index_scheduler + 
.register( + KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + None, + false, + ) + .unwrap(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_task_registered"); + // Now we check that we can reach the AbortedIndexation error handling + handle.advance_till([AbortedIndexation]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "aborted_indexation"); + + // handle.advance_till([Start, BatchCreated, BeforeProcessing, AfterProcessing]); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); +} + +#[test] +fn cancel_mix_of_tasks() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + file0.persist().unwrap(); + let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); + file1.persist().unwrap(); + let (file2, documents_count2) = sample_documents(&index_scheduler, 2, 2); + file2.persist().unwrap(); + + let to_enqueue = [ + replace_document_import_task("catto", None, 0, documents_count0), + replace_document_import_task("beavero", None, 1, documents_count1), + replace_document_import_task("wolfo", None, 2, documents_count2), + ]; + for task in to_enqueue { + let _ = index_scheduler.register(task, None, false).unwrap(); + index_scheduler.assert_internally_consistent(); + } + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_task_processed"); + + handle.advance_till([Start, BatchCreated, InsideProcessBatch]); + index_scheduler + .register( + KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0, 1, 2]), + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processing_second_task_cancel_enqueued"); + + 
handle.advance_till([AbortedIndexation]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "aborted_indexation"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); +} + +#[test] +fn test_settings_update() { + use meilisearch_types::settings::{Settings, Unchecked}; + use milli::update::Setting; + + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let mut new_settings: Box> = Box::default(); + let mut embedders = BTreeMap::default(); + let embedding_settings = milli::vector::settings::EmbeddingSettings { + source: Setting::Set(milli::vector::settings::EmbedderSource::Rest), + api_key: Setting::Set(S("My super secret")), + url: Setting::Set(S("http://localhost:7777")), + dimensions: Setting::Set(4), + request: Setting::Set(serde_json::json!("{{text}}")), + response: Setting::Set(serde_json::json!("{{embedding}}")), + ..Default::default() + }; + embedders.insert(S("default"), Setting::Set(embedding_settings)); + new_settings.embedders = Setting::Set(embedders); + + index_scheduler + .register( + KindWithContent::SettingsUpdate { + index_uid: S("doggos"), + new_settings, + is_deletion: false, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_settings_task"); + + { + let rtxn = index_scheduler.read_txn().unwrap(); + let task = index_scheduler.queue.tasks.get_task(&rtxn, 0).unwrap().unwrap(); + let task = meilisearch_types::task_view::TaskView::from_task(&task); + insta::assert_json_snapshot!(task.details); + } + + handle.advance_n_successful_batches(1); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "settings_update_processed"); + + { + let rtxn = index_scheduler.read_txn().unwrap(); + let task = index_scheduler.queue.tasks.get_task(&rtxn, 0).unwrap().unwrap(); + let task = 
meilisearch_types::task_view::TaskView::from_task(&task); + insta::assert_json_snapshot!(task.details); + } + + // has everything being pushed successfully in milli? + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + + let configs = index.embedding_configs(&rtxn).unwrap(); + let IndexEmbeddingConfig { name, config, user_provided } = configs.first().unwrap(); + insta::assert_snapshot!(name, @"default"); + insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); + insta::assert_json_snapshot!(config.embedder_options); +} + +#[test] +fn simple_new() { + crate::IndexScheduler::test(true, vec![]); +} + +#[test] +fn basic_get_stats() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let kind = index_creation_task("catto", "mouse"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = index_creation_task("doggo", "sheep"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + let kind = index_creation_task("whalo", "fish"); + let _task = index_scheduler.register(kind, None, false).unwrap(); + + snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###" + { + "indexes": { + "catto": 1, + "doggo": 1, + "whalo": 1 + }, + "statuses": { + "canceled": 0, + "enqueued": 3, + "failed": 0, + "processing": 0, + "succeeded": 0 + }, + "types": { + "documentAdditionOrUpdate": 0, + "documentDeletion": 0, + "documentEdition": 0, + "dumpCreation": 0, + "indexCreation": 3, + "indexDeletion": 0, + "indexSwap": 0, + "indexUpdate": 0, + "settingsUpdate": 0, + "snapshotCreation": 0, + "taskCancelation": 0, + "taskDeletion": 0 + } + } + "###); + + handle.advance_till([Start, BatchCreated]); + snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###" + { + "indexes": { + "catto": 1, + "doggo": 1, + "whalo": 1 + }, + "statuses": { + "canceled": 0, + "enqueued": 2, + "failed": 0, + "processing": 1, + "succeeded": 0 + }, + "types": { + 
"documentAdditionOrUpdate": 0, + "documentDeletion": 0, + "documentEdition": 0, + "dumpCreation": 0, + "indexCreation": 3, + "indexDeletion": 0, + "indexSwap": 0, + "indexUpdate": 0, + "settingsUpdate": 0, + "snapshotCreation": 0, + "taskCancelation": 0, + "taskDeletion": 0 + } + } + "###); + + handle.advance_till([ + InsideProcessBatch, + InsideProcessBatch, + ProcessBatchSucceeded, + AfterProcessing, + Start, + BatchCreated, + ]); + snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###" + { + "indexes": { + "catto": 1, + "doggo": 1, + "whalo": 1 + }, + "statuses": { + "canceled": 0, + "enqueued": 1, + "failed": 0, + "processing": 1, + "succeeded": 1 + }, + "types": { + "documentAdditionOrUpdate": 0, + "documentDeletion": 0, + "documentEdition": 0, + "dumpCreation": 0, + "indexCreation": 3, + "indexDeletion": 0, + "indexSwap": 0, + "indexUpdate": 0, + "settingsUpdate": 0, + "snapshotCreation": 0, + "taskCancelation": 0, + "taskDeletion": 0 + } + } + "###); + + // now we make one more batch, the started_at field of the new tasks will be past `second_start_time` + handle.advance_till([ + InsideProcessBatch, + InsideProcessBatch, + ProcessBatchSucceeded, + AfterProcessing, + Start, + BatchCreated, + ]); + snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###" + { + "indexes": { + "catto": 1, + "doggo": 1, + "whalo": 1 + }, + "statuses": { + "canceled": 0, + "enqueued": 0, + "failed": 0, + "processing": 1, + "succeeded": 2 + }, + "types": { + "documentAdditionOrUpdate": 0, + "documentDeletion": 0, + "documentEdition": 0, + "dumpCreation": 0, + "indexCreation": 3, + "indexDeletion": 0, + "indexSwap": 0, + "indexUpdate": 0, + "settingsUpdate": 0, + "snapshotCreation": 0, + "taskCancelation": 0, + "taskDeletion": 0 + } + } + "###); +} + +#[test] +fn cancel_processing_dump() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let dump_creation = KindWithContent::DumpCreation { keys: Vec::new(), instance_uid: None }; 
+ let dump_cancellation = KindWithContent::TaskCancelation { + query: "cancel dump".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }; + let _ = index_scheduler.register(dump_creation, None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_dump_register"); + handle.advance_till([Start, BatchCreated, InsideProcessBatch]); + + let _ = index_scheduler.register(dump_cancellation, None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_registered"); + + snapshot!(format!("{:?}", handle.advance()), @"AbortedIndexation"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); +} diff --git a/crates/index-scheduler/src/scheduler/test_document_addition.rs b/crates/index-scheduler/src/scheduler/test_document_addition.rs new file mode 100644 index 000000000..96181cbaa --- /dev/null +++ b/crates/index-scheduler/src/scheduler/test_document_addition.rs @@ -0,0 +1,1169 @@ +use big_s::S; +use meili_snap::snapshot; +use meilisearch_types::milli::obkv_to_json; +use meilisearch_types::milli::update::IndexDocumentsMethod::*; +use meilisearch_types::tasks::KindWithContent; + +use crate::insta_snapshot::snapshot_index_scheduler; +use crate::test_utils::read_json; +use crate::test_utils::Breakpoint::*; +use crate::IndexScheduler; + +#[test] +fn document_addition() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let content = r#" + { + "id": 1, + "doggo": "bob" + }"#; + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + 
) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_register"); + + handle.advance_till([Start, BatchCreated]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_the_batch_creation"); + + handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "once_everything_is_processed"); +} + +#[test] +fn document_addition_and_document_deletion() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let content = r#"[ + { "id": 1, "doggo": "jean bob" }, + { "id": 2, "catto": "jorts" }, + { "id": 3, "doggo": "bork" } + ]"#; + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + index_scheduler + .register( + KindWithContent::DocumentDeletion { + index_uid: S("doggos"), + documents_ids: vec![S("1"), S("2")], + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + + handle.advance_one_successful_batch(); // The addition AND deletion should've been batched together + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_batch"); + + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| 
obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn document_deletion_and_document_addition() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + index_scheduler + .register( + KindWithContent::DocumentDeletion { + index_uid: S("doggos"), + documents_ids: vec![S("1"), S("2")], + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + let content = r#"[ + { "id": 1, "doggo": "jean bob" }, + { "id": 2, "catto": "jorts" }, + { "id": 3, "doggo": "bork" } + ]"#; + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + + // The deletion should have failed because it can't create an index + handle.advance_one_failed_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_failing_the_deletion"); + + // The addition should works + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_last_successful_addition"); + + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + 
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_replace() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(i).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // everything should be batched together. + handle.advance_n_successful_batches(1); + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // has everything being pushed successfully in milli? 
+ let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_update() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(i).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: UpdateDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // everything should be batched together. + handle.advance_n_successful_batches(1); + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // has everything being pushed successfully in milli? 
+ let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_mixed_document_addition() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + for i in 0..10 { + let method = if i % 2 == 0 { UpdateDocuments } else { ReplaceDocuments }; + + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(i).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); + + // Only half of the tasks should've been processed since we can't autobatch replace and update together. + handle.advance_n_successful_batches(5); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed"); + + handle.advance_n_successful_batches(5); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); + + // has everything been pushed successfully in milli?
+ let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_replace_without_autobatching() { + let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(i).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); + + // Nothing should be batched thus half of the tasks are processed. + handle.advance_n_successful_batches(5); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed"); + + // Everything is processed. + handle.advance_n_successful_batches(5); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); + + // has everything been pushed successfully in milli?
+ let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_update_without_autobatching() { + let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(i).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: UpdateDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); + + // Nothing should be batched thus half of the tasks are processed. + handle.advance_n_successful_batches(5); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed"); + + // Everything is processed. + handle.advance_n_successful_batches(5); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); + + // has everything been pushed successfully in milli?
+ let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_addition_cant_create_index_without_index() { + // We're going to autobatch multiple document additions that don't have + // the right to create an index while there is no index currently. + // Thus, everything should be batched together and an IndexDoesNotExists + // error should be thrown. + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(i).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); + + // Everything should be batched together. + handle.advance_till([ + Start, + BatchCreated, + InsideProcessBatch, + ProcessBatchFailed, + AfterProcessing, + ]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_10_tasks"); + + // The index should not exist.
+ snapshot!(matches!(index_scheduler.index_exists("doggos"), Ok(true)), @"false"); +} + +#[test] +fn test_document_addition_cant_create_index_without_index_without_autobatching() { + // We're going to execute multiple document additions that don't have + // the right to create an index while there is no index currently. + // Since the auto-batching is disabled, every task should be processed + // sequentially and throw an IndexDoesNotExists. + let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(i).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); + + // Nothing should be batched thus half of the tasks are processed. + handle.advance_n_failed_batches(5); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed"); + + // Everything is processed. + handle.advance_n_failed_batches(5); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); + + // The index should not exist. + snapshot!(matches!(index_scheduler.index_exists("doggos"), Ok(true)), @"false"); +} + +#[test] +fn test_document_addition_cant_create_index_with_index() { + // We're going to autobatch multiple document additions that don't have + // the right to create an index while there is already an index.
+ // Thus, everything should be batched together and no error should be + // thrown. + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + // Create the index. + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_first_task"); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(i).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); + + // Everything should be batched together. + handle.advance_n_successful_batches(1); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_10_tasks"); + + // Has everything been pushed successfully in milli?
+ let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_addition_cant_create_index_with_index_without_autobatching() { + // We're going to execute multiple document additions that don't have + // the right to create an index while there is already an index. + // Since the autobatching is disabled, every task should be processed + // sequentially and succeed. + let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); + + // Create the index. + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_first_task"); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(i).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); +
+ // Nothing should be batched thus half of the tasks are processed. + handle.advance_n_successful_batches(5); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed"); + + // Everything is processed. + handle.advance_n_successful_batches(5); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); + + // Has everything been pushed successfully in milli? + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_addition_mixed_rights_with_index() { + // We're going to autobatch multiple document additions. + // - The index already exists + // - The first document addition doesn't have the right to create an index + // can it batch with the other one? + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + // Create the index.
+ index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_first_task"); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + let allow_index_creation = i % 2 != 0; + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(i).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); + + // Everything should be batched together. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); + + // Has everything been pushed successfully in milli? + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_addition_mixed_right_without_index_starts_with_cant_create() { + // We're going to autobatch multiple document additions.
+ // - The index does not exist + // - The first document addition doesn't have the right to create an index + // - The second does. They should not batch together. + // - The second should batch with everything else as it's going to create an index. + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + let allow_index_creation = i % 2 != 0; + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(i).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); + + // A first batch should be processed with only the first documentAddition that's going to fail. + handle.advance_one_failed_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "only_first_task_failed"); + + // Everything else should be batched together. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); + + // Has everything been pushed successfully in milli?
+ let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_addition_with_multiple_primary_key() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + for (id, primary_key) in ["id", "bork", "bloup"].iter().enumerate() { + let content = format!( + r#"{{ + "id": {id}, + "doggo": "jean bob" + }}"#, + ); + let (uuid, mut file) = + index_scheduler.queue.create_update_file_with_uuid(id as u128).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + assert_eq!(documents_count, 1); + file.persist().unwrap(); + + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S(primary_key)), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_3_tasks"); + + // A first batch should be processed with only the first documentAddition. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "only_first_task_succeed"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_task_fails"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_task_fails"); + + // Is the primary key still what we expect?
+ let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); + snapshot!(primary_key, @"id"); + + // Is the document still the one we expect? + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_addition_with_multiple_primary_key_batch_wrong_key() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + for (id, primary_key) in ["id", "bork", "bork"].iter().enumerate() { + let content = format!( + r#"{{ + "id": {id}, + "doggo": "jean bob" + }}"#, + ); + let (uuid, mut file) = + index_scheduler.queue.create_update_file_with_uuid(id as u128).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + assert_eq!(documents_count, 1); + file.persist().unwrap(); + + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S(primary_key)), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_3_tasks"); + + // A first batch should be processed with only the first documentAddition. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "only_first_task_succeed"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_and_third_tasks_fails"); + + // Is the primary key still what we expect?
+ let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); + snapshot!(primary_key, @"id"); + + // Is the document still the one we expect? + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_addition_with_bad_primary_key() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + for (id, primary_key) in ["bork", "bork", "id", "bork", "id"].iter().enumerate() { + let content = format!( + r#"{{ + "id": {id}, + "doggo": "jean bob" + }}"#, + ); + let (uuid, mut file) = + index_scheduler.queue.create_update_file_with_uuid(id as u128).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + assert_eq!(documents_count, 1); + file.persist().unwrap(); + + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S(primary_key)), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_5_tasks"); + + // A first batch should be processed with only the first two documentAddition. + // it should fail because the documents don't contain any `bork` field. + // NOTE: it's marked as successful because the batch didn't fail, it's the individual tasks that failed.
+ handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_and_second_task_fails"); + + // The primary key should still be unset since both tasks of the first batch failed. + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let primary_key = index.primary_key(&rtxn).unwrap(); + snapshot!(primary_key.is_none(), @"true"); + + // The second batch should succeed and only contain one task. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_task_succeeds"); + + // The primary key should be set to `id` since this batch succeeded. + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); + snapshot!(primary_key, @"id"); + + // We're trying to `bork` again, but now there is already a primary key set for this index. + // NOTE: it's marked as successful because the batch didn't fail, it's the individual tasks that failed. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "fourth_task_fails"); + + // Finally the last task should succeed since its primary key is the same as the valid one. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "fifth_task_succeeds"); + + // Is the primary key still what we expect? + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); + snapshot!(primary_key, @"id"); + + // Is the document still the one we expect?
+ let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_addition_with_set_and_null_primary_key() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + for (id, primary_key) in + [None, Some("bork"), Some("paw"), None, None, Some("paw")].into_iter().enumerate() + { + let content = format!( + r#"{{ + "paw": {id}, + "doggo": "jean bob" + }}"#, + ); + let (uuid, mut file) = + index_scheduler.queue.create_update_file_with_uuid(id as u128).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + assert_eq!(documents_count, 1); + file.persist().unwrap(); + + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: primary_key.map(|pk| pk.to_string()), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_6_tasks"); + + // A first batch should contain only one task that fails because we can't infer the primary key. + // NOTE: it's marked as successful because the batch didn't fail, it's the individual tasks that failed. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_task_fails"); + + // The second batch should contain only one task that fails because `bork` is not a valid primary key. + // NOTE: it's marked as successful because the batch didn't fail, it's the individual tasks that failed.
+ handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_task_fails"); + + // No primary key should be set at this point. + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let primary_key = index.primary_key(&rtxn).unwrap(); + snapshot!(primary_key.is_none(), @"true"); + + // The third batch should succeed and only contain one task. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_task_succeeds"); + + // The primary key should be set to `paw` since this batch succeeded. + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); + snapshot!(primary_key, @"paw"); + + // We should be able to batch together the next two tasks that don't specify any primary key + // + the last task that matches the current primary-key. Everything should succeed. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_other_tasks_succeeds"); + + // Is the primary key still what we expect? + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); + snapshot!(primary_key, @"paw"); + + // Is the document still the one we expect?
+ let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} + +#[test] +fn test_document_addition_with_set_and_null_primary_key_inference_works() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + for (id, primary_key) in + [None, Some("bork"), Some("doggoid"), None, None, Some("doggoid")].into_iter().enumerate() + { + let content = format!( + r#"{{ + "doggoid": {id}, + "doggo": "jean bob" + }}"#, + ); + let (uuid, mut file) = + index_scheduler.queue.create_update_file_with_uuid(id as u128).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + assert_eq!(documents_count, 1); + file.persist().unwrap(); + + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: primary_key.map(|pk| pk.to_string()), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_6_tasks"); + + // A first batch should contain only one task that succeeds and sets the primary key to `doggoid`. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_task_succeed"); + + // Checking the primary key. + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let primary_key = index.primary_key(&rtxn).unwrap(); + snapshot!(primary_key.is_none(), @"false"); + + // The second batch should contain only one task that fails because it tries to update the primary key to `bork`.
+ handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_task_fails"); + + // The third batch should succeed and only contain one task. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_task_succeeds"); + + // We should be able to batch together the next two tasks that don't specify any primary key + // + the last task that matches the current primary-key. Everything should succeed. + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_other_tasks_succeeds"); + + // Is the primary key still what we expect? + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let primary_key = index.primary_key(&rtxn).unwrap().unwrap(); + snapshot!(primary_key, @"doggoid"); + + // Is the document still the one we expect? + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); +} diff --git a/crates/index-scheduler/src/scheduler/test_embedders.rs b/crates/index-scheduler/src/scheduler/test_embedders.rs new file mode 100644 index 000000000..d21dc7548 --- /dev/null +++ b/crates/index-scheduler/src/scheduler/test_embedders.rs @@ -0,0 +1,833 @@ +use std::collections::BTreeMap; + +use big_s::S; +use insta::assert_json_snapshot; +use meili_snap::{json_string, snapshot}; +use meilisearch_types::milli::index::IndexEmbeddingConfig; +use meilisearch_types::milli::update::Setting; +use meilisearch_types::milli::vector::settings::EmbeddingSettings; +use meilisearch_types::milli::{self, obkv_to_json}; +use meilisearch_types::settings::{Settings, Unchecked}; +use
meilisearch_types::tasks::KindWithContent; +use milli::update::IndexDocumentsMethod::*; + +use crate::insta_snapshot::snapshot_index_scheduler; +use crate::test_utils::read_json; +use crate::IndexScheduler; + +#[test] +fn import_vectors() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let mut new_settings: Box> = Box::default(); + let mut embedders = BTreeMap::default(); + let embedding_settings = milli::vector::settings::EmbeddingSettings { + source: Setting::Set(milli::vector::settings::EmbedderSource::Rest), + api_key: Setting::Set(S("My super secret")), + url: Setting::Set(S("http://localhost:7777")), + dimensions: Setting::Set(384), + request: Setting::Set(serde_json::json!("{{text}}")), + response: Setting::Set(serde_json::json!("{{embedding}}")), + ..Default::default() + }; + embedders.insert(S("A_fakerest"), Setting::Set(embedding_settings)); + + let embedding_settings = milli::vector::settings::EmbeddingSettings { + source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace), + model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")), + revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")), + document_template: Setting::Set(S("{{doc.doggo}} the {{doc.breed}} best doggo")), + ..Default::default() + }; + embedders.insert(S("B_small_hf"), Setting::Set(embedding_settings)); + + new_settings.embedders = Setting::Set(embedders); + + index_scheduler + .register( + KindWithContent::SettingsUpdate { + index_uid: S("doggos"), + new_settings, + is_deletion: false, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_settings_task_vectors"); + + { + let rtxn = index_scheduler.read_txn().unwrap(); + let task = index_scheduler.queue.tasks.get_task(&rtxn, 0).unwrap().unwrap(); + let task = meilisearch_types::task_view::TaskView::from_task(&task); + insta::assert_json_snapshot!(task.details); + } + + 
handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "settings_update_processed_vectors"); + + { + let rtxn = index_scheduler.read_txn().unwrap(); + let task = index_scheduler.queue.tasks.get_task(&rtxn, 0).unwrap().unwrap(); + let task = meilisearch_types::task_view::TaskView::from_task(&task); + insta::assert_json_snapshot!(task.details); + } + + let (fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed) = { + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + + let configs = index.embedding_configs(&rtxn).unwrap(); + // for consistency with the below + #[allow(clippy::get_first)] + let IndexEmbeddingConfig { name, config: fakerest_config, user_provided } = + configs.get(0).unwrap(); + insta::assert_snapshot!(name, @"A_fakerest"); + insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); + insta::assert_json_snapshot!(fakerest_config.embedder_options); + let fakerest_name = name.clone(); + + let IndexEmbeddingConfig { name, config: simple_hf_config, user_provided } = + configs.get(1).unwrap(); + insta::assert_snapshot!(name, @"B_small_hf"); + insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); + insta::assert_json_snapshot!(simple_hf_config.embedder_options); + let simple_hf_name = name.clone(); + + let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap(); + let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap(); + let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo"), None).unwrap(); + let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo"), None).unwrap(); + let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo"), None).unwrap(); + (fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed) + }; + + // add one doc, specifying vectors + + let doc = serde_json::json!( + { + "id": 0, + "doggo": "Intel", + "breed": "beagle", + "_vectors": { + 
&fakerest_name: { + // this will never trigger regeneration, which is good because we can't actually generate with + // this embedder + "regenerate": false, + "embeddings": beagle_embed, + }, + &simple_hf_name: { + // this will be regenerated on updates + "regenerate": true, + "embeddings": lab_embed, + }, + "noise": [0.1, 0.2, 0.3] + } + } + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0u128).unwrap(); + let documents_count = read_json(doc.to_string().as_bytes(), &mut file).unwrap(); + assert_eq!(documents_count, 1); + file.persist().unwrap(); + + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: UpdateDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after adding Intel"); + + handle.advance_one_successful_batch(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "adding Intel succeeds"); + + // check embeddings + { + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + + // Ensure the document have been inserted into the relevant bitamp + let configs = index.embedding_configs(&rtxn).unwrap(); + // for consistency with the below + #[allow(clippy::get_first)] + let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } = + configs.get(0).unwrap(); + insta::assert_snapshot!(name, @"A_fakerest"); + insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>"); + + let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap(); + insta::assert_snapshot!(name, @"B_small_hf"); + insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); + + let embeddings = index.embeddings(&rtxn, 0).unwrap(); + + assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true"); + 
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true"); + + let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1; + let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let doc = obkv_to_json( + &[ + fields_ids_map.id("doggo").unwrap(), + fields_ids_map.id("breed").unwrap(), + fields_ids_map.id("_vectors").unwrap(), + ], + &fields_ids_map, + doc, + ) + .unwrap(); + assert_json_snapshot!(doc, {"._vectors.A_fakerest.embeddings" => "[vector]"}); + } + + // update the doc, specifying vectors + + let doc = serde_json::json!( + { + "id": 0, + "doggo": "kefir", + "breed": "patou", + } + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(1u128).unwrap(); + let documents_count = read_json(doc.to_string().as_bytes(), &mut file).unwrap(); + assert_eq!(documents_count, 1); + file.persist().unwrap(); + + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: None, + method: UpdateDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir succeeds"); + + { + // check embeddings + { + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + + // Ensure the document have been inserted into the relevant bitamp + let configs = index.embedding_configs(&rtxn).unwrap(); + // for consistency with the below + #[allow(clippy::get_first)] + let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } = + configs.get(0).unwrap(); + insta::assert_snapshot!(name, @"A_fakerest"); + insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>"); + + let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap(); + 
insta::assert_snapshot!(name, @"B_small_hf"); + insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); + + let embeddings = index.embeddings(&rtxn, 0).unwrap(); + + // automatically changed to patou because set to regenerate + assert_json_snapshot!(embeddings[&simple_hf_name][0] == patou_embed, @"true"); + // remained beagle + assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true"); + + let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1; + let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let doc = obkv_to_json( + &[ + fields_ids_map.id("doggo").unwrap(), + fields_ids_map.id("breed").unwrap(), + fields_ids_map.id("_vectors").unwrap(), + ], + &fields_ids_map, + doc, + ) + .unwrap(); + assert_json_snapshot!(doc, {"._vectors.A_fakerest.embeddings" => "[vector]"}); + } + } +} + +#[test] +fn import_vectors_first_and_embedder_later() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let content = serde_json::json!( + [ + { + "id": 0, + "doggo": "kefir", + }, + { + "id": 1, + "doggo": "intel", + "_vectors": { + "my_doggo_embedder": vec![1; 384], + "unknown embedder": vec![1, 2, 3], + } + }, + { + "id": 2, + "doggo": "max", + "_vectors": { + "my_doggo_embedder": { + "regenerate": false, + "embeddings": vec![2; 384], + }, + "unknown embedder": vec![4, 5], + }, + }, + { + "id": 3, + "doggo": "marcel", + "_vectors": { + "my_doggo_embedder": { + "regenerate": true, + "embeddings": vec![3; 384], + }, + }, + }, + { + "id": 4, + "doggo": "sora", + "_vectors": { + "my_doggo_embedder": { + "regenerate": true, + }, + }, + }, + ] + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0_u128).unwrap(); + let documents_count = + read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file).unwrap(); + snapshot!(documents_count, @"5"); + file.persist().unwrap(); + + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + 
index_uid: S("doggos"), + primary_key: None, + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string(&documents).unwrap(), name: "documents after initial push"); + + let setting = meilisearch_types::settings::Settings:: { + embedders: Setting::Set(maplit::btreemap! { + S("my_doggo_embedder") => Setting::Set(EmbeddingSettings { + source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace), + model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")), + revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")), + document_template: Setting::Set(S("{{doc.doggo}}")), + ..Default::default() + }) + }), + ..Default::default() + }; + index_scheduler + .register( + KindWithContent::SettingsUpdate { + index_uid: S("doggos"), + new_settings: Box::new(setting), + is_deletion: false, + allow_index_creation: false, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + handle.advance_one_successful_batch(); + index_scheduler.assert_internally_consistent(); + + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + // the all the vectors linked to the new specified embedder have been removed + 
// Only the unknown embedders stays in the document DB + snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1.0,2.0,3.0]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4.0,5.0]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]"###); + let conf = index.embedding_configs(&rtxn).unwrap(); + // even though we specified the vector for the ID 3, it shouldn't be marked + // as user provided since we explicitely marked it as NOT user provided. + snapshot!(format!("{conf:#?}"), @r###" + [ + IndexEmbeddingConfig { + name: "my_doggo_embedder", + config: EmbeddingConfig { + embedder_options: HuggingFace( + EmbedderOptions { + model: "sentence-transformers/all-MiniLM-L6-v2", + revision: Some( + "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + ), + distribution: None, + }, + ), + prompt: PromptData { + template: "{{doc.doggo}}", + max_bytes: Some( + 400, + ), + }, + quantized: None, + }, + user_provided: RoaringBitmap<[1, 2]>, + }, + ] + "###); + let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap(); + let embeddings = index.embeddings(&rtxn, docid).unwrap(); + let embedding = &embeddings["my_doggo_embedder"]; + assert!(!embedding.is_empty(), "{embedding:?}"); + + // the document with the id 3 should keep its original embedding + let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap(); + let embeddings = index.embeddings(&rtxn, docid).unwrap(); + let embeddings = &embeddings["my_doggo_embedder"]; + + snapshot!(embeddings.len(), @"1"); + assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]); + + // If we update marcel it should regenerate its embedding automatically + + let content = serde_json::json!( + [ + { + "id": 3, + "doggo": "marvel", + }, + { + "id": 4, + "doggo": "sorry", + }, + ] + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(1_u128).unwrap(); + let documents_count = + 
read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file).unwrap(); + snapshot!(documents_count, @"2"); + file.persist().unwrap(); + + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: None, + method: UpdateDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + + // the document with the id 3 should have its original embedding updated + let rtxn = index.read_txn().unwrap(); + let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap(); + let doc = index.documents(&rtxn, Some(docid)).unwrap()[0]; + let doc = obkv_to_json(&field_ids, &field_ids_map, doc.1).unwrap(); + snapshot!(json_string!(doc), @r###" + { + "id": 3, + "doggo": "marvel" + } + "###); + + let embeddings = index.embeddings(&rtxn, docid).unwrap(); + let embedding = &embeddings["my_doggo_embedder"]; + + assert!(!embedding.is_empty()); + assert!(!embedding[0].iter().all(|i| *i == 3.0), "{:?}", embedding[0]); + + // the document with the id 4 should generate an embedding + let docid = index.external_documents_ids.get(&rtxn, "4").unwrap().unwrap(); + let embeddings = index.embeddings(&rtxn, docid).unwrap(); + let embedding = &embeddings["my_doggo_embedder"]; + + assert!(!embedding.is_empty()); +} + +#[test] +fn delete_document_containing_vector() { + // 1. Add an embedder + // 2. Push two documents containing a simple vector + // 3. Delete the first document + // 4. The user defined roaring bitmap shouldn't contains the id of the first document anymore + // 5. Clear the index + // 6. The user defined roaring bitmap shouldn't contains the id of the second document + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let setting = meilisearch_types::settings::Settings:: { + embedders: Setting::Set(maplit::btreemap! 
{ + S("manual") => Setting::Set(EmbeddingSettings { + source: Setting::Set(milli::vector::settings::EmbedderSource::UserProvided), + dimensions: Setting::Set(3), + ..Default::default() + }) + }), + ..Default::default() + }; + index_scheduler + .register( + KindWithContent::SettingsUpdate { + index_uid: S("doggos"), + new_settings: Box::new(setting), + is_deletion: false, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + + let content = serde_json::json!( + [ + { + "id": 0, + "doggo": "kefir", + "_vectors": { + "manual": vec![0, 0, 0], + } + }, + { + "id": 1, + "doggo": "intel", + "_vectors": { + "manual": vec![1, 1, 1], + } + }, + ] + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0_u128).unwrap(); + let documents_count = + read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file).unwrap(); + snapshot!(documents_count, @"2"); + file.persist().unwrap(); + + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: None, + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + + index_scheduler + .register( + KindWithContent::DocumentDeletion { + index_uid: S("doggos"), + documents_ids: vec![S("1")], + }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"}]"###); + let conf = 
index.embedding_configs(&rtxn).unwrap(); + snapshot!(format!("{conf:#?}"), @r###" + [ + IndexEmbeddingConfig { + name: "manual", + config: EmbeddingConfig { + embedder_options: UserProvided( + EmbedderOptions { + dimensions: 3, + distribution: None, + }, + ), + prompt: PromptData { + template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", + max_bytes: Some( + 400, + ), + }, + quantized: None, + }, + user_provided: RoaringBitmap<[0]>, + }, + ] + "###); + let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap(); + let embeddings = index.embeddings(&rtxn, docid).unwrap(); + let embedding = &embeddings["manual"]; + assert!(!embedding.is_empty(), "{embedding:?}"); + + index_scheduler + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) + .unwrap(); + handle.advance_one_successful_batch(); + + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string(&documents).unwrap(), @"[]"); + let conf = index.embedding_configs(&rtxn).unwrap(); + snapshot!(format!("{conf:#?}"), @r###" + [ + IndexEmbeddingConfig { + name: "manual", + config: EmbeddingConfig { + embedder_options: UserProvided( + EmbedderOptions { + dimensions: 3, + distribution: None, + }, + ), + prompt: PromptData { + template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", + max_bytes: Some( + 400, + ), + }, + quantized: None, + }, + user_provided: RoaringBitmap<[]>, + }, + ] + "###); +} + +#[test] +fn delete_embedder_with_user_provided_vectors() { + 
// 1. Add two embedders + // 2. Push two documents containing a simple vector + // 3. The documents must not contain the vectors after the update as they are in the vectors db + // 3. Delete the embedders + // 4. The documents contain the vectors again + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let setting = meilisearch_types::settings::Settings:: { + embedders: Setting::Set(maplit::btreemap! { + S("manual") => Setting::Set(EmbeddingSettings { + source: Setting::Set(milli::vector::settings::EmbedderSource::UserProvided), + dimensions: Setting::Set(3), + ..Default::default() + }), + S("my_doggo_embedder") => Setting::Set(EmbeddingSettings { + source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace), + model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")), + revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")), + document_template: Setting::Set(S("{{doc.doggo}}")), + ..Default::default() + }), + }), + ..Default::default() + }; + index_scheduler + .register( + KindWithContent::SettingsUpdate { + index_uid: S("doggos"), + new_settings: Box::new(setting), + is_deletion: false, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + + let content = serde_json::json!( + [ + { + "id": 0, + "doggo": "kefir", + "_vectors": { + "manual": vec![0, 0, 0], + "my_doggo_embedder": vec![1; 384], + } + }, + { + "id": 1, + "doggo": "intel", + "_vectors": { + "manual": vec![1, 1, 1], + } + }, + ] + ); + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0_u128).unwrap(); + let documents_count = + read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file).unwrap(); + snapshot!(documents_count, @"2"); + file.persist().unwrap(); + + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: None, + method: ReplaceDocuments, + content_file: uuid, + 
documents_count, + allow_index_creation: false, + }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + + { + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel"}]"###); + } + + { + let setting = meilisearch_types::settings::Settings:: { + embedders: Setting::Set(maplit::btreemap! { + S("manual") => Setting::Reset, + }), + ..Default::default() + }; + index_scheduler + .register( + KindWithContent::SettingsUpdate { + index_uid: S("doggos"), + new_settings: Box::new(setting), + is_deletion: false, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + } + + { + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"embeddings":[[0.0,0.0,0.0]],"regenerate":false}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"embeddings":[[1.0,1.0,1.0]],"regenerate":false}}}]"###); + } + + { + let setting = meilisearch_types::settings::Settings:: { + embedders: Setting::Reset, + ..Default::default() + }; + index_scheduler + .register( + KindWithContent::SettingsUpdate { + index_uid: S("doggos"), + new_settings: Box::new(setting), + is_deletion: false, + allow_index_creation: 
true, + }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + } + + { + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + + // FIXME: redaction + snapshot!(json_string!(serde_json::to_string(&documents).unwrap(), { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), @r###""[{\"id\":0,\"doggo\":\"kefir\",\"_vectors\":{\"manual\":{\"embeddings\":[[0.0,0.0,0.0]],\"regenerate\":false},\"my_doggo_embedder\":{\"embeddings\":[[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]],\"regenerate\":false}}},{\"id\":1,\"doggo\":\"intel\",\"_vectors\":{\"manual\":{\"embeddings\":[[1.0,1.0,1.0]],\"regenerate\":false}}}]""###); + } +} diff --git a/crates/index-scheduler/src/scheduler/test_failure.rs b/crates/index-scheduler/src/scheduler/test_failure.rs new file mode 100644 index 000000000..cf835daa3 --- /dev/null +++ b/crates/index-scheduler/src/scheduler/test_failure.rs @@ -0,0 +1,251 @@ +use std::time::Instant; + +use big_s::S; +use maplit::btreeset; +use meili_snap::snapshot; +use meilisearch_types::milli::obkv_to_json; +use meilisearch_types::milli::update::IndexDocumentsMethod::*; +use meilisearch_types::milli::update::Setting; +use meilisearch_types::tasks::KindWithContent; + +use crate::insta_snapshot::snapshot_index_scheduler; +use crate::test_utils::Breakpoint::*; +use crate::test_utils::{index_creation_task, read_json, FailureLocation}; +use crate::IndexScheduler; + +#[test] +fn fail_in_process_batch_for_index_creation() { + let (index_scheduler, mut handle) = + IndexScheduler::test(true, vec![(1, FailureLocation::InsideProcessBatch)]); + + let kind = index_creation_task("catto", "mouse"); + + let _task = index_scheduler.register(kind, None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_register"); + + handle.advance_one_failed_batch(); + + // Still in the first iteration + assert_eq!(*index_scheduler.run_loop_iteration.read().unwrap(), 1); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "index_creation_failed"); +} + +#[test] +fn fail_in_process_batch_for_document_addition() { + let (index_scheduler, mut handle) = + IndexScheduler::test(true, vec![(1, 
FailureLocation::InsideProcessBatch)]); + + let content = r#" + { + "id": 1, + "doggo": "bob" + }"#; + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + handle.advance_till([Start, BatchCreated]); + + snapshot!( + snapshot_index_scheduler(&index_scheduler), + name: "document_addition_batch_created" + ); + + handle.advance_till([ProcessBatchFailed, AfterProcessing]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "document_addition_failed"); +} + +#[test] +fn fail_in_update_task_after_process_batch_success_for_document_addition() { + let (index_scheduler, mut handle) = IndexScheduler::test( + true, + vec![(1, FailureLocation::UpdatingTaskAfterProcessBatchSuccess { task_uid: 0 })], + ); + + let content = r#" + { + "id": 1, + "doggo": "bob" + }"#; + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + handle.advance_till([Start]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "document_addition_succeeded_but_index_scheduler_not_updated"); + + 
handle.advance_till([BatchCreated, InsideProcessBatch, ProcessBatchSucceeded]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_batch_succeeded"); + + // At this point the next time the scheduler will try to progress it should encounter + // a critical failure and have to wait for 1s before retrying anything. + + let before_failure = Instant::now(); + handle.advance_till([Start]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_failing_to_commit"); + let failure_duration = before_failure.elapsed(); + assert!(failure_duration.as_millis() >= 1000); + + handle.advance_till([BatchCreated, InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_successfully_processed"); +} + +#[test] +fn fail_in_process_batch_for_document_deletion() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + use meilisearch_types::settings::{Settings, Unchecked}; + let mut new_settings: Box> = Box::default(); + new_settings.filterable_attributes = Setting::Set(btreeset!(S("catto"))); + + index_scheduler + .register( + KindWithContent::SettingsUpdate { + index_uid: S("doggos"), + new_settings, + is_deletion: false, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + + let content = r#"[ + { "id": 1, "doggo": "jean bob" }, + { "id": 2, "catto": "jorts" }, + { "id": 3, "doggo": "bork" } + ]"#; + + let (uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: 
"registered_the_setting_and_document_addition"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_adding_the_settings"); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_adding_the_documents"); + + index_scheduler + .register( + KindWithContent::DocumentDeletion { + index_uid: S("doggos"), + documents_ids: vec![S("1")], + }, + None, + false, + ) + .unwrap(); + // This one should not be catched by Meilisearch but it's still nice to handle it because if one day we break the filters it could happens + index_scheduler + .register( + KindWithContent::DocumentDeletionByFilter { + index_uid: S("doggos"), + filter_expr: serde_json::json!(true), + }, + None, + false, + ) + .unwrap(); + // Should fail because the ids are not filterable + index_scheduler + .register( + KindWithContent::DocumentDeletionByFilter { + index_uid: S("doggos"), + filter_expr: serde_json::json!("id = 2"), + }, + None, + false, + ) + .unwrap(); + index_scheduler + .register( + KindWithContent::DocumentDeletionByFilter { + index_uid: S("doggos"), + filter_expr: serde_json::json!("catto EXISTS"), + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_document_deletions"); + + // Everything should be batched together + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_removing_the_documents"); + + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents_remaining_should_only_be_bork"); 
+} + +#[test] +fn panic_in_process_batch_for_index_creation() { + let (index_scheduler, mut handle) = + IndexScheduler::test(true, vec![(1, FailureLocation::PanicInsideProcessBatch)]); + + let kind = index_creation_task("catto", "mouse"); + + let _task = index_scheduler.register(kind, None, false).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + handle.advance_till([Start, BatchCreated, ProcessBatchFailed, AfterProcessing]); + + // Still in the first iteration + assert_eq!(*index_scheduler.run_loop_iteration.read().unwrap(), 1); + // No matter what happens in process_batch, the index_scheduler should be internally consistent + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "index_creation_failed"); +} diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/documents.snap deleted file mode 100644 index 2b56b71d1..000000000 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/documents.snap +++ /dev/null @@ -1,9 +0,0 @@ ---- -source: index-scheduler/src/lib.rs ---- -[ - { - "id": 3, - "doggo": "bork" - } -] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings_and_documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings_and_documents.snap deleted file mode 100644 index 45065d8b1..000000000 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings_and_documents.snap +++ /dev/null @@ -1,43 +0,0 @@ ---- -source: index-scheduler/src/lib.rs ---- -### Autobatching Enabled = true -### Processing Tasks: -[] ----------------------------------------------------------------------- -### All Tasks: -0 {uid: 0, status: succeeded, 
details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} -1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} ----------------------------------------------------------------------- -### Status: -enqueued [1,] -succeeded [0,] ----------------------------------------------------------------------- -### Kind: -"documentAdditionOrUpdate" [1,] -"settingsUpdate" [0,] ----------------------------------------------------------------------- -### Index Tasks: -doggos [0,1,] ----------------------------------------------------------------------- -### Index Mapper: -doggos: { number_of_documents: 0, field_distribution: {} } - 
----------------------------------------------------------------------- -### Canceled By: - ----------------------------------------------------------------------- -### Enqueued At: -[timestamp] [0,] -[timestamp] [1,] ----------------------------------------------------------------------- -### Started At: -[timestamp] [0,] ----------------------------------------------------------------------- -### Finished At: -[timestamp] [0,] ----------------------------------------------------------------------- -### File Store: -00000000-0000-0000-0000-000000000000 - ----------------------------------------------------------------------- diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/documents_remaining_should_only_be_bork.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/documents_remaining_should_only_be_bork.snap deleted file mode 100644 index 2b56b71d1..000000000 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/documents_remaining_should_only_be_bork.snap +++ /dev/null @@ -1,9 +0,0 @@ ---- -source: index-scheduler/src/lib.rs ---- -[ - { - "id": 3, - "doggo": "bork" - } -] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/documents.snap deleted file mode 100644 index 96f9d447f..000000000 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/documents.snap +++ /dev/null @@ -1,9 +0,0 @@ ---- -source: index-scheduler/src/lib.rs ---- -[ - { - "id": 0, - "doggo": "jean bob" - } -] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/documents.snap 
b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/documents.snap deleted file mode 100644 index 96f9d447f..000000000 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/documents.snap +++ /dev/null @@ -1,9 +0,0 @@ ---- -source: index-scheduler/src/lib.rs ---- -[ - { - "id": 0, - "doggo": "jean bob" - } -] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/documents.snap deleted file mode 100644 index 5a839838d..000000000 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/documents.snap +++ /dev/null @@ -1,45 +0,0 @@ ---- -source: index-scheduler/src/lib.rs ---- -[ - { - "id": 0, - "doggo": "bob 0" - }, - { - "id": 1, - "doggo": "bob 1" - }, - { - "id": 2, - "doggo": "bob 2" - }, - { - "id": 3, - "doggo": "bob 3" - }, - { - "id": 4, - "doggo": "bob 4" - }, - { - "id": 5, - "doggo": "bob 5" - }, - { - "id": 6, - "doggo": "bob 6" - }, - { - "id": 7, - "doggo": "bob 7" - }, - { - "id": 8, - "doggo": "bob 8" - }, - { - "id": 9, - "doggo": "bob 9" - } -] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_update/documents.snap deleted file mode 100644 index 5a839838d..000000000 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update/documents.snap +++ /dev/null @@ -1,45 +0,0 @@ ---- -source: index-scheduler/src/lib.rs ---- -[ - { - "id": 0, - "doggo": "bob 0" - }, - { - "id": 1, - "doggo": "bob 1" - }, - { - "id": 2, - "doggo": "bob 2" - }, - { - "id": 3, - "doggo": "bob 3" - }, - { - "id": 4, - "doggo": "bob 4" - }, - { - "id": 5, - "doggo": "bob 5" - }, - { - "id": 6, - "doggo": "bob 6" - }, - { - "id": 7, - "doggo": "bob 7" - }, - { - "id": 
8, - "doggo": "bob 8" - }, - { - "id": 9, - "doggo": "bob 9" - } -] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/documents.snap deleted file mode 100644 index 5a839838d..000000000 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/documents.snap +++ /dev/null @@ -1,45 +0,0 @@ ---- -source: index-scheduler/src/lib.rs ---- -[ - { - "id": 0, - "doggo": "bob 0" - }, - { - "id": 1, - "doggo": "bob 1" - }, - { - "id": 2, - "doggo": "bob 2" - }, - { - "id": 3, - "doggo": "bob 3" - }, - { - "id": 4, - "doggo": "bob 4" - }, - { - "id": 5, - "doggo": "bob 5" - }, - { - "id": 6, - "doggo": "bob 6" - }, - { - "id": 7, - "doggo": "bob 7" - }, - { - "id": 8, - "doggo": "bob 8" - }, - { - "id": 9, - "doggo": "bob 9" - } -] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/documents.snap deleted file mode 100644 index 5a839838d..000000000 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/documents.snap +++ /dev/null @@ -1,45 +0,0 @@ ---- -source: index-scheduler/src/lib.rs ---- -[ - { - "id": 0, - "doggo": "bob 0" - }, - { - "id": 1, - "doggo": "bob 1" - }, - { - "id": 2, - "doggo": "bob 2" - }, - { - "id": 3, - "doggo": "bob 3" - }, - { - "id": 4, - "doggo": "bob 4" - }, - { - "id": 5, - "doggo": "bob 5" - }, - { - "id": 6, - "doggo": "bob 6" - }, - { - "id": 7, - "doggo": "bob 7" - }, - { - "id": 8, - "doggo": "bob 8" - }, - { - "id": 9, - "doggo": "bob 9" - } -] diff --git a/crates/index-scheduler/src/test_utils.rs b/crates/index-scheduler/src/test_utils.rs new file mode 100644 index 000000000..f4779eea9 --- /dev/null +++ b/crates/index-scheduler/src/test_utils.rs @@ -0,0 +1,351 @@ +use std::io::{BufWriter, Write}; +use 
std::sync::Arc; + +use file_store::File; +use meilisearch_types::document_formats::DocumentFormatError; +use meilisearch_types::milli::update::IndexDocumentsMethod::ReplaceDocuments; +use uuid::Uuid; + +use crate::insta_snapshot::snapshot_index_scheduler; +use crate::{Error, IndexScheduler, IndexSchedulerOptions}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum Breakpoint { + // this state is only encountered while creating the scheduler in the test suite. + Init, + + Start, + BatchCreated, + AfterProcessing, + AbortedIndexation, + ProcessBatchSucceeded, + ProcessBatchFailed, + InsideProcessBatch, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum FailureLocation { + InsideCreateBatch, + InsideProcessBatch, + PanicInsideProcessBatch, + AcquiringWtxn, + UpdatingTaskAfterProcessBatchSuccess { task_uid: u32 }, + UpdatingTaskAfterProcessBatchFailure, + CommittingWtxn, +} + +use big_s::S; +use crossbeam_channel::RecvTimeoutError; +use meilisearch_types::milli::update::IndexerConfig; +use meilisearch_types::tasks::KindWithContent; +use meilisearch_types::VERSION_FILE_NAME; +use tempfile::{NamedTempFile, TempDir}; +use Breakpoint::*; + +impl IndexScheduler { + /// Blocks the thread until the test handle asks to progress to/through this breakpoint. + /// + /// Two messages are sent through the channel for each breakpoint. + /// The first message is `(b, false)` and the second message is `(b, true)`. + /// + /// Since the channel has a capacity of zero, the `send` and `recv` calls wait for each other. + /// So when the index scheduler calls `test_breakpoint_sdr.send(b, false)`, it blocks + /// the thread until the test catches up by calling `test_breakpoint_rcv.recv()` enough. + /// From the test side, we call `recv()` repeatedly until we find the message `(breakpoint, false)`. + /// As soon as we find it, the index scheduler is unblocked but then wait again on the call to + /// `test_breakpoint_sdr.send(b, true)`. 
This message can only be sent once the + /// test asks to progress to the next `(b2, false)`. + #[cfg(test)] + pub(crate) fn breakpoint(&self, b: Breakpoint) { + // We send two messages. The first one will sync with the call + // to `handle.wait_until(b)`. The second one will block until the + // next call to `handle.wait_until(..)`. + self.test_breakpoint_sdr.send((b, false)).unwrap(); + // This one will only be able to be sent if the test handle stays alive. + // If it fails, then it means that we have exited the test. + // By crashing with `unwrap`, we kill the run loop. + self.test_breakpoint_sdr.send((b, true)).unwrap(); + } +} + +impl IndexScheduler { + pub(crate) fn test( + autobatching_enabled: bool, + planned_failures: Vec<(usize, FailureLocation)>, + ) -> (Self, IndexSchedulerHandle) { + Self::test_with_custom_config(planned_failures, |config| { + config.autobatching_enabled = autobatching_enabled; + }) + } + + pub(crate) fn test_with_custom_config( + planned_failures: Vec<(usize, FailureLocation)>, + configuration: impl Fn(&mut IndexSchedulerOptions), + ) -> (Self, IndexSchedulerHandle) { + let tempdir = TempDir::new().unwrap(); + let (sender, receiver) = crossbeam_channel::bounded(0); + + let indexer_config = IndexerConfig { skip_index_budget: true, ..Default::default() }; + + let mut options = IndexSchedulerOptions { + version_file_path: tempdir.path().join(VERSION_FILE_NAME), + auth_path: tempdir.path().join("auth"), + tasks_path: tempdir.path().join("db_path"), + update_file_path: tempdir.path().join("file_store"), + indexes_path: tempdir.path().join("indexes"), + snapshots_path: tempdir.path().join("snapshots"), + dumps_path: tempdir.path().join("dumps"), + webhook_url: None, + webhook_authorization_header: None, + task_db_size: 1000 * 1000 * 10, // 10 MB, we don't use MiB on purpose. + index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose. 
+ enable_mdb_writemap: false, + index_growth_amount: 1000 * 1000 * 1000 * 1000, // 1 TB + index_count: 5, + indexer_config: Arc::new(indexer_config), + autobatching_enabled: true, + cleanup_enabled: true, + max_number_of_tasks: 1_000_000, + max_number_of_batched_tasks: usize::MAX, + instance_features: Default::default(), + }; + configuration(&mut options); + + let index_scheduler = Self::new(options, sender, planned_failures).unwrap(); + + // To be 100% consistent between all tests we're going to start the scheduler right now + // and ensure it's in the expected starting state. + let breakpoint = match receiver.recv_timeout(std::time::Duration::from_secs(10)) { + Ok(b) => b, + Err(RecvTimeoutError::Timeout) => { + panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.") + } + Err(RecvTimeoutError::Disconnected) => panic!("The scheduler crashed."), + }; + assert_eq!(breakpoint, (Init, false)); + let index_scheduler_handle = IndexSchedulerHandle { + _tempdir: tempdir, + index_scheduler: index_scheduler.private_clone(), + test_breakpoint_rcv: receiver, + last_breakpoint: breakpoint.0, + }; + + (index_scheduler, index_scheduler_handle) + } + + /// Return a [`PlannedFailure`](Error::PlannedFailure) error if a failure is planned + /// for the given location and current run loop iteration. 
+ pub(crate) fn maybe_fail(&self, location: FailureLocation) -> crate::Result<()> { + if self.planned_failures.contains(&(*self.run_loop_iteration.read().unwrap(), location)) { + match location { + FailureLocation::PanicInsideProcessBatch => { + panic!("simulated panic") + } + _ => Err(Error::PlannedFailure), + } + } else { + Ok(()) + } + } +} + +/// Return a `KindWithContent::IndexCreation` task +pub(crate) fn index_creation_task( + index: &'static str, + primary_key: &'static str, +) -> KindWithContent { + KindWithContent::IndexCreation { index_uid: S(index), primary_key: Some(S(primary_key)) } +} + +/// Create a `KindWithContent::DocumentAdditionOrUpdate` task that imports documents. +/// +/// - `index_uid` is given as parameter +/// - `primary_key` is given as parameter +/// - `method` is set to `ReplaceDocuments` +/// - `content_file` is given as parameter +/// - `documents_count` is given as parameter +/// - `allow_index_creation` is set to `true` +pub(crate) fn replace_document_import_task( + index: &'static str, + primary_key: Option<&'static str>, + content_file_uuid: u128, + documents_count: u64, +) -> KindWithContent { + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S(index), + primary_key: primary_key.map(ToOwned::to_owned), + method: ReplaceDocuments, + content_file: Uuid::from_u128(content_file_uuid), + documents_count, + allow_index_creation: true, + } +} + +/// Adapting to the new json reading interface +pub(crate) fn read_json( + bytes: &[u8], + write: impl Write, +) -> std::result::Result<u64, DocumentFormatError> { + let temp_file = NamedTempFile::new().unwrap(); + let mut buffer = BufWriter::new(temp_file.reopen().unwrap()); + buffer.write_all(bytes).unwrap(); + buffer.flush().unwrap(); + meilisearch_types::document_formats::read_json(temp_file.as_file(), write) +} + +/// Create an update file with the given file uuid. +/// +/// The update file contains just one simple document whose id is given by `document_id`. 
+/// +/// The update file and its documents count are returned. +pub(crate) fn sample_documents( + index_scheduler: &IndexScheduler, + file_uuid: u128, + document_id: usize, +) -> (File, u64) { + let content = format!( + r#" + {{ + "id" : "{document_id}" + }}"# + ); + + let (_uuid, mut file) = index_scheduler.queue.create_update_file_with_uuid(file_uuid).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + (file, documents_count) +} + +pub struct IndexSchedulerHandle { + _tempdir: TempDir, + index_scheduler: IndexScheduler, + test_breakpoint_rcv: crossbeam_channel::Receiver<(Breakpoint, bool)>, + last_breakpoint: Breakpoint, +} + +impl IndexSchedulerHandle { + /// Advance the scheduler to the next tick. + /// Panics + /// * If the scheduler is waiting for a task to be registered. + /// * If the breakpoint queue is in a bad state. + #[track_caller] + pub(crate) fn advance(&mut self) -> Breakpoint { + let (breakpoint_1, b) = match self + .test_breakpoint_rcv + .recv_timeout(std::time::Duration::from_secs(50)) + { + Ok(b) => b, + Err(RecvTimeoutError::Timeout) => { + let state = snapshot_index_scheduler(&self.index_scheduler); + panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.\n{state}") + } + Err(RecvTimeoutError::Disconnected) => { + let state = snapshot_index_scheduler(&self.index_scheduler); + panic!("The scheduler crashed.\n{state}") + } + }; + // if we've already encountered a breakpoint we're supposed to be stuck on the false + // and we expect the same variant with the true to come now. + assert_eq!( + (breakpoint_1, b), + (self.last_breakpoint, true), + "Internal error in the test suite. 
In the previous iteration I got `({:?}, false)` and now I got `({:?}, {:?})`.", + self.last_breakpoint, + breakpoint_1, + b, + ); + + let (breakpoint_2, b) = match self + .test_breakpoint_rcv + .recv_timeout(std::time::Duration::from_secs(50)) + { + Ok(b) => b, + Err(RecvTimeoutError::Timeout) => { + let state = snapshot_index_scheduler(&self.index_scheduler); + panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.\n{state}") + } + Err(RecvTimeoutError::Disconnected) => { + let state = snapshot_index_scheduler(&self.index_scheduler); + panic!("The scheduler crashed.\n{state}") + } + }; + assert!(!b, "Found the breakpoint handle in a bad state. Check your test suite"); + + self.last_breakpoint = breakpoint_2; + + breakpoint_2 + } + + /// Advance the scheduler until all the provided breakpoints are reached in order. + #[track_caller] + pub(crate) fn advance_till(&mut self, breakpoints: impl IntoIterator<Item = Breakpoint>) { + for breakpoint in breakpoints { + let b = self.advance(); + assert_eq!( + b, + breakpoint, + "Was expecting the breakpoint `{:?}` but instead got `{:?}`.\n{}", + breakpoint, + b, + snapshot_index_scheduler(&self.index_scheduler) + ); + } + } + + /// Wait for `n` successful batches. + #[track_caller] + pub(crate) fn advance_n_successful_batches(&mut self, n: usize) { + for _ in 0..n { + self.advance_one_successful_batch(); + } + } + + /// Wait for `n` failed batches. + #[track_caller] + pub(crate) fn advance_n_failed_batches(&mut self, n: usize) { + for _ in 0..n { + self.advance_one_failed_batch(); + } + } + + /// Wait for one successful batch. + #[track_caller] + pub(crate) fn advance_one_successful_batch(&mut self) { + self.advance_till([Start, BatchCreated]); + loop { + match self.advance() { + // the process_batch function can call itself recursively, thus we need to + // accept as many InsideProcessBatch as possible before moving to the next state. 
+ InsideProcessBatch => (), + // the batch succeeded, we can stop the loop and go on with the next states. + ProcessBatchSucceeded => break, + AbortedIndexation => panic!("The batch was aborted.\n{}", snapshot_index_scheduler(&self.index_scheduler)), + ProcessBatchFailed => { + while self.advance() != Start {} + panic!("The batch failed.\n{}", snapshot_index_scheduler(&self.index_scheduler)) + }, + breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint), + } + } + + self.advance_till([AfterProcessing]); + } + + /// Wait for one failed batch. + #[track_caller] + pub(crate) fn advance_one_failed_batch(&mut self) { + self.advance_till([Start, BatchCreated]); + loop { + match self.advance() { + // the process_batch function can call itself recursively, thus we need to + // accept as many InsideProcessBatch as possible before moving to the next state. + InsideProcessBatch => (), + // the batch failed, we can stop the loop and go on with the next states. + ProcessBatchFailed => break, + ProcessBatchSucceeded => panic!("The batch succeeded. 
(and it wasn't supposed to sorry)\n{}", snapshot_index_scheduler(&self.index_scheduler)), + AbortedIndexation => panic!("The batch was aborted.\n{}", snapshot_index_scheduler(&self.index_scheduler)), + breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint), + } + } + self.advance_till([AfterProcessing]); + } +} diff --git a/crates/index-scheduler/src/utils.rs b/crates/index-scheduler/src/utils.rs index 1fcedfddf..1f861776f 100644 --- a/crates/index-scheduler/src/utils.rs +++ b/crates/index-scheduler/src/utils.rs @@ -4,15 +4,14 @@ use std::collections::{BTreeSet, HashSet}; use std::ops::Bound; use meilisearch_types::batches::{Batch, BatchId, BatchStats}; -use meilisearch_types::heed::types::DecodeIgnore; use meilisearch_types::heed::{Database, RoTxn, RwTxn}; use meilisearch_types::milli::CboRoaringBitmapCodec; use meilisearch_types::task_view::DetailsView; use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status}; -use roaring::{MultiOps, RoaringBitmap}; +use roaring::RoaringBitmap; use time::OffsetDateTime; -use crate::{Error, IndexScheduler, ProcessingTasks, Result, Task, TaskId, BEI128}; +use crate::{Error, Result, Task, TaskId, BEI128}; /// This structure contains all the information required to write a batch in the database without reading the tasks. /// It'll stay in RAM so it must be small. @@ -22,7 +21,7 @@ use crate::{Error, IndexScheduler, ProcessingTasks, Result, Task, TaskId, BEI128 /// 3. Call `finished` once the batch has been processed. /// 4. Call `update` on all the tasks. 
#[derive(Debug, Clone)] -pub(crate) struct ProcessingBatch { +pub struct ProcessingBatch { pub uid: BatchId, pub details: DetailsView, pub stats: BatchStats, @@ -143,349 +142,6 @@ impl ProcessingBatch { } } -impl IndexScheduler { - pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result { - enum_iterator::all().map(|s| self.get_status(rtxn, s)).union() - } - - pub(crate) fn all_batch_ids(&self, rtxn: &RoTxn) -> Result { - enum_iterator::all().map(|s| self.get_batch_status(rtxn, s)).union() - } - - pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result> { - Ok(self.all_tasks.remap_data_type::().last(rtxn)?.map(|(k, _)| k + 1)) - } - - pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result { - Ok(self.last_task_id(rtxn)?.unwrap_or_default()) - } - - pub(crate) fn next_batch_id(&self, rtxn: &RoTxn) -> Result { - Ok(self - .all_batches - .remap_data_type::() - .last(rtxn)? - .map(|(k, _)| k + 1) - .unwrap_or_default()) - } - - pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result> { - Ok(self.all_tasks.get(rtxn, &task_id)?) - } - - pub(crate) fn get_batch(&self, rtxn: &RoTxn, batch_id: BatchId) -> Result> { - Ok(self.all_batches.get(rtxn, &batch_id)?) 
- } - - pub(crate) fn write_batch( - &self, - wtxn: &mut RwTxn, - batch: ProcessingBatch, - tasks: &RoaringBitmap, - ) -> Result<()> { - self.all_batches.put( - wtxn, - &batch.uid, - &Batch { - uid: batch.uid, - progress: None, - details: batch.details, - stats: batch.stats, - started_at: batch.started_at, - finished_at: batch.finished_at, - }, - )?; - self.batch_to_tasks_mapping.put(wtxn, &batch.uid, tasks)?; - - for status in batch.statuses { - self.update_batch_status(wtxn, status, |bitmap| { - bitmap.insert(batch.uid); - })?; - } - - for kind in batch.kinds { - self.update_batch_kind(wtxn, kind, |bitmap| { - bitmap.insert(batch.uid); - })?; - } - - for index in batch.indexes { - self.update_batch_index(wtxn, &index, |bitmap| { - bitmap.insert(batch.uid); - })?; - } - - if let Some(enqueued_at) = batch.oldest_enqueued_at { - insert_task_datetime(wtxn, self.batch_enqueued_at, enqueued_at, batch.uid)?; - } - if let Some(enqueued_at) = batch.earliest_enqueued_at { - insert_task_datetime(wtxn, self.batch_enqueued_at, enqueued_at, batch.uid)?; - } - insert_task_datetime(wtxn, self.batch_started_at, batch.started_at, batch.uid)?; - insert_task_datetime(wtxn, self.batch_finished_at, batch.finished_at.unwrap(), batch.uid)?; - - Ok(()) - } - - /// Convert an iterator to a `Vec` of tasks and edit the `ProcessingBatch` to add the given tasks. - /// - /// The tasks MUST exist, or a `CorruptedTaskQueue` error will be thrown. - pub(crate) fn get_existing_tasks_for_processing_batch( - &self, - rtxn: &RoTxn, - processing_batch: &mut ProcessingBatch, - tasks: impl IntoIterator, - ) -> Result> { - tasks - .into_iter() - .map(|task_id| { - let mut task = self - .get_task(rtxn, task_id) - .and_then(|task| task.ok_or(Error::CorruptedTaskQueue)); - processing_batch.processing(&mut task); - task - }) - .collect::>() - } - - /// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a - /// `CorruptedTaskQueue` error will be thrown. 
- pub(crate) fn get_existing_tasks( - &self, - rtxn: &RoTxn, - tasks: impl IntoIterator, - ) -> Result> { - tasks - .into_iter() - .map(|task_id| { - self.get_task(rtxn, task_id).and_then(|task| task.ok_or(Error::CorruptedTaskQueue)) - }) - .collect::>() - } - - /// Convert an iterator to a `Vec` of batches. The batches MUST exist or a - /// `CorruptedTaskQueue` error will be thrown. - pub(crate) fn get_existing_batches( - &self, - rtxn: &RoTxn, - processing: &ProcessingTasks, - tasks: impl IntoIterator, - ) -> Result> { - tasks - .into_iter() - .map(|batch_id| { - if Some(batch_id) == processing.batch.as_ref().map(|batch| batch.uid) { - let mut batch = processing.batch.as_ref().unwrap().to_batch(); - batch.progress = processing.get_progress_view(); - Ok(batch) - } else { - self.get_batch(rtxn, batch_id) - .and_then(|task| task.ok_or(Error::CorruptedTaskQueue)) - } - }) - .collect::>() - } - - pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> { - let old_task = self.get_task(wtxn, task.uid)?.ok_or(Error::CorruptedTaskQueue)?; - - debug_assert!(old_task != *task); - debug_assert_eq!(old_task.uid, task.uid); - debug_assert!(old_task.batch_uid.is_none() && task.batch_uid.is_some()); - - if old_task.status != task.status { - self.update_status(wtxn, old_task.status, |bitmap| { - bitmap.remove(task.uid); - })?; - self.update_status(wtxn, task.status, |bitmap| { - bitmap.insert(task.uid); - })?; - } - - if old_task.kind.as_kind() != task.kind.as_kind() { - self.update_kind(wtxn, old_task.kind.as_kind(), |bitmap| { - bitmap.remove(task.uid); - })?; - self.update_kind(wtxn, task.kind.as_kind(), |bitmap| { - bitmap.insert(task.uid); - })?; - } - - assert_eq!( - old_task.enqueued_at, task.enqueued_at, - "Cannot update a task's enqueued_at time" - ); - if old_task.started_at != task.started_at { - assert!(old_task.started_at.is_none(), "Cannot update a task's started_at time"); - if let Some(started_at) = task.started_at { - 
insert_task_datetime(wtxn, self.started_at, started_at, task.uid)?; - } - } - if old_task.finished_at != task.finished_at { - assert!(old_task.finished_at.is_none(), "Cannot update a task's finished_at time"); - if let Some(finished_at) = task.finished_at { - insert_task_datetime(wtxn, self.finished_at, finished_at, task.uid)?; - } - } - - self.all_tasks.put(wtxn, &task.uid, task)?; - Ok(()) - } - - /// Returns the whole set of tasks that belongs to this batch. - pub(crate) fn tasks_in_batch(&self, rtxn: &RoTxn, batch_id: BatchId) -> Result { - Ok(self.batch_to_tasks_mapping.get(rtxn, &batch_id)?.unwrap_or_default()) - } - - /// Returns the whole set of tasks that belongs to this index. - pub(crate) fn index_tasks(&self, rtxn: &RoTxn, index: &str) -> Result { - Ok(self.index_tasks.get(rtxn, index)?.unwrap_or_default()) - } - - pub(crate) fn update_index( - &self, - wtxn: &mut RwTxn, - index: &str, - f: impl Fn(&mut RoaringBitmap), - ) -> Result<()> { - let mut tasks = self.index_tasks(wtxn, index)?; - f(&mut tasks); - if tasks.is_empty() { - self.index_tasks.delete(wtxn, index)?; - } else { - self.index_tasks.put(wtxn, index, &tasks)?; - } - - Ok(()) - } - - /// Returns the whole set of batches that belongs to this index. 
- pub(crate) fn index_batches(&self, rtxn: &RoTxn, index: &str) -> Result { - Ok(self.batch_index_tasks.get(rtxn, index)?.unwrap_or_default()) - } - - pub(crate) fn update_batch_index( - &self, - wtxn: &mut RwTxn, - index: &str, - f: impl Fn(&mut RoaringBitmap), - ) -> Result<()> { - let mut batches = self.index_batches(wtxn, index)?; - f(&mut batches); - if batches.is_empty() { - self.batch_index_tasks.delete(wtxn, index)?; - } else { - self.batch_index_tasks.put(wtxn, index, &batches)?; - } - - Ok(()) - } - - pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result { - Ok(self.status.get(rtxn, &status)?.unwrap_or_default()) - } - - pub(crate) fn put_status( - &self, - wtxn: &mut RwTxn, - status: Status, - bitmap: &RoaringBitmap, - ) -> Result<()> { - Ok(self.status.put(wtxn, &status, bitmap)?) - } - - pub(crate) fn update_status( - &self, - wtxn: &mut RwTxn, - status: Status, - f: impl Fn(&mut RoaringBitmap), - ) -> Result<()> { - let mut tasks = self.get_status(wtxn, status)?; - f(&mut tasks); - self.put_status(wtxn, status, &tasks)?; - - Ok(()) - } - - pub(crate) fn get_batch_status(&self, rtxn: &RoTxn, status: Status) -> Result { - Ok(self.batch_status.get(rtxn, &status)?.unwrap_or_default()) - } - - pub(crate) fn put_batch_status( - &self, - wtxn: &mut RwTxn, - status: Status, - bitmap: &RoaringBitmap, - ) -> Result<()> { - Ok(self.batch_status.put(wtxn, &status, bitmap)?) - } - - pub(crate) fn update_batch_status( - &self, - wtxn: &mut RwTxn, - status: Status, - f: impl Fn(&mut RoaringBitmap), - ) -> Result<()> { - let mut tasks = self.get_batch_status(wtxn, status)?; - f(&mut tasks); - self.put_batch_status(wtxn, status, &tasks)?; - - Ok(()) - } - - pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result { - Ok(self.kind.get(rtxn, &kind)?.unwrap_or_default()) - } - - pub(crate) fn put_kind( - &self, - wtxn: &mut RwTxn, - kind: Kind, - bitmap: &RoaringBitmap, - ) -> Result<()> { - Ok(self.kind.put(wtxn, &kind, bitmap)?) 
- } - - pub(crate) fn update_kind( - &self, - wtxn: &mut RwTxn, - kind: Kind, - f: impl Fn(&mut RoaringBitmap), - ) -> Result<()> { - let mut tasks = self.get_kind(wtxn, kind)?; - f(&mut tasks); - self.put_kind(wtxn, kind, &tasks)?; - - Ok(()) - } - - pub(crate) fn get_batch_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result { - Ok(self.batch_kind.get(rtxn, &kind)?.unwrap_or_default()) - } - - pub(crate) fn put_batch_kind( - &self, - wtxn: &mut RwTxn, - kind: Kind, - bitmap: &RoaringBitmap, - ) -> Result<()> { - Ok(self.batch_kind.put(wtxn, &kind, bitmap)?) - } - - pub(crate) fn update_batch_kind( - &self, - wtxn: &mut RwTxn, - kind: Kind, - f: impl Fn(&mut RoaringBitmap), - ) -> Result<()> { - let mut tasks = self.get_batch_kind(wtxn, kind)?; - f(&mut tasks); - self.put_batch_kind(wtxn, kind, &tasks)?; - - Ok(()) - } -} - pub(crate) fn insert_task_datetime( wtxn: &mut RwTxn, database: Database, @@ -651,11 +307,11 @@ pub fn clamp_to_page_size(size: usize) -> usize { } #[cfg(test)] -impl IndexScheduler { +impl crate::IndexScheduler { /// Asserts that the index scheduler's content is internally consistent. 
pub fn assert_internally_consistent(&self) { let rtxn = self.env.read_txn().unwrap(); - for task in self.all_tasks.iter(&rtxn).unwrap() { + for task in self.queue.tasks.all_tasks.iter(&rtxn).unwrap() { let (task_id, task) = task.unwrap(); let task_index_uid = task.index_uid().map(ToOwned::to_owned); @@ -674,6 +330,7 @@ impl IndexScheduler { assert_eq!(uid, task.uid); if let Some(ref batch) = batch_uid { assert!(self + .queue .batch_to_tasks_mapping .get(&rtxn, batch) .unwrap() @@ -682,17 +339,26 @@ impl IndexScheduler { } if let Some(task_index_uid) = &task_index_uid { assert!(self + .queue + .tasks .index_tasks .get(&rtxn, task_index_uid.as_str()) .unwrap() .unwrap() .contains(task.uid)); } - let db_enqueued_at = - self.enqueued_at.get(&rtxn, &enqueued_at.unix_timestamp_nanos()).unwrap().unwrap(); + let db_enqueued_at = self + .queue + .tasks + .enqueued_at + .get(&rtxn, &enqueued_at.unix_timestamp_nanos()) + .unwrap() + .unwrap(); assert!(db_enqueued_at.contains(task_id)); if let Some(started_at) = started_at { let db_started_at = self + .queue + .tasks .started_at .get(&rtxn, &started_at.unix_timestamp_nanos()) .unwrap() @@ -701,6 +367,8 @@ impl IndexScheduler { } if let Some(finished_at) = finished_at { let db_finished_at = self + .queue + .tasks .finished_at .get(&rtxn, &finished_at.unix_timestamp_nanos()) .unwrap() @@ -708,9 +376,11 @@ impl IndexScheduler { assert!(db_finished_at.contains(task_id)); } if let Some(canceled_by) = canceled_by { - let db_canceled_tasks = self.get_status(&rtxn, Status::Canceled).unwrap(); + let db_canceled_tasks = + self.queue.tasks.get_status(&rtxn, Status::Canceled).unwrap(); assert!(db_canceled_tasks.contains(uid)); - let db_canceling_task = self.get_task(&rtxn, canceled_by).unwrap().unwrap(); + let db_canceling_task = + self.queue.tasks.get_task(&rtxn, canceled_by).unwrap().unwrap(); assert_eq!(db_canceling_task.status, Status::Succeeded); match db_canceling_task.kind { KindWithContent::TaskCancelation { query: _, tasks } => { 
@@ -770,7 +440,9 @@ impl IndexScheduler { Details::IndexInfo { primary_key: pk1 } => match &kind { KindWithContent::IndexCreation { index_uid, primary_key: pk2 } | KindWithContent::IndexUpdate { index_uid, primary_key: pk2 } => { - self.index_tasks + self.queue + .tasks + .index_tasks .get(&rtxn, index_uid.as_str()) .unwrap() .unwrap() @@ -878,23 +550,24 @@ impl IndexScheduler { } } - assert!(self.get_status(&rtxn, status).unwrap().contains(uid)); - assert!(self.get_kind(&rtxn, kind.as_kind()).unwrap().contains(uid)); + assert!(self.queue.tasks.get_status(&rtxn, status).unwrap().contains(uid)); + assert!(self.queue.tasks.get_kind(&rtxn, kind.as_kind()).unwrap().contains(uid)); if let KindWithContent::DocumentAdditionOrUpdate { content_file, .. } = kind { match status { Status::Enqueued | Status::Processing => { assert!(self - .file_store + .queue.file_store .all_uuids() .unwrap() .any(|uuid| uuid.as_ref().unwrap() == &content_file), "Could not find uuid `{content_file}` in the file_store. 
Available uuids are {:?}.", - self.file_store.all_uuids().unwrap().collect::, file_store::Error>>().unwrap(), + self.queue.file_store.all_uuids().unwrap().collect::, file_store::Error>>().unwrap(), ); } Status::Succeeded | Status::Failed | Status::Canceled => { assert!(self + .queue .file_store .all_uuids() .unwrap() diff --git a/crates/meilisearch/src/lib.rs b/crates/meilisearch/src/lib.rs index 9e6e45836..3ea8c06c6 100644 --- a/crates/meilisearch/src/lib.rs +++ b/crates/meilisearch/src/lib.rs @@ -307,7 +307,7 @@ fn open_or_create_database_unchecked( task_db_size: opt.max_task_db_size.as_u64() as usize, index_base_map_size: opt.max_index_size.as_u64() as usize, enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage, - indexer_config: (&opt.indexer_options).try_into()?, + indexer_config: Arc::new((&opt.indexer_options).try_into()?), autobatching_enabled: true, cleanup_enabled: !opt.experimental_replication_parameters, max_number_of_tasks: 1_000_000, diff --git a/crates/meilisearch/src/routes/batches.rs b/crates/meilisearch/src/routes/batches.rs index 4d42cdd16..36bf31605 100644 --- a/crates/meilisearch/src/routes/batches.rs +++ b/crates/meilisearch/src/routes/batches.rs @@ -36,7 +36,7 @@ async fn get_batch( let query = index_scheduler::Query { batch_uids: Some(vec![batch_uid]), ..Query::default() }; let filters = index_scheduler.filters(); - let (batches, _) = index_scheduler.get_batches_from_authorized_indexes(query, filters)?; + let (batches, _) = index_scheduler.get_batches_from_authorized_indexes(&query, filters)?; if let Some(batch) = batches.first() { let task_view = BatchView::from_batch(batch); @@ -66,7 +66,7 @@ async fn get_batches( let query = params.into_query(); let filters = index_scheduler.filters(); - let (tasks, total) = index_scheduler.get_batches_from_authorized_indexes(query, filters)?; + let (tasks, total) = index_scheduler.get_batches_from_authorized_indexes(&query, filters)?; let mut results: Vec<_> = 
tasks.iter().map(BatchView::from_batch).collect(); // If we were able to fetch the number +1 tasks we asked diff --git a/crates/meilisearch/src/routes/indexes/documents.rs b/crates/meilisearch/src/routes/indexes/documents.rs index 5f79000bd..3b9a89885 100644 --- a/crates/meilisearch/src/routes/indexes/documents.rs +++ b/crates/meilisearch/src/routes/indexes/documents.rs @@ -608,7 +608,7 @@ async fn document_addition( } }; - let (uuid, mut update_file) = index_scheduler.create_update_file(dry_run)?; + let (uuid, mut update_file) = index_scheduler.queue.create_update_file(dry_run)?; let documents_count = match format { PayloadType::Ndjson => { let (path, file) = update_file.into_parts(); @@ -670,7 +670,7 @@ async fn document_addition( Err(e) => { // Here the file MAY have been persisted or not. // We don't know thus we ignore the file not found error. - match index_scheduler.delete_update_file(uuid) { + match index_scheduler.queue.delete_update_file(uuid) { Ok(()) => (), Err(index_scheduler::Error::FileStore(file_store::Error::IoError(e))) if e.kind() == ErrorKind::NotFound => {} @@ -701,7 +701,7 @@ async fn document_addition( { Ok(task) => task, Err(e) => { - index_scheduler.delete_update_file(uuid)?; + index_scheduler.queue.delete_update_file(uuid)?; return Err(e.into()); } }; diff --git a/crates/meilisearch/src/routes/metrics.rs b/crates/meilisearch/src/routes/metrics.rs index 7dd9ee3bb..191beba8c 100644 --- a/crates/meilisearch/src/routes/metrics.rs +++ b/crates/meilisearch/src/routes/metrics.rs @@ -1,7 +1,3 @@ -use crate::extractors::authentication::policies::ActionPolicy; -use crate::extractors::authentication::{AuthenticationError, GuardedData}; -use crate::routes::create_all_stats; -use crate::search_queue::SearchQueue; use actix_web::http::header; use actix_web::web::{self, Data}; use actix_web::HttpResponse; @@ -13,6 +9,11 @@ use meilisearch_types::tasks::Status; use prometheus::{Encoder, TextEncoder}; use time::OffsetDateTime; +use 
crate::extractors::authentication::policies::ActionPolicy; +use crate::extractors::authentication::{AuthenticationError, GuardedData}; +use crate::routes::create_all_stats; +use crate::search_queue::SearchQueue; + pub fn configure(config: &mut web::ServiceConfig) { config.service(web::resource("").route(web::get().to(get_metrics))); } @@ -64,7 +65,7 @@ pub async fn get_metrics( let task_queue_latency_seconds = index_scheduler .get_tasks_from_authorized_indexes( - Query { + &Query { limit: Some(1), reverse: Some(true), statuses: Some(vec![Status::Enqueued, Status::Processing]), diff --git a/crates/meilisearch/src/routes/tasks.rs b/crates/meilisearch/src/routes/tasks.rs index cd82a6a18..71c45eb1d 100644 --- a/crates/meilisearch/src/routes/tasks.rs +++ b/crates/meilisearch/src/routes/tasks.rs @@ -260,11 +260,8 @@ async fn cancel_tasks( let query = params.into_query(); - let (tasks, _) = index_scheduler.get_task_ids_from_authorized_indexes( - &index_scheduler.read_txn()?, - &query, - index_scheduler.filters(), - )?; + let (tasks, _) = + index_scheduler.get_task_ids_from_authorized_indexes(&query, index_scheduler.filters())?; let task_cancelation = KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks }; @@ -312,11 +309,8 @@ async fn delete_tasks( let query = params.into_query(); - let (tasks, _) = index_scheduler.get_task_ids_from_authorized_indexes( - &index_scheduler.read_txn()?, - &query, - index_scheduler.filters(), - )?; + let (tasks, _) = + index_scheduler.get_task_ids_from_authorized_indexes(&query, index_scheduler.filters())?; let task_deletion = KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks }; @@ -349,7 +343,7 @@ async fn get_tasks( let query = params.into_query(); let filters = index_scheduler.filters(); - let (tasks, total) = index_scheduler.get_tasks_from_authorized_indexes(query, filters)?; + let (tasks, total) = index_scheduler.get_tasks_from_authorized_indexes(&query, filters)?; let mut 
results: Vec<_> = tasks.iter().map(TaskView::from_task).collect(); // If we were able to fetch the number +1 tasks we asked @@ -377,7 +371,7 @@ async fn get_task( let query = index_scheduler::Query { uids: Some(vec![task_uid]), ..Query::default() }; let filters = index_scheduler.filters(); - let (tasks, _) = index_scheduler.get_tasks_from_authorized_indexes(query, filters)?; + let (tasks, _) = index_scheduler.get_tasks_from_authorized_indexes(&query, filters)?; if let Some(task) = tasks.first() { let task_view = TaskView::from_task(task);