diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index c013cd7b6..d823f278e 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -819,7 +819,7 @@ impl IndexScheduler { /// Delete each given task from all the databases (if it is deleteable). /// - /// Return the number of tasks that were actually deleted + /// Return the number of tasks that were actually deleted. fn delete_matched_tasks( &self, wtxn: &mut RwTxn, @@ -830,11 +830,9 @@ impl IndexScheduler { let processing_tasks = &self.processing_tasks.read().unwrap().1; - // TODO: Lo: Take the intersection of `matched_tasks` and `all_tasks` first, - // so that we end up with the correct count for `deleted_tasks` (the value returned - // by this function). Related snapshot test: - // `task_deletion_delete_same_task_twice/task_deletion_processed.snap` - let mut to_delete_tasks = matched_tasks - processing_tasks; + let all_task_ids = self.all_task_ids(&wtxn)?; + let mut to_delete_tasks = all_task_ids & matched_tasks; + to_delete_tasks -= processing_tasks; to_delete_tasks -= enqueued_tasks; // 2. We now have a list of tasks to delete, delete them @@ -844,6 +842,8 @@ impl IndexScheduler { let mut affected_kinds = HashSet::new(); for task_id in to_delete_tasks.iter() { + // This should never fail, but there is no harm done if it does. The function + // will still be 99% correct (the number of deleted tasks will be slightly incorrect). if let Some(task) = self.get_task(wtxn, task_id)? { if let Some(task_indexes) = task.indexes() { affected_indexes.extend(task_indexes.into_iter().map(|x| x.to_owned())); diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 24ea364e7..7a46e90a5 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -262,18 +262,7 @@ impl IndexScheduler { let rtxn = self.env.read_txn()?; // This is the list of all the tasks. - let mut tasks = { - let mut all_tasks = RoaringBitmap::new(); - for status in [ - Status::Enqueued, - Status::Processing, - Status::Succeeded, - Status::Failed, - ] { - all_tasks |= self.get_status(&rtxn, status)?; - } - all_tasks - }; + let mut tasks = self.all_task_ids(&rtxn)?; if let Some(uids) = &query.uid { tasks &= RoaringBitmap::from_iter(uids); @@ -827,8 +816,6 @@ mod tests { for _ in 0..2 { handle.wait_till(Breakpoint::AfterProcessing); } - // The three deletion tasks are marked as succeeded, and all their details say that one - // task has been deleted. Is this the correct behaviour? Probably not! snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processed"); } diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap index 90d2b0c8f..96a80c3a9 100644 --- a/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap @@ -8,7 +8,7 @@ source: index-scheduler/src/lib.rs ### All Tasks: 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: 0 }, kind: DocumentImport { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_query: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }} -3 {uid: 3, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_query: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }} +3 {uid: 3, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(0), original_query: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }} ---------------------------------------------------------------------- ### Status: enqueued [1,] diff --git a/index-scheduler/src/utils.rs b/index-scheduler/src/utils.rs index 5a39cc378..3183b6bc8 100644 --- a/index-scheduler/src/utils.rs +++ b/index-scheduler/src/utils.rs @@ -8,6 +8,18 @@ use crate::{Error, IndexScheduler, Result, Task, TaskId}; use meilisearch_types::tasks::{Kind, Status}; impl IndexScheduler { + pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result { + let mut all_tasks = RoaringBitmap::new(); + for status in [ + Status::Enqueued, + Status::Processing, + Status::Succeeded, + Status::Failed, + ] { + all_tasks |= self.get_status(&rtxn, status)?; + } + Ok(all_tasks) + } pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result> { Ok(self .all_tasks