Fix number of deleted tasks details after duplicate task deletion

Loïc Lecrenier 2022-10-17 12:58:20 +02:00 committed by Clément Renault
parent 8defad6c38
commit 8954b1bd1d
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
4 changed files with 20 additions and 21 deletions

View File

@@ -819,7 +819,7 @@ impl IndexScheduler
     /// Delete each given task from all the databases (if it is deleteable).
     ///
-    /// Return the number of tasks that were actually deleted
+    /// Return the number of tasks that were actually deleted.
     fn delete_matched_tasks(
         &self,
         wtxn: &mut RwTxn,
@@ -830,11 +830,9 @@ impl IndexScheduler
         let processing_tasks = &self.processing_tasks.read().unwrap().1;
-        // TODO: Lo: Take the intersection of `matched_tasks` and `all_tasks` first,
-        // so that we end up with the correct count for `deleted_tasks` (the value returned
-        // by this function). Related snapshot test:
-        // `task_deletion_delete_same_task_twice/task_deletion_processed.snap`
-        let mut to_delete_tasks = matched_tasks - processing_tasks;
+        let all_task_ids = self.all_task_ids(&wtxn)?;
+        let mut to_delete_tasks = all_task_ids & matched_tasks;
+        to_delete_tasks -= processing_tasks;
         to_delete_tasks -= enqueued_tasks;
         // 2. We now have a list of tasks to delete, delete them
@@ -844,6 +842,8 @@ impl IndexScheduler
         let mut affected_kinds = HashSet::new();
         for task_id in to_delete_tasks.iter() {
+            // This should never fail, but there is no harm done if it does. The function
+            // will still be 99% correct (the number of deleted tasks will be slightly incorrect).
            if let Some(task) = self.get_task(wtxn, task_id)? {
                if let Some(task_indexes) = task.indexes() {
                    affected_indexes.extend(task_indexes.into_iter().map(|x| x.to_owned()));

View File

@@ -262,18 +262,7 @@ impl IndexScheduler
         let rtxn = self.env.read_txn()?;
         // This is the list of all the tasks.
-        let mut tasks = {
-            let mut all_tasks = RoaringBitmap::new();
-            for status in [
-                Status::Enqueued,
-                Status::Processing,
-                Status::Succeeded,
-                Status::Failed,
-            ] {
-                all_tasks |= self.get_status(&rtxn, status)?;
-            }
-            all_tasks
-        };
+        let mut tasks = self.all_task_ids(&rtxn)?;
         if let Some(uids) = &query.uid {
             tasks &= RoaringBitmap::from_iter(uids);
@@ -827,8 +816,6 @@ mod tests {
         for _ in 0..2 {
             handle.wait_till(Breakpoint::AfterProcessing);
         }
-        // The three deletion tasks are marked as succeeded, and all their details say that one
-        // task has been deleted. Is this the correct behaviour? Probably not!
         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processed");
     }

View File

@@ -8,7 +8,7 @@ source: index-scheduler/src/lib.rs
 ### All Tasks:
 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: 0 }, kind: DocumentImport { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
 2 {uid: 2, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_query: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }}
-3 {uid: 3, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_query: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }}
+3 {uid: 3, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(0), original_query: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [1,]
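
The snapshot change above is the user-visible effect: both deletion tasks matched task 0, but only the first one found it still present, so for the duplicate task `matched_tasks` stays at 1 while `deleted_tasks` drops to Some(0). A short sketch of that distinction, with the bitmaps assumed for illustration:

use roaring::RoaringBitmap;

fn main() {
    // What the duplicate deletion task (uid 3) sees: it matches task 0,
    // but task 0 is no longer among the existing tasks.
    let matched: RoaringBitmap = [0u32].into_iter().collect();
    let existing: RoaringBitmap = [1u32, 2, 3].into_iter().collect();
    let deleted = &existing & &matched;
    println!("matched_tasks: {}, deleted_tasks: Some({})", matched.len(), deleted.len());
    // prints: matched_tasks: 1, deleted_tasks: Some(0)
}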

View File

@@ -8,6 +8,18 @@ use crate::{Error, IndexScheduler, Result, Task, TaskId};
 use meilisearch_types::tasks::{Kind, Status};
 impl IndexScheduler {
+    pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result<RoaringBitmap> {
+        let mut all_tasks = RoaringBitmap::new();
+        for status in [
+            Status::Enqueued,
+            Status::Processing,
+            Status::Succeeded,
+            Status::Failed,
+        ] {
+            all_tasks |= self.get_status(&rtxn, status)?;
+        }
+        Ok(all_tasks)
+    }
     pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result<Option<TaskId>> {
         Ok(self
             .all_tasks
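
For context, the new `all_task_ids` helper simply unions one bitmap per status. A self-contained sketch of that pattern with assumed inputs (in the real method each per-status bitmap comes from LMDB through `get_status`):

use roaring::RoaringBitmap;

// Union a bitmap of task ids per status into the set of all known task ids.
fn union_all(status_bitmaps: &[RoaringBitmap]) -> RoaringBitmap {
    let mut all_tasks = RoaringBitmap::new();
    for bitmap in status_bitmaps {
        all_tasks |= bitmap;
    }
    all_tasks
}

fn main() {
    let enqueued: RoaringBitmap = [1u32].into_iter().collect();
    let succeeded: RoaringBitmap = [0u32, 2, 3].into_iter().collect();
    let all = union_all(&[enqueued, succeeded]);
    assert_eq!(all.len(), 4);
}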