write a test ensuring the index-scheduler is effectively down when the upgrade task fail and try to process it when it restarts. There is a bug when deleting this task

2025-07-04 04:17:10 +02:00 · 2025-01-21 15:00:48 +01:00 · 2025-01-21 15:00:48 +01:00 · 5458850d21
commit 5458850d21
parent 20ac59c946
11 changed files with 550 additions and 17 deletions
--- a/crates/index-scheduler/src/test_utils.rs
+++ b/crates/index-scheduler/src/test_utils.rs
@ -1,10 +1,18 @@
 use std::io::{BufWriter, Write};
 use std::sync::Arc;
+use std::time::Duration;

+use big_s::S;
+use crossbeam_channel::RecvTimeoutError;
 use file_store::File;
 use meilisearch_types::document_formats::DocumentFormatError;
 use meilisearch_types::milli::update::IndexDocumentsMethod::ReplaceDocuments;
+use meilisearch_types::milli::update::IndexerConfig;
+use meilisearch_types::tasks::KindWithContent;
+use meilisearch_types::VERSION_FILE_NAME;
+use tempfile::{NamedTempFile, TempDir};
 use uuid::Uuid;
+use Breakpoint::*;

 use crate::insta_snapshot::snapshot_index_scheduler;
 use crate::{Error, IndexScheduler, IndexSchedulerOptions};
@ -28,20 +36,13 @@ pub(crate) enum FailureLocation {
    InsideCreateBatch,
    InsideProcessBatch,
    PanicInsideProcessBatch,
+    ProcessUpgrade,
    AcquiringWtxn,
    UpdatingTaskAfterProcessBatchSuccess { task_uid: u32 },
    UpdatingTaskAfterProcessBatchFailure,
    CommittingWtxn,
 }

-use big_s::S;
-use crossbeam_channel::RecvTimeoutError;
-use meilisearch_types::milli::update::IndexerConfig;
-use meilisearch_types::tasks::KindWithContent;
-use meilisearch_types::VERSION_FILE_NAME;
-use tempfile::{NamedTempFile, TempDir};
-use Breakpoint::*;
-
 impl IndexScheduler {
    /// Blocks the thread until the test handle asks to progress to/through this breakpoint.
    ///
@ -55,7 +56,6 @@ impl IndexScheduler {
    /// As soon as we find it, the index scheduler is unblocked but then wait again on the call to
    /// `test_breakpoint_sdr.send(b, true)`. This message will only be able to send once the
    /// test asks to progress to the next `(b2, false)`.
-    #[cfg(test)]
    pub(crate) fn breakpoint(&self, b: Breakpoint) {
        // We send two messages. The first one will sync with the call
        // to `handle.wait_until(b)`. The second one will block until the
@ -225,6 +225,46 @@ pub struct IndexSchedulerHandle {
 }

 impl IndexSchedulerHandle {
+    /// Restarts the index-scheduler on the same database.
+    /// To use this function you must give back the index-scheduler that was given to you when
+    /// creating the handle the first time.
+    /// If the index-scheduler has been cloned in the meantime you must drop all copy otherwise
+    /// the function will panic.
+    pub(crate) fn restart(
+        self,
+        index_scheduler: IndexScheduler,
+        autobatching_enabled: bool,
+        planned_failures: Vec<(usize, FailureLocation)>,
+    ) -> (IndexScheduler, Self) {
+        drop(index_scheduler);
+        let Self { _tempdir: tempdir, index_scheduler, test_breakpoint_rcv, last_breakpoint: _ } =
+            self;
+        drop(index_scheduler);
+
+        // We must ensure that the `run` function has stopped running before restarting the index scheduler
+        loop {
+            match test_breakpoint_rcv.recv_timeout(Duration::from_secs(5)) {
+                Ok(_) => continue,
+                Err(RecvTimeoutError::Timeout) => panic!("The indexing loop is stuck somewhere"),
+                Err(RecvTimeoutError::Disconnected) => break,
+            }
+        }
+
+        let (scheduler, mut handle) =
+            IndexScheduler::test_with_custom_config(planned_failures, |config| {
+                config.autobatching_enabled = autobatching_enabled;
+                config.version_file_path = tempdir.path().join(VERSION_FILE_NAME);
+                config.auth_path = tempdir.path().join("auth");
+                config.tasks_path = tempdir.path().join("db_path");
+                config.update_file_path = tempdir.path().join("file_store");
+                config.indexes_path = tempdir.path().join("indexes");
+                config.snapshots_path = tempdir.path().join("snapshots");
+                config.dumps_path = tempdir.path().join("dumps");
+            });
+        handle._tempdir = tempdir;
+        (scheduler, handle)
+    }
+
    /// Advance the scheduler to the next tick.
    /// Panic
    /// * If the scheduler is waiting for a task to be registered.
@ -350,4 +390,18 @@ impl IndexSchedulerHandle {
        }
        self.advance_till([AfterProcessing]);
    }
+
+    // Wait for one failed batch.
+    #[track_caller]
+    pub(crate) fn scheduler_is_down(&mut self) {
+        loop {
+            match self
+            .test_breakpoint_rcv
+            .recv_timeout(std::time::Duration::from_secs(1)) {
+                Ok((_, true)) => continue,
+                Ok((b, false)) => panic!("The scheduler was supposed to be down but successfully moved to the next breakpoint: {b:?}"),
+                Err(RecvTimeoutError::Timeout | RecvTimeoutError::Disconnected) => break,
+            }
+        }
+    }
 }