start integrating the index-scheduler in meilisearch-lib

2025-07-03 03:47:02 +02:00 · 2022-09-21 17:13:09 +02:00 · 2022-09-21 17:13:09 +02:00 · 250410495c
commit 250410495c
parent 8f0fd35358
7 changed files with 163 additions and 146 deletions
--- a/index-scheduler/src/error.rs
+++ b/index-scheduler/src/error.rs
@ -1,6 +1,8 @@
 use milli::heed;
 use thiserror::Error;

+use crate::TaskId;
+
 #[derive(Error, Debug)]
 pub enum Error {
    #[error("Index `{0}` not found")]
@ -9,6 +11,8 @@ pub enum Error {
    IndexAlreadyExists(String),
    #[error("Corrupted task queue.")]
    CorruptedTaskQueue,
+    #[error("Task `{0}` not found")]
+    TaskNotFound(TaskId),
    #[error(transparent)]
    Heed(#[from] heed::Error),
    #[error(transparent)]
--- a/index-scheduler/src/index_mapper.rs
+++ b/index-scheduler/src/index_mapper.rs
@ -107,6 +107,16 @@ impl IndexMapper {
        Ok(index)
    }

+    pub fn indexes(&self, rtxn: &RoTxn) -> Result<Vec<Index>> {
+        self.index_mapping
+            .iter(&rtxn)?
+            .map(|ret| {
+                ret.map_err(Error::from)
+                    .and_then(|(name, _)| self.index(rtxn, name))
+            })
+            .collect()
+    }
+
    /// Swap two index name.
    pub fn swap(&self, wtxn: &mut RwTxn, lhs: &str, rhs: &str) -> Result<()> {
        let lhs_uuid = self
--- a/index-scheduler/src/index_scheduler.rs
+++ b/index-scheduler/src/index_scheduler.rs
@ -1,6 +1,6 @@
 use crate::index_mapper::IndexMapper;
 use crate::task::{Kind, KindWithContent, Status, Task, TaskView};
-use crate::{Error, Result};
+use crate::{Error, Result, TaskId};
 use file_store::FileStore;
 use index::Index;
 use milli::update::IndexerConfig;
@ -20,7 +20,7 @@ use serde::Deserialize;

 const DEFAULT_LIMIT: fn() -> u32 = || 20;

-#[derive(Debug, Clone, Deserialize)]
+#[derive(derive_builder::Builder, Debug, Clone, Deserialize)]
 #[serde(rename_all = "camelCase")]
 pub struct Query {
    #[serde(default = "DEFAULT_LIMIT")]
@ -32,6 +32,38 @@ pub struct Query {
    index_uid: Option<Vec<String>>,
 }

+impl Default for Query {
+    fn default() -> Self {
+        Self {
+            limit: DEFAULT_LIMIT(),
+            from: None,
+            status: None,
+            kind: None,
+            index_uid: None,
+        }
+    }
+}
+
+impl Query {
+    pub fn with_status(self, status: Status) -> Self {
+        let mut status_vec = self.status.unwrap_or_default();
+        status_vec.push(status);
+        Self {
+            status: Some(status_vec),
+            ..self
+        }
+    }
+
+    pub fn with_kind(self, kind: Kind) -> Self {
+        let mut kind_vec = self.kind.unwrap_or_default();
+        kind_vec.push(kind);
+        Self {
+            kind: Some(kind_vec),
+            ..self
+        }
+    }
+}
+
 pub mod db_name {
    pub const ALL_TASKS: &str = "all-tasks";
    pub const STATUS: &str = "status";
@ -73,20 +105,20 @@ pub struct IndexScheduler {

 impl IndexScheduler {
    pub fn new(
-        db_path: PathBuf,
+        tasks_path: PathBuf,
        update_file_path: PathBuf,
        indexes_path: PathBuf,
        index_size: usize,
        indexer_config: IndexerConfig,
    ) -> Result<Self> {
-        std::fs::create_dir_all(&db_path)?;
+        std::fs::create_dir_all(&tasks_path)?;
        std::fs::create_dir_all(&update_file_path)?;
        std::fs::create_dir_all(&indexes_path)?;

        let mut options = heed::EnvOpenOptions::new();
        options.max_dbs(6);

-        let env = options.open(db_path)?;
+        let env = options.open(tasks_path)?;
        // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
        let wake_up = SignalEvent::auto(true);

@ -115,6 +147,12 @@ impl IndexScheduler {
        self.index_mapper.index(&rtxn, name)
    }

+    /// Return and open all the indexes.
+    pub fn indexes(&self) -> Result<Vec<Index>> {
+        let rtxn = self.env.read_txn()?;
+        self.index_mapper.indexes(&rtxn)
+    }
+
    /// Returns the tasks corresponding to the query.
    pub fn get_tasks(&self, query: Query) -> Result<Vec<TaskView>> {
        let rtxn = self.env.read_txn()?;
@ -155,6 +193,15 @@ impl IndexScheduler {
        Ok(tasks.into_iter().map(|task| task.as_task_view()).collect())
    }

+    /// Returns the task with the given `uid`, or `Error::TaskNotFound` if no such task exists.
+    pub fn task(&self, uid: TaskId) -> Result<TaskView> {
+        let rtxn = self.env.read_txn()?;
+        self.get_task(&rtxn, uid).and_then(|opt| {
+            opt.ok_or(Error::TaskNotFound(uid))
+                .map(|task| task.as_task_view())
+        })
+    }
+
    /// Register a new task in the scheduler. If it fails and data was associated with the task
    /// it tries to delete the file.
    pub fn register(&self, task: KindWithContent) -> Result<TaskView> {
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@ -9,12 +9,12 @@ mod utils;
 pub type Result<T> = std::result::Result<T, Error>;
 pub type TaskId = u32;

-pub use crate::index_scheduler::IndexScheduler;
+pub use crate::index_scheduler::{IndexScheduler, Query};
 pub use error::Error;
 /// from the exterior you don't need to know there is multiple type of `Kind`
-pub use task::KindWithContent as TaskKind;
+pub use task::KindWithContent;
 /// from the exterior you don't need to know there is multiple type of `Task`
-pub use task::TaskView as Task;
+pub use task::TaskView;

 #[cfg(test)]
 mod tests {