start integrating the index-scheduler in the meilisearch codebase

Tamo 2022-09-14 16:16:53 +02:00 committed by Clément Renault
parent 9882b7fa57
commit f84ced7e38
30 changed files with 682 additions and 3622 deletions


@@ -1,6 +1,6 @@
use crate::{
autobatcher::BatchKind,
task::{KindWithContent, Status},
task::{Kind, KindWithContent, Status, Task},
Error, IndexScheduler, Result,
};
use index::{Settings, Unchecked};
@@ -10,8 +10,6 @@ use milli::{
};
use uuid::Uuid;
use crate::{task::Kind, Task};
pub(crate) enum Batch {
Cancel(Task),
Snapshot(Vec<Task>),
@@ -230,8 +228,8 @@ impl IndexScheduler {
for (ret, mut task) in ret.iter().zip(document_addition_tasks.into_iter()) {
match ret {
Ok(ret) => task.info = Some(format!("{:?}", ret)),
Err(err) => task.error = Some(err.to_string()),
Ok(ret) => todo!(), // task.info = Some(format!("{:?}", ret)),
Err(err) => todo!(), // task.error = Some(err.to_string()),
}
updated_tasks.push(task);
}


@@ -13,9 +13,11 @@ pub enum Error {
Heed(#[from] heed::Error),
#[error(transparent)]
Milli(#[from] milli::Error),
#[error("{0}")]
#[error(transparent)]
IndexError(#[from] index::error::IndexError),
#[error(transparent)]
FileStore(#[from] file_store::Error),
#[error(transparent)]
IoError(#[from] std::io::Error),
#[error(transparent)]
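
Switching `IndexError` from `#[error("{0}")]` to `#[error(transparent)]`, together with the new `FileStore` and `IoError` variants, means lower-level errors now bubble up through `?` while keeping their original `Display` message. A minimal, self-contained sketch of the pattern (not part of this diff; only the `IoError` variant name mirrors the enum above, the function is illustrative):

use thiserror::Error;

#[derive(Debug, Error)]
pub enum Error {
    #[error(transparent)]
    IoError(#[from] std::io::Error),
}

fn read_update_file(path: &std::path::Path) -> Result<Vec<u8>, Error> {
    // `?` relies on the generated `From<std::io::Error>` impl, and `Display`
    // forwards to the inner io::Error thanks to `transparent`.
    Ok(std::fs::read(path)?)
}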


@@ -8,11 +8,13 @@ use index::Index;
use milli::heed::types::SerdeBincode;
use milli::heed::types::Str;
use milli::heed::Database;
use milli::heed::Env;
use milli::heed::RoTxn;
use milli::heed::RwTxn;
use milli::update::IndexerConfig;
use uuid::Uuid;
use crate::index_scheduler::db_name;
use crate::Error;
use crate::Result;
@@ -31,9 +33,24 @@ pub struct IndexMapper {
}
impl IndexMapper {
pub fn new(
env: &Env,
base_path: PathBuf,
index_size: usize,
indexer_config: IndexerConfig,
) -> Result<Self> {
Ok(Self {
index_map: Arc::default(),
index_mapping: env.create_database(Some(db_name::INDEX_MAPPING))?,
base_path,
index_size,
indexer_config: Arc::new(indexer_config),
})
}
/// Get or create the index.
pub fn create_index(&self, rwtxn: &mut RwTxn, name: &str) -> Result<Index> {
let index = match self.index(rwtxn, name) {
pub fn create_index(&self, wtxn: &mut RwTxn, name: &str) -> Result<Index> {
let index = match self.index(wtxn, name) {
Ok(index) => index,
Err(Error::IndexNotFound(_)) => {
let uuid = Uuid::new_v4();
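
The new `IndexMapper::new` constructor registers the `index-mapping` database inside the scheduler's LMDB environment, and `create_index` is get-or-create: it first looks the name up and only allocates a fresh `Uuid` on `IndexNotFound`. A hedged usage sketch (not in this diff), assuming the caller owns the environment and the write transaction as the scheduler normally does:

// Illustrative only: `env` and `mapper` would come from `IndexScheduler::new` below.
fn ensure_index(env: &milli::heed::Env, mapper: &IndexMapper, name: &str) -> Result<Index> {
    let mut wtxn = env.write_txn()?;
    // returns the existing index, or creates and registers one under a fresh Uuid
    let index = mapper.create_index(&mut wtxn, name)?;
    wtxn.commit()?;
    Ok(index)
}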


@@ -0,0 +1,435 @@
use crate::index_mapper::IndexMapper;
use crate::task::{Kind, KindWithContent, Status, Task, TaskView};
use crate::Result;
use file_store::FileStore;
use index::Index;
use milli::update::IndexerConfig;
use synchronoise::SignalEvent;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::RwLock;
use milli::heed::types::{OwnedType, SerdeBincode, Str};
use milli::heed::{self, Database, Env};
use milli::{RoaringBitmapCodec, BEU32};
use roaring::RoaringBitmap;
use serde::Deserialize;
const DEFAULT_LIMIT: fn() -> u32 = || 20;
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Query {
#[serde(default = "DEFAULT_LIMIT")]
limit: u32,
from: Option<u32>,
status: Option<Vec<Status>>,
#[serde(rename = "type")]
kind: Option<Vec<Kind>>,
index_uid: Option<Vec<String>>,
}
pub mod db_name {
pub const ALL_TASKS: &str = "all-tasks";
pub const STATUS: &str = "status";
pub const KIND: &str = "kind";
pub const INDEX_TASKS: &str = "index-tasks";
pub const INDEX_MAPPING: &str = "index-mapping";
}
/// This module is responsible for two things:
/// 1. Resolve the names of the indexes.
/// 2. Schedule the tasks.
#[derive(Clone)]
pub struct IndexScheduler {
/// The list of tasks currently processing.
pub(crate) processing_tasks: Arc<RwLock<RoaringBitmap>>,
pub(crate) file_store: FileStore,
/// The LMDB environment which the DBs are associated with.
pub(crate) env: Env,
// The main database; it contains all the tasks accessible by their Id.
pub(crate) all_tasks: Database<OwnedType<BEU32>, SerdeBincode<Task>>,
/// All the tasks ids grouped by their status.
pub(crate) status: Database<SerdeBincode<Status>, RoaringBitmapCodec>,
/// All the tasks ids grouped by their kind.
pub(crate) kind: Database<SerdeBincode<Kind>, RoaringBitmapCodec>,
/// Store the tasks associated to an index.
pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
/// In charge of creating, opening, storing and returning indexes.
pub(crate) index_mapper: IndexMapper,
// set to true when there is work to do.
pub(crate) wake_up: Arc<SignalEvent>,
}
impl IndexScheduler {
pub fn new(
db_path: PathBuf,
update_file_path: PathBuf,
indexes_path: PathBuf,
index_size: usize,
indexer_config: IndexerConfig,
) -> Result<Self> {
std::fs::create_dir_all(&db_path)?;
std::fs::create_dir_all(&update_file_path)?;
std::fs::create_dir_all(&indexes_path)?;
let mut options = heed::EnvOpenOptions::new();
options.max_dbs(6);
let env = options.open(db_path)?;
// we want to start the loop right away in case Meilisearch was Ctrl-C'ed while processing things
let wake_up = SignalEvent::auto(true);
Ok(Self {
// by default there are no processing tasks
processing_tasks: Arc::default(),
file_store: FileStore::new(update_file_path)?,
all_tasks: env.create_database(Some(db_name::ALL_TASKS))?,
status: env.create_database(Some(db_name::STATUS))?,
kind: env.create_database(Some(db_name::KIND))?,
index_tasks: env.create_database(Some(db_name::INDEX_TASKS))?,
index_mapper: IndexMapper::new(&env, indexes_path, index_size, indexer_config)?,
env,
wake_up: Arc::new(wake_up),
})
}
/// Return the index corresponding to the name. If it wasn't opened before,
/// it will be opened. If it doesn't exist on disk, an `IndexNotFound` error
/// is returned.
pub fn index(&self, name: &str) -> Result<Index> {
let rtxn = self.env.read_txn()?;
self.index_mapper.index(&rtxn, name)
}
/// Returns the tasks corresponding to the query.
pub fn get_tasks(&self, query: Query) -> Result<Vec<TaskView>> {
let rtxn = self.env.read_txn()?;
let last_task_id = match self.last_task_id(&rtxn)? {
Some(tid) => query.from.map(|from| from.min(tid)).unwrap_or(tid),
None => return Ok(Vec::new()),
};
// This is the list of all the tasks.
let mut tasks = RoaringBitmap::from_iter(0..last_task_id);
if let Some(status) = query.status {
let mut status_tasks = RoaringBitmap::new();
for status in status {
status_tasks |= self.get_status(&rtxn, status)?;
}
tasks &= status_tasks;
}
if let Some(kind) = query.kind {
let mut kind_tasks = RoaringBitmap::new();
for kind in kind {
kind_tasks |= self.get_kind(&rtxn, kind)?;
}
tasks &= kind_tasks;
}
if let Some(index) = query.index_uid {
let mut index_tasks = RoaringBitmap::new();
for index in index {
index_tasks |= self.get_index(&rtxn, &index)?;
}
tasks &= index_tasks;
}
let tasks =
self.get_existing_tasks(&rtxn, tasks.into_iter().rev().take(query.limit as usize))?;
Ok(tasks.into_iter().map(|task| task.as_task_view()).collect())
}
/// Register a new task in the scheduler. If it fails and data was associated with the task
/// it tries to delete the file.
pub fn register(&self, task: KindWithContent) -> Result<TaskView> {
let mut wtxn = self.env.write_txn()?;
let task = Task {
uid: self.next_task_id(&wtxn)?,
enqueued_at: time::OffsetDateTime::now_utc(),
started_at: None,
finished_at: None,
error: None,
details: None,
status: Status::Enqueued,
kind: task,
};
self.all_tasks
.append(&mut wtxn, &BEU32::new(task.uid), &task)?;
if let Some(indexes) = task.indexes() {
for index in indexes {
self.update_index(&mut wtxn, index, |bitmap| drop(bitmap.insert(task.uid)))?;
}
}
self.update_status(&mut wtxn, Status::Enqueued, |bitmap| {
bitmap.insert(task.uid);
})?;
self.update_kind(&mut wtxn, task.kind.as_kind(), |bitmap| {
(bitmap.insert(task.uid));
})?;
// we persist the file last to be sure everything before was applied successfully
task.persist()?;
match wtxn.commit() {
Ok(()) => (),
e @ Err(_) => {
task.remove_data()?;
e?;
}
}
self.notify();
Ok(task.as_task_view())
}
/// This worker function must be run in a different thread and must be run only once.
fn run(&self) {
loop {
self.wake_up.wait();
let mut wtxn = match self.env.write_txn() {
Ok(wtxn) => wtxn,
Err(e) => {
log::error!("{}", e);
continue;
}
};
let batch = match self.create_next_batch(&wtxn) {
Ok(Some(batch)) => batch,
Ok(None) => continue,
Err(e) => {
log::error!("{}", e);
continue;
}
};
// 1. store the starting date with the bitmap of processing tasks
// 2. update the tasks with a starting date *but* do not write anything on disk
// 3. process the tasks
let _res = self.process_batch(&mut wtxn, batch);
// 4. store the updated tasks on disk
// TODO: TAMO: do this later
// must delete the file on disk
// in case of error, must update the tasks with the error
// in case of « success » we must update all the tasks on disk
// self.handle_batch_result(res);
match wtxn.commit() {
Ok(()) => log::info!("A batch of tasks was successfully completed."),
Err(e) => {
log::error!("{}", e);
continue;
}
}
}
}
#[cfg(truc)]
fn process_batch(&self, wtxn: &mut RwTxn, batch: &mut Batch) -> Result<()> {
match batch {
Batch::One(task) => match &task.kind {
KindWithContent::ClearAllDocuments { index_name } => {
self.index(&index_name)?.clear_documents()?;
}
KindWithContent::RenameIndex {
index_name: _,
new_name,
} => {
if self.available_index.get(wtxn, &new_name)?.unwrap_or(false) {
return Err(Error::IndexAlreadyExists(new_name.to_string()));
}
todo!("wait for @guigui insight");
}
KindWithContent::CreateIndex {
index_name,
primary_key,
} => {
if self
.available_index
.get(wtxn, &index_name)?
.unwrap_or(false)
{
return Err(Error::IndexAlreadyExists(index_name.to_string()));
}
self.available_index.put(wtxn, &index_name, &true)?;
// TODO: TAMO: give real info to the index
let index = Index::open(
index_name.to_string(),
index_name.to_string(),
100_000_000,
Arc::default(),
)?;
if let Some(primary_key) = primary_key {
index.update_primary_key(primary_key.to_string())?;
}
self.index_map
.write()
.map_err(|_| Error::CorruptedTaskQueue)?
.insert(index_name.to_string(), index.clone());
}
KindWithContent::DeleteIndex { index_name } => {
if !self.available_index.delete(wtxn, &index_name)? {
return Err(Error::IndexNotFound(index_name.to_string()));
}
if let Some(index) = self
.index_map
.write()
.map_err(|_| Error::CorruptedTaskQueue)?
.remove(index_name)
{
index.delete()?;
} else {
// TODO: TAMO: fix the path
std::fs::remove_file(index_name)?;
}
}
KindWithContent::SwapIndex { lhs, rhs } => {
if !self.available_index.get(wtxn, &lhs)?.unwrap_or(false) {
return Err(Error::IndexNotFound(lhs.to_string()));
}
if !self.available_index.get(wtxn, &rhs)?.unwrap_or(false) {
return Err(Error::IndexNotFound(rhs.to_string()));
}
let lhs_bitmap = self.index_tasks.get(wtxn, lhs)?;
let rhs_bitmap = self.index_tasks.get(wtxn, rhs)?;
// the bitmaps are lazily created and thus may not exist.
if let Some(bitmap) = rhs_bitmap {
self.index_tasks.put(wtxn, lhs, &bitmap)?;
}
if let Some(bitmap) = lhs_bitmap {
self.index_tasks.put(wtxn, rhs, &bitmap)?;
}
let mut index_map = self
.index_map
.write()
.map_err(|_| Error::CorruptedTaskQueue)?;
let lhs_index = index_map.remove(lhs).unwrap();
let rhs_index = index_map.remove(rhs).unwrap();
index_map.insert(lhs.to_string(), rhs_index);
index_map.insert(rhs.to_string(), lhs_index);
}
_ => unreachable!(),
},
Batch::Cancel(_) => todo!(),
Batch::Snapshot(_) => todo!(),
Batch::Dump(_) => todo!(),
Batch::Contiguous { tasks, kind } => {
// it's safe because you can't batch 0 contiguous tasks.
let first_task = &tasks[0];
// and the two kinds of tasks we batch MUST have ONE index name.
let index_name = first_task.indexes().unwrap()[0];
let index = self.index(index_name)?;
match kind {
Kind::DocumentAddition => {
let content_files = tasks.iter().map(|task| match &task.kind {
KindWithContent::DocumentAddition { content_file, .. } => {
content_file.clone()
}
k => unreachable!(
"Internal error, `{:?}` is not supposed to be reachable here",
k.as_kind()
),
});
let results = index.update_documents(
IndexDocumentsMethod::UpdateDocuments,
None,
self.file_store.clone(),
content_files,
)?;
for (task, result) in tasks.iter_mut().zip(results) {
task.finished_at = Some(OffsetDateTime::now_utc());
match result {
Ok(_) => task.status = Status::Succeeded,
Err(_) => task.status = Status::Succeeded,
}
}
}
Kind::DocumentDeletion => {
let ids: Vec<_> = tasks
.iter()
.flat_map(|task| match &task.kind {
KindWithContent::DocumentDeletion { documents_ids, .. } => {
documents_ids.clone()
}
k => unreachable!(
"Internal error, `{:?}` is not supposed to be reachable here",
k.as_kind()
),
})
.collect();
let result = index.delete_documents(&ids);
for task in tasks.iter_mut() {
task.finished_at = Some(OffsetDateTime::now_utc());
match result {
Ok(_) => task.status = Status::Succeeded,
Err(_) => task.status = Status::Succeeded,
}
}
}
_ => unreachable!(),
}
}
Batch::Empty => todo!(),
}
Ok(())
}
/// Notify the scheduler there is or may be work to do.
pub fn notify(&self) {
self.wake_up.signal()
}
}
#[cfg(test)]
mod tests {
use tempfile::TempDir;
use super::*;
fn new() -> IndexScheduler {
let dir = TempDir::new().unwrap();
IndexScheduler::new(
dir.path().join("db_path"),
dir.path().join("file_store"),
dir.path().join("indexes"),
100_000_000,
IndexerConfig::default(),
)
.unwrap()
}
#[test]
fn simple_new() {
new();
}
}
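
End to end, a hedged sketch (not in this diff) of how a caller could drive the scheduler once it is wired into Meilisearch. The constructor arguments mirror the test above; the `CreateIndex` variant and its field names are taken from the `cfg`'d-out code above; the paths, the primary key, and the use of `serde_json` to build a `Query` (its fields are private) are assumptions:

// Illustrative only; paths and identifiers are placeholders.
fn demo() -> Result<()> {
    let scheduler = IndexScheduler::new(
        "data.ms/tasks".into(),
        "data.ms/update_files".into(),
        "data.ms/indexes".into(),
        100_000_000, // index size, as in the test above
        IndexerConfig::default(),
    )?;

    // enqueue an index creation; the returned TaskView is what the HTTP layer will expose
    let _task = scheduler.register(KindWithContent::CreateIndex {
        index_name: "movies".to_string(),
        primary_key: Some("id".to_string()),
    })?;

    // an empty query keeps DEFAULT_LIMIT (20) and applies no status/kind/index filter
    let query: Query = serde_json::from_str("{}").unwrap();
    let _tasks = scheduler.get_tasks(query)?;
    Ok(())
}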


@@ -2,380 +2,16 @@ mod autobatcher;
mod batch;
pub mod error;
mod index_mapper;
mod index_scheduler;
pub mod task;
mod utils;
pub use error::Error;
use file_store::FileStore;
use index::Index;
use index_mapper::IndexMapper;
use synchronoise::SignalEvent;
pub use task::Task;
use task::{Kind, Status};
use std::sync::Arc;
use std::{sync::RwLock};
use milli::heed::types::{OwnedType, SerdeBincode, Str};
use milli::heed::{Database, Env};
use milli::{RoaringBitmapCodec, BEU32};
use roaring::RoaringBitmap;
use serde::Deserialize;
pub type Result<T> = std::result::Result<T, Error>;
pub type TaskId = u32;
type IndexName = String;
type IndexUuid = String;
const DEFAULT_LIMIT: fn() -> u32 = || 20;
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Query {
#[serde(default = "DEFAULT_LIMIT")]
limit: u32,
from: Option<u32>,
status: Option<Vec<Status>>,
#[serde(rename = "type")]
kind: Option<Vec<Kind>>,
index_uid: Option<Vec<String>>,
}
/// This module is responsible for two things:
/// 1. Resolve the names of the indexes.
/// 2. Schedule the tasks.
#[derive(Clone)]
pub struct IndexScheduler {
/// The list of tasks currently processing.
processing_tasks: Arc<RwLock<RoaringBitmap>>,
file_store: FileStore,
/// The LMDB environment which the DBs are associated with.
env: Env,
// The main database; it contains all the tasks accessible by their Id.
all_tasks: Database<OwnedType<BEU32>, SerdeBincode<Task>>,
/// All the tasks ids grouped by their status.
status: Database<SerdeBincode<Status>, RoaringBitmapCodec>,
/// All the tasks ids grouped by their kind.
kind: Database<SerdeBincode<Kind>, RoaringBitmapCodec>,
/// Store the tasks associated to an index.
index_tasks: Database<Str, RoaringBitmapCodec>,
/// In charge of creating and returning indexes.
index_mapper: IndexMapper,
// set to true when there is work to do.
wake_up: Arc<SignalEvent>,
}
impl IndexScheduler {
pub fn new() -> Self {
// we want to start the loop right away in case Meilisearch was Ctrl-C'ed while processing things
let _wake_up = SignalEvent::auto(true);
todo!()
}
/// Return the index corresponding to the name. If it wasn't opened before,
/// it will be opened. If it doesn't exist on disk, an `IndexNotFound` error
/// is returned.
pub fn index(&self, name: &str) -> Result<Index> {
let rtxn = self.env.read_txn()?;
self.index_mapper.index(&rtxn, name)
}
/// Returns the tasks corresponding to the query.
pub fn get_tasks(&self, query: Query) -> Result<Vec<Task>> {
let rtxn = self.env.read_txn()?;
let last_task_id = match self.last_task_id(&rtxn)? {
Some(tid) => query.from.map(|from| from.min(tid)).unwrap_or(tid),
None => return Ok(Vec::new()),
};
// This is the list of all the tasks.
let mut tasks = RoaringBitmap::from_iter(0..last_task_id);
if let Some(status) = query.status {
let mut status_tasks = RoaringBitmap::new();
for status in status {
status_tasks |= self.get_status(&rtxn, status)?;
}
tasks &= status_tasks;
}
if let Some(kind) = query.kind {
let mut kind_tasks = RoaringBitmap::new();
for kind in kind {
kind_tasks |= self.get_kind(&rtxn, kind)?;
}
tasks &= kind_tasks;
}
if let Some(index) = query.index_uid {
let mut index_tasks = RoaringBitmap::new();
for index in index {
index_tasks |= self.get_index(&rtxn, &index)?;
}
tasks &= index_tasks;
}
self.get_existing_tasks(&rtxn, tasks.into_iter().rev().take(query.limit as usize))
}
/// Register a new task in the scheduler. If it fails and data was associated with the task
/// it tries to delete the file.
pub fn register(&self, task: Task) -> Result<()> {
let mut wtxn = self.env.write_txn()?;
let task_id = self.next_task_id(&wtxn)?;
self.all_tasks
.append(&mut wtxn, &BEU32::new(task_id), &task)?;
if let Some(indexes) = task.indexes() {
for index in indexes {
self.update_index(&mut wtxn, index, |bitmap| drop(bitmap.insert(task_id)))?;
}
}
self.update_status(&mut wtxn, Status::Enqueued, |bitmap| {
bitmap.insert(task_id);
})?;
self.update_kind(&mut wtxn, task.kind.as_kind(), |bitmap| {
(bitmap.insert(task_id));
})?;
// we persist the file last to be sure everything before was applied successfully
task.persist()?;
match wtxn.commit() {
Ok(()) => (),
e @ Err(_) => {
task.remove_data()?;
e?;
}
}
self.notify();
Ok(())
}
/// This worker function must be run in a different thread and must be run only once.
fn run(&self) {
loop {
self.wake_up.wait();
let mut wtxn = match self.env.write_txn() {
Ok(wtxn) => wtxn,
Err(e) => {
log::error!("{}", e);
continue;
}
};
let batch = match self.create_next_batch(&wtxn) {
Ok(Some(batch)) => batch,
Ok(None) => continue,
Err(e) => {
log::error!("{}", e);
continue;
}
};
// 1. store the starting date with the bitmap of processing tasks
// 2. update the tasks with a starting date *but* do not write anything on disk
// 3. process the tasks
let _res = self.process_batch(&mut wtxn, batch);
// 4. store the updated tasks on disk
// TODO: TAMO: do this later
// must delete the file on disk
// in case of error, must update the tasks with the error
// in case of « success » we must update all the tasks on disk
// self.handle_batch_result(res);
match wtxn.commit() {
Ok(()) => log::info!("A batch of tasks was successfully completed."),
Err(e) => {
log::error!("{}", e);
continue;
}
}
}
}
#[cfg(truc)]
fn process_batch(&self, wtxn: &mut RwTxn, batch: &mut Batch) -> Result<()> {
match batch {
Batch::One(task) => match &task.kind {
KindWithContent::ClearAllDocuments { index_name } => {
self.index(&index_name)?.clear_documents()?;
}
KindWithContent::RenameIndex {
index_name: _,
new_name,
} => {
if self.available_index.get(wtxn, &new_name)?.unwrap_or(false) {
return Err(Error::IndexAlreadyExists(new_name.to_string()));
}
todo!("wait for @guigui insight");
}
KindWithContent::CreateIndex {
index_name,
primary_key,
} => {
if self
.available_index
.get(wtxn, &index_name)?
.unwrap_or(false)
{
return Err(Error::IndexAlreadyExists(index_name.to_string()));
}
self.available_index.put(wtxn, &index_name, &true)?;
// TODO: TAMO: give real info to the index
let index = Index::open(
index_name.to_string(),
index_name.to_string(),
100_000_000,
Arc::default(),
)?;
if let Some(primary_key) = primary_key {
index.update_primary_key(primary_key.to_string())?;
}
self.index_map
.write()
.map_err(|_| Error::CorruptedTaskQueue)?
.insert(index_name.to_string(), index.clone());
}
KindWithContent::DeleteIndex { index_name } => {
if !self.available_index.delete(wtxn, &index_name)? {
return Err(Error::IndexNotFound(index_name.to_string()));
}
if let Some(index) = self
.index_map
.write()
.map_err(|_| Error::CorruptedTaskQueue)?
.remove(index_name)
{
index.delete()?;
} else {
// TODO: TAMO: fix the path
std::fs::remove_file(index_name)?;
}
}
KindWithContent::SwapIndex { lhs, rhs } => {
if !self.available_index.get(wtxn, &lhs)?.unwrap_or(false) {
return Err(Error::IndexNotFound(lhs.to_string()));
}
if !self.available_index.get(wtxn, &rhs)?.unwrap_or(false) {
return Err(Error::IndexNotFound(rhs.to_string()));
}
let lhs_bitmap = self.index_tasks.get(wtxn, lhs)?;
let rhs_bitmap = self.index_tasks.get(wtxn, rhs)?;
// the bitmaps are lazily created and thus may not exist.
if let Some(bitmap) = rhs_bitmap {
self.index_tasks.put(wtxn, lhs, &bitmap)?;
}
if let Some(bitmap) = lhs_bitmap {
self.index_tasks.put(wtxn, rhs, &bitmap)?;
}
let mut index_map = self
.index_map
.write()
.map_err(|_| Error::CorruptedTaskQueue)?;
let lhs_index = index_map.remove(lhs).unwrap();
let rhs_index = index_map.remove(rhs).unwrap();
index_map.insert(lhs.to_string(), rhs_index);
index_map.insert(rhs.to_string(), lhs_index);
}
_ => unreachable!(),
},
Batch::Cancel(_) => todo!(),
Batch::Snapshot(_) => todo!(),
Batch::Dump(_) => todo!(),
Batch::Contiguous { tasks, kind } => {
// it's safe because you can't batch 0 contiguous tasks.
let first_task = &tasks[0];
// and the two kinds of tasks we batch MUST have ONE index name.
let index_name = first_task.indexes().unwrap()[0];
let index = self.index(index_name)?;
match kind {
Kind::DocumentAddition => {
let content_files = tasks.iter().map(|task| match &task.kind {
KindWithContent::DocumentAddition { content_file, .. } => {
content_file.clone()
}
k => unreachable!(
"Internal error, `{:?}` is not supposed to be reachable here",
k.as_kind()
),
});
let results = index.update_documents(
IndexDocumentsMethod::UpdateDocuments,
None,
self.file_store.clone(),
content_files,
)?;
for (task, result) in tasks.iter_mut().zip(results) {
task.finished_at = Some(OffsetDateTime::now_utc());
match result {
Ok(_) => task.status = Status::Succeeded,
Err(_) => task.status = Status::Succeeded,
}
}
}
Kind::DocumentDeletion => {
let ids: Vec<_> = tasks
.iter()
.flat_map(|task| match &task.kind {
KindWithContent::DocumentDeletion { documents_ids, .. } => {
documents_ids.clone()
}
k => unreachable!(
"Internal error, `{:?}` is not supposed to be reachable here",
k.as_kind()
),
})
.collect();
let result = index.delete_documents(&ids);
for task in tasks.iter_mut() {
task.finished_at = Some(OffsetDateTime::now_utc());
match result {
Ok(_) => task.status = Status::Succeeded,
Err(_) => task.status = Status::Succeeded,
}
}
}
_ => unreachable!(),
}
}
Batch::Empty => todo!(),
}
Ok(())
}
/// Notify the scheduler there is or may be work to do.
pub fn notify(&self) {
self.wake_up.signal()
}
}
pub use crate::index_scheduler::IndexScheduler;
pub use error::Error;
/// from the exterior you don't need to know there are multiple types of `Kind`
pub use task::KindWithContent as TaskKind;
/// from the exterior you don't need to know there are multiple types of `Task`
pub use task::TaskView as Task;
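
With the implementation moved into the `index_scheduler` module, `lib.rs` shrinks to declarations and re-exports: downstream crates keep importing the name `Task`, but it now resolves to the serializable `TaskView`, and `TaskKind` to `KindWithContent`. A hedged sketch (not in this diff) of what a consumer sees through those re-exports:

// Hypothetical downstream usage; only the re-exported names come from this commit.
use index_scheduler::{IndexScheduler, Result, Task, TaskKind};

fn enqueue(scheduler: &IndexScheduler, kind: TaskKind) -> Result<Task> {
    // `Task` here is really `task::TaskView`, and `TaskKind` is `task::KindWithContent`
    scheduler.register(kind)
}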


@@ -1,9 +1,9 @@
use anyhow::Result;
use index::{Settings, Unchecked};
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
use time::OffsetDateTime;
use serde::{Deserialize, Serialize, Serializer};
use std::{fmt::Write, path::PathBuf};
use time::{Duration, OffsetDateTime};
use uuid::Uuid;
use crate::TaskId;
@@ -17,6 +17,38 @@ pub enum Status {
Failed,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Error {
message: String,
code: String,
#[serde(rename = "type")]
kind: String,
link: String,
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct TaskView {
pub uid: TaskId,
pub index_uid: Option<String>,
pub status: Status,
#[serde(rename = "type")]
pub kind: Kind,
pub details: Option<Details>,
pub error: Option<Error>,
#[serde(serialize_with = "serialize_duration")]
pub duration: Option<Duration>,
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339::option")]
pub started_at: Option<OffsetDateTime>,
#[serde(with = "time::serde::rfc3339::option")]
pub finished_at: Option<OffsetDateTime>,
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Task {
@@ -29,8 +61,8 @@ pub struct Task {
#[serde(with = "time::serde::rfc3339::option")]
pub finished_at: Option<OffsetDateTime>,
pub error: Option<String>,
pub info: Option<String>,
pub error: Option<Error>,
pub details: Option<Details>,
pub status: Status,
pub kind: KindWithContent,
@@ -51,6 +83,27 @@ impl Task {
pub fn indexes(&self) -> Option<Vec<&str>> {
self.kind.indexes()
}
/// Convert a Task to a TaskView
pub fn as_task_view(&self) -> TaskView {
TaskView {
uid: self.uid,
index_uid: self
.indexes()
.and_then(|vec| vec.first().map(|i| i.to_string())),
status: self.status,
kind: self.kind.as_kind(),
details: self.details.clone(),
error: self.error.clone(),
duration: self
.started_at
.zip(self.finished_at)
.map(|(start, end)| end - start),
enqueued_at: self.enqueued_at,
started_at: self.started_at,
finished_at: self.finished_at,
}
}
}
#[derive(Debug, Serialize, Deserialize)]
@@ -215,3 +268,81 @@ pub enum Kind {
DumpExport,
Snapshot,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
#[allow(clippy::large_enum_variant)]
pub enum Details {
#[serde(rename_all = "camelCase")]
DocumentAddition {
received_documents: usize,
indexed_documents: Option<u64>,
},
#[serde(rename_all = "camelCase")]
Settings {
#[serde(flatten)]
settings: Settings<Unchecked>,
},
#[serde(rename_all = "camelCase")]
IndexInfo { primary_key: Option<String> },
#[serde(rename_all = "camelCase")]
DocumentDeletion {
received_document_ids: usize,
deleted_documents: Option<u64>,
},
#[serde(rename_all = "camelCase")]
ClearAll { deleted_documents: Option<u64> },
#[serde(rename_all = "camelCase")]
Dump { dump_uid: String },
}
/// Serialize a `time::Duration` as a best-effort ISO 8601 duration while waiting for
/// https://github.com/time-rs/time/issues/378.
/// This code is a port of the old `time` code that was removed in 0.2.
fn serialize_duration<S: Serializer>(
duration: &Option<Duration>,
serializer: S,
) -> Result<S::Ok, S::Error> {
match duration {
Some(duration) => {
// technically speaking, negative duration is not valid ISO 8601
if duration.is_negative() {
return serializer.serialize_none();
}
const SECS_PER_DAY: i64 = Duration::DAY.whole_seconds();
let secs = duration.whole_seconds();
let days = secs / SECS_PER_DAY;
let secs = secs - days * SECS_PER_DAY;
let hasdate = days != 0;
let nanos = duration.subsec_nanoseconds();
let hastime = (secs != 0 || nanos != 0) || !hasdate;
// all the following unwraps can't fail
let mut res = String::new();
write!(&mut res, "P").unwrap();
if hasdate {
write!(&mut res, "{}D", days).unwrap();
}
const NANOS_PER_MILLI: i32 = Duration::MILLISECOND.subsec_nanoseconds();
const NANOS_PER_MICRO: i32 = Duration::MICROSECOND.subsec_nanoseconds();
if hastime {
if nanos == 0 {
write!(&mut res, "T{}S", secs).unwrap();
} else if nanos % NANOS_PER_MILLI == 0 {
write!(&mut res, "T{}.{:03}S", secs, nanos / NANOS_PER_MILLI).unwrap();
} else if nanos % NANOS_PER_MICRO == 0 {
write!(&mut res, "T{}.{:06}S", secs, nanos / NANOS_PER_MICRO).unwrap();
} else {
write!(&mut res, "T{}.{:09}S", secs, nanos).unwrap();
}
}
serializer.serialize_str(&res)
}
None => serializer.serialize_none(),
}
}
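
To make the serializer concrete, a small hedged check (not in this diff): a helper struct lets serde drive `serialize_duration`, and the assertions show the ISO 8601-style strings it produces; the use of `serde_json` here is an assumption.

use serde::Serialize;
use time::Duration;

#[derive(Serialize)]
struct Wrapper(#[serde(serialize_with = "serialize_duration")] Option<Duration>);

fn main() {
    // two days and half a second -> date part "2D", time part "0.500S"
    let d = Duration::days(2) + Duration::milliseconds(500);
    assert_eq!(serde_json::to_string(&Wrapper(Some(d))).unwrap(), r#""P2DT0.500S""#);

    // exactly one hour -> no date part, whole seconds only
    let h = Duration::hours(1);
    assert_eq!(serde_json::to_string(&Wrapper(Some(h))).unwrap(), r#""PT3600S""#);
}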


@@ -7,8 +7,8 @@ use milli::{
use roaring::RoaringBitmap;
use crate::{
task::{Kind, Status},
Error, IndexScheduler, Result, Task, TaskId,
task::{Kind, Status, Task},
Error, IndexScheduler, Result, TaskId,
};
impl IndexScheduler {