mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 03:47:02 +02:00
start integrating the index-scheduler in the meilisearch codebase
This commit is contained in:
parent
b816535e33
commit
fc098022c7
28 changed files with 679 additions and 3170 deletions
|
@ -1,6 +1,6 @@
|
|||
use crate::{
|
||||
autobatcher::BatchKind,
|
||||
task::{KindWithContent, Status},
|
||||
task::{Kind, KindWithContent, Status, Task},
|
||||
Error, IndexScheduler, Result,
|
||||
};
|
||||
use index::{Settings, Unchecked};
|
||||
|
@ -10,8 +10,6 @@ use milli::{
|
|||
};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{task::Kind, Task};
|
||||
|
||||
pub(crate) enum Batch {
|
||||
Cancel(Task),
|
||||
Snapshot(Vec<Task>),
|
||||
|
@ -230,8 +228,8 @@ impl IndexScheduler {
|
|||
|
||||
for (ret, mut task) in ret.iter().zip(document_addition_tasks.into_iter()) {
|
||||
match ret {
|
||||
Ok(ret) => task.info = Some(format!("{:?}", ret)),
|
||||
Err(err) => task.error = Some(err.to_string()),
|
||||
Ok(ret) => todo!(), // task.info = Some(format!("{:?}", ret)),
|
||||
Err(err) => todo!(), // task.error = Some(err.to_string()),
|
||||
}
|
||||
updated_tasks.push(task);
|
||||
}
|
||||
|
|
|
@ -13,9 +13,11 @@ pub enum Error {
|
|||
Heed(#[from] heed::Error),
|
||||
#[error(transparent)]
|
||||
Milli(#[from] milli::Error),
|
||||
#[error("{0}")]
|
||||
#[error(transparent)]
|
||||
IndexError(#[from] index::error::IndexError),
|
||||
#[error(transparent)]
|
||||
FileStore(#[from] file_store::Error),
|
||||
#[error(transparent)]
|
||||
IoError(#[from] std::io::Error),
|
||||
|
||||
#[error(transparent)]
|
||||
|
|
|
@ -8,11 +8,13 @@ use index::Index;
|
|||
use milli::heed::types::SerdeBincode;
|
||||
use milli::heed::types::Str;
|
||||
use milli::heed::Database;
|
||||
use milli::heed::Env;
|
||||
use milli::heed::RoTxn;
|
||||
use milli::heed::RwTxn;
|
||||
use milli::update::IndexerConfig;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::index_scheduler::db_name;
|
||||
use crate::Error;
|
||||
use crate::Result;
|
||||
|
||||
|
@ -31,9 +33,24 @@ pub struct IndexMapper {
|
|||
}
|
||||
|
||||
impl IndexMapper {
|
||||
pub fn new(
|
||||
env: &Env,
|
||||
base_path: PathBuf,
|
||||
index_size: usize,
|
||||
indexer_config: IndexerConfig,
|
||||
) -> Result<Self> {
|
||||
Ok(Self {
|
||||
index_map: Arc::default(),
|
||||
index_mapping: env.create_database(Some(db_name::INDEX_MAPPING))?,
|
||||
base_path,
|
||||
index_size,
|
||||
indexer_config: Arc::new(indexer_config),
|
||||
})
|
||||
}
|
||||
|
||||
/// Get or create the index.
|
||||
pub fn create_index(&self, rwtxn: &mut RwTxn, name: &str) -> Result<Index> {
|
||||
let index = match self.index(rwtxn, name) {
|
||||
pub fn create_index(&self, wtxn: &mut RwTxn, name: &str) -> Result<Index> {
|
||||
let index = match self.index(wtxn, name) {
|
||||
Ok(index) => index,
|
||||
Err(Error::IndexNotFound(_)) => {
|
||||
let uuid = Uuid::new_v4();
|
||||
|
|
435
index-scheduler/src/index_scheduler.rs
Normal file
435
index-scheduler/src/index_scheduler.rs
Normal file
|
@ -0,0 +1,435 @@
|
|||
use crate::index_mapper::IndexMapper;
|
||||
use crate::task::{Kind, KindWithContent, Status, Task, TaskView};
|
||||
use crate::Result;
|
||||
use file_store::FileStore;
|
||||
use index::Index;
|
||||
use milli::update::IndexerConfig;
|
||||
use synchronoise::SignalEvent;
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::sync::RwLock;
|
||||
|
||||
use milli::heed::types::{OwnedType, SerdeBincode, Str};
|
||||
use milli::heed::{self, Database, Env};
|
||||
|
||||
use milli::{RoaringBitmapCodec, BEU32};
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::Deserialize;
|
||||
|
||||
const DEFAULT_LIMIT: fn() -> u32 = || 20;
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Query {
|
||||
#[serde(default = "DEFAULT_LIMIT")]
|
||||
limit: u32,
|
||||
from: Option<u32>,
|
||||
status: Option<Vec<Status>>,
|
||||
#[serde(rename = "type")]
|
||||
kind: Option<Vec<Kind>>,
|
||||
index_uid: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
pub mod db_name {
|
||||
pub const ALL_TASKS: &str = "all-tasks";
|
||||
pub const STATUS: &str = "status";
|
||||
pub const KIND: &str = "kind";
|
||||
pub const INDEX_TASKS: &str = "index-tasks";
|
||||
|
||||
pub const INDEX_MAPPING: &str = "index-mapping";
|
||||
}
|
||||
|
||||
/// This module is responsible for two things;
|
||||
/// 1. Resolve the name of the indexes.
|
||||
/// 2. Schedule the tasks.
|
||||
#[derive(Clone)]
|
||||
pub struct IndexScheduler {
|
||||
/// The list of tasks currently processing.
|
||||
pub(crate) processing_tasks: Arc<RwLock<RoaringBitmap>>,
|
||||
|
||||
pub(crate) file_store: FileStore,
|
||||
|
||||
/// The LMDB environment which the DBs are associated with.
|
||||
pub(crate) env: Env,
|
||||
|
||||
// The main database, it contains all the tasks accessible by their Id.
|
||||
pub(crate) all_tasks: Database<OwnedType<BEU32>, SerdeBincode<Task>>,
|
||||
|
||||
/// All the tasks ids grouped by their status.
|
||||
pub(crate) status: Database<SerdeBincode<Status>, RoaringBitmapCodec>,
|
||||
/// All the tasks ids grouped by their kind.
|
||||
pub(crate) kind: Database<SerdeBincode<Kind>, RoaringBitmapCodec>,
|
||||
/// Store the tasks associated to an index.
|
||||
pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
|
||||
|
||||
/// In charge of creating, opening, storing and returning indexes.
|
||||
pub(crate) index_mapper: IndexMapper,
|
||||
|
||||
// set to true when there is work to do.
|
||||
pub(crate) wake_up: Arc<SignalEvent>,
|
||||
}
|
||||
|
||||
impl IndexScheduler {
|
||||
pub fn new(
|
||||
db_path: PathBuf,
|
||||
update_file_path: PathBuf,
|
||||
indexes_path: PathBuf,
|
||||
index_size: usize,
|
||||
indexer_config: IndexerConfig,
|
||||
) -> Result<Self> {
|
||||
std::fs::create_dir_all(&db_path)?;
|
||||
std::fs::create_dir_all(&update_file_path)?;
|
||||
std::fs::create_dir_all(&indexes_path)?;
|
||||
|
||||
let mut options = heed::EnvOpenOptions::new();
|
||||
options.max_dbs(6);
|
||||
|
||||
let env = options.open(db_path)?;
|
||||
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
|
||||
let wake_up = SignalEvent::auto(true);
|
||||
|
||||
Ok(Self {
|
||||
// by default there is no processing tasks
|
||||
processing_tasks: Arc::default(),
|
||||
file_store: FileStore::new(update_file_path)?,
|
||||
all_tasks: env.create_database(Some(db_name::ALL_TASKS))?,
|
||||
status: env.create_database(Some(db_name::STATUS))?,
|
||||
kind: env.create_database(Some(db_name::KIND))?,
|
||||
index_tasks: env.create_database(Some(db_name::INDEX_TASKS))?,
|
||||
index_mapper: IndexMapper::new(&env, indexes_path, index_size, indexer_config)?,
|
||||
env,
|
||||
wake_up: Arc::new(wake_up),
|
||||
})
|
||||
}
|
||||
|
||||
/// Return the index corresponding to the name. If it wasn't opened before
|
||||
/// it'll be opened. But if it doesn't exist on disk it'll throw an
|
||||
/// `IndexNotFound` error.
|
||||
pub fn index(&self, name: &str) -> Result<Index> {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
self.index_mapper.index(&rtxn, name)
|
||||
}
|
||||
|
||||
/// Returns the tasks corresponding to the query.
|
||||
pub fn get_tasks(&self, query: Query) -> Result<Vec<TaskView>> {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
let last_task_id = match self.last_task_id(&rtxn)? {
|
||||
Some(tid) => query.from.map(|from| from.min(tid)).unwrap_or(tid),
|
||||
None => return Ok(Vec::new()),
|
||||
};
|
||||
|
||||
// This is the list of all the tasks.
|
||||
let mut tasks = RoaringBitmap::from_iter(0..last_task_id);
|
||||
|
||||
if let Some(status) = query.status {
|
||||
let mut status_tasks = RoaringBitmap::new();
|
||||
for status in status {
|
||||
status_tasks |= self.get_status(&rtxn, status)?;
|
||||
}
|
||||
tasks &= status_tasks;
|
||||
}
|
||||
|
||||
if let Some(kind) = query.kind {
|
||||
let mut kind_tasks = RoaringBitmap::new();
|
||||
for kind in kind {
|
||||
kind_tasks |= self.get_kind(&rtxn, kind)?;
|
||||
}
|
||||
tasks &= kind_tasks;
|
||||
}
|
||||
|
||||
if let Some(index) = query.index_uid {
|
||||
let mut index_tasks = RoaringBitmap::new();
|
||||
for index in index {
|
||||
index_tasks |= self.get_index(&rtxn, &index)?;
|
||||
}
|
||||
tasks &= index_tasks;
|
||||
}
|
||||
|
||||
let tasks =
|
||||
self.get_existing_tasks(&rtxn, tasks.into_iter().rev().take(query.limit as usize))?;
|
||||
Ok(tasks.into_iter().map(|task| task.as_task_view()).collect())
|
||||
}
|
||||
|
||||
/// Register a new task in the scheduler. If it fails and data was associated with the task
|
||||
/// it tries to delete the file.
|
||||
pub fn register(&self, task: KindWithContent) -> Result<TaskView> {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
|
||||
let task = Task {
|
||||
uid: self.next_task_id(&wtxn)?,
|
||||
enqueued_at: time::OffsetDateTime::now_utc(),
|
||||
started_at: None,
|
||||
finished_at: None,
|
||||
error: None,
|
||||
details: None,
|
||||
status: Status::Enqueued,
|
||||
kind: task,
|
||||
};
|
||||
|
||||
self.all_tasks
|
||||
.append(&mut wtxn, &BEU32::new(task.uid), &task)?;
|
||||
|
||||
if let Some(indexes) = task.indexes() {
|
||||
for index in indexes {
|
||||
self.update_index(&mut wtxn, index, |bitmap| drop(bitmap.insert(task.uid)))?;
|
||||
}
|
||||
}
|
||||
|
||||
self.update_status(&mut wtxn, Status::Enqueued, |bitmap| {
|
||||
bitmap.insert(task.uid);
|
||||
})?;
|
||||
|
||||
self.update_kind(&mut wtxn, task.kind.as_kind(), |bitmap| {
|
||||
(bitmap.insert(task.uid));
|
||||
})?;
|
||||
|
||||
// we persist the file in last to be sure everything before was applied successfuly
|
||||
task.persist()?;
|
||||
|
||||
match wtxn.commit() {
|
||||
Ok(()) => (),
|
||||
e @ Err(_) => {
|
||||
task.remove_data()?;
|
||||
e?;
|
||||
}
|
||||
}
|
||||
|
||||
self.notify();
|
||||
|
||||
Ok(task.as_task_view())
|
||||
}
|
||||
|
||||
/// This worker function must be run in a different thread and must be run only once.
|
||||
fn run(&self) {
|
||||
loop {
|
||||
self.wake_up.wait();
|
||||
|
||||
let mut wtxn = match self.env.write_txn() {
|
||||
Ok(wtxn) => wtxn,
|
||||
Err(e) => {
|
||||
log::error!("{}", e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let batch = match self.create_next_batch(&wtxn) {
|
||||
Ok(Some(batch)) => batch,
|
||||
Ok(None) => continue,
|
||||
Err(e) => {
|
||||
log::error!("{}", e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
// 1. store the starting date with the bitmap of processing tasks
|
||||
// 2. update the tasks with a starting date *but* do not write anything on disk
|
||||
|
||||
// 3. process the tasks
|
||||
let _res = self.process_batch(&mut wtxn, batch);
|
||||
|
||||
// 4. store the updated tasks on disk
|
||||
|
||||
// TODO: TAMO: do this later
|
||||
// must delete the file on disk
|
||||
// in case of error, must update the tasks with the error
|
||||
// in case of « success » we must update all the task on disk
|
||||
// self.handle_batch_result(res);
|
||||
|
||||
match wtxn.commit() {
|
||||
Ok(()) => log::info!("A batch of tasks was successfully completed."),
|
||||
Err(e) => {
|
||||
log::error!("{}", e);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(truc)]
|
||||
fn process_batch(&self, wtxn: &mut RwTxn, batch: &mut Batch) -> Result<()> {
|
||||
match batch {
|
||||
Batch::One(task) => match &task.kind {
|
||||
KindWithContent::ClearAllDocuments { index_name } => {
|
||||
self.index(&index_name)?.clear_documents()?;
|
||||
}
|
||||
KindWithContent::RenameIndex {
|
||||
index_name: _,
|
||||
new_name,
|
||||
} => {
|
||||
if self.available_index.get(wtxn, &new_name)?.unwrap_or(false) {
|
||||
return Err(Error::IndexAlreadyExists(new_name.to_string()));
|
||||
}
|
||||
todo!("wait for @guigui insight");
|
||||
}
|
||||
KindWithContent::CreateIndex {
|
||||
index_name,
|
||||
primary_key,
|
||||
} => {
|
||||
if self
|
||||
.available_index
|
||||
.get(wtxn, &index_name)?
|
||||
.unwrap_or(false)
|
||||
{
|
||||
return Err(Error::IndexAlreadyExists(index_name.to_string()));
|
||||
}
|
||||
|
||||
self.available_index.put(wtxn, &index_name, &true)?;
|
||||
// TODO: TAMO: give real info to the index
|
||||
let index = Index::open(
|
||||
index_name.to_string(),
|
||||
index_name.to_string(),
|
||||
100_000_000,
|
||||
Arc::default(),
|
||||
)?;
|
||||
if let Some(primary_key) = primary_key {
|
||||
index.update_primary_key(primary_key.to_string())?;
|
||||
}
|
||||
self.index_map
|
||||
.write()
|
||||
.map_err(|_| Error::CorruptedTaskQueue)?
|
||||
.insert(index_name.to_string(), index.clone());
|
||||
}
|
||||
KindWithContent::DeleteIndex { index_name } => {
|
||||
if !self.available_index.delete(wtxn, &index_name)? {
|
||||
return Err(Error::IndexNotFound(index_name.to_string()));
|
||||
}
|
||||
if let Some(index) = self
|
||||
.index_map
|
||||
.write()
|
||||
.map_err(|_| Error::CorruptedTaskQueue)?
|
||||
.remove(index_name)
|
||||
{
|
||||
index.delete()?;
|
||||
} else {
|
||||
// TODO: TAMO: fix the path
|
||||
std::fs::remove_file(index_name)?;
|
||||
}
|
||||
}
|
||||
KindWithContent::SwapIndex { lhs, rhs } => {
|
||||
if !self.available_index.get(wtxn, &lhs)?.unwrap_or(false) {
|
||||
return Err(Error::IndexNotFound(lhs.to_string()));
|
||||
}
|
||||
if !self.available_index.get(wtxn, &rhs)?.unwrap_or(false) {
|
||||
return Err(Error::IndexNotFound(rhs.to_string()));
|
||||
}
|
||||
|
||||
let lhs_bitmap = self.index_tasks.get(wtxn, lhs)?;
|
||||
let rhs_bitmap = self.index_tasks.get(wtxn, rhs)?;
|
||||
// the bitmap are lazily created and thus may not exists.
|
||||
if let Some(bitmap) = rhs_bitmap {
|
||||
self.index_tasks.put(wtxn, lhs, &bitmap)?;
|
||||
}
|
||||
if let Some(bitmap) = lhs_bitmap {
|
||||
self.index_tasks.put(wtxn, rhs, &bitmap)?;
|
||||
}
|
||||
|
||||
let mut index_map = self
|
||||
.index_map
|
||||
.write()
|
||||
.map_err(|_| Error::CorruptedTaskQueue)?;
|
||||
|
||||
let lhs_index = index_map.remove(lhs).unwrap();
|
||||
let rhs_index = index_map.remove(rhs).unwrap();
|
||||
|
||||
index_map.insert(lhs.to_string(), rhs_index);
|
||||
index_map.insert(rhs.to_string(), lhs_index);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
},
|
||||
Batch::Cancel(_) => todo!(),
|
||||
Batch::Snapshot(_) => todo!(),
|
||||
Batch::Dump(_) => todo!(),
|
||||
Batch::Contiguous { tasks, kind } => {
|
||||
// it's safe because you can't batch 0 contiguous tasks.
|
||||
let first_task = &tasks[0];
|
||||
// and the two kind of tasks we batch MUST have ONE index name.
|
||||
let index_name = first_task.indexes().unwrap()[0];
|
||||
let index = self.index(index_name)?;
|
||||
|
||||
match kind {
|
||||
Kind::DocumentAddition => {
|
||||
let content_files = tasks.iter().map(|task| match &task.kind {
|
||||
KindWithContent::DocumentAddition { content_file, .. } => {
|
||||
content_file.clone()
|
||||
}
|
||||
k => unreachable!(
|
||||
"Internal error, `{:?}` is not supposed to be reachable here",
|
||||
k.as_kind()
|
||||
),
|
||||
});
|
||||
let results = index.update_documents(
|
||||
IndexDocumentsMethod::UpdateDocuments,
|
||||
None,
|
||||
self.file_store.clone(),
|
||||
content_files,
|
||||
)?;
|
||||
|
||||
for (task, result) in tasks.iter_mut().zip(results) {
|
||||
task.finished_at = Some(OffsetDateTime::now_utc());
|
||||
match result {
|
||||
Ok(_) => task.status = Status::Succeeded,
|
||||
Err(_) => task.status = Status::Succeeded,
|
||||
}
|
||||
}
|
||||
}
|
||||
Kind::DocumentDeletion => {
|
||||
let ids: Vec<_> = tasks
|
||||
.iter()
|
||||
.flat_map(|task| match &task.kind {
|
||||
KindWithContent::DocumentDeletion { documents_ids, .. } => {
|
||||
documents_ids.clone()
|
||||
}
|
||||
k => unreachable!(
|
||||
"Internal error, `{:?}` is not supposed to be reachable here",
|
||||
k.as_kind()
|
||||
),
|
||||
})
|
||||
.collect();
|
||||
|
||||
let result = index.delete_documents(&ids);
|
||||
|
||||
for task in tasks.iter_mut() {
|
||||
task.finished_at = Some(OffsetDateTime::now_utc());
|
||||
match result {
|
||||
Ok(_) => task.status = Status::Succeeded,
|
||||
Err(_) => task.status = Status::Succeeded,
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
Batch::Empty => todo!(),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Notify the scheduler there is or may be work to do.
|
||||
pub fn notify(&self) {
|
||||
self.wake_up.signal()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use tempfile::TempDir;
|
||||
|
||||
use super::*;
|
||||
|
||||
fn new() -> IndexScheduler {
|
||||
let dir = TempDir::new().unwrap();
|
||||
IndexScheduler::new(
|
||||
dir.path().join("db_path"),
|
||||
dir.path().join("file_store"),
|
||||
dir.path().join("indexes"),
|
||||
100_000_000,
|
||||
IndexerConfig::default(),
|
||||
)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple_new() {
|
||||
new();
|
||||
}
|
||||
}
|
|
@ -2,380 +2,16 @@ mod autobatcher;
|
|||
mod batch;
|
||||
pub mod error;
|
||||
mod index_mapper;
|
||||
mod index_scheduler;
|
||||
pub mod task;
|
||||
mod utils;
|
||||
|
||||
|
||||
pub use error::Error;
|
||||
use file_store::FileStore;
|
||||
use index::Index;
|
||||
use index_mapper::IndexMapper;
|
||||
use synchronoise::SignalEvent;
|
||||
pub use task::Task;
|
||||
use task::{Kind, Status};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::{sync::RwLock};
|
||||
|
||||
use milli::heed::types::{OwnedType, SerdeBincode, Str};
|
||||
use milli::heed::{Database, Env};
|
||||
|
||||
use milli::{RoaringBitmapCodec, BEU32};
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::Deserialize;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
pub type TaskId = u32;
|
||||
type IndexName = String;
|
||||
type IndexUuid = String;
|
||||
|
||||
const DEFAULT_LIMIT: fn() -> u32 = || 20;
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Query {
|
||||
#[serde(default = "DEFAULT_LIMIT")]
|
||||
limit: u32,
|
||||
from: Option<u32>,
|
||||
status: Option<Vec<Status>>,
|
||||
#[serde(rename = "type")]
|
||||
kind: Option<Vec<Kind>>,
|
||||
index_uid: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
/// This module is responsible for two things;
|
||||
/// 1. Resolve the name of the indexes.
|
||||
/// 2. Schedule the tasks.
|
||||
#[derive(Clone)]
|
||||
pub struct IndexScheduler {
|
||||
/// The list of tasks currently processing.
|
||||
processing_tasks: Arc<RwLock<RoaringBitmap>>,
|
||||
|
||||
file_store: FileStore,
|
||||
|
||||
/// The LMDB environment which the DBs are associated with.
|
||||
env: Env,
|
||||
|
||||
// The main database, it contains all the tasks accessible by their Id.
|
||||
all_tasks: Database<OwnedType<BEU32>, SerdeBincode<Task>>,
|
||||
|
||||
/// All the tasks ids grouped by their status.
|
||||
status: Database<SerdeBincode<Status>, RoaringBitmapCodec>,
|
||||
/// All the tasks ids grouped by their kind.
|
||||
kind: Database<SerdeBincode<Kind>, RoaringBitmapCodec>,
|
||||
/// Store the tasks associated to an index.
|
||||
index_tasks: Database<Str, RoaringBitmapCodec>,
|
||||
|
||||
/// In charge of creating and returning indexes.
|
||||
index_mapper: IndexMapper,
|
||||
|
||||
// set to true when there is work to do.
|
||||
wake_up: Arc<SignalEvent>,
|
||||
}
|
||||
|
||||
impl IndexScheduler {
|
||||
pub fn new() -> Self {
|
||||
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
|
||||
let _wake_up = SignalEvent::auto(true);
|
||||
todo!()
|
||||
}
|
||||
|
||||
/// Return the index corresponding to the name. If it wasn't opened before
|
||||
/// it'll be opened. But if it doesn't exist on disk it'll throw an
|
||||
/// `IndexNotFound` error.
|
||||
pub fn index(&self, name: &str) -> Result<Index> {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
self.index_mapper.index(&rtxn, name)
|
||||
}
|
||||
|
||||
/// Returns the tasks corresponding to the query.
|
||||
pub fn get_tasks(&self, query: Query) -> Result<Vec<Task>> {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
let last_task_id = match self.last_task_id(&rtxn)? {
|
||||
Some(tid) => query.from.map(|from| from.min(tid)).unwrap_or(tid),
|
||||
None => return Ok(Vec::new()),
|
||||
};
|
||||
|
||||
// This is the list of all the tasks.
|
||||
let mut tasks = RoaringBitmap::from_iter(0..last_task_id);
|
||||
|
||||
if let Some(status) = query.status {
|
||||
let mut status_tasks = RoaringBitmap::new();
|
||||
for status in status {
|
||||
status_tasks |= self.get_status(&rtxn, status)?;
|
||||
}
|
||||
tasks &= status_tasks;
|
||||
}
|
||||
|
||||
if let Some(kind) = query.kind {
|
||||
let mut kind_tasks = RoaringBitmap::new();
|
||||
for kind in kind {
|
||||
kind_tasks |= self.get_kind(&rtxn, kind)?;
|
||||
}
|
||||
tasks &= kind_tasks;
|
||||
}
|
||||
|
||||
if let Some(index) = query.index_uid {
|
||||
let mut index_tasks = RoaringBitmap::new();
|
||||
for index in index {
|
||||
index_tasks |= self.get_index(&rtxn, &index)?;
|
||||
}
|
||||
tasks &= index_tasks;
|
||||
}
|
||||
|
||||
self.get_existing_tasks(&rtxn, tasks.into_iter().rev().take(query.limit as usize))
|
||||
}
|
||||
|
||||
/// Register a new task in the scheduler. If it fails and data was associated with the task
|
||||
/// it tries to delete the file.
|
||||
pub fn register(&self, task: Task) -> Result<()> {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
|
||||
let task_id = self.next_task_id(&wtxn)?;
|
||||
|
||||
self.all_tasks
|
||||
.append(&mut wtxn, &BEU32::new(task_id), &task)?;
|
||||
|
||||
if let Some(indexes) = task.indexes() {
|
||||
for index in indexes {
|
||||
self.update_index(&mut wtxn, index, |bitmap| drop(bitmap.insert(task_id)))?;
|
||||
}
|
||||
}
|
||||
|
||||
self.update_status(&mut wtxn, Status::Enqueued, |bitmap| {
|
||||
bitmap.insert(task_id);
|
||||
})?;
|
||||
|
||||
self.update_kind(&mut wtxn, task.kind.as_kind(), |bitmap| {
|
||||
(bitmap.insert(task_id));
|
||||
})?;
|
||||
|
||||
// we persist the file in last to be sure everything before was applied successfuly
|
||||
task.persist()?;
|
||||
|
||||
match wtxn.commit() {
|
||||
Ok(()) => (),
|
||||
e @ Err(_) => {
|
||||
task.remove_data()?;
|
||||
e?;
|
||||
}
|
||||
}
|
||||
|
||||
self.notify();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// This worker function must be run in a different thread and must be run only once.
|
||||
fn run(&self) {
|
||||
loop {
|
||||
self.wake_up.wait();
|
||||
|
||||
let mut wtxn = match self.env.write_txn() {
|
||||
Ok(wtxn) => wtxn,
|
||||
Err(e) => {
|
||||
log::error!("{}", e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let batch = match self.create_next_batch(&wtxn) {
|
||||
Ok(Some(batch)) => batch,
|
||||
Ok(None) => continue,
|
||||
Err(e) => {
|
||||
log::error!("{}", e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
// 1. store the starting date with the bitmap of processing tasks
|
||||
// 2. update the tasks with a starting date *but* do not write anything on disk
|
||||
|
||||
// 3. process the tasks
|
||||
let _res = self.process_batch(&mut wtxn, batch);
|
||||
|
||||
// 4. store the updated tasks on disk
|
||||
|
||||
// TODO: TAMO: do this later
|
||||
// must delete the file on disk
|
||||
// in case of error, must update the tasks with the error
|
||||
// in case of « success » we must update all the task on disk
|
||||
// self.handle_batch_result(res);
|
||||
|
||||
match wtxn.commit() {
|
||||
Ok(()) => log::info!("A batch of tasks was successfully completed."),
|
||||
Err(e) => {
|
||||
log::error!("{}", e);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(truc)]
|
||||
fn process_batch(&self, wtxn: &mut RwTxn, batch: &mut Batch) -> Result<()> {
|
||||
match batch {
|
||||
Batch::One(task) => match &task.kind {
|
||||
KindWithContent::ClearAllDocuments { index_name } => {
|
||||
self.index(&index_name)?.clear_documents()?;
|
||||
}
|
||||
KindWithContent::RenameIndex {
|
||||
index_name: _,
|
||||
new_name,
|
||||
} => {
|
||||
if self.available_index.get(wtxn, &new_name)?.unwrap_or(false) {
|
||||
return Err(Error::IndexAlreadyExists(new_name.to_string()));
|
||||
}
|
||||
todo!("wait for @guigui insight");
|
||||
}
|
||||
KindWithContent::CreateIndex {
|
||||
index_name,
|
||||
primary_key,
|
||||
} => {
|
||||
if self
|
||||
.available_index
|
||||
.get(wtxn, &index_name)?
|
||||
.unwrap_or(false)
|
||||
{
|
||||
return Err(Error::IndexAlreadyExists(index_name.to_string()));
|
||||
}
|
||||
|
||||
self.available_index.put(wtxn, &index_name, &true)?;
|
||||
// TODO: TAMO: give real info to the index
|
||||
let index = Index::open(
|
||||
index_name.to_string(),
|
||||
index_name.to_string(),
|
||||
100_000_000,
|
||||
Arc::default(),
|
||||
)?;
|
||||
if let Some(primary_key) = primary_key {
|
||||
index.update_primary_key(primary_key.to_string())?;
|
||||
}
|
||||
self.index_map
|
||||
.write()
|
||||
.map_err(|_| Error::CorruptedTaskQueue)?
|
||||
.insert(index_name.to_string(), index.clone());
|
||||
}
|
||||
KindWithContent::DeleteIndex { index_name } => {
|
||||
if !self.available_index.delete(wtxn, &index_name)? {
|
||||
return Err(Error::IndexNotFound(index_name.to_string()));
|
||||
}
|
||||
if let Some(index) = self
|
||||
.index_map
|
||||
.write()
|
||||
.map_err(|_| Error::CorruptedTaskQueue)?
|
||||
.remove(index_name)
|
||||
{
|
||||
index.delete()?;
|
||||
} else {
|
||||
// TODO: TAMO: fix the path
|
||||
std::fs::remove_file(index_name)?;
|
||||
}
|
||||
}
|
||||
KindWithContent::SwapIndex { lhs, rhs } => {
|
||||
if !self.available_index.get(wtxn, &lhs)?.unwrap_or(false) {
|
||||
return Err(Error::IndexNotFound(lhs.to_string()));
|
||||
}
|
||||
if !self.available_index.get(wtxn, &rhs)?.unwrap_or(false) {
|
||||
return Err(Error::IndexNotFound(rhs.to_string()));
|
||||
}
|
||||
|
||||
let lhs_bitmap = self.index_tasks.get(wtxn, lhs)?;
|
||||
let rhs_bitmap = self.index_tasks.get(wtxn, rhs)?;
|
||||
// the bitmap are lazily created and thus may not exists.
|
||||
if let Some(bitmap) = rhs_bitmap {
|
||||
self.index_tasks.put(wtxn, lhs, &bitmap)?;
|
||||
}
|
||||
if let Some(bitmap) = lhs_bitmap {
|
||||
self.index_tasks.put(wtxn, rhs, &bitmap)?;
|
||||
}
|
||||
|
||||
let mut index_map = self
|
||||
.index_map
|
||||
.write()
|
||||
.map_err(|_| Error::CorruptedTaskQueue)?;
|
||||
|
||||
let lhs_index = index_map.remove(lhs).unwrap();
|
||||
let rhs_index = index_map.remove(rhs).unwrap();
|
||||
|
||||
index_map.insert(lhs.to_string(), rhs_index);
|
||||
index_map.insert(rhs.to_string(), lhs_index);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
},
|
||||
Batch::Cancel(_) => todo!(),
|
||||
Batch::Snapshot(_) => todo!(),
|
||||
Batch::Dump(_) => todo!(),
|
||||
Batch::Contiguous { tasks, kind } => {
|
||||
// it's safe because you can't batch 0 contiguous tasks.
|
||||
let first_task = &tasks[0];
|
||||
// and the two kind of tasks we batch MUST have ONE index name.
|
||||
let index_name = first_task.indexes().unwrap()[0];
|
||||
let index = self.index(index_name)?;
|
||||
|
||||
match kind {
|
||||
Kind::DocumentAddition => {
|
||||
let content_files = tasks.iter().map(|task| match &task.kind {
|
||||
KindWithContent::DocumentAddition { content_file, .. } => {
|
||||
content_file.clone()
|
||||
}
|
||||
k => unreachable!(
|
||||
"Internal error, `{:?}` is not supposed to be reachable here",
|
||||
k.as_kind()
|
||||
),
|
||||
});
|
||||
let results = index.update_documents(
|
||||
IndexDocumentsMethod::UpdateDocuments,
|
||||
None,
|
||||
self.file_store.clone(),
|
||||
content_files,
|
||||
)?;
|
||||
|
||||
for (task, result) in tasks.iter_mut().zip(results) {
|
||||
task.finished_at = Some(OffsetDateTime::now_utc());
|
||||
match result {
|
||||
Ok(_) => task.status = Status::Succeeded,
|
||||
Err(_) => task.status = Status::Succeeded,
|
||||
}
|
||||
}
|
||||
}
|
||||
Kind::DocumentDeletion => {
|
||||
let ids: Vec<_> = tasks
|
||||
.iter()
|
||||
.flat_map(|task| match &task.kind {
|
||||
KindWithContent::DocumentDeletion { documents_ids, .. } => {
|
||||
documents_ids.clone()
|
||||
}
|
||||
k => unreachable!(
|
||||
"Internal error, `{:?}` is not supposed to be reachable here",
|
||||
k.as_kind()
|
||||
),
|
||||
})
|
||||
.collect();
|
||||
|
||||
let result = index.delete_documents(&ids);
|
||||
|
||||
for task in tasks.iter_mut() {
|
||||
task.finished_at = Some(OffsetDateTime::now_utc());
|
||||
match result {
|
||||
Ok(_) => task.status = Status::Succeeded,
|
||||
Err(_) => task.status = Status::Succeeded,
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
Batch::Empty => todo!(),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Notify the scheduler there is or may be work to do.
|
||||
pub fn notify(&self) {
|
||||
self.wake_up.signal()
|
||||
}
|
||||
}
|
||||
pub use crate::index_scheduler::IndexScheduler;
|
||||
pub use error::Error;
|
||||
/// from the exterior you don't need to know there is multiple type of `Kind`
|
||||
pub use task::KindWithContent as TaskKind;
|
||||
/// from the exterior you don't need to know there is multiple type of `Task`
|
||||
pub use task::TaskView as Task;
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
use anyhow::Result;
|
||||
use index::{Settings, Unchecked};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::path::PathBuf;
|
||||
use time::OffsetDateTime;
|
||||
use serde::{Deserialize, Serialize, Serializer};
|
||||
use std::{fmt::Write, path::PathBuf};
|
||||
use time::{Duration, OffsetDateTime};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::TaskId;
|
||||
|
@ -17,6 +17,38 @@ pub enum Status {
|
|||
Failed,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Error {
|
||||
message: String,
|
||||
code: String,
|
||||
#[serde(rename = "type")]
|
||||
kind: String,
|
||||
link: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct TaskView {
|
||||
pub uid: TaskId,
|
||||
pub index_uid: Option<String>,
|
||||
pub status: Status,
|
||||
#[serde(rename = "type")]
|
||||
pub kind: Kind,
|
||||
|
||||
pub details: Option<Details>,
|
||||
pub error: Option<Error>,
|
||||
|
||||
#[serde(serialize_with = "serialize_duration")]
|
||||
pub duration: Option<Duration>,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub enqueued_at: OffsetDateTime,
|
||||
#[serde(with = "time::serde::rfc3339::option")]
|
||||
pub started_at: Option<OffsetDateTime>,
|
||||
#[serde(with = "time::serde::rfc3339::option")]
|
||||
pub finished_at: Option<OffsetDateTime>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Task {
|
||||
|
@ -29,8 +61,8 @@ pub struct Task {
|
|||
#[serde(with = "time::serde::rfc3339::option")]
|
||||
pub finished_at: Option<OffsetDateTime>,
|
||||
|
||||
pub error: Option<String>,
|
||||
pub info: Option<String>,
|
||||
pub error: Option<Error>,
|
||||
pub details: Option<Details>,
|
||||
|
||||
pub status: Status,
|
||||
pub kind: KindWithContent,
|
||||
|
@ -51,6 +83,27 @@ impl Task {
|
|||
pub fn indexes(&self) -> Option<Vec<&str>> {
|
||||
self.kind.indexes()
|
||||
}
|
||||
|
||||
/// Convert a Task to a TaskView
|
||||
pub fn as_task_view(&self) -> TaskView {
|
||||
TaskView {
|
||||
uid: self.uid,
|
||||
index_uid: self
|
||||
.indexes()
|
||||
.and_then(|vec| vec.first().map(|i| i.to_string())),
|
||||
status: self.status,
|
||||
kind: self.kind.as_kind(),
|
||||
details: self.details.clone(),
|
||||
error: self.error.clone(),
|
||||
duration: self
|
||||
.started_at
|
||||
.zip(self.finished_at)
|
||||
.map(|(start, end)| end - start),
|
||||
enqueued_at: self.enqueued_at,
|
||||
started_at: self.started_at,
|
||||
finished_at: self.finished_at,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
|
@ -215,3 +268,81 @@ pub enum Kind {
|
|||
DumpExport,
|
||||
Snapshot,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(untagged)]
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum Details {
|
||||
#[serde(rename_all = "camelCase")]
|
||||
DocumentAddition {
|
||||
received_documents: usize,
|
||||
indexed_documents: Option<u64>,
|
||||
},
|
||||
#[serde(rename_all = "camelCase")]
|
||||
Settings {
|
||||
#[serde(flatten)]
|
||||
settings: Settings<Unchecked>,
|
||||
},
|
||||
#[serde(rename_all = "camelCase")]
|
||||
IndexInfo { primary_key: Option<String> },
|
||||
#[serde(rename_all = "camelCase")]
|
||||
DocumentDeletion {
|
||||
received_document_ids: usize,
|
||||
deleted_documents: Option<u64>,
|
||||
},
|
||||
#[serde(rename_all = "camelCase")]
|
||||
ClearAll { deleted_documents: Option<u64> },
|
||||
#[serde(rename_all = "camelCase")]
|
||||
Dump { dump_uid: String },
|
||||
}
|
||||
|
||||
/// Serialize a `time::Duration` as a best effort ISO 8601 while waiting for
|
||||
/// https://github.com/time-rs/time/issues/378.
|
||||
/// This code is a port of the old code of time that was removed in 0.2.
|
||||
fn serialize_duration<S: Serializer>(
|
||||
duration: &Option<Duration>,
|
||||
serializer: S,
|
||||
) -> Result<S::Ok, S::Error> {
|
||||
match duration {
|
||||
Some(duration) => {
|
||||
// technically speaking, negative duration is not valid ISO 8601
|
||||
if duration.is_negative() {
|
||||
return serializer.serialize_none();
|
||||
}
|
||||
|
||||
const SECS_PER_DAY: i64 = Duration::DAY.whole_seconds();
|
||||
let secs = duration.whole_seconds();
|
||||
let days = secs / SECS_PER_DAY;
|
||||
let secs = secs - days * SECS_PER_DAY;
|
||||
let hasdate = days != 0;
|
||||
let nanos = duration.subsec_nanoseconds();
|
||||
let hastime = (secs != 0 || nanos != 0) || !hasdate;
|
||||
|
||||
// all the following unwrap can't fail
|
||||
let mut res = String::new();
|
||||
write!(&mut res, "P").unwrap();
|
||||
|
||||
if hasdate {
|
||||
write!(&mut res, "{}D", days).unwrap();
|
||||
}
|
||||
|
||||
const NANOS_PER_MILLI: i32 = Duration::MILLISECOND.subsec_nanoseconds();
|
||||
const NANOS_PER_MICRO: i32 = Duration::MICROSECOND.subsec_nanoseconds();
|
||||
|
||||
if hastime {
|
||||
if nanos == 0 {
|
||||
write!(&mut res, "T{}S", secs).unwrap();
|
||||
} else if nanos % NANOS_PER_MILLI == 0 {
|
||||
write!(&mut res, "T{}.{:03}S", secs, nanos / NANOS_PER_MILLI).unwrap();
|
||||
} else if nanos % NANOS_PER_MICRO == 0 {
|
||||
write!(&mut res, "T{}.{:06}S", secs, nanos / NANOS_PER_MICRO).unwrap();
|
||||
} else {
|
||||
write!(&mut res, "T{}.{:09}S", secs, nanos).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
serializer.serialize_str(&res)
|
||||
}
|
||||
None => serializer.serialize_none(),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,8 +7,8 @@ use milli::{
|
|||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::{
|
||||
task::{Kind, Status},
|
||||
Error, IndexScheduler, Result, Task, TaskId,
|
||||
task::{Kind, Status, Task},
|
||||
Error, IndexScheduler, Result, TaskId,
|
||||
};
|
||||
|
||||
impl IndexScheduler {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue