mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-22 03:07:27 +01:00
start integrating the index-scheduler in the meilisearch codebase
This commit is contained in:
parent
b816535e33
commit
fc098022c7
3
Cargo.lock
generated
3
Cargo.lock
generated
@ -2360,12 +2360,15 @@ dependencies = [
|
||||
"csv",
|
||||
"derivative",
|
||||
"either",
|
||||
"file-store",
|
||||
"flate2",
|
||||
"fs_extra",
|
||||
"fst",
|
||||
"futures",
|
||||
"futures-util",
|
||||
"http",
|
||||
"index",
|
||||
"index-scheduler",
|
||||
"indexmap",
|
||||
"itertools",
|
||||
"lazy_static",
|
||||
|
@ -1,6 +1,6 @@
|
||||
use crate::{
|
||||
autobatcher::BatchKind,
|
||||
task::{KindWithContent, Status},
|
||||
task::{Kind, KindWithContent, Status, Task},
|
||||
Error, IndexScheduler, Result,
|
||||
};
|
||||
use index::{Settings, Unchecked};
|
||||
@ -10,8 +10,6 @@ use milli::{
|
||||
};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{task::Kind, Task};
|
||||
|
||||
pub(crate) enum Batch {
|
||||
Cancel(Task),
|
||||
Snapshot(Vec<Task>),
|
||||
@ -230,8 +228,8 @@ impl IndexScheduler {
|
||||
|
||||
for (ret, mut task) in ret.iter().zip(document_addition_tasks.into_iter()) {
|
||||
match ret {
|
||||
Ok(ret) => task.info = Some(format!("{:?}", ret)),
|
||||
Err(err) => task.error = Some(err.to_string()),
|
||||
Ok(ret) => todo!(), // task.info = Some(format!("{:?}", ret)),
|
||||
Err(err) => todo!(), // task.error = Some(err.to_string()),
|
||||
}
|
||||
updated_tasks.push(task);
|
||||
}
|
||||
|
@ -13,9 +13,11 @@ pub enum Error {
|
||||
Heed(#[from] heed::Error),
|
||||
#[error(transparent)]
|
||||
Milli(#[from] milli::Error),
|
||||
#[error("{0}")]
|
||||
#[error(transparent)]
|
||||
IndexError(#[from] index::error::IndexError),
|
||||
#[error(transparent)]
|
||||
FileStore(#[from] file_store::Error),
|
||||
#[error(transparent)]
|
||||
IoError(#[from] std::io::Error),
|
||||
|
||||
#[error(transparent)]
|
||||
|
@ -8,11 +8,13 @@ use index::Index;
|
||||
use milli::heed::types::SerdeBincode;
|
||||
use milli::heed::types::Str;
|
||||
use milli::heed::Database;
|
||||
use milli::heed::Env;
|
||||
use milli::heed::RoTxn;
|
||||
use milli::heed::RwTxn;
|
||||
use milli::update::IndexerConfig;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::index_scheduler::db_name;
|
||||
use crate::Error;
|
||||
use crate::Result;
|
||||
|
||||
@ -31,9 +33,24 @@ pub struct IndexMapper {
|
||||
}
|
||||
|
||||
impl IndexMapper {
|
||||
pub fn new(
|
||||
env: &Env,
|
||||
base_path: PathBuf,
|
||||
index_size: usize,
|
||||
indexer_config: IndexerConfig,
|
||||
) -> Result<Self> {
|
||||
Ok(Self {
|
||||
index_map: Arc::default(),
|
||||
index_mapping: env.create_database(Some(db_name::INDEX_MAPPING))?,
|
||||
base_path,
|
||||
index_size,
|
||||
indexer_config: Arc::new(indexer_config),
|
||||
})
|
||||
}
|
||||
|
||||
/// Get or create the index.
|
||||
pub fn create_index(&self, rwtxn: &mut RwTxn, name: &str) -> Result<Index> {
|
||||
let index = match self.index(rwtxn, name) {
|
||||
pub fn create_index(&self, wtxn: &mut RwTxn, name: &str) -> Result<Index> {
|
||||
let index = match self.index(wtxn, name) {
|
||||
Ok(index) => index,
|
||||
Err(Error::IndexNotFound(_)) => {
|
||||
let uuid = Uuid::new_v4();
|
||||
|
435
index-scheduler/src/index_scheduler.rs
Normal file
435
index-scheduler/src/index_scheduler.rs
Normal file
@ -0,0 +1,435 @@
|
||||
use crate::index_mapper::IndexMapper;
|
||||
use crate::task::{Kind, KindWithContent, Status, Task, TaskView};
|
||||
use crate::Result;
|
||||
use file_store::FileStore;
|
||||
use index::Index;
|
||||
use milli::update::IndexerConfig;
|
||||
use synchronoise::SignalEvent;
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::sync::RwLock;
|
||||
|
||||
use milli::heed::types::{OwnedType, SerdeBincode, Str};
|
||||
use milli::heed::{self, Database, Env};
|
||||
|
||||
use milli::{RoaringBitmapCodec, BEU32};
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::Deserialize;
|
||||
|
||||
const DEFAULT_LIMIT: fn() -> u32 = || 20;
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Query {
|
||||
#[serde(default = "DEFAULT_LIMIT")]
|
||||
limit: u32,
|
||||
from: Option<u32>,
|
||||
status: Option<Vec<Status>>,
|
||||
#[serde(rename = "type")]
|
||||
kind: Option<Vec<Kind>>,
|
||||
index_uid: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
pub mod db_name {
|
||||
pub const ALL_TASKS: &str = "all-tasks";
|
||||
pub const STATUS: &str = "status";
|
||||
pub const KIND: &str = "kind";
|
||||
pub const INDEX_TASKS: &str = "index-tasks";
|
||||
|
||||
pub const INDEX_MAPPING: &str = "index-mapping";
|
||||
}
|
||||
|
||||
/// This module is responsible for two things;
|
||||
/// 1. Resolve the name of the indexes.
|
||||
/// 2. Schedule the tasks.
|
||||
#[derive(Clone)]
|
||||
pub struct IndexScheduler {
|
||||
/// The list of tasks currently processing.
|
||||
pub(crate) processing_tasks: Arc<RwLock<RoaringBitmap>>,
|
||||
|
||||
pub(crate) file_store: FileStore,
|
||||
|
||||
/// The LMDB environment which the DBs are associated with.
|
||||
pub(crate) env: Env,
|
||||
|
||||
// The main database, it contains all the tasks accessible by their Id.
|
||||
pub(crate) all_tasks: Database<OwnedType<BEU32>, SerdeBincode<Task>>,
|
||||
|
||||
/// All the tasks ids grouped by their status.
|
||||
pub(crate) status: Database<SerdeBincode<Status>, RoaringBitmapCodec>,
|
||||
/// All the tasks ids grouped by their kind.
|
||||
pub(crate) kind: Database<SerdeBincode<Kind>, RoaringBitmapCodec>,
|
||||
/// Store the tasks associated to an index.
|
||||
pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
|
||||
|
||||
/// In charge of creating, opening, storing and returning indexes.
|
||||
pub(crate) index_mapper: IndexMapper,
|
||||
|
||||
// set to true when there is work to do.
|
||||
pub(crate) wake_up: Arc<SignalEvent>,
|
||||
}
|
||||
|
||||
impl IndexScheduler {
|
||||
pub fn new(
|
||||
db_path: PathBuf,
|
||||
update_file_path: PathBuf,
|
||||
indexes_path: PathBuf,
|
||||
index_size: usize,
|
||||
indexer_config: IndexerConfig,
|
||||
) -> Result<Self> {
|
||||
std::fs::create_dir_all(&db_path)?;
|
||||
std::fs::create_dir_all(&update_file_path)?;
|
||||
std::fs::create_dir_all(&indexes_path)?;
|
||||
|
||||
let mut options = heed::EnvOpenOptions::new();
|
||||
options.max_dbs(6);
|
||||
|
||||
let env = options.open(db_path)?;
|
||||
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
|
||||
let wake_up = SignalEvent::auto(true);
|
||||
|
||||
Ok(Self {
|
||||
// by default there is no processing tasks
|
||||
processing_tasks: Arc::default(),
|
||||
file_store: FileStore::new(update_file_path)?,
|
||||
all_tasks: env.create_database(Some(db_name::ALL_TASKS))?,
|
||||
status: env.create_database(Some(db_name::STATUS))?,
|
||||
kind: env.create_database(Some(db_name::KIND))?,
|
||||
index_tasks: env.create_database(Some(db_name::INDEX_TASKS))?,
|
||||
index_mapper: IndexMapper::new(&env, indexes_path, index_size, indexer_config)?,
|
||||
env,
|
||||
wake_up: Arc::new(wake_up),
|
||||
})
|
||||
}
|
||||
|
||||
/// Return the index corresponding to the name. If it wasn't opened before
|
||||
/// it'll be opened. But if it doesn't exist on disk it'll throw an
|
||||
/// `IndexNotFound` error.
|
||||
pub fn index(&self, name: &str) -> Result<Index> {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
self.index_mapper.index(&rtxn, name)
|
||||
}
|
||||
|
||||
/// Returns the tasks corresponding to the query.
|
||||
pub fn get_tasks(&self, query: Query) -> Result<Vec<TaskView>> {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
let last_task_id = match self.last_task_id(&rtxn)? {
|
||||
Some(tid) => query.from.map(|from| from.min(tid)).unwrap_or(tid),
|
||||
None => return Ok(Vec::new()),
|
||||
};
|
||||
|
||||
// This is the list of all the tasks.
|
||||
let mut tasks = RoaringBitmap::from_iter(0..last_task_id);
|
||||
|
||||
if let Some(status) = query.status {
|
||||
let mut status_tasks = RoaringBitmap::new();
|
||||
for status in status {
|
||||
status_tasks |= self.get_status(&rtxn, status)?;
|
||||
}
|
||||
tasks &= status_tasks;
|
||||
}
|
||||
|
||||
if let Some(kind) = query.kind {
|
||||
let mut kind_tasks = RoaringBitmap::new();
|
||||
for kind in kind {
|
||||
kind_tasks |= self.get_kind(&rtxn, kind)?;
|
||||
}
|
||||
tasks &= kind_tasks;
|
||||
}
|
||||
|
||||
if let Some(index) = query.index_uid {
|
||||
let mut index_tasks = RoaringBitmap::new();
|
||||
for index in index {
|
||||
index_tasks |= self.get_index(&rtxn, &index)?;
|
||||
}
|
||||
tasks &= index_tasks;
|
||||
}
|
||||
|
||||
let tasks =
|
||||
self.get_existing_tasks(&rtxn, tasks.into_iter().rev().take(query.limit as usize))?;
|
||||
Ok(tasks.into_iter().map(|task| task.as_task_view()).collect())
|
||||
}
|
||||
|
||||
/// Register a new task in the scheduler. If it fails and data was associated with the task
|
||||
/// it tries to delete the file.
|
||||
pub fn register(&self, task: KindWithContent) -> Result<TaskView> {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
|
||||
let task = Task {
|
||||
uid: self.next_task_id(&wtxn)?,
|
||||
enqueued_at: time::OffsetDateTime::now_utc(),
|
||||
started_at: None,
|
||||
finished_at: None,
|
||||
error: None,
|
||||
details: None,
|
||||
status: Status::Enqueued,
|
||||
kind: task,
|
||||
};
|
||||
|
||||
self.all_tasks
|
||||
.append(&mut wtxn, &BEU32::new(task.uid), &task)?;
|
||||
|
||||
if let Some(indexes) = task.indexes() {
|
||||
for index in indexes {
|
||||
self.update_index(&mut wtxn, index, |bitmap| drop(bitmap.insert(task.uid)))?;
|
||||
}
|
||||
}
|
||||
|
||||
self.update_status(&mut wtxn, Status::Enqueued, |bitmap| {
|
||||
bitmap.insert(task.uid);
|
||||
})?;
|
||||
|
||||
self.update_kind(&mut wtxn, task.kind.as_kind(), |bitmap| {
|
||||
(bitmap.insert(task.uid));
|
||||
})?;
|
||||
|
||||
// we persist the file in last to be sure everything before was applied successfuly
|
||||
task.persist()?;
|
||||
|
||||
match wtxn.commit() {
|
||||
Ok(()) => (),
|
||||
e @ Err(_) => {
|
||||
task.remove_data()?;
|
||||
e?;
|
||||
}
|
||||
}
|
||||
|
||||
self.notify();
|
||||
|
||||
Ok(task.as_task_view())
|
||||
}
|
||||
|
||||
/// This worker function must be run in a different thread and must be run only once.
|
||||
fn run(&self) {
|
||||
loop {
|
||||
self.wake_up.wait();
|
||||
|
||||
let mut wtxn = match self.env.write_txn() {
|
||||
Ok(wtxn) => wtxn,
|
||||
Err(e) => {
|
||||
log::error!("{}", e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let batch = match self.create_next_batch(&wtxn) {
|
||||
Ok(Some(batch)) => batch,
|
||||
Ok(None) => continue,
|
||||
Err(e) => {
|
||||
log::error!("{}", e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
// 1. store the starting date with the bitmap of processing tasks
|
||||
// 2. update the tasks with a starting date *but* do not write anything on disk
|
||||
|
||||
// 3. process the tasks
|
||||
let _res = self.process_batch(&mut wtxn, batch);
|
||||
|
||||
// 4. store the updated tasks on disk
|
||||
|
||||
// TODO: TAMO: do this later
|
||||
// must delete the file on disk
|
||||
// in case of error, must update the tasks with the error
|
||||
// in case of « success » we must update all the task on disk
|
||||
// self.handle_batch_result(res);
|
||||
|
||||
match wtxn.commit() {
|
||||
Ok(()) => log::info!("A batch of tasks was successfully completed."),
|
||||
Err(e) => {
|
||||
log::error!("{}", e);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(truc)]
|
||||
fn process_batch(&self, wtxn: &mut RwTxn, batch: &mut Batch) -> Result<()> {
|
||||
match batch {
|
||||
Batch::One(task) => match &task.kind {
|
||||
KindWithContent::ClearAllDocuments { index_name } => {
|
||||
self.index(&index_name)?.clear_documents()?;
|
||||
}
|
||||
KindWithContent::RenameIndex {
|
||||
index_name: _,
|
||||
new_name,
|
||||
} => {
|
||||
if self.available_index.get(wtxn, &new_name)?.unwrap_or(false) {
|
||||
return Err(Error::IndexAlreadyExists(new_name.to_string()));
|
||||
}
|
||||
todo!("wait for @guigui insight");
|
||||
}
|
||||
KindWithContent::CreateIndex {
|
||||
index_name,
|
||||
primary_key,
|
||||
} => {
|
||||
if self
|
||||
.available_index
|
||||
.get(wtxn, &index_name)?
|
||||
.unwrap_or(false)
|
||||
{
|
||||
return Err(Error::IndexAlreadyExists(index_name.to_string()));
|
||||
}
|
||||
|
||||
self.available_index.put(wtxn, &index_name, &true)?;
|
||||
// TODO: TAMO: give real info to the index
|
||||
let index = Index::open(
|
||||
index_name.to_string(),
|
||||
index_name.to_string(),
|
||||
100_000_000,
|
||||
Arc::default(),
|
||||
)?;
|
||||
if let Some(primary_key) = primary_key {
|
||||
index.update_primary_key(primary_key.to_string())?;
|
||||
}
|
||||
self.index_map
|
||||
.write()
|
||||
.map_err(|_| Error::CorruptedTaskQueue)?
|
||||
.insert(index_name.to_string(), index.clone());
|
||||
}
|
||||
KindWithContent::DeleteIndex { index_name } => {
|
||||
if !self.available_index.delete(wtxn, &index_name)? {
|
||||
return Err(Error::IndexNotFound(index_name.to_string()));
|
||||
}
|
||||
if let Some(index) = self
|
||||
.index_map
|
||||
.write()
|
||||
.map_err(|_| Error::CorruptedTaskQueue)?
|
||||
.remove(index_name)
|
||||
{
|
||||
index.delete()?;
|
||||
} else {
|
||||
// TODO: TAMO: fix the path
|
||||
std::fs::remove_file(index_name)?;
|
||||
}
|
||||
}
|
||||
KindWithContent::SwapIndex { lhs, rhs } => {
|
||||
if !self.available_index.get(wtxn, &lhs)?.unwrap_or(false) {
|
||||
return Err(Error::IndexNotFound(lhs.to_string()));
|
||||
}
|
||||
if !self.available_index.get(wtxn, &rhs)?.unwrap_or(false) {
|
||||
return Err(Error::IndexNotFound(rhs.to_string()));
|
||||
}
|
||||
|
||||
let lhs_bitmap = self.index_tasks.get(wtxn, lhs)?;
|
||||
let rhs_bitmap = self.index_tasks.get(wtxn, rhs)?;
|
||||
// the bitmap are lazily created and thus may not exists.
|
||||
if let Some(bitmap) = rhs_bitmap {
|
||||
self.index_tasks.put(wtxn, lhs, &bitmap)?;
|
||||
}
|
||||
if let Some(bitmap) = lhs_bitmap {
|
||||
self.index_tasks.put(wtxn, rhs, &bitmap)?;
|
||||
}
|
||||
|
||||
let mut index_map = self
|
||||
.index_map
|
||||
.write()
|
||||
.map_err(|_| Error::CorruptedTaskQueue)?;
|
||||
|
||||
let lhs_index = index_map.remove(lhs).unwrap();
|
||||
let rhs_index = index_map.remove(rhs).unwrap();
|
||||
|
||||
index_map.insert(lhs.to_string(), rhs_index);
|
||||
index_map.insert(rhs.to_string(), lhs_index);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
},
|
||||
Batch::Cancel(_) => todo!(),
|
||||
Batch::Snapshot(_) => todo!(),
|
||||
Batch::Dump(_) => todo!(),
|
||||
Batch::Contiguous { tasks, kind } => {
|
||||
// it's safe because you can't batch 0 contiguous tasks.
|
||||
let first_task = &tasks[0];
|
||||
// and the two kind of tasks we batch MUST have ONE index name.
|
||||
let index_name = first_task.indexes().unwrap()[0];
|
||||
let index = self.index(index_name)?;
|
||||
|
||||
match kind {
|
||||
Kind::DocumentAddition => {
|
||||
let content_files = tasks.iter().map(|task| match &task.kind {
|
||||
KindWithContent::DocumentAddition { content_file, .. } => {
|
||||
content_file.clone()
|
||||
}
|
||||
k => unreachable!(
|
||||
"Internal error, `{:?}` is not supposed to be reachable here",
|
||||
k.as_kind()
|
||||
),
|
||||
});
|
||||
let results = index.update_documents(
|
||||
IndexDocumentsMethod::UpdateDocuments,
|
||||
None,
|
||||
self.file_store.clone(),
|
||||
content_files,
|
||||
)?;
|
||||
|
||||
for (task, result) in tasks.iter_mut().zip(results) {
|
||||
task.finished_at = Some(OffsetDateTime::now_utc());
|
||||
match result {
|
||||
Ok(_) => task.status = Status::Succeeded,
|
||||
Err(_) => task.status = Status::Succeeded,
|
||||
}
|
||||
}
|
||||
}
|
||||
Kind::DocumentDeletion => {
|
||||
let ids: Vec<_> = tasks
|
||||
.iter()
|
||||
.flat_map(|task| match &task.kind {
|
||||
KindWithContent::DocumentDeletion { documents_ids, .. } => {
|
||||
documents_ids.clone()
|
||||
}
|
||||
k => unreachable!(
|
||||
"Internal error, `{:?}` is not supposed to be reachable here",
|
||||
k.as_kind()
|
||||
),
|
||||
})
|
||||
.collect();
|
||||
|
||||
let result = index.delete_documents(&ids);
|
||||
|
||||
for task in tasks.iter_mut() {
|
||||
task.finished_at = Some(OffsetDateTime::now_utc());
|
||||
match result {
|
||||
Ok(_) => task.status = Status::Succeeded,
|
||||
Err(_) => task.status = Status::Succeeded,
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
Batch::Empty => todo!(),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Notify the scheduler there is or may be work to do.
|
||||
pub fn notify(&self) {
|
||||
self.wake_up.signal()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use tempfile::TempDir;
|
||||
|
||||
use super::*;
|
||||
|
||||
fn new() -> IndexScheduler {
|
||||
let dir = TempDir::new().unwrap();
|
||||
IndexScheduler::new(
|
||||
dir.path().join("db_path"),
|
||||
dir.path().join("file_store"),
|
||||
dir.path().join("indexes"),
|
||||
100_000_000,
|
||||
IndexerConfig::default(),
|
||||
)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple_new() {
|
||||
new();
|
||||
}
|
||||
}
|
@ -2,380 +2,16 @@ mod autobatcher;
|
||||
mod batch;
|
||||
pub mod error;
|
||||
mod index_mapper;
|
||||
mod index_scheduler;
|
||||
pub mod task;
|
||||
mod utils;
|
||||
|
||||
|
||||
pub use error::Error;
|
||||
use file_store::FileStore;
|
||||
use index::Index;
|
||||
use index_mapper::IndexMapper;
|
||||
use synchronoise::SignalEvent;
|
||||
pub use task::Task;
|
||||
use task::{Kind, Status};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::{sync::RwLock};
|
||||
|
||||
use milli::heed::types::{OwnedType, SerdeBincode, Str};
|
||||
use milli::heed::{Database, Env};
|
||||
|
||||
use milli::{RoaringBitmapCodec, BEU32};
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::Deserialize;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
pub type TaskId = u32;
|
||||
type IndexName = String;
|
||||
type IndexUuid = String;
|
||||
|
||||
const DEFAULT_LIMIT: fn() -> u32 = || 20;
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Query {
|
||||
#[serde(default = "DEFAULT_LIMIT")]
|
||||
limit: u32,
|
||||
from: Option<u32>,
|
||||
status: Option<Vec<Status>>,
|
||||
#[serde(rename = "type")]
|
||||
kind: Option<Vec<Kind>>,
|
||||
index_uid: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
/// This module is responsible for two things;
|
||||
/// 1. Resolve the name of the indexes.
|
||||
/// 2. Schedule the tasks.
|
||||
#[derive(Clone)]
|
||||
pub struct IndexScheduler {
|
||||
/// The list of tasks currently processing.
|
||||
processing_tasks: Arc<RwLock<RoaringBitmap>>,
|
||||
|
||||
file_store: FileStore,
|
||||
|
||||
/// The LMDB environment which the DBs are associated with.
|
||||
env: Env,
|
||||
|
||||
// The main database, it contains all the tasks accessible by their Id.
|
||||
all_tasks: Database<OwnedType<BEU32>, SerdeBincode<Task>>,
|
||||
|
||||
/// All the tasks ids grouped by their status.
|
||||
status: Database<SerdeBincode<Status>, RoaringBitmapCodec>,
|
||||
/// All the tasks ids grouped by their kind.
|
||||
kind: Database<SerdeBincode<Kind>, RoaringBitmapCodec>,
|
||||
/// Store the tasks associated to an index.
|
||||
index_tasks: Database<Str, RoaringBitmapCodec>,
|
||||
|
||||
/// In charge of creating and returning indexes.
|
||||
index_mapper: IndexMapper,
|
||||
|
||||
// set to true when there is work to do.
|
||||
wake_up: Arc<SignalEvent>,
|
||||
}
|
||||
|
||||
impl IndexScheduler {
|
||||
pub fn new() -> Self {
|
||||
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
|
||||
let _wake_up = SignalEvent::auto(true);
|
||||
todo!()
|
||||
}
|
||||
|
||||
/// Return the index corresponding to the name. If it wasn't opened before
|
||||
/// it'll be opened. But if it doesn't exist on disk it'll throw an
|
||||
/// `IndexNotFound` error.
|
||||
pub fn index(&self, name: &str) -> Result<Index> {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
self.index_mapper.index(&rtxn, name)
|
||||
}
|
||||
|
||||
/// Returns the tasks corresponding to the query.
|
||||
pub fn get_tasks(&self, query: Query) -> Result<Vec<Task>> {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
let last_task_id = match self.last_task_id(&rtxn)? {
|
||||
Some(tid) => query.from.map(|from| from.min(tid)).unwrap_or(tid),
|
||||
None => return Ok(Vec::new()),
|
||||
};
|
||||
|
||||
// This is the list of all the tasks.
|
||||
let mut tasks = RoaringBitmap::from_iter(0..last_task_id);
|
||||
|
||||
if let Some(status) = query.status {
|
||||
let mut status_tasks = RoaringBitmap::new();
|
||||
for status in status {
|
||||
status_tasks |= self.get_status(&rtxn, status)?;
|
||||
}
|
||||
tasks &= status_tasks;
|
||||
}
|
||||
|
||||
if let Some(kind) = query.kind {
|
||||
let mut kind_tasks = RoaringBitmap::new();
|
||||
for kind in kind {
|
||||
kind_tasks |= self.get_kind(&rtxn, kind)?;
|
||||
}
|
||||
tasks &= kind_tasks;
|
||||
}
|
||||
|
||||
if let Some(index) = query.index_uid {
|
||||
let mut index_tasks = RoaringBitmap::new();
|
||||
for index in index {
|
||||
index_tasks |= self.get_index(&rtxn, &index)?;
|
||||
}
|
||||
tasks &= index_tasks;
|
||||
}
|
||||
|
||||
self.get_existing_tasks(&rtxn, tasks.into_iter().rev().take(query.limit as usize))
|
||||
}
|
||||
|
||||
/// Register a new task in the scheduler. If it fails and data was associated with the task
|
||||
/// it tries to delete the file.
|
||||
pub fn register(&self, task: Task) -> Result<()> {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
|
||||
let task_id = self.next_task_id(&wtxn)?;
|
||||
|
||||
self.all_tasks
|
||||
.append(&mut wtxn, &BEU32::new(task_id), &task)?;
|
||||
|
||||
if let Some(indexes) = task.indexes() {
|
||||
for index in indexes {
|
||||
self.update_index(&mut wtxn, index, |bitmap| drop(bitmap.insert(task_id)))?;
|
||||
}
|
||||
}
|
||||
|
||||
self.update_status(&mut wtxn, Status::Enqueued, |bitmap| {
|
||||
bitmap.insert(task_id);
|
||||
})?;
|
||||
|
||||
self.update_kind(&mut wtxn, task.kind.as_kind(), |bitmap| {
|
||||
(bitmap.insert(task_id));
|
||||
})?;
|
||||
|
||||
// we persist the file in last to be sure everything before was applied successfuly
|
||||
task.persist()?;
|
||||
|
||||
match wtxn.commit() {
|
||||
Ok(()) => (),
|
||||
e @ Err(_) => {
|
||||
task.remove_data()?;
|
||||
e?;
|
||||
}
|
||||
}
|
||||
|
||||
self.notify();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// This worker function must be run in a different thread and must be run only once.
|
||||
fn run(&self) {
|
||||
loop {
|
||||
self.wake_up.wait();
|
||||
|
||||
let mut wtxn = match self.env.write_txn() {
|
||||
Ok(wtxn) => wtxn,
|
||||
Err(e) => {
|
||||
log::error!("{}", e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let batch = match self.create_next_batch(&wtxn) {
|
||||
Ok(Some(batch)) => batch,
|
||||
Ok(None) => continue,
|
||||
Err(e) => {
|
||||
log::error!("{}", e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
// 1. store the starting date with the bitmap of processing tasks
|
||||
// 2. update the tasks with a starting date *but* do not write anything on disk
|
||||
|
||||
// 3. process the tasks
|
||||
let _res = self.process_batch(&mut wtxn, batch);
|
||||
|
||||
// 4. store the updated tasks on disk
|
||||
|
||||
// TODO: TAMO: do this later
|
||||
// must delete the file on disk
|
||||
// in case of error, must update the tasks with the error
|
||||
// in case of « success » we must update all the task on disk
|
||||
// self.handle_batch_result(res);
|
||||
|
||||
match wtxn.commit() {
|
||||
Ok(()) => log::info!("A batch of tasks was successfully completed."),
|
||||
Err(e) => {
|
||||
log::error!("{}", e);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(truc)]
|
||||
fn process_batch(&self, wtxn: &mut RwTxn, batch: &mut Batch) -> Result<()> {
|
||||
match batch {
|
||||
Batch::One(task) => match &task.kind {
|
||||
KindWithContent::ClearAllDocuments { index_name } => {
|
||||
self.index(&index_name)?.clear_documents()?;
|
||||
}
|
||||
KindWithContent::RenameIndex {
|
||||
index_name: _,
|
||||
new_name,
|
||||
} => {
|
||||
if self.available_index.get(wtxn, &new_name)?.unwrap_or(false) {
|
||||
return Err(Error::IndexAlreadyExists(new_name.to_string()));
|
||||
}
|
||||
todo!("wait for @guigui insight");
|
||||
}
|
||||
KindWithContent::CreateIndex {
|
||||
index_name,
|
||||
primary_key,
|
||||
} => {
|
||||
if self
|
||||
.available_index
|
||||
.get(wtxn, &index_name)?
|
||||
.unwrap_or(false)
|
||||
{
|
||||
return Err(Error::IndexAlreadyExists(index_name.to_string()));
|
||||
}
|
||||
|
||||
self.available_index.put(wtxn, &index_name, &true)?;
|
||||
// TODO: TAMO: give real info to the index
|
||||
let index = Index::open(
|
||||
index_name.to_string(),
|
||||
index_name.to_string(),
|
||||
100_000_000,
|
||||
Arc::default(),
|
||||
)?;
|
||||
if let Some(primary_key) = primary_key {
|
||||
index.update_primary_key(primary_key.to_string())?;
|
||||
}
|
||||
self.index_map
|
||||
.write()
|
||||
.map_err(|_| Error::CorruptedTaskQueue)?
|
||||
.insert(index_name.to_string(), index.clone());
|
||||
}
|
||||
KindWithContent::DeleteIndex { index_name } => {
|
||||
if !self.available_index.delete(wtxn, &index_name)? {
|
||||
return Err(Error::IndexNotFound(index_name.to_string()));
|
||||
}
|
||||
if let Some(index) = self
|
||||
.index_map
|
||||
.write()
|
||||
.map_err(|_| Error::CorruptedTaskQueue)?
|
||||
.remove(index_name)
|
||||
{
|
||||
index.delete()?;
|
||||
} else {
|
||||
// TODO: TAMO: fix the path
|
||||
std::fs::remove_file(index_name)?;
|
||||
}
|
||||
}
|
||||
KindWithContent::SwapIndex { lhs, rhs } => {
|
||||
if !self.available_index.get(wtxn, &lhs)?.unwrap_or(false) {
|
||||
return Err(Error::IndexNotFound(lhs.to_string()));
|
||||
}
|
||||
if !self.available_index.get(wtxn, &rhs)?.unwrap_or(false) {
|
||||
return Err(Error::IndexNotFound(rhs.to_string()));
|
||||
}
|
||||
|
||||
let lhs_bitmap = self.index_tasks.get(wtxn, lhs)?;
|
||||
let rhs_bitmap = self.index_tasks.get(wtxn, rhs)?;
|
||||
// the bitmap are lazily created and thus may not exists.
|
||||
if let Some(bitmap) = rhs_bitmap {
|
||||
self.index_tasks.put(wtxn, lhs, &bitmap)?;
|
||||
}
|
||||
if let Some(bitmap) = lhs_bitmap {
|
||||
self.index_tasks.put(wtxn, rhs, &bitmap)?;
|
||||
}
|
||||
|
||||
let mut index_map = self
|
||||
.index_map
|
||||
.write()
|
||||
.map_err(|_| Error::CorruptedTaskQueue)?;
|
||||
|
||||
let lhs_index = index_map.remove(lhs).unwrap();
|
||||
let rhs_index = index_map.remove(rhs).unwrap();
|
||||
|
||||
index_map.insert(lhs.to_string(), rhs_index);
|
||||
index_map.insert(rhs.to_string(), lhs_index);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
},
|
||||
Batch::Cancel(_) => todo!(),
|
||||
Batch::Snapshot(_) => todo!(),
|
||||
Batch::Dump(_) => todo!(),
|
||||
Batch::Contiguous { tasks, kind } => {
|
||||
// it's safe because you can't batch 0 contiguous tasks.
|
||||
let first_task = &tasks[0];
|
||||
// and the two kind of tasks we batch MUST have ONE index name.
|
||||
let index_name = first_task.indexes().unwrap()[0];
|
||||
let index = self.index(index_name)?;
|
||||
|
||||
match kind {
|
||||
Kind::DocumentAddition => {
|
||||
let content_files = tasks.iter().map(|task| match &task.kind {
|
||||
KindWithContent::DocumentAddition { content_file, .. } => {
|
||||
content_file.clone()
|
||||
}
|
||||
k => unreachable!(
|
||||
"Internal error, `{:?}` is not supposed to be reachable here",
|
||||
k.as_kind()
|
||||
),
|
||||
});
|
||||
let results = index.update_documents(
|
||||
IndexDocumentsMethod::UpdateDocuments,
|
||||
None,
|
||||
self.file_store.clone(),
|
||||
content_files,
|
||||
)?;
|
||||
|
||||
for (task, result) in tasks.iter_mut().zip(results) {
|
||||
task.finished_at = Some(OffsetDateTime::now_utc());
|
||||
match result {
|
||||
Ok(_) => task.status = Status::Succeeded,
|
||||
Err(_) => task.status = Status::Succeeded,
|
||||
}
|
||||
}
|
||||
}
|
||||
Kind::DocumentDeletion => {
|
||||
let ids: Vec<_> = tasks
|
||||
.iter()
|
||||
.flat_map(|task| match &task.kind {
|
||||
KindWithContent::DocumentDeletion { documents_ids, .. } => {
|
||||
documents_ids.clone()
|
||||
}
|
||||
k => unreachable!(
|
||||
"Internal error, `{:?}` is not supposed to be reachable here",
|
||||
k.as_kind()
|
||||
),
|
||||
})
|
||||
.collect();
|
||||
|
||||
let result = index.delete_documents(&ids);
|
||||
|
||||
for task in tasks.iter_mut() {
|
||||
task.finished_at = Some(OffsetDateTime::now_utc());
|
||||
match result {
|
||||
Ok(_) => task.status = Status::Succeeded,
|
||||
Err(_) => task.status = Status::Succeeded,
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
Batch::Empty => todo!(),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Notify the scheduler there is or may be work to do.
|
||||
pub fn notify(&self) {
|
||||
self.wake_up.signal()
|
||||
}
|
||||
}
|
||||
pub use crate::index_scheduler::IndexScheduler;
|
||||
pub use error::Error;
|
||||
/// from the exterior you don't need to know there is multiple type of `Kind`
|
||||
pub use task::KindWithContent as TaskKind;
|
||||
/// from the exterior you don't need to know there is multiple type of `Task`
|
||||
pub use task::TaskView as Task;
|
||||
|
@ -1,9 +1,9 @@
|
||||
use anyhow::Result;
|
||||
use index::{Settings, Unchecked};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::path::PathBuf;
|
||||
use time::OffsetDateTime;
|
||||
use serde::{Deserialize, Serialize, Serializer};
|
||||
use std::{fmt::Write, path::PathBuf};
|
||||
use time::{Duration, OffsetDateTime};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::TaskId;
|
||||
@ -17,6 +17,38 @@ pub enum Status {
|
||||
Failed,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Error {
|
||||
message: String,
|
||||
code: String,
|
||||
#[serde(rename = "type")]
|
||||
kind: String,
|
||||
link: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct TaskView {
|
||||
pub uid: TaskId,
|
||||
pub index_uid: Option<String>,
|
||||
pub status: Status,
|
||||
#[serde(rename = "type")]
|
||||
pub kind: Kind,
|
||||
|
||||
pub details: Option<Details>,
|
||||
pub error: Option<Error>,
|
||||
|
||||
#[serde(serialize_with = "serialize_duration")]
|
||||
pub duration: Option<Duration>,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub enqueued_at: OffsetDateTime,
|
||||
#[serde(with = "time::serde::rfc3339::option")]
|
||||
pub started_at: Option<OffsetDateTime>,
|
||||
#[serde(with = "time::serde::rfc3339::option")]
|
||||
pub finished_at: Option<OffsetDateTime>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Task {
|
||||
@ -29,8 +61,8 @@ pub struct Task {
|
||||
#[serde(with = "time::serde::rfc3339::option")]
|
||||
pub finished_at: Option<OffsetDateTime>,
|
||||
|
||||
pub error: Option<String>,
|
||||
pub info: Option<String>,
|
||||
pub error: Option<Error>,
|
||||
pub details: Option<Details>,
|
||||
|
||||
pub status: Status,
|
||||
pub kind: KindWithContent,
|
||||
@ -51,6 +83,27 @@ impl Task {
|
||||
pub fn indexes(&self) -> Option<Vec<&str>> {
|
||||
self.kind.indexes()
|
||||
}
|
||||
|
||||
/// Convert a Task to a TaskView
|
||||
pub fn as_task_view(&self) -> TaskView {
|
||||
TaskView {
|
||||
uid: self.uid,
|
||||
index_uid: self
|
||||
.indexes()
|
||||
.and_then(|vec| vec.first().map(|i| i.to_string())),
|
||||
status: self.status,
|
||||
kind: self.kind.as_kind(),
|
||||
details: self.details.clone(),
|
||||
error: self.error.clone(),
|
||||
duration: self
|
||||
.started_at
|
||||
.zip(self.finished_at)
|
||||
.map(|(start, end)| end - start),
|
||||
enqueued_at: self.enqueued_at,
|
||||
started_at: self.started_at,
|
||||
finished_at: self.finished_at,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
@ -215,3 +268,81 @@ pub enum Kind {
|
||||
DumpExport,
|
||||
Snapshot,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(untagged)]
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum Details {
|
||||
#[serde(rename_all = "camelCase")]
|
||||
DocumentAddition {
|
||||
received_documents: usize,
|
||||
indexed_documents: Option<u64>,
|
||||
},
|
||||
#[serde(rename_all = "camelCase")]
|
||||
Settings {
|
||||
#[serde(flatten)]
|
||||
settings: Settings<Unchecked>,
|
||||
},
|
||||
#[serde(rename_all = "camelCase")]
|
||||
IndexInfo { primary_key: Option<String> },
|
||||
#[serde(rename_all = "camelCase")]
|
||||
DocumentDeletion {
|
||||
received_document_ids: usize,
|
||||
deleted_documents: Option<u64>,
|
||||
},
|
||||
#[serde(rename_all = "camelCase")]
|
||||
ClearAll { deleted_documents: Option<u64> },
|
||||
#[serde(rename_all = "camelCase")]
|
||||
Dump { dump_uid: String },
|
||||
}
|
||||
|
||||
/// Serialize a `time::Duration` as a best effort ISO 8601 while waiting for
|
||||
/// https://github.com/time-rs/time/issues/378.
|
||||
/// This code is a port of the old code of time that was removed in 0.2.
|
||||
fn serialize_duration<S: Serializer>(
|
||||
duration: &Option<Duration>,
|
||||
serializer: S,
|
||||
) -> Result<S::Ok, S::Error> {
|
||||
match duration {
|
||||
Some(duration) => {
|
||||
// technically speaking, negative duration is not valid ISO 8601
|
||||
if duration.is_negative() {
|
||||
return serializer.serialize_none();
|
||||
}
|
||||
|
||||
const SECS_PER_DAY: i64 = Duration::DAY.whole_seconds();
|
||||
let secs = duration.whole_seconds();
|
||||
let days = secs / SECS_PER_DAY;
|
||||
let secs = secs - days * SECS_PER_DAY;
|
||||
let hasdate = days != 0;
|
||||
let nanos = duration.subsec_nanoseconds();
|
||||
let hastime = (secs != 0 || nanos != 0) || !hasdate;
|
||||
|
||||
// all the following unwrap can't fail
|
||||
let mut res = String::new();
|
||||
write!(&mut res, "P").unwrap();
|
||||
|
||||
if hasdate {
|
||||
write!(&mut res, "{}D", days).unwrap();
|
||||
}
|
||||
|
||||
const NANOS_PER_MILLI: i32 = Duration::MILLISECOND.subsec_nanoseconds();
|
||||
const NANOS_PER_MICRO: i32 = Duration::MICROSECOND.subsec_nanoseconds();
|
||||
|
||||
if hastime {
|
||||
if nanos == 0 {
|
||||
write!(&mut res, "T{}S", secs).unwrap();
|
||||
} else if nanos % NANOS_PER_MILLI == 0 {
|
||||
write!(&mut res, "T{}.{:03}S", secs, nanos / NANOS_PER_MILLI).unwrap();
|
||||
} else if nanos % NANOS_PER_MICRO == 0 {
|
||||
write!(&mut res, "T{}.{:06}S", secs, nanos / NANOS_PER_MICRO).unwrap();
|
||||
} else {
|
||||
write!(&mut res, "T{}.{:09}S", secs, nanos).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
serializer.serialize_str(&res)
|
||||
}
|
||||
None => serializer.serialize_none(),
|
||||
}
|
||||
}
|
||||
|
@ -7,8 +7,8 @@ use milli::{
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::{
|
||||
task::{Kind, Status},
|
||||
Error, IndexScheduler, Result, Task, TaskId,
|
||||
task::{Kind, Status, Task},
|
||||
Error, IndexScheduler, Result, TaskId,
|
||||
};
|
||||
|
||||
impl IndexScheduler {
|
||||
|
@ -55,6 +55,9 @@ tokio = { version = "1.21.2", features = ["full"] }
|
||||
uuid = { version = "1.1.2", features = ["serde", "v4"] }
|
||||
walkdir = "2.3.2"
|
||||
whoami = { version = "1.2.3", optional = true }
|
||||
index-scheduler = { path = "../index-scheduler" }
|
||||
index = { path = "../index" }
|
||||
file-store = { path = "../file-store" }
|
||||
|
||||
[dev-dependencies]
|
||||
actix-rt = "2.7.0"
|
||||
|
@ -1,61 +0,0 @@
|
||||
use std::error::Error;
|
||||
|
||||
use meilisearch_types::error::{Code, ErrorCode};
|
||||
use meilisearch_types::internal_error;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::{error::MilliError, update_file_store};
|
||||
|
||||
pub type Result<T> = std::result::Result<T, IndexError>;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum IndexError {
|
||||
#[error("An internal error has occurred. `{0}`.")]
|
||||
Internal(Box<dyn Error + Send + Sync + 'static>),
|
||||
#[error("Document `{0}` not found.")]
|
||||
DocumentNotFound(String),
|
||||
#[error("{0}")]
|
||||
Facet(#[from] FacetError),
|
||||
#[error("{0}")]
|
||||
Milli(#[from] milli::Error),
|
||||
}
|
||||
|
||||
internal_error!(
|
||||
IndexError: std::io::Error,
|
||||
milli::heed::Error,
|
||||
fst::Error,
|
||||
serde_json::Error,
|
||||
update_file_store::UpdateFileStoreError,
|
||||
milli::documents::Error
|
||||
);
|
||||
|
||||
impl ErrorCode for IndexError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
IndexError::Internal(_) => Code::Internal,
|
||||
IndexError::DocumentNotFound(_) => Code::DocumentNotFound,
|
||||
IndexError::Facet(e) => e.error_code(),
|
||||
IndexError::Milli(e) => MilliError(e).error_code(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<milli::UserError> for IndexError {
|
||||
fn from(error: milli::UserError) -> IndexError {
|
||||
IndexError::Milli(error.into())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum FacetError {
|
||||
#[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
|
||||
InvalidExpression(&'static [&'static str], Value),
|
||||
}
|
||||
|
||||
impl ErrorCode for FacetError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
FacetError::InvalidExpression(_, _) => Code::Filter,
|
||||
}
|
||||
}
|
||||
}
|
@ -7,12 +7,8 @@ use tokio::task::JoinError;
|
||||
|
||||
use super::DocumentAdditionFormat;
|
||||
use crate::document_formats::DocumentFormatError;
|
||||
use crate::dump::error::DumpError;
|
||||
use crate::index::error::IndexError;
|
||||
use crate::tasks::error::TaskError;
|
||||
use crate::update_file_store::UpdateFileStoreError;
|
||||
|
||||
use crate::index_resolver::error::IndexResolverError;
|
||||
// use crate::dump::error::DumpError;
|
||||
use index::error::IndexError;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, IndexControllerError>;
|
||||
|
||||
@ -20,17 +16,15 @@ pub type Result<T> = std::result::Result<T, IndexControllerError>;
|
||||
pub enum IndexControllerError {
|
||||
#[error("Index creation must have an uid")]
|
||||
MissingUid,
|
||||
#[error("{0}")]
|
||||
IndexResolver(#[from] IndexResolverError),
|
||||
#[error("{0}")]
|
||||
#[error(transparent)]
|
||||
IndexResolver(#[from] index_scheduler::Error),
|
||||
#[error(transparent)]
|
||||
IndexError(#[from] IndexError),
|
||||
#[error("An internal error has occurred. `{0}`.")]
|
||||
Internal(Box<dyn Error + Send + Sync + 'static>),
|
||||
#[error("{0}")]
|
||||
TaskError(#[from] TaskError),
|
||||
#[error("{0}")]
|
||||
DumpError(#[from] DumpError),
|
||||
#[error("{0}")]
|
||||
// #[error("{0}")]
|
||||
// DumpError(#[from] DumpError),
|
||||
#[error(transparent)]
|
||||
DocumentFormatError(#[from] DocumentFormatError),
|
||||
#[error("A {0} payload is missing.")]
|
||||
MissingPayload(DocumentAdditionFormat),
|
||||
@ -38,7 +32,7 @@ pub enum IndexControllerError {
|
||||
PayloadTooLarge,
|
||||
}
|
||||
|
||||
internal_error!(IndexControllerError: JoinError, UpdateFileStoreError);
|
||||
internal_error!(IndexControllerError: JoinError, file_store::Error);
|
||||
|
||||
impl From<actix_web::error::PayloadError> for IndexControllerError {
|
||||
fn from(other: actix_web::error::PayloadError) -> Self {
|
||||
@ -53,20 +47,20 @@ impl ErrorCode for IndexControllerError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
IndexControllerError::MissingUid => Code::BadRequest,
|
||||
IndexControllerError::IndexResolver(e) => e.error_code(),
|
||||
IndexControllerError::IndexError(e) => e.error_code(),
|
||||
IndexControllerError::Internal(_) => Code::Internal,
|
||||
IndexControllerError::TaskError(e) => e.error_code(),
|
||||
IndexControllerError::DocumentFormatError(e) => e.error_code(),
|
||||
IndexControllerError::MissingPayload(_) => Code::MissingPayload,
|
||||
IndexControllerError::PayloadTooLarge => Code::PayloadTooLarge,
|
||||
IndexControllerError::DumpError(e) => e.error_code(),
|
||||
IndexControllerError::IndexResolver(_) => todo!(),
|
||||
IndexControllerError::IndexError(_) => todo!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
impl From<IndexUidFormatError> for IndexControllerError {
|
||||
fn from(err: IndexUidFormatError) -> Self {
|
||||
IndexResolverError::from(err).into()
|
||||
index_scheduler::Error::from(err).into()
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
@ -1,4 +1,3 @@
|
||||
use meilisearch_auth::SearchRules;
|
||||
use std::collections::BTreeMap;
|
||||
use std::fmt;
|
||||
use std::io::Cursor;
|
||||
@ -9,10 +8,14 @@ use std::time::Duration;
|
||||
|
||||
use actix_web::error::PayloadError;
|
||||
use bytes::Bytes;
|
||||
use file_store::FileStore;
|
||||
use futures::Stream;
|
||||
use futures::StreamExt;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use index_scheduler::TaskKind;
|
||||
use meilisearch_auth::SearchRules;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use milli::update::IndexDocumentsMethod;
|
||||
use milli::update::{IndexDocumentsMethod, IndexerConfig};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use time::OffsetDateTime;
|
||||
use tokio::sync::RwLock;
|
||||
@ -21,32 +24,19 @@ use tokio::time::sleep;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::document_formats::{read_csv, read_json, read_ndjson};
|
||||
use crate::dump::{self, load_dump, DumpHandler};
|
||||
use crate::index::{
|
||||
Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked,
|
||||
};
|
||||
use crate::index_resolver::error::IndexResolverError;
|
||||
// use crate::dump::{self, load_dump, DumpHandler};
|
||||
use crate::options::{IndexerOpts, SchedulerConfig};
|
||||
use crate::snapshot::{load_snapshot, SnapshotService};
|
||||
use crate::tasks::error::TaskError;
|
||||
use crate::tasks::task::{DocumentDeletion, Task, TaskContent, TaskId};
|
||||
use crate::tasks::{
|
||||
BatchHandler, EmptyBatchHandler, Scheduler, SnapshotHandler, TaskFilter, TaskStore,
|
||||
};
|
||||
use error::Result;
|
||||
use index::{
|
||||
Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked,
|
||||
};
|
||||
|
||||
use self::error::IndexControllerError;
|
||||
use crate::index_resolver::index_store::{IndexStore, MapIndexStore};
|
||||
use crate::index_resolver::meta_store::{HeedMetaStore, IndexMetaStore};
|
||||
use crate::index_resolver::{create_index_resolver, IndexResolver};
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
|
||||
pub mod error;
|
||||
pub mod versioning;
|
||||
|
||||
/// Concrete implementation of the IndexController, exposed by meilisearch-lib
|
||||
pub type MeiliSearch = IndexController<HeedMetaStore, MapIndexStore>;
|
||||
|
||||
pub type Payload = Box<
|
||||
dyn Stream<Item = std::result::Result<Bytes, PayloadError>> + Send + Sync + 'static + Unpin,
|
||||
>;
|
||||
@ -74,23 +64,9 @@ pub struct IndexSettings {
|
||||
pub primary_key: Option<String>,
|
||||
}
|
||||
|
||||
pub struct IndexController<U, I> {
|
||||
pub index_resolver: Arc<IndexResolver<U, I>>,
|
||||
scheduler: Arc<RwLock<Scheduler>>,
|
||||
task_store: TaskStore,
|
||||
pub update_file_store: UpdateFileStore,
|
||||
}
|
||||
|
||||
/// Need a custom implementation for clone because deriving require that U and I are clone.
|
||||
impl<U, I> Clone for IndexController<U, I> {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
index_resolver: self.index_resolver.clone(),
|
||||
scheduler: self.scheduler.clone(),
|
||||
update_file_store: self.update_file_store.clone(),
|
||||
task_store: self.task_store.clone(),
|
||||
}
|
||||
}
|
||||
#[derive(Clone)]
|
||||
pub struct Meilisearch {
|
||||
index_scheduler: IndexScheduler,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@ -170,7 +146,7 @@ impl IndexControllerBuilder {
|
||||
db_path: impl AsRef<Path>,
|
||||
indexer_options: IndexerOpts,
|
||||
scheduler_config: SchedulerConfig,
|
||||
) -> anyhow::Result<MeiliSearch> {
|
||||
) -> anyhow::Result<Meilisearch> {
|
||||
let index_size = self
|
||||
.max_index_size
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing index size"))?;
|
||||
@ -178,6 +154,8 @@ impl IndexControllerBuilder {
|
||||
.max_task_store_size
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing update database size"))?;
|
||||
|
||||
/*
|
||||
TODO: TAMO: enable dumps and snapshots to happens
|
||||
if let Some(ref path) = self.import_snapshot {
|
||||
log::info!("Loading from snapshot {:?}", path);
|
||||
load_snapshot(
|
||||
@ -203,47 +181,35 @@ impl IndexControllerBuilder {
|
||||
versioning::check_version_file(db_path.as_ref())?;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
std::fs::create_dir_all(db_path.as_ref())?;
|
||||
|
||||
let meta_env = Arc::new(open_meta_env(db_path.as_ref(), task_store_size)?);
|
||||
|
||||
let update_file_store = UpdateFileStore::new(&db_path)?;
|
||||
let file_store = FileStore::new(&db_path)?;
|
||||
// Create or overwrite the version file for this DB
|
||||
versioning::create_version_file(db_path.as_ref())?;
|
||||
|
||||
let index_resolver = Arc::new(create_index_resolver(
|
||||
&db_path,
|
||||
let indexer_config = IndexerConfig {
|
||||
log_every_n: Some(indexer_options.log_every_n),
|
||||
max_nb_chunks: indexer_options.max_nb_chunks,
|
||||
documents_chunk_size: None,
|
||||
// TODO: TAMO: Fix this thing
|
||||
max_memory: None, // Some(indexer_options.max_indexing_memory.into()),
|
||||
chunk_compression_type: milli::CompressionType::None,
|
||||
chunk_compression_level: None,
|
||||
// TODO: TAMO: do something with the indexing_config.max_indexing_threads
|
||||
thread_pool: None,
|
||||
max_positions_per_attributes: None,
|
||||
};
|
||||
|
||||
let scheduler = IndexScheduler::new(
|
||||
db_path.as_ref().to_path_buf(),
|
||||
index_size,
|
||||
&indexer_options,
|
||||
meta_env.clone(),
|
||||
update_file_store.clone(),
|
||||
)?);
|
||||
|
||||
let dump_path = self
|
||||
.dump_dst
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?;
|
||||
|
||||
let dump_handler = Arc::new(DumpHandler::new(
|
||||
dump_path,
|
||||
db_path.as_ref().into(),
|
||||
update_file_store.clone(),
|
||||
task_store_size,
|
||||
index_size,
|
||||
meta_env.clone(),
|
||||
index_resolver.clone(),
|
||||
));
|
||||
let task_store = TaskStore::new(meta_env)?;
|
||||
|
||||
// register all the batch handlers for use with the scheduler.
|
||||
let handlers: Vec<Arc<dyn BatchHandler + Sync + Send + 'static>> = vec![
|
||||
index_resolver.clone(),
|
||||
dump_handler,
|
||||
Arc::new(SnapshotHandler),
|
||||
// dummy handler to catch all empty batches
|
||||
Arc::new(EmptyBatchHandler),
|
||||
];
|
||||
let scheduler = Scheduler::new(task_store.clone(), handlers, scheduler_config)?;
|
||||
indexer_config,
|
||||
file_store,
|
||||
);
|
||||
|
||||
if self.schedule_snapshot {
|
||||
let snapshot_period = self
|
||||
@ -265,11 +231,8 @@ impl IndexControllerBuilder {
|
||||
tokio::task::spawn_local(snapshot_service.run());
|
||||
}
|
||||
|
||||
Ok(IndexController {
|
||||
index_resolver,
|
||||
scheduler,
|
||||
update_file_store,
|
||||
task_store,
|
||||
Ok(Meilisearch {
|
||||
index_scheduler: scheduler,
|
||||
})
|
||||
}
|
||||
|
||||
@ -350,100 +313,13 @@ impl IndexControllerBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
impl<U, I> IndexController<U, I>
|
||||
where
|
||||
U: IndexMetaStore,
|
||||
I: IndexStore,
|
||||
{
|
||||
impl Meilisearch {
|
||||
pub fn builder() -> IndexControllerBuilder {
|
||||
IndexControllerBuilder::default()
|
||||
}
|
||||
|
||||
pub async fn register_update(&self, uid: String, update: Update) -> Result<Task> {
|
||||
let index_uid = IndexUid::from_str(&uid).map_err(IndexResolverError::from)?;
|
||||
let content = match update {
|
||||
Update::DeleteDocuments(ids) => TaskContent::DocumentDeletion {
|
||||
index_uid,
|
||||
deletion: DocumentDeletion::Ids(ids),
|
||||
},
|
||||
Update::ClearDocuments => TaskContent::DocumentDeletion {
|
||||
index_uid,
|
||||
deletion: DocumentDeletion::Clear,
|
||||
},
|
||||
Update::Settings {
|
||||
settings,
|
||||
is_deletion,
|
||||
allow_index_creation,
|
||||
} => TaskContent::SettingsUpdate {
|
||||
settings,
|
||||
is_deletion,
|
||||
allow_index_creation,
|
||||
index_uid,
|
||||
},
|
||||
Update::DocumentAddition {
|
||||
mut payload,
|
||||
primary_key,
|
||||
format,
|
||||
method,
|
||||
allow_index_creation,
|
||||
} => {
|
||||
let mut buffer = Vec::new();
|
||||
while let Some(bytes) = payload.next().await {
|
||||
let bytes = bytes?;
|
||||
buffer.extend_from_slice(&bytes);
|
||||
}
|
||||
let (content_uuid, mut update_file) = self.update_file_store.new_update()?;
|
||||
let documents_count = tokio::task::spawn_blocking(move || -> Result<_> {
|
||||
// check if the payload is empty, and return an error
|
||||
if buffer.is_empty() {
|
||||
return Err(IndexControllerError::MissingPayload(format));
|
||||
}
|
||||
|
||||
let reader = Cursor::new(buffer);
|
||||
let count = match format {
|
||||
DocumentAdditionFormat::Json => read_json(reader, &mut *update_file)?,
|
||||
DocumentAdditionFormat::Csv => read_csv(reader, &mut *update_file)?,
|
||||
DocumentAdditionFormat::Ndjson => read_ndjson(reader, &mut *update_file)?,
|
||||
};
|
||||
|
||||
update_file.persist()?;
|
||||
|
||||
Ok(count)
|
||||
})
|
||||
.await??;
|
||||
|
||||
TaskContent::DocumentAddition {
|
||||
content_uuid,
|
||||
merge_strategy: method,
|
||||
primary_key,
|
||||
documents_count,
|
||||
allow_index_creation,
|
||||
index_uid,
|
||||
}
|
||||
}
|
||||
Update::DeleteIndex => TaskContent::IndexDeletion { index_uid },
|
||||
Update::CreateIndex { primary_key } => TaskContent::IndexCreation {
|
||||
primary_key,
|
||||
index_uid,
|
||||
},
|
||||
Update::UpdateIndex { primary_key } => TaskContent::IndexUpdate {
|
||||
primary_key,
|
||||
index_uid,
|
||||
},
|
||||
};
|
||||
|
||||
let task = self.task_store.register(content).await?;
|
||||
self.scheduler.read().await.notify();
|
||||
|
||||
Ok(task)
|
||||
}
|
||||
|
||||
pub async fn register_dump_task(&self) -> Result<Task> {
|
||||
let uid = dump::generate_uid();
|
||||
let content = TaskContent::Dump { uid };
|
||||
let task = self.task_store.register(content).await?;
|
||||
self.scheduler.read().await.notify();
|
||||
Ok(task)
|
||||
pub async fn register_task(&self, task: TaskKind) -> Result<Task> {
|
||||
Ok(self.index_scheduler.register(task).await?)
|
||||
}
|
||||
|
||||
pub async fn get_task(&self, id: TaskId, filter: Option<TaskFilter>) -> Result<Task> {
|
||||
@ -652,6 +528,9 @@ fn clamp_to_page_size(size: usize) -> usize {
|
||||
size / page_size::get() * page_size::get()
|
||||
}
|
||||
|
||||
/*
|
||||
TODO: TAMO: uncomment this test
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use futures::future::ok;
|
||||
@ -669,22 +548,6 @@ mod test {
|
||||
|
||||
use super::*;
|
||||
|
||||
impl IndexController<MockIndexMetaStore, MockIndexStore> {
|
||||
pub fn mock(
|
||||
index_resolver: Arc<IndexResolver<MockIndexMetaStore, MockIndexStore>>,
|
||||
task_store: TaskStore,
|
||||
update_file_store: UpdateFileStore,
|
||||
scheduler: Arc<RwLock<Scheduler>>,
|
||||
) -> Self {
|
||||
IndexController {
|
||||
index_resolver,
|
||||
task_store,
|
||||
update_file_store,
|
||||
scheduler,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_search_simple() {
|
||||
let index_uid = "test";
|
||||
@ -781,3 +644,4 @@ mod test {
|
||||
assert_eq!(r, result);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
@ -1,71 +0,0 @@
|
||||
use std::fmt;
|
||||
|
||||
use meilisearch_types::error::{Code, ErrorCode};
|
||||
use meilisearch_types::index_uid::IndexUidFormatError;
|
||||
use meilisearch_types::internal_error;
|
||||
use tokio::sync::mpsc::error::SendError as MpscSendError;
|
||||
use tokio::sync::oneshot::error::RecvError as OneshotRecvError;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{error::MilliError, index::error::IndexError, update_file_store::UpdateFileStoreError};
|
||||
|
||||
pub type Result<T> = std::result::Result<T, IndexResolverError>;
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum IndexResolverError {
|
||||
#[error("{0}")]
|
||||
IndexError(#[from] IndexError),
|
||||
#[error("Index `{0}` already exists.")]
|
||||
IndexAlreadyExists(String),
|
||||
#[error("Index `{0}` not found.")]
|
||||
UnexistingIndex(String),
|
||||
#[error("A primary key is already present. It's impossible to update it")]
|
||||
ExistingPrimaryKey,
|
||||
#[error("An internal error has occurred. `{0}`.")]
|
||||
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
|
||||
#[error("The creation of the `{0}` index has failed due to `Index uuid is already assigned`.")]
|
||||
UuidAlreadyExists(Uuid),
|
||||
#[error("{0}")]
|
||||
Milli(#[from] milli::Error),
|
||||
#[error("{0}")]
|
||||
BadlyFormatted(#[from] IndexUidFormatError),
|
||||
}
|
||||
|
||||
impl<T> From<MpscSendError<T>> for IndexResolverError
|
||||
where
|
||||
T: Send + Sync + 'static + fmt::Debug,
|
||||
{
|
||||
fn from(other: tokio::sync::mpsc::error::SendError<T>) -> Self {
|
||||
Self::Internal(Box::new(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<OneshotRecvError> for IndexResolverError {
|
||||
fn from(other: tokio::sync::oneshot::error::RecvError) -> Self {
|
||||
Self::Internal(Box::new(other))
|
||||
}
|
||||
}
|
||||
|
||||
internal_error!(
|
||||
IndexResolverError: milli::heed::Error,
|
||||
uuid::Error,
|
||||
std::io::Error,
|
||||
tokio::task::JoinError,
|
||||
serde_json::Error,
|
||||
UpdateFileStoreError
|
||||
);
|
||||
|
||||
impl ErrorCode for IndexResolverError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
IndexResolverError::IndexError(e) => e.error_code(),
|
||||
IndexResolverError::IndexAlreadyExists(_) => Code::IndexAlreadyExists,
|
||||
IndexResolverError::UnexistingIndex(_) => Code::IndexNotFound,
|
||||
IndexResolverError::ExistingPrimaryKey => Code::PrimaryKeyAlreadyPresent,
|
||||
IndexResolverError::Internal(_) => Code::Internal,
|
||||
IndexResolverError::UuidAlreadyExists(_) => Code::CreateIndex,
|
||||
IndexResolverError::Milli(e) => MilliError(e).error_code(),
|
||||
IndexResolverError::BadlyFormatted(_) => Code::InvalidIndexUid,
|
||||
}
|
||||
}
|
||||
}
|
@ -1,223 +0,0 @@
|
||||
use std::collections::HashSet;
|
||||
use std::fs::{create_dir_all, File};
|
||||
use std::io::{BufRead, BufReader, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use milli::heed::types::{SerdeBincode, Str};
|
||||
use milli::heed::{CompactionOption, Database, Env};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::error::{IndexResolverError, Result};
|
||||
use crate::tasks::task::TaskId;
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct DumpEntry {
|
||||
pub uid: String,
|
||||
pub index_meta: IndexMeta,
|
||||
}
|
||||
|
||||
const UUIDS_DB_PATH: &str = "index_uuids";
|
||||
|
||||
#[async_trait::async_trait]
|
||||
#[cfg_attr(test, mockall::automock)]
|
||||
pub trait IndexMetaStore: Sized {
|
||||
// Create a new entry for `name`. Return an error if `err` and the entry already exists, return
|
||||
// the uuid otherwise.
|
||||
async fn get(&self, uid: String) -> Result<(String, Option<IndexMeta>)>;
|
||||
async fn delete(&self, uid: String) -> Result<Option<IndexMeta>>;
|
||||
async fn list(&self) -> Result<Vec<(String, IndexMeta)>>;
|
||||
async fn insert(&self, name: String, meta: IndexMeta) -> Result<()>;
|
||||
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
|
||||
async fn get_size(&self) -> Result<u64>;
|
||||
async fn dump(&self, path: PathBuf) -> Result<()>;
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct IndexMeta {
|
||||
pub uuid: Uuid,
|
||||
pub creation_task_id: TaskId,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct HeedMetaStore {
|
||||
env: Arc<Env>,
|
||||
db: Database<Str, SerdeBincode<IndexMeta>>,
|
||||
}
|
||||
|
||||
impl Drop for HeedMetaStore {
|
||||
fn drop(&mut self) {
|
||||
if Arc::strong_count(&self.env) == 1 {
|
||||
self.env.as_ref().clone().prepare_for_closing();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl HeedMetaStore {
|
||||
pub fn new(env: Arc<milli::heed::Env>) -> Result<Self> {
|
||||
let db = env.create_database(Some("uuids"))?;
|
||||
Ok(Self { env, db })
|
||||
}
|
||||
|
||||
fn get(&self, name: &str) -> Result<Option<IndexMeta>> {
|
||||
let env = self.env.clone();
|
||||
let db = self.db;
|
||||
let txn = env.read_txn()?;
|
||||
match db.get(&txn, name)? {
|
||||
Some(meta) => Ok(Some(meta)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
fn delete(&self, uid: String) -> Result<Option<IndexMeta>> {
|
||||
let env = self.env.clone();
|
||||
let db = self.db;
|
||||
let mut txn = env.write_txn()?;
|
||||
match db.get(&txn, &uid)? {
|
||||
Some(meta) => {
|
||||
db.delete(&mut txn, &uid)?;
|
||||
txn.commit()?;
|
||||
Ok(Some(meta))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
fn list(&self) -> Result<Vec<(String, IndexMeta)>> {
|
||||
let env = self.env.clone();
|
||||
let db = self.db;
|
||||
let txn = env.read_txn()?;
|
||||
let mut entries = Vec::new();
|
||||
for entry in db.iter(&txn)? {
|
||||
let (name, meta) = entry?;
|
||||
entries.push((name.to_string(), meta))
|
||||
}
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
pub(crate) fn insert(&self, name: String, meta: IndexMeta) -> Result<()> {
|
||||
let env = self.env.clone();
|
||||
let db = self.db;
|
||||
let mut txn = env.write_txn()?;
|
||||
|
||||
if db.get(&txn, &name)?.is_some() {
|
||||
return Err(IndexResolverError::IndexAlreadyExists(name));
|
||||
}
|
||||
|
||||
db.put(&mut txn, &name, &meta)?;
|
||||
txn.commit()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn snapshot(&self, mut path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||
// Write transaction to acquire a lock on the database.
|
||||
let txn = self.env.write_txn()?;
|
||||
let mut entries = HashSet::new();
|
||||
for entry in self.db.iter(&txn)? {
|
||||
let (_, IndexMeta { uuid, .. }) = entry?;
|
||||
entries.insert(uuid);
|
||||
}
|
||||
|
||||
// only perform snapshot if there are indexes
|
||||
if !entries.is_empty() {
|
||||
path.push(UUIDS_DB_PATH);
|
||||
create_dir_all(&path).unwrap();
|
||||
path.push("data.mdb");
|
||||
self.env.copy_to_path(path, CompactionOption::Enabled)?;
|
||||
}
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
fn get_size(&self) -> Result<u64> {
|
||||
Ok(WalkDir::new(self.env.path())
|
||||
.into_iter()
|
||||
.filter_map(|entry| entry.ok())
|
||||
.filter_map(|entry| entry.metadata().ok())
|
||||
.filter(|metadata| metadata.is_file())
|
||||
.fold(0, |acc, m| acc + m.len()))
|
||||
}
|
||||
|
||||
pub fn dump(&self, path: PathBuf) -> Result<()> {
|
||||
let dump_path = path.join(UUIDS_DB_PATH);
|
||||
create_dir_all(&dump_path)?;
|
||||
let dump_file_path = dump_path.join("data.jsonl");
|
||||
let mut dump_file = File::create(&dump_file_path)?;
|
||||
|
||||
let txn = self.env.read_txn()?;
|
||||
for entry in self.db.iter(&txn)? {
|
||||
let (uid, index_meta) = entry?;
|
||||
let uid = uid.to_string();
|
||||
|
||||
let entry = DumpEntry { uid, index_meta };
|
||||
serde_json::to_writer(&mut dump_file, &entry)?;
|
||||
dump_file.write_all(b"\n").unwrap();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn load_dump(src: impl AsRef<Path>, env: Arc<milli::heed::Env>) -> Result<()> {
|
||||
let src_indexes = src.as_ref().join(UUIDS_DB_PATH).join("data.jsonl");
|
||||
let indexes = File::open(&src_indexes)?;
|
||||
let mut indexes = BufReader::new(indexes);
|
||||
let mut line = String::new();
|
||||
|
||||
let db = Self::new(env)?;
|
||||
let mut txn = db.env.write_txn()?;
|
||||
|
||||
loop {
|
||||
match indexes.read_line(&mut line) {
|
||||
Ok(0) => break,
|
||||
Ok(_) => {
|
||||
let DumpEntry { uid, index_meta } = serde_json::from_str(&line)?;
|
||||
db.db.put(&mut txn, &uid, &index_meta)?;
|
||||
}
|
||||
Err(e) => return Err(e.into()),
|
||||
}
|
||||
|
||||
line.clear();
|
||||
}
|
||||
txn.commit()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl IndexMetaStore for HeedMetaStore {
|
||||
async fn get(&self, name: String) -> Result<(String, Option<IndexMeta>)> {
|
||||
let this = self.clone();
|
||||
tokio::task::spawn_blocking(move || this.get(&name).map(|res| (name, res))).await?
|
||||
}
|
||||
|
||||
async fn delete(&self, uid: String) -> Result<Option<IndexMeta>> {
|
||||
let this = self.clone();
|
||||
tokio::task::spawn_blocking(move || this.delete(uid)).await?
|
||||
}
|
||||
|
||||
async fn list(&self) -> Result<Vec<(String, IndexMeta)>> {
|
||||
let this = self.clone();
|
||||
tokio::task::spawn_blocking(move || this.list()).await?
|
||||
}
|
||||
|
||||
async fn insert(&self, name: String, meta: IndexMeta) -> Result<()> {
|
||||
let this = self.clone();
|
||||
tokio::task::spawn_blocking(move || this.insert(name, meta)).await?
|
||||
}
|
||||
|
||||
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||
let this = self.clone();
|
||||
tokio::task::spawn_blocking(move || this.snapshot(path)).await?
|
||||
}
|
||||
|
||||
async fn get_size(&self) -> Result<u64> {
|
||||
self.get_size()
|
||||
}
|
||||
|
||||
async fn dump(&self, path: PathBuf) -> Result<()> {
|
||||
let this = self.clone();
|
||||
Ok(tokio::task::spawn_blocking(move || this.dump(path)).await??)
|
||||
}
|
||||
}
|
@ -1,685 +0,0 @@
|
||||
pub mod error;
|
||||
pub mod index_store;
|
||||
pub mod meta_store;
|
||||
|
||||
use std::convert::TryFrom;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use error::{IndexResolverError, Result};
|
||||
use index_store::{IndexStore, MapIndexStore};
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meta_store::{HeedMetaStore, IndexMetaStore};
|
||||
use milli::heed::Env;
|
||||
use milli::update::{DocumentDeletionResult, IndexerConfig};
|
||||
use time::OffsetDateTime;
|
||||
use tokio::task::spawn_blocking;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::index::{error::Result as IndexResult, Index};
|
||||
use crate::options::IndexerOpts;
|
||||
use crate::tasks::task::{DocumentDeletion, Task, TaskContent, TaskEvent, TaskId, TaskResult};
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
|
||||
use self::meta_store::IndexMeta;
|
||||
|
||||
pub type HardStateIndexResolver = IndexResolver<HeedMetaStore, MapIndexStore>;
|
||||
|
||||
#[cfg(not(test))]
|
||||
pub use real::IndexResolver;
|
||||
|
||||
#[cfg(test)]
|
||||
pub use test::MockIndexResolver as IndexResolver;
|
||||
|
||||
pub fn create_index_resolver(
|
||||
path: impl AsRef<Path>,
|
||||
index_size: usize,
|
||||
indexer_opts: &IndexerOpts,
|
||||
meta_env: Arc<milli::heed::Env>,
|
||||
file_store: UpdateFileStore,
|
||||
) -> anyhow::Result<HardStateIndexResolver> {
|
||||
let uuid_store = HeedMetaStore::new(meta_env)?;
|
||||
let index_store = MapIndexStore::new(&path, index_size, indexer_opts)?;
|
||||
Ok(IndexResolver::new(uuid_store, index_store, file_store))
|
||||
}
|
||||
|
||||
mod real {
|
||||
use super::*;
|
||||
|
||||
pub struct IndexResolver<U, I> {
|
||||
pub(super) index_uuid_store: U,
|
||||
pub(super) index_store: I,
|
||||
pub(super) file_store: UpdateFileStore,
|
||||
}
|
||||
|
||||
impl IndexResolver<HeedMetaStore, MapIndexStore> {
|
||||
pub fn load_dump(
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
index_db_size: usize,
|
||||
env: Arc<Env>,
|
||||
indexer_opts: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
HeedMetaStore::load_dump(&src, env)?;
|
||||
let indexes_path = src.as_ref().join("indexes");
|
||||
let indexes = indexes_path.read_dir()?;
|
||||
let indexer_config = IndexerConfig::try_from(indexer_opts)?;
|
||||
for index in indexes {
|
||||
Index::load_dump(&index?.path(), &dst, index_db_size, &indexer_config)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<U, I> IndexResolver<U, I>
|
||||
where
|
||||
U: IndexMetaStore,
|
||||
I: IndexStore,
|
||||
{
|
||||
pub fn new(index_uuid_store: U, index_store: I, file_store: UpdateFileStore) -> Self {
|
||||
Self {
|
||||
index_uuid_store,
|
||||
index_store,
|
||||
file_store,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn process_document_addition_batch(&self, tasks: &mut [Task]) {
|
||||
fn get_content_uuid(task: &Task) -> Uuid {
|
||||
match task {
|
||||
Task {
|
||||
content: TaskContent::DocumentAddition { content_uuid, .. },
|
||||
..
|
||||
} => *content_uuid,
|
||||
_ => panic!("unexpected task in the document addition batch"),
|
||||
}
|
||||
}
|
||||
|
||||
let content_uuids = tasks.iter().map(get_content_uuid).collect::<Vec<_>>();
|
||||
|
||||
match tasks.first() {
|
||||
Some(Task {
|
||||
id,
|
||||
content:
|
||||
TaskContent::DocumentAddition {
|
||||
merge_strategy,
|
||||
primary_key,
|
||||
allow_index_creation,
|
||||
index_uid,
|
||||
..
|
||||
},
|
||||
..
|
||||
}) => {
|
||||
let primary_key = primary_key.clone();
|
||||
let method = *merge_strategy;
|
||||
|
||||
let index = if *allow_index_creation {
|
||||
self.get_or_create_index(index_uid.clone(), *id).await
|
||||
} else {
|
||||
self.get_index(index_uid.as_str().to_string()).await
|
||||
};
|
||||
|
||||
// If the index doesn't exist and we are not allowed to create it with the first
|
||||
// task, we must fails the whole batch.
|
||||
let now = OffsetDateTime::now_utc();
|
||||
let index = match index {
|
||||
Ok(index) => index,
|
||||
Err(e) => {
|
||||
let error = ResponseError::from(e);
|
||||
for task in tasks.iter_mut() {
|
||||
task.events.push(TaskEvent::Failed {
|
||||
error: error.clone(),
|
||||
timestamp: now,
|
||||
});
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let file_store = self.file_store.clone();
|
||||
let result = spawn_blocking(move || {
|
||||
index.update_documents(
|
||||
method,
|
||||
primary_key,
|
||||
file_store,
|
||||
content_uuids.into_iter(),
|
||||
)
|
||||
})
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(Ok(results)) => {
|
||||
for (task, result) in tasks.iter_mut().zip(results) {
|
||||
let event = match result {
|
||||
Ok(addition) => {
|
||||
TaskEvent::succeeded(TaskResult::DocumentAddition {
|
||||
indexed_documents: addition.indexed_documents,
|
||||
})
|
||||
}
|
||||
Err(error) => {
|
||||
TaskEvent::failed(IndexResolverError::from(error))
|
||||
}
|
||||
};
|
||||
task.events.push(event);
|
||||
}
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
let event = TaskEvent::failed(e);
|
||||
for task in tasks.iter_mut() {
|
||||
task.events.push(event.clone());
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
let event = TaskEvent::failed(IndexResolverError::from(e));
|
||||
for task in tasks.iter_mut() {
|
||||
task.events.push(event.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => panic!("invalid batch!"),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn delete_content_file(&self, content_uuid: Uuid) -> Result<()> {
|
||||
self.file_store.delete(content_uuid).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn process_task_inner(&self, task: &Task) -> Result<TaskResult> {
|
||||
match &task.content {
|
||||
TaskContent::DocumentAddition { .. } => {
|
||||
panic!("updates should be handled by batch")
|
||||
}
|
||||
TaskContent::DocumentDeletion {
|
||||
deletion: DocumentDeletion::Ids(ids),
|
||||
index_uid,
|
||||
} => {
|
||||
let ids = ids.clone();
|
||||
let index = self.get_index(index_uid.clone().into_inner()).await?;
|
||||
|
||||
let DocumentDeletionResult {
|
||||
deleted_documents, ..
|
||||
} = spawn_blocking(move || index.delete_documents(&ids)).await??;
|
||||
|
||||
Ok(TaskResult::DocumentDeletion { deleted_documents })
|
||||
}
|
||||
TaskContent::DocumentDeletion {
|
||||
deletion: DocumentDeletion::Clear,
|
||||
index_uid,
|
||||
} => {
|
||||
let index = self.get_index(index_uid.clone().into_inner()).await?;
|
||||
let deleted_documents = spawn_blocking(move || -> IndexResult<u64> {
|
||||
let number_documents = index.stats()?.number_of_documents;
|
||||
index.clear_documents()?;
|
||||
Ok(number_documents)
|
||||
})
|
||||
.await??;
|
||||
|
||||
Ok(TaskResult::ClearAll { deleted_documents })
|
||||
}
|
||||
TaskContent::SettingsUpdate {
|
||||
settings,
|
||||
is_deletion,
|
||||
allow_index_creation,
|
||||
index_uid,
|
||||
} => {
|
||||
let index = if *is_deletion || !*allow_index_creation {
|
||||
self.get_index(index_uid.clone().into_inner()).await?
|
||||
} else {
|
||||
self.get_or_create_index(index_uid.clone(), task.id).await?
|
||||
};
|
||||
|
||||
let settings = settings.clone();
|
||||
spawn_blocking(move || index.update_settings(&settings.check())).await??;
|
||||
|
||||
Ok(TaskResult::Other)
|
||||
}
|
||||
TaskContent::IndexDeletion { index_uid } => {
|
||||
let index = self.delete_index(index_uid.clone().into_inner()).await?;
|
||||
|
||||
let deleted_documents = spawn_blocking(move || -> IndexResult<u64> {
|
||||
Ok(index.stats()?.number_of_documents)
|
||||
})
|
||||
.await??;
|
||||
|
||||
Ok(TaskResult::ClearAll { deleted_documents })
|
||||
}
|
||||
TaskContent::IndexCreation {
|
||||
primary_key,
|
||||
index_uid,
|
||||
} => {
|
||||
let index = self.create_index(index_uid.clone(), task.id).await?;
|
||||
|
||||
if let Some(primary_key) = primary_key {
|
||||
let primary_key = primary_key.clone();
|
||||
spawn_blocking(move || index.update_primary_key(primary_key)).await??;
|
||||
}
|
||||
|
||||
Ok(TaskResult::Other)
|
||||
}
|
||||
TaskContent::IndexUpdate {
|
||||
primary_key,
|
||||
index_uid,
|
||||
} => {
|
||||
let index = self.get_index(index_uid.clone().into_inner()).await?;
|
||||
|
||||
if let Some(primary_key) = primary_key {
|
||||
let primary_key = primary_key.clone();
|
||||
spawn_blocking(move || index.update_primary_key(primary_key)).await??;
|
||||
}
|
||||
|
||||
Ok(TaskResult::Other)
|
||||
}
|
||||
_ => unreachable!("Invalid task for index resolver"),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn process_task(&self, task: &mut Task) {
|
||||
match self.process_task_inner(task).await {
|
||||
Ok(res) => task.events.push(TaskEvent::succeeded(res)),
|
||||
Err(e) => task.events.push(TaskEvent::failed(e)),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn dump(&self, path: impl AsRef<Path>) -> Result<()> {
|
||||
for (_, index) in self.list().await? {
|
||||
index.dump(&path)?;
|
||||
}
|
||||
self.index_uuid_store.dump(path.as_ref().to_owned()).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn create_index(&self, uid: IndexUid, creation_task_id: TaskId) -> Result<Index> {
|
||||
match self.index_uuid_store.get(uid.into_inner()).await? {
|
||||
(uid, Some(_)) => Err(IndexResolverError::IndexAlreadyExists(uid)),
|
||||
(uid, None) => {
|
||||
let uuid = Uuid::new_v4();
|
||||
let index = self.index_store.create(uuid).await?;
|
||||
match self
|
||||
.index_uuid_store
|
||||
.insert(
|
||||
uid,
|
||||
IndexMeta {
|
||||
uuid,
|
||||
creation_task_id,
|
||||
},
|
||||
)
|
||||
.await
|
||||
{
|
||||
Err(e) => {
|
||||
match self.index_store.delete(uuid).await {
|
||||
Ok(Some(index)) => {
|
||||
index.close();
|
||||
}
|
||||
Ok(None) => (),
|
||||
Err(e) => log::error!("Error while deleting index: {:?}", e),
|
||||
}
|
||||
Err(e)
|
||||
}
|
||||
Ok(()) => Ok(index),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get or create an index with name `uid`.
|
||||
pub async fn get_or_create_index(&self, uid: IndexUid, task_id: TaskId) -> Result<Index> {
|
||||
match self.create_index(uid, task_id).await {
|
||||
Ok(index) => Ok(index),
|
||||
Err(IndexResolverError::IndexAlreadyExists(uid)) => self.get_index(uid).await,
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn list(&self) -> Result<Vec<(String, Index)>> {
|
||||
let uuids = self.index_uuid_store.list().await?;
|
||||
let mut indexes = Vec::new();
|
||||
for (name, IndexMeta { uuid, .. }) in uuids {
|
||||
match self.index_store.get(uuid).await? {
|
||||
Some(index) => indexes.push((name, index)),
|
||||
None => {
|
||||
// we found an unexisting index, we remove it from the uuid store
|
||||
let _ = self.index_uuid_store.delete(name).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(indexes)
|
||||
}
|
||||
|
||||
pub async fn delete_index(&self, uid: String) -> Result<Index> {
|
||||
match self.index_uuid_store.delete(uid.clone()).await? {
|
||||
Some(IndexMeta { uuid, .. }) => match self.index_store.delete(uuid).await? {
|
||||
Some(index) => {
|
||||
index.clone().close();
|
||||
Ok(index)
|
||||
}
|
||||
None => Err(IndexResolverError::UnexistingIndex(uid)),
|
||||
},
|
||||
None => Err(IndexResolverError::UnexistingIndex(uid)),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_index(&self, uid: String) -> Result<Index> {
|
||||
match self.index_uuid_store.get(uid).await? {
|
||||
(name, Some(IndexMeta { uuid, .. })) => {
|
||||
match self.index_store.get(uuid).await? {
|
||||
Some(index) => Ok(index),
|
||||
None => {
|
||||
// For some reason we got a uuid to an unexisting index, we return an error,
|
||||
// and remove the uuid from the uuid store.
|
||||
let _ = self.index_uuid_store.delete(name.clone()).await;
|
||||
Err(IndexResolverError::UnexistingIndex(name))
|
||||
}
|
||||
}
|
||||
}
|
||||
(name, _) => Err(IndexResolverError::UnexistingIndex(name)),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_index_creation_task_id(&self, index_uid: String) -> Result<TaskId> {
|
||||
let (uid, meta) = self.index_uuid_store.get(index_uid).await?;
|
||||
meta.map(
|
||||
|IndexMeta {
|
||||
creation_task_id, ..
|
||||
}| creation_task_id,
|
||||
)
|
||||
.ok_or(IndexResolverError::UnexistingIndex(uid))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::index::IndexStats;
|
||||
|
||||
use super::index_store::MockIndexStore;
|
||||
use super::meta_store::MockIndexMetaStore;
|
||||
use super::*;
|
||||
|
||||
use futures::future::ok;
|
||||
use milli::FieldDistribution;
|
||||
use nelson::Mocker;
|
||||
|
||||
pub enum MockIndexResolver<U, I> {
|
||||
Real(super::real::IndexResolver<U, I>),
|
||||
Mock(Mocker),
|
||||
}
|
||||
|
||||
impl MockIndexResolver<HeedMetaStore, MapIndexStore> {
|
||||
pub fn load_dump(
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
index_db_size: usize,
|
||||
env: Arc<Env>,
|
||||
indexer_opts: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
super::real::IndexResolver::load_dump(src, dst, index_db_size, env, indexer_opts)
|
||||
}
|
||||
}
|
||||
|
||||
impl<U, I> MockIndexResolver<U, I>
|
||||
where
|
||||
U: IndexMetaStore,
|
||||
I: IndexStore,
|
||||
{
|
||||
pub fn new(index_uuid_store: U, index_store: I, file_store: UpdateFileStore) -> Self {
|
||||
Self::Real(super::real::IndexResolver {
|
||||
index_uuid_store,
|
||||
index_store,
|
||||
file_store,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn mock(mocker: Mocker) -> Self {
|
||||
Self::Mock(mocker)
|
||||
}
|
||||
|
||||
pub async fn process_document_addition_batch(&self, tasks: &mut [Task]) {
|
||||
match self {
|
||||
IndexResolver::Real(r) => r.process_document_addition_batch(tasks).await,
|
||||
IndexResolver::Mock(m) => unsafe {
|
||||
m.get("process_document_addition_batch").call(tasks)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn process_task(&self, task: &mut Task) {
|
||||
match self {
|
||||
IndexResolver::Real(r) => r.process_task(task).await,
|
||||
IndexResolver::Mock(m) => unsafe { m.get("process_task").call(task) },
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn dump(&self, path: impl AsRef<Path>) -> Result<()> {
|
||||
match self {
|
||||
IndexResolver::Real(r) => r.dump(path).await,
|
||||
IndexResolver::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get or create an index with name `uid`.
|
||||
pub async fn get_or_create_index(&self, uid: IndexUid, task_id: TaskId) -> Result<Index> {
|
||||
match self {
|
||||
IndexResolver::Real(r) => r.get_or_create_index(uid, task_id).await,
|
||||
IndexResolver::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn list(&self) -> Result<Vec<(String, Index)>> {
|
||||
match self {
|
||||
IndexResolver::Real(r) => r.list().await,
|
||||
IndexResolver::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn delete_index(&self, uid: String) -> Result<Index> {
|
||||
match self {
|
||||
IndexResolver::Real(r) => r.delete_index(uid).await,
|
||||
IndexResolver::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_index(&self, uid: String) -> Result<Index> {
|
||||
match self {
|
||||
IndexResolver::Real(r) => r.get_index(uid).await,
|
||||
IndexResolver::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_index_creation_task_id(&self, index_uid: String) -> Result<TaskId> {
|
||||
match self {
|
||||
IndexResolver::Real(r) => r.get_index_creation_task_id(index_uid).await,
|
||||
IndexResolver::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn delete_content_file(&self, content_uuid: Uuid) -> Result<()> {
|
||||
match self {
|
||||
IndexResolver::Real(r) => r.delete_content_file(content_uuid).await,
|
||||
IndexResolver::Mock(m) => unsafe {
|
||||
m.get("delete_content_file").call(content_uuid)
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_remove_unknown_index() {
|
||||
let mut meta_store = MockIndexMetaStore::new();
|
||||
meta_store
|
||||
.expect_delete()
|
||||
.once()
|
||||
.returning(|_| Box::pin(ok(None)));
|
||||
|
||||
let index_store = MockIndexStore::new();
|
||||
|
||||
let mocker = Mocker::default();
|
||||
let file_store = UpdateFileStore::mock(mocker);
|
||||
|
||||
let index_resolver = IndexResolver::new(meta_store, index_store, file_store);
|
||||
|
||||
let mut task = Task {
|
||||
id: 1,
|
||||
content: TaskContent::IndexDeletion {
|
||||
index_uid: IndexUid::new_unchecked("test"),
|
||||
},
|
||||
events: Vec::new(),
|
||||
};
|
||||
|
||||
index_resolver.process_task(&mut task).await;
|
||||
|
||||
assert!(matches!(task.events[0], TaskEvent::Failed { .. }));
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_remove_index() {
|
||||
let mut meta_store = MockIndexMetaStore::new();
|
||||
meta_store.expect_delete().once().returning(|_| {
|
||||
Box::pin(ok(Some(IndexMeta {
|
||||
uuid: Uuid::new_v4(),
|
||||
creation_task_id: 1,
|
||||
})))
|
||||
});
|
||||
|
||||
let mut index_store = MockIndexStore::new();
|
||||
index_store.expect_delete().once().returning(|_| {
|
||||
let mocker = Mocker::default();
|
||||
mocker.when::<(), ()>("close").then(|_| ());
|
||||
mocker
|
||||
.when::<(), IndexResult<IndexStats>>("stats")
|
||||
.then(|_| {
|
||||
Ok(IndexStats {
|
||||
size: 10,
|
||||
number_of_documents: 10,
|
||||
is_indexing: None,
|
||||
field_distribution: FieldDistribution::default(),
|
||||
})
|
||||
});
|
||||
Box::pin(ok(Some(Index::mock(mocker))))
|
||||
});
|
||||
|
||||
let mocker = Mocker::default();
|
||||
let file_store = UpdateFileStore::mock(mocker);
|
||||
|
||||
let index_resolver = IndexResolver::new(meta_store, index_store, file_store);
|
||||
|
||||
let mut task = Task {
|
||||
id: 1,
|
||||
content: TaskContent::IndexDeletion {
|
||||
index_uid: IndexUid::new_unchecked("test"),
|
||||
},
|
||||
events: Vec::new(),
|
||||
};
|
||||
|
||||
index_resolver.process_task(&mut task).await;
|
||||
|
||||
assert!(matches!(task.events[0], TaskEvent::Succeeded { .. }));
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_delete_documents() {
|
||||
let mut meta_store = MockIndexMetaStore::new();
|
||||
meta_store.expect_get().once().returning(|_| {
|
||||
Box::pin(ok((
|
||||
"test".to_string(),
|
||||
Some(IndexMeta {
|
||||
uuid: Uuid::new_v4(),
|
||||
creation_task_id: 1,
|
||||
}),
|
||||
)))
|
||||
});
|
||||
|
||||
let mut index_store = MockIndexStore::new();
|
||||
index_store.expect_get().once().returning(|_| {
|
||||
let mocker = Mocker::default();
|
||||
mocker
|
||||
.when::<(), IndexResult<()>>("clear_documents")
|
||||
.once()
|
||||
.then(|_| Ok(()));
|
||||
mocker
|
||||
.when::<(), IndexResult<IndexStats>>("stats")
|
||||
.once()
|
||||
.then(|_| {
|
||||
Ok(IndexStats {
|
||||
size: 10,
|
||||
number_of_documents: 10,
|
||||
is_indexing: None,
|
||||
field_distribution: FieldDistribution::default(),
|
||||
})
|
||||
});
|
||||
Box::pin(ok(Some(Index::mock(mocker))))
|
||||
});
|
||||
|
||||
let mocker = Mocker::default();
|
||||
let file_store = UpdateFileStore::mock(mocker);
|
||||
|
||||
let index_resolver = IndexResolver::new(meta_store, index_store, file_store);
|
||||
|
||||
let mut task = Task {
|
||||
id: 1,
|
||||
content: TaskContent::DocumentDeletion {
|
||||
deletion: DocumentDeletion::Clear,
|
||||
index_uid: IndexUid::new_unchecked("test"),
|
||||
},
|
||||
events: Vec::new(),
|
||||
};
|
||||
|
||||
index_resolver.process_task(&mut task).await;
|
||||
|
||||
assert!(matches!(task.events[0], TaskEvent::Succeeded { .. }));
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_index_update() {
|
||||
let mut meta_store = MockIndexMetaStore::new();
|
||||
meta_store.expect_get().once().returning(|_| {
|
||||
Box::pin(ok((
|
||||
"test".to_string(),
|
||||
Some(IndexMeta {
|
||||
uuid: Uuid::new_v4(),
|
||||
creation_task_id: 1,
|
||||
}),
|
||||
)))
|
||||
});
|
||||
|
||||
let mut index_store = MockIndexStore::new();
|
||||
index_store.expect_get().once().returning(|_| {
|
||||
let mocker = Mocker::default();
|
||||
|
||||
mocker
|
||||
.when::<String, IndexResult<crate::index::IndexMeta>>("update_primary_key")
|
||||
.once()
|
||||
.then(|_| {
|
||||
Ok(crate::index::IndexMeta {
|
||||
created_at: OffsetDateTime::now_utc(),
|
||||
updated_at: OffsetDateTime::now_utc(),
|
||||
primary_key: Some("key".to_string()),
|
||||
})
|
||||
});
|
||||
Box::pin(ok(Some(Index::mock(mocker))))
|
||||
});
|
||||
|
||||
let mocker = Mocker::default();
|
||||
let file_store = UpdateFileStore::mock(mocker);
|
||||
|
||||
let index_resolver = IndexResolver::new(meta_store, index_store, file_store);
|
||||
|
||||
let mut task = Task {
|
||||
id: 1,
|
||||
content: TaskContent::IndexUpdate {
|
||||
primary_key: Some("key".to_string()),
|
||||
index_uid: IndexUid::new_unchecked("test"),
|
||||
},
|
||||
events: Vec::new(),
|
||||
};
|
||||
|
||||
index_resolver.process_task(&mut task).await;
|
||||
|
||||
assert!(matches!(task.events[0], TaskEvent::Succeeded { .. }));
|
||||
}
|
||||
}
|
@ -3,24 +3,23 @@ pub mod error;
|
||||
pub mod options;
|
||||
|
||||
mod analytics;
|
||||
mod document_formats;
|
||||
// TODO: TAMO: reenable the dumps
|
||||
#[cfg(todo)]
|
||||
mod dump;
|
||||
pub mod index;
|
||||
pub mod index_controller;
|
||||
mod index_resolver;
|
||||
mod index_controller;
|
||||
mod snapshot;
|
||||
pub mod tasks;
|
||||
mod update_file_store;
|
||||
|
||||
use std::env::VarError;
|
||||
use std::ffi::OsStr;
|
||||
use std::path::Path;
|
||||
|
||||
pub use index_controller::MeiliSearch;
|
||||
// TODO: TAMO: rename the MeiliSearch in Meilisearch
|
||||
pub use index_controller::Meilisearch as MeiliSearch;
|
||||
pub use milli;
|
||||
pub use milli::heed;
|
||||
|
||||
mod compression;
|
||||
pub mod document_formats;
|
||||
|
||||
/// Check if a db is empty. It does not provide any information on the
|
||||
/// validity of the data in it.
|
||||
|
@ -15,7 +15,7 @@ use walkdir::WalkDir;
|
||||
use crate::compression::from_tar_gz;
|
||||
use crate::index_controller::open_meta_env;
|
||||
use crate::index_controller::versioning::VERSION_FILE_NAME;
|
||||
use crate::tasks::Scheduler;
|
||||
use index_scheduler::IndexScheduler;
|
||||
|
||||
pub struct SnapshotService {
|
||||
pub(crate) db_path: PathBuf,
|
||||
@ -23,7 +23,7 @@ pub struct SnapshotService {
|
||||
pub(crate) snapshot_path: PathBuf,
|
||||
pub(crate) index_size: usize,
|
||||
pub(crate) meta_env_size: usize,
|
||||
pub(crate) scheduler: Arc<RwLock<Scheduler>>,
|
||||
pub(crate) scheduler: IndexScheduler,
|
||||
}
|
||||
|
||||
impl SnapshotService {
|
||||
@ -39,7 +39,8 @@ impl SnapshotService {
|
||||
meta_env_size: self.meta_env_size,
|
||||
index_size: self.index_size,
|
||||
};
|
||||
self.scheduler.write().await.schedule_snapshot(snapshot_job);
|
||||
// TODO: TAMO: reenable the snapshots
|
||||
// self.scheduler.write().await.schedule_snapshot(snapshot_job);
|
||||
sleep(self.snapshot_period).await;
|
||||
}
|
||||
}
|
||||
|
@ -1,75 +0,0 @@
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::snapshot::SnapshotJob;
|
||||
|
||||
use super::task::{Task, TaskEvent};
|
||||
|
||||
pub type BatchId = u32;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum BatchContent {
|
||||
DocumentsAdditionBatch(Vec<Task>),
|
||||
IndexUpdate(Task),
|
||||
Dump(Task),
|
||||
Snapshot(SnapshotJob),
|
||||
// Symbolizes a empty batch. This can occur when we were woken, but there wasn't any work to do.
|
||||
Empty,
|
||||
}
|
||||
|
||||
impl BatchContent {
|
||||
pub fn first(&self) -> Option<&Task> {
|
||||
match self {
|
||||
BatchContent::DocumentsAdditionBatch(ts) => ts.first(),
|
||||
BatchContent::Dump(t) | BatchContent::IndexUpdate(t) => Some(t),
|
||||
BatchContent::Snapshot(_) | BatchContent::Empty => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn push_event(&mut self, event: TaskEvent) {
|
||||
match self {
|
||||
BatchContent::DocumentsAdditionBatch(ts) => {
|
||||
ts.iter_mut().for_each(|t| t.events.push(event.clone()))
|
||||
}
|
||||
BatchContent::IndexUpdate(t) | BatchContent::Dump(t) => t.events.push(event),
|
||||
BatchContent::Snapshot(_) | BatchContent::Empty => (),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Batch {
|
||||
// Only batches that contains a persistent tasks are given an id. Snapshot batches don't have
|
||||
// an id.
|
||||
pub id: Option<BatchId>,
|
||||
pub created_at: OffsetDateTime,
|
||||
pub content: BatchContent,
|
||||
}
|
||||
|
||||
impl Batch {
|
||||
pub fn new(id: Option<BatchId>, content: BatchContent) -> Self {
|
||||
Self {
|
||||
id,
|
||||
created_at: OffsetDateTime::now_utc(),
|
||||
content,
|
||||
}
|
||||
}
|
||||
pub fn len(&self) -> usize {
|
||||
match self.content {
|
||||
BatchContent::DocumentsAdditionBatch(ref ts) => ts.len(),
|
||||
BatchContent::IndexUpdate(_) | BatchContent::Dump(_) | BatchContent::Snapshot(_) => 1,
|
||||
BatchContent::Empty => 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
pub fn empty() -> Self {
|
||||
Self {
|
||||
id: None,
|
||||
created_at: OffsetDateTime::now_utc(),
|
||||
content: BatchContent::Empty,
|
||||
}
|
||||
}
|
||||
}
|
@ -1,34 +0,0 @@
|
||||
use meilisearch_types::error::{Code, ErrorCode};
|
||||
use meilisearch_types::internal_error;
|
||||
use tokio::task::JoinError;
|
||||
|
||||
use crate::update_file_store::UpdateFileStoreError;
|
||||
|
||||
use super::task::TaskId;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, TaskError>;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum TaskError {
|
||||
#[error("Task `{0}` not found.")]
|
||||
UnexistingTask(TaskId),
|
||||
#[error("Internal error: {0}")]
|
||||
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
|
||||
}
|
||||
|
||||
internal_error!(
|
||||
TaskError: milli::heed::Error,
|
||||
JoinError,
|
||||
std::io::Error,
|
||||
serde_json::Error,
|
||||
UpdateFileStoreError
|
||||
);
|
||||
|
||||
impl ErrorCode for TaskError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
TaskError::UnexistingTask(_) => Code::TaskNotFound,
|
||||
TaskError::Internal(_) => Code::Internal,
|
||||
}
|
||||
}
|
||||
}
|
@ -1,132 +0,0 @@
|
||||
use crate::dump::DumpHandler;
|
||||
use crate::index_resolver::index_store::IndexStore;
|
||||
use crate::index_resolver::meta_store::IndexMetaStore;
|
||||
use crate::tasks::batch::{Batch, BatchContent};
|
||||
use crate::tasks::task::{Task, TaskContent, TaskEvent, TaskResult};
|
||||
use crate::tasks::BatchHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<U, I> BatchHandler for DumpHandler<U, I>
|
||||
where
|
||||
U: IndexMetaStore + Sync + Send + 'static,
|
||||
I: IndexStore + Sync + Send + 'static,
|
||||
{
|
||||
fn accept(&self, batch: &Batch) -> bool {
|
||||
matches!(batch.content, BatchContent::Dump { .. })
|
||||
}
|
||||
|
||||
async fn process_batch(&self, mut batch: Batch) -> Batch {
|
||||
match &batch.content {
|
||||
BatchContent::Dump(Task {
|
||||
content: TaskContent::Dump { uid },
|
||||
..
|
||||
}) => {
|
||||
match self.run(uid.clone()).await {
|
||||
Ok(_) => {
|
||||
batch
|
||||
.content
|
||||
.push_event(TaskEvent::succeeded(TaskResult::Other));
|
||||
}
|
||||
Err(e) => batch.content.push_event(TaskEvent::failed(e)),
|
||||
}
|
||||
batch
|
||||
}
|
||||
_ => unreachable!("invalid batch content for dump"),
|
||||
}
|
||||
}
|
||||
|
||||
async fn finish(&self, _: &Batch) {}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::dump::error::{DumpError, Result as DumpResult};
|
||||
use crate::index_resolver::{index_store::MockIndexStore, meta_store::MockIndexMetaStore};
|
||||
use crate::tasks::handlers::test::task_to_batch;
|
||||
|
||||
use super::*;
|
||||
|
||||
use nelson::Mocker;
|
||||
use proptest::prelude::*;
|
||||
|
||||
proptest! {
|
||||
#[test]
|
||||
fn finish_does_nothing(
|
||||
task in any::<Task>(),
|
||||
) {
|
||||
let rt = tokio::runtime::Runtime::new().unwrap();
|
||||
let handle = rt.spawn(async {
|
||||
let batch = task_to_batch(task);
|
||||
|
||||
let mocker = Mocker::default();
|
||||
let dump_handler = DumpHandler::<MockIndexMetaStore, MockIndexStore>::mock(mocker);
|
||||
|
||||
dump_handler.finish(&batch).await;
|
||||
});
|
||||
|
||||
rt.block_on(handle).unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_handle_dump_success(
|
||||
task in any::<Task>(),
|
||||
) {
|
||||
let rt = tokio::runtime::Runtime::new().unwrap();
|
||||
let handle = rt.spawn(async {
|
||||
let batch = task_to_batch(task);
|
||||
let should_accept = matches!(batch.content, BatchContent::Dump { .. });
|
||||
|
||||
let mocker = Mocker::default();
|
||||
if should_accept {
|
||||
mocker.when::<String, DumpResult<()>>("run")
|
||||
.once()
|
||||
.then(|_| Ok(()));
|
||||
}
|
||||
|
||||
let dump_handler = DumpHandler::<MockIndexMetaStore, MockIndexStore>::mock(mocker);
|
||||
|
||||
let accept = dump_handler.accept(&batch);
|
||||
assert_eq!(accept, should_accept);
|
||||
|
||||
if accept {
|
||||
let batch = dump_handler.process_batch(batch).await;
|
||||
let last_event = batch.content.first().unwrap().events.last().unwrap();
|
||||
assert!(matches!(last_event, TaskEvent::Succeeded { .. }));
|
||||
}
|
||||
});
|
||||
|
||||
rt.block_on(handle).unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_handle_dump_error(
|
||||
task in any::<Task>(),
|
||||
) {
|
||||
let rt = tokio::runtime::Runtime::new().unwrap();
|
||||
let handle = rt.spawn(async {
|
||||
let batch = task_to_batch(task);
|
||||
let should_accept = matches!(batch.content, BatchContent::Dump { .. });
|
||||
|
||||
let mocker = Mocker::default();
|
||||
if should_accept {
|
||||
mocker.when::<String, DumpResult<()>>("run")
|
||||
.once()
|
||||
.then(|_| Err(DumpError::Internal("error".into())));
|
||||
}
|
||||
|
||||
let dump_handler = DumpHandler::<MockIndexMetaStore, MockIndexStore>::mock(mocker);
|
||||
|
||||
let accept = dump_handler.accept(&batch);
|
||||
assert_eq!(accept, should_accept);
|
||||
|
||||
if accept {
|
||||
let batch = dump_handler.process_batch(batch).await;
|
||||
let last_event = batch.content.first().unwrap().events.last().unwrap();
|
||||
assert!(matches!(last_event, TaskEvent::Failed { .. }));
|
||||
}
|
||||
});
|
||||
|
||||
rt.block_on(handle).unwrap();
|
||||
}
|
||||
}
|
||||
}
|
@ -1,18 +0,0 @@
|
||||
use crate::tasks::batch::{Batch, BatchContent};
|
||||
use crate::tasks::BatchHandler;
|
||||
|
||||
/// A sink handler for empty tasks.
|
||||
pub struct EmptyBatchHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl BatchHandler for EmptyBatchHandler {
|
||||
fn accept(&self, batch: &Batch) -> bool {
|
||||
matches!(batch.content, BatchContent::Empty)
|
||||
}
|
||||
|
||||
async fn process_batch(&self, batch: Batch) -> Batch {
|
||||
batch
|
||||
}
|
||||
|
||||
async fn finish(&self, _: &Batch) {}
|
||||
}
|
@ -1,199 +0,0 @@
|
||||
use crate::index_resolver::IndexResolver;
|
||||
use crate::index_resolver::{index_store::IndexStore, meta_store::IndexMetaStore};
|
||||
use crate::tasks::batch::{Batch, BatchContent};
|
||||
use crate::tasks::BatchHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<U, I> BatchHandler for IndexResolver<U, I>
|
||||
where
|
||||
U: IndexMetaStore + Send + Sync + 'static,
|
||||
I: IndexStore + Send + Sync + 'static,
|
||||
{
|
||||
fn accept(&self, batch: &Batch) -> bool {
|
||||
matches!(
|
||||
batch.content,
|
||||
BatchContent::DocumentsAdditionBatch(_) | BatchContent::IndexUpdate(_)
|
||||
)
|
||||
}
|
||||
|
||||
async fn process_batch(&self, mut batch: Batch) -> Batch {
|
||||
match batch.content {
|
||||
BatchContent::DocumentsAdditionBatch(ref mut tasks) => {
|
||||
self.process_document_addition_batch(tasks).await;
|
||||
}
|
||||
BatchContent::IndexUpdate(ref mut task) => {
|
||||
self.process_task(task).await;
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
|
||||
batch
|
||||
}
|
||||
|
||||
async fn finish(&self, batch: &Batch) {
|
||||
if let BatchContent::DocumentsAdditionBatch(ref tasks) = batch.content {
|
||||
for task in tasks {
|
||||
if let Some(content_uuid) = task.get_content_uuid() {
|
||||
if let Err(e) = self.delete_content_file(content_uuid).await {
|
||||
log::error!("error deleting update file: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::index_resolver::index_store::MapIndexStore;
|
||||
use crate::index_resolver::meta_store::HeedMetaStore;
|
||||
use crate::index_resolver::{
|
||||
error::Result as IndexResult, index_store::MockIndexStore, meta_store::MockIndexMetaStore,
|
||||
};
|
||||
use crate::tasks::{
|
||||
handlers::test::task_to_batch,
|
||||
task::{Task, TaskContent},
|
||||
};
|
||||
use crate::update_file_store::{Result as FileStoreResult, UpdateFileStore};
|
||||
|
||||
use super::*;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use milli::update::IndexDocumentsMethod;
|
||||
use nelson::Mocker;
|
||||
use proptest::prelude::*;
|
||||
use uuid::Uuid;
|
||||
|
||||
proptest! {
|
||||
#[test]
|
||||
fn test_accept_task(
|
||||
task in any::<Task>(),
|
||||
) {
|
||||
let batch = task_to_batch(task);
|
||||
|
||||
let index_store = MockIndexStore::new();
|
||||
let meta_store = MockIndexMetaStore::new();
|
||||
let mocker = Mocker::default();
|
||||
let update_file_store = UpdateFileStore::mock(mocker);
|
||||
let index_resolver = IndexResolver::new(meta_store, index_store, update_file_store);
|
||||
|
||||
match batch.content {
|
||||
BatchContent::DocumentsAdditionBatch(_)
|
||||
| BatchContent::IndexUpdate(_) => assert!(index_resolver.accept(&batch)),
|
||||
BatchContent::Dump(_)
|
||||
| BatchContent::Snapshot(_)
|
||||
| BatchContent::Empty => assert!(!index_resolver.accept(&batch)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn finisher_called_on_document_update() {
|
||||
let index_store = MockIndexStore::new();
|
||||
let meta_store = MockIndexMetaStore::new();
|
||||
let mocker = Mocker::default();
|
||||
let content_uuid = Uuid::new_v4();
|
||||
mocker
|
||||
.when::<Uuid, FileStoreResult<()>>("delete")
|
||||
.once()
|
||||
.then(move |uuid| {
|
||||
assert_eq!(uuid, content_uuid);
|
||||
Ok(())
|
||||
});
|
||||
let update_file_store = UpdateFileStore::mock(mocker);
|
||||
let index_resolver = IndexResolver::new(meta_store, index_store, update_file_store);
|
||||
|
||||
let task = Task {
|
||||
id: 1,
|
||||
content: TaskContent::DocumentAddition {
|
||||
content_uuid,
|
||||
merge_strategy: IndexDocumentsMethod::ReplaceDocuments,
|
||||
primary_key: None,
|
||||
documents_count: 100,
|
||||
allow_index_creation: true,
|
||||
index_uid: IndexUid::new_unchecked("test"),
|
||||
},
|
||||
events: Vec::new(),
|
||||
};
|
||||
|
||||
let batch = task_to_batch(task);
|
||||
|
||||
index_resolver.finish(&batch).await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
#[should_panic]
|
||||
async fn panic_when_passed_unsupported_batch() {
|
||||
let index_store = MockIndexStore::new();
|
||||
let meta_store = MockIndexMetaStore::new();
|
||||
let mocker = Mocker::default();
|
||||
let update_file_store = UpdateFileStore::mock(mocker);
|
||||
let index_resolver = IndexResolver::new(meta_store, index_store, update_file_store);
|
||||
|
||||
let task = Task {
|
||||
id: 1,
|
||||
content: TaskContent::Dump {
|
||||
uid: String::from("hello"),
|
||||
},
|
||||
events: Vec::new(),
|
||||
};
|
||||
|
||||
let batch = task_to_batch(task);
|
||||
|
||||
index_resolver.process_batch(batch).await;
|
||||
}
|
||||
|
||||
proptest! {
|
||||
#[test]
|
||||
fn index_document_task_deletes_update_file(
|
||||
task in any::<Task>(),
|
||||
) {
|
||||
let rt = tokio::runtime::Runtime::new().unwrap();
|
||||
let handle = rt.spawn(async {
|
||||
let mocker = Mocker::default();
|
||||
|
||||
if let TaskContent::DocumentAddition{ .. } = task.content {
|
||||
mocker.when::<Uuid, IndexResult<()>>("delete_content_file").then(|_| Ok(()));
|
||||
}
|
||||
|
||||
let index_resolver: IndexResolver<HeedMetaStore, MapIndexStore> = IndexResolver::mock(mocker);
|
||||
|
||||
let batch = task_to_batch(task);
|
||||
|
||||
index_resolver.finish(&batch).await;
|
||||
});
|
||||
|
||||
rt.block_on(handle).unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_handle_batch(task in any::<Task>()) {
|
||||
let rt = tokio::runtime::Runtime::new().unwrap();
|
||||
let handle = rt.spawn(async {
|
||||
let mocker = Mocker::default();
|
||||
match task.content {
|
||||
TaskContent::DocumentAddition { .. } => {
|
||||
mocker.when::<&mut [Task], ()>("process_document_addition_batch").then(|_| ());
|
||||
}
|
||||
TaskContent::Dump { .. } => (),
|
||||
_ => {
|
||||
mocker.when::<&mut Task, ()>("process_task").then(|_| ());
|
||||
}
|
||||
}
|
||||
let index_resolver: IndexResolver<HeedMetaStore, MapIndexStore> = IndexResolver::mock(mocker);
|
||||
|
||||
|
||||
let batch = task_to_batch(task);
|
||||
|
||||
if index_resolver.accept(&batch) {
|
||||
index_resolver.process_batch(batch).await;
|
||||
}
|
||||
});
|
||||
|
||||
if let Err(e) = rt.block_on(handle) {
|
||||
if e.is_panic() {
|
||||
std::panic::resume_unwind(e.into_panic());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,34 +0,0 @@
|
||||
pub mod dump_handler;
|
||||
pub mod empty_handler;
|
||||
mod index_resolver_handler;
|
||||
pub mod snapshot_handler;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::tasks::{
|
||||
batch::{Batch, BatchContent},
|
||||
task::{Task, TaskContent},
|
||||
};
|
||||
|
||||
pub fn task_to_batch(task: Task) -> Batch {
|
||||
let content = match task.content {
|
||||
TaskContent::DocumentAddition { .. } => {
|
||||
BatchContent::DocumentsAdditionBatch(vec![task])
|
||||
}
|
||||
TaskContent::DocumentDeletion { .. }
|
||||
| TaskContent::SettingsUpdate { .. }
|
||||
| TaskContent::IndexDeletion { .. }
|
||||
| TaskContent::IndexCreation { .. }
|
||||
| TaskContent::IndexUpdate { .. } => BatchContent::IndexUpdate(task),
|
||||
TaskContent::Dump { .. } => BatchContent::Dump(task),
|
||||
};
|
||||
|
||||
Batch {
|
||||
id: Some(1),
|
||||
created_at: OffsetDateTime::now_utc(),
|
||||
content,
|
||||
}
|
||||
}
|
||||
}
|
@ -1,26 +0,0 @@
|
||||
use crate::tasks::batch::{Batch, BatchContent};
|
||||
use crate::tasks::BatchHandler;
|
||||
|
||||
pub struct SnapshotHandler;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl BatchHandler for SnapshotHandler {
|
||||
fn accept(&self, batch: &Batch) -> bool {
|
||||
matches!(batch.content, BatchContent::Snapshot(_))
|
||||
}
|
||||
|
||||
async fn process_batch(&self, batch: Batch) -> Batch {
|
||||
match batch.content {
|
||||
BatchContent::Snapshot(job) => {
|
||||
if let Err(e) = job.run().await {
|
||||
log::error!("snapshot error: {e}");
|
||||
}
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
|
||||
Batch::empty()
|
||||
}
|
||||
|
||||
async fn finish(&self, _: &Batch) {}
|
||||
}
|
@ -1,56 +0,0 @@
|
||||
use async_trait::async_trait;
|
||||
|
||||
pub use handlers::empty_handler::EmptyBatchHandler;
|
||||
pub use handlers::snapshot_handler::SnapshotHandler;
|
||||
pub use scheduler::Scheduler;
|
||||
pub use task_store::TaskFilter;
|
||||
|
||||
#[cfg(test)]
|
||||
pub use task_store::test::MockTaskStore as TaskStore;
|
||||
#[cfg(not(test))]
|
||||
pub use task_store::TaskStore;
|
||||
|
||||
use batch::Batch;
|
||||
use error::Result;
|
||||
|
||||
pub mod batch;
|
||||
pub mod error;
|
||||
mod handlers;
|
||||
mod scheduler;
|
||||
pub mod task;
|
||||
mod task_store;
|
||||
pub mod update_loop;
|
||||
|
||||
#[cfg_attr(test, mockall::automock(type Error=test::DebugError;))]
|
||||
#[async_trait]
|
||||
pub trait BatchHandler: Sync + Send + 'static {
|
||||
/// return whether this handler can accept this batch
|
||||
fn accept(&self, batch: &Batch) -> bool;
|
||||
|
||||
/// Processes the `Task` batch returning the batch with the `Task` updated.
|
||||
///
|
||||
/// It is ok for this function to panic if a batch is handed that hasn't been verified by
|
||||
/// `accept` beforehand.
|
||||
async fn process_batch(&self, batch: Batch) -> Batch;
|
||||
|
||||
/// `finish` is called when the result of `process` has been committed to the task store. This
|
||||
/// method can be used to perform cleanup after the update has been completed for example.
|
||||
async fn finish(&self, batch: &Batch);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt::Display;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct DebugError;
|
||||
|
||||
impl Display for DebugError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_str("an error")
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for DebugError {}
|
||||
}
|
@ -1,609 +0,0 @@
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::{hash_map::Entry, BinaryHeap, HashMap, VecDeque};
|
||||
use std::ops::{Deref, DerefMut};
|
||||
use std::slice;
|
||||
use std::sync::Arc;
|
||||
|
||||
use atomic_refcell::AtomicRefCell;
|
||||
use milli::update::IndexDocumentsMethod;
|
||||
use time::OffsetDateTime;
|
||||
use tokio::sync::{watch, RwLock};
|
||||
|
||||
use crate::options::SchedulerConfig;
|
||||
use crate::snapshot::SnapshotJob;
|
||||
|
||||
use super::batch::{Batch, BatchContent};
|
||||
use super::error::Result;
|
||||
use super::task::{Task, TaskContent, TaskEvent, TaskId};
|
||||
use super::update_loop::UpdateLoop;
|
||||
use super::{BatchHandler, TaskFilter, TaskStore};
|
||||
|
||||
#[derive(Eq, Debug, Clone, Copy)]
|
||||
enum TaskType {
|
||||
DocumentAddition { number: usize },
|
||||
DocumentUpdate { number: usize },
|
||||
IndexUpdate,
|
||||
Dump,
|
||||
}
|
||||
|
||||
/// Two tasks are equal if they have the same type.
|
||||
impl PartialEq for TaskType {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
matches!(
|
||||
(self, other),
|
||||
(Self::DocumentAddition { .. }, Self::DocumentAddition { .. })
|
||||
| (Self::DocumentUpdate { .. }, Self::DocumentUpdate { .. })
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Eq, Debug, Clone, Copy)]
|
||||
struct PendingTask {
|
||||
kind: TaskType,
|
||||
id: TaskId,
|
||||
}
|
||||
|
||||
impl PartialEq for PendingTask {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.id.eq(&other.id)
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd for PendingTask {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for PendingTask {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
self.id.cmp(&other.id).reverse()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct TaskList {
|
||||
id: TaskListIdentifier,
|
||||
tasks: BinaryHeap<PendingTask>,
|
||||
}
|
||||
|
||||
impl Deref for TaskList {
|
||||
type Target = BinaryHeap<PendingTask>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.tasks
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for TaskList {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.tasks
|
||||
}
|
||||
}
|
||||
|
||||
impl TaskList {
|
||||
fn new(id: TaskListIdentifier) -> Self {
|
||||
Self {
|
||||
id,
|
||||
tasks: Default::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for TaskList {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.id == other.id
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for TaskList {}
|
||||
|
||||
impl Ord for TaskList {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
match (&self.id, &other.id) {
|
||||
(TaskListIdentifier::Index(_), TaskListIdentifier::Index(_)) => {
|
||||
match (self.peek(), other.peek()) {
|
||||
(None, None) => Ordering::Equal,
|
||||
(None, Some(_)) => Ordering::Less,
|
||||
(Some(_), None) => Ordering::Greater,
|
||||
(Some(lhs), Some(rhs)) => lhs.cmp(rhs),
|
||||
}
|
||||
}
|
||||
(TaskListIdentifier::Index(_), TaskListIdentifier::Dump) => Ordering::Less,
|
||||
(TaskListIdentifier::Dump, TaskListIdentifier::Index(_)) => Ordering::Greater,
|
||||
(TaskListIdentifier::Dump, TaskListIdentifier::Dump) => {
|
||||
unreachable!("There should be only one Dump task list")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd for TaskList {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq, Hash, Debug, Clone)]
|
||||
enum TaskListIdentifier {
|
||||
Index(String),
|
||||
Dump,
|
||||
}
|
||||
|
||||
impl From<&Task> for TaskListIdentifier {
|
||||
fn from(task: &Task) -> Self {
|
||||
match &task.content {
|
||||
TaskContent::DocumentAddition { index_uid, .. }
|
||||
| TaskContent::DocumentDeletion { index_uid, .. }
|
||||
| TaskContent::SettingsUpdate { index_uid, .. }
|
||||
| TaskContent::IndexDeletion { index_uid }
|
||||
| TaskContent::IndexCreation { index_uid, .. }
|
||||
| TaskContent::IndexUpdate { index_uid, .. } => {
|
||||
TaskListIdentifier::Index(index_uid.as_str().to_string())
|
||||
}
|
||||
TaskContent::Dump { .. } => TaskListIdentifier::Dump,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
struct TaskQueue {
|
||||
/// Maps index uids to their TaskList, for quick access
|
||||
index_tasks: HashMap<TaskListIdentifier, Arc<AtomicRefCell<TaskList>>>,
|
||||
/// A queue that orders TaskList by the priority of their fist update
|
||||
queue: BinaryHeap<Arc<AtomicRefCell<TaskList>>>,
|
||||
}
|
||||
|
||||
impl TaskQueue {
|
||||
fn insert(&mut self, task: Task) {
|
||||
let id = task.id;
|
||||
let uid = TaskListIdentifier::from(&task);
|
||||
|
||||
let kind = match task.content {
|
||||
TaskContent::DocumentAddition {
|
||||
documents_count,
|
||||
merge_strategy: IndexDocumentsMethod::ReplaceDocuments,
|
||||
..
|
||||
} => TaskType::DocumentAddition {
|
||||
number: documents_count,
|
||||
},
|
||||
TaskContent::DocumentAddition {
|
||||
documents_count,
|
||||
merge_strategy: IndexDocumentsMethod::UpdateDocuments,
|
||||
..
|
||||
} => TaskType::DocumentUpdate {
|
||||
number: documents_count,
|
||||
},
|
||||
TaskContent::Dump { .. } => TaskType::Dump,
|
||||
TaskContent::DocumentDeletion { .. }
|
||||
| TaskContent::SettingsUpdate { .. }
|
||||
| TaskContent::IndexDeletion { .. }
|
||||
| TaskContent::IndexCreation { .. }
|
||||
| TaskContent::IndexUpdate { .. } => TaskType::IndexUpdate,
|
||||
_ => unreachable!("unhandled task type"),
|
||||
};
|
||||
let task = PendingTask { kind, id };
|
||||
|
||||
match self.index_tasks.entry(uid) {
|
||||
Entry::Occupied(entry) => {
|
||||
// A task list already exists for this index, all we have to to is to push the new
|
||||
// update to the end of the list. This won't change the order since ids are
|
||||
// monotonically increasing.
|
||||
let mut list = entry.get().borrow_mut();
|
||||
|
||||
// We only need the first element to be lower than the one we want to
|
||||
// insert to preserve the order in the queue.
|
||||
assert!(list.peek().map(|old_id| id >= old_id.id).unwrap_or(true));
|
||||
|
||||
list.push(task);
|
||||
}
|
||||
Entry::Vacant(entry) => {
|
||||
let mut task_list = TaskList::new(entry.key().clone());
|
||||
task_list.push(task);
|
||||
let task_list = Arc::new(AtomicRefCell::new(task_list));
|
||||
entry.insert(task_list.clone());
|
||||
self.queue.push(task_list);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Passes a context with a view to the task list of the next index to schedule. It is
|
||||
/// guaranteed that the first id from task list will be the lowest pending task id.
|
||||
fn head_mut<R>(&mut self, mut f: impl FnMut(&mut TaskList) -> R) -> Option<R> {
|
||||
let head = self.queue.pop()?;
|
||||
let result = {
|
||||
let mut ref_head = head.borrow_mut();
|
||||
f(&mut *ref_head)
|
||||
};
|
||||
if !head.borrow().tasks.is_empty() {
|
||||
// After being mutated, the head is reinserted to the correct position.
|
||||
self.queue.push(head);
|
||||
} else {
|
||||
self.index_tasks.remove(&head.borrow().id);
|
||||
}
|
||||
|
||||
Some(result)
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.queue.is_empty() && self.index_tasks.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Scheduler {
|
||||
// TODO: currently snapshots are non persistent tasks, and are treated differently.
|
||||
snapshots: VecDeque<SnapshotJob>,
|
||||
tasks: TaskQueue,
|
||||
|
||||
store: TaskStore,
|
||||
processing: Processing,
|
||||
next_fetched_task_id: TaskId,
|
||||
config: SchedulerConfig,
|
||||
/// Notifies the update loop that a new task was received
|
||||
notifier: watch::Sender<()>,
|
||||
}
|
||||
|
||||
impl Scheduler {
|
||||
pub fn new(
|
||||
store: TaskStore,
|
||||
performers: Vec<Arc<dyn BatchHandler + Sync + Send + 'static>>,
|
||||
config: SchedulerConfig,
|
||||
) -> Result<Arc<RwLock<Self>>> {
|
||||
let (notifier, rcv) = watch::channel(());
|
||||
|
||||
let this = Self {
|
||||
snapshots: VecDeque::new(),
|
||||
tasks: TaskQueue::default(),
|
||||
|
||||
store,
|
||||
processing: Processing::Nothing,
|
||||
next_fetched_task_id: 0,
|
||||
config,
|
||||
notifier,
|
||||
};
|
||||
|
||||
// Notify update loop to start processing pending updates immediately after startup.
|
||||
this.notify();
|
||||
|
||||
let this = Arc::new(RwLock::new(this));
|
||||
|
||||
let update_loop = UpdateLoop::new(this.clone(), performers, rcv);
|
||||
|
||||
tokio::task::spawn_local(update_loop.run());
|
||||
|
||||
Ok(this)
|
||||
}
|
||||
|
||||
fn register_task(&mut self, task: Task) {
|
||||
assert!(!task.is_finished());
|
||||
self.tasks.insert(task);
|
||||
}
|
||||
|
||||
/// Clears the processing list, this method should be called when the processing of a batch is finished.
|
||||
pub fn finish(&mut self) {
|
||||
self.processing = Processing::Nothing;
|
||||
}
|
||||
|
||||
pub fn notify(&self) {
|
||||
let _ = self.notifier.send(());
|
||||
}
|
||||
|
||||
fn notify_if_not_empty(&self) {
|
||||
if !self.snapshots.is_empty() || !self.tasks.is_empty() {
|
||||
self.notify();
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn update_tasks(&self, content: BatchContent) -> Result<BatchContent> {
|
||||
match content {
|
||||
BatchContent::DocumentsAdditionBatch(tasks) => {
|
||||
let tasks = self.store.update_tasks(tasks).await?;
|
||||
Ok(BatchContent::DocumentsAdditionBatch(tasks))
|
||||
}
|
||||
BatchContent::IndexUpdate(t) => {
|
||||
let mut tasks = self.store.update_tasks(vec![t]).await?;
|
||||
Ok(BatchContent::IndexUpdate(tasks.remove(0)))
|
||||
}
|
||||
BatchContent::Dump(t) => {
|
||||
let mut tasks = self.store.update_tasks(vec![t]).await?;
|
||||
Ok(BatchContent::Dump(tasks.remove(0)))
|
||||
}
|
||||
other => Ok(other),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_task(&self, id: TaskId, filter: Option<TaskFilter>) -> Result<Task> {
|
||||
self.store.get_task(id, filter).await
|
||||
}
|
||||
|
||||
pub async fn list_tasks(
|
||||
&self,
|
||||
offset: Option<TaskId>,
|
||||
filter: Option<TaskFilter>,
|
||||
limit: Option<usize>,
|
||||
) -> Result<Vec<Task>> {
|
||||
self.store.list_tasks(offset, filter, limit).await
|
||||
}
|
||||
|
||||
pub async fn get_processing_tasks(&self) -> Result<Vec<Task>> {
|
||||
let mut tasks = Vec::new();
|
||||
|
||||
for id in self.processing.ids() {
|
||||
let task = self.store.get_task(id, None).await?;
|
||||
tasks.push(task);
|
||||
}
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
|
||||
pub fn schedule_snapshot(&mut self, job: SnapshotJob) {
|
||||
self.snapshots.push_back(job);
|
||||
self.notify();
|
||||
}
|
||||
|
||||
async fn fetch_pending_tasks(&mut self) -> Result<()> {
|
||||
self.store
|
||||
.fetch_unfinished_tasks(Some(self.next_fetched_task_id))
|
||||
.await?
|
||||
.into_iter()
|
||||
.for_each(|t| {
|
||||
self.next_fetched_task_id = t.id + 1;
|
||||
self.register_task(t);
|
||||
});
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Prepare the next batch, and set `processing` to the ids in that batch.
|
||||
pub async fn prepare(&mut self) -> Result<Batch> {
|
||||
// If there is a job to process, do it first.
|
||||
if let Some(job) = self.snapshots.pop_front() {
|
||||
// There is more work to do, notify the update loop
|
||||
self.notify_if_not_empty();
|
||||
let batch = Batch::new(None, BatchContent::Snapshot(job));
|
||||
return Ok(batch);
|
||||
}
|
||||
|
||||
// Try to fill the queue with pending tasks.
|
||||
self.fetch_pending_tasks().await?;
|
||||
|
||||
self.processing = make_batch(&mut self.tasks, &self.config);
|
||||
|
||||
log::debug!("prepared batch with {} tasks", self.processing.len());
|
||||
|
||||
if !self.processing.is_nothing() {
|
||||
let (processing, mut content) = self
|
||||
.store
|
||||
.get_processing_tasks(std::mem::take(&mut self.processing))
|
||||
.await?;
|
||||
|
||||
// The batch id is the id of the first update it contains. At this point we must have a
|
||||
// valid batch that contains at least 1 task.
|
||||
let id = match content.first() {
|
||||
Some(Task { id, .. }) => *id,
|
||||
_ => panic!("invalid batch"),
|
||||
};
|
||||
|
||||
content.push_event(TaskEvent::Batched {
|
||||
batch_id: id,
|
||||
timestamp: OffsetDateTime::now_utc(),
|
||||
});
|
||||
|
||||
self.processing = processing;
|
||||
|
||||
let batch = Batch::new(Some(id), content);
|
||||
|
||||
// There is more work to do, notify the update loop
|
||||
self.notify_if_not_empty();
|
||||
|
||||
Ok(batch)
|
||||
} else {
|
||||
Ok(Batch::empty())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum Processing {
|
||||
DocumentAdditions(Vec<TaskId>),
|
||||
IndexUpdate(TaskId),
|
||||
Dump(TaskId),
|
||||
/// Variant used when there is nothing to process.
|
||||
Nothing,
|
||||
}
|
||||
|
||||
impl Default for Processing {
|
||||
fn default() -> Self {
|
||||
Self::Nothing
|
||||
}
|
||||
}
|
||||
|
||||
enum ProcessingIter<'a> {
|
||||
Many(slice::Iter<'a, TaskId>),
|
||||
Single(Option<TaskId>),
|
||||
}
|
||||
|
||||
impl<'a> Iterator for ProcessingIter<'a> {
|
||||
type Item = TaskId;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self {
|
||||
ProcessingIter::Many(iter) => iter.next().copied(),
|
||||
ProcessingIter::Single(val) => val.take(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Processing {
|
||||
fn is_nothing(&self) -> bool {
|
||||
matches!(self, Processing::Nothing)
|
||||
}
|
||||
|
||||
pub fn ids(&self) -> impl Iterator<Item = TaskId> + '_ {
|
||||
match self {
|
||||
Processing::DocumentAdditions(v) => ProcessingIter::Many(v.iter()),
|
||||
Processing::IndexUpdate(id) | Processing::Dump(id) => ProcessingIter::Single(Some(*id)),
|
||||
Processing::Nothing => ProcessingIter::Single(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
match self {
|
||||
Processing::DocumentAdditions(v) => v.len(),
|
||||
Processing::IndexUpdate(_) | Processing::Dump(_) => 1,
|
||||
Processing::Nothing => 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
}
|
||||
|
||||
fn make_batch(tasks: &mut TaskQueue, config: &SchedulerConfig) -> Processing {
|
||||
let mut doc_count = 0;
|
||||
tasks
|
||||
.head_mut(|list| match list.peek().copied() {
|
||||
Some(PendingTask {
|
||||
kind: TaskType::IndexUpdate,
|
||||
id,
|
||||
}) => {
|
||||
list.pop();
|
||||
Processing::IndexUpdate(id)
|
||||
}
|
||||
Some(PendingTask {
|
||||
kind: TaskType::Dump,
|
||||
id,
|
||||
}) => {
|
||||
list.pop();
|
||||
Processing::Dump(id)
|
||||
}
|
||||
Some(PendingTask { kind, .. }) => {
|
||||
let mut task_list = Vec::new();
|
||||
loop {
|
||||
match list.peek() {
|
||||
Some(pending) if pending.kind == kind => {
|
||||
// We always need to process at least one task for the scheduler to make progress.
|
||||
if config.disable_auto_batching && !task_list.is_empty() {
|
||||
break;
|
||||
}
|
||||
let pending = list.pop().unwrap();
|
||||
task_list.push(pending.id);
|
||||
|
||||
// We add the number of documents to the count if we are scheduling document additions.
|
||||
match pending.kind {
|
||||
TaskType::DocumentUpdate { number }
|
||||
| TaskType::DocumentAddition { number } => {
|
||||
doc_count += number;
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
Processing::DocumentAdditions(task_list)
|
||||
}
|
||||
None => Processing::Nothing,
|
||||
})
|
||||
.unwrap_or(Processing::Nothing)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use milli::update::IndexDocumentsMethod;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::tasks::task::TaskContent;
|
||||
|
||||
use super::*;
|
||||
|
||||
fn gen_task(id: TaskId, content: TaskContent) -> Task {
|
||||
Task {
|
||||
id,
|
||||
content,
|
||||
events: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn register_updates_multiples_indexes() {
|
||||
let mut queue = TaskQueue::default();
|
||||
queue.insert(gen_task(0, TaskContent::IndexDeletion { index_uid: IndexUid::new_unchecked("test1") }));
|
||||
queue.insert(gen_task(1, TaskContent::IndexDeletion { index_uid: IndexUid::new_unchecked("test2") }));
|
||||
queue.insert(gen_task(2, TaskContent::IndexDeletion { index_uid: IndexUid::new_unchecked("test2") }));
|
||||
queue.insert(gen_task(3, TaskContent::IndexDeletion { index_uid: IndexUid::new_unchecked("test2") }));
|
||||
queue.insert(gen_task(4, TaskContent::IndexDeletion { index_uid: IndexUid::new_unchecked("test1") }));
|
||||
queue.insert(gen_task(5, TaskContent::IndexDeletion { index_uid: IndexUid::new_unchecked("test1") }));
|
||||
queue.insert(gen_task(6, TaskContent::IndexDeletion { index_uid: IndexUid::new_unchecked("test2") }));
|
||||
|
||||
let test1_tasks = queue
|
||||
.head_mut(|tasks| tasks.drain().map(|t| t.id).collect::<Vec<_>>())
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(test1_tasks, &[0, 4, 5]);
|
||||
|
||||
let test2_tasks = queue
|
||||
.head_mut(|tasks| tasks.drain().map(|t| t.id).collect::<Vec<_>>())
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(test2_tasks, &[1, 2, 3, 6]);
|
||||
|
||||
assert!(queue.index_tasks.is_empty());
|
||||
assert!(queue.queue.is_empty());
|
||||
}
|
||||
|
||||
fn gen_doc_addition_task_content(index_uid: &str) -> TaskContent {
|
||||
TaskContent::DocumentAddition {
|
||||
content_uuid: Uuid::new_v4(),
|
||||
merge_strategy: IndexDocumentsMethod::ReplaceDocuments,
|
||||
primary_key: Some("test".to_string()),
|
||||
documents_count: 0,
|
||||
allow_index_creation: true,
|
||||
index_uid: IndexUid::new_unchecked(index_uid),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn test_make_batch() {
|
||||
let mut queue = TaskQueue::default();
|
||||
queue.insert(gen_task(0, gen_doc_addition_task_content("test1")));
|
||||
queue.insert(gen_task(1, gen_doc_addition_task_content("test2")));
|
||||
queue.insert(gen_task(2, TaskContent::IndexDeletion { index_uid: IndexUid::new_unchecked("test2")}));
|
||||
queue.insert(gen_task(3, gen_doc_addition_task_content("test2")));
|
||||
queue.insert(gen_task(4, gen_doc_addition_task_content("test1")));
|
||||
queue.insert(gen_task(5, TaskContent::IndexDeletion { index_uid: IndexUid::new_unchecked("test1")}));
|
||||
queue.insert(gen_task(6, gen_doc_addition_task_content("test2")));
|
||||
queue.insert(gen_task(7, gen_doc_addition_task_content("test1")));
|
||||
queue.insert(gen_task(8, TaskContent::Dump { uid: "adump".to_owned() }));
|
||||
|
||||
let config = SchedulerConfig::default();
|
||||
|
||||
// Make sure that the dump is processed before everybody else.
|
||||
let batch = make_batch(&mut queue, &config);
|
||||
assert_eq!(batch, Processing::Dump(8));
|
||||
|
||||
let batch = make_batch(&mut queue, &config);
|
||||
assert_eq!(batch, Processing::DocumentAdditions(vec![0, 4]));
|
||||
|
||||
let batch = make_batch(&mut queue, &config);
|
||||
assert_eq!(batch, Processing::DocumentAdditions(vec![1]));
|
||||
|
||||
let batch = make_batch(&mut queue, &config);
|
||||
assert_eq!(batch, Processing::IndexUpdate(2));
|
||||
|
||||
let batch = make_batch(&mut queue, &config);
|
||||
assert_eq!(batch, Processing::DocumentAdditions(vec![3, 6]));
|
||||
|
||||
let batch = make_batch(&mut queue, &config);
|
||||
assert_eq!(batch, Processing::IndexUpdate(5));
|
||||
|
||||
let batch = make_batch(&mut queue, &config);
|
||||
assert_eq!(batch, Processing::DocumentAdditions(vec![7]));
|
||||
|
||||
assert!(queue.is_empty());
|
||||
}
|
||||
}
|
@ -1,93 +0,0 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use time::OffsetDateTime;
|
||||
use tokio::sync::{watch, RwLock};
|
||||
|
||||
use super::batch::Batch;
|
||||
use super::error::Result;
|
||||
use super::{BatchHandler, Scheduler};
|
||||
use crate::tasks::task::TaskEvent;
|
||||
|
||||
/// The update loop sequentially performs batches of updates by asking the scheduler for a batch,
|
||||
/// and handing it to the `TaskPerformer`.
|
||||
pub struct UpdateLoop {
|
||||
scheduler: Arc<RwLock<Scheduler>>,
|
||||
performers: Vec<Arc<dyn BatchHandler + Send + Sync + 'static>>,
|
||||
|
||||
notifier: Option<watch::Receiver<()>>,
|
||||
}
|
||||
|
||||
impl UpdateLoop {
|
||||
pub fn new(
|
||||
scheduler: Arc<RwLock<Scheduler>>,
|
||||
performers: Vec<Arc<dyn BatchHandler + Send + Sync + 'static>>,
|
||||
notifier: watch::Receiver<()>,
|
||||
) -> Self {
|
||||
Self {
|
||||
scheduler,
|
||||
performers,
|
||||
notifier: Some(notifier),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run(mut self) {
|
||||
let mut notifier = self.notifier.take().unwrap();
|
||||
|
||||
loop {
|
||||
if notifier.changed().await.is_err() {
|
||||
break;
|
||||
}
|
||||
|
||||
if let Err(e) = self.process_next_batch().await {
|
||||
log::error!("an error occurred while processing an update batch: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn process_next_batch(&self) -> Result<()> {
|
||||
let mut batch = { self.scheduler.write().await.prepare().await? };
|
||||
let performer = self
|
||||
.performers
|
||||
.iter()
|
||||
.find(|p| p.accept(&batch))
|
||||
.expect("No performer found for batch")
|
||||
.clone();
|
||||
|
||||
batch
|
||||
.content
|
||||
.push_event(TaskEvent::Processing(OffsetDateTime::now_utc()));
|
||||
|
||||
batch.content = {
|
||||
self.scheduler
|
||||
.read()
|
||||
.await
|
||||
.update_tasks(batch.content)
|
||||
.await?
|
||||
};
|
||||
|
||||
let batch = performer.process_batch(batch).await;
|
||||
|
||||
self.handle_batch_result(batch, performer).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Handles the result from a processed batch.
|
||||
///
|
||||
/// When a task is processed, the result of the process is pushed to its event list. The
|
||||
/// `handle_batch_result` make sure that the new state is saved to the store.
|
||||
/// The tasks are then removed from the processing queue.
|
||||
async fn handle_batch_result(
|
||||
&self,
|
||||
mut batch: Batch,
|
||||
performer: Arc<dyn BatchHandler + Sync + Send + 'static>,
|
||||
) -> Result<()> {
|
||||
let mut scheduler = self.scheduler.write().await;
|
||||
let content = scheduler.update_tasks(batch.content).await?;
|
||||
scheduler.finish();
|
||||
drop(scheduler);
|
||||
batch.content = content;
|
||||
performer.finish(&batch).await;
|
||||
Ok(())
|
||||
}
|
||||
}
|
@ -1,258 +0,0 @@
|
||||
use std::fs::{create_dir_all, File};
|
||||
use std::io::{self, BufReader, BufWriter, Write};
|
||||
use std::ops::{Deref, DerefMut};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use milli::documents::DocumentsBatchReader;
|
||||
use serde_json::Map;
|
||||
use tempfile::{NamedTempFile, PersistError};
|
||||
use uuid::Uuid;
|
||||
|
||||
#[cfg(not(test))]
|
||||
pub use store::UpdateFileStore;
|
||||
#[cfg(test)]
|
||||
pub use test::MockUpdateFileStore as UpdateFileStore;
|
||||
|
||||
const UPDATE_FILES_PATH: &str = "updates/updates_files";
|
||||
|
||||
use crate::document_formats::read_ndjson;
|
||||
|
||||
pub struct UpdateFile {
|
||||
path: PathBuf,
|
||||
file: NamedTempFile,
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
#[error("Error while persisting update to disk: {0}")]
|
||||
pub struct UpdateFileStoreError(Box<dyn std::error::Error + Sync + Send + 'static>);
|
||||
|
||||
pub type Result<T> = std::result::Result<T, UpdateFileStoreError>;
|
||||
|
||||
macro_rules! into_update_store_error {
|
||||
($($other:path),*) => {
|
||||
$(
|
||||
impl From<$other> for UpdateFileStoreError {
|
||||
fn from(other: $other) -> Self {
|
||||
Self(Box::new(other))
|
||||
}
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
into_update_store_error!(
|
||||
PersistError,
|
||||
io::Error,
|
||||
serde_json::Error,
|
||||
milli::documents::Error,
|
||||
milli::documents::DocumentsBatchCursorError
|
||||
);
|
||||
|
||||
impl UpdateFile {
|
||||
pub fn persist(self) -> Result<()> {
|
||||
self.file.persist(&self.path)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for UpdateFile {
|
||||
type Target = NamedTempFile;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.file
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for UpdateFile {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.file
|
||||
}
|
||||
}
|
||||
|
||||
mod store {
|
||||
use super::*;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct UpdateFileStore {
|
||||
path: PathBuf,
|
||||
}
|
||||
|
||||
impl UpdateFileStore {
|
||||
pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> anyhow::Result<()> {
|
||||
let src_update_files_path = src.as_ref().join(UPDATE_FILES_PATH);
|
||||
let dst_update_files_path = dst.as_ref().join(UPDATE_FILES_PATH);
|
||||
|
||||
// No update files to load
|
||||
if !src_update_files_path.exists() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
create_dir_all(&dst_update_files_path)?;
|
||||
|
||||
let entries = std::fs::read_dir(src_update_files_path)?;
|
||||
|
||||
for entry in entries {
|
||||
let entry = entry?;
|
||||
let update_file = BufReader::new(File::open(entry.path())?);
|
||||
let file_uuid = entry.file_name();
|
||||
let file_uuid = file_uuid
|
||||
.to_str()
|
||||
.ok_or_else(|| anyhow::anyhow!("invalid update file name"))?;
|
||||
let dst_path = dst_update_files_path.join(file_uuid);
|
||||
let dst_file = BufWriter::new(File::create(dst_path)?);
|
||||
read_ndjson(update_file, dst_file)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
|
||||
let path = path.as_ref().join(UPDATE_FILES_PATH);
|
||||
std::fs::create_dir_all(&path)?;
|
||||
Ok(Self { path })
|
||||
}
|
||||
|
||||
/// Creates a new temporary update file.
|
||||
/// A call to `persist` is needed to persist the file in the database.
|
||||
pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> {
|
||||
let file = NamedTempFile::new_in(&self.path)?;
|
||||
let uuid = Uuid::new_v4();
|
||||
let path = self.path.join(uuid.to_string());
|
||||
let update_file = UpdateFile { file, path };
|
||||
|
||||
Ok((uuid, update_file))
|
||||
}
|
||||
|
||||
/// Returns the file corresponding to the requested uuid.
|
||||
pub fn get_update(&self, uuid: Uuid) -> Result<File> {
|
||||
let path = self.path.join(uuid.to_string());
|
||||
let file = File::open(path)?;
|
||||
Ok(file)
|
||||
}
|
||||
|
||||
/// Copies the content of the update file pointed to by `uuid` to the `dst` directory.
|
||||
pub fn snapshot(&self, uuid: Uuid, dst: impl AsRef<Path>) -> Result<()> {
|
||||
let src = self.path.join(uuid.to_string());
|
||||
let mut dst = dst.as_ref().join(UPDATE_FILES_PATH);
|
||||
std::fs::create_dir_all(&dst)?;
|
||||
dst.push(uuid.to_string());
|
||||
std::fs::copy(src, dst)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Peforms a dump of the given update file uuid into the provided dump path.
|
||||
pub fn dump(&self, uuid: Uuid, dump_path: impl AsRef<Path>) -> Result<()> {
|
||||
let uuid_string = uuid.to_string();
|
||||
let update_file_path = self.path.join(&uuid_string);
|
||||
let mut dst = dump_path.as_ref().join(UPDATE_FILES_PATH);
|
||||
std::fs::create_dir_all(&dst)?;
|
||||
dst.push(&uuid_string);
|
||||
|
||||
let update_file = File::open(update_file_path)?;
|
||||
let mut dst_file = NamedTempFile::new_in(&dump_path)?;
|
||||
let (mut document_cursor, index) =
|
||||
DocumentsBatchReader::from_reader(update_file)?.into_cursor_and_fields_index();
|
||||
|
||||
let mut document_buffer = Map::new();
|
||||
// TODO: we need to find a way to do this more efficiently. (create a custom serializer
|
||||
// for jsonl for example...)
|
||||
while let Some(document) = document_cursor.next_document()? {
|
||||
for (field_id, content) in document.iter() {
|
||||
if let Some(field_name) = index.name(field_id) {
|
||||
let content = serde_json::from_slice(content)?;
|
||||
document_buffer.insert(field_name.to_string(), content);
|
||||
}
|
||||
}
|
||||
|
||||
serde_json::to_writer(&mut dst_file, &document_buffer)?;
|
||||
dst_file.write_all(b"\n")?;
|
||||
document_buffer.clear();
|
||||
}
|
||||
|
||||
dst_file.persist(dst)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_size(&self, uuid: Uuid) -> Result<u64> {
|
||||
Ok(self.get_update(uuid)?.metadata()?.len())
|
||||
}
|
||||
|
||||
pub async fn delete(&self, uuid: Uuid) -> Result<()> {
|
||||
let path = self.path.join(uuid.to_string());
|
||||
tokio::fs::remove_file(path).await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::sync::Arc;
|
||||
|
||||
use nelson::Mocker;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum MockUpdateFileStore {
|
||||
Real(store::UpdateFileStore),
|
||||
Mock(Arc<Mocker>),
|
||||
}
|
||||
|
||||
impl MockUpdateFileStore {
|
||||
pub fn mock(mocker: Mocker) -> Self {
|
||||
Self::Mock(Arc::new(mocker))
|
||||
}
|
||||
|
||||
pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> anyhow::Result<()> {
|
||||
store::UpdateFileStore::load_dump(src, dst)
|
||||
}
|
||||
|
||||
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
|
||||
store::UpdateFileStore::new(path).map(Self::Real)
|
||||
}
|
||||
|
||||
pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> {
|
||||
match self {
|
||||
MockUpdateFileStore::Real(s) => s.new_update(),
|
||||
MockUpdateFileStore::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_update(&self, uuid: Uuid) -> Result<File> {
|
||||
match self {
|
||||
MockUpdateFileStore::Real(s) => s.get_update(uuid),
|
||||
MockUpdateFileStore::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn snapshot(&self, uuid: Uuid, dst: impl AsRef<Path>) -> Result<()> {
|
||||
match self {
|
||||
MockUpdateFileStore::Real(s) => s.snapshot(uuid, dst),
|
||||
MockUpdateFileStore::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn dump(&self, uuid: Uuid, dump_path: impl AsRef<Path>) -> Result<()> {
|
||||
match self {
|
||||
MockUpdateFileStore::Real(s) => s.dump(uuid, dump_path),
|
||||
MockUpdateFileStore::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_size(&self, uuid: Uuid) -> Result<u64> {
|
||||
match self {
|
||||
MockUpdateFileStore::Real(s) => s.get_size(uuid),
|
||||
MockUpdateFileStore::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn delete(&self, uuid: Uuid) -> Result<()> {
|
||||
match self {
|
||||
MockUpdateFileStore::Real(s) => s.delete(uuid).await,
|
||||
MockUpdateFileStore::Mock(mocker) => unsafe { mocker.get("delete").call(uuid) },
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user