From aa50acb0314dcf4cc8ee696e1d4088c41e4d907a Mon Sep 17 00:00:00 2001
From: ad hoc
Date: Mon, 16 May 2022 19:50:45 +0200
Subject: [PATCH 01/33] make Task index_uid an option

Not all tasks relate to an index. Tasks that don't relate to an index
have their index_uid set to None.
---
 meilisearch-http/src/task.rs                  |  8 ++--
 .../index_controller/dump_actor/compat/v3.rs  |  2 +-
 meilisearch-lib/src/index_controller/mod.rs   | 11 ++++--
 meilisearch-lib/src/index_resolver/mod.rs     | 19 +++++-----
 meilisearch-lib/src/tasks/scheduler.rs        |  5 ++-
 meilisearch-lib/src/tasks/task.rs             |  6 ++-
 meilisearch-lib/src/tasks/task_store/mod.rs   | 26 +++++++++----
 meilisearch-lib/src/tasks/task_store/store.rs | 37 +++++++++++++++----
 8 files changed, 79 insertions(+), 35 deletions(-)

diff --git a/meilisearch-http/src/task.rs b/meilisearch-http/src/task.rs
index 7179b10db..c8e269e56 100644
--- a/meilisearch-http/src/task.rs
+++ b/meilisearch-http/src/task.rs
@@ -137,7 +137,7 @@ fn serialize_duration(
 #[serde(rename_all = "camelCase")]
 pub struct TaskView {
     uid: TaskId,
-    index_uid: String,
+    index_uid: Option<String>,
     status: TaskStatus,
     #[serde(rename = "type")]
     task_type: TaskType,
@@ -313,7 +313,7 @@ impl From<Task> for TaskView {
         Self {
             uid: id,
-            index_uid: index_uid.into_inner(),
+            index_uid: index_uid.map(|u| u.into_inner()),
             status,
             task_type,
             details,
@@ -342,7 +342,7 @@ impl From<Vec<TaskView>> for TaskListView {
 #[serde(rename_all = "camelCase")]
 pub struct SummarizedTaskView {
     uid: TaskId,
-    index_uid: String,
+    index_uid: Option<String>,
     status: TaskStatus,
     #[serde(rename = "type")]
     task_type: TaskType,
@@ -365,7 +365,7 @@ impl From<Task> for SummarizedTaskView {
         Self {
             uid: other.id,
-            index_uid: other.index_uid.to_string(),
+            index_uid: other.index_uid.map(|u| u.into_inner()),
             status: TaskStatus::Enqueued,
             task_type: other.content.into(),
             enqueued_at,
diff --git a/meilisearch-lib/src/index_controller/dump_actor/compat/v3.rs b/meilisearch-lib/src/index_controller/dump_actor/compat/v3.rs
index 7cd670bad..befd70963 100644
--- a/meilisearch-lib/src/index_controller/dump_actor/compat/v3.rs
+++ b/meilisearch-lib/src/index_controller/dump_actor/compat/v3.rs
@@ -187,7 +187,7 @@ impl From<(UpdateStatus, String, TaskId)> for Task {
         // Dummy task
         let mut task = Task {
             id: task_id,
-            index_uid: IndexUid::new(uid).unwrap(),
+            index_uid: Some(IndexUid::new(uid).unwrap()),
             content: TaskContent::IndexDeletion,
             events: Vec::new(),
         };
diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs
index a302f12da..7ba91dfca 100644
--- a/meilisearch-lib/src/index_controller/mod.rs
+++ b/meilisearch-lib/src/index_controller/mod.rs
@@ -419,7 +419,7 @@ where
             Update::UpdateIndex { primary_key } => TaskContent::IndexUpdate { primary_key },
         };
-        let task = self.task_store.register(uid, content).await?;
+        let task = self.task_store.register(Some(uid), content).await?;
         self.scheduler.read().await.notify();
         Ok(task)
@@ -569,7 +569,12 @@ where
         // Check if the currently indexing update is from our index.
         let is_indexing = processing_tasks
             .first()
-            .map(|task| task.index_uid.as_str() == uid)
+            .map(|task| {
+                task.index_uid
+                    .as_ref()
+                    .map(|u| u.as_str() == uid)
+                    .unwrap_or(false)
+            })
             .unwrap_or_default();
         let index = self.index_resolver.get_index(uid).await?;
@@ -605,7 +610,7 @@ where
             // Check if the currently indexing update is from our index.
stats.is_indexing = processing_tasks .first() - .map(|p| p.index_uid.as_str() == index_uid) + .and_then(|p| p.index_uid.as_ref().map(|u| u.as_str() == index_uid)) .or(Some(false)); indexes.insert(index_uid, stats); diff --git a/meilisearch-lib/src/index_resolver/mod.rs b/meilisearch-lib/src/index_resolver/mod.rs index 8ca3efdc6..9db808d3f 100644 --- a/meilisearch-lib/src/index_resolver/mod.rs +++ b/meilisearch-lib/src/index_resolver/mod.rs @@ -204,7 +204,7 @@ where match batch.tasks.first() { Some(Task { - index_uid, + index_uid: Some(ref index_uid), id, content: TaskContent::DocumentAddition { @@ -285,7 +285,7 @@ where TaskContent::DocumentAddition { .. } => panic!("updates should be handled by batch"), TaskContent::DocumentDeletion(DocumentDeletion::Ids(ids)) => { let ids = ids.clone(); - let index = self.get_index(index_uid.into_inner()).await?; + let index = self.get_index(index_uid.unwrap().into_inner()).await?; let DocumentDeletionResult { deleted_documents, .. @@ -294,7 +294,7 @@ where Ok(TaskResult::DocumentDeletion { deleted_documents }) } TaskContent::DocumentDeletion(DocumentDeletion::Clear) => { - let index = self.get_index(index_uid.into_inner()).await?; + let index = self.get_index(index_uid.unwrap().into_inner()).await?; let deleted_documents = spawn_blocking(move || -> IndexResult { let number_documents = index.stats()?.number_of_documents; index.clear_documents()?; @@ -310,9 +310,10 @@ where allow_index_creation, } => { let index = if *is_deletion || !*allow_index_creation { - self.get_index(index_uid.into_inner()).await? + self.get_index(index_uid.unwrap().into_inner()).await? } else { - self.get_or_create_index(index_uid, task.id).await? + self.get_or_create_index(index_uid.unwrap(), task.id) + .await? }; let settings = settings.clone(); @@ -321,7 +322,7 @@ where Ok(TaskResult::Other) } TaskContent::IndexDeletion => { - let index = self.delete_index(index_uid.into_inner()).await?; + let index = self.delete_index(index_uid.unwrap().into_inner()).await?; let deleted_documents = spawn_blocking(move || -> IndexResult { Ok(index.stats()?.number_of_documents) @@ -331,7 +332,7 @@ where Ok(TaskResult::ClearAll { deleted_documents }) } TaskContent::IndexCreation { primary_key } => { - let index = self.create_index(index_uid, task.id).await?; + let index = self.create_index(index_uid.unwrap(), task.id).await?; if let Some(primary_key) = primary_key { let primary_key = primary_key.clone(); @@ -341,7 +342,7 @@ where Ok(TaskResult::Other) } TaskContent::IndexUpdate { primary_key } => { - let index = self.get_index(index_uid.into_inner()).await?; + let index = self.get_index(index_uid.unwrap().into_inner()).await?; if let Some(primary_key) = primary_key { let primary_key = primary_key.clone(); @@ -503,7 +504,7 @@ mod test { proptest! 
{
        #[test]
        fn test_process_task(
-            task in any::<Task>(),
+            task in any::<Task>().prop_filter("uid must be Some", |t| t.index_uid.is_some()),
            index_exists in any::<bool>(),
            index_op_fails in any::<bool>(),
            any_int in any::<u64>(),
diff --git a/meilisearch-lib/src/tasks/scheduler.rs b/meilisearch-lib/src/tasks/scheduler.rs
index 0e540a646..94de2a5fd 100644
--- a/meilisearch-lib/src/tasks/scheduler.rs
+++ b/meilisearch-lib/src/tasks/scheduler.rs
@@ -125,7 +125,8 @@ struct TaskQueue {
 impl TaskQueue {
     fn insert(&mut self, task: Task) {
-        let uid = task.index_uid.into_inner();
+        // TODO(marin): The index uid should be remapped to a task queue identifier here
+        let uid = task.index_uid.unwrap().into_inner();
         let id = task.id;
         let kind = match task.content {
             TaskContent::DocumentAddition {
@@ -443,7 +444,7 @@ mod test {
     fn gen_task(id: TaskId, index_uid: &str, content: TaskContent) -> Task {
         Task {
             id,
-            index_uid: IndexUid::new_unchecked(index_uid),
+            index_uid: Some(IndexUid::new_unchecked(index_uid)),
             content,
             events: vec![],
         }
diff --git a/meilisearch-lib/src/tasks/task.rs b/meilisearch-lib/src/tasks/task.rs
index ecbd4ca62..d7a73a2ae 100644
--- a/meilisearch-lib/src/tasks/task.rs
+++ b/meilisearch-lib/src/tasks/task.rs
@@ -74,7 +74,11 @@ pub enum TaskEvent {
 #[cfg_attr(test, derive(proptest_derive::Arbitrary))]
 pub struct Task {
     pub id: TaskId,
-    pub index_uid: IndexUid,
+    /// The name of the index the task is targeting. If it isn't targeting any index (e.g. a
+    /// Dump task), then this is None.
+    // TODO: the next time we make a breaking change to dumps, it would be a good idea to move
+    // this field inside of the TaskContent.
+    pub index_uid: Option<IndexUid>,
     pub content: TaskContent,
     pub events: Vec<TaskEvent>,
 }
diff --git a/meilisearch-lib/src/tasks/task_store/mod.rs b/meilisearch-lib/src/tasks/task_store/mod.rs
index bdcd13f37..bde0f6360 100644
--- a/meilisearch-lib/src/tasks/task_store/mod.rs
+++ b/meilisearch-lib/src/tasks/task_store/mod.rs
@@ -30,10 +30,14 @@ pub struct TaskFilter {
 impl TaskFilter {
     fn pass(&self, task: &Task) -> bool {
-        self.indexes
-            .as_ref()
-            .map(|indexes| indexes.contains(&*task.index_uid))
-            .unwrap_or(true)
+        match task.index_uid {
+            Some(ref index_uid) => self
+                .indexes
+                .as_ref()
+                .map(|indexes| indexes.contains(index_uid.as_str()))
+                .unwrap_or(true),
+            None => false,
+        }
     }
     /// Adds an index to the filter, so the filter must match this index.
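The `None => false` arm above is the behavioural crux of this patch: a task with no `index_uid` (such as the dump tasks introduced later in this series) never matches an index filter, while tasks that do target an index keep the old semantics. A minimal, self-contained sketch of that behaviour, using simplified stand-ins rather than the real `Task` and `TaskFilter` types:

```rust
use std::collections::HashSet;

// Simplified stand-ins for the real types; only the fields relevant to
// filtering are kept.
struct Task {
    index_uid: Option<String>,
}

#[derive(Default)]
struct TaskFilter {
    indexes: Option<HashSet<String>>,
}

impl TaskFilter {
    fn pass(&self, task: &Task) -> bool {
        match task.index_uid {
            // An index-scoped task passes if no index filter is set, or if
            // the filter explicitly contains that index.
            Some(ref uid) => self
                .indexes
                .as_ref()
                .map(|indexes| indexes.contains(uid))
                .unwrap_or(true),
            // A task that targets no index never matches an index filter.
            None => false,
        }
    }
}

fn main() {
    let filter = TaskFilter {
        indexes: Some(["movies".to_string()].into_iter().collect()),
    };

    let indexed = Task { index_uid: Some("movies".to_string()) };
    let global = Task { index_uid: None };

    assert!(filter.pass(&indexed));
    assert!(!filter.pass(&global));
}
```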
@@ -66,7 +70,11 @@ impl TaskStore {
         Ok(Self { store })
     }
-    pub async fn register(&self, index_uid: IndexUid, content: TaskContent) -> Result<Task> {
+    pub async fn register(
+        &self,
+        index_uid: Option<IndexUid>,
+        content: TaskContent,
+    ) -> Result<Task> {
         debug!("registering update: {:?}", content);
         let store = self.store.clone();
         let task = tokio::task::spawn_blocking(move || -> Result<Task> {
@@ -305,7 +313,11 @@ pub mod test {
             }
         }
-        pub async fn register(&self, index_uid: IndexUid, content: TaskContent) -> Result<Task> {
+        pub async fn register(
+            &self,
+            index_uid: Option<IndexUid>,
+            content: TaskContent,
+        ) -> Result<Task> {
             match self {
                 Self::Real(s) => s.register(index_uid, content).await,
                 Self::Mock(_m) => todo!(),
             }
@@ -335,7 +347,7 @@ pub mod test {
         let gen_task = |id: TaskId| Task {
             id,
-            index_uid: IndexUid::new_unchecked("test"),
+            index_uid: Some(IndexUid::new_unchecked("test")),
            content: TaskContent::IndexCreation { primary_key: None },
             events: Vec::new(),
         };
diff --git a/meilisearch-lib/src/tasks/task_store/store.rs b/meilisearch-lib/src/tasks/task_store/store.rs
index 4ff986d8b..912047d1e 100644
--- a/meilisearch-lib/src/tasks/task_store/store.rs
+++ b/meilisearch-lib/src/tasks/task_store/store.rs
@@ -109,7 +109,8 @@ impl Store {
     pub fn put(&self, txn: &mut RwTxn, task: &Task) -> Result<()> {
         self.tasks.put(txn, &BEU64::new(task.id), task)?;
         self.uids_task_ids
-            .put(txn, &(&task.index_uid, task.id), &())?;
+            // TODO(marin): The index uid should be remapped to a task queue identifier here
+            .put(txn, &(&task.index_uid.as_ref().unwrap(), task.id), &())?;
         Ok(())
     }
@@ -325,7 +326,7 @@ pub mod test {
         let tasks = (0..100)
             .map(|_| Task {
                 id: rand::random(),
-                index_uid: IndexUid::new_unchecked("test"),
+                index_uid: Some(IndexUid::new_unchecked("test")),
                 content: TaskContent::IndexDeletion,
                 events: vec![],
             })
@@ -356,14 +357,14 @@ pub mod test {
         let task_1 = Task {
             id: 1,
-            index_uid: IndexUid::new_unchecked("test"),
+            index_uid: Some(IndexUid::new_unchecked("test")),
             content: TaskContent::IndexDeletion,
             events: vec![],
         };
         let task_2 = Task {
             id: 0,
-            index_uid: IndexUid::new_unchecked("test1"),
+            index_uid: Some(IndexUid::new_unchecked("test1")),
             content: TaskContent::IndexDeletion,
             events: vec![],
         };
@@ -379,18 +380,28 @@ pub mod test {
         txn.abort().unwrap();
         assert_eq!(tasks.len(), 1);
-        assert_eq!(&*tasks.first().unwrap().index_uid, "test");
+        assert_eq!(
+            tasks
+                .first()
+                .as_ref()
+                .unwrap()
+                .index_uid
+                .as_ref()
+                .unwrap()
+                .as_str(),
+            "test"
+        );
         // same thing but invert the ids
         let task_1 = Task {
             id: 0,
-            index_uid: IndexUid::new_unchecked("test"),
+            index_uid: Some(IndexUid::new_unchecked("test")),
             content: TaskContent::IndexDeletion,
             events: vec![],
         };
         let task_2 = Task {
             id: 1,
-            index_uid: IndexUid::new_unchecked("test1"),
+            index_uid: Some(IndexUid::new_unchecked("test1")),
             content: TaskContent::IndexDeletion,
             events: vec![],
         };
@@ -405,7 +416,17 @@ pub mod test {
         let tasks = store.list_tasks(&txn, None, Some(filter), None).unwrap();
         assert_eq!(tasks.len(), 1);
-        assert_eq!(&*tasks.first().unwrap().index_uid, "test");
+        assert_eq!(
+            &*tasks
+                .first()
+                .as_ref()
+                .unwrap()
+                .index_uid
+                .as_ref()
+                .unwrap()
+                .as_str(),
+            "test"
+        );
     }
     proptest!
{ From 5a5066023baa514a6779fc77a6330cfef28ac4af Mon Sep 17 00:00:00 2001 From: ad hoc Date: Mon, 16 May 2022 20:16:23 +0200 Subject: [PATCH 02/33] introduce TaskListIdentifier --- meilisearch-http/src/task.rs | 1 + meilisearch-lib/src/index_resolver/mod.rs | 7 ++- meilisearch-lib/src/tasks/scheduler.rs | 54 ++++++++++++++++------- meilisearch-lib/src/tasks/task.rs | 10 +++++ 4 files changed, 54 insertions(+), 18 deletions(-) diff --git a/meilisearch-http/src/task.rs b/meilisearch-http/src/task.rs index c8e269e56..56a181d29 100644 --- a/meilisearch-http/src/task.rs +++ b/meilisearch-http/src/task.rs @@ -216,6 +216,7 @@ impl From for TaskView { TaskType::IndexUpdate, Some(TaskDetails::IndexInfo { primary_key }), ), + TaskContent::Dump { path: _ } => todo!(), }; // An event always has at least one event: "Created" diff --git a/meilisearch-lib/src/index_resolver/mod.rs b/meilisearch-lib/src/index_resolver/mod.rs index 9db808d3f..3b8bdd631 100644 --- a/meilisearch-lib/src/index_resolver/mod.rs +++ b/meilisearch-lib/src/index_resolver/mod.rs @@ -351,6 +351,7 @@ where Ok(TaskResult::Other) } + TaskContent::Dump { path: _ } => Ok(TaskResult::Other), } } @@ -504,7 +505,7 @@ mod test { proptest! { #[test] fn test_process_task( - task in any::().prop_filter("uid must be Some", |t| t.index_uid.is_some()), + task in any::().prop_filter("IndexUid should be Some", |s| s.index_uid.is_some()), index_exists in any::(), index_op_fails in any::(), any_int in any::(), @@ -580,6 +581,7 @@ mod test { .then(move |_| result()); } } + TaskContent::Dump { path: _ } => { } } mocker.when::<(), IndexResult>("stats") @@ -608,6 +610,7 @@ mod test { } // if index already exists, create index will return an error TaskContent::IndexCreation { .. } if index_exists => (), + TaskContent::Dump { .. } => (), // The index exists and get should be called _ if index_exists => { index_store @@ -648,7 +651,7 @@ mod test { // Test for some expected output scenarios: // Index creation and deletion cannot fail because of a failed index op, since they // don't perform index ops. - if index_op_fails && !matches!(task.content, TaskContent::IndexDeletion | TaskContent::IndexCreation { primary_key: None } | TaskContent::IndexUpdate { primary_key: None }) + if index_op_fails && !matches!(task.content, TaskContent::IndexDeletion | TaskContent::IndexCreation { primary_key: None } | TaskContent::IndexUpdate { primary_key: None } | TaskContent::Dump { .. }) || (index_exists && matches!(task.content, TaskContent::IndexCreation { .. 
}))
                || (!index_exists && matches!(task.content, TaskContent::IndexDeletion
                    | TaskContent::DocumentDeletion(_)
diff --git a/meilisearch-lib/src/tasks/scheduler.rs b/meilisearch-lib/src/tasks/scheduler.rs
index 94de2a5fd..67aa6d8e5 100644
--- a/meilisearch-lib/src/tasks/scheduler.rs
+++ b/meilisearch-lib/src/tasks/scheduler.rs
@@ -21,8 +21,13 @@ use super::{TaskFilter, TaskPerformer, TaskStore};
 #[derive(Eq, Debug, Clone, Copy)]
 enum TaskType {
-    DocumentAddition { number: usize },
-    DocumentUpdate { number: usize },
+    DocumentAddition {
+        number: usize,
+    },
+    DocumentUpdate {
+        number: usize,
+    },
+    /// Any other kind of task, including Dumps
     Other,
 }
@@ -63,7 +68,7 @@ impl Ord for PendingTask {
 #[derive(Debug)]
 struct TaskList {
-    index: String,
+    id: TaskListIdentifier,
     tasks: BinaryHeap<PendingTask>,
 }
@@ -82,9 +87,9 @@ impl DerefMut for TaskList {
 }
 impl TaskList {
-    fn new(index: String) -> Self {
+    fn new(id: TaskListIdentifier) -> Self {
         Self {
-            index,
+            id,
             tasks: Default::default(),
         }
     }
 }
@@ -92,7 +97,7 @@ impl TaskList {
 impl PartialEq for TaskList {
     fn eq(&self, other: &Self) -> bool {
-        self.index == other.index
+        self.id == other.id
     }
 }
@@ -100,11 +105,20 @@ impl Eq for TaskList {}
 impl Ord for TaskList {
     fn cmp(&self, other: &Self) -> Ordering {
-        match (self.peek(), other.peek()) {
-            (None, None) => Ordering::Equal,
-            (None, Some(_)) => Ordering::Less,
-            (Some(_), None) => Ordering::Greater,
-            (Some(lhs), Some(rhs)) => lhs.cmp(rhs),
+        match (&self.id, &other.id) {
+            (TaskListIdentifier::Index(_), TaskListIdentifier::Index(_)) => {
+                match (self.peek(), other.peek()) {
+                    (None, None) => Ordering::Equal,
+                    (None, Some(_)) => Ordering::Less,
+                    (Some(_), None) => Ordering::Greater,
+                    (Some(lhs), Some(rhs)) => lhs.cmp(rhs),
+                }
+            }
+            (TaskListIdentifier::Index(_), TaskListIdentifier::Dump) => Ordering::Greater,
+            (TaskListIdentifier::Dump, TaskListIdentifier::Index(_)) => Ordering::Less,
+            (TaskListIdentifier::Dump, TaskListIdentifier::Dump) => {
+                unreachable!("There should be only one Dump task list")
+            }
         }
     }
 }
@@ -115,19 +129,27 @@ impl PartialOrd for TaskList {
     }
 }
+#[derive(PartialEq, Eq, Hash, Debug, Clone)]
+enum TaskListIdentifier {
+    Index(String),
+    Dump,
+}
 #[derive(Default)]
 struct TaskQueue {
     /// Maps index uids to their TaskList, for quick access
-    index_tasks: HashMap<String, Arc<AtomicRefCell<TaskList>>>,
+    index_tasks: HashMap<TaskListIdentifier, Arc<AtomicRefCell<TaskList>>>,
     /// A queue that orders TaskList by the priority of their first update
     queue: BinaryHeap<Arc<AtomicRefCell<TaskList>>>,
 }
 impl TaskQueue {
     fn insert(&mut self, task: Task) {
-        // TODO(marin): The index uid should be remapped to a task queue identifier here
-        let uid = task.index_uid.unwrap().into_inner();
         let id = task.id;
+        let id = task.id;
+        let uid = match task.index_uid {
+            Some(uid) => TaskListIdentifier::Index(uid.into_inner()),
+            None => unreachable!(),
+        };
         let kind = match task.content {
             TaskContent::DocumentAddition {
@@ -161,7 +183,7 @@ impl TaskQueue {
                 list.push(task);
             }
             Entry::Vacant(entry) => {
-                let mut task_list = TaskList::new(entry.key().to_owned());
+                let mut task_list = TaskList::new(entry.key().clone());
                 task_list.push(task);
                 let task_list = Arc::new(AtomicRefCell::new(task_list));
                 entry.insert(task_list.clone());
@@ -182,7 +204,7 @@ impl TaskQueue {
             // After being mutated, the head is reinserted to the correct position.
                self.queue.push(head);
            } else {
-                self.index_tasks.remove(&head.borrow().index);
+                self.index_tasks.remove(&head.borrow().id);
            }
            Some(result)
diff --git a/meilisearch-lib/src/tasks/task.rs b/meilisearch-lib/src/tasks/task.rs
index d7a73a2ae..c20d2151b 100644
--- a/meilisearch-lib/src/tasks/task.rs
+++ b/meilisearch-lib/src/tasks/task.rs
@@ -78,6 +78,12 @@ pub struct Task {
     /// Dump task), then this is None.
     // TODO: the next time we make a breaking change to dumps, it would be a good idea to move
     // this field inside of the TaskContent.
+    #[cfg_attr(
+        test,
+        proptest(
+            strategy = "proptest::option::weighted(proptest::option::Probability::new(0.99), IndexUid::arbitrary())"
+        )
+    )]
     pub index_uid: Option<IndexUid>,
     pub content: TaskContent,
     pub events: Vec<TaskEvent>,
 }
@@ -165,6 +171,10 @@ pub enum TaskContent {
     IndexUpdate {
         primary_key: Option<String>,
     },
+    Dump {
+        #[cfg_attr(test, proptest(value = "PathBuf::from(\".\")"))]
+        path: PathBuf,
+    },
 }
 #[cfg(test)]

From 737b891a41ac2deb0954da88554bfda398afa001 Mon Sep 17 00:00:00 2001
From: ad hoc
Date: Tue, 17 May 2022 10:58:15 +0200
Subject: [PATCH 03/33] introduce Dump TaskListIdentifier variant
---
 meilisearch-lib/src/index_controller/mod.rs | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs
index 7ba91dfca..4be90489a 100644
--- a/meilisearch-lib/src/index_controller/mod.rs
+++ b/meilisearch-lib/src/index_controller/mod.rs
@@ -76,6 +76,7 @@ pub struct IndexController<U, I> {
     index_resolver: Arc<IndexResolver<U, I>>,
     scheduler: Arc<RwLock<Scheduler>>,
     task_store: TaskStore,
+    dump_path: PathBuf,
     dump_handle: dump_actor::DumpActorHandleImpl,
     update_file_store: UpdateFileStore,
 }
@@ -89,6 +90,7 @@ impl<U, I> Clone for IndexController<U, I> {
             dump_handle: self.dump_handle.clone(),
             update_file_store: self.update_file_store.clone(),
             task_store: self.task_store.clone(),
+            dump_path: self.dump_path.clone(),
         }
     }
 }
@@ -234,7 +236,7 @@ impl IndexControllerBuilder {
             receiver,
             update_file_store.clone(),
             scheduler.clone(),
-            dump_path,
+            dump_path.clone(),
             analytics_path,
             index_size,
             task_store_size,
@@ -269,6 +271,7 @@ impl IndexControllerBuilder {
             index_resolver,
             scheduler,
             dump_handle,
+            dump_path,
             update_file_store,
             task_store,
         })
@@ -425,6 +428,15 @@ where
         Ok(task)
     }
+    pub async fn register_dump_task(&self) -> Result<Task> {
+        let content = TaskContent::Dump {
+            path: self.dump_path.clone(),
+        };
+        let task = self.task_store.register(None, content).await?;
+        self.scheduler.read().await.notify();
+        Ok(task)
+    }
+
     pub async fn get_task(&self, id: TaskId, filter: Option<TaskFilter>) -> Result<Task> {
         let task = self.scheduler.read().await.get_task(id, filter).await?;
         Ok(task)
     }

From 2f0625a984815e259e4953dc924e7c5d49e781fc Mon Sep 17 00:00:00 2001
From: ad hoc
Date: Tue, 17 May 2022 12:15:37 +0200
Subject: [PATCH 04/33] register and insert dump task in scheduler
---
 meilisearch-lib/src/tasks/scheduler.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/meilisearch-lib/src/tasks/scheduler.rs b/meilisearch-lib/src/tasks/scheduler.rs
index 67aa6d8e5..1f76f179a 100644
--- a/meilisearch-lib/src/tasks/scheduler.rs
+++ b/meilisearch-lib/src/tasks/scheduler.rs
@@ -148,7 +148,8 @@ impl TaskQueue {
         let id = task.id;
         let uid = match task.index_uid {
             Some(uid) => TaskListIdentifier::Index(uid.into_inner()),
-            None => unreachable!(),
+            None if matches!(task.content, TaskContent::Dump { ..
}) => TaskListIdentifier::Dump, + None => unreachable!("invalid task state"), }; let kind = match task.content { TaskContent::DocumentAddition { From 7fa3eb1003a7df4fda090d5f6dc0b3648a60e37b Mon Sep 17 00:00:00 2001 From: ad hoc Date: Tue, 17 May 2022 17:40:59 +0200 Subject: [PATCH 05/33] register dump tasks --- meilisearch-http/src/routes/dump.rs | 3 ++- meilisearch-http/src/task.rs | 4 +++- meilisearch-lib/src/tasks/task_store/store.rs | 7 ++++--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/meilisearch-http/src/routes/dump.rs b/meilisearch-http/src/routes/dump.rs index 65cd7521f..b58552f27 100644 --- a/meilisearch-http/src/routes/dump.rs +++ b/meilisearch-http/src/routes/dump.rs @@ -8,6 +8,7 @@ use serde_json::json; use crate::analytics::Analytics; use crate::extractors::authentication::{policies::*, GuardedData}; use crate::extractors::sequential_extractor::SeqHandler; +use crate::task::SummarizedTaskView; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump)))) @@ -23,7 +24,7 @@ pub async fn create_dump( ) -> Result { analytics.publish("Dump Created".to_string(), json!({}), Some(&req)); - let res = meilisearch.create_dump().await?; + let res: SummarizedTaskView = meilisearch.register_dump_task().await?.into(); debug!("returns: {:?}", res); Ok(HttpResponse::Accepted().json(res)) diff --git a/meilisearch-http/src/task.rs b/meilisearch-http/src/task.rs index 56a181d29..c2399f141 100644 --- a/meilisearch-http/src/task.rs +++ b/meilisearch-http/src/task.rs @@ -24,6 +24,7 @@ enum TaskType { DocumentDeletion, SettingsUpdate, ClearAll, + Dump, } impl From for TaskType { @@ -43,6 +44,7 @@ impl From for TaskType { TaskContent::IndexDeletion => TaskType::IndexDeletion, TaskContent::IndexCreation { .. } => TaskType::IndexCreation, TaskContent::IndexUpdate { .. 
} => TaskType::IndexUpdate,
+            TaskContent::Dump { path } => TaskType::Dump,
             _ => unreachable!("unexpected task type"),
         }
     }
@@ -216,7 +218,7 @@ impl From<Task> for TaskView {
                 TaskType::IndexUpdate,
                 Some(TaskDetails::IndexInfo { primary_key }),
             ),
-            TaskContent::Dump { path: _ } => todo!(),
+            TaskContent::Dump { path: _ } => (TaskType::Dump, None),
         };
         // An event always has at least one event: "Created"
diff --git a/meilisearch-lib/src/tasks/task_store/store.rs b/meilisearch-lib/src/tasks/task_store/store.rs
index 912047d1e..902f80560 100644
--- a/meilisearch-lib/src/tasks/task_store/store.rs
+++ b/meilisearch-lib/src/tasks/task_store/store.rs
@@ -108,9 +108,10 @@ impl Store {
     pub fn put(&self, txn: &mut RwTxn, task: &Task) -> Result<()> {
         self.tasks.put(txn, &BEU64::new(task.id), task)?;
-        self.uids_task_ids
-            // TODO(marin): The index uid should be remapped to a task queue identifier here
-            .put(txn, &(&task.index_uid.as_ref().unwrap(), task.id), &())?;
+        // only add the task to the indexes index if it has an index_uid
+        if let Some(ref index_uid) = task.index_uid {
+            self.uids_task_ids.put(txn, &(&index_uid, task.id), &())?;
+        }
         Ok(())
     }

From 6a0231cb283997ee3715a6beae7b70227b271f2a Mon Sep 17 00:00:00 2001
From: ad hoc
Date: Tue, 17 May 2022 17:55:47 +0200
Subject: [PATCH 06/33] perform dump method
---
 meilisearch-lib/src/index_resolver/mod.rs | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/meilisearch-lib/src/index_resolver/mod.rs b/meilisearch-lib/src/index_resolver/mod.rs
index 3b8bdd631..33be749b1 100644
--- a/meilisearch-lib/src/index_resolver/mod.rs
+++ b/meilisearch-lib/src/index_resolver/mod.rs
@@ -3,7 +3,7 @@ pub mod index_store;
 pub mod meta_store;
 use std::convert::{TryFrom, TryInto};
-use std::path::Path;
+use std::path::{Path, PathBuf};
 use std::sync::Arc;
 use error::{IndexResolverError, Result};
@@ -351,10 +351,14 @@ where
                 Ok(TaskResult::Other)
             }
-            TaskContent::Dump { path: _ } => Ok(TaskResult::Other),
+            TaskContent::Dump { path } => self.perform_dump(path).await,
         }
     }
+    async fn perform_dump(&self, path: &PathBuf) -> Result<TaskResult> {
+        todo!()
+    }
+
     async fn process_job(&self, job: Job) {
         match job {
             Job::Dump { ret, path } => {

From 46cdc1770168ae8250b804b53cff33d845b8958e Mon Sep 17 00:00:00 2001
From: ad hoc
Date: Thu, 19 May 2022 12:43:46 +0200
Subject: [PATCH 07/33] make scheduler accept multiple batch handlers
---
 meilisearch-http/src/task.rs                  |   2 +-
 .../dump_actor => dump}/actor.rs              |   0
 .../dump_actor => dump}/compat/mod.rs         |   0
 .../dump_actor => dump}/compat/v2.rs          |   0
 .../dump_actor => dump}/compat/v3.rs          |   0
 .../dump_actor => dump}/error.rs              |   0
 .../dump_actor => dump}/handle_impl.rs        |   0
 .../dump_actor => dump}/loaders/mod.rs        |   0
 .../dump_actor => dump}/loaders/v1.rs         |   0
 .../dump_actor => dump}/loaders/v2.rs         |   4 +-
 .../dump_actor => dump}/loaders/v3.rs         |   4 +-
 .../dump_actor => dump}/loaders/v4.rs         |   2 +-
 .../dump_actor => dump}/message.rs            |   0
 .../dump_actor => dump}/mod.rs                | 140 +++++-----
 meilisearch-lib/src/index_controller/error.rs |   2 +-
 meilisearch-lib/src/index_controller/mod.rs   |  17 +-
 meilisearch-lib/src/index_resolver/mod.rs     | 112 +-------
 meilisearch-lib/src/lib.rs                    |   1 +
 meilisearch-lib/src/snapshot.rs               |   4 +-
 meilisearch-lib/src/tasks/batch.rs            |  63 ++++-
 .../src/tasks/batch_handlers/empty_handler.rs |  20 ++
 .../batch_handlers/index_resolver_handler.rs  |  58 ++++
 .../src/tasks/batch_handlers/mod.rs           |   2 +
 meilisearch-lib/src/tasks/mod.rs              |  11 +-
 meilisearch-lib/src/tasks/scheduler.rs        | 259 ++++++++++++------
 meilisearch-lib/src/tasks/task.rs             |  31 +++-
meilisearch-lib/src/tasks/task_store/mod.rs | 55 +++- meilisearch-lib/src/tasks/update_loop.rs | 71 +++-- 28 files changed, 484 insertions(+), 374 deletions(-) rename meilisearch-lib/src/{index_controller/dump_actor => dump}/actor.rs (100%) rename meilisearch-lib/src/{index_controller/dump_actor => dump}/compat/mod.rs (100%) rename meilisearch-lib/src/{index_controller/dump_actor => dump}/compat/v2.rs (100%) rename meilisearch-lib/src/{index_controller/dump_actor => dump}/compat/v3.rs (100%) rename meilisearch-lib/src/{index_controller/dump_actor => dump}/error.rs (100%) rename meilisearch-lib/src/{index_controller/dump_actor => dump}/handle_impl.rs (100%) rename meilisearch-lib/src/{index_controller/dump_actor => dump}/loaders/mod.rs (100%) rename meilisearch-lib/src/{index_controller/dump_actor => dump}/loaders/v1.rs (100%) rename meilisearch-lib/src/{index_controller/dump_actor => dump}/loaders/v2.rs (98%) rename meilisearch-lib/src/{index_controller/dump_actor => dump}/loaders/v3.rs (97%) rename meilisearch-lib/src/{index_controller/dump_actor => dump}/loaders/v4.rs (95%) rename meilisearch-lib/src/{index_controller/dump_actor => dump}/message.rs (100%) rename meilisearch-lib/src/{index_controller/dump_actor => dump}/mod.rs (81%) create mode 100644 meilisearch-lib/src/tasks/batch_handlers/empty_handler.rs create mode 100644 meilisearch-lib/src/tasks/batch_handlers/index_resolver_handler.rs create mode 100644 meilisearch-lib/src/tasks/batch_handlers/mod.rs diff --git a/meilisearch-http/src/task.rs b/meilisearch-http/src/task.rs index c2399f141..5a8542ff8 100644 --- a/meilisearch-http/src/task.rs +++ b/meilisearch-http/src/task.rs @@ -44,7 +44,7 @@ impl From for TaskType { TaskContent::IndexDeletion => TaskType::IndexDeletion, TaskContent::IndexCreation { .. } => TaskType::IndexCreation, TaskContent::IndexUpdate { .. } => TaskType::IndexUpdate, - TaskContent::Dump { path } => TaskType::Dump, + TaskContent::Dump { .. 
} => TaskType::Dump, _ => unreachable!("unexpected task type"), } } diff --git a/meilisearch-lib/src/index_controller/dump_actor/actor.rs b/meilisearch-lib/src/dump/actor.rs similarity index 100% rename from meilisearch-lib/src/index_controller/dump_actor/actor.rs rename to meilisearch-lib/src/dump/actor.rs diff --git a/meilisearch-lib/src/index_controller/dump_actor/compat/mod.rs b/meilisearch-lib/src/dump/compat/mod.rs similarity index 100% rename from meilisearch-lib/src/index_controller/dump_actor/compat/mod.rs rename to meilisearch-lib/src/dump/compat/mod.rs diff --git a/meilisearch-lib/src/index_controller/dump_actor/compat/v2.rs b/meilisearch-lib/src/dump/compat/v2.rs similarity index 100% rename from meilisearch-lib/src/index_controller/dump_actor/compat/v2.rs rename to meilisearch-lib/src/dump/compat/v2.rs diff --git a/meilisearch-lib/src/index_controller/dump_actor/compat/v3.rs b/meilisearch-lib/src/dump/compat/v3.rs similarity index 100% rename from meilisearch-lib/src/index_controller/dump_actor/compat/v3.rs rename to meilisearch-lib/src/dump/compat/v3.rs diff --git a/meilisearch-lib/src/index_controller/dump_actor/error.rs b/meilisearch-lib/src/dump/error.rs similarity index 100% rename from meilisearch-lib/src/index_controller/dump_actor/error.rs rename to meilisearch-lib/src/dump/error.rs diff --git a/meilisearch-lib/src/index_controller/dump_actor/handle_impl.rs b/meilisearch-lib/src/dump/handle_impl.rs similarity index 100% rename from meilisearch-lib/src/index_controller/dump_actor/handle_impl.rs rename to meilisearch-lib/src/dump/handle_impl.rs diff --git a/meilisearch-lib/src/index_controller/dump_actor/loaders/mod.rs b/meilisearch-lib/src/dump/loaders/mod.rs similarity index 100% rename from meilisearch-lib/src/index_controller/dump_actor/loaders/mod.rs rename to meilisearch-lib/src/dump/loaders/mod.rs diff --git a/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs b/meilisearch-lib/src/dump/loaders/v1.rs similarity index 100% rename from meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs rename to meilisearch-lib/src/dump/loaders/v1.rs diff --git a/meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs b/meilisearch-lib/src/dump/loaders/v2.rs similarity index 98% rename from meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs rename to meilisearch-lib/src/dump/loaders/v2.rs index e2445913e..5926de931 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs +++ b/meilisearch-lib/src/dump/loaders/v2.rs @@ -5,8 +5,8 @@ use std::path::{Path, PathBuf}; use serde_json::{Deserializer, Value}; use tempfile::NamedTempFile; -use crate::index_controller::dump_actor::compat::{self, v2, v3}; -use crate::index_controller::dump_actor::Metadata; +use crate::dump::compat::{self, v2, v3}; +use crate::dump::Metadata; use crate::options::IndexerOpts; /// The dump v2 reads the dump folder and patches all the needed file to make it compatible with a diff --git a/meilisearch-lib/src/index_controller/dump_actor/loaders/v3.rs b/meilisearch-lib/src/dump/loaders/v3.rs similarity index 97% rename from meilisearch-lib/src/index_controller/dump_actor/loaders/v3.rs rename to meilisearch-lib/src/dump/loaders/v3.rs index 902691511..0a2ea438b 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/loaders/v3.rs +++ b/meilisearch-lib/src/dump/loaders/v3.rs @@ -9,8 +9,8 @@ use log::info; use tempfile::tempdir; use uuid::Uuid; -use crate::index_controller::dump_actor::compat::v3; -use crate::index_controller::dump_actor::Metadata; +use 
crate::dump::compat::v3; +use crate::dump::Metadata; use crate::index_resolver::meta_store::{DumpEntry, IndexMeta}; use crate::options::IndexerOpts; use crate::tasks::task::{Task, TaskId}; diff --git a/meilisearch-lib/src/index_controller/dump_actor/loaders/v4.rs b/meilisearch-lib/src/dump/loaders/v4.rs similarity index 95% rename from meilisearch-lib/src/index_controller/dump_actor/loaders/v4.rs rename to meilisearch-lib/src/dump/loaders/v4.rs index 38d61f146..c898f83b1 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/loaders/v4.rs +++ b/meilisearch-lib/src/dump/loaders/v4.rs @@ -6,7 +6,7 @@ use meilisearch_auth::AuthController; use milli::heed::EnvOpenOptions; use crate::analytics; -use crate::index_controller::dump_actor::Metadata; +use crate::dump::Metadata; use crate::index_resolver::IndexResolver; use crate::options::IndexerOpts; use crate::tasks::TaskStore; diff --git a/meilisearch-lib/src/index_controller/dump_actor/message.rs b/meilisearch-lib/src/dump/message.rs similarity index 100% rename from meilisearch-lib/src/index_controller/dump_actor/message.rs rename to meilisearch-lib/src/dump/message.rs diff --git a/meilisearch-lib/src/index_controller/dump_actor/mod.rs b/meilisearch-lib/src/dump/mod.rs similarity index 81% rename from meilisearch-lib/src/index_controller/dump_actor/mod.rs rename to meilisearch-lib/src/dump/mod.rs index 00be3a371..bc717b35e 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-lib/src/dump/mod.rs @@ -3,28 +3,24 @@ use std::path::{Path, PathBuf}; use std::sync::Arc; use anyhow::bail; -use log::{info, trace}; +use log::info; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; pub use actor::DumpActor; pub use handle_impl::*; -use meilisearch_auth::AuthController; pub use message::DumpMsg; use tempfile::TempDir; -use tokio::fs::create_dir_all; -use tokio::sync::{oneshot, RwLock}; +use tokio::sync::RwLock; -use crate::analytics; -use crate::compression::{from_tar_gz, to_tar_gz}; -use crate::index_controller::dump_actor::error::DumpActorError; -use crate::index_controller::dump_actor::loaders::{v2, v3, v4}; +use crate::compression::from_tar_gz; use crate::options::IndexerOpts; -use crate::tasks::task::Job; use crate::tasks::Scheduler; use crate::update_file_store::UpdateFileStore; use error::Result; +use self::loaders::{v2, v3, v4}; + mod actor; mod compat; pub mod error; @@ -316,7 +312,7 @@ fn persist_dump(dst_path: impl AsRef, tmp_dst: TempDir) -> anyhow::Result< Ok(()) } -struct DumpJob { +pub struct DumpJob { dump_path: PathBuf, db_path: PathBuf, update_file_store: UpdateFileStore, @@ -328,65 +324,65 @@ struct DumpJob { impl DumpJob { async fn run(self) -> Result<()> { - trace!("Performing dump."); - - create_dir_all(&self.dump_path).await?; - - let temp_dump_dir = tokio::task::spawn_blocking(tempfile::TempDir::new).await??; - let temp_dump_path = temp_dump_dir.path().to_owned(); - - let meta = MetadataVersion::new_v4(self.index_db_size, self.update_db_size); - let meta_path = temp_dump_path.join(META_FILE_NAME); - let mut meta_file = File::create(&meta_path)?; - serde_json::to_writer(&mut meta_file, &meta)?; - analytics::copy_user_id(&self.db_path, &temp_dump_path); - - create_dir_all(&temp_dump_path.join("indexes")).await?; - - let (sender, receiver) = oneshot::channel(); - - self.scheduler - .write() - .await - .schedule_job(Job::Dump { - ret: sender, - path: temp_dump_path.clone(), - }) - .await; - - // wait until the job has started performing before finishing the dump process - let sender = 
receiver.await??; - - AuthController::dump(&self.db_path, &temp_dump_path)?; - - //TODO(marin): this is not right, the scheduler should dump itself, not do it here... - self.scheduler - .read() - .await - .dump(&temp_dump_path, self.update_file_store.clone()) - .await?; - - let dump_path = tokio::task::spawn_blocking(move || -> Result { - // for now we simply copy the updates/updates_files - // FIXME: We may copy more files than necessary, if new files are added while we are - // performing the dump. We need a way to filter them out. - - let temp_dump_file = tempfile::NamedTempFile::new_in(&self.dump_path)?; - to_tar_gz(temp_dump_path, temp_dump_file.path()) - .map_err(|e| DumpActorError::Internal(e.into()))?; - - let dump_path = self.dump_path.join(self.uid).with_extension("dump"); - temp_dump_file.persist(&dump_path)?; - - Ok(dump_path) - }) - .await??; - - // notify the update loop that we are finished performing the dump. - let _ = sender.send(()); - - info!("Created dump in {:?}.", dump_path); - + // trace!("Performing dump."); + // + // create_dir_all(&self.dump_path).await?; + // + // let temp_dump_dir = tokio::task::spawn_blocking(tempfile::TempDir::new).await??; + // let temp_dump_path = temp_dump_dir.path().to_owned(); + // + // let meta = MetadataVersion::new_v4(self.index_db_size, self.update_db_size); + // let meta_path = temp_dump_path.join(META_FILE_NAME); + // let mut meta_file = File::create(&meta_path)?; + // serde_json::to_writer(&mut meta_file, &meta)?; + // analytics::copy_user_id(&self.db_path, &temp_dump_path); + // + // create_dir_all(&temp_dump_path.join("indexes")).await?; + // + // let (sender, receiver) = oneshot::channel(); + // + // self.scheduler + // .write() + // .await + // .schedule_job(Job::Dump { + // ret: sender, + // path: temp_dump_path.clone(), + // }) + // .await; + // + // // wait until the job has started performing before finishing the dump process + // let sender = receiver.await??; + // + // AuthController::dump(&self.db_path, &temp_dump_path)?; + // + // //TODO(marin): this is not right, the scheduler should dump itself, not do it here... + // self.scheduler + // .read() + // .await + // .dump(&temp_dump_path, self.update_file_store.clone()) + // .await?; + // + // let dump_path = tokio::task::spawn_blocking(move || -> Result { + // // for now we simply copy the updates/updates_files + // // FIXME: We may copy more files than necessary, if new files are added while we are + // // performing the dump. We need a way to filter them out. + // + // let temp_dump_file = tempfile::NamedTempFile::new_in(&self.dump_path)?; + // to_tar_gz(temp_dump_path, temp_dump_file.path()) + // .map_err(|e| DumpActorError::Internal(e.into()))?; + // + // let dump_path = self.dump_path.join(self.uid).with_extension("dump"); + // temp_dump_file.persist(&dump_path)?; + // + // Ok(dump_path) + // }) + // .await??; + // + // // notify the update loop that we are finished performing the dump. 
+ // let _ = sender.send(()); + // + // info!("Created dump in {:?}.", dump_path); + // Ok(()) } } @@ -401,7 +397,7 @@ mod test { use crate::options::SchedulerConfig; use crate::tasks::error::Result as TaskResult; use crate::tasks::task::{Task, TaskId}; - use crate::tasks::{MockTaskPerformer, TaskFilter, TaskStore}; + use crate::tasks::{BatchHandler, TaskFilter, TaskStore}; use crate::update_file_store::UpdateFileStore; fn setup() { @@ -426,7 +422,7 @@ mod test { let mocker = Mocker::default(); let update_file_store = UpdateFileStore::mock(mocker); - let mut performer = MockTaskPerformer::new(); + let mut performer = BatchHandler::new(); performer .expect_process_job() .once() @@ -480,7 +476,7 @@ mod test { ) .then(|_| Ok(Vec::new())); let task_store = TaskStore::mock(mocker); - let mut performer = MockTaskPerformer::new(); + let mut performer = BatchHandler::new(); performer .expect_process_job() .once() diff --git a/meilisearch-lib/src/index_controller/error.rs b/meilisearch-lib/src/index_controller/error.rs index 85af76623..11ef03d73 100644 --- a/meilisearch-lib/src/index_controller/error.rs +++ b/meilisearch-lib/src/index_controller/error.rs @@ -6,11 +6,11 @@ use tokio::task::JoinError; use super::DocumentAdditionFormat; use crate::document_formats::DocumentFormatError; +use crate::dump::error::DumpActorError; use crate::index::error::IndexError; use crate::tasks::error::TaskError; use crate::update_file_store::UpdateFileStoreError; -use super::dump_actor::error::DumpActorError; use crate::index_resolver::error::IndexResolverError; pub type Result = std::result::Result; diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index 4be90489a..b73402d56 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -19,25 +19,23 @@ use tokio::time::sleep; use uuid::Uuid; use crate::document_formats::{read_csv, read_json, read_ndjson}; +use crate::dump::{self, load_dump, DumpActor, DumpActorHandle, DumpActorHandleImpl, DumpInfo}; use crate::index::{ Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked, }; -use crate::index_controller::dump_actor::{load_dump, DumpActor, DumpActorHandleImpl}; use crate::options::{IndexerOpts, SchedulerConfig}; use crate::snapshot::{load_snapshot, SnapshotService}; use crate::tasks::error::TaskError; use crate::tasks::task::{DocumentDeletion, Task, TaskContent, TaskId}; -use crate::tasks::{Scheduler, TaskFilter, TaskStore}; +use crate::tasks::{BatchHandler, EmptyBatchHandler, Scheduler, TaskFilter, TaskStore}; use error::Result; -use self::dump_actor::{DumpActorHandle, DumpInfo}; use self::error::IndexControllerError; use crate::index_resolver::index_store::{IndexStore, MapIndexStore}; use crate::index_resolver::meta_store::{HeedMetaStore, IndexMetaStore}; use crate::index_resolver::{create_index_resolver, IndexResolver, IndexUid}; use crate::update_file_store::UpdateFileStore; -mod dump_actor; pub mod error; pub mod versioning; @@ -73,12 +71,12 @@ pub struct IndexSettings { } pub struct IndexController { - index_resolver: Arc>, + pub index_resolver: Arc>, scheduler: Arc>, task_store: TaskStore, dump_path: PathBuf, - dump_handle: dump_actor::DumpActorHandleImpl, - update_file_store: UpdateFileStore, + dump_handle: dump::DumpActorHandleImpl, + pub update_file_store: UpdateFileStore, } /// Need a custom implementation for clone because deriving require that U and I are clone. 
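The trailing context line above (the note about needing a custom `Clone` implementation) is worth unpacking: `#[derive(Clone)]` would add `U: Clone` and `I: Clone` bounds to the generated impl, even though the controller only holds the resolver behind an `Arc`. A small sketch of the pattern, with hypothetical placeholder types standing in for the real stores:

```rust
use std::sync::Arc;

// Placeholder stand-ins for the meta-store and index-store type parameters.
#[allow(dead_code)]
struct Resolver<U, I> {
    meta: U,
    store: I,
}

struct Controller<U, I> {
    resolver: Arc<Resolver<U, I>>,
}

// `#[derive(Clone)]` would require `U: Clone` and `I: Clone`. Cloning the
// `Arc` by hand only bumps a reference count, so no such bounds are needed.
impl<U, I> Clone for Controller<U, I> {
    fn clone(&self) -> Self {
        Self {
            resolver: self.resolver.clone(),
        }
    }
}

fn main() {
    let c = Controller {
        resolver: Arc::new(Resolver { meta: (), store: () }),
    };
    let _c2 = c.clone();
}
```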
@@ -223,8 +221,9 @@ impl IndexControllerBuilder { )?); let task_store = TaskStore::new(meta_env)?; - let scheduler = - Scheduler::new(task_store.clone(), index_resolver.clone(), scheduler_config)?; + let handlers: Vec> = + vec![index_resolver.clone(), Arc::new(EmptyBatchHandler)]; + let scheduler = Scheduler::new(task_store.clone(), handlers, scheduler_config)?; let dump_path = self .dump_dst diff --git a/meilisearch-lib/src/index_resolver/mod.rs b/meilisearch-lib/src/index_resolver/mod.rs index 33be749b1..f463cd24d 100644 --- a/meilisearch-lib/src/index_resolver/mod.rs +++ b/meilisearch-lib/src/index_resolver/mod.rs @@ -3,7 +3,7 @@ pub mod index_store; pub mod meta_store; use std::convert::{TryFrom, TryInto}; -use std::path::{Path, PathBuf}; +use std::path::Path; use std::sync::Arc; use error::{IndexResolverError, Result}; @@ -14,15 +14,12 @@ use milli::heed::Env; use milli::update::{DocumentDeletionResult, IndexerConfig}; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; -use tokio::sync::oneshot; use tokio::task::spawn_blocking; use uuid::Uuid; use crate::index::{error::Result as IndexResult, Index}; use crate::options::IndexerOpts; -use crate::tasks::batch::Batch; -use crate::tasks::task::{DocumentDeletion, Job, Task, TaskContent, TaskEvent, TaskId, TaskResult}; -use crate::tasks::TaskPerformer; +use crate::tasks::task::{DocumentDeletion, Task, TaskContent, TaskEvent, TaskId, TaskResult}; use crate::update_file_store::UpdateFileStore; use self::meta_store::IndexMeta; @@ -91,69 +88,10 @@ impl TryInto for String { } } -#[async_trait::async_trait] -impl TaskPerformer for IndexResolver -where - U: IndexMetaStore + Send + Sync + 'static, - I: IndexStore + Send + Sync + 'static, -{ - async fn process_batch(&self, mut batch: Batch) -> Batch { - // If a batch contains multiple tasks, then it must be a document addition batch - if let Some(Task { - content: TaskContent::DocumentAddition { .. }, - .. - }) = batch.tasks.first() - { - debug_assert!(batch.tasks.iter().all(|t| matches!( - t, - Task { - content: TaskContent::DocumentAddition { .. }, - .. 
- } - ))); - - self.process_document_addition_batch(batch).await - } else { - if let Some(task) = batch.tasks.first_mut() { - task.events - .push(TaskEvent::Processing(OffsetDateTime::now_utc())); - - match self.process_task(task).await { - Ok(success) => { - task.events.push(TaskEvent::Succeded { - result: success, - timestamp: OffsetDateTime::now_utc(), - }); - } - Err(err) => task.events.push(TaskEvent::Failed { - error: err.into(), - timestamp: OffsetDateTime::now_utc(), - }), - } - } - batch - } - } - - async fn process_job(&self, job: Job) { - self.process_job(job).await; - } - - async fn finish(&self, batch: &Batch) { - for task in &batch.tasks { - if let Some(content_uuid) = task.get_content_uuid() { - if let Err(e) = self.file_store.delete(content_uuid).await { - log::error!("error deleting update file: {}", e); - } - } - } - } -} - pub struct IndexResolver { index_uuid_store: U, index_store: I, - file_store: UpdateFileStore, + pub file_store: UpdateFileStore, } impl IndexResolver { @@ -189,7 +127,7 @@ where } } - async fn process_document_addition_batch(&self, mut batch: Batch) -> Batch { + pub async fn process_document_addition_batch(&self, mut tasks: Vec) -> Vec { fn get_content_uuid(task: &Task) -> Uuid { match task { Task { @@ -200,9 +138,9 @@ where } } - let content_uuids = batch.tasks.iter().map(get_content_uuid).collect::>(); + let content_uuids = tasks.iter().map(get_content_uuid).collect::>(); - match batch.tasks.first() { + match tasks.first() { Some(Task { index_uid: Some(ref index_uid), id, @@ -231,13 +169,13 @@ where Ok(index) => index, Err(e) => { let error = ResponseError::from(e); - for task in batch.tasks.iter_mut() { + for task in tasks.iter_mut() { task.events.push(TaskEvent::Failed { error: error.clone(), timestamp: now, }); } - return batch; + return tasks; } }; @@ -269,17 +207,17 @@ where }, }; - for task in batch.tasks.iter_mut() { + for task in tasks.iter_mut() { task.events.push(event.clone()); } - batch + tasks } _ => panic!("invalid batch!"), } } - async fn process_task(&self, task: &Task) -> Result { + pub async fn process_task(&self, task: &Task) -> Result { let index_uid = task.index_uid.clone(); match &task.content { TaskContent::DocumentAddition { .. } => panic!("updates should be handled by batch"), @@ -351,33 +289,7 @@ where Ok(TaskResult::Other) } - TaskContent::Dump { path } => self.perform_dump(path).await, - } - } - - async fn perform_dump(&self, path: &PathBuf) -> Result { - todo!() - } - - async fn process_job(&self, job: Job) { - match job { - Job::Dump { ret, path } => { - log::trace!("The Dump task is getting executed"); - - let (sender, receiver) = oneshot::channel(); - if ret.send(self.dump(path).await.map(|_| sender)).is_err() { - log::error!("The dump actor died."); - } - - // wait until the dump has finished performing. 
-                let _ = receiver.await;
-            }
-            Job::Empty => log::error!("Tried to process an empty task."),
-            Job::Snapshot(job) => {
-                if let Err(e) = job.run().await {
-                    log::error!("Error performing snapshot: {}", e);
-                }
-            }
+            _ => unreachable!("Invalid task for index resolver"),
         }
     }
diff --git a/meilisearch-lib/src/lib.rs b/meilisearch-lib/src/lib.rs
index 1161340ba..3d3d5e860 100644
--- a/meilisearch-lib/src/lib.rs
+++ b/meilisearch-lib/src/lib.rs
@@ -3,6 +3,7 @@ pub mod error;
 pub mod options;
 mod analytics;
+mod dump;
 pub mod index;
 pub mod index_controller;
 mod index_resolver;
diff --git a/meilisearch-lib/src/snapshot.rs b/meilisearch-lib/src/snapshot.rs
index 6c27ad2f0..6dda0f3e8 100644
--- a/meilisearch-lib/src/snapshot.rs
+++ b/meilisearch-lib/src/snapshot.rs
@@ -14,7 +14,6 @@ use walkdir::WalkDir;
 use crate::compression::from_tar_gz;
 use crate::index_controller::open_meta_env;
 use crate::index_controller::versioning::VERSION_FILE_NAME;
-use crate::tasks::task::Job;
 use crate::tasks::Scheduler;
 pub struct SnapshotService {
@@ -39,8 +38,7 @@ impl SnapshotService {
                 meta_env_size: self.meta_env_size,
                 index_size: self.index_size,
             };
-            let job = Job::Snapshot(snapshot_job);
-            self.scheduler.write().await.schedule_job(job).await;
+            self.scheduler.write().await.register_snapshot(snapshot_job);
             sleep(self.snapshot_period).await;
         }
     }
diff --git a/meilisearch-lib/src/tasks/batch.rs b/meilisearch-lib/src/tasks/batch.rs
index 4a8cf7907..88c73e3de 100644
--- a/meilisearch-lib/src/tasks/batch.rs
+++ b/meilisearch-lib/src/tasks/batch.rs
@@ -1,22 +1,75 @@
 use time::OffsetDateTime;
-use super::task::Task;
+use crate::snapshot::SnapshotJob;
+
+use super::task::{Task, TaskEvent};
 pub type BatchId = u64;
+#[derive(Debug)]
+pub enum BatchContent {
+    DocumentAddtitionBatch(Vec<Task>),
+    IndexUpdate(Task),
+    Dump(Task),
+    Snapshot(SnapshotJob),
+    // Symbolizes an empty batch. This can occur when we were woken, but there wasn't any work to do.
+    Empty,
+}
+
+impl BatchContent {
+    pub fn first(&self) -> Option<&Task> {
+        match self {
+            BatchContent::DocumentAddtitionBatch(ts) => ts.first(),
+            BatchContent::Dump(t) | BatchContent::IndexUpdate(t) => Some(t),
+            BatchContent::Snapshot(_) | BatchContent::Empty => None,
+        }
+    }
+
+    pub fn push_event(&mut self, event: TaskEvent) {
+        match self {
+            BatchContent::DocumentAddtitionBatch(ts) => {
+                ts.iter_mut().for_each(|t| t.events.push(event.clone()))
+            }
+            BatchContent::IndexUpdate(t) | BatchContent::Dump(t) => t.events.push(event),
+            BatchContent::Snapshot(_) | BatchContent::Empty => (),
+        }
+    }
+}
+
 #[derive(Debug)]
 pub struct Batch {
-    pub id: BatchId,
+    // Only batches that contain persistent tasks are given an id. Snapshot batches don't have
+    // an id.
+    pub id: Option<BatchId>,
     pub created_at: OffsetDateTime,
-    pub tasks: Vec<Task>,
+    pub content: BatchContent,
 }
 impl Batch {
+    pub fn new(id: Option<BatchId>, content: BatchContent) -> Self {
+        Self {
+            id,
+            created_at: OffsetDateTime::now_utc(),
+            content,
+        }
+    }
     pub fn len(&self) -> usize {
-        self.tasks.len()
+        match self.content {
+            BatchContent::DocumentAddtitionBatch(ref ts) => ts.len(),
+            BatchContent::IndexUpdate(_) | BatchContent::Dump(_) | BatchContent::Snapshot(_) => 1,
+            BatchContent::Empty => 0,
+        }
     }
     pub fn is_empty(&self) -> bool {
-        self.tasks.is_empty()
+        self.len() == 0
+    }
+
+    pub fn empty() -> Self {
+        Self {
+            id: None,
+            created_at: OffsetDateTime::now_utc(),
+            content: BatchContent::Empty,
+        }
     }
 }
diff --git a/meilisearch-lib/src/tasks/batch_handlers/empty_handler.rs b/meilisearch-lib/src/tasks/batch_handlers/empty_handler.rs
new file mode 100644
index 000000000..5d6aa2275
--- /dev/null
+++ b/meilisearch-lib/src/tasks/batch_handlers/empty_handler.rs
@@ -0,0 +1,20 @@
+use crate::tasks::batch::{Batch, BatchContent};
+use crate::tasks::BatchHandler;
+
+/// A sink handler for empty tasks.
+pub struct EmptyBatchHandler;
+
+#[async_trait::async_trait]
+impl BatchHandler for EmptyBatchHandler {
+    fn accept(&self, batch: &Batch) -> bool {
+        matches!(batch.content, BatchContent::Empty)
+    }
+
+    async fn process_batch(&self, batch: Batch) -> Batch {
+        batch
+    }
+
+    async fn finish(&self, _: &Batch) {
+        ()
+    }
+}
diff --git a/meilisearch-lib/src/tasks/batch_handlers/index_resolver_handler.rs b/meilisearch-lib/src/tasks/batch_handlers/index_resolver_handler.rs
new file mode 100644
index 000000000..41a78a22b
--- /dev/null
+++ b/meilisearch-lib/src/tasks/batch_handlers/index_resolver_handler.rs
@@ -0,0 +1,58 @@
+use time::OffsetDateTime;
+
+use crate::index_resolver::IndexResolver;
+use crate::index_resolver::{index_store::IndexStore, meta_store::IndexMetaStore};
+use crate::tasks::batch::{Batch, BatchContent};
+use crate::tasks::task::TaskEvent;
+use crate::tasks::BatchHandler;
+
+#[async_trait::async_trait]
+impl<U, I> BatchHandler for IndexResolver<U, I>
+where
+    U: IndexMetaStore + Send + Sync + 'static,
+    I: IndexStore + Send + Sync + 'static,
+{
+    fn accept(&self, batch: &Batch) -> bool {
+        match batch.content {
+            BatchContent::DocumentAddtitionBatch(_) | BatchContent::IndexUpdate(_) => true,
+            _ => false,
+        }
+    }
+
+    async fn process_batch(&self, mut batch: Batch) -> Batch {
+        match batch.content {
+            BatchContent::DocumentAddtitionBatch(ref mut tasks) => {
+                *tasks = self
+                    .process_document_addition_batch(std::mem::take(tasks))
+                    .await;
+            }
+            BatchContent::IndexUpdate(ref mut task) => match self.process_task(&task).await {
+                Ok(success) => {
+                    task.events.push(TaskEvent::Succeded {
+                        result: success,
+                        timestamp: OffsetDateTime::now_utc(),
+                    });
+                }
+                Err(err) => task.events.push(TaskEvent::Failed {
+                    error: err.into(),
+                    timestamp: OffsetDateTime::now_utc(),
+                }),
+            },
+            _ => unreachable!(),
+        }
+
+        batch
+    }
+
+    async fn finish(&self, batch: &Batch) {
+        if let BatchContent::DocumentAddtitionBatch(ref tasks) = batch.content {
+            for task in tasks {
+                if let Some(content_uuid) = task.get_content_uuid() {
+                    if let Err(e) = self.file_store.delete(content_uuid).await {
+                        log::error!("error deleting update file: {}", e);
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/meilisearch-lib/src/tasks/batch_handlers/mod.rs b/meilisearch-lib/src/tasks/batch_handlers/mod.rs
new file mode 100644
index 000000000..0e94c76f1
--- /dev/null
+++ b/meilisearch-lib/src/tasks/batch_handlers/mod.rs
@@ -0,0 +1,2 @@
+pub mod empty_handler;
+mod
index_resolver_handler; diff --git a/meilisearch-lib/src/tasks/mod.rs b/meilisearch-lib/src/tasks/mod.rs index b56dfaf9d..bc01c4901 100644 --- a/meilisearch-lib/src/tasks/mod.rs +++ b/meilisearch-lib/src/tasks/mod.rs @@ -1,5 +1,6 @@ use async_trait::async_trait; +pub use batch_handlers::empty_handler::EmptyBatchHandler; pub use scheduler::Scheduler; pub use task_store::TaskFilter; @@ -11,9 +12,8 @@ pub use task_store::TaskStore; use batch::Batch; use error::Result; -use self::task::Job; - pub mod batch; +mod batch_handlers; pub mod error; mod scheduler; pub mod task; @@ -22,12 +22,13 @@ pub mod update_loop; #[cfg_attr(test, mockall::automock(type Error=test::DebugError;))] #[async_trait] -pub trait TaskPerformer: Sync + Send + 'static { +pub trait BatchHandler: Sync + Send + 'static { + /// return whether this handler can accept this batch + fn accept(&self, batch: &Batch) -> bool; + /// Processes the `Task` batch returning the batch with the `Task` updated. async fn process_batch(&self, batch: Batch) -> Batch; - async fn process_job(&self, job: Job); - /// `finish` is called when the result of `process` has been commited to the task store. This /// method can be used to perform cleanup after the update has been completed for example. async fn finish(&self, batch: &Batch); diff --git a/meilisearch-lib/src/tasks/scheduler.rs b/meilisearch-lib/src/tasks/scheduler.rs index 1f76f179a..f3018b782 100644 --- a/meilisearch-lib/src/tasks/scheduler.rs +++ b/meilisearch-lib/src/tasks/scheduler.rs @@ -2,6 +2,7 @@ use std::cmp::Ordering; use std::collections::{hash_map::Entry, BinaryHeap, HashMap, VecDeque}; use std::ops::{Deref, DerefMut}; use std::path::Path; +use std::slice; use std::sync::Arc; use std::time::Duration; @@ -11,24 +12,21 @@ use time::OffsetDateTime; use tokio::sync::{watch, RwLock}; use crate::options::SchedulerConfig; +use crate::snapshot::SnapshotJob; use crate::update_file_store::UpdateFileStore; -use super::batch::Batch; +use super::batch::{Batch, BatchContent}; use super::error::Result; -use super::task::{Job, Task, TaskContent, TaskEvent, TaskId}; +use super::task::{Task, TaskContent, TaskEvent, TaskId}; use super::update_loop::UpdateLoop; -use super::{TaskFilter, TaskPerformer, TaskStore}; +use super::{BatchHandler, TaskFilter, TaskStore}; #[derive(Eq, Debug, Clone, Copy)] enum TaskType { - DocumentAddition { - number: usize, - }, - DocumentUpdate { - number: usize, - }, - /// Any other kind of task, including Dumps - Other, + DocumentAddition { number: usize }, + DocumentUpdate { number: usize }, + IndexUpdate, + Dump, } /// Two tasks are equal if they have the same type. @@ -166,7 +164,13 @@ impl TaskQueue { } => TaskType::DocumentUpdate { number: documents_count, }, - _ => TaskType::Other, + TaskContent::Dump { .. } => TaskType::Dump, + TaskContent::DocumentDeletion(_) + | TaskContent::SettingsUpdate { .. } + | TaskContent::IndexDeletion + | TaskContent::IndexCreation { .. } + | TaskContent::IndexUpdate { .. } => TaskType::IndexUpdate, + _ => unreachable!("unhandled task type"), }; let task = PendingTask { kind, id }; @@ -217,11 +221,12 @@ impl TaskQueue { } pub struct Scheduler { - jobs: VecDeque, + // TODO: currently snapshots are non persistent tasks, and are treated differently. + snapshots: VecDeque, tasks: TaskQueue, store: TaskStore, - processing: Vec, + processing: Processing, next_fetched_task_id: TaskId, config: SchedulerConfig, /// Notifies the update loop that a new task was received @@ -229,14 +234,11 @@ pub struct Scheduler { } impl Scheduler { - pub fn new
<P>( + pub fn new
( + pub fn new( store: TaskStore, - performer: Arc
<P>
, + performers: Vec>, mut config: SchedulerConfig, - ) -> Result>> - where - P: TaskPerformer, - { + ) -> Result>> { let (notifier, rcv) = watch::channel(()); let debounce_time = config.debounce_duration_sec; @@ -247,11 +249,11 @@ impl Scheduler { } let this = Self { - jobs: VecDeque::new(), + snapshots: VecDeque::new(), tasks: TaskQueue::default(), store, - processing: Vec::new(), + processing: Processing::Nothing, next_fetched_task_id: 0, config, notifier, @@ -264,7 +266,7 @@ impl Scheduler { let update_loop = UpdateLoop::new( this.clone(), - performer, + performers, debounce_time.filter(|&v| v > 0).map(Duration::from_secs), rcv, ); @@ -283,9 +285,13 @@ impl Scheduler { self.tasks.insert(task); } + pub fn register_snapshot(&mut self, job: SnapshotJob) { + self.snapshots.push_back(job); + } + /// Clears the processing list, this method should be called when the processing of a batch is finished. pub fn finish(&mut self) { - self.processing.clear(); + self.processing = Processing::Nothing; } pub fn notify(&self) { @@ -293,13 +299,27 @@ impl Scheduler { } fn notify_if_not_empty(&self) { - if !self.jobs.is_empty() || !self.tasks.is_empty() { + if !self.snapshots.is_empty() || !self.tasks.is_empty() { self.notify(); } } - pub async fn update_tasks(&self, tasks: Vec) -> Result> { - self.store.update_tasks(tasks).await + pub async fn update_tasks(&self, content: BatchContent) -> Result { + match content { + BatchContent::DocumentAddtitionBatch(tasks) => { + let tasks = self.store.update_tasks(tasks).await?; + Ok(BatchContent::DocumentAddtitionBatch(tasks)) + } + BatchContent::IndexUpdate(t) => { + let mut tasks = self.store.update_tasks(vec![t]).await?; + Ok(BatchContent::IndexUpdate(tasks.remove(0))) + } + BatchContent::Dump(t) => { + let mut tasks = self.store.update_tasks(vec![t]).await?; + Ok(BatchContent::Dump(tasks.remove(0))) + } + other => Ok(other), + } } pub async fn get_task(&self, id: TaskId, filter: Option) -> Result { @@ -318,16 +338,16 @@ impl Scheduler { pub async fn get_processing_tasks(&self) -> Result> { let mut tasks = Vec::new(); - for id in self.processing.iter() { - let task = self.store.get_task(*id, None).await?; + for id in self.processing.ids() { + let task = self.store.get_task(id, None).await?; tasks.push(task); } Ok(tasks) } - pub async fn schedule_job(&mut self, job: Job) { - self.jobs.push_back(job); + pub async fn schedule_snapshot(&mut self, job: SnapshotJob) { + self.snapshots.push_back(job); self.notify(); } @@ -353,106 +373,163 @@ impl Scheduler { } /// Prepare the next batch, and set `processing` to the ids in that batch. - pub async fn prepare(&mut self) -> Result { + pub async fn prepare(&mut self) -> Result { // If there is a job to process, do it first. - if let Some(job) = self.jobs.pop_front() { + if let Some(job) = self.snapshots.pop_front() { // There is more work to do, notify the update loop self.notify_if_not_empty(); - return Ok(Pending::Job(job)); + let batch = Batch::new(None, BatchContent::Snapshot(job)); + return Ok(batch); } + // Try to fill the queue with pending tasks. 
self.fetch_pending_tasks().await?; - make_batch(&mut self.tasks, &mut self.processing, &self.config); + self.processing = make_batch(&mut self.tasks, &self.config); log::debug!("prepared batch with {} tasks", self.processing.len()); - if !self.processing.is_empty() { - let ids = std::mem::take(&mut self.processing); + if !self.processing.is_nothing() { + let (processing, mut content) = self + .store + .get_processing_tasks(std::mem::take(&mut self.processing)) + .await?; - let (ids, mut tasks) = self.store.get_pending_tasks(ids).await?; - - // The batch id is the id of the first update it contains - let id = match tasks.first() { + // The batch id is the id of the first update it contains. At this point we must have a + // valid batch that contains at least 1 task. + let id = match content.first() { Some(Task { id, .. }) => *id, _ => panic!("invalid batch"), }; - tasks.iter_mut().for_each(|t| { - t.events.push(TaskEvent::Batched { - batch_id: id, - timestamp: OffsetDateTime::now_utc(), - }) + content.push_event(TaskEvent::Batched { + batch_id: id, + timestamp: OffsetDateTime::now_utc(), }); - self.processing = ids; + self.processing = processing; - let batch = Batch { - id, - created_at: OffsetDateTime::now_utc(), - tasks, - }; + let batch = Batch::new(Some(id), content); // There is more work to do, notify the update loop self.notify_if_not_empty(); - Ok(Pending::Batch(batch)) + Ok(batch) } else { - Ok(Pending::Nothing) + Ok(Batch::empty()) } } } -#[derive(Debug)] -pub enum Pending { - Batch(Batch), - Job(Job), +#[derive(Debug, Default)] +pub enum Processing { + DocumentAdditions(Vec<TaskId>), + IndexUpdate(TaskId), + Dump(TaskId), + /// Variant used when there is nothing to process. + #[default] Nothing, } -fn make_batch(tasks: &mut TaskQueue, processing: &mut Vec<TaskId>, config: &SchedulerConfig) { - processing.clear(); +enum ProcessingIter<'a> { + Many(slice::Iter<'a, TaskId>), + Single(Option<TaskId>), } - let mut doc_count = 0; - tasks.head_mut(|list| match list.peek().copied() { - Some(PendingTask { - kind: TaskType::Other, - id, - }) => { - processing.push(id); - list.pop(); +impl<'a> Iterator for ProcessingIter<'a> { + type Item = TaskId; + + fn next(&mut self) -> Option<Self::Item> { + match self { + ProcessingIter::Many(iter) => iter.next().copied(), + ProcessingIter::Single(val) => val.take(), } - Some(PendingTask { kind, .. }) => loop { - match list.peek() { - Some(pending) if pending.kind == kind => { - // We always need to process at least one task for the scheduler to make progress. - if processing.len() >= config.max_batch_size.unwrap_or(usize::MAX).max(1) { - break; - } - let pending = list.pop().unwrap(); - processing.push(pending.id); + } } - // We add the number of documents to the count if we are scheduling document additions and - // stop adding if we already have enough. - // - // We check that bound only after adding the current task to the batch, so that a batch contains at least one task.
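A condensed sketch of the capacity rule restated in the comments above; the free-standing helper and its name are illustrative, not part of the patch:

    // A batch keeps growing until either bound is hit; the first task is
    // always accepted so the scheduler can make progress.
    fn batch_is_full(batched_tasks: usize, batched_documents: usize, config: &SchedulerConfig) -> bool {
        batched_tasks >= config.max_batch_size.unwrap_or(usize::MAX).max(1)
            || batched_documents >= config.max_documents_per_batch.unwrap_or(usize::MAX)
    }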
- match pending.kind { - TaskType::DocumentUpdate { number } - | TaskType::DocumentAddition { number } => { - doc_count += number; +impl Processing { + fn is_nothing(&self) -> bool { + matches!(self, Processing::Nothing) + } - if doc_count >= config.max_documents_per_batch.unwrap_or(usize::MAX) { + pub fn ids(&self) -> impl Iterator<Item = TaskId> + '_ { + match self { + Processing::DocumentAdditions(v) => ProcessingIter::Many(v.iter()), + Processing::IndexUpdate(id) | Processing::Dump(id) => ProcessingIter::Single(Some(*id)), + Processing::Nothing => ProcessingIter::Single(None), + } + } + + pub fn len(&self) -> usize { + match self { + Processing::DocumentAdditions(v) => v.len(), + Processing::IndexUpdate(_) | Processing::Dump(_) => 1, + Processing::Nothing => 0, + } + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +fn make_batch(tasks: &mut TaskQueue, config: &SchedulerConfig) -> Processing { + let mut doc_count = 0; + tasks + .head_mut(|list| match list.peek().copied() { + Some(PendingTask { + kind: TaskType::IndexUpdate, + id, + }) => { + list.pop(); + Processing::IndexUpdate(id) + } + Some(PendingTask { + kind: TaskType::Dump, + id, + }) => { + list.pop(); + Processing::Dump(id) + } + Some(PendingTask { kind, .. }) => { + let mut task_list = Vec::new(); + loop { + match list.peek() { + Some(pending) if pending.kind == kind => { + // We always need to process at least one task for the scheduler to make progress. + if task_list.len() >= config.max_batch_size.unwrap_or(usize::MAX).max(1) + { break; } + let pending = list.pop().unwrap(); + task_list.push(pending.id); + + // We add the number of documents to the count if we are scheduling document additions and + // stop adding if we already have enough. + // + // We check that bound only after adding the current task to the batch, so that a batch contains at least one task. + match pending.kind { + TaskType::DocumentUpdate { number } + | TaskType::DocumentAddition { number } => { + doc_count += number; + + if doc_count + >= config.max_documents_per_batch.unwrap_or(usize::MAX) + { + break; + } + } + _ => (), + } } - _ => break, } } - _ => break, + Processing::DocumentAdditions(task_list) } - }, - None => (), - }); + None => Processing::Nothing, + }) + .unwrap_or(Processing::Nothing) } #[cfg(test)] diff --git a/meilisearch-lib/src/tasks/task.rs b/meilisearch-lib/src/tasks/task.rs index c20d2151b..cb5ba671a 100644 --- a/meilisearch-lib/src/tasks/task.rs +++ b/meilisearch-lib/src/tasks/task.rs @@ -4,14 +4,12 @@ use meilisearch_error::ResponseError; use milli::update::{DocumentAdditionResult, IndexDocumentsMethod}; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; -use tokio::sync::oneshot; use uuid::Uuid; use super::batch::BatchId; use crate::{ index::{Settings, Unchecked}, - index_resolver::{error::IndexResolverError, IndexUid}, - snapshot::SnapshotJob, + index_resolver::IndexUid, }; pub type TaskId = u64; @@ -110,33 +108,6 @@ impl Task { } } -/// A job is like a volatile priority `Task`. -/// It should be processed as fast as possible and is not stored on disk. -/// This means, when Meilisearch is closed all your unprocessed jobs will disappear.
-#[derive(Debug, derivative::Derivative)] -#[derivative(PartialEq)] -pub enum Job { - Dump { - #[derivative(PartialEq = "ignore")] - ret: oneshot::Sender<Result<oneshot::Sender<()>, IndexResolverError>>, - path: PathBuf, - }, - Snapshot(#[derivative(PartialEq = "ignore")] SnapshotJob), - Empty, -} - -impl Default for Job { - fn default() -> Self { - Self::Empty - } -} - -impl Job { - pub fn take(&mut self) -> Self { - std::mem::take(self) - } -} - #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] #[cfg_attr(test, derive(proptest_derive::Arbitrary))] pub enum DocumentDeletion { diff --git a/meilisearch-lib/src/tasks/task_store/mod.rs b/meilisearch-lib/src/tasks/task_store/mod.rs index bde0f6360..f580c8e26 100644 --- a/meilisearch-lib/src/tasks/task_store/mod.rs +++ b/meilisearch-lib/src/tasks/task_store/mod.rs @@ -9,7 +9,9 @@ use log::debug; use milli::heed::{Env, RwTxn}; use time::OffsetDateTime; +use super::batch::BatchContent; use super::error::TaskError; +use super::scheduler::Processing; use super::task::{Task, TaskContent, TaskId}; use super::Result; use crate::index_resolver::IndexUid; @@ -122,19 +124,44 @@ impl TaskStore { } } - pub async fn get_pending_tasks(&self, ids: Vec<TaskId>) -> Result<(Vec<TaskId>, Vec<Task>)> { + /// This method takes a `Processing` which contains the next task ids to process, and returns + /// the corresponding tasks along with ownership of the passed `Processing`. + /// + /// We need get_processing_tasks to take ownership of `Processing` because we need it to be + /// valid for 'static. + pub async fn get_processing_tasks( + &self, + processing: Processing, + ) -> Result<(Processing, BatchContent)> { let store = self.store.clone(); let tasks = tokio::task::spawn_blocking(move || -> Result<_> { - let mut tasks = Vec::new(); let txn = store.rtxn()?; - for id in ids.iter() { - let task = store - .get(&txn, *id)? - .ok_or(TaskError::UnexistingTask(*id))?; - tasks.push(task); - } - Ok((ids, tasks)) + let content = match processing { + Processing::DocumentAdditions(ref ids) => { + let mut tasks = Vec::new(); + + for id in ids.iter() { + let task = store + .get(&txn, *id)? + .ok_or(TaskError::UnexistingTask(*id))?; + tasks.push(task); + } + BatchContent::DocumentAddtitionBatch(tasks) + } + Processing::IndexUpdate(id) => { + let task = store.get(&txn, id)?.ok_or(TaskError::UnexistingTask(id))?; + BatchContent::IndexUpdate(task) + } + Processing::Dump(id) => { + let task = store.get(&txn, id)?.ok_or(TaskError::UnexistingTask(id))?; + debug_assert!(matches!(task.content, TaskContent::Dump { ..
})); + BatchContent::Dump(task) + } + Processing::Nothing => unreachable!(), + }; + + Ok((processing, content)) }) .await??; @@ -231,7 +258,7 @@ impl TaskStore { #[cfg(test)] pub mod test { - use crate::tasks::task_store::store::test::tmp_env; + use crate::tasks::{scheduler::Processing, task_store::store::test::tmp_env}; use super::*; @@ -280,12 +307,12 @@ pub mod test { } } - pub async fn get_pending_tasks( + pub async fn get_processing_tasks( &self, - tasks: Vec<TaskId>, + tasks: Processing, - ) -> Result<(Vec<TaskId>, Vec<Task>)> { + ) -> Result<(Processing, BatchContent)> { match self { - Self::Real(s) => s.get_pending_tasks(tasks).await, + Self::Real(s) => s.get_processing_tasks(tasks).await, Self::Mock(m) => unsafe { m.get("get_pending_task").call(tasks) }, } } diff --git a/meilisearch-lib/src/tasks/update_loop.rs b/meilisearch-lib/src/tasks/update_loop.rs index b09811721..01e88755a 100644 --- a/meilisearch-lib/src/tasks/update_loop.rs +++ b/meilisearch-lib/src/tasks/update_loop.rs @@ -7,33 +7,29 @@ use tokio::time::interval_at; use super::batch::Batch; use super::error::Result; -use super::scheduler::Pending; -use super::{Scheduler, TaskPerformer}; +use super::{BatchHandler, Scheduler}; use crate::tasks::task::TaskEvent; /// The update loop sequentially performs batches of updates by asking the scheduler for a batch, /// and handing it to the `TaskPerformer`. -pub struct UpdateLoop<P: TaskPerformer> { +pub struct UpdateLoop { scheduler: Arc<RwLock<Scheduler>>, - performer: Arc
<P>
, + performers: Vec<Arc<dyn BatchHandler>>, notifier: Option<watch::Receiver<()>>, debounce_duration: Option<Duration>, } -impl
<P>
UpdateLoop
<P>
-where - P: TaskPerformer + Send + Sync + 'static, -{ +impl UpdateLoop { pub fn new( scheduler: Arc<RwLock<Scheduler>>, - performer: Arc
<P>
, + performers: Vec>, debuf_duration: Option, notifier: watch::Receiver<()>, ) -> Self { Self { scheduler, - performer, + performers, debounce_duration: debuf_duration, notifier: Some(notifier), } @@ -59,34 +55,29 @@ where } async fn process_next_batch(&self) -> Result<()> { - let pending = { self.scheduler.write().await.prepare().await? }; - match pending { - Pending::Batch(mut batch) => { - for task in &mut batch.tasks { - task.events - .push(TaskEvent::Processing(OffsetDateTime::now_utc())); - } + let mut batch = { self.scheduler.write().await.prepare().await? }; + let performer = self + .performers + .iter() + .find(|p| p.accept(&batch)) + .expect("No performer found for batch") + .clone(); - batch.tasks = { - self.scheduler - .read() - .await - .update_tasks(batch.tasks) - .await? - }; + batch + .content + .push_event(TaskEvent::Processing(OffsetDateTime::now_utc())); - let performer = self.performer.clone(); + batch.content = { + self.scheduler + .read() + .await + .update_tasks(batch.content) + .await? + }; - let batch = performer.process_batch(batch).await; + let batch = performer.process_batch(batch).await; - self.handle_batch_result(batch).await?; - } - Pending::Job(job) => { - let performer = self.performer.clone(); - performer.process_job(job).await; - } - Pending::Nothing => (), - } + self.handle_batch_result(batch, performer).await?; Ok(()) } @@ -96,13 +87,17 @@ where /// When a task is processed, the result of the process is pushed to its event list. The /// `handle_batch_result` make sure that the new state is saved to the store. /// The tasks are then removed from the processing queue. - async fn handle_batch_result(&self, mut batch: Batch) -> Result<()> { + async fn handle_batch_result( + &self, + mut batch: Batch, + performer: Arc, + ) -> Result<()> { let mut scheduler = self.scheduler.write().await; - let tasks = scheduler.update_tasks(batch.tasks).await?; + let content = scheduler.update_tasks(batch.content).await?; scheduler.finish(); drop(scheduler); - batch.tasks = tasks; - self.performer.finish(&batch).await; + batch.content = content; + performer.finish(&batch).await; Ok(()) } } From 60a8249de61c979edcb83f6c0c9c9782d472ab13 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 19 May 2022 14:44:24 +0200 Subject: [PATCH 08/33] add dump batch handler --- meilisearch-http/src/routes/dump.rs | 6 +- meilisearch-lib/src/dump/actor.rs | 7 +- meilisearch-lib/src/dump/error.rs | 16 +- meilisearch-lib/src/dump/handle_impl.rs | 2 +- meilisearch-lib/src/dump/mod.rs | 182 ++++++------------ meilisearch-lib/src/index_controller/error.rs | 4 +- meilisearch-lib/src/index_controller/mod.rs | 32 +-- .../src/tasks/batch_handlers/dump_handler.rs | 92 +++++++++ .../src/tasks/batch_handlers/mod.rs | 1 + 9 files changed, 166 insertions(+), 176 deletions(-) create mode 100644 meilisearch-lib/src/tasks/batch_handlers/dump_handler.rs diff --git a/meilisearch-http/src/routes/dump.rs b/meilisearch-http/src/routes/dump.rs index b58552f27..7d32fdda5 100644 --- a/meilisearch-http/src/routes/dump.rs +++ b/meilisearch-http/src/routes/dump.rs @@ -45,8 +45,8 @@ async fn get_dump_status( meilisearch: GuardedData, MeiliSearch>, path: web::Path, ) -> Result { - let res = meilisearch.dump_info(path.dump_uid.clone()).await?; + todo!(); - debug!("returns: {:?}", res); - Ok(HttpResponse::Ok().json(res)) + // debug!("returns: {:?}", res); + // Ok(HttpResponse::Ok().json(res)) } diff --git a/meilisearch-lib/src/dump/actor.rs b/meilisearch-lib/src/dump/actor.rs index 48fc077ca..b7f615e44 100644 --- 
a/meilisearch-lib/src/dump/actor.rs +++ b/meilisearch-lib/src/dump/actor.rs @@ -9,7 +9,7 @@ use time::macros::format_description; use time::OffsetDateTime; use tokio::sync::{mpsc, oneshot, RwLock}; -use super::error::{DumpActorError, Result}; +use super::error::{DumpError, Result}; use super::{DumpInfo, DumpJob, DumpMsg, DumpStatus}; use crate::tasks::Scheduler; use crate::update_file_store::UpdateFileStore; @@ -106,7 +106,7 @@ impl DumpActor { let _lock = match self.lock.try_lock() { Some(lock) => lock, None => { - ret.send(Err(DumpActorError::DumpAlreadyRunning)) + ret.send(Err(DumpError::DumpAlreadyRunning)) .expect("Dump actor is dead"); return; } @@ -123,7 +123,6 @@ impl DumpActor { dump_path: self.dump_path.clone(), db_path: self.analytics_path.clone(), update_file_store: self.update_file_store.clone(), - scheduler: self.scheduler.clone(), uid: uid.clone(), update_db_size: self.update_db_size, index_db_size: self.index_db_size, @@ -155,7 +154,7 @@ impl DumpActor { async fn handle_dump_info(&self, uid: String) -> Result { match self.dump_infos.read().await.get(&uid) { Some(info) => Ok(info.clone()), - _ => Err(DumpActorError::DumpDoesNotExist(uid)), + _ => Err(DumpError::DumpDoesNotExist(uid)), } } } diff --git a/meilisearch-lib/src/dump/error.rs b/meilisearch-lib/src/dump/error.rs index f72b6d1dd..7931a8d75 100644 --- a/meilisearch-lib/src/dump/error.rs +++ b/meilisearch-lib/src/dump/error.rs @@ -3,10 +3,10 @@ use meilisearch_error::{internal_error, Code, ErrorCode}; use crate::{index_resolver::error::IndexResolverError, tasks::error::TaskError}; -pub type Result = std::result::Result; +pub type Result = std::result::Result; #[derive(thiserror::Error, Debug)] -pub enum DumpActorError { +pub enum DumpError { #[error("A dump is already processing. 
You must wait until the current process is finished before requesting another dump.")] DumpAlreadyRunning, #[error("Dump `{0}` not found.")] @@ -18,7 +18,7 @@ pub enum DumpActorError { } internal_error!( - DumpActorError: milli::heed::Error, + DumpError: milli::heed::Error, std::io::Error, tokio::task::JoinError, tokio::sync::oneshot::error::RecvError, @@ -29,13 +29,13 @@ internal_error!( TaskError ); -impl ErrorCode for DumpActorError { +impl ErrorCode for DumpError { fn error_code(&self) -> Code { match self { - DumpActorError::DumpAlreadyRunning => Code::DumpAlreadyInProgress, - DumpActorError::DumpDoesNotExist(_) => Code::DumpNotFound, - DumpActorError::Internal(_) => Code::Internal, - DumpActorError::IndexResolver(e) => e.error_code(), + DumpError::DumpAlreadyRunning => Code::DumpAlreadyInProgress, + DumpError::DumpDoesNotExist(_) => Code::DumpNotFound, + DumpError::Internal(_) => Code::Internal, + DumpError::IndexResolver(e) => e.error_code(), } } } diff --git a/meilisearch-lib/src/dump/handle_impl.rs b/meilisearch-lib/src/dump/handle_impl.rs index 16a312e70..9577b3663 100644 --- a/meilisearch-lib/src/dump/handle_impl.rs +++ b/meilisearch-lib/src/dump/handle_impl.rs @@ -1,7 +1,7 @@ use tokio::sync::{mpsc, oneshot}; use super::error::Result; -use super::{DumpActorHandle, DumpInfo, DumpMsg}; +use super::{DumpActorHandle, DumpMsg}; #[derive(Clone)] pub struct DumpActorHandleImpl { diff --git a/meilisearch-lib/src/dump/mod.rs b/meilisearch-lib/src/dump/mod.rs index bc717b35e..59b51a601 100644 --- a/meilisearch-lib/src/dump/mod.rs +++ b/meilisearch-lib/src/dump/mod.rs @@ -1,32 +1,30 @@ use std::fs::File; use std::path::{Path, PathBuf}; -use std::sync::Arc; use anyhow::bail; -use log::info; +use log::{info, trace}; +use meilisearch_auth::AuthController; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; -pub use actor::DumpActor; -pub use handle_impl::*; -pub use message::DumpMsg; use tempfile::TempDir; -use tokio::sync::RwLock; +use tokio::fs::create_dir_all; -use crate::compression::from_tar_gz; +use crate::analytics; +use crate::compression::{from_tar_gz, to_tar_gz}; +use crate::dump::error::DumpError; use crate::options::IndexerOpts; -use crate::tasks::Scheduler; use crate::update_file_store::UpdateFileStore; use error::Result; use self::loaders::{v2, v3, v4}; -mod actor; +// mod actor; mod compat; pub mod error; -mod handle_impl; +// mod handle_impl; mod loaders; -mod message; +// mod message; const META_FILE_NAME: &str = "metadata.json"; @@ -51,18 +49,6 @@ impl Metadata { } } -#[async_trait::async_trait] -#[cfg_attr(test, mockall::automock)] -pub trait DumpActorHandle { - /// Start the creation of a dump - /// Implementation: [handle_impl::DumpActorHandleImpl::create_dump] - async fn create_dump(&self) -> Result; - - /// Return the status of an already created dump - /// Implementation: [handle_impl::DumpActorHandleImpl::dump_info] - async fn dump_info(&self, uid: String) -> Result; -} - #[derive(Serialize, Deserialize, Debug)] #[serde(rename_all = "camelCase")] pub struct MetadataV1 { @@ -159,49 +145,6 @@ pub enum DumpStatus { Failed, } -#[derive(Debug, Serialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct DumpInfo { - pub uid: String, - pub status: DumpStatus, - #[serde(skip_serializing_if = "Option::is_none")] - pub error: Option, - #[serde(with = "time::serde::rfc3339")] - started_at: OffsetDateTime, - #[serde( - skip_serializing_if = "Option::is_none", - with = "time::serde::rfc3339::option" - )] - finished_at: Option, -} - -impl DumpInfo { - pub fn 
new(uid: String, status: DumpStatus) -> Self { - Self { - uid, - status, - error: None, - started_at: OffsetDateTime::now_utc(), - finished_at: None, - } - } - - pub fn with_error(&mut self, error: String) { - self.status = DumpStatus::Failed; - self.finished_at = Some(OffsetDateTime::now_utc()); - self.error = Some(error); - } - - pub fn done(&mut self) { - self.finished_at = Some(OffsetDateTime::now_utc()); - self.status = DumpStatus::Done; - } - - pub fn dump_already_in_progress(&self) -> bool { - self.status == DumpStatus::InProgress - } -} - pub fn load_dump( dst_path: impl AsRef, src_path: impl AsRef, @@ -313,76 +256,59 @@ fn persist_dump(dst_path: impl AsRef, tmp_dst: TempDir) -> anyhow::Result< } pub struct DumpJob { - dump_path: PathBuf, - db_path: PathBuf, - update_file_store: UpdateFileStore, - scheduler: Arc>, - uid: String, - update_db_size: usize, - index_db_size: usize, + pub dump_path: PathBuf, + pub db_path: PathBuf, + pub update_file_store: UpdateFileStore, + pub uid: String, + pub update_db_size: usize, + pub index_db_size: usize, } impl DumpJob { - async fn run(self) -> Result<()> { - // trace!("Performing dump."); - // - // create_dir_all(&self.dump_path).await?; - // - // let temp_dump_dir = tokio::task::spawn_blocking(tempfile::TempDir::new).await??; - // let temp_dump_path = temp_dump_dir.path().to_owned(); - // - // let meta = MetadataVersion::new_v4(self.index_db_size, self.update_db_size); - // let meta_path = temp_dump_path.join(META_FILE_NAME); - // let mut meta_file = File::create(&meta_path)?; - // serde_json::to_writer(&mut meta_file, &meta)?; - // analytics::copy_user_id(&self.db_path, &temp_dump_path); - // - // create_dir_all(&temp_dump_path.join("indexes")).await?; - // - // let (sender, receiver) = oneshot::channel(); - // - // self.scheduler - // .write() - // .await - // .schedule_job(Job::Dump { - // ret: sender, - // path: temp_dump_path.clone(), - // }) - // .await; - // - // // wait until the job has started performing before finishing the dump process - // let sender = receiver.await??; - // - // AuthController::dump(&self.db_path, &temp_dump_path)?; - // - // //TODO(marin): this is not right, the scheduler should dump itself, not do it here... + pub async fn run(self) -> Result<()> { + trace!("Performing dump."); + + create_dir_all(&self.dump_path).await?; + + let temp_dump_dir = tokio::task::spawn_blocking(tempfile::TempDir::new).await??; + let temp_dump_path = temp_dump_dir.path().to_owned(); + + let meta = MetadataVersion::new_v4(self.index_db_size, self.update_db_size); + let meta_path = temp_dump_path.join(META_FILE_NAME); + let mut meta_file = File::create(&meta_path)?; + serde_json::to_writer(&mut meta_file, &meta)?; + analytics::copy_user_id(&self.db_path, &temp_dump_path); + + create_dir_all(&temp_dump_path.join("indexes")).await?; + + AuthController::dump(&self.db_path, &temp_dump_path)?; + // TODO: Dump indexes and updates + + //TODO(marin): this is not right, the scheduler should dump itself, not do it here... // self.scheduler // .read() // .await // .dump(&temp_dump_path, self.update_file_store.clone()) // .await?; - // - // let dump_path = tokio::task::spawn_blocking(move || -> Result { - // // for now we simply copy the updates/updates_files - // // FIXME: We may copy more files than necessary, if new files are added while we are - // // performing the dump. We need a way to filter them out. 
- // - // let temp_dump_file = tempfile::NamedTempFile::new_in(&self.dump_path)?; - // to_tar_gz(temp_dump_path, temp_dump_file.path()) - // .map_err(|e| DumpActorError::Internal(e.into()))?; - // - // let dump_path = self.dump_path.join(self.uid).with_extension("dump"); - // temp_dump_file.persist(&dump_path)?; - // - // Ok(dump_path) - // }) - // .await??; - // - // // notify the update loop that we are finished performing the dump. - // let _ = sender.send(()); - // - // info!("Created dump in {:?}.", dump_path); - // + + let dump_path = tokio::task::spawn_blocking(move || -> Result { + // for now we simply copy the updates/updates_files + // FIXME: We may copy more files than necessary, if new files are added while we are + // performing the dump. We need a way to filter them out. + + let temp_dump_file = tempfile::NamedTempFile::new_in(&self.dump_path)?; + to_tar_gz(temp_dump_path, temp_dump_file.path()) + .map_err(|e| DumpError::Internal(e.into()))?; + + let dump_path = self.dump_path.join(self.uid).with_extension("dump"); + temp_dump_file.persist(&dump_path)?; + + Ok(dump_path) + }) + .await??; + + info!("Created dump in {:?}.", dump_path); + Ok(()) } } diff --git a/meilisearch-lib/src/index_controller/error.rs b/meilisearch-lib/src/index_controller/error.rs index 11ef03d73..529887b6a 100644 --- a/meilisearch-lib/src/index_controller/error.rs +++ b/meilisearch-lib/src/index_controller/error.rs @@ -6,7 +6,7 @@ use tokio::task::JoinError; use super::DocumentAdditionFormat; use crate::document_formats::DocumentFormatError; -use crate::dump::error::DumpActorError; +use crate::dump::error::DumpError; use crate::index::error::IndexError; use crate::tasks::error::TaskError; use crate::update_file_store::UpdateFileStoreError; @@ -28,7 +28,7 @@ pub enum IndexControllerError { #[error("{0}")] TaskError(#[from] TaskError), #[error("{0}")] - DumpError(#[from] DumpActorError), + DumpError(#[from] DumpError), #[error("{0}")] DocumentFormatError(#[from] DocumentFormatError), #[error("A {0} payload is missing.")] diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index b73402d56..14f262a51 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -13,13 +13,13 @@ use futures::StreamExt; use milli::update::IndexDocumentsMethod; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; -use tokio::sync::{mpsc, RwLock}; +use tokio::sync::RwLock; use tokio::task::spawn_blocking; use tokio::time::sleep; use uuid::Uuid; use crate::document_formats::{read_csv, read_json, read_ndjson}; -use crate::dump::{self, load_dump, DumpActor, DumpActorHandle, DumpActorHandleImpl, DumpInfo}; +use crate::dump::load_dump; use crate::index::{ Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked, }; @@ -75,7 +75,6 @@ pub struct IndexController { scheduler: Arc>, task_store: TaskStore, dump_path: PathBuf, - dump_handle: dump::DumpActorHandleImpl, pub update_file_store: UpdateFileStore, } @@ -85,7 +84,6 @@ impl Clone for IndexController { Self { index_resolver: self.index_resolver.clone(), scheduler: self.scheduler.clone(), - dump_handle: self.dump_handle.clone(), update_file_store: self.update_file_store.clone(), task_store: self.task_store.clone(), dump_path: self.dump_path.clone(), @@ -228,23 +226,6 @@ impl IndexControllerBuilder { let dump_path = self .dump_dst .ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?; - let dump_handle = { - let analytics_path = 
&db_path; - let (sender, receiver) = mpsc::channel(10); - let actor = DumpActor::new( - receiver, - update_file_store.clone(), - scheduler.clone(), - dump_path.clone(), - analytics_path, - index_size, - task_store_size, - ); - - tokio::task::spawn_local(actor.run()); - - DumpActorHandleImpl { sender } - }; if self.schedule_snapshot { let snapshot_period = self @@ -269,7 +250,6 @@ impl IndexControllerBuilder { Ok(IndexController { index_resolver, scheduler, - dump_handle, dump_path, update_file_store, task_store, @@ -633,14 +613,6 @@ where indexes, }) } - - pub async fn create_dump(&self) -> Result { - Ok(self.dump_handle.create_dump().await?) - } - - pub async fn dump_info(&self, uid: String) -> Result { - Ok(self.dump_handle.dump_info(uid).await?) - } } pub async fn get_arc_ownership_blocking(mut item: Arc) -> T { diff --git a/meilisearch-lib/src/tasks/batch_handlers/dump_handler.rs b/meilisearch-lib/src/tasks/batch_handlers/dump_handler.rs new file mode 100644 index 000000000..c0ef70ba8 --- /dev/null +++ b/meilisearch-lib/src/tasks/batch_handlers/dump_handler.rs @@ -0,0 +1,92 @@ +use std::path::{Path, PathBuf}; + +use log::{error, trace}; +use time::{macros::format_description, OffsetDateTime}; + +use crate::dump::DumpJob; +use crate::tasks::batch::{Batch, BatchContent}; +use crate::tasks::BatchHandler; +use crate::update_file_store::UpdateFileStore; + +pub struct DumpHandler { + update_file_store: UpdateFileStore, + dump_path: PathBuf, + db_path: PathBuf, + update_db_size: usize, + index_db_size: usize, +} + +/// Generate uid from creation date +fn generate_uid() -> String { + OffsetDateTime::now_utc() + .format(format_description!( + "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" + )) + .unwrap() +} + +impl DumpHandler { + pub fn new( + update_file_store: UpdateFileStore, + dump_path: impl AsRef, + db_path: impl AsRef, + index_db_size: usize, + update_db_size: usize, + ) -> Self { + Self { + update_file_store, + dump_path: dump_path.as_ref().into(), + db_path: db_path.as_ref().into(), + index_db_size, + update_db_size, + } + } + + async fn create_dump(&self) { + let uid = generate_uid(); + + let task = DumpJob { + dump_path: self.dump_path.clone(), + db_path: self.db_path.clone(), + update_file_store: self.update_file_store.clone(), + uid: uid.clone(), + update_db_size: self.update_db_size, + index_db_size: self.index_db_size, + }; + + let task_result = tokio::task::spawn_local(task.run()).await; + + match task_result { + Ok(Ok(())) => { + trace!("Dump succeed"); + } + Ok(Err(e)) => { + error!("Dump failed: {}", e); + } + Err(_) => { + error!("Dump panicked. Dump status set to failed"); + } + }; + } +} + +#[async_trait::async_trait] +impl BatchHandler for DumpHandler { + fn accept(&self, batch: &Batch) -> bool { + matches!(batch.content, BatchContent::Dump { .. }) + } + + async fn process_batch(&self, batch: Batch) -> Batch { + match batch.content { + BatchContent::Dump { .. 
} => { + self.create_dump().await; + batch + } + _ => unreachable!("invalid batch content for dump"), + } + } + + async fn finish(&self, _: &Batch) { + () + } +} diff --git a/meilisearch-lib/src/tasks/batch_handlers/mod.rs b/meilisearch-lib/src/tasks/batch_handlers/mod.rs index 0e94c76f1..f72c1b760 100644 --- a/meilisearch-lib/src/tasks/batch_handlers/mod.rs +++ b/meilisearch-lib/src/tasks/batch_handlers/mod.rs @@ -1,2 +1,3 @@ +pub mod dump_handler; pub mod empty_handler; mod index_resolver_handler; From 414d0907ced804dd2dcb1f1e82c80633013ff84e Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 19 May 2022 14:51:04 +0200 Subject: [PATCH 09/33] register dump handler --- meilisearch-http/src/task.rs | 2 +- meilisearch-lib/src/index_controller/mod.rs | 34 +++++++++++++-------- meilisearch-lib/src/tasks/mod.rs | 2 +- meilisearch-lib/src/tasks/task.rs | 7 +---- 4 files changed, 25 insertions(+), 20 deletions(-) diff --git a/meilisearch-http/src/task.rs b/meilisearch-http/src/task.rs index 5a8542ff8..3febe002f 100644 --- a/meilisearch-http/src/task.rs +++ b/meilisearch-http/src/task.rs @@ -218,7 +218,7 @@ impl From for TaskView { TaskType::IndexUpdate, Some(TaskDetails::IndexInfo { primary_key }), ), - TaskContent::Dump { path: _ } => (TaskType::Dump, None), + TaskContent::Dump => (TaskType::Dump, None), }; // An event always has at least one event: "Created" diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index 14f262a51..de4426f81 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -27,7 +27,9 @@ use crate::options::{IndexerOpts, SchedulerConfig}; use crate::snapshot::{load_snapshot, SnapshotService}; use crate::tasks::error::TaskError; use crate::tasks::task::{DocumentDeletion, Task, TaskContent, TaskId}; -use crate::tasks::{BatchHandler, EmptyBatchHandler, Scheduler, TaskFilter, TaskStore}; +use crate::tasks::{ + BatchHandler, DumpHandler, EmptyBatchHandler, Scheduler, TaskFilter, TaskStore, +}; use error::Result; use self::error::IndexControllerError; @@ -74,7 +76,6 @@ pub struct IndexController { pub index_resolver: Arc>, scheduler: Arc>, task_store: TaskStore, - dump_path: PathBuf, pub update_file_store: UpdateFileStore, } @@ -86,7 +87,6 @@ impl Clone for IndexController { scheduler: self.scheduler.clone(), update_file_store: self.update_file_store.clone(), task_store: self.task_store.clone(), - dump_path: self.dump_path.clone(), } } } @@ -218,15 +218,28 @@ impl IndexControllerBuilder { update_file_store.clone(), )?); - let task_store = TaskStore::new(meta_env)?; - let handlers: Vec> = - vec![index_resolver.clone(), Arc::new(EmptyBatchHandler)]; - let scheduler = Scheduler::new(task_store.clone(), handlers, scheduler_config)?; - let dump_path = self .dump_dst .ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?; + let dump_handler = Arc::new(DumpHandler::new( + update_file_store.clone(), + dump_path, + db_path.as_ref().clone(), + index_size, + task_store_size, + )); + let task_store = TaskStore::new(meta_env)?; + + // register all the batch handlers for use with the scheduler. 
+ let handlers: Vec> = vec![ + index_resolver.clone(), + dump_handler, + // dummy handler to catch all empty batches + Arc::new(EmptyBatchHandler), + ]; + let scheduler = Scheduler::new(task_store.clone(), handlers, scheduler_config)?; + if self.schedule_snapshot { let snapshot_period = self .snapshot_interval @@ -250,7 +263,6 @@ impl IndexControllerBuilder { Ok(IndexController { index_resolver, scheduler, - dump_path, update_file_store, task_store, }) @@ -408,9 +420,7 @@ where } pub async fn register_dump_task(&self) -> Result { - let content = TaskContent::Dump { - path: self.dump_path.clone(), - }; + let content = TaskContent::Dump; let task = self.task_store.register(None, content).await?; self.scheduler.read().await.notify(); Ok(task) diff --git a/meilisearch-lib/src/tasks/mod.rs b/meilisearch-lib/src/tasks/mod.rs index bc01c4901..faa35f2da 100644 --- a/meilisearch-lib/src/tasks/mod.rs +++ b/meilisearch-lib/src/tasks/mod.rs @@ -1,6 +1,6 @@ use async_trait::async_trait; -pub use batch_handlers::empty_handler::EmptyBatchHandler; +pub use batch_handlers::{dump_handler::DumpHandler, empty_handler::EmptyBatchHandler}; pub use scheduler::Scheduler; pub use task_store::TaskFilter; diff --git a/meilisearch-lib/src/tasks/task.rs b/meilisearch-lib/src/tasks/task.rs index cb5ba671a..41a536a1e 100644 --- a/meilisearch-lib/src/tasks/task.rs +++ b/meilisearch-lib/src/tasks/task.rs @@ -1,5 +1,3 @@ -use std::path::PathBuf; - use meilisearch_error::ResponseError; use milli::update::{DocumentAdditionResult, IndexDocumentsMethod}; use serde::{Deserialize, Serialize}; @@ -142,10 +140,7 @@ pub enum TaskContent { IndexUpdate { primary_key: Option, }, - Dump { - #[cfg_attr(test, proptest(value = "PathBuf::from(\".\")"))] - path: PathBuf, - }, + Dump, } #[cfg(test)] From 56eb2907c9b8809b663111f3bb38492a9d7660b1 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 19 May 2022 14:59:59 +0200 Subject: [PATCH 10/33] dump indexes --- meilisearch-lib/src/dump/mod.rs | 16 +++++++++++--- meilisearch-lib/src/index_controller/mod.rs | 1 + .../src/tasks/batch_handlers/dump_handler.rs | 22 ++++++++++++++++--- 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/meilisearch-lib/src/dump/mod.rs b/meilisearch-lib/src/dump/mod.rs index 59b51a601..05deb8a40 100644 --- a/meilisearch-lib/src/dump/mod.rs +++ b/meilisearch-lib/src/dump/mod.rs @@ -1,5 +1,6 @@ use std::fs::File; use std::path::{Path, PathBuf}; +use std::sync::Arc; use anyhow::bail; use log::{info, trace}; @@ -13,6 +14,9 @@ use tokio::fs::create_dir_all; use crate::analytics; use crate::compression::{from_tar_gz, to_tar_gz}; use crate::dump::error::DumpError; +use crate::index_resolver::index_store::IndexStore; +use crate::index_resolver::meta_store::IndexMetaStore; +use crate::index_resolver::IndexResolver; use crate::options::IndexerOpts; use crate::update_file_store::UpdateFileStore; use error::Result; @@ -255,16 +259,21 @@ fn persist_dump(dst_path: impl AsRef, tmp_dst: TempDir) -> anyhow::Result< Ok(()) } -pub struct DumpJob { +pub struct DumpJob { pub dump_path: PathBuf, pub db_path: PathBuf, pub update_file_store: UpdateFileStore, pub uid: String, pub update_db_size: usize, pub index_db_size: usize, + pub index_resolver: Arc>, } -impl DumpJob { +impl DumpJob +where + U: IndexMetaStore, + I: IndexStore, +{ pub async fn run(self) -> Result<()> { trace!("Performing dump."); @@ -281,8 +290,9 @@ impl DumpJob { create_dir_all(&temp_dump_path.join("indexes")).await?; + // TODO: this is blocking!! 
AuthController::dump(&self.db_path, &temp_dump_path)?; - // TODO: Dump indexes and updates + self.index_resolver.dump(&self.dump_path).await?; //TODO(marin): this is not right, the scheduler should dump itself, not do it here... // self.scheduler diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index de4426f81..f89ebec4e 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -228,6 +228,7 @@ impl IndexControllerBuilder { db_path.as_ref().clone(), index_size, task_store_size, + index_resolver.clone(), )); let task_store = TaskStore::new(meta_env)?; diff --git a/meilisearch-lib/src/tasks/batch_handlers/dump_handler.rs b/meilisearch-lib/src/tasks/batch_handlers/dump_handler.rs index c0ef70ba8..057cf274f 100644 --- a/meilisearch-lib/src/tasks/batch_handlers/dump_handler.rs +++ b/meilisearch-lib/src/tasks/batch_handlers/dump_handler.rs @@ -1,15 +1,20 @@ use std::path::{Path, PathBuf}; +use std::sync::Arc; use log::{error, trace}; use time::{macros::format_description, OffsetDateTime}; use crate::dump::DumpJob; +use crate::index_resolver::index_store::IndexStore; +use crate::index_resolver::meta_store::IndexMetaStore; +use crate::index_resolver::IndexResolver; use crate::tasks::batch::{Batch, BatchContent}; use crate::tasks::BatchHandler; use crate::update_file_store::UpdateFileStore; -pub struct DumpHandler { +pub struct DumpHandler { update_file_store: UpdateFileStore, + index_resolver: Arc>, dump_path: PathBuf, db_path: PathBuf, update_db_size: usize, @@ -25,13 +30,18 @@ fn generate_uid() -> String { .unwrap() } -impl DumpHandler { +impl DumpHandler +where + U: IndexMetaStore + Send + Sync + 'static, + I: IndexStore + Send + Sync + 'static, +{ pub fn new( update_file_store: UpdateFileStore, dump_path: impl AsRef, db_path: impl AsRef, index_db_size: usize, update_db_size: usize, + index_resolver: Arc>, ) -> Self { Self { update_file_store, @@ -39,6 +49,7 @@ impl DumpHandler { db_path: db_path.as_ref().into(), index_db_size, update_db_size, + index_resolver, } } @@ -52,6 +63,7 @@ impl DumpHandler { uid: uid.clone(), update_db_size: self.update_db_size, index_db_size: self.index_db_size, + index_resolver: self.index_resolver.clone(), }; let task_result = tokio::task::spawn_local(task.run()).await; @@ -71,7 +83,11 @@ impl DumpHandler { } #[async_trait::async_trait] -impl BatchHandler for DumpHandler { +impl BatchHandler for DumpHandler +where + U: IndexMetaStore + Send + Sync + 'static, + I: IndexStore + Send + Sync + 'static, +{ fn accept(&self, batch: &Batch) -> bool { matches!(batch.content, BatchContent::Dump { .. 
}) } From 57fde30b918f6c8bb9775dd46a78ca26344614d0 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 19 May 2022 20:18:43 +0200 Subject: [PATCH 11/33] handle dump --- meilisearch-http/src/task.rs | 6 +- meilisearch-lib/src/dump/message.rs | 1 - meilisearch-lib/src/dump/mod.rs | 46 +++++--- meilisearch-lib/src/index_controller/mod.rs | 22 ++-- .../src/tasks/batch_handlers/dump_handler.rs | 103 +++--------------- meilisearch-lib/src/tasks/mod.rs | 2 +- meilisearch-lib/src/tasks/scheduler.rs | 6 - meilisearch-lib/src/tasks/task.rs | 20 +++- meilisearch-lib/src/tasks/task_store/mod.rs | 24 ++-- 9 files changed, 94 insertions(+), 136 deletions(-) diff --git a/meilisearch-http/src/task.rs b/meilisearch-http/src/task.rs index 3febe002f..f10d7e110 100644 --- a/meilisearch-http/src/task.rs +++ b/meilisearch-http/src/task.rs @@ -82,6 +82,8 @@ enum TaskDetails { }, #[serde(rename_all = "camelCase")] ClearAll { deleted_documents: Option }, + #[serde(rename_all = "camelCase")] + Dump { dump_uid: String }, } /// Serialize a `time::Duration` as a best effort ISO 8601 while waiting for @@ -218,7 +220,9 @@ impl From for TaskView { TaskType::IndexUpdate, Some(TaskDetails::IndexInfo { primary_key }), ), - TaskContent::Dump => (TaskType::Dump, None), + TaskContent::Dump { uid } => { + (TaskType::Dump, Some(TaskDetails::Dump { dump_uid: uid })) + } }; // An event always has at least one event: "Created" diff --git a/meilisearch-lib/src/dump/message.rs b/meilisearch-lib/src/dump/message.rs index 6c9dded9f..8ebeb3b57 100644 --- a/meilisearch-lib/src/dump/message.rs +++ b/meilisearch-lib/src/dump/message.rs @@ -1,7 +1,6 @@ use tokio::sync::oneshot; use super::error::Result; -use super::DumpInfo; pub enum DumpMsg { CreateDump { diff --git a/meilisearch-lib/src/dump/mod.rs b/meilisearch-lib/src/dump/mod.rs index 05deb8a40..b14b356fd 100644 --- a/meilisearch-lib/src/dump/mod.rs +++ b/meilisearch-lib/src/dump/mod.rs @@ -5,10 +5,12 @@ use std::sync::Arc; use anyhow::bail; use log::{info, trace}; use meilisearch_auth::AuthController; +use milli::heed::Env; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; use tempfile::TempDir; +use time::macros::format_description; use tokio::fs::create_dir_all; use crate::analytics; @@ -18,6 +20,7 @@ use crate::index_resolver::index_store::IndexStore; use crate::index_resolver::meta_store::IndexMetaStore; use crate::index_resolver::IndexResolver; use crate::options::IndexerOpts; +use crate::tasks::TaskStore; use crate::update_file_store::UpdateFileStore; use error::Result; @@ -259,22 +262,31 @@ fn persist_dump(dst_path: impl AsRef, tmp_dst: TempDir) -> anyhow::Result< Ok(()) } -pub struct DumpJob { +/// Generate uid from creation date +pub fn generate_uid() -> String { + OffsetDateTime::now_utc() + .format(format_description!( + "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" + )) + .unwrap() +} + +pub struct DumpHandler { pub dump_path: PathBuf, pub db_path: PathBuf, pub update_file_store: UpdateFileStore, - pub uid: String, - pub update_db_size: usize, + pub task_store_size: usize, pub index_db_size: usize, + pub env: Arc, pub index_resolver: Arc>, } -impl DumpJob +impl DumpHandler where - U: IndexMetaStore, - I: IndexStore, + U: IndexMetaStore + Sync + Send + 'static, + I: IndexStore + Sync + Send + 'static, { - pub async fn run(self) -> Result<()> { + pub async fn run(&self, uid: String) -> Result<()> { trace!("Performing dump."); create_dir_all(&self.dump_path).await?; @@ 
-282,7 +294,7 @@ where let temp_dump_dir = tokio::task::spawn_blocking(tempfile::TempDir::new).await??; let temp_dump_path = temp_dump_dir.path().to_owned(); - let meta = MetadataVersion::new_v4(self.index_db_size, self.update_db_size); + let meta = MetadataVersion::new_v4(self.index_db_size, self.task_store_size); let meta_path = temp_dump_path.join(META_FILE_NAME); let mut meta_file = File::create(&meta_path)?; serde_json::to_writer(&mut meta_file, &meta)?; @@ -292,25 +304,25 @@ where // TODO: this is blocking!! AuthController::dump(&self.db_path, &temp_dump_path)?; + TaskStore::dump( + self.env.clone(), + &self.dump_path, + self.update_file_store.clone(), + ) + .await?; self.index_resolver.dump(&self.dump_path).await?; - //TODO(marin): this is not right, the scheduler should dump itself, not do it here... - // self.scheduler - // .read() - // .await - // .dump(&temp_dump_path, self.update_file_store.clone()) - // .await?; - + let dump_path = self.dump_path.clone(); let dump_path = tokio::task::spawn_blocking(move || -> Result { // for now we simply copy the updates/updates_files // FIXME: We may copy more files than necessary, if new files are added while we are // performing the dump. We need a way to filter them out. - let temp_dump_file = tempfile::NamedTempFile::new_in(&self.dump_path)?; + let temp_dump_file = tempfile::NamedTempFile::new_in(&dump_path)?; to_tar_gz(temp_dump_path, temp_dump_file.path()) .map_err(|e| DumpError::Internal(e.into()))?; - let dump_path = self.dump_path.join(self.uid).with_extension("dump"); + let dump_path = dump_path.join(uid).with_extension("dump"); temp_dump_file.persist(&dump_path)?; Ok(dump_path) diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index f89ebec4e..1eb61d9f0 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -19,7 +19,7 @@ use tokio::time::sleep; use uuid::Uuid; use crate::document_formats::{read_csv, read_json, read_ndjson}; -use crate::dump::load_dump; +use crate::dump::{self, load_dump, DumpHandler}; use crate::index::{ Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked, }; @@ -27,9 +27,7 @@ use crate::options::{IndexerOpts, SchedulerConfig}; use crate::snapshot::{load_snapshot, SnapshotService}; use crate::tasks::error::TaskError; use crate::tasks::task::{DocumentDeletion, Task, TaskContent, TaskId}; -use crate::tasks::{ - BatchHandler, DumpHandler, EmptyBatchHandler, Scheduler, TaskFilter, TaskStore, -}; +use crate::tasks::{BatchHandler, EmptyBatchHandler, Scheduler, TaskFilter, TaskStore}; use error::Result; use self::error::IndexControllerError; @@ -222,14 +220,15 @@ impl IndexControllerBuilder { .dump_dst .ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?; - let dump_handler = Arc::new(DumpHandler::new( - update_file_store.clone(), + let dump_handler = Arc::new(DumpHandler { dump_path, - db_path.as_ref().clone(), - index_size, + db_path: db_path.as_ref().into(), + update_file_store: update_file_store.clone(), task_store_size, - index_resolver.clone(), - )); + index_db_size: index_size, + env: meta_env.clone(), + index_resolver: index_resolver.clone(), + }); let task_store = TaskStore::new(meta_env)?; // register all the batch handlers for use with the scheduler. 
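The order of this handler list matters: the update loop hands each prepared batch to the first handler that accepts it, which is why the catch-all `EmptyBatchHandler` is registered last. A simplified view of the dispatch, assuming `handlers: Vec<Arc<dyn BatchHandler>>` and a `batch` freshly returned by `Scheduler::prepare`:

    let performer = handlers
        .iter()
        .find(|handler| handler.accept(&batch))
        .expect("No performer found for batch")
        .clone();
    // The accepting handler then processes the batch, as in the update loop.
    let batch = performer.process_batch(batch).await;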
@@ -421,7 +420,8 @@ where } pub async fn register_dump_task(&self) -> Result { - let content = TaskContent::Dump; + let uid = dump::generate_uid(); + let content = TaskContent::Dump { uid }; let task = self.task_store.register(None, content).await?; self.scheduler.read().await.notify(); Ok(task) diff --git a/meilisearch-lib/src/tasks/batch_handlers/dump_handler.rs b/meilisearch-lib/src/tasks/batch_handlers/dump_handler.rs index 057cf274f..fc506522f 100644 --- a/meilisearch-lib/src/tasks/batch_handlers/dump_handler.rs +++ b/meilisearch-lib/src/tasks/batch_handlers/dump_handler.rs @@ -1,101 +1,34 @@ -use std::path::{Path, PathBuf}; -use std::sync::Arc; - -use log::{error, trace}; -use time::{macros::format_description, OffsetDateTime}; - -use crate::dump::DumpJob; +use crate::dump::DumpHandler; use crate::index_resolver::index_store::IndexStore; use crate::index_resolver::meta_store::IndexMetaStore; -use crate::index_resolver::IndexResolver; use crate::tasks::batch::{Batch, BatchContent}; +use crate::tasks::task::{Task, TaskContent, TaskEvent, TaskResult}; use crate::tasks::BatchHandler; -use crate::update_file_store::UpdateFileStore; - -pub struct DumpHandler { - update_file_store: UpdateFileStore, - index_resolver: Arc>, - dump_path: PathBuf, - db_path: PathBuf, - update_db_size: usize, - index_db_size: usize, -} - -/// Generate uid from creation date -fn generate_uid() -> String { - OffsetDateTime::now_utc() - .format(format_description!( - "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" - )) - .unwrap() -} - -impl DumpHandler -where - U: IndexMetaStore + Send + Sync + 'static, - I: IndexStore + Send + Sync + 'static, -{ - pub fn new( - update_file_store: UpdateFileStore, - dump_path: impl AsRef, - db_path: impl AsRef, - index_db_size: usize, - update_db_size: usize, - index_resolver: Arc>, - ) -> Self { - Self { - update_file_store, - dump_path: dump_path.as_ref().into(), - db_path: db_path.as_ref().into(), - index_db_size, - update_db_size, - index_resolver, - } - } - - async fn create_dump(&self) { - let uid = generate_uid(); - - let task = DumpJob { - dump_path: self.dump_path.clone(), - db_path: self.db_path.clone(), - update_file_store: self.update_file_store.clone(), - uid: uid.clone(), - update_db_size: self.update_db_size, - index_db_size: self.index_db_size, - index_resolver: self.index_resolver.clone(), - }; - - let task_result = tokio::task::spawn_local(task.run()).await; - - match task_result { - Ok(Ok(())) => { - trace!("Dump succeed"); - } - Ok(Err(e)) => { - error!("Dump failed: {}", e); - } - Err(_) => { - error!("Dump panicked. Dump status set to failed"); - } - }; - } -} #[async_trait::async_trait] impl BatchHandler for DumpHandler where - U: IndexMetaStore + Send + Sync + 'static, - I: IndexStore + Send + Sync + 'static, + U: IndexMetaStore + Sync + Send + 'static, + I: IndexStore + Sync + Send + 'static, { fn accept(&self, batch: &Batch) -> bool { matches!(batch.content, BatchContent::Dump { .. }) } - async fn process_batch(&self, batch: Batch) -> Batch { - match batch.content { - BatchContent::Dump { .. } => { - self.create_dump().await; + async fn process_batch(&self, mut batch: Batch) -> Batch { + match &batch.content { + BatchContent::Dump(Task { + content: TaskContent::Dump { uid }, + .. 
+ }) => { + match self.run(uid.clone()).await { + Ok(_) => { + batch + .content + .push_event(TaskEvent::succeeded(TaskResult::Other)); + } + Err(e) => batch.content.push_event(TaskEvent::failed(e.into())), + } batch } _ => unreachable!("invalid batch content for dump"), diff --git a/meilisearch-lib/src/tasks/mod.rs b/meilisearch-lib/src/tasks/mod.rs index faa35f2da..bc01c4901 100644 --- a/meilisearch-lib/src/tasks/mod.rs +++ b/meilisearch-lib/src/tasks/mod.rs @@ -1,6 +1,6 @@ use async_trait::async_trait; -pub use batch_handlers::{dump_handler::DumpHandler, empty_handler::EmptyBatchHandler}; +pub use batch_handlers::empty_handler::EmptyBatchHandler; pub use scheduler::Scheduler; pub use task_store::TaskFilter; diff --git a/meilisearch-lib/src/tasks/scheduler.rs b/meilisearch-lib/src/tasks/scheduler.rs index f3018b782..1b3fd6daa 100644 --- a/meilisearch-lib/src/tasks/scheduler.rs +++ b/meilisearch-lib/src/tasks/scheduler.rs @@ -1,7 +1,6 @@ use std::cmp::Ordering; use std::collections::{hash_map::Entry, BinaryHeap, HashMap, VecDeque}; use std::ops::{Deref, DerefMut}; -use std::path::Path; use std::slice; use std::sync::Arc; use std::time::Duration; @@ -13,7 +12,6 @@ use tokio::sync::{watch, RwLock}; use crate::options::SchedulerConfig; use crate::snapshot::SnapshotJob; -use crate::update_file_store::UpdateFileStore; use super::batch::{Batch, BatchContent}; use super::error::Result; @@ -276,10 +274,6 @@ impl Scheduler { Ok(this) } - pub async fn dump(&self, path: &Path, file_store: UpdateFileStore) -> Result<()> { - self.store.dump(path, file_store).await - } - fn register_task(&mut self, task: Task) { assert!(!task.is_finished()); self.tasks.insert(task); diff --git a/meilisearch-lib/src/tasks/task.rs b/meilisearch-lib/src/tasks/task.rs index 41a536a1e..0e0aa8af2 100644 --- a/meilisearch-lib/src/tasks/task.rs +++ b/meilisearch-lib/src/tasks/task.rs @@ -62,6 +62,22 @@ pub enum TaskEvent { }, } +impl TaskEvent { + pub fn succeeded(result: TaskResult) -> Self { + Self::Succeded { + result, + timestamp: OffsetDateTime::now_utc(), + } + } + + pub fn failed(error: ResponseError) -> Self { + Self::Failed { + error, + timestamp: OffsetDateTime::now_utc(), + } + } +} + /// A task represents an operation that Meilisearch must do. /// It's stored on disk and executed from the lowest to highest Task id. /// Everytime a new task is created it has a higher Task id than the previous one. 
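A usage sketch for the two constructors added above, mirroring the dump handler earlier in this series (`batch` and `dump_result` are assumed to be in scope):

    match dump_result {
        // Record the outcome on every task of the batch in a single call.
        Ok(()) => batch.content.push_event(TaskEvent::succeeded(TaskResult::Other)),
        Err(e) => batch.content.push_event(TaskEvent::failed(e.into())),
    }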
@@ -140,7 +156,9 @@ pub enum TaskContent { IndexUpdate { primary_key: Option, }, - Dump, + Dump { + uid: String, + }, } #[cfg(test)] diff --git a/meilisearch-lib/src/tasks/task_store/mod.rs b/meilisearch-lib/src/tasks/task_store/mod.rs index f580c8e26..610a5bdeb 100644 --- a/meilisearch-lib/src/tasks/task_store/mod.rs +++ b/meilisearch-lib/src/tasks/task_store/mod.rs @@ -204,13 +204,14 @@ impl TaskStore { } pub async fn dump( - &self, + env: Arc, dir_path: impl AsRef, update_file_store: UpdateFileStore, ) -> Result<()> { + let store = Self::new(env)?; let update_dir = dir_path.as_ref().join("updates"); let updates_file = update_dir.join("data.jsonl"); - let tasks = self.list_tasks(None, None, None).await?; + let tasks = store.list_tasks(None, None, None).await?; let dir_path = dir_path.as_ref().to_path_buf(); tokio::task::spawn_blocking(move || -> Result<()> { @@ -287,6 +288,14 @@ pub mod test { Ok(Self::Real(TaskStore::new(env)?)) } + pub async fn dump( + env: Arc, + path: impl AsRef, + update_file_store: UpdateFileStore, + ) -> Result<()> { + TaskStore::dump(env, path, update_file_store).await + } + pub fn mock(mocker: Mocker) -> Self { Self::Mock(Arc::new(mocker)) } @@ -329,17 +338,6 @@ pub mod test { } } - pub async fn dump( - &self, - path: impl AsRef, - update_file_store: UpdateFileStore, - ) -> Result<()> { - match self { - Self::Real(s) => s.dump(path, update_file_store).await, - Self::Mock(m) => unsafe { m.get("dump").call((path, update_file_store)) }, - } - } - pub async fn register( &self, index_uid: Option, From 4778884105e273711535a0be434045476c6bfb0f Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 19 May 2022 20:19:34 +0200 Subject: [PATCH 12/33] remove dump status route --- meilisearch-http/src/routes/dump.rs | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/meilisearch-http/src/routes/dump.rs b/meilisearch-http/src/routes/dump.rs index 7d32fdda5..55469b0b4 100644 --- a/meilisearch-http/src/routes/dump.rs +++ b/meilisearch-http/src/routes/dump.rs @@ -2,7 +2,6 @@ use actix_web::{web, HttpRequest, HttpResponse}; use log::debug; use meilisearch_error::ResponseError; use meilisearch_lib::MeiliSearch; -use serde::{Deserialize, Serialize}; use serde_json::json; use crate::analytics::Analytics; @@ -11,10 +10,7 @@ use crate::extractors::sequential_extractor::SeqHandler; use crate::task::SummarizedTaskView; pub fn configure(cfg: &mut web::ServiceConfig) { - cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump)))) - .service( - web::resource("/{dump_uid}/status").route(web::get().to(SeqHandler(get_dump_status))), - ); + cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump)))); } pub async fn create_dump( @@ -29,24 +25,3 @@ pub async fn create_dump( debug!("returns: {:?}", res); Ok(HttpResponse::Accepted().json(res)) } - -#[derive(Debug, Serialize)] -#[serde(rename_all = "camelCase")] -struct DumpStatusResponse { - status: String, -} - -#[derive(Deserialize)] -struct DumpParam { - dump_uid: String, -} - -async fn get_dump_status( - meilisearch: GuardedData, MeiliSearch>, - path: web::Path, -) -> Result { - todo!(); - - // debug!("returns: {:?}", res); - // Ok(HttpResponse::Ok().json(res)) -} From 61035a3ea465a6066577b37ce873433f113fb6a7 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Mon, 23 May 2022 10:54:49 +0200 Subject: [PATCH 13/33] create dump v5 --- meilisearch-http/src/task.rs | 11 +++++----- meilisearch-lib/src/dump/loaders/v3.rs | 1 + meilisearch-lib/src/dump/loaders/v4.rs | 5 +++-- 
meilisearch-lib/src/dump/mod.rs | 28 ++++++++++++++++---------- 4 files changed, 27 insertions(+), 18 deletions(-) diff --git a/meilisearch-http/src/task.rs b/meilisearch-http/src/task.rs index f10d7e110..397fed618 100644 --- a/meilisearch-http/src/task.rs +++ b/meilisearch-http/src/task.rs @@ -24,7 +24,7 @@ enum TaskType { DocumentDeletion, SettingsUpdate, ClearAll, - Dump, + DumpCreation, } impl From for TaskType { @@ -44,7 +44,7 @@ impl From for TaskType { TaskContent::IndexDeletion => TaskType::IndexDeletion, TaskContent::IndexCreation { .. } => TaskType::IndexCreation, TaskContent::IndexUpdate { .. } => TaskType::IndexUpdate, - TaskContent::Dump { .. } => TaskType::Dump, + TaskContent::Dump { .. } => TaskType::DumpCreation, _ => unreachable!("unexpected task type"), } } @@ -220,9 +220,10 @@ impl From for TaskView { TaskType::IndexUpdate, Some(TaskDetails::IndexInfo { primary_key }), ), - TaskContent::Dump { uid } => { - (TaskType::Dump, Some(TaskDetails::Dump { dump_uid: uid })) - } + TaskContent::Dump { uid } => ( + TaskType::DumpCreation, + Some(TaskDetails::Dump { dump_uid: uid }), + ), }; // An event always has at least one event: "Created" diff --git a/meilisearch-lib/src/dump/loaders/v3.rs b/meilisearch-lib/src/dump/loaders/v3.rs index 0a2ea438b..8e76b67e0 100644 --- a/meilisearch-lib/src/dump/loaders/v3.rs +++ b/meilisearch-lib/src/dump/loaders/v3.rs @@ -66,6 +66,7 @@ pub fn load_dump( index_db_size, meta_env_size, indexing_options, + "V5", ) } diff --git a/meilisearch-lib/src/dump/loaders/v4.rs b/meilisearch-lib/src/dump/loaders/v4.rs index c898f83b1..7f0ade714 100644 --- a/meilisearch-lib/src/dump/loaders/v4.rs +++ b/meilisearch-lib/src/dump/loaders/v4.rs @@ -19,10 +19,11 @@ pub fn load_dump( index_db_size: usize, meta_env_size: usize, indexing_options: &IndexerOpts, + version: &str, ) -> anyhow::Result<()> { info!( - "Loading dump from {}, dump database version: {}, dump version: V4", - meta.dump_date, meta.db_version + "Loading dump from {}, dump database version: {}, dump version: {}", + meta.dump_date, meta.db_version, version ); let mut options = EnvOpenOptions::new(); diff --git a/meilisearch-lib/src/dump/mod.rs b/meilisearch-lib/src/dump/mod.rs index b14b356fd..084fbd63f 100644 --- a/meilisearch-lib/src/dump/mod.rs +++ b/meilisearch-lib/src/dump/mod.rs @@ -69,6 +69,8 @@ pub enum MetadataVersion { V2(Metadata), V3(Metadata), V4(Metadata), + // V5 is forward compatible with V4 but not backward compatible. + V5(Metadata), } impl MetadataVersion { @@ -80,6 +82,7 @@ impl MetadataVersion { meta_env_size: usize, indexing_options: &IndexerOpts, ) -> anyhow::Result<()> { + let version = self.version(); match self { MetadataVersion::V1(_meta) => { anyhow::bail!("The version 1 of the dumps is not supported anymore. 
You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.") @@ -100,46 +103,49 @@ impl MetadataVersion { meta_env_size, indexing_options, )?, - MetadataVersion::V4(meta) => v4::load_dump( + MetadataVersion::V4(meta) | MetadataVersion::V5(meta) => v4::load_dump( meta, src, dst, index_db_size, meta_env_size, indexing_options, + version, )?, } Ok(()) } - pub fn new_v4(index_db_size: usize, update_db_size: usize) -> Self { + pub fn new_v5(index_db_size: usize, update_db_size: usize) -> Self { let meta = Metadata::new(index_db_size, update_db_size); - Self::V4(meta) + Self::V5(meta) } pub fn db_version(&self) -> &str { match self { Self::V1(meta) => &meta.db_version, - Self::V2(meta) | Self::V3(meta) | Self::V4(meta) => &meta.db_version, + Self::V2(meta) | Self::V3(meta) | Self::V4(meta) | Self::V5(meta) => &meta.db_version, } } - pub fn version(&self) -> &str { + pub fn version(&self) -> &'static str { match self { MetadataVersion::V1(_) => "V1", MetadataVersion::V2(_) => "V2", MetadataVersion::V3(_) => "V3", MetadataVersion::V4(_) => "V4", + MetadataVersion::V5(_) => "V5", } } pub fn dump_date(&self) -> Option<&OffsetDateTime> { match self { MetadataVersion::V1(_) => None, - MetadataVersion::V2(meta) | MetadataVersion::V3(meta) | MetadataVersion::V4(meta) => { - Some(&meta.dump_date) - } + MetadataVersion::V2(meta) + | MetadataVersion::V3(meta) + | MetadataVersion::V4(meta) + | MetadataVersion::V5(meta) => Some(&meta.dump_date), } } } @@ -294,7 +300,7 @@ where let temp_dump_dir = tokio::task::spawn_blocking(tempfile::TempDir::new).await??; let temp_dump_path = temp_dump_dir.path().to_owned(); - let meta = MetadataVersion::new_v4(self.index_db_size, self.task_store_size); + let meta = MetadataVersion::new_v5(self.index_db_size, self.task_store_size); let meta_path = temp_dump_path.join(META_FILE_NAME); let mut meta_file = File::create(&meta_path)?; serde_json::to_writer(&mut meta_file, &meta)?; @@ -306,11 +312,11 @@ where AuthController::dump(&self.db_path, &temp_dump_path)?; TaskStore::dump( self.env.clone(), - &self.dump_path, + &temp_dump_path, self.update_file_store.clone(), ) .await?; - self.index_resolver.dump(&self.dump_path).await?; + self.index_resolver.dump(&temp_dump_path).await?; let dump_path = self.dump_path.clone(); let dump_path = tokio::task::spawn_blocking(move || -> Result { From f0aceb4fba7a0598874803bf0fbc99db605e7486 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Mon, 23 May 2022 15:58:02 +0200 Subject: [PATCH 14/33] remove unused files --- meilisearch-lib/src/dump/actor.rs | 190 ------------------------ meilisearch-lib/src/dump/handle_impl.rs | 26 ---- meilisearch-lib/src/dump/message.rs | 13 -- 3 files changed, 229 deletions(-) delete mode 100644 meilisearch-lib/src/dump/actor.rs delete mode 100644 meilisearch-lib/src/dump/handle_impl.rs delete mode 100644 meilisearch-lib/src/dump/message.rs diff --git a/meilisearch-lib/src/dump/actor.rs b/meilisearch-lib/src/dump/actor.rs deleted file mode 100644 index b7f615e44..000000000 --- a/meilisearch-lib/src/dump/actor.rs +++ /dev/null @@ -1,190 +0,0 @@ -use std::collections::HashMap; -use std::path::{Path, PathBuf}; -use std::sync::Arc; - -use async_stream::stream; -use futures::{lock::Mutex, stream::StreamExt}; -use log::{error, trace}; -use time::macros::format_description; -use time::OffsetDateTime; -use tokio::sync::{mpsc, oneshot, RwLock}; - -use super::error::{DumpError, Result}; -use super::{DumpInfo, DumpJob, DumpMsg, DumpStatus}; -use crate::tasks::Scheduler; -use 
crate::update_file_store::UpdateFileStore; - -pub const CONCURRENT_DUMP_MSG: usize = 10; - -pub struct DumpActor { - inbox: Option>, - update_file_store: UpdateFileStore, - scheduler: Arc>, - dump_path: PathBuf, - analytics_path: PathBuf, - lock: Arc>, - dump_infos: Arc>>, - update_db_size: usize, - index_db_size: usize, -} - -/// Generate uid from creation date -fn generate_uid() -> String { - OffsetDateTime::now_utc() - .format(format_description!( - "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" - )) - .unwrap() -} - -impl DumpActor { - pub fn new( - inbox: mpsc::Receiver, - update_file_store: UpdateFileStore, - scheduler: Arc>, - dump_path: impl AsRef, - analytics_path: impl AsRef, - index_db_size: usize, - update_db_size: usize, - ) -> Self { - let dump_infos = Arc::new(RwLock::new(HashMap::new())); - let lock = Arc::new(Mutex::new(())); - Self { - inbox: Some(inbox), - scheduler, - update_file_store, - dump_path: dump_path.as_ref().into(), - analytics_path: analytics_path.as_ref().into(), - dump_infos, - lock, - index_db_size, - update_db_size, - } - } - - pub async fn run(mut self) { - trace!("Started dump actor."); - - let mut inbox = self - .inbox - .take() - .expect("Dump Actor must have a inbox at this point."); - - let stream = stream! { - loop { - match inbox.recv().await { - Some(msg) => yield msg, - None => break, - } - } - }; - - stream - .for_each_concurrent(Some(CONCURRENT_DUMP_MSG), |msg| self.handle_message(msg)) - .await; - - error!("Dump actor stopped."); - } - - async fn handle_message(&self, msg: DumpMsg) { - use DumpMsg::*; - - match msg { - CreateDump { ret } => { - let _ = self.handle_create_dump(ret).await; - } - DumpInfo { ret, uid } => { - let _ = ret.send(self.handle_dump_info(uid).await); - } - } - } - - async fn handle_create_dump(&self, ret: oneshot::Sender>) { - let uid = generate_uid(); - let info = DumpInfo::new(uid.clone(), DumpStatus::InProgress); - - let _lock = match self.lock.try_lock() { - Some(lock) => lock, - None => { - ret.send(Err(DumpError::DumpAlreadyRunning)) - .expect("Dump actor is dead"); - return; - } - }; - - self.dump_infos - .write() - .await - .insert(uid.clone(), info.clone()); - - ret.send(Ok(info)).expect("Dump actor is dead"); - - let task = DumpJob { - dump_path: self.dump_path.clone(), - db_path: self.analytics_path.clone(), - update_file_store: self.update_file_store.clone(), - uid: uid.clone(), - update_db_size: self.update_db_size, - index_db_size: self.index_db_size, - }; - - let task_result = tokio::task::spawn_local(task.run()).await; - - let mut dump_infos = self.dump_infos.write().await; - let dump_infos = dump_infos - .get_mut(&uid) - .expect("dump entry deleted while lock was acquired"); - - match task_result { - Ok(Ok(())) => { - dump_infos.done(); - trace!("Dump succeed"); - } - Ok(Err(e)) => { - dump_infos.with_error(e.to_string()); - error!("Dump failed: {}", e); - } - Err(_) => { - dump_infos.with_error("Unexpected error while performing dump.".to_string()); - error!("Dump panicked. 
Dump status set to failed"); - } - }; - } - - async fn handle_dump_info(&self, uid: String) -> Result { - match self.dump_infos.read().await.get(&uid) { - Some(info) => Ok(info.clone()), - _ => Err(DumpError::DumpDoesNotExist(uid)), - } - } -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_generate_uid() { - let current = OffsetDateTime::now_utc(); - - let uid = generate_uid(); - let (date, time) = uid.split_once('-').unwrap(); - - let date = time::Date::parse( - date, - &format_description!("[year repr:full][month repr:numerical][day padding:zero]"), - ) - .unwrap(); - let time = time::Time::parse( - time, - &format_description!( - "[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" - ), - ) - .unwrap(); - let datetime = time::PrimitiveDateTime::new(date, time); - let datetime = datetime.assume_utc(); - - assert!(current - datetime < time::Duration::SECOND); - } -} diff --git a/meilisearch-lib/src/dump/handle_impl.rs b/meilisearch-lib/src/dump/handle_impl.rs deleted file mode 100644 index 9577b3663..000000000 --- a/meilisearch-lib/src/dump/handle_impl.rs +++ /dev/null @@ -1,26 +0,0 @@ -use tokio::sync::{mpsc, oneshot}; - -use super::error::Result; -use super::{DumpActorHandle, DumpMsg}; - -#[derive(Clone)] -pub struct DumpActorHandleImpl { - pub sender: mpsc::Sender, -} - -#[async_trait::async_trait] -impl DumpActorHandle for DumpActorHandleImpl { - async fn create_dump(&self) -> Result { - let (ret, receiver) = oneshot::channel(); - let msg = DumpMsg::CreateDump { ret }; - let _ = self.sender.send(msg).await; - receiver.await.expect("IndexActor has been killed") - } - - async fn dump_info(&self, uid: String) -> Result { - let (ret, receiver) = oneshot::channel(); - let msg = DumpMsg::DumpInfo { ret, uid }; - let _ = self.sender.send(msg).await; - receiver.await.expect("IndexActor has been killed") - } -} diff --git a/meilisearch-lib/src/dump/message.rs b/meilisearch-lib/src/dump/message.rs deleted file mode 100644 index 8ebeb3b57..000000000 --- a/meilisearch-lib/src/dump/message.rs +++ /dev/null @@ -1,13 +0,0 @@ -use tokio::sync::oneshot; - -use super::error::Result; - -pub enum DumpMsg { - CreateDump { - ret: oneshot::Sender>, - }, - DumpInfo { - uid: String, - ret: oneshot::Sender>, - }, -} From 8743d73973130a4416287c0e97fe654c2c5669d8 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Mon, 23 May 2022 16:01:43 +0200 Subject: [PATCH 15/33] move DumpHandler to own module --- meilisearch-lib/src/dump/handler.rs | 89 ++++++++++++++++++++++++++ meilisearch-lib/src/dump/mod.rs | 98 ++--------------------------- 2 files changed, 95 insertions(+), 92 deletions(-) create mode 100644 meilisearch-lib/src/dump/handler.rs diff --git a/meilisearch-lib/src/dump/handler.rs b/meilisearch-lib/src/dump/handler.rs new file mode 100644 index 000000000..b168e162a --- /dev/null +++ b/meilisearch-lib/src/dump/handler.rs @@ -0,0 +1,89 @@ +use std::{fs::File, path::PathBuf, sync::Arc}; + +use log::{info, trace}; +use meilisearch_auth::AuthController; +use milli::heed::Env; +use time::{macros::format_description, OffsetDateTime}; +use tokio::fs::create_dir_all; + +use crate::analytics; +use crate::compression::to_tar_gz; +use crate::dump::error::{DumpError, Result}; +use crate::dump::{MetadataVersion, META_FILE_NAME}; +use crate::index_resolver::{index_store::IndexStore, meta_store::IndexMetaStore, IndexResolver}; +use crate::tasks::TaskStore; +use crate::update_file_store::UpdateFileStore; + +/// Generate uid from creation date +pub fn generate_uid() -> String { + 
OffsetDateTime::now_utc() + .format(format_description!( + "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" + )) + .unwrap() +} + +pub struct DumpHandler { + pub dump_path: PathBuf, + pub db_path: PathBuf, + pub update_file_store: UpdateFileStore, + pub task_store_size: usize, + pub index_db_size: usize, + pub env: Arc, + pub index_resolver: Arc>, +} + +impl DumpHandler +where + U: IndexMetaStore + Sync + Send + 'static, + I: IndexStore + Sync + Send + 'static, +{ + pub async fn run(&self, uid: String) -> Result<()> { + trace!("Performing dump."); + + create_dir_all(&self.dump_path).await?; + + let temp_dump_dir = tokio::task::spawn_blocking(tempfile::TempDir::new).await??; + let temp_dump_path = temp_dump_dir.path().to_owned(); + + let meta = MetadataVersion::new_v5(self.index_db_size, self.task_store_size); + let meta_path = temp_dump_path.join(META_FILE_NAME); + // TODO: blocking + let mut meta_file = File::create(&meta_path)?; + serde_json::to_writer(&mut meta_file, &meta)?; + analytics::copy_user_id(&self.db_path, &temp_dump_path); + + create_dir_all(&temp_dump_path.join("indexes")).await?; + + // TODO: this is blocking!! + AuthController::dump(&self.db_path, &temp_dump_path)?; + TaskStore::dump( + self.env.clone(), + &temp_dump_path, + self.update_file_store.clone(), + ) + .await?; + self.index_resolver.dump(&temp_dump_path).await?; + + let dump_path = self.dump_path.clone(); + let dump_path = tokio::task::spawn_blocking(move || -> Result { + // for now we simply copy the updates/updates_files + // FIXME: We may copy more files than necessary, if new files are added while we are + // performing the dump. We need a way to filter them out. + + let temp_dump_file = tempfile::NamedTempFile::new_in(&dump_path)?; + to_tar_gz(temp_dump_path, temp_dump_file.path()) + .map_err(|e| DumpError::Internal(e.into()))?; + + let dump_path = dump_path.join(uid).with_extension("dump"); + temp_dump_file.persist(&dump_path)?; + + Ok(dump_path) + }) + .await??; + + info!("Created dump in {:?}.", dump_path); + + Ok(()) + } +} diff --git a/meilisearch-lib/src/dump/mod.rs b/meilisearch-lib/src/dump/mod.rs index 084fbd63f..c80554301 100644 --- a/meilisearch-lib/src/dump/mod.rs +++ b/meilisearch-lib/src/dump/mod.rs @@ -1,37 +1,24 @@ use std::fs::File; -use std::path::{Path, PathBuf}; -use std::sync::Arc; +use std::path::Path; use anyhow::bail; -use log::{info, trace}; -use meilisearch_auth::AuthController; -use milli::heed::Env; +use log::info; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; use tempfile::TempDir; -use time::macros::format_description; -use tokio::fs::create_dir_all; -use crate::analytics; -use crate::compression::{from_tar_gz, to_tar_gz}; -use crate::dump::error::DumpError; -use crate::index_resolver::index_store::IndexStore; -use crate::index_resolver::meta_store::IndexMetaStore; -use crate::index_resolver::IndexResolver; +use crate::compression::from_tar_gz; use crate::options::IndexerOpts; -use crate::tasks::TaskStore; -use crate::update_file_store::UpdateFileStore; -use error::Result; use self::loaders::{v2, v3, v4}; -// mod actor; +pub use handler::{generate_uid, DumpHandler}; + mod compat; pub mod error; -// mod handle_impl; +mod handler; mod loaders; -// mod message; const META_FILE_NAME: &str = "metadata.json"; @@ -268,79 +255,6 @@ fn persist_dump(dst_path: impl AsRef, tmp_dst: TempDir) -> anyhow::Result< Ok(()) } -/// Generate uid from creation date -pub fn generate_uid() -> 
String { - OffsetDateTime::now_utc() - .format(format_description!( - "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" - )) - .unwrap() -} - -pub struct DumpHandler { - pub dump_path: PathBuf, - pub db_path: PathBuf, - pub update_file_store: UpdateFileStore, - pub task_store_size: usize, - pub index_db_size: usize, - pub env: Arc, - pub index_resolver: Arc>, -} - -impl DumpHandler -where - U: IndexMetaStore + Sync + Send + 'static, - I: IndexStore + Sync + Send + 'static, -{ - pub async fn run(&self, uid: String) -> Result<()> { - trace!("Performing dump."); - - create_dir_all(&self.dump_path).await?; - - let temp_dump_dir = tokio::task::spawn_blocking(tempfile::TempDir::new).await??; - let temp_dump_path = temp_dump_dir.path().to_owned(); - - let meta = MetadataVersion::new_v5(self.index_db_size, self.task_store_size); - let meta_path = temp_dump_path.join(META_FILE_NAME); - let mut meta_file = File::create(&meta_path)?; - serde_json::to_writer(&mut meta_file, &meta)?; - analytics::copy_user_id(&self.db_path, &temp_dump_path); - - create_dir_all(&temp_dump_path.join("indexes")).await?; - - // TODO: this is blocking!! - AuthController::dump(&self.db_path, &temp_dump_path)?; - TaskStore::dump( - self.env.clone(), - &temp_dump_path, - self.update_file_store.clone(), - ) - .await?; - self.index_resolver.dump(&temp_dump_path).await?; - - let dump_path = self.dump_path.clone(); - let dump_path = tokio::task::spawn_blocking(move || -> Result { - // for now we simply copy the updates/updates_files - // FIXME: We may copy more files than necessary, if new files are added while we are - // performing the dump. We need a way to filter them out. - - let temp_dump_file = tempfile::NamedTempFile::new_in(&dump_path)?; - to_tar_gz(temp_dump_path, temp_dump_file.path()) - .map_err(|e| DumpError::Internal(e.into()))?; - - let dump_path = dump_path.join(uid).with_extension("dump"); - temp_dump_file.persist(&dump_path)?; - - Ok(dump_path) - }) - .await??; - - info!("Created dump in {:?}.", dump_path); - - Ok(()) - } -} - #[cfg(test)] mod test { use nelson::Mocker; From 7b47e4e87a4c4d80c718fb2b2c3175a598a5bb50 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Mon, 23 May 2022 16:30:06 +0200 Subject: [PATCH 16/33] snapshot batch handler --- meilisearch-lib/src/index_controller/mod.rs | 5 ++- meilisearch-lib/src/snapshot.rs | 2 +- .../src/tasks/batch_handlers/mod.rs | 1 + .../tasks/batch_handlers/snapshot_handler.rs | 31 +++++++++++++++++++ meilisearch-lib/src/tasks/mod.rs | 1 + meilisearch-lib/src/tasks/scheduler.rs | 6 +--- 6 files changed, 39 insertions(+), 7 deletions(-) create mode 100644 meilisearch-lib/src/tasks/batch_handlers/snapshot_handler.rs diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index 1eb61d9f0..039bd8dfa 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -27,7 +27,9 @@ use crate::options::{IndexerOpts, SchedulerConfig}; use crate::snapshot::{load_snapshot, SnapshotService}; use crate::tasks::error::TaskError; use crate::tasks::task::{DocumentDeletion, Task, TaskContent, TaskId}; -use crate::tasks::{BatchHandler, EmptyBatchHandler, Scheduler, TaskFilter, TaskStore}; +use crate::tasks::{ + BatchHandler, EmptyBatchHandler, Scheduler, SnapshotHandler, TaskFilter, TaskStore, +}; use error::Result; use self::error::IndexControllerError; @@ -235,6 +237,7 @@ impl IndexControllerBuilder { let 
handlers: Vec> = vec![ index_resolver.clone(), dump_handler, + Arc::new(SnapshotHandler), // dummy handler to catch all empty batches Arc::new(EmptyBatchHandler), ]; diff --git a/meilisearch-lib/src/snapshot.rs b/meilisearch-lib/src/snapshot.rs index 6dda0f3e8..527195729 100644 --- a/meilisearch-lib/src/snapshot.rs +++ b/meilisearch-lib/src/snapshot.rs @@ -38,7 +38,7 @@ impl SnapshotService { meta_env_size: self.meta_env_size, index_size: self.index_size, }; - self.scheduler.write().await.register_snapshot(snapshot_job); + self.scheduler.write().await.schedule_snapshot(snapshot_job); sleep(self.snapshot_period).await; } } diff --git a/meilisearch-lib/src/tasks/batch_handlers/mod.rs b/meilisearch-lib/src/tasks/batch_handlers/mod.rs index f72c1b760..9199e872d 100644 --- a/meilisearch-lib/src/tasks/batch_handlers/mod.rs +++ b/meilisearch-lib/src/tasks/batch_handlers/mod.rs @@ -1,3 +1,4 @@ pub mod dump_handler; pub mod empty_handler; mod index_resolver_handler; +pub mod snapshot_handler; diff --git a/meilisearch-lib/src/tasks/batch_handlers/snapshot_handler.rs b/meilisearch-lib/src/tasks/batch_handlers/snapshot_handler.rs new file mode 100644 index 000000000..2948fb4ff --- /dev/null +++ b/meilisearch-lib/src/tasks/batch_handlers/snapshot_handler.rs @@ -0,0 +1,31 @@ +use crate::tasks::batch::{Batch, BatchContent}; +use crate::tasks::BatchHandler; + +pub struct SnapshotHandler; + +#[async_trait::async_trait] +impl BatchHandler for SnapshotHandler { + fn accept(&self, batch: &Batch) -> bool { + match batch.content { + BatchContent::Snapshot(_) => true, + _ => false, + } + } + + async fn process_batch(&self, batch: Batch) -> Batch { + match batch.content { + BatchContent::Snapshot(job) => { + if let Err(e) = job.run().await { + log::error!("snapshot error: {e}"); + } + } + _ => unreachable!(), + } + + Batch::empty() + } + + async fn finish(&self, _: &Batch) { + () + } +} diff --git a/meilisearch-lib/src/tasks/mod.rs b/meilisearch-lib/src/tasks/mod.rs index bc01c4901..4c51ec207 100644 --- a/meilisearch-lib/src/tasks/mod.rs +++ b/meilisearch-lib/src/tasks/mod.rs @@ -1,6 +1,7 @@ use async_trait::async_trait; pub use batch_handlers::empty_handler::EmptyBatchHandler; +pub use batch_handlers::snapshot_handler::SnapshotHandler; pub use scheduler::Scheduler; pub use task_store::TaskFilter; diff --git a/meilisearch-lib/src/tasks/scheduler.rs b/meilisearch-lib/src/tasks/scheduler.rs index 1b3fd6daa..6089efd7f 100644 --- a/meilisearch-lib/src/tasks/scheduler.rs +++ b/meilisearch-lib/src/tasks/scheduler.rs @@ -279,10 +279,6 @@ impl Scheduler { self.tasks.insert(task); } - pub fn register_snapshot(&mut self, job: SnapshotJob) { - self.snapshots.push_back(job); - } - /// Clears the processing list; this method should be called when the processing of a batch is finished. 
pub fn finish(&mut self) { self.processing = Processing::Nothing; @@ -340,7 +336,7 @@ impl Scheduler { Ok(tasks) } - pub async fn schedule_snapshot(&mut self, job: SnapshotJob) { + pub fn schedule_snapshot(&mut self, job: SnapshotJob) { self.snapshots.push_back(job); self.notify(); } From 0f9c134114051432090f395ff1f5ae2cc33eb121 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Tue, 24 May 2022 16:47:10 +0200 Subject: [PATCH 17/33] fix tests --- meilisearch-http/tests/auth/authorization.rs | 1 - meilisearch-lib/src/dump/mod.rs | 118 ------------------- meilisearch-lib/src/index_controller/mod.rs | 15 +-- meilisearch-lib/src/index_resolver/mod.rs | 50 ++++---- meilisearch-lib/src/tasks/scheduler.rs | 33 ++---- 5 files changed, 45 insertions(+), 172 deletions(-) diff --git a/meilisearch-http/tests/auth/authorization.rs b/meilisearch-http/tests/auth/authorization.rs index 30df2dd2d..25f32eb12 100644 --- a/meilisearch-http/tests/auth/authorization.rs +++ b/meilisearch-http/tests/auth/authorization.rs @@ -45,7 +45,6 @@ pub static AUTHORIZATIONS: Lazy hashset!{"stats.get", "*"}, ("GET", "/stats") => hashset!{"stats.get", "*"}, ("POST", "/dumps") => hashset!{"dumps.create", "*"}, - ("GET", "/dumps/0/status") => hashset!{"dumps.get", "*"}, ("GET", "/version") => hashset!{"version", "*"}, } }); diff --git a/meilisearch-lib/src/dump/mod.rs b/meilisearch-lib/src/dump/mod.rs index c80554301..ab1c63d6d 100644 --- a/meilisearch-lib/src/dump/mod.rs +++ b/meilisearch-lib/src/dump/mod.rs @@ -254,121 +254,3 @@ fn persist_dump(dst_path: impl AsRef, tmp_dst: TempDir) -> anyhow::Result< Ok(()) } - -#[cfg(test)] -mod test { - use nelson::Mocker; - use once_cell::sync::Lazy; - - use super::*; - use crate::index_resolver::error::IndexResolverError; - use crate::options::SchedulerConfig; - use crate::tasks::error::Result as TaskResult; - use crate::tasks::task::{Task, TaskId}; - use crate::tasks::{BatchHandler, TaskFilter, TaskStore}; - use crate::update_file_store::UpdateFileStore; - - fn setup() { - static SETUP: Lazy<()> = Lazy::new(|| { - if cfg!(windows) { - std::env::set_var("TMP", "."); - } else { - std::env::set_var("TMPDIR", "."); - } - }); - - // just deref to make sure the env is setup - *SETUP - } - - #[actix_rt::test] - async fn test_dump_normal() { - setup(); - - let tmp = tempfile::tempdir().unwrap(); - - let mocker = Mocker::default(); - let update_file_store = UpdateFileStore::mock(mocker); - - let mut performer = BatchHandler::new(); - performer - .expect_process_job() - .once() - .returning(|j| match j { - Job::Dump { ret, .. 
} => { - let (sender, _receiver) = oneshot::channel(); - ret.send(Ok(sender)).unwrap(); - } - _ => unreachable!(), - }); - let performer = Arc::new(performer); - let mocker = Mocker::default(); - mocker - .when::<(&Path, UpdateFileStore), TaskResult<()>>("dump") - .then(|_| Ok(())); - mocker - .when::<(Option, Option, Option), TaskResult>>( - "list_tasks", - ) - .then(|_| Ok(Vec::new())); - let store = TaskStore::mock(mocker); - let config = SchedulerConfig::default(); - - let scheduler = Scheduler::new(store, performer, config).unwrap(); - - let task = DumpJob { - dump_path: tmp.path().into(), - // this should do nothing - update_file_store, - db_path: tmp.path().into(), - uid: String::from("test"), - update_db_size: 4096 * 10, - index_db_size: 4096 * 10, - scheduler, - }; - - task.run().await.unwrap(); - } - - #[actix_rt::test] - async fn error_performing_dump() { - let tmp = tempfile::tempdir().unwrap(); - - let mocker = Mocker::default(); - let file_store = UpdateFileStore::mock(mocker); - - let mocker = Mocker::default(); - mocker - .when::<(Option, Option, Option), TaskResult>>( - "list_tasks", - ) - .then(|_| Ok(Vec::new())); - let task_store = TaskStore::mock(mocker); - let mut performer = BatchHandler::new(); - performer - .expect_process_job() - .once() - .returning(|job| match job { - Job::Dump { ret, .. } => drop(ret.send(Err(IndexResolverError::BadlyFormatted( - "blabla".to_string(), - )))), - _ => unreachable!(), - }); - let performer = Arc::new(performer); - - let scheduler = Scheduler::new(task_store, performer, SchedulerConfig::default()).unwrap(); - - let task = DumpJob { - dump_path: tmp.path().into(), - // this should do nothing - db_path: tmp.path().into(), - update_file_store: file_store, - uid: String::from("test"), - update_db_size: 4096 * 10, - index_db_size: 4096 * 10, - scheduler, - }; - - assert!(task.run().await.is_err()); - } -} diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index 039bd8dfa..b34523fd5 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -664,13 +664,11 @@ mod test { index_resolver: Arc>, task_store: TaskStore, update_file_store: UpdateFileStore, - dump_handle: DumpActorHandleImpl, scheduler: Arc>, ) -> Self { IndexController { index_resolver, task_store, - dump_handle, update_file_store, scheduler, } @@ -754,19 +752,12 @@ mod test { let task_store = TaskStore::mock(task_store_mocker); let scheduler = Scheduler::new( task_store.clone(), - index_resolver.clone(), + vec![index_resolver.clone()], SchedulerConfig::default(), ) .unwrap(); - let (sender, _) = mpsc::channel(1); - let dump_handle = DumpActorHandleImpl { sender }; - let index_controller = IndexController::mock( - index_resolver, - task_store, - update_file_store, - dump_handle, - scheduler, - ); + let index_controller = + IndexController::mock(index_resolver, task_store, update_file_store, scheduler); let r = index_controller .search(index_uid.to_owned(), query.clone()) diff --git a/meilisearch-lib/src/index_resolver/mod.rs b/meilisearch-lib/src/index_resolver/mod.rs index f463cd24d..cc0308f9e 100644 --- a/meilisearch-lib/src/index_resolver/mod.rs +++ b/meilisearch-lib/src/index_resolver/mod.rs @@ -411,15 +411,21 @@ mod test { use nelson::Mocker; use proptest::prelude::*; - use crate::index::{ - error::{IndexError, Result as IndexResult}, - Checked, IndexMeta, IndexStats, Settings, + use crate::{ + index::{ + error::{IndexError, Result as IndexResult}, + Checked, IndexMeta, 
IndexStats, Settings, + }, + tasks::{batch::Batch, BatchHandler}, }; use index_store::MockIndexStore; use meta_store::MockIndexMetaStore; + // TODO: this test is ignored; it has become too complex to maintain, so we implement + // handler logic tests instead. proptest! { #[test] + #[ignore] fn test_process_task( task in any::<Task>().prop_filter("IndexUid should be Some", |s| s.index_uid.is_some()), index_exists in any::<bool>(), @@ -497,7 +503,7 @@ mod test { .then(move |_| result()); } } - TaskContent::Dump { path: _ } => { } + TaskContent::Dump { .. } => { } } mocker.when::<(), IndexResult<IndexStats>>("stats") @@ -561,24 +567,26 @@ mod test { let update_file_store = UpdateFileStore::mock(mocker); let index_resolver = IndexResolver::new(uuid_store, index_store, update_file_store); - let batch = Batch { id: 1, created_at: OffsetDateTime::now_utc(), tasks: vec![task.clone()] }; - let result = index_resolver.process_batch(batch).await; + let batch = Batch { id: Some(1), created_at: OffsetDateTime::now_utc(), content: crate::tasks::batch::BatchContent::IndexUpdate(task.clone()) }; + if index_resolver.accept(&batch) { + let result = index_resolver.process_batch(batch).await; - // Test for some expected output scenarios: - // Index creation and deletion cannot fail because of a failed index op, since they - // don't perform index ops. - if index_op_fails && !matches!(task.content, TaskContent::IndexDeletion | TaskContent::IndexCreation { primary_key: None } | TaskContent::IndexUpdate { primary_key: None } | TaskContent::Dump { .. }) - || (index_exists && matches!(task.content, TaskContent::IndexCreation { .. })) - || (!index_exists && matches!(task.content, TaskContent::IndexDeletion - | TaskContent::DocumentDeletion(_) - | TaskContent::SettingsUpdate { is_deletion: true, ..} - | TaskContent::SettingsUpdate { allow_index_creation: false, ..} - | TaskContent::DocumentAddition { allow_index_creation: false, ..} - | TaskContent::IndexUpdate { .. } )) - { - assert!(matches!(result.tasks[0].events.last().unwrap(), TaskEvent::Failed { .. }), "{:?}", result); - } else { - assert!(matches!(result.tasks[0].events.last().unwrap(), TaskEvent::Succeded { .. }), "{:?}", result); + // Test for some expected output scenarios: + // Index creation and deletion cannot fail because of a failed index op, since they + // don't perform index ops. + if index_op_fails && !matches!(task.content, TaskContent::IndexDeletion | TaskContent::IndexCreation { primary_key: None } | TaskContent::IndexUpdate { primary_key: None } | TaskContent::Dump { .. }) + || (index_exists && matches!(task.content, TaskContent::IndexCreation { .. })) + || (!index_exists && matches!(task.content, TaskContent::IndexDeletion + | TaskContent::DocumentDeletion(_) + | TaskContent::SettingsUpdate { is_deletion: true, ..} + | TaskContent::SettingsUpdate { allow_index_creation: false, ..} + | TaskContent::DocumentAddition { allow_index_creation: false, ..} + | TaskContent::IndexUpdate { .. } )) + { + assert!(matches!(result.content.first().unwrap().events.last().unwrap(), TaskEvent::Failed { .. }), "{:?}", result); + } else { + assert!(matches!(result.content.first().unwrap().events.last().unwrap(), TaskEvent::Succeded { .. 
}), "{:?}", result); + } } }); } diff --git a/meilisearch-lib/src/tasks/scheduler.rs b/meilisearch-lib/src/tasks/scheduler.rs index 6089efd7f..177cc0229 100644 --- a/meilisearch-lib/src/tasks/scheduler.rs +++ b/meilisearch-lib/src/tasks/scheduler.rs @@ -411,7 +411,7 @@ impl Scheduler { } } -#[derive(Debug, Default)] +#[derive(Debug, Default, PartialEq)] pub enum Processing { DocumentAdditions(Vec), IndexUpdate(TaskId), @@ -586,31 +586,24 @@ mod test { queue.insert(gen_task(6, "test2", content.clone())); queue.insert(gen_task(7, "test1", content)); - let mut batch = Vec::new(); - let config = SchedulerConfig::default(); - make_batch(&mut queue, &mut batch, &config); - assert_eq!(batch, &[0, 4]); + let batch = make_batch(&mut queue, &config); + assert_eq!(batch, Processing::DocumentAdditions(vec![0, 4])); - batch.clear(); - make_batch(&mut queue, &mut batch, &config); - assert_eq!(batch, &[1]); + let batch = make_batch(&mut queue, &config); + assert_eq!(batch, Processing::DocumentAdditions(vec![1])); - batch.clear(); - make_batch(&mut queue, &mut batch, &config); - assert_eq!(batch, &[2]); + let batch = make_batch(&mut queue, &config); + assert_eq!(batch, Processing::IndexUpdate(2)); - batch.clear(); - make_batch(&mut queue, &mut batch, &config); - assert_eq!(batch, &[3, 6]); + let batch = make_batch(&mut queue, &config); + assert_eq!(batch, Processing::DocumentAdditions(vec![3, 6])); - batch.clear(); - make_batch(&mut queue, &mut batch, &config); - assert_eq!(batch, &[5]); + let batch = make_batch(&mut queue, &config); + assert_eq!(batch, Processing::IndexUpdate(5)); - batch.clear(); - make_batch(&mut queue, &mut batch, &config); - assert_eq!(batch, &[7]); + let batch = make_batch(&mut queue, &config); + assert_eq!(batch, Processing::DocumentAdditions(vec![7])); assert!(queue.is_empty()); } From 64654ef7c34665bac3310f4b2a2a8b3bec62498a Mon Sep 17 00:00:00 2001 From: ad hoc Date: Wed, 25 May 2022 08:46:06 +0200 Subject: [PATCH 18/33] rename batch_handler to handler --- .../src/tasks/{batch_handlers => handlers}/dump_handler.rs | 0 .../src/tasks/{batch_handlers => handlers}/empty_handler.rs | 0 .../{batch_handlers => handlers}/index_resolver_handler.rs | 0 .../src/tasks/{batch_handlers => handlers}/mod.rs | 0 .../tasks/{batch_handlers => handlers}/snapshot_handler.rs | 0 meilisearch-lib/src/tasks/mod.rs | 6 +++--- 6 files changed, 3 insertions(+), 3 deletions(-) rename meilisearch-lib/src/tasks/{batch_handlers => handlers}/dump_handler.rs (100%) rename meilisearch-lib/src/tasks/{batch_handlers => handlers}/empty_handler.rs (100%) rename meilisearch-lib/src/tasks/{batch_handlers => handlers}/index_resolver_handler.rs (100%) rename meilisearch-lib/src/tasks/{batch_handlers => handlers}/mod.rs (100%) rename meilisearch-lib/src/tasks/{batch_handlers => handlers}/snapshot_handler.rs (100%) diff --git a/meilisearch-lib/src/tasks/batch_handlers/dump_handler.rs b/meilisearch-lib/src/tasks/handlers/dump_handler.rs similarity index 100% rename from meilisearch-lib/src/tasks/batch_handlers/dump_handler.rs rename to meilisearch-lib/src/tasks/handlers/dump_handler.rs diff --git a/meilisearch-lib/src/tasks/batch_handlers/empty_handler.rs b/meilisearch-lib/src/tasks/handlers/empty_handler.rs similarity index 100% rename from meilisearch-lib/src/tasks/batch_handlers/empty_handler.rs rename to meilisearch-lib/src/tasks/handlers/empty_handler.rs diff --git a/meilisearch-lib/src/tasks/batch_handlers/index_resolver_handler.rs b/meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs similarity index 100% 
rename from meilisearch-lib/src/tasks/batch_handlers/index_resolver_handler.rs rename to meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs diff --git a/meilisearch-lib/src/tasks/batch_handlers/mod.rs b/meilisearch-lib/src/tasks/handlers/mod.rs similarity index 100% rename from meilisearch-lib/src/tasks/batch_handlers/mod.rs rename to meilisearch-lib/src/tasks/handlers/mod.rs diff --git a/meilisearch-lib/src/tasks/batch_handlers/snapshot_handler.rs b/meilisearch-lib/src/tasks/handlers/snapshot_handler.rs similarity index 100% rename from meilisearch-lib/src/tasks/batch_handlers/snapshot_handler.rs rename to meilisearch-lib/src/tasks/handlers/snapshot_handler.rs diff --git a/meilisearch-lib/src/tasks/mod.rs b/meilisearch-lib/src/tasks/mod.rs index 4c51ec207..a8cb74aed 100644 --- a/meilisearch-lib/src/tasks/mod.rs +++ b/meilisearch-lib/src/tasks/mod.rs @@ -1,7 +1,7 @@ use async_trait::async_trait; -pub use batch_handlers::empty_handler::EmptyBatchHandler; -pub use batch_handlers::snapshot_handler::SnapshotHandler; +pub use handlers::empty_handler::EmptyBatchHandler; +pub use handlers::snapshot_handler::SnapshotHandler; pub use scheduler::Scheduler; pub use task_store::TaskFilter; @@ -14,8 +14,8 @@ use batch::Batch; use error::Result; pub mod batch; -mod batch_handlers; pub mod error; +mod handlers; mod scheduler; pub mod task; mod task_store; From 8349f38197e1c8e97ef4a11db9056119826e026e Mon Sep 17 00:00:00 2001 From: ad hoc Date: Wed, 25 May 2022 09:25:42 +0200 Subject: [PATCH 19/33] remove unused file --- meilisearch-lib/src/index_resolver/message.rs | 37 ------------------- 1 file changed, 37 deletions(-) delete mode 100644 meilisearch-lib/src/index_resolver/message.rs diff --git a/meilisearch-lib/src/index_resolver/message.rs b/meilisearch-lib/src/index_resolver/message.rs deleted file mode 100644 index 25a0d64a9..000000000 --- a/meilisearch-lib/src/index_resolver/message.rs +++ /dev/null @@ -1,37 +0,0 @@ -use std::{collections::HashSet, path::PathBuf}; - -use tokio::sync::oneshot; -use uuid::Uuid; - -use crate::index::Index; -use super::error::Result; - -pub enum IndexResolverMsg { - Get { - uid: String, - ret: oneshot::Sender>, - }, - Delete { - uid: String, - ret: oneshot::Sender>, - }, - List { - ret: oneshot::Sender>>, - }, - Insert { - uuid: Uuid, - name: String, - ret: oneshot::Sender>, - }, - SnapshotRequest { - path: PathBuf, - ret: oneshot::Sender>>, - }, - GetSize { - ret: oneshot::Sender>, - }, - DumpRequest { - path: PathBuf, - ret: oneshot::Sender>>, - }, -} From 3c85b2986542e9e9dcb9a8ee2bd114dbb16dd0da Mon Sep 17 00:00:00 2001 From: ad hoc Date: Wed, 25 May 2022 09:26:11 +0200 Subject: [PATCH 20/33] add doc to BatchHandler --- meilisearch-lib/src/tasks/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/meilisearch-lib/src/tasks/mod.rs b/meilisearch-lib/src/tasks/mod.rs index a8cb74aed..d8bc25bb7 100644 --- a/meilisearch-lib/src/tasks/mod.rs +++ b/meilisearch-lib/src/tasks/mod.rs @@ -28,6 +28,9 @@ pub trait BatchHandler: Sync + Send + 'static { fn accept(&self, batch: &Batch) -> bool; /// Processes the `Task` batch returning the batch with the `Task` updated. + /// + /// It is ok for this function to panic if it is handed a batch that hasn't been verified by + /// `accept` beforehand. async fn process_batch(&self, batch: Batch) -> Batch; /// `finish` is called when the result of `process` has been committed to the task store. 
This From 92d86ce6aa7c49547b402ef215804e2d13106227 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Wed, 25 May 2022 09:26:35 +0200 Subject: [PATCH 21/33] add tests to IndexResolver BatchHandler --- .../tasks/handlers/index_resolver_handler.rs | 98 +++++++++++++++++++ meilisearch-lib/src/tasks/handlers/mod.rs | 30 ++++++ meilisearch-lib/src/update_file_store.rs | 4 +- 3 files changed, 130 insertions(+), 2 deletions(-) diff --git a/meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs b/meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs index 41a78a22b..38c079baa 100644 --- a/meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs +++ b/meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs @@ -56,3 +56,101 @@ where } } } + +#[cfg(test)] +mod test { + use crate::index_resolver::{index_store::MockIndexStore, meta_store::MockIndexMetaStore}; + use crate::tasks::{ + handlers::test::task_to_batch, + task::{Task, TaskContent}, + }; + use crate::update_file_store::{Result as FileStoreResult, UpdateFileStore}; + + use super::*; + use milli::update::IndexDocumentsMethod; + use nelson::Mocker; + use proptest::prelude::*; + use uuid::Uuid; + + proptest! { + #[test] + fn test_accept_task( + task in any::(), + ) { + let batch = task_to_batch(task); + + let index_store = MockIndexStore::new(); + let meta_store = MockIndexMetaStore::new(); + let mocker = Mocker::default(); + let update_file_store = UpdateFileStore::mock(mocker); + let index_resolver = IndexResolver::new(meta_store, index_store, update_file_store); + + match batch.content { + BatchContent::DocumentAddtitionBatch(_) + | BatchContent::IndexUpdate(_) => assert!(index_resolver.accept(&batch)), + BatchContent::Dump(_) + | BatchContent::Snapshot(_) + | BatchContent::Empty => assert!(!index_resolver.accept(&batch)), + } + } + } + + #[actix_rt::test] + async fn finisher_called_on_document_update() { + let index_store = MockIndexStore::new(); + let meta_store = MockIndexMetaStore::new(); + let mocker = Mocker::default(); + let content_uuid = Uuid::new_v4(); + mocker + .when::>("delete") + .once() + .then(move |uuid| { + assert_eq!(uuid, content_uuid); + Ok(()) + }); + let update_file_store = UpdateFileStore::mock(mocker); + let index_resolver = IndexResolver::new(meta_store, index_store, update_file_store); + + let task = Task { + id: 1, + index_uid: None, + content: TaskContent::DocumentAddition { + content_uuid, + merge_strategy: IndexDocumentsMethod::ReplaceDocuments, + primary_key: None, + documents_count: 100, + allow_index_creation: true, + }, + events: Vec::new(), + }; + + let batch = task_to_batch(task); + + index_resolver.finish(&batch).await; + } + + #[actix_rt::test] + #[should_panic] + async fn panic_when_passed_unsupported_batch() { + let index_store = MockIndexStore::new(); + let meta_store = MockIndexMetaStore::new(); + let mocker = Mocker::default(); + let update_file_store = UpdateFileStore::mock(mocker); + let index_resolver = IndexResolver::new(meta_store, index_store, update_file_store); + + let task = Task { + id: 1, + index_uid: None, + content: TaskContent::Dump { + uid: String::from("hello"), + }, + events: Vec::new(), + }; + + let batch = task_to_batch(task); + + index_resolver.process_batch(batch).await; + } + + // TODO: test perform_batch. We need a Mocker for IndexResolver. 
+} diff --git a/meilisearch-lib/src/tasks/handlers/mod.rs b/meilisearch-lib/src/tasks/handlers/mod.rs index 9199e872d..f5fe8eaf2 100644 --- a/meilisearch-lib/src/tasks/handlers/mod.rs +++ b/meilisearch-lib/src/tasks/handlers/mod.rs @@ -2,3 +2,33 @@ pub mod dump_handler; pub mod empty_handler; mod index_resolver_handler; pub mod snapshot_handler; + +#[cfg(test)] +mod test { + use time::OffsetDateTime; + + use crate::tasks::{ + batch::{Batch, BatchContent}, + task::{Task, TaskContent}, + }; + + pub fn task_to_batch(task: Task) -> Batch { + let content = match task.content { + TaskContent::DocumentAddition { .. } => { + BatchContent::DocumentAddtitionBatch(vec![task]) + } + TaskContent::DocumentDeletion(_) + | TaskContent::SettingsUpdate { .. } + | TaskContent::IndexDeletion + | TaskContent::IndexCreation { .. } + | TaskContent::IndexUpdate { .. } => BatchContent::IndexUpdate(task), + TaskContent::Dump { .. } => BatchContent::Dump(task), + }; + + Batch { + id: Some(1), + created_at: OffsetDateTime::now_utc(), + content, + } + } +} diff --git a/meilisearch-lib/src/update_file_store.rs b/meilisearch-lib/src/update_file_store.rs index ec355a56e..3a60dfe26 100644 --- a/meilisearch-lib/src/update_file_store.rs +++ b/meilisearch-lib/src/update_file_store.rs @@ -26,7 +26,7 @@ pub struct UpdateFile { #[error("Error while persisting update to disk: {0}")] pub struct UpdateFileStoreError(Box); -type Result<T> = std::result::Result<T, UpdateFileStoreError>; +pub type Result<T> = std::result::Result<T, UpdateFileStoreError>; macro_rules! into_update_store_error { ($($other:path),*) => { @@ -249,7 +249,7 @@ mod test { pub async fn delete(&self, uuid: Uuid) -> Result<()> { match self { MockUpdateFileStore::Real(s) => s.delete(uuid).await, - MockUpdateFileStore::Mock(_) => todo!(), + MockUpdateFileStore::Mock(mocker) => unsafe { mocker.get("delete").call(uuid) }, } } } From 986a99296d8b41d98ca8a7ec015fcc53c8bcb39d Mon Sep 17 00:00:00 2001 From: ad hoc Date: Wed, 25 May 2022 11:24:35 +0200 Subject: [PATCH 22/33] remove useless dump test --- meilisearch-http/tests/dumps/mod.rs | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/meilisearch-http/tests/dumps/mod.rs b/meilisearch-http/tests/dumps/mod.rs index 8395ec3aa..22625f17f 100644 --- a/meilisearch-http/tests/dumps/mod.rs +++ b/meilisearch-http/tests/dumps/mod.rs @@ -6,23 +6,6 @@ use serde_json::json; use self::data::GetDump; -#[actix_rt::test] -async fn get_unexisting_dump_status() { - let server = Server::new().await; - - let (response, code) = server.get_dump_status("foobar").await; - assert_eq!(code, 404); - - let expected_response = json!({ - "message": "Dump `foobar` not found.", - "code": "dump_not_found", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#dump_not_found" - }); - - assert_eq!(response, expected_response); -} - // all the following tests are ignored on windows. 
See #2364 #[actix_rt::test] #[cfg_attr(target_os = "windows", ignore)] From 127171c8120b2b11c8e372fae140cff3c939b75a Mon Sep 17 00:00:00 2001 From: ad hoc Date: Wed, 25 May 2022 14:10:17 +0200 Subject: [PATCH 23/33] impl Default on Processing --- meilisearch-lib/src/tasks/scheduler.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/meilisearch-lib/src/tasks/scheduler.rs b/meilisearch-lib/src/tasks/scheduler.rs index 177cc0229..8510ba771 100644 --- a/meilisearch-lib/src/tasks/scheduler.rs +++ b/meilisearch-lib/src/tasks/scheduler.rs @@ -411,16 +411,21 @@ impl Scheduler { } } -#[derive(Debug, Default, PartialEq)] +#[derive(Debug, PartialEq)] pub enum Processing { DocumentAdditions(Vec), IndexUpdate(TaskId), Dump(TaskId), /// Variant used when there is nothing to process. - #[default] Nothing, } +impl Default for Processing { + fn default() -> Self { + Self::Nothing + } +} + enum ProcessingIter<'a> { Many(slice::Iter<'a, TaskId>), Single(Option), From 49d8fadb520c9edff37a06c3d67bf86589c1b9ec Mon Sep 17 00:00:00 2001 From: ad hoc Date: Wed, 25 May 2022 11:14:25 +0200 Subject: [PATCH 24/33] test dump handler --- meilisearch-lib/src/dump/handler.rs | 217 +++++++++++++----- meilisearch-lib/src/index_controller/mod.rs | 14 +- .../src/tasks/handlers/dump_handler.rs | 93 ++++++++ 3 files changed, 254 insertions(+), 70 deletions(-) diff --git a/meilisearch-lib/src/dump/handler.rs b/meilisearch-lib/src/dump/handler.rs index b168e162a..4adb7011a 100644 --- a/meilisearch-lib/src/dump/handler.rs +++ b/meilisearch-lib/src/dump/handler.rs @@ -1,18 +1,10 @@ -use std::{fs::File, path::PathBuf, sync::Arc}; +#[cfg(not(test))] +pub use real::DumpHandler; + +#[cfg(test)] +pub use test::MockDumpHandler as DumpHandler; -use log::{info, trace}; -use meilisearch_auth::AuthController; -use milli::heed::Env; use time::{macros::format_description, OffsetDateTime}; -use tokio::fs::create_dir_all; - -use crate::analytics; -use crate::compression::to_tar_gz; -use crate::dump::error::{DumpError, Result}; -use crate::dump::{MetadataVersion, META_FILE_NAME}; -use crate::index_resolver::{index_store::IndexStore, meta_store::IndexMetaStore, IndexResolver}; -use crate::tasks::TaskStore; -use crate::update_file_store::UpdateFileStore; /// Generate uid from creation date pub fn generate_uid() -> String { @@ -23,67 +15,166 @@ pub fn generate_uid() -> String { .unwrap() } -pub struct DumpHandler { - pub dump_path: PathBuf, - pub db_path: PathBuf, - pub update_file_store: UpdateFileStore, - pub task_store_size: usize, - pub index_db_size: usize, - pub env: Arc, - pub index_resolver: Arc>, -} +mod real { + use std::{fs::File, path::PathBuf, sync::Arc}; -impl DumpHandler -where - U: IndexMetaStore + Sync + Send + 'static, - I: IndexStore + Sync + Send + 'static, -{ - pub async fn run(&self, uid: String) -> Result<()> { - trace!("Performing dump."); + use log::{info, trace}; + use meilisearch_auth::AuthController; + use milli::heed::Env; + use tokio::fs::create_dir_all; - create_dir_all(&self.dump_path).await?; + use crate::analytics; + use crate::compression::to_tar_gz; + use crate::dump::error::{DumpError, Result}; + use crate::dump::{MetadataVersion, META_FILE_NAME}; + use crate::index_resolver::{ + index_store::IndexStore, meta_store::IndexMetaStore, IndexResolver, + }; + use crate::tasks::TaskStore; + use crate::update_file_store::UpdateFileStore; - let temp_dump_dir = tokio::task::spawn_blocking(tempfile::TempDir::new).await??; - let temp_dump_path = temp_dump_dir.path().to_owned(); + pub struct 
DumpHandler { + dump_path: PathBuf, + db_path: PathBuf, + update_file_store: UpdateFileStore, + task_store_size: usize, + index_db_size: usize, + env: Arc, + index_resolver: Arc>, + } - let meta = MetadataVersion::new_v5(self.index_db_size, self.task_store_size); - let meta_path = temp_dump_path.join(META_FILE_NAME); - // TODO: blocking - let mut meta_file = File::create(&meta_path)?; - serde_json::to_writer(&mut meta_file, &meta)?; - analytics::copy_user_id(&self.db_path, &temp_dump_path); + impl DumpHandler + where + U: IndexMetaStore + Sync + Send + 'static, + I: IndexStore + Sync + Send + 'static, + { + pub fn new( + dump_path: PathBuf, + db_path: PathBuf, + update_file_store: UpdateFileStore, + task_store_size: usize, + index_db_size: usize, + env: Arc, + index_resolver: Arc>, + ) -> Self { + Self { + dump_path, + db_path, + update_file_store, + task_store_size, + index_db_size, + env, + index_resolver, + } + } - create_dir_all(&temp_dump_path.join("indexes")).await?; + pub async fn run(&self, uid: String) -> Result<()> { + trace!("Performing dump."); - // TODO: this is blocking!! - AuthController::dump(&self.db_path, &temp_dump_path)?; - TaskStore::dump( - self.env.clone(), - &temp_dump_path, - self.update_file_store.clone(), - ) - .await?; - self.index_resolver.dump(&temp_dump_path).await?; + create_dir_all(&self.dump_path).await?; - let dump_path = self.dump_path.clone(); - let dump_path = tokio::task::spawn_blocking(move || -> Result { - // for now we simply copy the updates/updates_files - // FIXME: We may copy more files than necessary, if new files are added while we are - // performing the dump. We need a way to filter them out. + let temp_dump_dir = tokio::task::spawn_blocking(tempfile::TempDir::new).await??; + let temp_dump_path = temp_dump_dir.path().to_owned(); - let temp_dump_file = tempfile::NamedTempFile::new_in(&dump_path)?; - to_tar_gz(temp_dump_path, temp_dump_file.path()) - .map_err(|e| DumpError::Internal(e.into()))?; + let meta = MetadataVersion::new_v5(self.index_db_size, self.task_store_size); + let meta_path = temp_dump_path.join(META_FILE_NAME); + // TODO: blocking + let mut meta_file = File::create(&meta_path)?; + serde_json::to_writer(&mut meta_file, &meta)?; + analytics::copy_user_id(&self.db_path, &temp_dump_path); - let dump_path = dump_path.join(uid).with_extension("dump"); - temp_dump_file.persist(&dump_path)?; + create_dir_all(&temp_dump_path.join("indexes")).await?; - Ok(dump_path) - }) - .await??; + // TODO: this is blocking!! + AuthController::dump(&self.db_path, &temp_dump_path)?; + TaskStore::dump( + self.env.clone(), + &temp_dump_path, + self.update_file_store.clone(), + ) + .await?; + self.index_resolver.dump(&temp_dump_path).await?; - info!("Created dump in {:?}.", dump_path); + let dump_path = self.dump_path.clone(); + let dump_path = tokio::task::spawn_blocking(move || -> Result { + // for now we simply copy the updates/updates_files + // FIXME: We may copy more files than necessary, if new files are added while we are + // performing the dump. We need a way to filter them out. 
- Ok(()) + let temp_dump_file = tempfile::NamedTempFile::new_in(&dump_path)?; + to_tar_gz(temp_dump_path, temp_dump_file.path()) + .map_err(|e| DumpError::Internal(e.into()))?; + + let dump_path = dump_path.join(uid).with_extension("dump"); + temp_dump_file.persist(&dump_path)?; + + Ok(dump_path) + }) + .await??; + + info!("Created dump in {:?}.", dump_path); + + Ok(()) + } + } +} + +#[cfg(test)] +mod test { + use std::marker::PhantomData; + use std::path::PathBuf; + use std::sync::Arc; + + use milli::heed::Env; + use nelson::Mocker; + + use crate::dump::error::Result; + use crate::index_resolver::IndexResolver; + use crate::index_resolver::{index_store::IndexStore, meta_store::IndexMetaStore}; + use crate::update_file_store::UpdateFileStore; + + use super::*; + + pub enum MockDumpHandler { + Real(super::real::DumpHandler), + Mock(Mocker, PhantomData<(U, I)>), + } + + impl MockDumpHandler { + pub fn mock(mocker: Mocker) -> Self { + Self::Mock(mocker, PhantomData) + } + } + + impl MockDumpHandler + where + U: IndexMetaStore + Sync + Send + 'static, + I: IndexStore + Sync + Send + 'static, + { + pub fn new( + dump_path: PathBuf, + db_path: PathBuf, + update_file_store: UpdateFileStore, + task_store_size: usize, + index_db_size: usize, + env: Arc, + index_resolver: Arc>, + ) -> Self { + Self::Real(super::real::DumpHandler::new( + dump_path, + db_path, + update_file_store, + task_store_size, + index_db_size, + env, + index_resolver, + )) + } + pub async fn run(&self, uid: String) -> Result<()> { + match self { + DumpHandler::Real(real) => real.run(uid).await, + DumpHandler::Mock(mocker, _) => unsafe { mocker.get("run").call(uid) }, + } + } } } diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index b34523fd5..30a6b6dc8 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -222,15 +222,15 @@ impl IndexControllerBuilder { .dump_dst .ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?; - let dump_handler = Arc::new(DumpHandler { + let dump_handler = Arc::new(DumpHandler::new( dump_path, - db_path: db_path.as_ref().into(), - update_file_store: update_file_store.clone(), + db_path.as_ref().into(), + update_file_store.clone(), task_store_size, - index_db_size: index_size, - env: meta_env.clone(), - index_resolver: index_resolver.clone(), - }); + index_size, + meta_env.clone(), + index_resolver.clone(), + )); let task_store = TaskStore::new(meta_env)?; // register all the batch handlers for use with the scheduler. diff --git a/meilisearch-lib/src/tasks/handlers/dump_handler.rs b/meilisearch-lib/src/tasks/handlers/dump_handler.rs index fc506522f..e826242f4 100644 --- a/meilisearch-lib/src/tasks/handlers/dump_handler.rs +++ b/meilisearch-lib/src/tasks/handlers/dump_handler.rs @@ -39,3 +39,96 @@ where () } } + +#[cfg(test)] +mod test { + use crate::dump::error::{DumpError, Result as DumpResult}; + use crate::index_resolver::{index_store::MockIndexStore, meta_store::MockIndexMetaStore}; + use crate::tasks::handlers::test::task_to_batch; + + use super::*; + + use nelson::Mocker; + use proptest::prelude::*; + + proptest! 
{ + #[test] + fn finish_does_nothing( + task in any::(), + ) { + let rt = tokio::runtime::Runtime::new().unwrap(); + let handle = rt.spawn(async { + let batch = task_to_batch(task); + + let mocker = Mocker::default(); + let dump_handler = DumpHandler::::mock(mocker); + + dump_handler.finish(&batch).await; + }); + + rt.block_on(handle).unwrap(); + } + + #[test] + fn test_handle_dump_success( + task in any::(), + ) { + let rt = tokio::runtime::Runtime::new().unwrap(); + let handle = rt.spawn(async { + let batch = task_to_batch(task); + let should_accept = matches!(batch.content, BatchContent::Dump { .. }); + + let mocker = Mocker::default(); + if should_accept { + mocker.when::>("run") + .once() + .then(|_| Ok(())); + } + + let dump_handler = DumpHandler::::mock(mocker); + + let accept = dump_handler.accept(&batch); + assert_eq!(accept, should_accept); + + if accept { + let batch = dump_handler.process_batch(batch).await; + let last_event = batch.content.first().unwrap().events.last().unwrap(); + assert!(matches!(last_event, TaskEvent::Succeded { .. })); + } + }); + + rt.block_on(handle).unwrap(); + } + + #[test] + fn test_handle_dump_error( + task in any::(), + ) { + let rt = tokio::runtime::Runtime::new().unwrap(); + let handle = rt.spawn(async { + let batch = task_to_batch(task); + let should_accept = matches!(batch.content, BatchContent::Dump { .. }); + + let mocker = Mocker::default(); + if should_accept { + mocker.when::>("run") + .once() + .then(|_| Err(DumpError::Internal("error".into()))); + } + + let dump_handler = DumpHandler::::mock(mocker); + + let accept = dump_handler.accept(&batch); + assert_eq!(accept, should_accept); + + if accept { + let batch = dump_handler.process_batch(batch).await; + let last_event = batch.content.first().unwrap().events.last().unwrap(); + assert!(matches!(last_event, TaskEvent::Failed { .. })); + } + }); + + rt.block_on(handle).unwrap(); + } + } +} From 3015265bde07a0b7428611fb429139fb540cffd2 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Wed, 25 May 2022 14:37:10 +0200 Subject: [PATCH 25/33] remove useless dump errors --- meilisearch-lib/src/dump/error.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/meilisearch-lib/src/dump/error.rs b/meilisearch-lib/src/dump/error.rs index 7931a8d75..da9010347 100644 --- a/meilisearch-lib/src/dump/error.rs +++ b/meilisearch-lib/src/dump/error.rs @@ -7,10 +7,6 @@ pub type Result = std::result::Result; #[derive(thiserror::Error, Debug)] pub enum DumpError { - #[error("A dump is already processing. You must wait until the current process is finished before requesting another dump.")] - DumpAlreadyRunning, - #[error("Dump `{0}` not found.")] - DumpDoesNotExist(String), #[error("An internal error has occurred. 
`{0}`.")] Internal(Box), #[error("{0}")] @@ -32,8 +28,6 @@ internal_error!( impl ErrorCode for DumpError { fn error_code(&self) -> Code { match self { - DumpError::DumpAlreadyRunning => Code::DumpAlreadyInProgress, - DumpError::DumpDoesNotExist(_) => Code::DumpNotFound, DumpError::Internal(_) => Code::Internal, DumpError::IndexResolver(e) => e.error_code(), } From 6b2016b350d6ea7e8ffd79cb6f278578b5a9c7d7 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Wed, 25 May 2022 14:39:07 +0200 Subject: [PATCH 26/33] remove typo in BatchContent variant --- meilisearch-lib/src/tasks/batch.rs | 8 ++++---- .../src/tasks/handlers/index_resolver_handler.rs | 8 ++++---- meilisearch-lib/src/tasks/handlers/mod.rs | 2 +- meilisearch-lib/src/tasks/scheduler.rs | 4 ++-- meilisearch-lib/src/tasks/task_store/mod.rs | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/meilisearch-lib/src/tasks/batch.rs b/meilisearch-lib/src/tasks/batch.rs index 88c73e3de..d5116f750 100644 --- a/meilisearch-lib/src/tasks/batch.rs +++ b/meilisearch-lib/src/tasks/batch.rs @@ -8,7 +8,7 @@ pub type BatchId = u64; #[derive(Debug)] pub enum BatchContent { - DocumentAddtitionBatch(Vec), + DocumentsAdditionBatch(Vec), IndexUpdate(Task), Dump(Task), Snapshot(SnapshotJob), @@ -19,7 +19,7 @@ pub enum BatchContent { impl BatchContent { pub fn first(&self) -> Option<&Task> { match self { - BatchContent::DocumentAddtitionBatch(ts) => ts.first(), + BatchContent::DocumentsAdditionBatch(ts) => ts.first(), BatchContent::Dump(t) | BatchContent::IndexUpdate(t) => Some(t), BatchContent::Snapshot(_) | BatchContent::Empty => None, } @@ -27,7 +27,7 @@ impl BatchContent { pub fn push_event(&mut self, event: TaskEvent) { match self { - BatchContent::DocumentAddtitionBatch(ts) => { + BatchContent::DocumentsAdditionBatch(ts) => { ts.iter_mut().for_each(|t| t.events.push(event.clone())) } BatchContent::IndexUpdate(t) | BatchContent::Dump(t) => t.events.push(event), @@ -55,7 +55,7 @@ impl Batch { } pub fn len(&self) -> usize { match self.content { - BatchContent::DocumentAddtitionBatch(ref ts) => ts.len(), + BatchContent::DocumentsAdditionBatch(ref ts) => ts.len(), BatchContent::IndexUpdate(_) | BatchContent::Dump(_) | BatchContent::Snapshot(_) => 1, BatchContent::Empty => 0, } diff --git a/meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs b/meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs index 38c079baa..a744dcaad 100644 --- a/meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs +++ b/meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs @@ -14,14 +14,14 @@ where { fn accept(&self, batch: &Batch) -> bool { match batch.content { - BatchContent::DocumentAddtitionBatch(_) | BatchContent::IndexUpdate(_) => true, + BatchContent::DocumentsAdditionBatch(_) | BatchContent::IndexUpdate(_) => true, _ => false, } } async fn process_batch(&self, mut batch: Batch) -> Batch { match batch.content { - BatchContent::DocumentAddtitionBatch(ref mut tasks) => { + BatchContent::DocumentsAdditionBatch(ref mut tasks) => { *tasks = self .process_document_addition_batch(std::mem::take(tasks)) .await; @@ -45,7 +45,7 @@ where } async fn finish(&self, batch: &Batch) { - if let BatchContent::DocumentAddtitionBatch(ref tasks) = batch.content { + if let BatchContent::DocumentsAdditionBatch(ref tasks) = batch.content { for task in tasks { if let Some(content_uuid) = task.get_content_uuid() { if let Err(e) = self.file_store.delete(content_uuid).await { @@ -86,7 +86,7 @@ mod test { let index_resolver = IndexResolver::new(meta_store, 
From f58507379aaab938fa52e5a7d1c17ec31ce92342 Mon Sep 17 00:00:00 2001
From: ad hoc
Date: Wed, 25 May 2022 14:50:14 +0200
Subject: [PATCH 27/33] fix dump priority in scheduler

---
 meilisearch-lib/src/tasks/scheduler.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/meilisearch-lib/src/tasks/scheduler.rs b/meilisearch-lib/src/tasks/scheduler.rs
index cf3b14cd4..8d44a5859 100644
--- a/meilisearch-lib/src/tasks/scheduler.rs
+++ b/meilisearch-lib/src/tasks/scheduler.rs
@@ -110,8 +110,8 @@ impl Ord for TaskList {
                     (Some(lhs), Some(rhs)) => lhs.cmp(rhs),
                 }
             }
-            (TaskListIdentifier::Index(_), TaskListIdentifier::Dump) => Ordering::Greater,
-            (TaskListIdentifier::Dump, TaskListIdentifier::Index(_)) => Ordering::Less,
+            (TaskListIdentifier::Index(_), TaskListIdentifier::Dump) => Ordering::Less,
+            (TaskListIdentifier::Dump, TaskListIdentifier::Index(_)) => Ordering::Greater,
             (TaskListIdentifier::Dump, TaskListIdentifier::Dump) => {
                 unreachable!("There should be only one Dump task list")
             }
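The direction of this fix matters because the scheduler keeps its task lists in a `std::collections::BinaryHeap`, which is a max-heap: the element that compares greatest is popped first. With the old ordering, the dump list compared `Less` than every index list, so a steady stream of index updates could starve a pending dump. A self-contained sketch of the corrected semantics (a simplified stand-in for `TaskListIdentifier`; the real implementation breaks ties between index lists by the age of their oldest task, not by name):

```rust
use std::cmp::Ordering;
use std::collections::BinaryHeap;

#[derive(Debug, PartialEq, Eq)]
enum Id {
    Index(String),
    Dump,
}

impl Ord for Id {
    fn cmp(&self, other: &Self) -> Ordering {
        match (self, other) {
            (Id::Index(lhs), Id::Index(rhs)) => lhs.cmp(rhs),
            // Dump compares Greater than any index list, so the
            // max-heap hands it out before any index work.
            (Id::Index(_), Id::Dump) => Ordering::Less,
            (Id::Dump, Id::Index(_)) => Ordering::Greater,
            (Id::Dump, Id::Dump) => Ordering::Equal,
        }
    }
}

impl PartialOrd for Id {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

fn main() {
    let mut heap = BinaryHeap::new();
    heap.push(Id::Index("movies".into()));
    heap.push(Id::Dump);
    heap.push(Id::Index("products".into()));

    // The dump is popped first, regardless of insertion order.
    assert_eq!(heap.pop(), Some(Id::Dump));
}
```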
From 74a1f88d8883ed9a78f29f74d18b7915f810b1f2 Mon Sep 17 00:00:00 2001
From: ad hoc
Date: Wed, 25 May 2022 14:57:18 +0200
Subject: [PATCH 28/33] add test for dump processing order

---
 meilisearch-lib/src/tasks/scheduler.rs | 46 ++++++++++++++++----------
 1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/meilisearch-lib/src/tasks/scheduler.rs b/meilisearch-lib/src/tasks/scheduler.rs
index 8d44a5859..19265a911 100644
--- a/meilisearch-lib/src/tasks/scheduler.rs
+++ b/meilisearch-lib/src/tasks/scheduler.rs
@@ -536,10 +536,10 @@ mod test {
 
     use super::*;
 
-    fn gen_task(id: TaskId, index_uid: &str, content: TaskContent) -> Task {
+    fn gen_task(id: TaskId, index_uid: Option<&str>, content: TaskContent) -> Task {
         Task {
             id,
-            index_uid: Some(IndexUid::new_unchecked(index_uid)),
+            index_uid: index_uid.map(IndexUid::new_unchecked),
             content,
             events: vec![],
         }
     }
@@ -548,13 +548,13 @@ mod test {
     #[test]
     fn register_updates_multiples_indexes() {
         let mut queue = TaskQueue::default();
-        queue.insert(gen_task(0, "test1", TaskContent::IndexDeletion));
-        queue.insert(gen_task(1, "test2", TaskContent::IndexDeletion));
-        queue.insert(gen_task(2, "test2", TaskContent::IndexDeletion));
-        queue.insert(gen_task(3, "test2", TaskContent::IndexDeletion));
-        queue.insert(gen_task(4, "test1", TaskContent::IndexDeletion));
-        queue.insert(gen_task(5, "test1", TaskContent::IndexDeletion));
-        queue.insert(gen_task(6, "test2", TaskContent::IndexDeletion));
+        queue.insert(gen_task(0, Some("test1"), TaskContent::IndexDeletion));
+        queue.insert(gen_task(1, Some("test2"), TaskContent::IndexDeletion));
+        queue.insert(gen_task(2, Some("test2"), TaskContent::IndexDeletion));
+        queue.insert(gen_task(3, Some("test2"), TaskContent::IndexDeletion));
+        queue.insert(gen_task(4, Some("test1"), TaskContent::IndexDeletion));
+        queue.insert(gen_task(5, Some("test1"), TaskContent::IndexDeletion));
+        queue.insert(gen_task(6, Some("test2"), TaskContent::IndexDeletion));
 
         let test1_tasks = queue
             .head_mut(|tasks| tasks.drain().map(|t| t.id).collect::<Vec<_>>())
             .unwrap();
@@ -582,16 +582,28 @@ mod test {
             documents_count: 0,
             allow_index_creation: true,
         };
-        queue.insert(gen_task(0, "test1", content.clone()));
-        queue.insert(gen_task(1, "test2", content.clone()));
-        queue.insert(gen_task(2, "test2", TaskContent::IndexDeletion));
-        queue.insert(gen_task(3, "test2", content.clone()));
-        queue.insert(gen_task(4, "test1", content.clone()));
-        queue.insert(gen_task(5, "test1", TaskContent::IndexDeletion));
-        queue.insert(gen_task(6, "test2", content.clone()));
-        queue.insert(gen_task(7, "test1", content));
+        queue.insert(gen_task(0, Some("test1"), content.clone()));
+        queue.insert(gen_task(1, Some("test2"), content.clone()));
+        queue.insert(gen_task(2, Some("test2"), TaskContent::IndexDeletion));
+        queue.insert(gen_task(3, Some("test2"), content.clone()));
+        queue.insert(gen_task(4, Some("test1"), content.clone()));
+        queue.insert(gen_task(5, Some("test1"), TaskContent::IndexDeletion));
+        queue.insert(gen_task(6, Some("test2"), content.clone()));
+        queue.insert(gen_task(7, Some("test1"), content));
+        queue.insert(gen_task(
+            8,
+            None,
+            TaskContent::Dump {
+                uid: "adump".to_owned(),
+            },
+        ));
 
         let config = SchedulerConfig::default();
+
+        // Make sure that the dump is processed before everybody else.
+        let batch = make_batch(&mut queue, &config);
+        assert_eq!(batch, Processing::Dump(8));
+
         let batch = make_batch(&mut queue, &config);
         assert_eq!(batch, Processing::DocumentAdditions(vec![0, 4]));
From 1647ca3c1f929a1ceb67e5c9ec6fb24a5c96bcb1 Mon Sep 17 00:00:00 2001
From: ad hoc
Date: Wed, 25 May 2022 15:07:52 +0200
Subject: [PATCH 29/33] fix clippy warnings

---
 meilisearch-lib/src/tasks/handlers/dump_handler.rs | 4 +---
 meilisearch-lib/src/tasks/handlers/empty_handler.rs | 4 +---
 .../src/tasks/handlers/index_resolver_handler.rs | 10 +++++-----
 meilisearch-lib/src/tasks/handlers/snapshot_handler.rs | 9 ++-------
 meilisearch-lib/src/tasks/task_store/store.rs | 2 +-
 5 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/meilisearch-lib/src/tasks/handlers/dump_handler.rs b/meilisearch-lib/src/tasks/handlers/dump_handler.rs
index e826242f4..715beafee 100644
--- a/meilisearch-lib/src/tasks/handlers/dump_handler.rs
+++ b/meilisearch-lib/src/tasks/handlers/dump_handler.rs
@@ -35,9 +35,7 @@ where
         }
     }
 
-    async fn finish(&self, _: &Batch) {
-        ()
-    }
+    async fn finish(&self, _: &Batch) {}
 }
 
 #[cfg(test)]

diff --git a/meilisearch-lib/src/tasks/handlers/empty_handler.rs b/meilisearch-lib/src/tasks/handlers/empty_handler.rs
index 5d6aa2275..d800e1965 100644
--- a/meilisearch-lib/src/tasks/handlers/empty_handler.rs
+++ b/meilisearch-lib/src/tasks/handlers/empty_handler.rs
@@ -14,7 +14,5 @@ impl BatchHandler for EmptyBatchHandler {
         batch
     }
 
-    async fn finish(&self, _: &Batch) {
-        ()
-    }
+    async fn finish(&self, _: &Batch) {}
 }

diff --git a/meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs b/meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs
index a744dcaad..a34082afe 100644
--- a/meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs
+++ b/meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs
@@ -13,10 +13,10 @@ where
     I: IndexStore + Send + Sync + 'static,
 {
     fn accept(&self, batch: &Batch) -> bool {
-        match batch.content {
-            BatchContent::DocumentsAdditionBatch(_) | BatchContent::IndexUpdate(_) => true,
-            _ => false,
-        }
+        matches!(
+            batch.content,
+            BatchContent::DocumentsAdditionBatch(_) | BatchContent::IndexUpdate(_)
+        )
     }
 
     async fn process_batch(&self, mut batch: Batch) -> Batch {
@@ -26,7 +26,7 @@ where
                     .process_document_addition_batch(std::mem::take(tasks))
                     .await;
             }
-            BatchContent::IndexUpdate(ref mut task) => match self.process_task(&task).await {
+            BatchContent::IndexUpdate(ref mut task) => match self.process_task(task).await {
                 Ok(success) => {
                     task.events.push(TaskEvent::Succeded {
                         result: success,

diff --git a/meilisearch-lib/src/tasks/handlers/snapshot_handler.rs b/meilisearch-lib/src/tasks/handlers/snapshot_handler.rs
index 2948fb4ff..32fe6d746 100644
--- a/meilisearch-lib/src/tasks/handlers/snapshot_handler.rs
+++ b/meilisearch-lib/src/tasks/handlers/snapshot_handler.rs
@@ -6,10 +6,7 @@ pub struct SnapshotHandler;
 #[async_trait::async_trait]
 impl BatchHandler for SnapshotHandler {
     fn accept(&self, batch: &Batch) -> bool {
-        match batch.content {
-            BatchContent::Snapshot(_) => true,
-            _ => false,
-        }
+        matches!(batch.content, BatchContent::Snapshot(_))
     }
 
     async fn process_batch(&self, batch: Batch) -> Batch {
@@ -25,7 +22,5 @@ impl BatchHandler for SnapshotHandler {
         Batch::empty()
     }
 
-    async fn finish(&self, _: &Batch) {
-        ()
-    }
+    async fn finish(&self, _: &Batch) {}
 }

diff --git a/meilisearch-lib/src/tasks/task_store/store.rs b/meilisearch-lib/src/tasks/task_store/store.rs
index 902f80560..75ece0ae8 100644
--- a/meilisearch-lib/src/tasks/task_store/store.rs
+++ b/meilisearch-lib/src/tasks/task_store/store.rs
@@ -110,7 +110,7 @@ impl Store {
         self.tasks.put(txn, &BEU64::new(task.id), task)?;
         // only add the task to the indexes index if it has an index_uid
         if let Some(ref index_uid) = task.index_uid {
-            self.uids_task_ids.put(txn, &(&index_uid, task.id), &())?;
+            self.uids_task_ids.put(txn, &(index_uid, task.id), &())?;
         }
 
         Ok(())

From 5a2972fc1929aaa9a2a1a11b91b4b903f0135d43 Mon Sep 17 00:00:00 2001
From: ad hoc
Date: Thu, 26 May 2022 11:50:04 +0200
Subject: [PATCH 30/33] use TaskEvent method instead of variants in BatchHandler impl

---
 .../src/tasks/handlers/index_resolver_handler.rs | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs b/meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs
index a34082afe..e0471567b 100644
--- a/meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs
+++ b/meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs
@@ -1,5 +1,3 @@
-use time::OffsetDateTime;
-
 use crate::index_resolver::IndexResolver;
 use crate::index_resolver::{index_store::IndexStore, meta_store::IndexMetaStore};
 use crate::tasks::batch::{Batch, BatchContent};
@@ -27,16 +25,8 @@ where
                     .await;
             }
             BatchContent::IndexUpdate(ref mut task) => match self.process_task(task).await {
-                Ok(success) => {
-                    task.events.push(TaskEvent::Succeded {
-                        result: success,
-                        timestamp: OffsetDateTime::now_utc(),
-                    });
-                }
-                Err(err) => task.events.push(TaskEvent::Failed {
-                    error: err.into(),
-                    timestamp: OffsetDateTime::now_utc(),
-                }),
+                Ok(success) => task.events.push(TaskEvent::succeeded(success)),
+                Err(err) => task.events.push(TaskEvent::failed(err.into())),
             },
             _ => unreachable!(),
         }
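The `TaskEvent::succeeded` and `TaskEvent::failed` constructors this patch switches to are not shown in the series; presumably they fold the timestamping that was previously inlined at each call site into `task.rs`, along these lines. This is a sketch only, written against the `Succeded`/`Failed` variants seen earlier (`Succeded` is the variant's actual spelling in this codebase), and it assumes `TaskResult`, `ResponseError`, and `OffsetDateTime` are already in scope in `task.rs`:

```rust
impl TaskEvent {
    // Capturing the timestamp here is what lets the handler drop its
    // `use time::OffsetDateTime` import in the hunk above.
    pub fn succeeded(result: TaskResult) -> Self {
        Self::Succeded {
            result,
            timestamp: OffsetDateTime::now_utc(),
        }
    }

    pub fn failed(error: ResponseError) -> Self {
        Self::Failed {
            error,
            timestamp: OffsetDateTime::now_utc(),
        }
    }
}
```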
From a9ef399a6b5ac619c8ab7ef8a3475530f8b60fcd Mon Sep 17 00:00:00 2001
From: ad hoc
Date: Thu, 26 May 2022 12:04:27 +0200
Subject: [PATCH 31/33] Processing::Nothing returns BatchContent::Empty instead
 of panicking

---
 meilisearch-lib/src/tasks/task_store/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/meilisearch-lib/src/tasks/task_store/mod.rs b/meilisearch-lib/src/tasks/task_store/mod.rs
index a5227fe73..deb3f8191 100644
--- a/meilisearch-lib/src/tasks/task_store/mod.rs
+++ b/meilisearch-lib/src/tasks/task_store/mod.rs
@@ -158,7 +158,7 @@ impl TaskStore {
                 debug_assert!(matches!(task.content, TaskContent::Dump { .. }));
                 BatchContent::Dump(task)
             }
-            Processing::Nothing => unreachable!(),
+            Processing::Nothing => BatchContent::Empty,
         };
 
         Ok((processing, content))

From 4cb2c6ef1e16894054c111b5a1aaac93cfca5d3a Mon Sep 17 00:00:00 2001
From: ad hoc
Date: Mon, 30 May 2022 12:30:15 +0200
Subject: [PATCH 32/33] use map_or instead of map + unwrap_or

---
 meilisearch-lib/src/tasks/task_store/mod.rs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/meilisearch-lib/src/tasks/task_store/mod.rs b/meilisearch-lib/src/tasks/task_store/mod.rs
index deb3f8191..3645717e6 100644
--- a/meilisearch-lib/src/tasks/task_store/mod.rs
+++ b/meilisearch-lib/src/tasks/task_store/mod.rs
@@ -36,8 +36,7 @@ impl TaskFilter {
             Some(ref index_uid) => self
                 .indexes
                 .as_ref()
-                .map(|indexes| indexes.contains(index_uid.as_str()))
-                .unwrap_or(true),
+                .map_or(true, |indexes| indexes.contains(index_uid.as_str())),
             None => false,
         }
     }
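This is the rewrite clippy's `map_unwrap_or` lint asks for: `opt.map(f).unwrap_or(default)` and `opt.map_or(default, f)` are equivalent, and the latter needs one combinator instead of two. The filter's semantics read directly off the new code: with no index filter configured, every task passes, while a task with no `index_uid` (a dump) never matches an index-scoped filter. A quick equivalence check:

```rust
fn main() {
    let indexes: Option<Vec<&str>> = Some(vec!["movies"]);

    let with_map = indexes.as_ref().map(|i| i.contains(&"movies")).unwrap_or(true);
    let with_map_or = indexes.as_ref().map_or(true, |i| i.contains(&"movies"));

    assert_eq!(with_map, with_map_or);
}
```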
From 1e310ecc7d1600c171187c6f23e176825331185b Mon Sep 17 00:00:00 2001
From: ad hoc
Date: Mon, 30 May 2022 14:34:49 +0200
Subject: [PATCH 33/33] fix typo in docstring

Co-authored-by: Tamo
---
 meilisearch-lib/src/tasks/task.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/meilisearch-lib/src/tasks/task.rs b/meilisearch-lib/src/tasks/task.rs
index 0e0aa8af2..0499d9702 100644
--- a/meilisearch-lib/src/tasks/task.rs
+++ b/meilisearch-lib/src/tasks/task.rs
@@ -86,7 +86,7 @@ impl TaskEvent {
 #[cfg_attr(test, derive(proptest_derive::Arbitrary))]
 pub struct Task {
     pub id: TaskId,
-    /// The name of the index the task is targeting. If it isn't targeting any idex (i.e Dump task)
+    /// The name of the index the task is targeting. If it isn't targeting any index (i.e Dump task)
     /// then this is None
     // TODO: when next forward breaking dumps, it would be a good idea to move this field inside of
     // the TaskContent.
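Taken together, the tail of this series leaves `Task::index_uid` as a genuine `Option` that every consumer handles explicitly: the store only indexes tasks by uid when one is present, filters reject uid-less tasks, and the scheduler gives the uid-less dump its own prioritized list. As a closing illustration, the two shapes a task can now take (a sketch assuming `Task`, `IndexUid`, and `TaskContent` in scope; all field values are illustrative):

```rust
// An index-scoped task: targets exactly one index.
let deletion = Task {
    id: 1,
    index_uid: Some(IndexUid::new_unchecked("movies")),
    content: TaskContent::IndexDeletion,
    events: vec![],
};

// A dump task: targets the whole instance, so there is no index to name.
let dump = Task {
    id: 2,
    index_uid: None,
    content: TaskContent::Dump {
        uid: "a-dump-uid".to_owned(),
    },
    events: vec![],
};
```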