mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 05:54:30 +01:00
handle dump
This commit is contained in:
parent
56eb2907c9
commit
57fde30b91
@ -82,6 +82,8 @@ enum TaskDetails {
|
||||
},
|
||||
#[serde(rename_all = "camelCase")]
|
||||
ClearAll { deleted_documents: Option<u64> },
|
||||
#[serde(rename_all = "camelCase")]
|
||||
Dump { dump_uid: String },
|
||||
}
|
||||
|
||||
/// Serialize a `time::Duration` as a best effort ISO 8601 while waiting for
|
||||
@ -218,7 +220,9 @@ impl From<Task> for TaskView {
|
||||
TaskType::IndexUpdate,
|
||||
Some(TaskDetails::IndexInfo { primary_key }),
|
||||
),
|
||||
TaskContent::Dump => (TaskType::Dump, None),
|
||||
TaskContent::Dump { uid } => {
|
||||
(TaskType::Dump, Some(TaskDetails::Dump { dump_uid: uid }))
|
||||
}
|
||||
};
|
||||
|
||||
// An event always has at least one event: "Created"
|
||||
|
@ -1,7 +1,6 @@
|
||||
use tokio::sync::oneshot;
|
||||
|
||||
use super::error::Result;
|
||||
use super::DumpInfo;
|
||||
|
||||
pub enum DumpMsg {
|
||||
CreateDump {
|
||||
|
@ -5,10 +5,12 @@ use std::sync::Arc;
|
||||
use anyhow::bail;
|
||||
use log::{info, trace};
|
||||
use meilisearch_auth::AuthController;
|
||||
use milli::heed::Env;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use tempfile::TempDir;
|
||||
use time::macros::format_description;
|
||||
use tokio::fs::create_dir_all;
|
||||
|
||||
use crate::analytics;
|
||||
@ -18,6 +20,7 @@ use crate::index_resolver::index_store::IndexStore;
|
||||
use crate::index_resolver::meta_store::IndexMetaStore;
|
||||
use crate::index_resolver::IndexResolver;
|
||||
use crate::options::IndexerOpts;
|
||||
use crate::tasks::TaskStore;
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
use error::Result;
|
||||
|
||||
@ -259,22 +262,31 @@ fn persist_dump(dst_path: impl AsRef<Path>, tmp_dst: TempDir) -> anyhow::Result<
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub struct DumpJob<U, I> {
|
||||
/// Generate uid from creation date
|
||||
pub fn generate_uid() -> String {
|
||||
OffsetDateTime::now_utc()
|
||||
.format(format_description!(
|
||||
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
|
||||
))
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub struct DumpHandler<U, I> {
|
||||
pub dump_path: PathBuf,
|
||||
pub db_path: PathBuf,
|
||||
pub update_file_store: UpdateFileStore,
|
||||
pub uid: String,
|
||||
pub update_db_size: usize,
|
||||
pub task_store_size: usize,
|
||||
pub index_db_size: usize,
|
||||
pub env: Arc<Env>,
|
||||
pub index_resolver: Arc<IndexResolver<U, I>>,
|
||||
}
|
||||
|
||||
impl<U, I> DumpJob<U, I>
|
||||
impl<U, I> DumpHandler<U, I>
|
||||
where
|
||||
U: IndexMetaStore,
|
||||
I: IndexStore,
|
||||
U: IndexMetaStore + Sync + Send + 'static,
|
||||
I: IndexStore + Sync + Send + 'static,
|
||||
{
|
||||
pub async fn run(self) -> Result<()> {
|
||||
pub async fn run(&self, uid: String) -> Result<()> {
|
||||
trace!("Performing dump.");
|
||||
|
||||
create_dir_all(&self.dump_path).await?;
|
||||
@ -282,7 +294,7 @@ where
|
||||
let temp_dump_dir = tokio::task::spawn_blocking(tempfile::TempDir::new).await??;
|
||||
let temp_dump_path = temp_dump_dir.path().to_owned();
|
||||
|
||||
let meta = MetadataVersion::new_v4(self.index_db_size, self.update_db_size);
|
||||
let meta = MetadataVersion::new_v4(self.index_db_size, self.task_store_size);
|
||||
let meta_path = temp_dump_path.join(META_FILE_NAME);
|
||||
let mut meta_file = File::create(&meta_path)?;
|
||||
serde_json::to_writer(&mut meta_file, &meta)?;
|
||||
@ -292,25 +304,25 @@ where
|
||||
|
||||
// TODO: this is blocking!!
|
||||
AuthController::dump(&self.db_path, &temp_dump_path)?;
|
||||
TaskStore::dump(
|
||||
self.env.clone(),
|
||||
&self.dump_path,
|
||||
self.update_file_store.clone(),
|
||||
)
|
||||
.await?;
|
||||
self.index_resolver.dump(&self.dump_path).await?;
|
||||
|
||||
//TODO(marin): this is not right, the scheduler should dump itself, not do it here...
|
||||
// self.scheduler
|
||||
// .read()
|
||||
// .await
|
||||
// .dump(&temp_dump_path, self.update_file_store.clone())
|
||||
// .await?;
|
||||
|
||||
let dump_path = self.dump_path.clone();
|
||||
let dump_path = tokio::task::spawn_blocking(move || -> Result<PathBuf> {
|
||||
// for now we simply copy the updates/updates_files
|
||||
// FIXME: We may copy more files than necessary, if new files are added while we are
|
||||
// performing the dump. We need a way to filter them out.
|
||||
|
||||
let temp_dump_file = tempfile::NamedTempFile::new_in(&self.dump_path)?;
|
||||
let temp_dump_file = tempfile::NamedTempFile::new_in(&dump_path)?;
|
||||
to_tar_gz(temp_dump_path, temp_dump_file.path())
|
||||
.map_err(|e| DumpError::Internal(e.into()))?;
|
||||
|
||||
let dump_path = self.dump_path.join(self.uid).with_extension("dump");
|
||||
let dump_path = dump_path.join(uid).with_extension("dump");
|
||||
temp_dump_file.persist(&dump_path)?;
|
||||
|
||||
Ok(dump_path)
|
||||
|
@ -19,7 +19,7 @@ use tokio::time::sleep;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::document_formats::{read_csv, read_json, read_ndjson};
|
||||
use crate::dump::load_dump;
|
||||
use crate::dump::{self, load_dump, DumpHandler};
|
||||
use crate::index::{
|
||||
Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked,
|
||||
};
|
||||
@ -27,9 +27,7 @@ use crate::options::{IndexerOpts, SchedulerConfig};
|
||||
use crate::snapshot::{load_snapshot, SnapshotService};
|
||||
use crate::tasks::error::TaskError;
|
||||
use crate::tasks::task::{DocumentDeletion, Task, TaskContent, TaskId};
|
||||
use crate::tasks::{
|
||||
BatchHandler, DumpHandler, EmptyBatchHandler, Scheduler, TaskFilter, TaskStore,
|
||||
};
|
||||
use crate::tasks::{BatchHandler, EmptyBatchHandler, Scheduler, TaskFilter, TaskStore};
|
||||
use error::Result;
|
||||
|
||||
use self::error::IndexControllerError;
|
||||
@ -222,14 +220,15 @@ impl IndexControllerBuilder {
|
||||
.dump_dst
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?;
|
||||
|
||||
let dump_handler = Arc::new(DumpHandler::new(
|
||||
update_file_store.clone(),
|
||||
let dump_handler = Arc::new(DumpHandler {
|
||||
dump_path,
|
||||
db_path.as_ref().clone(),
|
||||
index_size,
|
||||
db_path: db_path.as_ref().into(),
|
||||
update_file_store: update_file_store.clone(),
|
||||
task_store_size,
|
||||
index_resolver.clone(),
|
||||
));
|
||||
index_db_size: index_size,
|
||||
env: meta_env.clone(),
|
||||
index_resolver: index_resolver.clone(),
|
||||
});
|
||||
let task_store = TaskStore::new(meta_env)?;
|
||||
|
||||
// register all the batch handlers for use with the scheduler.
|
||||
@ -421,7 +420,8 @@ where
|
||||
}
|
||||
|
||||
pub async fn register_dump_task(&self) -> Result<Task> {
|
||||
let content = TaskContent::Dump;
|
||||
let uid = dump::generate_uid();
|
||||
let content = TaskContent::Dump { uid };
|
||||
let task = self.task_store.register(None, content).await?;
|
||||
self.scheduler.read().await.notify();
|
||||
Ok(task)
|
||||
|
@ -1,101 +1,34 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use log::{error, trace};
|
||||
use time::{macros::format_description, OffsetDateTime};
|
||||
|
||||
use crate::dump::DumpJob;
|
||||
use crate::dump::DumpHandler;
|
||||
use crate::index_resolver::index_store::IndexStore;
|
||||
use crate::index_resolver::meta_store::IndexMetaStore;
|
||||
use crate::index_resolver::IndexResolver;
|
||||
use crate::tasks::batch::{Batch, BatchContent};
|
||||
use crate::tasks::task::{Task, TaskContent, TaskEvent, TaskResult};
|
||||
use crate::tasks::BatchHandler;
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
|
||||
pub struct DumpHandler<U, I> {
|
||||
update_file_store: UpdateFileStore,
|
||||
index_resolver: Arc<IndexResolver<U, I>>,
|
||||
dump_path: PathBuf,
|
||||
db_path: PathBuf,
|
||||
update_db_size: usize,
|
||||
index_db_size: usize,
|
||||
}
|
||||
|
||||
/// Generate uid from creation date
|
||||
fn generate_uid() -> String {
|
||||
OffsetDateTime::now_utc()
|
||||
.format(format_description!(
|
||||
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
|
||||
))
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
impl<U, I> DumpHandler<U, I>
|
||||
where
|
||||
U: IndexMetaStore + Send + Sync + 'static,
|
||||
I: IndexStore + Send + Sync + 'static,
|
||||
{
|
||||
pub fn new(
|
||||
update_file_store: UpdateFileStore,
|
||||
dump_path: impl AsRef<Path>,
|
||||
db_path: impl AsRef<Path>,
|
||||
index_db_size: usize,
|
||||
update_db_size: usize,
|
||||
index_resolver: Arc<IndexResolver<U, I>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
update_file_store,
|
||||
dump_path: dump_path.as_ref().into(),
|
||||
db_path: db_path.as_ref().into(),
|
||||
index_db_size,
|
||||
update_db_size,
|
||||
index_resolver,
|
||||
}
|
||||
}
|
||||
|
||||
async fn create_dump(&self) {
|
||||
let uid = generate_uid();
|
||||
|
||||
let task = DumpJob {
|
||||
dump_path: self.dump_path.clone(),
|
||||
db_path: self.db_path.clone(),
|
||||
update_file_store: self.update_file_store.clone(),
|
||||
uid: uid.clone(),
|
||||
update_db_size: self.update_db_size,
|
||||
index_db_size: self.index_db_size,
|
||||
index_resolver: self.index_resolver.clone(),
|
||||
};
|
||||
|
||||
let task_result = tokio::task::spawn_local(task.run()).await;
|
||||
|
||||
match task_result {
|
||||
Ok(Ok(())) => {
|
||||
trace!("Dump succeed");
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
error!("Dump failed: {}", e);
|
||||
}
|
||||
Err(_) => {
|
||||
error!("Dump panicked. Dump status set to failed");
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<U, I> BatchHandler for DumpHandler<U, I>
|
||||
where
|
||||
U: IndexMetaStore + Send + Sync + 'static,
|
||||
I: IndexStore + Send + Sync + 'static,
|
||||
U: IndexMetaStore + Sync + Send + 'static,
|
||||
I: IndexStore + Sync + Send + 'static,
|
||||
{
|
||||
fn accept(&self, batch: &Batch) -> bool {
|
||||
matches!(batch.content, BatchContent::Dump { .. })
|
||||
}
|
||||
|
||||
async fn process_batch(&self, batch: Batch) -> Batch {
|
||||
match batch.content {
|
||||
BatchContent::Dump { .. } => {
|
||||
self.create_dump().await;
|
||||
async fn process_batch(&self, mut batch: Batch) -> Batch {
|
||||
match &batch.content {
|
||||
BatchContent::Dump(Task {
|
||||
content: TaskContent::Dump { uid },
|
||||
..
|
||||
}) => {
|
||||
match self.run(uid.clone()).await {
|
||||
Ok(_) => {
|
||||
batch
|
||||
.content
|
||||
.push_event(TaskEvent::succeeded(TaskResult::Other));
|
||||
}
|
||||
Err(e) => batch.content.push_event(TaskEvent::failed(e.into())),
|
||||
}
|
||||
batch
|
||||
}
|
||||
_ => unreachable!("invalid batch content for dump"),
|
||||
|
@ -1,6 +1,6 @@
|
||||
use async_trait::async_trait;
|
||||
|
||||
pub use batch_handlers::{dump_handler::DumpHandler, empty_handler::EmptyBatchHandler};
|
||||
pub use batch_handlers::empty_handler::EmptyBatchHandler;
|
||||
pub use scheduler::Scheduler;
|
||||
pub use task_store::TaskFilter;
|
||||
|
||||
|
@ -1,7 +1,6 @@
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::{hash_map::Entry, BinaryHeap, HashMap, VecDeque};
|
||||
use std::ops::{Deref, DerefMut};
|
||||
use std::path::Path;
|
||||
use std::slice;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
@ -13,7 +12,6 @@ use tokio::sync::{watch, RwLock};
|
||||
|
||||
use crate::options::SchedulerConfig;
|
||||
use crate::snapshot::SnapshotJob;
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
|
||||
use super::batch::{Batch, BatchContent};
|
||||
use super::error::Result;
|
||||
@ -276,10 +274,6 @@ impl Scheduler {
|
||||
Ok(this)
|
||||
}
|
||||
|
||||
pub async fn dump(&self, path: &Path, file_store: UpdateFileStore) -> Result<()> {
|
||||
self.store.dump(path, file_store).await
|
||||
}
|
||||
|
||||
fn register_task(&mut self, task: Task) {
|
||||
assert!(!task.is_finished());
|
||||
self.tasks.insert(task);
|
||||
|
@ -62,6 +62,22 @@ pub enum TaskEvent {
|
||||
},
|
||||
}
|
||||
|
||||
impl TaskEvent {
|
||||
pub fn succeeded(result: TaskResult) -> Self {
|
||||
Self::Succeded {
|
||||
result,
|
||||
timestamp: OffsetDateTime::now_utc(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn failed(error: ResponseError) -> Self {
|
||||
Self::Failed {
|
||||
error,
|
||||
timestamp: OffsetDateTime::now_utc(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A task represents an operation that Meilisearch must do.
|
||||
/// It's stored on disk and executed from the lowest to highest Task id.
|
||||
/// Everytime a new task is created it has a higher Task id than the previous one.
|
||||
@ -140,7 +156,9 @@ pub enum TaskContent {
|
||||
IndexUpdate {
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
Dump,
|
||||
Dump {
|
||||
uid: String,
|
||||
},
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -204,13 +204,14 @@ impl TaskStore {
|
||||
}
|
||||
|
||||
pub async fn dump(
|
||||
&self,
|
||||
env: Arc<Env>,
|
||||
dir_path: impl AsRef<Path>,
|
||||
update_file_store: UpdateFileStore,
|
||||
) -> Result<()> {
|
||||
let store = Self::new(env)?;
|
||||
let update_dir = dir_path.as_ref().join("updates");
|
||||
let updates_file = update_dir.join("data.jsonl");
|
||||
let tasks = self.list_tasks(None, None, None).await?;
|
||||
let tasks = store.list_tasks(None, None, None).await?;
|
||||
|
||||
let dir_path = dir_path.as_ref().to_path_buf();
|
||||
tokio::task::spawn_blocking(move || -> Result<()> {
|
||||
@ -287,6 +288,14 @@ pub mod test {
|
||||
Ok(Self::Real(TaskStore::new(env)?))
|
||||
}
|
||||
|
||||
pub async fn dump(
|
||||
env: Arc<milli::heed::Env>,
|
||||
path: impl AsRef<Path>,
|
||||
update_file_store: UpdateFileStore,
|
||||
) -> Result<()> {
|
||||
TaskStore::dump(env, path, update_file_store).await
|
||||
}
|
||||
|
||||
pub fn mock(mocker: Mocker) -> Self {
|
||||
Self::Mock(Arc::new(mocker))
|
||||
}
|
||||
@ -329,17 +338,6 @@ pub mod test {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn dump(
|
||||
&self,
|
||||
path: impl AsRef<Path>,
|
||||
update_file_store: UpdateFileStore,
|
||||
) -> Result<()> {
|
||||
match self {
|
||||
Self::Real(s) => s.dump(path, update_file_store).await,
|
||||
Self::Mock(m) => unsafe { m.get("dump").call((path, update_file_store)) },
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn register(
|
||||
&self,
|
||||
index_uid: Option<IndexUid>,
|
||||
|
Loading…
x
Reference in New Issue
Block a user