MeiliSearch/meilisearch-lib/src/index_controller/snapshot.rs

313 lines
9.8 KiB
Rust
Raw Normal View History

2021-09-27 16:48:03 +02:00
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
2021-03-17 11:53:23 +01:00
2021-06-21 13:57:32 +02:00
use anyhow::bail;
2021-09-27 16:48:03 +02:00
use log::{error, info, trace};
2021-09-28 22:22:59 +02:00
use tokio::fs;
2021-09-27 16:48:03 +02:00
use tokio::task::spawn_blocking;
use tokio::time::sleep;
2021-10-26 13:02:40 +02:00
use crate::analytics;
2021-09-29 12:02:27 +02:00
use crate::compression::from_tar_gz;
2021-09-27 16:48:03 +02:00
use crate::index_controller::updates::UpdateMsg;
2021-10-04 18:31:05 +02:00
use super::index_resolver::index_store::IndexStore;
use super::index_resolver::uuid_store::UuidStore;
2021-10-06 13:01:02 +02:00
use super::index_resolver::IndexResolver;
2021-09-28 22:22:59 +02:00
use super::updates::UpdateSender;
2021-09-27 16:48:03 +02:00
2021-10-04 18:31:05 +02:00
pub struct SnapshotService<U, I> {
index_resolver: Arc<IndexResolver<U, I>>,
2021-09-27 16:48:03 +02:00
update_sender: UpdateSender,
snapshot_period: Duration,
snapshot_path: PathBuf,
2021-10-26 13:02:40 +02:00
db_path: PathBuf,
2021-09-27 16:48:03 +02:00
db_name: String,
}
2021-03-17 11:53:23 +01:00
2021-10-04 18:31:05 +02:00
impl<U, I> SnapshotService<U, I>
2021-10-06 13:01:02 +02:00
where
U: UuidStore + Sync + Send + 'static,
I: IndexStore + Sync + Send + 'static,
2021-10-04 18:31:05 +02:00
{
2021-09-27 16:48:03 +02:00
pub fn new(
2021-10-04 18:31:05 +02:00
index_resolver: Arc<IndexResolver<U, I>>,
2021-09-27 16:48:03 +02:00
update_sender: UpdateSender,
snapshot_period: Duration,
snapshot_path: PathBuf,
2021-10-26 13:02:40 +02:00
db_path: PathBuf,
2021-09-27 16:48:03 +02:00
db_name: String,
) -> Self {
Self {
index_resolver,
update_sender,
snapshot_period,
snapshot_path,
2021-10-26 13:02:40 +02:00
db_path,
2021-09-27 16:48:03 +02:00
db_name,
}
}
2021-03-17 11:53:23 +01:00
2021-09-27 16:48:03 +02:00
pub async fn run(self) {
info!(
"Snapshot scheduled every {}s.",
self.snapshot_period.as_secs()
);
loop {
if let Err(e) = self.perform_snapshot().await {
error!("Error while performing snapshot: {}", e);
}
sleep(self.snapshot_period).await;
}
}
2021-03-17 11:53:23 +01:00
2021-09-27 16:48:03 +02:00
async fn perform_snapshot(&self) -> anyhow::Result<()> {
trace!("Performing snapshot.");
2021-03-17 11:53:23 +01:00
2021-09-27 16:48:03 +02:00
let snapshot_dir = self.snapshot_path.clone();
fs::create_dir_all(&snapshot_dir).await?;
2021-09-28 22:22:59 +02:00
let temp_snapshot_dir = spawn_blocking(tempfile::tempdir).await??;
2021-09-27 16:48:03 +02:00
let temp_snapshot_path = temp_snapshot_dir.path().to_owned();
let indexes = self
.index_resolver
.snapshot(temp_snapshot_path.clone())
.await?;
2021-10-26 13:02:40 +02:00
analytics::copy_user_id(&self.db_path, &temp_snapshot_path.clone());
2021-09-27 16:48:03 +02:00
if indexes.is_empty() {
return Ok(());
}
UpdateMsg::snapshot(&self.update_sender, temp_snapshot_path.clone(), indexes).await?;
let snapshot_path = self
.snapshot_path
.join(format!("{}.snapshot", self.db_name));
let snapshot_path = spawn_blocking(move || -> anyhow::Result<PathBuf> {
let temp_snapshot_file = tempfile::NamedTempFile::new_in(&snapshot_dir)?;
2021-09-27 16:48:03 +02:00
let temp_snapshot_file_path = temp_snapshot_file.path().to_owned();
crate::compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?;
temp_snapshot_file.persist(&snapshot_path)?;
Ok(snapshot_path)
})
.await??;
trace!("Created snapshot in {:?}.", snapshot_path);
Ok(())
}
}
2021-03-23 16:19:01 +01:00
2021-03-23 16:37:46 +01:00
pub fn load_snapshot(
db_path: impl AsRef<Path>,
snapshot_path: impl AsRef<Path>,
ignore_snapshot_if_db_exists: bool,
ignore_missing_snapshot: bool,
2021-06-15 17:39:07 +02:00
) -> anyhow::Result<()> {
2021-03-23 16:37:46 +01:00
if !db_path.as_ref().exists() && snapshot_path.as_ref().exists() {
2021-09-29 12:02:27 +02:00
match from_tar_gz(snapshot_path, &db_path) {
2021-03-25 14:48:51 +01:00
Ok(()) => Ok(()),
Err(e) => {
2021-09-28 22:22:59 +02:00
//clean created db folder
2021-03-25 14:48:51 +01:00
std::fs::remove_dir_all(&db_path)?;
Err(e)
}
}
2021-03-23 16:37:46 +01:00
} else if db_path.as_ref().exists() && !ignore_snapshot_if_db_exists {
2021-06-21 13:57:32 +02:00
bail!(
2021-03-23 16:37:46 +01:00
"database already exists at {:?}, try to delete it or rename it",
db_path
.as_ref()
.canonicalize()
2021-03-24 11:50:52 +01:00
.unwrap_or_else(|_| db_path.as_ref().to_owned())
2021-03-23 16:37:46 +01:00
)
} else if !snapshot_path.as_ref().exists() && !ignore_missing_snapshot {
2021-06-21 13:57:32 +02:00
bail!(
2021-03-23 16:37:46 +01:00
"snapshot doesn't exist at {:?}",
snapshot_path
.as_ref()
.canonicalize()
2021-03-24 11:50:52 +01:00
.unwrap_or_else(|_| snapshot_path.as_ref().to_owned())
2021-03-23 16:37:46 +01:00
)
} else {
Ok(())
}
}
2021-10-04 12:15:21 +02:00
#[cfg(test)]
mod test {
    use std::{collections::HashSet, sync::Arc};

    use futures::future::{err, ok};
    use once_cell::sync::Lazy;
    use rand::Rng;
    use uuid::Uuid;

    use crate::index::error::IndexError;
    use crate::index::test::Mocker;
    use crate::index::{error::Result as IndexResult, Index};
    use crate::index_controller::index_resolver::error::IndexResolverError;
    use crate::index_controller::index_resolver::index_store::MockIndexStore;
    use crate::index_controller::index_resolver::uuid_store::MockUuidStore;
    use crate::index_controller::index_resolver::IndexResolver;
    use crate::index_controller::updates::create_update_handler;

    use super::*;

    /// Points the temporary-directory environment variable (`TMP` on Windows,
    /// `TMPDIR` elsewhere) at the current directory, exactly once per process.
    fn setup() {
        static SETUP: Lazy<()> = Lazy::new(|| {
            if cfg!(windows) {
                std::env::set_var("TMP", ".");
            } else {
                std::env::set_var("TMPDIR", ".");
            }
        });

        // just deref to make sure the env is setup
        *SETUP
    }

    /// Wires the given mocked stores into a `SnapshotService`.
    ///
    /// The two temporary directories backing the update handler and the
    /// snapshot folder are returned as well: the caller must keep them alive
    /// for as long as the service is used.
    fn build_service(
        uuid_store: MockUuidStore,
        index_store: MockIndexStore,
    ) -> (
        SnapshotService<MockUuidStore, MockIndexStore>,
        tempfile::TempDir,
        tempfile::TempDir,
    ) {
        let resolver = Arc::new(IndexResolver::new(uuid_store, index_store));

        let update_dir = tempfile::tempdir().unwrap();
        let sender =
            create_update_handler(resolver.clone(), update_dir.path(), 4096 * 100).unwrap();

        let snapshot_dir = tempfile::tempdir().unwrap();
        let service = SnapshotService::new(
            resolver,
            sender,
            Duration::from_millis(100),
            snapshot_dir.path().to_owned(),
            // this should do nothing
            snapshot_dir.path().to_owned(),
            "data.ms".to_string(),
        );

        (service, update_dir, snapshot_dir)
    }

    /// Every index is snapshotted exactly once and the snapshot succeeds.
    #[actix_rt::test]
    async fn test_normal() {
        setup();

        let index_count: usize = rand::thread_rng().gen_range(5..10);
        let uuids: HashSet<Uuid> = std::iter::repeat_with(Uuid::new_v4)
            .take(index_count)
            .collect();

        let mut uuid_store = MockUuidStore::new();
        let snapshot_uuids = uuids.clone();
        uuid_store
            .expect_snapshot()
            .times(1)
            .returning(move |_| Box::pin(ok(snapshot_uuids.clone())));

        let mut faux_indexes = uuids.clone().into_iter().map(|uuid| {
            let mocker = Mocker::default();
            mocker
                .when("snapshot")
                .times(1)
                .then(|_: &Path| -> IndexResult<()> { Ok(()) });
            mocker.when("uuid").then(move |_: ()| uuid);
            Index::faux(mocker)
        });

        let known_uuids = uuids.clone();
        let mut index_store = MockIndexStore::new();
        index_store
            .expect_get()
            .withf(move |uuid| known_uuids.contains(uuid))
            .times(index_count)
            .returning(move |_| Box::pin(ok(Some(faux_indexes.next().unwrap()))));

        let (service, _update_dir, _snapshot_dir) = build_service(uuid_store, index_store);

        service.perform_snapshot().await.unwrap();
    }

    /// A failing uuid-store snapshot aborts before any index is fetched.
    #[actix_rt::test]
    async fn error_performing_uuid_snapshot() {
        setup();

        let mut uuid_store = MockUuidStore::new();
        uuid_store.expect_snapshot().once().returning(move |_| {
            Box::pin(err(IndexResolverError::IndexAlreadyExists(
                "test".to_string(),
            )))
        });

        let mut index_store = MockIndexStore::new();
        index_store.expect_get().never();

        let (service, _update_dir, _snapshot_dir) = build_service(uuid_store, index_store);

        assert!(service.perform_snapshot().await.is_err());
    }

    /// An index failing to snapshot makes the whole snapshot fail.
    #[actix_rt::test]
    async fn error_performing_index_snapshot() {
        setup();

        let uuids: HashSet<Uuid> = std::iter::once(Uuid::new_v4()).collect();

        let mut uuid_store = MockUuidStore::new();
        let snapshot_uuids = uuids.clone();
        uuid_store
            .expect_snapshot()
            .once()
            .returning(move |_| Box::pin(ok(snapshot_uuids.clone())));

        let mut faux_indexes = uuids.clone().into_iter().map(|uuid| {
            let mocker = Mocker::default();
            // index returns random error
            mocker.when("snapshot").then(|_: &Path| -> IndexResult<()> {
                Err(IndexError::DocumentNotFound("1".to_string()))
            });
            mocker.when("uuid").then(move |_: ()| uuid);
            Index::faux(mocker)
        });

        let known_uuids = uuids.clone();
        let mut index_store = MockIndexStore::new();
        index_store
            .expect_get()
            .withf(move |uuid| known_uuids.contains(uuid))
            .once()
            .returning(move |_| Box::pin(ok(Some(faux_indexes.next().unwrap()))));

        let (service, _update_dir, _snapshot_dir) = build_service(uuid_store, index_store);

        assert!(service.perform_snapshot().await.is_err());
    }
}