MeiliSearch/meilisearch-http/src/backup.rs
Implement backups
* trigger a backup via an HTTP route
* follow backup progress with a status route
* import a backup via the command line
* let the user choose the batch size of documents to import (command line)

closes #884
closes #840

use std::fs::{create_dir_all, File};
use std::io::prelude::*;
use std::path::{Path, PathBuf};
use std::sync::Mutex;
use std::thread;
use actix_web::web;
use chrono::offset::Utc;
use indexmap::IndexMap;
use log::error;
use meilisearch_core::{MainWriter, MainReader, UpdateReader};
use meilisearch_core::settings::Settings;
use meilisearch_core::update::{apply_settings_update, apply_documents_addition};
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use tempfile::TempDir;
use crate::Data;
use crate::error::Error;
use crate::helpers::compression;
use crate::routes::index;
use crate::routes::index::IndexResponse;
// Global mutex holding the progress of the current backup, shared between the backup thread and the status route.
static BACKUP_INFO: Lazy<Mutex<Option<BackupInfo>>> = Lazy::new(Mutex::default);
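/// Version of the backup format, stored in `metadata.json` and used to select the matching import function.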
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
enum BackupVersion {
V1,
}
impl BackupVersion {
const CURRENT: Self = Self::V1;
}
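/// Metadata written in `metadata.json` at the root of a backup: the exported indexes, the database version and the backup format version.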
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct BackupMetadata {
indexes: Vec<crate::routes::index::IndexResponse>,
db_version: String,
backup_version: BackupVersion,
}
impl BackupMetadata {
/// Create a BackupMetadata with the current backup version of meilisearch.
pub fn new(indexes: Vec<crate::routes::index::IndexResponse>, db_version: String) -> Self {
BackupMetadata {
indexes,
db_version,
backup_version: BackupVersion::CURRENT,
}
}
/// Extract BackupMetadata from the `metadata.json` file present at the provided `folder_path`
fn from_path(folder_path: &Path) -> Result<Self, Error> {
let path = folder_path.join("metadata.json");
let file = File::open(path)?;
let reader = std::io::BufReader::new(file);
let metadata = serde_json::from_reader(reader)?;
Ok(metadata)
}
/// Write BackupMetadata to the `metadata.json` file at the provided `folder_path`
fn to_path(&self, folder_path: &Path) -> Result<(), Error> {
let path = folder_path.join("metadata.json");
let file = File::create(path)?;
serde_json::to_writer(file, &self)?;
Ok(())
}
}
/// Extract Settings from the `settings.json` file present at the provided `folder_path`
fn settings_from_path(folder_path: &Path) -> Result<Settings, Error> {
let path = folder_path.join("settings.json");
let file = File::open(path)?;
let reader = std::io::BufReader::new(file);
let metadata = serde_json::from_reader(reader)?;
Ok(metadata)
}
/// Write Settings to the `settings.json` file at the provided `folder_path`
fn settings_to_path(settings: &Settings, folder_path: &Path) -> Result<(), Error> {
let path = folder_path.join("settings.json");
let file = File::create(path)?;
serde_json::to_writer(file, settings)?;
Ok(())
}
/// Import the settings and documents of a backup with version `BackupVersion::V1` into the specified index.
fn import_index_v1(
data: &Data,
backup_folder: &Path,
index_uid: &str,
document_batch_size: usize,
write_txn: &mut MainWriter,
) -> Result<(), Error> {
// open index
let index = data
.db
.open_index(index_uid)
.ok_or(Error::index_not_found(index_uid))?;
// index folder path in backup folder
let index_path = &backup_folder.join(index_uid);
// extract `settings.json` file and import content
let settings = settings_from_path(&index_path)?;
let settings = settings.to_update().map_err(|_| Error::backup_failed())?;
apply_settings_update(write_txn, &index, settings)?;
// create an iterator over the documents in `documents.jsonl` so they can be imported in batches
let documents = {
let file = File::open(&index_path.join("documents.jsonl"))?;
let reader = std::io::BufReader::new(file);
let deserializer = serde_json::Deserializer::from_reader(reader);
deserializer.into_iter::<IndexMap<String, serde_json::Value>>()
};
// import documents in batches of `document_batch_size`:
// create a Vec to buffer the documents
let mut values = Vec::with_capacity(document_batch_size);
// iterate over documents
for document in documents {
// push document in buffer
values.push(document?);
// if buffer is full, create and apply a batch, and clean buffer
if values.len() == document_batch_size {
let batch = std::mem::replace(&mut values, Vec::with_capacity(document_batch_size));
apply_documents_addition(write_txn, &index, batch)?;
}
}
// apply documents remaining in the buffer
if !values.is_empty() {
apply_documents_addition(write_txn, &index, values)?;
}
Ok(())
}
/// Import backup from `backup_folder` in database.
pub fn import_backup(
data: &Data,
backup_folder: &Path,
document_batch_size: usize,
) -> Result<(), Error> {
// create a temporary directory
let tmp_dir = TempDir::new()?;
let tmp_dir_path = tmp_dir.path();
// extract backup in temporary directory
compression::from_tar_gz(backup_folder, tmp_dir_path)?;
// read backup metadata
let metadata = BackupMetadata::from_path(&tmp_dir_path)?;
// choose the import function according to the BackupVersion in the metadata
let import_index = match metadata.backup_version {
BackupVersion::V1 => import_index_v1,
};
// delete existing indexes that share a `uid` with an index to import, then create empty indexes
let existing_index_uids = data.db.indexes_uids();
for index in metadata.indexes.iter() {
if existing_index_uids.contains(&index.uid) {
data.db.delete_index(index.uid.clone())?;
}
index::create_index_sync(&data.db, index.uid.clone(), index.name.clone(), index.primary_key.clone())?;
}
// import the content of each index
data.db.main_write::<_, _, Error>(|mut writer| {
for index in metadata.indexes {
import_index(&data, tmp_dir_path, &index.uid, document_batch_size, &mut writer)?;
}
Ok(())
})?;
Ok(())
}
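/// Status of the backup process, as exposed by the status route.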
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(rename_all = "snake_case")]
pub enum BackupStatus {
Done,
Processing,
BackupProcessFailed,
}
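/// Progress information of the current backup, shared through `BACKUP_INFO`.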
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct BackupInfo {
pub uid: String,
pub status: BackupStatus,
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
}
impl BackupInfo {
pub fn new(uid: String, status: BackupStatus) -> Self {
Self { uid, status, error: None }
}
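/// Mark the backup as failed and attach the error message.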
pub fn with_error(mut self, error: String) -> Self {
self.status = BackupStatus::BackupProcessFailed;
self.error = Some(error);
self
}
pub fn backup_already_in_progress(&self) -> bool {
self.status == BackupStatus::Processing
}
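/// Return a copy of the shared `BACKUP_INFO`, if a backup has already been registered.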
pub fn get_current() -> Option<Self> {
BACKUP_INFO.lock().unwrap().clone()
}
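/// Register this `BackupInfo` as the shared `BACKUP_INFO`.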
pub fn set_current(&self) {
*BACKUP_INFO.lock().unwrap() = Some(self.clone());
}
}
/// Generate a backup uid from the creation date
fn generate_uid() -> String {
Utc::now().format("%Y%m%d-%H%M%S").to_string()
}
/// Build the path of the compressed backup file, `{backup_uid}.tar.gz`, inside `backup_folder`
pub fn compressed_backup_folder(backup_folder: &Path, backup_uid: &str) -> PathBuf {
backup_folder.join(format!("{}.tar.gz", backup_uid))
}
/// Write the backup metadata (`metadata.json`) at the root of the backup folder
fn backup_metadata(data: &web::Data<Data>, folder_path: &Path, indexes: Vec<IndexResponse>) -> Result<(), Error> {
let (db_major, db_minor, db_patch) = data.db.version();
let metadata = BackupMetadata::new(indexes, format!("{}.{}.{}", db_major, db_minor, db_patch));
metadata.to_path(folder_path)
}
/// Export the settings of the provided index into the backup
fn backup_index_settings(data: &web::Data<Data>, reader: &MainReader, folder_path: &Path, index_uid: &str) -> Result<(), Error> {
let settings = crate::routes::setting::get_all_sync(data, reader, index_uid)?;
settings_to_path(&settings, folder_path)
}
/// Export the update statuses of the provided index into the backup
fn backup_index_updates(data: &web::Data<Data>, reader: &UpdateReader, folder_path: &Path, index_uid: &str) -> Result<(), Error> {
let updates_path = folder_path.join("updates.jsonl");
let updates = crate::routes::index::get_all_updates_status_sync(data, reader, index_uid)?;
let file = File::create(updates_path)?;
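// write each update status as one JSON object per line (JSON Lines)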
for update in updates {
serde_json::to_writer(&file, &update)?;
writeln!(&file)?;
}
Ok(())
}
/// Export the documents of the provided index into the backup
fn backup_index_documents(data: &web::Data<Data>, reader: &MainReader, folder_path: &Path, index_uid: &str) -> Result<(), Error> {
let documents_path = folder_path.join("documents.jsonl");
let file = File::create(documents_path)?;
let backup_batch_size = data.backup_batch_size;
let mut offset = 0;
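// page through the documents `backup_batch_size` at a time and write one JSON document per line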
loop {
let documents = crate::routes::document::get_all_documents_sync(data, reader, index_uid, offset, backup_batch_size, None)?;
if documents.is_empty() { break; } else { offset += backup_batch_size; }
for document in documents {
serde_json::to_writer(&file, &document)?;
writeln!(&file)?;
}
}
Ok(())
}
/// Log the error with its context and mark the current backup as failed.
fn fail_backup_process<E: std::error::Error>(backup_info: BackupInfo, context: &str, error: E) {
let error = format!("Something went wrong during backup process: {}; {}", context, error);
error!("{}", &error);
backup_info.with_error(error).set_current();
}
/// Run the backup: export the metadata, settings, documents and updates of every index into a temporary directory, then compress it into `{backup_uid}.tar.gz` in `backup_folder`.
fn backup_process(data: web::Data<Data>, backup_folder: PathBuf, backup_info: BackupInfo) {
// open read transaction on Update
let update_reader = match data.db.update_read_txn() {
Ok(r) => r,
Err(e) => {
fail_backup_process(backup_info, "creating RO transaction on updates", e);
return ;
}
};
// open read transaction on Main
let main_reader = match data.db.main_read_txn() {
Ok(r) => r,
Err(e) => {
fail_backup_process(backup_info, "creating RO transaction on main", e);
return ;
}
};
// create a temporary directory
let tmp_dir = match TempDir::new() {
Ok(tmp_dir) => tmp_dir,
Err(e) => {
fail_backup_process(backup_info, "creating temporary directory", e);
return ;
}
};
let tmp_dir_path = tmp_dir.path();
// fetch indexes
let indexes = match crate::routes::index::list_indexes_sync(&data, &main_reader) {
Ok(indexes) => indexes,
Err(e) => {
fail_backup_process(backup_info, "listing indexes", e);
return ;
}
};
// create metadata
if let Err(e) = backup_metadata(&data, &tmp_dir_path, indexes.clone()) {
fail_backup_process(backup_info, "generating metadata", e);
return ;
}
// export the settings, updates and documents of each index
for index in indexes {
let index_path = tmp_dir_path.join(&index.uid);
// create the index sub-directory
if let Err(e) = create_dir_all(&index_path) {
fail_backup_process(backup_info, &format!("creating directory for index {}", &index.uid), e);
return ;
}
// export settings
if let Err(e) = backup_index_settings(&data, &main_reader, &index_path, &index.uid) {
fail_backup_process(backup_info, &format!("generating settings for index {}", &index.uid), e);
return ;
}
// export documents
if let Err(e) = backup_index_documents(&data, &main_reader, &index_path, &index.uid) {
fail_backup_process(backup_info, &format!("generating documents for index {}", &index.uid), e);
return ;
}
// export updates
if let Err(e) = backup_index_updates(&data, &update_reader, &index_path, &index.uid) {
fail_backup_process(backup_info, &format!("generating updates for index {}", &index.uid), e);
return ;
}
}
// compress backup in a file named `{backup_uid}.tar.gz` in `backup_folder`
if let Err(e) = crate::helpers::compression::to_tar_gz(&tmp_dir_path, &compressed_backup_folder(&backup_folder, &backup_info.uid)) {
fail_backup_process(backup_info, "compressing backup", e);
return ;
}
// update backup info to `done`
let resume = BackupInfo::new(
backup_info.uid,
BackupStatus::Done
);
resume.set_current();
}
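/// Ensure `backup_folder` exists, check that no other backup is in progress, register a new `BackupInfo` and run `backup_process` in a background thread.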
pub fn init_backup_process(data: &web::Data<Data>, backup_folder: &Path) -> Result<BackupInfo, Error> {
create_dir_all(backup_folder).map_err(|_| Error::backup_failed())?;
// check if a backup is already in progress
if let Some(resume) = BackupInfo::get_current() {
if resume.backup_already_in_progress() {
return Err(Error::backup_conflict())
}
}
// generate a new backup info
let info = BackupInfo::new(
generate_uid(),
BackupStatus::Processing
);
info.set_current();
let data = data.clone();
let backup_folder = backup_folder.to_path_buf();
let info_cloned = info.clone();
// run backup process in a new thread
thread::spawn(move ||
backup_process(data, backup_folder, info_cloned)
);
Ok(info)
}