mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-23 03:37:28 +01:00
WIP: rebasing on master
This commit is contained in:
parent
ceb8d6e1c9
commit
e389c088eb
@ -1,423 +0,0 @@
|
|||||||
use std::fs::{create_dir_all, File};
|
|
||||||
use std::io::prelude::*;
|
|
||||||
use std::path::{Path, PathBuf};
|
|
||||||
use std::sync::Mutex;
|
|
||||||
use std::thread;
|
|
||||||
|
|
||||||
use actix_web::web;
|
|
||||||
use chrono::offset::Utc;
|
|
||||||
use indexmap::IndexMap;
|
|
||||||
use log::{error, info};
|
|
||||||
use once_cell::sync::Lazy;
|
|
||||||
use serde::{Deserialize, Serialize};
|
|
||||||
use serde_json::json;
|
|
||||||
use tempfile::TempDir;
|
|
||||||
|
|
||||||
use crate::Data;
|
|
||||||
use crate::error::{Error, ResponseError};
|
|
||||||
use crate::helpers::compression;
|
|
||||||
use crate::routes::index;
|
|
||||||
use crate::routes::setting::Settings;
|
|
||||||
use crate::routes::index::IndexResponse;
|
|
||||||
|
|
||||||
// Mutex to share dump progress.
|
|
||||||
static DUMP_INFO: Lazy<Mutex<Option<DumpInfo>>> = Lazy::new(Mutex::default);
|
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
|
|
||||||
enum DumpVersion {
|
|
||||||
V1,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl DumpVersion {
|
|
||||||
const CURRENT: Self = Self::V1;
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize)]
|
|
||||||
#[serde(rename_all = "camelCase")]
|
|
||||||
pub struct DumpMetadata {
|
|
||||||
indexes: Vec<crate::routes::index::IndexResponse>,
|
|
||||||
db_version: String,
|
|
||||||
dump_version: DumpVersion,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl DumpMetadata {
|
|
||||||
/// Create a DumpMetadata with the current dump version of meilisearch.
|
|
||||||
pub fn new(indexes: Vec<crate::routes::index::IndexResponse>, db_version: String) -> Self {
|
|
||||||
DumpMetadata {
|
|
||||||
indexes,
|
|
||||||
db_version,
|
|
||||||
dump_version: DumpVersion::CURRENT,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Extract DumpMetadata from `metadata.json` file present at provided `dir_path`
|
|
||||||
fn from_path(dir_path: &Path) -> Result<Self, Error> {
|
|
||||||
let path = dir_path.join("metadata.json");
|
|
||||||
let file = File::open(path)?;
|
|
||||||
let reader = std::io::BufReader::new(file);
|
|
||||||
let metadata = serde_json::from_reader(reader)?;
|
|
||||||
|
|
||||||
Ok(metadata)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Write DumpMetadata in `metadata.json` file at provided `dir_path`
|
|
||||||
fn to_path(&self, dir_path: &Path) -> Result<(), Error> {
|
|
||||||
let path = dir_path.join("metadata.json");
|
|
||||||
let file = File::create(path)?;
|
|
||||||
|
|
||||||
serde_json::to_writer(file, &self)?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Extract Settings from `settings.json` file present at provided `dir_path`
|
|
||||||
fn settings_from_path(dir_path: &Path) -> Result<Settings, Error> {
|
|
||||||
let path = dir_path.join("settings.json");
|
|
||||||
let file = File::open(path)?;
|
|
||||||
let reader = std::io::BufReader::new(file);
|
|
||||||
let metadata = serde_json::from_reader(reader)?;
|
|
||||||
|
|
||||||
Ok(metadata)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Write Settings in `settings.json` file at provided `dir_path`
|
|
||||||
fn settings_to_path(settings: &Settings, dir_path: &Path) -> Result<(), Error> {
|
|
||||||
let path = dir_path.join("settings.json");
|
|
||||||
let file = File::create(path)?;
|
|
||||||
|
|
||||||
serde_json::to_writer(file, settings)?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Import settings and documents of a dump with version `DumpVersion::V1` in specified index.
|
|
||||||
fn import_index_v1(
|
|
||||||
data: &Data,
|
|
||||||
dumps_dir: &Path,
|
|
||||||
index_uid: &str,
|
|
||||||
document_batch_size: usize,
|
|
||||||
write_txn: &mut MainWriter,
|
|
||||||
) -> Result<(), Error> {
|
|
||||||
|
|
||||||
// open index
|
|
||||||
let index = data
|
|
||||||
.db
|
|
||||||
.open_index(index_uid)
|
|
||||||
.ok_or(Error::index_not_found(index_uid))?;
|
|
||||||
|
|
||||||
// index dir path in dump dir
|
|
||||||
let index_path = &dumps_dir.join(index_uid);
|
|
||||||
|
|
||||||
// extract `settings.json` file and import content
|
|
||||||
let settings = settings_from_path(&index_path)?;
|
|
||||||
let settings = settings.to_update().map_err(|e| Error::dump_failed(format!("importing settings for index {}; {}", index_uid, e)))?;
|
|
||||||
apply_settings_update(write_txn, &index, settings)?;
|
|
||||||
|
|
||||||
// create iterator over documents in `documents.jsonl` to make batch importation
|
|
||||||
// create iterator over documents in `documents.jsonl` to make batch importation
|
|
||||||
let documents = {
|
|
||||||
let file = File::open(&index_path.join("documents.jsonl"))?;
|
|
||||||
let reader = std::io::BufReader::new(file);
|
|
||||||
let deserializer = serde_json::Deserializer::from_reader(reader);
|
|
||||||
deserializer.into_iter::<IndexMap<String, serde_json::Value>>()
|
|
||||||
};
|
|
||||||
|
|
||||||
// batch import document every `document_batch_size`:
|
|
||||||
// create a Vec to bufferize documents
|
|
||||||
let mut values = Vec::with_capacity(document_batch_size);
|
|
||||||
// iterate over documents
|
|
||||||
for document in documents {
|
|
||||||
// push document in buffer
|
|
||||||
values.push(document?);
|
|
||||||
// if buffer is full, create and apply a batch, and clean buffer
|
|
||||||
if values.len() == document_batch_size {
|
|
||||||
let batch = std::mem::replace(&mut values, Vec::with_capacity(document_batch_size));
|
|
||||||
apply_documents_addition(write_txn, &index, batch)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// apply documents remaining in the buffer
|
|
||||||
if !values.is_empty() {
|
|
||||||
apply_documents_addition(write_txn, &index, values)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
// sync index information: stats, updated_at, last_update
|
|
||||||
if let Err(e) = crate::index_update_callback_txn(index, index_uid, data, write_txn) {
|
|
||||||
return Err(Error::Internal(e));
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Import dump from `dump_path` in database.
|
|
||||||
pub fn import_dump(
|
|
||||||
data: &Data,
|
|
||||||
dump_path: &Path,
|
|
||||||
document_batch_size: usize,
|
|
||||||
) -> Result<(), Error> {
|
|
||||||
info!("Importing dump from {:?}...", dump_path);
|
|
||||||
|
|
||||||
// create a temporary directory
|
|
||||||
let tmp_dir = TempDir::new()?;
|
|
||||||
let tmp_dir_path = tmp_dir.path();
|
|
||||||
|
|
||||||
// extract dump in temporary directory
|
|
||||||
compression::from_tar_gz(dump_path, tmp_dir_path)?;
|
|
||||||
|
|
||||||
// read dump metadata
|
|
||||||
let metadata = DumpMetadata::from_path(&tmp_dir_path)?;
|
|
||||||
|
|
||||||
// choose importation function from DumpVersion of metadata
|
|
||||||
let import_index = match metadata.dump_version {
|
|
||||||
DumpVersion::V1 => import_index_v1,
|
|
||||||
};
|
|
||||||
|
|
||||||
// remove indexes which have same `uid` than indexes to import and create empty indexes
|
|
||||||
let existing_index_uids = data.db.indexes_uids();
|
|
||||||
for index in metadata.indexes.iter() {
|
|
||||||
if existing_index_uids.contains(&index.uid) {
|
|
||||||
data.db.delete_index(index.uid.clone())?;
|
|
||||||
}
|
|
||||||
index::create_index_sync(&data.db, index.uid.clone(), index.name.clone(), index.primary_key.clone())?;
|
|
||||||
}
|
|
||||||
|
|
||||||
// import each indexes content
|
|
||||||
data.db.main_write::<_, _, Error>(|mut writer| {
|
|
||||||
for index in metadata.indexes {
|
|
||||||
import_index(&data, tmp_dir_path, &index.uid, document_batch_size, &mut writer)?;
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
})?;
|
|
||||||
|
|
||||||
info!("Dump importation from {:?} succeed", dump_path);
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
|
|
||||||
#[serde(rename_all = "snake_case")]
|
|
||||||
pub enum DumpStatus {
|
|
||||||
Done,
|
|
||||||
InProgress,
|
|
||||||
Failed,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Clone)]
|
|
||||||
#[serde(rename_all = "camelCase")]
|
|
||||||
pub struct DumpInfo {
|
|
||||||
pub uid: String,
|
|
||||||
pub status: DumpStatus,
|
|
||||||
#[serde(skip_serializing_if = "Option::is_none", flatten)]
|
|
||||||
pub error: Option<serde_json::Value>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl DumpInfo {
|
|
||||||
pub fn new(uid: String, status: DumpStatus) -> Self {
|
|
||||||
Self { uid, status, error: None }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn with_error(mut self, error: ResponseError) -> Self {
|
|
||||||
self.status = DumpStatus::Failed;
|
|
||||||
self.error = Some(json!(error));
|
|
||||||
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn dump_already_in_progress(&self) -> bool {
|
|
||||||
self.status == DumpStatus::InProgress
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get_current() -> Option<Self> {
|
|
||||||
DUMP_INFO.lock().unwrap().clone()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn set_current(&self) {
|
|
||||||
*DUMP_INFO.lock().unwrap() = Some(self.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Generate uid from creation date
|
|
||||||
fn generate_uid() -> String {
|
|
||||||
Utc::now().format("%Y%m%d-%H%M%S%3f").to_string()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Builds the path of the compressed dump file: `<dumps_dir>/<dump_uid>.dump`.
pub fn compressed_dumps_dir(dumps_dir: &Path, dump_uid: &str) -> PathBuf {
    let mut archive = dumps_dir.to_path_buf();
    archive.push(format!("{}.dump", dump_uid));
    archive
}
|
|
||||||
|
|
||||||
/// Write metadata in dump
|
|
||||||
fn dump_metadata(data: &web::Data<Data>, dir_path: &Path, indexes: Vec<IndexResponse>) -> Result<(), Error> {
|
|
||||||
let (db_major, db_minor, db_patch) = data.db.version();
|
|
||||||
let metadata = DumpMetadata::new(indexes, format!("{}.{}.{}", db_major, db_minor, db_patch));
|
|
||||||
|
|
||||||
metadata.to_path(dir_path)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Export settings of provided index in dump
|
|
||||||
fn dump_index_settings(data: &web::Data<Data>, reader: &MainReader, dir_path: &Path, index_uid: &str) -> Result<(), Error> {
|
|
||||||
let settings = crate::routes::setting::get_all_sync(data, reader, index_uid)?;
|
|
||||||
|
|
||||||
settings_to_path(&settings, dir_path)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Export updates of provided index in dump
|
|
||||||
fn dump_index_updates(data: &web::Data<Data>, reader: &UpdateReader, dir_path: &Path, index_uid: &str) -> Result<(), Error> {
|
|
||||||
let updates_path = dir_path.join("updates.jsonl");
|
|
||||||
let updates = crate::routes::index::get_all_updates_status_sync(data, reader, index_uid)?;
|
|
||||||
|
|
||||||
let file = File::create(updates_path)?;
|
|
||||||
|
|
||||||
for update in updates {
|
|
||||||
serde_json::to_writer(&file, &update)?;
|
|
||||||
writeln!(&file)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Export documents of provided index in dump
|
|
||||||
fn dump_index_documents(data: &web::Data<Data>, reader: &MainReader, dir_path: &Path, index_uid: &str) -> Result<(), Error> {
|
|
||||||
let documents_path = dir_path.join("documents.jsonl");
|
|
||||||
let file = File::create(documents_path)?;
|
|
||||||
let dump_batch_size = data.dump_batch_size;
|
|
||||||
|
|
||||||
let mut offset = 0;
|
|
||||||
loop {
|
|
||||||
let documents = crate::routes::document::get_all_documents_sync(data, reader, index_uid, offset, dump_batch_size, None)?;
|
|
||||||
if documents.is_empty() { break; } else { offset += dump_batch_size; }
|
|
||||||
|
|
||||||
for document in documents {
|
|
||||||
serde_json::to_writer(&file, &document)?;
|
|
||||||
writeln!(&file)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Write error with a context.
|
|
||||||
fn fail_dump_process<E: std::error::Error>(dump_info: DumpInfo, context: &str, error: E) {
|
|
||||||
let error_message = format!("{}; {}", context, error);
|
|
||||||
|
|
||||||
error!("Something went wrong during dump process: {}", &error_message);
|
|
||||||
dump_info.with_error(Error::dump_failed(error_message).into()).set_current();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Main function of dump.
|
|
||||||
fn dump_process(data: web::Data<Data>, dumps_dir: PathBuf, dump_info: DumpInfo) {
|
|
||||||
// open read transaction on Update
|
|
||||||
let update_reader = match data.db.update_read_txn() {
|
|
||||||
Ok(r) => r,
|
|
||||||
Err(e) => {
|
|
||||||
fail_dump_process(dump_info, "creating RO transaction on updates", e);
|
|
||||||
return ;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// open read transaction on Main
|
|
||||||
let main_reader = match data.db.main_read_txn() {
|
|
||||||
Ok(r) => r,
|
|
||||||
Err(e) => {
|
|
||||||
fail_dump_process(dump_info, "creating RO transaction on main", e);
|
|
||||||
return ;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// create a temporary directory
|
|
||||||
let tmp_dir = match TempDir::new() {
|
|
||||||
Ok(tmp_dir) => tmp_dir,
|
|
||||||
Err(e) => {
|
|
||||||
fail_dump_process(dump_info, "creating temporary directory", e);
|
|
||||||
return ;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
let tmp_dir_path = tmp_dir.path();
|
|
||||||
|
|
||||||
// fetch indexes
|
|
||||||
let indexes = match crate::routes::index::list_indexes_sync(&data, &main_reader) {
|
|
||||||
Ok(indexes) => indexes,
|
|
||||||
Err(e) => {
|
|
||||||
fail_dump_process(dump_info, "listing indexes", e);
|
|
||||||
return ;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// create metadata
|
|
||||||
if let Err(e) = dump_metadata(&data, &tmp_dir_path, indexes.clone()) {
|
|
||||||
fail_dump_process(dump_info, "generating metadata", e);
|
|
||||||
return ;
|
|
||||||
}
|
|
||||||
|
|
||||||
// export settings, updates and documents for each indexes
|
|
||||||
for index in indexes {
|
|
||||||
let index_path = tmp_dir_path.join(&index.uid);
|
|
||||||
|
|
||||||
// create index sub-dircetory
|
|
||||||
if let Err(e) = create_dir_all(&index_path) {
|
|
||||||
fail_dump_process(dump_info, &format!("creating directory for index {}", &index.uid), e);
|
|
||||||
return ;
|
|
||||||
}
|
|
||||||
|
|
||||||
// export settings
|
|
||||||
if let Err(e) = dump_index_settings(&data, &main_reader, &index_path, &index.uid) {
|
|
||||||
fail_dump_process(dump_info, &format!("generating settings for index {}", &index.uid), e);
|
|
||||||
return ;
|
|
||||||
}
|
|
||||||
|
|
||||||
// export documents
|
|
||||||
if let Err(e) = dump_index_documents(&data, &main_reader, &index_path, &index.uid) {
|
|
||||||
fail_dump_process(dump_info, &format!("generating documents for index {}", &index.uid), e);
|
|
||||||
return ;
|
|
||||||
}
|
|
||||||
|
|
||||||
// export updates
|
|
||||||
if let Err(e) = dump_index_updates(&data, &update_reader, &index_path, &index.uid) {
|
|
||||||
fail_dump_process(dump_info, &format!("generating updates for index {}", &index.uid), e);
|
|
||||||
return ;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// compress dump in a file named `{dump_uid}.dump` in `dumps_dir`
|
|
||||||
if let Err(e) = crate::helpers::compression::to_tar_gz(&tmp_dir_path, &compressed_dumps_dir(&dumps_dir, &dump_info.uid)) {
|
|
||||||
fail_dump_process(dump_info, "compressing dump", e);
|
|
||||||
return ;
|
|
||||||
}
|
|
||||||
|
|
||||||
// update dump info to `done`
|
|
||||||
let resume = DumpInfo::new(
|
|
||||||
dump_info.uid,
|
|
||||||
DumpStatus::Done
|
|
||||||
);
|
|
||||||
|
|
||||||
resume.set_current();
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn init_dump_process(data: &web::Data<Data>, dumps_dir: &Path) -> Result<DumpInfo, Error> {
|
|
||||||
create_dir_all(dumps_dir).map_err(|e| Error::dump_failed(format!("creating temporary directory {}", e)))?;
|
|
||||||
|
|
||||||
// check if a dump is already in progress
|
|
||||||
if let Some(resume) = DumpInfo::get_current() {
|
|
||||||
if resume.dump_already_in_progress() {
|
|
||||||
return Err(Error::dump_conflict())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// generate a new dump info
|
|
||||||
let info = DumpInfo::new(
|
|
||||||
generate_uid(),
|
|
||||||
DumpStatus::InProgress
|
|
||||||
);
|
|
||||||
|
|
||||||
info.set_current();
|
|
||||||
|
|
||||||
let data = data.clone();
|
|
||||||
let dumps_dir = dumps_dir.to_path_buf();
|
|
||||||
let info_cloned = info.clone();
|
|
||||||
// run dump process in a new thread
|
|
||||||
thread::spawn(move ||
|
|
||||||
dump_process(data, dumps_dir, info_cloned)
|
|
||||||
);
|
|
||||||
|
|
||||||
Ok(info)
|
|
||||||
}
|
|
258
meilisearch-http/src/index_controller/dump.rs
Normal file
258
meilisearch-http/src/index_controller/dump.rs
Normal file
@ -0,0 +1,258 @@
|
|||||||
|
use std::{
|
||||||
|
fs::File,
|
||||||
|
path::{Path, PathBuf},
|
||||||
|
sync::Arc,
|
||||||
|
};
|
||||||
|
|
||||||
|
use anyhow::bail;
|
||||||
|
use heed::EnvOpenOptions;
|
||||||
|
use log::{error, info};
|
||||||
|
use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use tempfile::TempDir;
|
||||||
|
use tokio::fs;
|
||||||
|
use tokio::task::spawn_blocking;
|
||||||
|
|
||||||
|
use super::update_actor::UpdateActorHandle;
|
||||||
|
use super::uuid_resolver::UuidResolverHandle;
|
||||||
|
use super::IndexMetadata;
|
||||||
|
use crate::index::Index;
|
||||||
|
use crate::index_controller::uuid_resolver;
|
||||||
|
use crate::{helpers::compression, index::Settings};
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
|
||||||
|
enum DumpVersion {
|
||||||
|
V1,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DumpVersion {
|
||||||
|
const CURRENT: Self = Self::V1;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct DumpMetadata {
|
||||||
|
indexes: Vec<IndexMetadata>,
|
||||||
|
db_version: String,
|
||||||
|
dump_version: DumpVersion,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DumpMetadata {
|
||||||
|
/// Create a DumpMetadata with the current dump version of meilisearch.
|
||||||
|
pub fn new(indexes: Vec<IndexMetadata>, db_version: String) -> Self {
|
||||||
|
DumpMetadata {
|
||||||
|
indexes,
|
||||||
|
db_version,
|
||||||
|
dump_version: DumpVersion::CURRENT,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extract DumpMetadata from `metadata.json` file present at provided `dir_path`
|
||||||
|
fn from_path(dir_path: &Path) -> anyhow::Result<Self> {
|
||||||
|
let path = dir_path.join("metadata.json");
|
||||||
|
let file = File::open(path)?;
|
||||||
|
let reader = std::io::BufReader::new(file);
|
||||||
|
let metadata = serde_json::from_reader(reader)?;
|
||||||
|
|
||||||
|
Ok(metadata)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write DumpMetadata in `metadata.json` file at provided `dir_path`
|
||||||
|
fn to_path(&self, dir_path: &Path) -> anyhow::Result<()> {
|
||||||
|
let path = dir_path.join("metadata.json");
|
||||||
|
let file = File::create(path)?;
|
||||||
|
|
||||||
|
serde_json::to_writer(file, &self)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct DumpService<U, R> {
|
||||||
|
uuid_resolver_handle: R,
|
||||||
|
update_handle: U,
|
||||||
|
dump_path: PathBuf,
|
||||||
|
db_name: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<U, R> DumpService<U, R>
|
||||||
|
where
|
||||||
|
U: UpdateActorHandle,
|
||||||
|
R: UuidResolverHandle,
|
||||||
|
{
|
||||||
|
pub fn new(
|
||||||
|
uuid_resolver_handle: R,
|
||||||
|
update_handle: U,
|
||||||
|
dump_path: PathBuf,
|
||||||
|
db_name: String,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
uuid_resolver_handle,
|
||||||
|
update_handle,
|
||||||
|
dump_path,
|
||||||
|
db_name,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn run(self) {
|
||||||
|
if let Err(e) = self.perform_dump().await {
|
||||||
|
error!("{}", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn perform_dump(&self) -> anyhow::Result<()> {
|
||||||
|
info!("Performing dump.");
|
||||||
|
|
||||||
|
let dump_dir = self.dump_path.clone();
|
||||||
|
fs::create_dir_all(&dump_dir).await?;
|
||||||
|
let temp_dump_dir = spawn_blocking(move || tempfile::tempdir_in(dump_dir)).await??;
|
||||||
|
let temp_dump_path = temp_dump_dir.path().to_owned();
|
||||||
|
|
||||||
|
let uuids = self
|
||||||
|
.uuid_resolver_handle
|
||||||
|
.dump(temp_dump_path.clone())
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
if uuids.is_empty() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let tasks = uuids
|
||||||
|
.iter()
|
||||||
|
.map(|&uuid| self.update_handle.dump(uuid, temp_dump_path.clone()))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
futures::future::try_join_all(tasks).await?;
|
||||||
|
|
||||||
|
let dump_dir = self.dump_path.clone();
|
||||||
|
let dump_path = self.dump_path.join(format!("{}.dump", self.db_name));
|
||||||
|
let dump_path = spawn_blocking(move || -> anyhow::Result<PathBuf> {
|
||||||
|
let temp_dump_file = tempfile::NamedTempFile::new_in(dump_dir)?;
|
||||||
|
let temp_dump_file_path = temp_dump_file.path().to_owned();
|
||||||
|
compression::to_tar_gz(temp_dump_path, temp_dump_file_path)?;
|
||||||
|
temp_dump_file.persist(&dump_path)?;
|
||||||
|
Ok(dump_path)
|
||||||
|
})
|
||||||
|
.await??;
|
||||||
|
|
||||||
|
info!("Created dump in {:?}.", dump_path);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extract Settings from `settings.json` file present at provided `dir_path`
|
||||||
|
fn settings_from_path(dir_path: &Path) -> anyhow::Result<Settings> {
|
||||||
|
let path = dir_path.join("settings.json");
|
||||||
|
let file = File::open(path)?;
|
||||||
|
let reader = std::io::BufReader::new(file);
|
||||||
|
let metadata = serde_json::from_reader(reader)?;
|
||||||
|
|
||||||
|
Ok(metadata)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write Settings in `settings.json` file at provided `dir_path`
|
||||||
|
fn settings_to_path(settings: &Settings, dir_path: &Path) -> anyhow::Result<()> {
|
||||||
|
let path = dir_path.join("settings.json");
|
||||||
|
let file = File::create(path)?;
|
||||||
|
|
||||||
|
serde_json::to_writer(file, settings)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn import_index_v1(size: usize, dump_path: &Path, index_path: &Path) -> anyhow::Result<()> {
|
||||||
|
std::fs::create_dir_all(&index_path)?;
|
||||||
|
let mut options = EnvOpenOptions::new();
|
||||||
|
options.map_size(size);
|
||||||
|
let index = milli::Index::new(options, index_path)?;
|
||||||
|
let index = Index(Arc::new(index));
|
||||||
|
|
||||||
|
// extract `settings.json` file and import content
|
||||||
|
let settings = settings_from_path(&dump_path)?;
|
||||||
|
let update_builder = UpdateBuilder::new(0);
|
||||||
|
index.update_settings(&settings, update_builder)?;
|
||||||
|
|
||||||
|
let update_builder = UpdateBuilder::new(1);
|
||||||
|
let file = File::open(&index_path.join("documents.jsonl"))?;
|
||||||
|
let reader = std::io::BufReader::new(file);
|
||||||
|
index.update_documents(
|
||||||
|
UpdateFormat::JsonStream,
|
||||||
|
IndexDocumentsMethod::ReplaceDocuments,
|
||||||
|
reader,
|
||||||
|
update_builder,
|
||||||
|
None,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
// the last step: we extract the milli::Index and close it
|
||||||
|
Arc::try_unwrap(index.0)
|
||||||
|
.map_err(|_e| "[dumps] At this point no one is supposed to have a reference on the index")
|
||||||
|
.unwrap()
|
||||||
|
.prepare_for_closing()
|
||||||
|
.wait();
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn load_dump(
|
||||||
|
db_path: impl AsRef<Path>,
|
||||||
|
dump_path: impl AsRef<Path>,
|
||||||
|
size: usize,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
info!("Importing dump from {}...", dump_path.as_ref().display());
|
||||||
|
let db_path = db_path.as_ref();
|
||||||
|
let dump_path = dump_path.as_ref();
|
||||||
|
let uuid_resolver = uuid_resolver::UuidResolverHandleImpl::new(&db_path)?;
|
||||||
|
|
||||||
|
// extract the dump in a temporary directory
|
||||||
|
let tmp_dir = TempDir::new()?;
|
||||||
|
let tmp_dir_path = tmp_dir.path();
|
||||||
|
compression::from_tar_gz(dump_path, tmp_dir_path)?;
|
||||||
|
|
||||||
|
// read dump metadata
|
||||||
|
let metadata = DumpMetadata::from_path(&tmp_dir_path)?;
|
||||||
|
|
||||||
|
// choose importation function from DumpVersion of metadata
|
||||||
|
let import_index = match metadata.dump_version {
|
||||||
|
DumpVersion::V1 => import_index_v1,
|
||||||
|
};
|
||||||
|
|
||||||
|
// remove indexes which have same `uuid` than indexes to import and create empty indexes
|
||||||
|
let existing_index_uids = futures::executor::block_on(uuid_resolver.list())?;
|
||||||
|
|
||||||
|
info!("Deleting indexes provided in the dump...");
|
||||||
|
for idx in &metadata.indexes {
|
||||||
|
if let Some((_, uuid)) = existing_index_uids.iter().find(|(s, _)| s == &idx.uid) {
|
||||||
|
// if we find the index in the `uuid_resolver` it's supposed to exist on the file system
|
||||||
|
// and we want to delete it
|
||||||
|
let path = db_path.join(&format!("indexes/index-{}", uuid));
|
||||||
|
info!("Deleting {}", path.display());
|
||||||
|
use std::io::ErrorKind::*;
|
||||||
|
match std::fs::remove_dir_all(path) {
|
||||||
|
Ok(()) => (),
|
||||||
|
// if an index was present in the metadata but missing of the fs we can ignore the
|
||||||
|
// problem because we are going to create it later
|
||||||
|
Err(e) if e.kind() == NotFound => (),
|
||||||
|
Err(e) => bail!(e),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// if the index does not exist in the `uuid_resolver` we create it
|
||||||
|
futures::executor::block_on(uuid_resolver.create(idx.uid.clone()))?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// import each indexes content
|
||||||
|
for idx in metadata.indexes {
|
||||||
|
let dump_path = tmp_dir_path.join(&idx.uid);
|
||||||
|
let uuid = futures::executor::block_on(uuid_resolver.get(idx.uid))?;
|
||||||
|
let index_path = db_path.join(&format!("indexes/index-{}", uuid));
|
||||||
|
|
||||||
|
info!("Importing dump from {} into {}...", dump_path.display(), index_path.display());
|
||||||
|
import_index(size, &dump_path, &index_path).unwrap();
|
||||||
|
info!("Dump importation from {} succeed", dump_path.display());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
info!("Dump importation from {} succeed", dump_path.display());
|
||||||
|
Ok(())
|
||||||
|
}
|
@ -36,6 +36,9 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
|||||||
Ok(Self { receiver, update_handler, store })
|
Ok(Self { receiver, update_handler, store })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// `run` poll the write_receiver and read_receiver concurrently, but while messages send
|
||||||
|
/// through the read channel are processed concurrently, the messages sent through the write
|
||||||
|
/// channel are processed one at a time.
|
||||||
pub async fn run(mut self) {
|
pub async fn run(mut self) {
|
||||||
let mut receiver = self
|
let mut receiver = self
|
||||||
.receiver
|
.receiver
|
||||||
@ -119,6 +122,9 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
|||||||
Snapshot { uuid, path, ret } => {
|
Snapshot { uuid, path, ret } => {
|
||||||
let _ = ret.send(self.handle_snapshot(uuid, path).await);
|
let _ = ret.send(self.handle_snapshot(uuid, path).await);
|
||||||
}
|
}
|
||||||
|
Dump { uuid, path, ret } => {
|
||||||
|
let _ = ret.send(self.handle_dump(uuid, path).await);
|
||||||
|
}
|
||||||
GetStats { uuid, ret } => {
|
GetStats { uuid, ret } => {
|
||||||
let _ = ret.send(self.handle_get_stats(uuid).await);
|
let _ = ret.send(self.handle_get_stats(uuid).await);
|
||||||
}
|
}
|
||||||
@ -306,7 +312,35 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn handle_get_stats(&self, uuid: Uuid) -> IndexResult<IndexStats> {
|
async fn handle_dump(&self, uuid: Uuid, mut path: PathBuf) -> Result<()> {
|
||||||
|
use tokio::fs::create_dir_all;
|
||||||
|
|
||||||
|
path.push("indexes");
|
||||||
|
create_dir_all(&path)
|
||||||
|
.await
|
||||||
|
.map_err(|e| IndexError::Error(e.into()))?;
|
||||||
|
|
||||||
|
if let Some(index) = self.store.get(uuid).await? {
|
||||||
|
let mut index_path = path.join(format!("index-{}", uuid));
|
||||||
|
create_dir_all(&index_path)
|
||||||
|
.await
|
||||||
|
.map_err(|e| IndexError::Error(e.into()))?;
|
||||||
|
index_path.push("data.mdb");
|
||||||
|
spawn_blocking(move || -> anyhow::Result<()> {
|
||||||
|
// Get write txn to wait for ongoing write transaction before dump.
|
||||||
|
let _txn = index.write_txn()?;
|
||||||
|
index.env.copy_to_path(index_path, CompactionOption::Enabled)?;
|
||||||
|
Ok(())
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| IndexError::Error(e.into()))?
|
||||||
|
.map_err(IndexError::Error)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_get_stats(&self, uuid: Uuid) -> Result<IndexStats> {
|
||||||
let index = self
|
let index = self
|
||||||
.store
|
.store
|
||||||
.get(uuid)
|
.get(uuid)
|
||||||
|
@ -136,7 +136,14 @@ impl IndexActorHandle for IndexActorHandleImpl {
|
|||||||
Ok(receiver.await.expect("IndexActor has been killed")?)
|
Ok(receiver.await.expect("IndexActor has been killed")?)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn get_index_stats(&self, uuid: Uuid) -> IndexResult<IndexStats> {
|
async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
|
||||||
|
let (ret, receiver) = oneshot::channel();
|
||||||
|
let msg = IndexMsg::Dump { uuid, path, ret };
|
||||||
|
let _ = self.read_sender.send(msg).await;
|
||||||
|
Ok(receiver.await.expect("IndexActor has been killed")?)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats> {
|
||||||
let (ret, receiver) = oneshot::channel();
|
let (ret, receiver) = oneshot::channel();
|
||||||
let msg = IndexMsg::GetStats { uuid, ret };
|
let msg = IndexMsg::GetStats { uuid, ret };
|
||||||
let _ = self.sender.send(msg).await;
|
let _ = self.sender.send(msg).await;
|
||||||
|
@ -60,6 +60,11 @@ pub enum IndexMsg {
|
|||||||
path: PathBuf,
|
path: PathBuf,
|
||||||
ret: oneshot::Sender<IndexResult<()>>,
|
ret: oneshot::Sender<IndexResult<()>>,
|
||||||
},
|
},
|
||||||
|
Dump {
|
||||||
|
uuid: Uuid,
|
||||||
|
path: PathBuf,
|
||||||
|
ret: oneshot::Sender<Result<()>>,
|
||||||
|
},
|
||||||
GetStats {
|
GetStats {
|
||||||
uuid: Uuid,
|
uuid: Uuid,
|
||||||
ret: oneshot::Sender<IndexResult<IndexStats>>,
|
ret: oneshot::Sender<IndexResult<IndexStats>>,
|
||||||
|
@ -180,5 +180,4 @@ mod test {
|
|||||||
async fn get_index_stats(&self, uuid: Uuid) -> IndexResult<IndexStats> {
|
async fn get_index_stats(&self, uuid: Uuid) -> IndexResult<IndexStats> {
|
||||||
self.as_ref().get_index_stats(uuid).await
|
self.as_ref().get_index_stats(uuid).await
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -5,7 +5,6 @@ use std::time::Duration;
|
|||||||
|
|
||||||
use actix_web::web::{Bytes, Payload};
|
use actix_web::web::{Bytes, Payload};
|
||||||
use anyhow::bail;
|
use anyhow::bail;
|
||||||
use chrono::{DateTime, Utc};
|
|
||||||
use futures::stream::StreamExt;
|
use futures::stream::StreamExt;
|
||||||
use log::info;
|
use log::info;
|
||||||
use milli::FieldsDistribution;
|
use milli::FieldsDistribution;
|
||||||
@ -25,6 +24,7 @@ use crate::option::Opt;
|
|||||||
|
|
||||||
mod index_actor;
|
mod index_actor;
|
||||||
mod snapshot;
|
mod snapshot;
|
||||||
|
mod dump;
|
||||||
mod update_actor;
|
mod update_actor;
|
||||||
mod update_handler;
|
mod update_handler;
|
||||||
mod updates;
|
mod updates;
|
||||||
@ -87,6 +87,13 @@ impl IndexController {
|
|||||||
options.ignore_snapshot_if_db_exists,
|
options.ignore_snapshot_if_db_exists,
|
||||||
options.ignore_missing_snapshot,
|
options.ignore_missing_snapshot,
|
||||||
)?;
|
)?;
|
||||||
|
} else if let Some(ref path) = options.import_dump {
|
||||||
|
load_dump(
|
||||||
|
&options.db_path,
|
||||||
|
path,
|
||||||
|
index_size,
|
||||||
|
);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::fs::create_dir_all(&path)?;
|
std::fs::create_dir_all(&path)?;
|
||||||
|
@ -71,11 +71,16 @@ where
|
|||||||
Some(Delete { uuid, ret }) => {
|
Some(Delete { uuid, ret }) => {
|
||||||
let _ = ret.send(self.handle_delete(uuid).await);
|
let _ = ret.send(self.handle_delete(uuid).await);
|
||||||
}
|
}
|
||||||
Some(Snapshot { uuids, path, ret }) => {
|
Some(Snapshot { uuid, path, ret }) => {
|
||||||
let _ = ret.send(self.handle_snapshot(uuids, path).await);
|
let _ = ret.send(self.handle_snapshot(uuid, path).await);
|
||||||
|
}
|
||||||
|
Some(Dump { uuid, path, ret }) => {
|
||||||
|
let _ = ret.send(self.handle_dump(uuid, path).await);
|
||||||
}
|
}
|
||||||
Some(GetInfo { ret }) => {
|
Some(GetInfo { ret }) => {
|
||||||
let _ = ret.send(self.handle_get_info().await);
|
let _ = ret.send(self.handle_get_info().await);
|
||||||
|
Some(GetSize { uuid, ret }) => {
|
||||||
|
let _ = ret.send(self.handle_get_size(uuid).await);
|
||||||
}
|
}
|
||||||
None => break,
|
None => break,
|
||||||
}
|
}
|
||||||
@ -194,9 +199,51 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn handle_delete(&self, uuid: Uuid) -> Result<()> {
|
async fn handle_delete(&self, uuid: Uuid) -> Result<()> {
|
||||||
let store = self.store.clone();
|
let store = self.store.delete(uuid).await?;
|
||||||
|
|
||||||
tokio::task::spawn_blocking(move || store.delete_all(uuid))
|
if let Some(store) = store {
|
||||||
|
tokio::task::spawn(async move {
|
||||||
|
let store = get_arc_ownership_blocking(store).await;
|
||||||
|
tokio::task::spawn_blocking(move || {
|
||||||
|
store.prepare_for_closing().wait();
|
||||||
|
info!("Update store {} was closed.", uuid);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_create(&self, uuid: Uuid) -> Result<()> {
|
||||||
|
let _ = self.store.get_or_create(uuid).await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_create(&self, uuid: Uuid) -> Result<()> {
|
||||||
|
let _ = self.store.get_or_create(uuid).await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
|
||||||
|
let index_handle = self.index_handle.clone();
|
||||||
|
if let Some(update_store) = self.store.get(uuid).await? {
|
||||||
|
tokio::task::spawn_blocking(move || -> anyhow::Result<()> {
|
||||||
|
// acquire write lock to prevent further writes during snapshot
|
||||||
|
// the update lock must be acquired BEFORE the write lock to prevent dead lock
|
||||||
|
let _lock = update_store.update_lock.lock();
|
||||||
|
let mut txn = update_store.env.write_txn()?;
|
||||||
|
|
||||||
|
// create db snapshot
|
||||||
|
update_store.snapshot(&mut txn, &path, uuid)?;
|
||||||
|
|
||||||
|
futures::executor::block_on(
|
||||||
|
async move { index_handle.snapshot(uuid, path).await },
|
||||||
|
)?;
|
||||||
|
Ok(())
|
||||||
|
})
|
||||||
.await
|
.await
|
||||||
.map_err(|e| UpdateError::Error(e.into()))?
|
.map_err(|e| UpdateError::Error(e.into()))?
|
||||||
.map_err(|e| UpdateError::Error(e.into()))?;
|
.map_err(|e| UpdateError::Error(e.into()))?;
|
||||||
@ -245,4 +292,42 @@ where
|
|||||||
|
|
||||||
Ok(info)
|
Ok(info)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn handle_dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
|
||||||
|
let index_handle = self.index_handle.clone();
|
||||||
|
if let Some(update_store) = self.store.get(uuid).await? {
|
||||||
|
tokio::task::spawn_blocking(move || -> anyhow::Result<()> {
|
||||||
|
// acquire write lock to prevent further writes during the dump
|
||||||
|
// the update lock must be acquired BEFORE the write lock to prevent dead lock
|
||||||
|
let _lock = update_store.update_lock.lock();
|
||||||
|
let mut txn = update_store.env.write_txn()?;
|
||||||
|
|
||||||
|
// create db dump
|
||||||
|
update_store.dump(&mut txn, &path, uuid)?;
|
||||||
|
|
||||||
|
futures::executor::block_on(
|
||||||
|
async move { index_handle.dump(uuid, path).await },
|
||||||
|
)?;
|
||||||
|
Ok(())
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| UpdateError::Error(e.into()))?
|
||||||
|
.map_err(|e| UpdateError::Error(e.into()))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_get_size(&self, uuid: Uuid) -> Result<u64> {
|
||||||
|
let size = match self.store.get(uuid).await? {
|
||||||
|
Some(update_store) => tokio::task::spawn_blocking(move || -> anyhow::Result<u64> {
|
||||||
|
let txn = update_store.env.read_txn()?;
|
||||||
|
|
||||||
|
update_store.get_size(&txn)
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| UpdateError::Error(e.into()))?
|
||||||
|
.map_err(|e| UpdateError::Error(e.into()))?,
|
||||||
|
None => 0,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
@ -78,6 +78,20 @@ where
|
|||||||
receiver.await.expect("update actor killed.")
|
receiver.await.expect("update actor killed.")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
|
||||||
|
let (ret, receiver) = oneshot::channel();
|
||||||
|
let msg = UpdateMsg::Dump { uuid, path, ret };
|
||||||
|
let _ = self.sender.send(msg).await;
|
||||||
|
receiver.await.expect("update actor killed.")
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_size(&self, uuid: Uuid) -> Result<u64> {
|
||||||
|
let (ret, receiver) = oneshot::channel();
|
||||||
|
let msg = UpdateMsg::GetSize { uuid, ret };
|
||||||
|
let _ = self.sender.send(msg).await;
|
||||||
|
receiver.await.expect("update actor killed.")
|
||||||
|
}
|
||||||
|
|
||||||
async fn update(
|
async fn update(
|
||||||
&self,
|
&self,
|
||||||
meta: UpdateMeta,
|
meta: UpdateMeta,
|
||||||
|
@ -31,7 +31,16 @@ pub enum UpdateMsg<D> {
|
|||||||
path: PathBuf,
|
path: PathBuf,
|
||||||
ret: oneshot::Sender<Result<()>>,
|
ret: oneshot::Sender<Result<()>>,
|
||||||
},
|
},
|
||||||
|
Dump {
|
||||||
|
uuid: Uuid,
|
||||||
|
path: PathBuf,
|
||||||
|
ret: oneshot::Sender<Result<()>>,
|
||||||
|
},
|
||||||
GetInfo {
|
GetInfo {
|
||||||
ret: oneshot::Sender<Result<UpdateStoreInfo>>,
|
ret: oneshot::Sender<Result<UpdateStoreInfo>>,
|
||||||
},
|
},
|
||||||
|
GetSize {
|
||||||
|
uuid: Uuid,
|
||||||
|
ret: oneshot::Sender<Result<u64>>,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
@ -40,8 +40,11 @@ pub trait UpdateActorHandle {
|
|||||||
async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>>;
|
async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>>;
|
||||||
async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus>;
|
async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus>;
|
||||||
async fn delete(&self, uuid: Uuid) -> Result<()>;
|
async fn delete(&self, uuid: Uuid) -> Result<()>;
|
||||||
async fn snapshot(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()>;
|
async fn create(&self, uuid: Uuid) -> Result<()>;
|
||||||
|
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()>;
|
||||||
|
async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()>;
|
||||||
async fn get_info(&self) -> Result<UpdateStoreInfo>;
|
async fn get_info(&self) -> Result<UpdateStoreInfo>;
|
||||||
|
async fn get_size(&self, uuid: Uuid) -> Result<u64>;
|
||||||
async fn update(
|
async fn update(
|
||||||
&self,
|
&self,
|
||||||
meta: UpdateMeta,
|
meta: UpdateMeta,
|
||||||
|
@ -499,9 +499,56 @@ impl UpdateStore {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn dump(
|
||||||
|
&self,
|
||||||
|
txn: &mut heed::RwTxn,
|
||||||
|
path: impl AsRef<Path>,
|
||||||
|
uuid: Uuid,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
let update_path = path.as_ref().join("updates");
|
||||||
|
create_dir_all(&update_path)?;
|
||||||
|
|
||||||
|
let mut dump_path = update_path.join(format!("update-{}", uuid));
|
||||||
|
// acquire write lock to prevent further writes during dump
|
||||||
|
create_dir_all(&dump_path)?;
|
||||||
|
dump_path.push("data.mdb");
|
||||||
|
|
||||||
|
// create db dump
|
||||||
|
self.env.copy_to_path(&dump_path, CompactionOption::Enabled)?;
|
||||||
|
|
||||||
|
let update_files_path = update_path.join("update_files");
|
||||||
|
create_dir_all(&update_files_path)?;
|
||||||
|
|
||||||
|
for path in self.pending.iter(&txn)? {
|
||||||
|
let (_, path) = path?;
|
||||||
|
let name = path.file_name().unwrap();
|
||||||
|
let to = update_files_path.join(name);
|
||||||
|
copy(path, to)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
pub fn get_info(&self) -> anyhow::Result<UpdateStoreInfo> {
|
pub fn get_info(&self) -> anyhow::Result<UpdateStoreInfo> {
|
||||||
let mut size = self.env.size();
|
let mut size = self.env.size();
|
||||||
let txn = self.env.read_txn()?;
|
let txn = self.env.read_txn()?;
|
||||||
|
for entry in self.pending_queue.iter(&txn)? {
|
||||||
|
let (_, pending) = entry?;
|
||||||
|
if let Some(path) = pending.content_path() {
|
||||||
|
size += File::open(path)?.metadata()?.len();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let processing = match *self.state.read() {
|
||||||
|
State::Processing(uuid, _) => Some(uuid),
|
||||||
|
_ => None,
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(UpdateStoreInfo { size, processing })
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_size(&self, txn: &heed::RoTxn) -> anyhow::Result<u64> {
|
||||||
|
let mut size = self.env.size();
|
||||||
|
let txn = self.env.read_txn()?;
|
||||||
|
|
||||||
for entry in self.pending_queue.iter(&txn)? {
|
for entry in self.pending_queue.iter(&txn)? {
|
||||||
let (_, pending) = entry?;
|
let (_, pending) = entry?;
|
||||||
|
@ -41,6 +41,9 @@ impl<S: UuidStore> UuidResolverActor<S> {
|
|||||||
Some(SnapshotRequest { path, ret }) => {
|
Some(SnapshotRequest { path, ret }) => {
|
||||||
let _ = ret.send(self.handle_snapshot(path).await);
|
let _ = ret.send(self.handle_snapshot(path).await);
|
||||||
}
|
}
|
||||||
|
Some(DumpRequest { path, ret }) => {
|
||||||
|
let _ = ret.send(self.handle_dump(path).await);
|
||||||
|
}
|
||||||
Some(GetSize { ret }) => {
|
Some(GetSize { ret }) => {
|
||||||
let _ = ret.send(self.handle_get_size().await);
|
let _ = ret.send(self.handle_get_size().await);
|
||||||
}
|
}
|
||||||
@ -82,6 +85,10 @@ impl<S: UuidStore> UuidResolverActor<S> {
|
|||||||
self.store.snapshot(path).await
|
self.store.snapshot(path).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn handle_dump(&self, path: PathBuf) -> Result<Vec<Uuid>> {
|
||||||
|
self.store.dump(path).await
|
||||||
|
}
|
||||||
|
|
||||||
async fn handle_insert(&self, uid: String, uuid: Uuid) -> Result<()> {
|
async fn handle_insert(&self, uid: String, uuid: Uuid) -> Result<()> {
|
||||||
if !is_index_uid_valid(&uid) {
|
if !is_index_uid_valid(&uid) {
|
||||||
return Err(UuidError::BadlyFormatted(uid));
|
return Err(UuidError::BadlyFormatted(uid));
|
||||||
|
@ -68,6 +68,7 @@ impl UuidResolverHandle for UuidResolverHandleImpl {
|
|||||||
.expect("Uuid resolver actor has been killed")?)
|
.expect("Uuid resolver actor has been killed")?)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// TODO: we should merge this function with the dump function
|
||||||
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
|
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||||
let (ret, receiver) = oneshot::channel();
|
let (ret, receiver) = oneshot::channel();
|
||||||
let msg = UuidResolveMsg::SnapshotRequest { path, ret };
|
let msg = UuidResolveMsg::SnapshotRequest { path, ret };
|
||||||
@ -77,6 +78,15 @@ impl UuidResolverHandle for UuidResolverHandleImpl {
|
|||||||
.expect("Uuid resolver actor has been killed")?)
|
.expect("Uuid resolver actor has been killed")?)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn dump(&self, path: PathBuf) -> Result<Vec<Uuid>> {
|
||||||
|
let (ret, receiver) = oneshot::channel();
|
||||||
|
let msg = UuidResolveMsg::DumpRequest { path, ret };
|
||||||
|
let _ = self.sender.send(msg).await;
|
||||||
|
Ok(receiver
|
||||||
|
.await
|
||||||
|
.expect("Uuid resolver actor has been killed")?)
|
||||||
|
}
|
||||||
|
|
||||||
async fn get_size(&self) -> Result<u64> {
|
async fn get_size(&self) -> Result<u64> {
|
||||||
let (ret, receiver) = oneshot::channel();
|
let (ret, receiver) = oneshot::channel();
|
||||||
let msg = UuidResolveMsg::GetSize { ret };
|
let msg = UuidResolveMsg::GetSize { ret };
|
||||||
|
@ -31,6 +31,10 @@ pub enum UuidResolveMsg {
|
|||||||
path: PathBuf,
|
path: PathBuf,
|
||||||
ret: oneshot::Sender<Result<HashSet<Uuid>>>,
|
ret: oneshot::Sender<Result<HashSet<Uuid>>>,
|
||||||
},
|
},
|
||||||
|
DumpRequest {
|
||||||
|
path: PathBuf,
|
||||||
|
ret: oneshot::Sender<Result<Vec<Uuid>>>,
|
||||||
|
},
|
||||||
GetSize {
|
GetSize {
|
||||||
ret: oneshot::Sender<Result<u64>>,
|
ret: oneshot::Sender<Result<u64>>,
|
||||||
},
|
},
|
||||||
|
@ -31,6 +31,7 @@ pub trait UuidResolverHandle {
|
|||||||
async fn delete(&self, name: String) -> anyhow::Result<Uuid>;
|
async fn delete(&self, name: String) -> anyhow::Result<Uuid>;
|
||||||
async fn list(&self) -> anyhow::Result<Vec<(String, Uuid)>>;
|
async fn list(&self) -> anyhow::Result<Vec<(String, Uuid)>>;
|
||||||
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
|
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
|
||||||
|
async fn dump(&self, path: PathBuf) -> Result<Vec<Uuid>>;
|
||||||
async fn get_size(&self) -> Result<u64>;
|
async fn get_size(&self) -> Result<u64>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -21,6 +21,7 @@ pub trait UuidStore {
|
|||||||
async fn list(&self) -> Result<Vec<(String, Uuid)>>;
|
async fn list(&self) -> Result<Vec<(String, Uuid)>>;
|
||||||
async fn insert(&self, name: String, uuid: Uuid) -> Result<()>;
|
async fn insert(&self, name: String, uuid: Uuid) -> Result<()>;
|
||||||
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
|
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
|
||||||
|
async fn dump(&self, path: PathBuf) -> Result<Vec<Uuid>>;
|
||||||
async fn get_size(&self) -> Result<u64>;
|
async fn get_size(&self) -> Result<u64>;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -130,6 +131,8 @@ impl UuidStore for HeedUuidStore {
|
|||||||
.await?
|
.await?
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: we should merge this function and the following function for the dump. it's exactly
|
||||||
|
// the same code
|
||||||
async fn snapshot(&self, mut path: PathBuf) -> Result<HashSet<Uuid>> {
|
async fn snapshot(&self, mut path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||||
let env = self.env.clone();
|
let env = self.env.clone();
|
||||||
let db = self.db;
|
let db = self.db;
|
||||||
@ -155,6 +158,31 @@ impl UuidStore for HeedUuidStore {
|
|||||||
.await?
|
.await?
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn dump(&self, mut path: PathBuf) -> Result<Vec<Uuid>> {
|
||||||
|
let env = self.env.clone();
|
||||||
|
let db = self.db;
|
||||||
|
tokio::task::spawn_blocking(move || {
|
||||||
|
// Write transaction to acquire a lock on the database.
|
||||||
|
let txn = env.write_txn()?;
|
||||||
|
let mut entries = Vec::new();
|
||||||
|
for entry in db.iter(&txn)? {
|
||||||
|
let (_, uuid) = entry?;
|
||||||
|
let uuid = Uuid::from_slice(uuid)?;
|
||||||
|
entries.push(uuid)
|
||||||
|
}
|
||||||
|
|
||||||
|
// only perform dump if there are indexes
|
||||||
|
if !entries.is_empty() {
|
||||||
|
path.push("index_uuids");
|
||||||
|
create_dir_all(&path).unwrap();
|
||||||
|
path.push("data.mdb");
|
||||||
|
env.copy_to_path(path, CompactionOption::Enabled)?;
|
||||||
|
}
|
||||||
|
Ok(entries)
|
||||||
|
})
|
||||||
|
.await?
|
||||||
|
}
|
||||||
|
|
||||||
async fn get_size(&self) -> Result<u64> {
|
async fn get_size(&self) -> Result<u64> {
|
||||||
Ok(self.env.size())
|
Ok(self.env.size())
|
||||||
}
|
}
|
||||||
|
@ -203,6 +203,7 @@ pub struct Opt {
|
|||||||
pub import_dump: Option<PathBuf>,
|
pub import_dump: Option<PathBuf>,
|
||||||
|
|
||||||
/// The batch size used in the importation process, the bigger it is the faster the dump is created.
|
/// The batch size used in the importation process, the bigger it is the faster the dump is created.
|
||||||
|
/// This options is now deprecated and will be ignored
|
||||||
#[structopt(long, env = "MEILI_DUMP_BATCH_SIZE", default_value = "1024")]
|
#[structopt(long, env = "MEILI_DUMP_BATCH_SIZE", default_value = "1024")]
|
||||||
pub dump_batch_size: usize,
|
pub dump_batch_size: usize,
|
||||||
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
use actix_web::{delete, get, post, put};
|
use actix_web::{delete, get, post, put};
|
||||||
use actix_web::{web, HttpResponse};
|
use actix_web::{web, HttpResponse};
|
||||||
|
use chrono::DateTime;
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
|
|
||||||
use crate::error::ResponseError;
|
use crate::error::ResponseError;
|
||||||
@ -68,6 +69,16 @@ struct UpdateIndexRequest {
|
|||||||
primary_key: Option<String>,
|
primary_key: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct UpdateIndexResponse {
|
||||||
|
name: String,
|
||||||
|
uid: String,
|
||||||
|
created_at: DateTime<Utc>,
|
||||||
|
updated_at: DateTime<Utc>,
|
||||||
|
primary_key: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
#[put("/indexes/{index_uid}", wrap = "Authentication::Private")]
|
#[put("/indexes/{index_uid}", wrap = "Authentication::Private")]
|
||||||
async fn update_index(
|
async fn update_index(
|
||||||
data: web::Data<Data>,
|
data: web::Data<Data>,
|
||||||
|
@ -19,7 +19,7 @@ async fn get_settings() {
|
|||||||
assert_eq!(settings.keys().len(), 6);
|
assert_eq!(settings.keys().len(), 6);
|
||||||
assert_eq!(settings["displayedAttributes"], json!(["*"]));
|
assert_eq!(settings["displayedAttributes"], json!(["*"]));
|
||||||
assert_eq!(settings["searchableAttributes"], json!(["*"]));
|
assert_eq!(settings["searchableAttributes"], json!(["*"]));
|
||||||
assert_eq!(settings["attributesForFaceting"], json!({}));
|
assert_eq!(settings["attributesForFaceting"], json!(null));
|
||||||
assert_eq!(settings["distinctAttribute"], json!(null));
|
assert_eq!(settings["distinctAttribute"], json!(null));
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
settings["rankingRules"],
|
settings["rankingRules"],
|
||||||
|
Loading…
x
Reference in New Issue
Block a user