mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-10 21:44:34 +01:00
Merge #158
158: Implements the dumps r=irevoire a=irevoire closes #20 divergence from legacy meilisearch: - dump v2 added, supporting the loading of pending updates (only works with dumps created from v2) - added timestamps to the dump info - Dump infos are only persisted in an internal data structure, and they are no longer fetched from the filesystem on demand, which was a potential security flaw. This means that the dump infos are flushed on every restart. Co-authored-by: tamo <tamo@meilisearch.com> Co-authored-by: Marin Postma <postma.marin@protonmail.com>
This commit is contained in:
commit
509a56a43d
6
Cargo.lock
generated
6
Cargo.lock
generated
@ -1,5 +1,7 @@
|
|||||||
# This file is automatically @generated by Cargo.
|
# This file is automatically @generated by Cargo.
|
||||||
# It is not intended for manual editing.
|
# It is not intended for manual editing.
|
||||||
|
version = 3
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "actix-codec"
|
name = "actix-codec"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
@ -1840,8 +1842,8 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "milli"
|
name = "milli"
|
||||||
version = "0.2.0"
|
version = "0.2.1"
|
||||||
source = "git+https://github.com/meilisearch/milli.git?tag=v0.2.0#792225eaffce6b3682f9b30b7370b6a547c4757e"
|
source = "git+https://github.com/meilisearch/milli.git?tag=v0.2.1#25f75d4d03732131e6edcf20f4d126210b159d43"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"bstr",
|
"bstr",
|
||||||
|
@ -51,7 +51,7 @@ main_error = "0.1.0"
|
|||||||
meilisearch-error = { path = "../meilisearch-error" }
|
meilisearch-error = { path = "../meilisearch-error" }
|
||||||
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.2.2" }
|
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.2.2" }
|
||||||
memmap = "0.7.0"
|
memmap = "0.7.0"
|
||||||
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.2.0" }
|
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.2.1" }
|
||||||
mime = "0.3.16"
|
mime = "0.3.16"
|
||||||
once_cell = "1.5.2"
|
once_cell = "1.5.2"
|
||||||
oxidized-json-checker = "0.3.2"
|
oxidized-json-checker = "0.3.2"
|
||||||
|
@ -50,7 +50,7 @@ mod mini_dashboard {
|
|||||||
sha1_file.read_to_string(&mut sha1)?;
|
sha1_file.read_to_string(&mut sha1)?;
|
||||||
if sha1 == meta["sha1"].as_str().unwrap() {
|
if sha1 == meta["sha1"].as_str().unwrap() {
|
||||||
// Nothing to do.
|
// Nothing to do.
|
||||||
return Ok(())
|
return Ok(());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -62,7 +62,11 @@ mod mini_dashboard {
|
|||||||
hasher.update(&dashboard_assets_bytes);
|
hasher.update(&dashboard_assets_bytes);
|
||||||
let sha1 = hex::encode(hasher.finalize());
|
let sha1 = hex::encode(hasher.finalize());
|
||||||
|
|
||||||
assert_eq!(meta["sha1"].as_str().unwrap(), sha1, "Downloaded mini-dashboard shasum differs from the one specified in the Cargo.toml");
|
assert_eq!(
|
||||||
|
meta["sha1"].as_str().unwrap(),
|
||||||
|
sha1,
|
||||||
|
"Downloaded mini-dashboard shasum differs from the one specified in the Cargo.toml"
|
||||||
|
);
|
||||||
|
|
||||||
create_dir_all(&dashboard_dir)?;
|
create_dir_all(&dashboard_dir)?;
|
||||||
let cursor = Cursor::new(&dashboard_assets_bytes);
|
let cursor = Cursor::new(&dashboard_assets_bytes);
|
||||||
|
@ -4,8 +4,9 @@ use std::sync::Arc;
|
|||||||
use sha2::Digest;
|
use sha2::Digest;
|
||||||
|
|
||||||
use crate::index::{Checked, Settings};
|
use crate::index::{Checked, Settings};
|
||||||
use crate::index_controller::{IndexController, IndexStats, Stats};
|
use crate::index_controller::{
|
||||||
use crate::index_controller::{IndexMetadata, IndexSettings};
|
DumpInfo, IndexController, IndexMetadata, IndexSettings, IndexStats, Stats,
|
||||||
|
};
|
||||||
use crate::option::Opt;
|
use crate::option::Opt;
|
||||||
|
|
||||||
pub mod search;
|
pub mod search;
|
||||||
@ -68,7 +69,11 @@ impl Data {
|
|||||||
|
|
||||||
api_keys.generate_missing_api_keys();
|
api_keys.generate_missing_api_keys();
|
||||||
|
|
||||||
let inner = DataInner { index_controller, api_keys, options };
|
let inner = DataInner {
|
||||||
|
index_controller,
|
||||||
|
api_keys,
|
||||||
|
options,
|
||||||
|
};
|
||||||
let inner = Arc::new(inner);
|
let inner = Arc::new(inner);
|
||||||
|
|
||||||
Ok(Data { inner })
|
Ok(Data { inner })
|
||||||
@ -108,6 +113,14 @@ impl Data {
|
|||||||
Ok(self.index_controller.get_all_stats().await?)
|
Ok(self.index_controller.get_all_stats().await?)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Forwards to `IndexController::create_dump` and returns the resulting [`DumpInfo`].
///
/// # Errors
///
/// Any error returned by the index controller is converted into an `anyhow::Error`.
pub async fn create_dump(&self) -> anyhow::Result<DumpInfo> {
    let info = self.index_controller.create_dump().await?;
    Ok(info)
}
|
||||||
|
|
||||||
|
/// Forwards to `IndexController::dump_info` to fetch the [`DumpInfo`] registered under `uid`.
///
/// # Errors
///
/// Any error returned by the index controller is converted into an `anyhow::Error`.
pub async fn dump_status(&self, uid: String) -> anyhow::Result<DumpInfo> {
    let info = self.index_controller.dump_info(uid).await?;
    Ok(info)
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn http_payload_size_limit(&self) -> usize {
|
pub fn http_payload_size_limit(&self) -> usize {
|
||||||
self.options.http_payload_size_limit.get_bytes() as usize
|
self.options.http_payload_size_limit.get_bytes() as usize
|
||||||
|
@ -1,423 +0,0 @@
|
|||||||
use std::fs::{create_dir_all, File};
|
|
||||||
use std::io::prelude::*;
|
|
||||||
use std::path::{Path, PathBuf};
|
|
||||||
use std::sync::Mutex;
|
|
||||||
use std::thread;
|
|
||||||
|
|
||||||
use actix_web::web;
|
|
||||||
use chrono::offset::Utc;
|
|
||||||
use indexmap::IndexMap;
|
|
||||||
use log::{error, info};
|
|
||||||
use once_cell::sync::Lazy;
|
|
||||||
use serde::{Deserialize, Serialize};
|
|
||||||
use serde_json::json;
|
|
||||||
use tempfile::TempDir;
|
|
||||||
|
|
||||||
use crate::Data;
|
|
||||||
use crate::error::{Error, ResponseError};
|
|
||||||
use crate::helpers::compression;
|
|
||||||
use crate::routes::index;
|
|
||||||
use crate::routes::setting::Settings;
|
|
||||||
use crate::routes::index::IndexResponse;
|
|
||||||
|
|
||||||
// Process-wide slot used to share dump progress: it holds the most recently stored `DumpInfo`
// behind a mutex and starts out as `None` (the `Mutex::default` value).
static DUMP_INFO: Lazy<Mutex<Option<DumpInfo>>> = Lazy::new(Mutex::default);
|
|
||||||
|
|
||||||
/// Version tag stored in a dump's metadata; `V1` is the only variant declared here.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
enum DumpVersion {
    V1,
}

impl DumpVersion {
    /// Version written into metadata created through `DumpMetadata::new`.
    const CURRENT: Self = Self::V1;
}
|
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize)]
|
|
||||||
#[serde(rename_all = "camelCase")]
|
|
||||||
pub struct DumpMetadata {
|
|
||||||
indexes: Vec<crate::routes::index::IndexResponse>,
|
|
||||||
db_version: String,
|
|
||||||
dump_version: DumpVersion,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl DumpMetadata {
|
|
||||||
/// Create a DumpMetadata with the current dump version of meilisearch.
|
|
||||||
pub fn new(indexes: Vec<crate::routes::index::IndexResponse>, db_version: String) -> Self {
|
|
||||||
DumpMetadata {
|
|
||||||
indexes,
|
|
||||||
db_version,
|
|
||||||
dump_version: DumpVersion::CURRENT,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Extract DumpMetadata from `metadata.json` file present at provided `dir_path`
|
|
||||||
fn from_path(dir_path: &Path) -> Result<Self, Error> {
|
|
||||||
let path = dir_path.join("metadata.json");
|
|
||||||
let file = File::open(path)?;
|
|
||||||
let reader = std::io::BufReader::new(file);
|
|
||||||
let metadata = serde_json::from_reader(reader)?;
|
|
||||||
|
|
||||||
Ok(metadata)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Write DumpMetadata in `metadata.json` file at provided `dir_path`
|
|
||||||
fn to_path(&self, dir_path: &Path) -> Result<(), Error> {
|
|
||||||
let path = dir_path.join("metadata.json");
|
|
||||||
let file = File::create(path)?;
|
|
||||||
|
|
||||||
serde_json::to_writer(file, &self)?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Extract Settings from `settings.json` file present at provided `dir_path`
|
|
||||||
fn settings_from_path(dir_path: &Path) -> Result<Settings, Error> {
|
|
||||||
let path = dir_path.join("settings.json");
|
|
||||||
let file = File::open(path)?;
|
|
||||||
let reader = std::io::BufReader::new(file);
|
|
||||||
let metadata = serde_json::from_reader(reader)?;
|
|
||||||
|
|
||||||
Ok(metadata)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Write Settings in `settings.json` file at provided `dir_path`
|
|
||||||
fn settings_to_path(settings: &Settings, dir_path: &Path) -> Result<(), Error> {
|
|
||||||
let path = dir_path.join("settings.json");
|
|
||||||
let file = File::create(path)?;
|
|
||||||
|
|
||||||
serde_json::to_writer(file, settings)?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Import settings and documents of a dump with version `DumpVersion::V1` in specified index.
|
|
||||||
fn import_index_v1(
|
|
||||||
data: &Data,
|
|
||||||
dumps_dir: &Path,
|
|
||||||
index_uid: &str,
|
|
||||||
document_batch_size: usize,
|
|
||||||
write_txn: &mut MainWriter,
|
|
||||||
) -> Result<(), Error> {
|
|
||||||
|
|
||||||
// open index
|
|
||||||
let index = data
|
|
||||||
.db
|
|
||||||
.open_index(index_uid)
|
|
||||||
.ok_or(Error::index_not_found(index_uid))?;
|
|
||||||
|
|
||||||
// index dir path in dump dir
|
|
||||||
let index_path = &dumps_dir.join(index_uid);
|
|
||||||
|
|
||||||
// extract `settings.json` file and import content
|
|
||||||
let settings = settings_from_path(&index_path)?;
|
|
||||||
let settings = settings.to_update().map_err(|e| Error::dump_failed(format!("importing settings for index {}; {}", index_uid, e)))?;
|
|
||||||
apply_settings_update(write_txn, &index, settings)?;
|
|
||||||
|
|
||||||
// create iterator over documents in `documents.jsonl` to make batch importation
|
|
||||||
// create iterator over documents in `documents.jsonl` to make batch importation
|
|
||||||
let documents = {
|
|
||||||
let file = File::open(&index_path.join("documents.jsonl"))?;
|
|
||||||
let reader = std::io::BufReader::new(file);
|
|
||||||
let deserializer = serde_json::Deserializer::from_reader(reader);
|
|
||||||
deserializer.into_iter::<IndexMap<String, serde_json::Value>>()
|
|
||||||
};
|
|
||||||
|
|
||||||
// batch import document every `document_batch_size`:
|
|
||||||
// create a Vec to bufferize documents
|
|
||||||
let mut values = Vec::with_capacity(document_batch_size);
|
|
||||||
// iterate over documents
|
|
||||||
for document in documents {
|
|
||||||
// push document in buffer
|
|
||||||
values.push(document?);
|
|
||||||
// if buffer is full, create and apply a batch, and clean buffer
|
|
||||||
if values.len() == document_batch_size {
|
|
||||||
let batch = std::mem::replace(&mut values, Vec::with_capacity(document_batch_size));
|
|
||||||
apply_documents_addition(write_txn, &index, batch)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// apply documents remaining in the buffer
|
|
||||||
if !values.is_empty() {
|
|
||||||
apply_documents_addition(write_txn, &index, values)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
// sync index information: stats, updated_at, last_update
|
|
||||||
if let Err(e) = crate::index_update_callback_txn(index, index_uid, data, write_txn) {
|
|
||||||
return Err(Error::Internal(e));
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Import dump from `dump_path` in database.
|
|
||||||
pub fn import_dump(
|
|
||||||
data: &Data,
|
|
||||||
dump_path: &Path,
|
|
||||||
document_batch_size: usize,
|
|
||||||
) -> Result<(), Error> {
|
|
||||||
info!("Importing dump from {:?}...", dump_path);
|
|
||||||
|
|
||||||
// create a temporary directory
|
|
||||||
let tmp_dir = TempDir::new()?;
|
|
||||||
let tmp_dir_path = tmp_dir.path();
|
|
||||||
|
|
||||||
// extract dump in temporary directory
|
|
||||||
compression::from_tar_gz(dump_path, tmp_dir_path)?;
|
|
||||||
|
|
||||||
// read dump metadata
|
|
||||||
let metadata = DumpMetadata::from_path(&tmp_dir_path)?;
|
|
||||||
|
|
||||||
// choose importation function from DumpVersion of metadata
|
|
||||||
let import_index = match metadata.dump_version {
|
|
||||||
DumpVersion::V1 => import_index_v1,
|
|
||||||
};
|
|
||||||
|
|
||||||
// remove indexes which have same `uid` than indexes to import and create empty indexes
|
|
||||||
let existing_index_uids = data.db.indexes_uids();
|
|
||||||
for index in metadata.indexes.iter() {
|
|
||||||
if existing_index_uids.contains(&index.uid) {
|
|
||||||
data.db.delete_index(index.uid.clone())?;
|
|
||||||
}
|
|
||||||
index::create_index_sync(&data.db, index.uid.clone(), index.name.clone(), index.primary_key.clone())?;
|
|
||||||
}
|
|
||||||
|
|
||||||
// import each indexes content
|
|
||||||
data.db.main_write::<_, _, Error>(|mut writer| {
|
|
||||||
for index in metadata.indexes {
|
|
||||||
import_index(&data, tmp_dir_path, &index.uid, document_batch_size, &mut writer)?;
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
})?;
|
|
||||||
|
|
||||||
info!("Dump importation from {:?} succeed", dump_path);
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// State of a dump, serialized in snake_case (`done`, `in_progress`, `failed`).
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(rename_all = "snake_case")]
pub enum DumpStatus {
    Done,
    InProgress,
    Failed,
}
|
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Clone)]
|
|
||||||
#[serde(rename_all = "camelCase")]
|
|
||||||
pub struct DumpInfo {
|
|
||||||
pub uid: String,
|
|
||||||
pub status: DumpStatus,
|
|
||||||
#[serde(skip_serializing_if = "Option::is_none", flatten)]
|
|
||||||
pub error: Option<serde_json::Value>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl DumpInfo {
|
|
||||||
pub fn new(uid: String, status: DumpStatus) -> Self {
|
|
||||||
Self { uid, status, error: None }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn with_error(mut self, error: ResponseError) -> Self {
|
|
||||||
self.status = DumpStatus::Failed;
|
|
||||||
self.error = Some(json!(error));
|
|
||||||
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn dump_already_in_progress(&self) -> bool {
|
|
||||||
self.status == DumpStatus::InProgress
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get_current() -> Option<Self> {
|
|
||||||
DUMP_INFO.lock().unwrap().clone()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn set_current(&self) {
|
|
||||||
*DUMP_INFO.lock().unwrap() = Some(self.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Generate uid from creation date
|
|
||||||
fn generate_uid() -> String {
|
|
||||||
Utc::now().format("%Y%m%d-%H%M%S%3f").to_string()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Build the path of the compressed dump `<dump_uid>.dump` inside `dumps_dir`.
pub fn compressed_dumps_dir(dumps_dir: &Path, dump_uid: &str) -> PathBuf {
    let mut archive_path = dumps_dir.to_path_buf();
    archive_path.push(format!("{}.dump", dump_uid));
    archive_path
}
|
|
||||||
|
|
||||||
/// Write metadata in dump
|
|
||||||
fn dump_metadata(data: &web::Data<Data>, dir_path: &Path, indexes: Vec<IndexResponse>) -> Result<(), Error> {
|
|
||||||
let (db_major, db_minor, db_patch) = data.db.version();
|
|
||||||
let metadata = DumpMetadata::new(indexes, format!("{}.{}.{}", db_major, db_minor, db_patch));
|
|
||||||
|
|
||||||
metadata.to_path(dir_path)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Export settings of provided index in dump
|
|
||||||
fn dump_index_settings(data: &web::Data<Data>, reader: &MainReader, dir_path: &Path, index_uid: &str) -> Result<(), Error> {
|
|
||||||
let settings = crate::routes::setting::get_all_sync(data, reader, index_uid)?;
|
|
||||||
|
|
||||||
settings_to_path(&settings, dir_path)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Export updates of provided index in dump
|
|
||||||
fn dump_index_updates(data: &web::Data<Data>, reader: &UpdateReader, dir_path: &Path, index_uid: &str) -> Result<(), Error> {
|
|
||||||
let updates_path = dir_path.join("updates.jsonl");
|
|
||||||
let updates = crate::routes::index::get_all_updates_status_sync(data, reader, index_uid)?;
|
|
||||||
|
|
||||||
let file = File::create(updates_path)?;
|
|
||||||
|
|
||||||
for update in updates {
|
|
||||||
serde_json::to_writer(&file, &update)?;
|
|
||||||
writeln!(&file)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Export documents of provided index in dump
|
|
||||||
fn dump_index_documents(data: &web::Data<Data>, reader: &MainReader, dir_path: &Path, index_uid: &str) -> Result<(), Error> {
|
|
||||||
let documents_path = dir_path.join("documents.jsonl");
|
|
||||||
let file = File::create(documents_path)?;
|
|
||||||
let dump_batch_size = data.dump_batch_size;
|
|
||||||
|
|
||||||
let mut offset = 0;
|
|
||||||
loop {
|
|
||||||
let documents = crate::routes::document::get_all_documents_sync(data, reader, index_uid, offset, dump_batch_size, None)?;
|
|
||||||
if documents.is_empty() { break; } else { offset += dump_batch_size; }
|
|
||||||
|
|
||||||
for document in documents {
|
|
||||||
serde_json::to_writer(&file, &document)?;
|
|
||||||
writeln!(&file)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Write error with a context.
|
|
||||||
fn fail_dump_process<E: std::error::Error>(dump_info: DumpInfo, context: &str, error: E) {
|
|
||||||
let error_message = format!("{}; {}", context, error);
|
|
||||||
|
|
||||||
error!("Something went wrong during dump process: {}", &error_message);
|
|
||||||
dump_info.with_error(Error::dump_failed(error_message).into()).set_current();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Main function of dump.
|
|
||||||
fn dump_process(data: web::Data<Data>, dumps_dir: PathBuf, dump_info: DumpInfo) {
|
|
||||||
// open read transaction on Update
|
|
||||||
let update_reader = match data.db.update_read_txn() {
|
|
||||||
Ok(r) => r,
|
|
||||||
Err(e) => {
|
|
||||||
fail_dump_process(dump_info, "creating RO transaction on updates", e);
|
|
||||||
return ;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// open read transaction on Main
|
|
||||||
let main_reader = match data.db.main_read_txn() {
|
|
||||||
Ok(r) => r,
|
|
||||||
Err(e) => {
|
|
||||||
fail_dump_process(dump_info, "creating RO transaction on main", e);
|
|
||||||
return ;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// create a temporary directory
|
|
||||||
let tmp_dir = match TempDir::new() {
|
|
||||||
Ok(tmp_dir) => tmp_dir,
|
|
||||||
Err(e) => {
|
|
||||||
fail_dump_process(dump_info, "creating temporary directory", e);
|
|
||||||
return ;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
let tmp_dir_path = tmp_dir.path();
|
|
||||||
|
|
||||||
// fetch indexes
|
|
||||||
let indexes = match crate::routes::index::list_indexes_sync(&data, &main_reader) {
|
|
||||||
Ok(indexes) => indexes,
|
|
||||||
Err(e) => {
|
|
||||||
fail_dump_process(dump_info, "listing indexes", e);
|
|
||||||
return ;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// create metadata
|
|
||||||
if let Err(e) = dump_metadata(&data, &tmp_dir_path, indexes.clone()) {
|
|
||||||
fail_dump_process(dump_info, "generating metadata", e);
|
|
||||||
return ;
|
|
||||||
}
|
|
||||||
|
|
||||||
// export settings, updates and documents for each indexes
|
|
||||||
for index in indexes {
|
|
||||||
let index_path = tmp_dir_path.join(&index.uid);
|
|
||||||
|
|
||||||
// create index sub-dircetory
|
|
||||||
if let Err(e) = create_dir_all(&index_path) {
|
|
||||||
fail_dump_process(dump_info, &format!("creating directory for index {}", &index.uid), e);
|
|
||||||
return ;
|
|
||||||
}
|
|
||||||
|
|
||||||
// export settings
|
|
||||||
if let Err(e) = dump_index_settings(&data, &main_reader, &index_path, &index.uid) {
|
|
||||||
fail_dump_process(dump_info, &format!("generating settings for index {}", &index.uid), e);
|
|
||||||
return ;
|
|
||||||
}
|
|
||||||
|
|
||||||
// export documents
|
|
||||||
if let Err(e) = dump_index_documents(&data, &main_reader, &index_path, &index.uid) {
|
|
||||||
fail_dump_process(dump_info, &format!("generating documents for index {}", &index.uid), e);
|
|
||||||
return ;
|
|
||||||
}
|
|
||||||
|
|
||||||
// export updates
|
|
||||||
if let Err(e) = dump_index_updates(&data, &update_reader, &index_path, &index.uid) {
|
|
||||||
fail_dump_process(dump_info, &format!("generating updates for index {}", &index.uid), e);
|
|
||||||
return ;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// compress dump in a file named `{dump_uid}.dump` in `dumps_dir`
|
|
||||||
if let Err(e) = crate::helpers::compression::to_tar_gz(&tmp_dir_path, &compressed_dumps_dir(&dumps_dir, &dump_info.uid)) {
|
|
||||||
fail_dump_process(dump_info, "compressing dump", e);
|
|
||||||
return ;
|
|
||||||
}
|
|
||||||
|
|
||||||
// update dump info to `done`
|
|
||||||
let resume = DumpInfo::new(
|
|
||||||
dump_info.uid,
|
|
||||||
DumpStatus::Done
|
|
||||||
);
|
|
||||||
|
|
||||||
resume.set_current();
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn init_dump_process(data: &web::Data<Data>, dumps_dir: &Path) -> Result<DumpInfo, Error> {
|
|
||||||
create_dir_all(dumps_dir).map_err(|e| Error::dump_failed(format!("creating temporary directory {}", e)))?;
|
|
||||||
|
|
||||||
// check if a dump is already in progress
|
|
||||||
if let Some(resume) = DumpInfo::get_current() {
|
|
||||||
if resume.dump_already_in_progress() {
|
|
||||||
return Err(Error::dump_conflict())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// generate a new dump info
|
|
||||||
let info = DumpInfo::new(
|
|
||||||
generate_uid(),
|
|
||||||
DumpStatus::InProgress
|
|
||||||
);
|
|
||||||
|
|
||||||
info.set_current();
|
|
||||||
|
|
||||||
let data = data.clone();
|
|
||||||
let dumps_dir = dumps_dir.to_path_buf();
|
|
||||||
let info_cloned = info.clone();
|
|
||||||
// run dump process in a new thread
|
|
||||||
thread::spawn(move ||
|
|
||||||
dump_process(data, dumps_dir, info_cloned)
|
|
||||||
);
|
|
||||||
|
|
||||||
Ok(info)
|
|
||||||
}
|
|
@ -299,7 +299,7 @@ impl From<JsonPayloadError> for Error {
|
|||||||
JsonPayloadError::Payload(err) => {
|
JsonPayloadError::Payload(err) => {
|
||||||
Error::BadRequest(format!("Problem while decoding the request: {}", err))
|
Error::BadRequest(format!("Problem while decoding the request: {}", err))
|
||||||
}
|
}
|
||||||
e => Error::Internal(format!("Unexpected Json error: {}", e))
|
e => Error::Internal(format!("Unexpected Json error: {}", e)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -310,7 +310,7 @@ impl From<QueryPayloadError> for Error {
|
|||||||
QueryPayloadError::Deserialize(err) => {
|
QueryPayloadError::Deserialize(err) => {
|
||||||
Error::BadRequest(format!("Invalid query parameters: {}", err))
|
Error::BadRequest(format!("Invalid query parameters: {}", err))
|
||||||
}
|
}
|
||||||
e => Error::Internal(format!("Unexpected query payload error: {}", e))
|
e => Error::Internal(format!("Unexpected query payload error: {}", e)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,16 +1,16 @@
|
|||||||
use std::pin::Pin;
|
use std::pin::Pin;
|
||||||
use std::task::{Context, Poll};
|
use std::task::{Context, Poll};
|
||||||
|
|
||||||
|
use actix_web::body::Body;
|
||||||
use actix_web::dev::{Service, ServiceRequest, ServiceResponse, Transform};
|
use actix_web::dev::{Service, ServiceRequest, ServiceResponse, Transform};
|
||||||
use actix_web::web;
|
use actix_web::web;
|
||||||
use actix_web::body::Body;
|
|
||||||
use futures::ready;
|
|
||||||
use futures::future::{ok, Future, Ready};
|
|
||||||
use actix_web::ResponseError as _;
|
use actix_web::ResponseError as _;
|
||||||
|
use futures::future::{ok, Future, Ready};
|
||||||
|
use futures::ready;
|
||||||
use pin_project::pin_project;
|
use pin_project::pin_project;
|
||||||
|
|
||||||
use crate::Data;
|
|
||||||
use crate::error::{Error, ResponseError};
|
use crate::error::{Error, ResponseError};
|
||||||
|
use crate::Data;
|
||||||
|
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
pub enum Authentication {
|
pub enum Authentication {
|
||||||
@ -59,19 +59,15 @@ where
|
|||||||
let data = req.app_data::<web::Data<Data>>().unwrap();
|
let data = req.app_data::<web::Data<Data>>().unwrap();
|
||||||
|
|
||||||
if data.api_keys().master.is_none() {
|
if data.api_keys().master.is_none() {
|
||||||
return AuthenticationFuture::Authenticated(self.service.call(req))
|
return AuthenticationFuture::Authenticated(self.service.call(req));
|
||||||
}
|
}
|
||||||
|
|
||||||
let auth_header = match req.headers().get("X-Meili-API-Key") {
|
let auth_header = match req.headers().get("X-Meili-API-Key") {
|
||||||
Some(auth) => match auth.to_str() {
|
Some(auth) => match auth.to_str() {
|
||||||
Ok(auth) => auth,
|
Ok(auth) => auth,
|
||||||
Err(_) => {
|
Err(_) => return AuthenticationFuture::NoHeader(Some(req)),
|
||||||
return AuthenticationFuture::NoHeader(Some(req))
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
None => {
|
None => return AuthenticationFuture::NoHeader(Some(req)),
|
||||||
return AuthenticationFuture::NoHeader(Some(req))
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let authenticated = match self.acl {
|
let authenticated = match self.acl {
|
||||||
@ -114,12 +110,10 @@ where
|
|||||||
fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
|
fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
|
||||||
let this = self.project();
|
let this = self.project();
|
||||||
match this {
|
match this {
|
||||||
AuthProj::Authenticated(fut) => {
|
AuthProj::Authenticated(fut) => match ready!(fut.poll(cx)) {
|
||||||
match ready!(fut.poll(cx)) {
|
|
||||||
Ok(resp) => Poll::Ready(Ok(resp)),
|
Ok(resp) => Poll::Ready(Ok(resp)),
|
||||||
Err(e) => Poll::Ready(Err(e)),
|
Err(e) => Poll::Ready(Err(e)),
|
||||||
}
|
},
|
||||||
}
|
|
||||||
AuthProj::NoHeader(req) => {
|
AuthProj::NoHeader(req) => {
|
||||||
match req.take() {
|
match req.take() {
|
||||||
Some(req) => {
|
Some(req) => {
|
||||||
@ -135,7 +129,8 @@ where
|
|||||||
AuthProj::Refused(req) => {
|
AuthProj::Refused(req) => {
|
||||||
match req.take() {
|
match req.take() {
|
||||||
Some(req) => {
|
Some(req) => {
|
||||||
let bad_token = req.headers()
|
let bad_token = req
|
||||||
|
.headers()
|
||||||
.get("X-Meili-API-Key")
|
.get("X-Meili-API-Key")
|
||||||
.map(|h| h.to_str().map(String::from).unwrap_or_default())
|
.map(|h| h.to_str().map(String::from).unwrap_or_default())
|
||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
|
132
meilisearch-http/src/index/dump.rs
Normal file
132
meilisearch-http/src/index/dump.rs
Normal file
@ -0,0 +1,132 @@
|
|||||||
|
use std::fs::{create_dir_all, File};
|
||||||
|
use std::io::{BufRead, BufReader, Write};
|
||||||
|
use std::path::Path;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use anyhow::{bail, Context};
|
||||||
|
use heed::RoTxn;
|
||||||
|
use indexmap::IndexMap;
|
||||||
|
use milli::update::{IndexDocumentsMethod, UpdateFormat::JsonStream};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::option::IndexerOpts;
|
||||||
|
|
||||||
|
use super::{update_handler::UpdateHandler, Index, Settings, Unchecked};
|
||||||
|
|
||||||
|
/// Per-index metadata written to and read back from `META_FILE_NAME` by `Index::dump` and
/// `Index::load_dump`.
#[derive(Serialize, Deserialize)]
struct DumpMeta {
    /// Index settings, stored unchecked; `load_dump` calls `check()` on them before use.
    settings: Settings<Unchecked>,
    /// Primary key of the index, if one is set.
    primary_key: Option<String>,
}
|
||||||
|
|
||||||
|
/// Name of the file holding the serialized `DumpMeta` of an index.
const META_FILE_NAME: &str = "meta.json";
/// Name of the file holding the documents of an index, one JSON object per line.
const DATA_FILE_NAME: &str = "documents.jsonl";
|
||||||
|
|
||||||
|
impl Index {
|
||||||
|
pub fn dump(&self, path: impl AsRef<Path>) -> anyhow::Result<()> {
|
||||||
|
// acquire write txn make sure any ongoing write is finished before we start.
|
||||||
|
let txn = self.env.write_txn()?;
|
||||||
|
|
||||||
|
self.dump_documents(&txn, &path)?;
|
||||||
|
self.dump_meta(&txn, &path)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dump_documents(&self, txn: &RoTxn, path: impl AsRef<Path>) -> anyhow::Result<()> {
|
||||||
|
let document_file_path = path.as_ref().join(DATA_FILE_NAME);
|
||||||
|
let mut document_file = File::create(&document_file_path)?;
|
||||||
|
|
||||||
|
let documents = self.all_documents(txn)?;
|
||||||
|
let fields_ids_map = self.fields_ids_map(txn)?;
|
||||||
|
|
||||||
|
// dump documents
|
||||||
|
let mut json_map = IndexMap::new();
|
||||||
|
for document in documents {
|
||||||
|
let (_, reader) = document?;
|
||||||
|
|
||||||
|
for (fid, bytes) in reader.iter() {
|
||||||
|
if let Some(name) = fields_ids_map.name(fid) {
|
||||||
|
json_map.insert(name, serde_json::from_slice::<serde_json::Value>(bytes)?);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
serde_json::to_writer(&mut document_file, &json_map)?;
|
||||||
|
document_file.write_all(b"\n")?;
|
||||||
|
|
||||||
|
json_map.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dump_meta(&self, txn: &RoTxn, path: impl AsRef<Path>) -> anyhow::Result<()> {
|
||||||
|
let meta_file_path = path.as_ref().join(META_FILE_NAME);
|
||||||
|
let mut meta_file = File::create(&meta_file_path)?;
|
||||||
|
|
||||||
|
let settings = self.settings_txn(txn)?.into_unchecked();
|
||||||
|
let primary_key = self.primary_key(txn)?.map(String::from);
|
||||||
|
let meta = DumpMeta {
|
||||||
|
settings,
|
||||||
|
primary_key,
|
||||||
|
};
|
||||||
|
|
||||||
|
serde_json::to_writer(&mut meta_file, &meta)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn load_dump(
|
||||||
|
src: impl AsRef<Path>,
|
||||||
|
dst: impl AsRef<Path>,
|
||||||
|
size: usize,
|
||||||
|
indexing_options: &IndexerOpts,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
let dir_name = src
|
||||||
|
.as_ref()
|
||||||
|
.file_name()
|
||||||
|
.with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?;
|
||||||
|
let dst_dir_path = dst.as_ref().join("indexes").join(dir_name);
|
||||||
|
create_dir_all(&dst_dir_path)?;
|
||||||
|
|
||||||
|
let meta_path = src.as_ref().join(META_FILE_NAME);
|
||||||
|
let mut meta_file = File::open(meta_path)?;
|
||||||
|
let DumpMeta {
|
||||||
|
settings,
|
||||||
|
primary_key,
|
||||||
|
} = serde_json::from_reader(&mut meta_file)?;
|
||||||
|
let settings = settings.check();
|
||||||
|
let index = Self::open(&dst_dir_path, size)?;
|
||||||
|
let mut txn = index.write_txn()?;
|
||||||
|
|
||||||
|
let handler = UpdateHandler::new(&indexing_options)?;
|
||||||
|
|
||||||
|
index.update_settings_txn(&mut txn, &settings, handler.update_builder(0))?;
|
||||||
|
|
||||||
|
let document_file_path = src.as_ref().join(DATA_FILE_NAME);
|
||||||
|
let reader = File::open(&document_file_path)?;
|
||||||
|
let mut reader = BufReader::new(reader);
|
||||||
|
reader.fill_buf()?;
|
||||||
|
// If the document file is empty, we don't perform the document addition, to prevent
|
||||||
|
// a primary key error to be thrown.
|
||||||
|
if !reader.buffer().is_empty() {
|
||||||
|
index.update_documents_txn(
|
||||||
|
&mut txn,
|
||||||
|
JsonStream,
|
||||||
|
IndexDocumentsMethod::UpdateDocuments,
|
||||||
|
Some(reader),
|
||||||
|
handler.update_builder(0),
|
||||||
|
primary_key.as_deref(),
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
txn.commit()?;
|
||||||
|
|
||||||
|
match Arc::try_unwrap(index.0) {
|
||||||
|
Ok(inner) => inner.prepare_for_closing().wait(),
|
||||||
|
Err(_) => bail!("Could not close index properly."),
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
@ -1,16 +1,23 @@
|
|||||||
use std::{collections::{BTreeSet, HashSet}, marker::PhantomData};
|
use std::collections::{BTreeSet, HashSet};
|
||||||
|
use std::fs::create_dir_all;
|
||||||
|
use std::marker::PhantomData;
|
||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
|
use std::path::Path;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use anyhow::{bail, Context};
|
use anyhow::{bail, Context};
|
||||||
|
use heed::{EnvOpenOptions, RoTxn};
|
||||||
use milli::obkv_to_json;
|
use milli::obkv_to_json;
|
||||||
use serde_json::{Map, Value};
|
use serde_json::{Map, Value};
|
||||||
|
|
||||||
use crate::helpers::EnvSizer;
|
use crate::helpers::EnvSizer;
|
||||||
pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
|
pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
|
||||||
pub use updates::{Facets, Settings, Checked, Unchecked};
|
use serde::{de::Deserializer, Deserialize};
|
||||||
|
pub use updates::{Checked, Facets, Settings, Unchecked};
|
||||||
|
|
||||||
|
mod dump;
|
||||||
mod search;
|
mod search;
|
||||||
|
pub mod update_handler;
|
||||||
mod updates;
|
mod updates;
|
||||||
|
|
||||||
pub type Document = Map<String, Value>;
|
pub type Document = Map<String, Value>;
|
||||||
@ -26,19 +33,36 @@ impl Deref for Index {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn deserialize_some<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error>
|
||||||
|
where
|
||||||
|
T: Deserialize<'de>,
|
||||||
|
D: Deserializer<'de>,
|
||||||
|
{
|
||||||
|
Deserialize::deserialize(deserializer).map(Some)
|
||||||
|
}
|
||||||
|
|
||||||
impl Index {
|
impl Index {
|
||||||
|
pub fn open(path: impl AsRef<Path>, size: usize) -> anyhow::Result<Self> {
|
||||||
|
create_dir_all(&path)?;
|
||||||
|
let mut options = EnvOpenOptions::new();
|
||||||
|
options.map_size(size);
|
||||||
|
let index = milli::Index::new(options, &path)?;
|
||||||
|
Ok(Index(Arc::new(index)))
|
||||||
|
}
|
||||||
|
|
||||||
pub fn settings(&self) -> anyhow::Result<Settings<Checked>> {
|
pub fn settings(&self) -> anyhow::Result<Settings<Checked>> {
|
||||||
let txn = self.read_txn()?;
|
let txn = self.read_txn()?;
|
||||||
|
self.settings_txn(&txn)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn settings_txn(&self, txn: &RoTxn) -> anyhow::Result<Settings<Checked>> {
|
||||||
let displayed_attributes = self
|
let displayed_attributes = self
|
||||||
.displayed_fields(&txn)?
|
.displayed_fields(&txn)?
|
||||||
.map(|fields| fields.into_iter().map(String::from).collect())
|
.map(|fields| fields.into_iter().map(String::from).collect());
|
||||||
.unwrap_or_else(|| vec!["*".to_string()]);
|
|
||||||
|
|
||||||
let searchable_attributes = self
|
let searchable_attributes = self
|
||||||
.searchable_fields(&txn)?
|
.searchable_fields(&txn)?
|
||||||
.map(|fields| fields.into_iter().map(String::from).collect())
|
.map(|fields| fields.into_iter().map(String::from).collect());
|
||||||
.unwrap_or_else(|| vec!["*".to_string()]);
|
|
||||||
|
|
||||||
let faceted_attributes = self
|
let faceted_attributes = self
|
||||||
.faceted_fields(&txn)?
|
.faceted_fields(&txn)?
|
||||||
@ -62,8 +86,8 @@ impl Index {
|
|||||||
let distinct_attribute = self.distinct_attribute(&txn)?.map(String::from);
|
let distinct_attribute = self.distinct_attribute(&txn)?.map(String::from);
|
||||||
|
|
||||||
Ok(Settings {
|
Ok(Settings {
|
||||||
displayed_attributes: Some(Some(displayed_attributes)),
|
displayed_attributes: Some(displayed_attributes),
|
||||||
searchable_attributes: Some(Some(searchable_attributes)),
|
searchable_attributes: Some(searchable_attributes),
|
||||||
attributes_for_faceting: Some(Some(faceted_attributes)),
|
attributes_for_faceting: Some(Some(faceted_attributes)),
|
||||||
ranking_rules: Some(Some(criteria)),
|
ranking_rules: Some(Some(criteria)),
|
||||||
stop_words: Some(Some(stop_words)),
|
stop_words: Some(Some(stop_words)),
|
||||||
|
@ -90,7 +90,8 @@ impl Index {
|
|||||||
let mut documents = Vec::new();
|
let mut documents = Vec::new();
|
||||||
let fields_ids_map = self.fields_ids_map(&rtxn).unwrap();
|
let fields_ids_map = self.fields_ids_map(&rtxn).unwrap();
|
||||||
|
|
||||||
let displayed_ids = self.displayed_fields_ids(&rtxn)?
|
let displayed_ids = self
|
||||||
|
.displayed_fields_ids(&rtxn)?
|
||||||
.map(|fields| fields.into_iter().collect::<HashSet<_>>())
|
.map(|fields| fields.into_iter().collect::<HashSet<_>>())
|
||||||
.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
|
.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
|
||||||
|
|
||||||
@ -156,10 +157,8 @@ impl Index {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let stop_words = fst::Set::default();
|
let stop_words = fst::Set::default();
|
||||||
let highlighter = Highlighter::new(
|
let highlighter =
|
||||||
&stop_words,
|
Highlighter::new(&stop_words, (String::from("<em>"), String::from("</em>")));
|
||||||
(String::from("<em>"), String::from("</em>")),
|
|
||||||
);
|
|
||||||
|
|
||||||
for (_id, obkv) in self.documents(&rtxn, documents_ids)? {
|
for (_id, obkv) in self.documents(&rtxn, documents_ids)? {
|
||||||
let document = make_document(&all_attributes, &fields_ids_map, obkv)?;
|
let document = make_document(&all_attributes, &fields_ids_map, obkv)?;
|
||||||
@ -384,17 +383,16 @@ mod test {
|
|||||||
#[test]
|
#[test]
|
||||||
fn no_formatted() {
|
fn no_formatted() {
|
||||||
let stop_words = fst::Set::default();
|
let stop_words = fst::Set::default();
|
||||||
let highlighter = Highlighter::new(
|
let highlighter =
|
||||||
&stop_words,
|
Highlighter::new(&stop_words, (String::from("<em>"), String::from("</em>")));
|
||||||
(String::from("<em>"), String::from("</em>")),
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut fields = FieldsIdsMap::new();
|
let mut fields = FieldsIdsMap::new();
|
||||||
let id = fields.insert("test").unwrap();
|
let id = fields.insert("test").unwrap();
|
||||||
|
|
||||||
let mut buf = Vec::new();
|
let mut buf = Vec::new();
|
||||||
let mut obkv = obkv::KvWriter::new(&mut buf);
|
let mut obkv = obkv::KvWriter::new(&mut buf);
|
||||||
obkv.insert(id, Value::String("hello".into()).to_string().as_bytes()).unwrap();
|
obkv.insert(id, Value::String("hello".into()).to_string().as_bytes())
|
||||||
|
.unwrap();
|
||||||
obkv.finish().unwrap();
|
obkv.finish().unwrap();
|
||||||
|
|
||||||
let obkv = obkv::KvReader::new(&buf);
|
let obkv = obkv::KvReader::new(&buf);
|
||||||
@ -410,8 +408,9 @@ mod test {
|
|||||||
&highlighter,
|
&highlighter,
|
||||||
&matching_words,
|
&matching_words,
|
||||||
&all_formatted,
|
&all_formatted,
|
||||||
&to_highlight_ids
|
&to_highlight_ids,
|
||||||
).unwrap();
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
assert!(value.is_empty());
|
assert!(value.is_empty());
|
||||||
}
|
}
|
||||||
@ -419,17 +418,16 @@ mod test {
|
|||||||
#[test]
|
#[test]
|
||||||
fn formatted_no_highlight() {
|
fn formatted_no_highlight() {
|
||||||
let stop_words = fst::Set::default();
|
let stop_words = fst::Set::default();
|
||||||
let highlighter = Highlighter::new(
|
let highlighter =
|
||||||
&stop_words,
|
Highlighter::new(&stop_words, (String::from("<em>"), String::from("</em>")));
|
||||||
(String::from("<em>"), String::from("</em>")),
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut fields = FieldsIdsMap::new();
|
let mut fields = FieldsIdsMap::new();
|
||||||
let id = fields.insert("test").unwrap();
|
let id = fields.insert("test").unwrap();
|
||||||
|
|
||||||
let mut buf = Vec::new();
|
let mut buf = Vec::new();
|
||||||
let mut obkv = obkv::KvWriter::new(&mut buf);
|
let mut obkv = obkv::KvWriter::new(&mut buf);
|
||||||
obkv.insert(id, Value::String("hello".into()).to_string().as_bytes()).unwrap();
|
obkv.insert(id, Value::String("hello".into()).to_string().as_bytes())
|
||||||
|
.unwrap();
|
||||||
obkv.finish().unwrap();
|
obkv.finish().unwrap();
|
||||||
|
|
||||||
let obkv = obkv::KvReader::new(&buf);
|
let obkv = obkv::KvReader::new(&buf);
|
||||||
@ -445,8 +443,9 @@ mod test {
|
|||||||
&highlighter,
|
&highlighter,
|
||||||
&matching_words,
|
&matching_words,
|
||||||
&all_formatted,
|
&all_formatted,
|
||||||
&to_highlight_ids
|
&to_highlight_ids,
|
||||||
).unwrap();
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
assert_eq!(value["test"], "hello");
|
assert_eq!(value["test"], "hello");
|
||||||
}
|
}
|
||||||
@ -454,17 +453,16 @@ mod test {
|
|||||||
#[test]
|
#[test]
|
||||||
fn formatted_with_highlight() {
|
fn formatted_with_highlight() {
|
||||||
let stop_words = fst::Set::default();
|
let stop_words = fst::Set::default();
|
||||||
let highlighter = Highlighter::new(
|
let highlighter =
|
||||||
&stop_words,
|
Highlighter::new(&stop_words, (String::from("<em>"), String::from("</em>")));
|
||||||
(String::from("<em>"), String::from("</em>")),
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut fields = FieldsIdsMap::new();
|
let mut fields = FieldsIdsMap::new();
|
||||||
let id = fields.insert("test").unwrap();
|
let id = fields.insert("test").unwrap();
|
||||||
|
|
||||||
let mut buf = Vec::new();
|
let mut buf = Vec::new();
|
||||||
let mut obkv = obkv::KvWriter::new(&mut buf);
|
let mut obkv = obkv::KvWriter::new(&mut buf);
|
||||||
obkv.insert(id, Value::String("hello".into()).to_string().as_bytes()).unwrap();
|
obkv.insert(id, Value::String("hello".into()).to_string().as_bytes())
|
||||||
|
.unwrap();
|
||||||
obkv.finish().unwrap();
|
obkv.finish().unwrap();
|
||||||
|
|
||||||
let obkv = obkv::KvReader::new(&buf);
|
let obkv = obkv::KvReader::new(&buf);
|
||||||
@ -480,8 +478,9 @@ mod test {
|
|||||||
&highlighter,
|
&highlighter,
|
||||||
&matching_words,
|
&matching_words,
|
||||||
&all_formatted,
|
&all_formatted,
|
||||||
&to_highlight_ids
|
&to_highlight_ids,
|
||||||
).unwrap();
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
assert_eq!(value["test"], "<em>hello</em>");
|
assert_eq!(value["test"], "<em>hello</em>");
|
||||||
}
|
}
|
||||||
|
@ -38,7 +38,7 @@ impl UpdateHandler {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn update_builder(&self, update_id: u64) -> UpdateBuilder {
|
pub fn update_builder(&self, update_id: u64) -> UpdateBuilder {
|
||||||
// We prepare the update by using the update builder.
|
// We prepare the update by using the update builder.
|
||||||
let mut update_builder = UpdateBuilder::new(update_id);
|
let mut update_builder = UpdateBuilder::new(update_id);
|
||||||
if let Some(max_nb_chunks) = self.max_nb_chunks {
|
if let Some(max_nb_chunks) = self.max_nb_chunks {
|
||||||
@ -82,7 +82,7 @@ impl UpdateHandler {
|
|||||||
),
|
),
|
||||||
ClearDocuments => index.clear_documents(update_builder),
|
ClearDocuments => index.clear_documents(update_builder),
|
||||||
DeleteDocuments => index.delete_documents(content, update_builder),
|
DeleteDocuments => index.delete_documents(content, update_builder),
|
||||||
Settings(settings) => index.update_settings(settings, update_builder),
|
Settings(settings) => index.update_settings(&settings.clone().check(), update_builder),
|
||||||
};
|
};
|
||||||
|
|
||||||
match result {
|
match result {
|
@ -1,28 +1,39 @@
|
|||||||
use std::collections::{BTreeSet, HashMap};
|
use std::collections::{BTreeSet, HashMap};
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::num::NonZeroUsize;
|
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
|
use std::num::NonZeroUsize;
|
||||||
|
|
||||||
use flate2::read::GzDecoder;
|
use flate2::read::GzDecoder;
|
||||||
use log::info;
|
use log::info;
|
||||||
use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
|
use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
|
||||||
use serde::{de::Deserializer, Deserialize, Serialize};
|
use serde::{Deserialize, Serialize, Serializer};
|
||||||
|
|
||||||
use super::Index;
|
|
||||||
use crate::index_controller::UpdateResult;
|
use crate::index_controller::UpdateResult;
|
||||||
|
|
||||||
#[derive(Clone, Default, Debug)]
|
use super::{deserialize_some, Index};
|
||||||
|
|
||||||
|
fn serialize_with_wildcard<S>(field: &Option<Option<Vec<String>>>, s: S) -> Result<S::Ok, S::Error>
|
||||||
|
where
|
||||||
|
S: Serializer,
|
||||||
|
{
|
||||||
|
let wildcard = vec!["*".to_string()];
|
||||||
|
s.serialize_some(&field.as_ref().map(|o| o.as_ref().unwrap_or(&wildcard)))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Default, Debug, Serialize)]
|
||||||
pub struct Checked;
|
pub struct Checked;
|
||||||
#[derive(Clone, Default, Debug)]
|
#[derive(Clone, Default, Debug, Serialize, Deserialize)]
|
||||||
pub struct Unchecked;
|
pub struct Unchecked;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||||
#[serde(deny_unknown_fields)]
|
#[serde(deny_unknown_fields)]
|
||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
|
#[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))]
|
||||||
pub struct Settings<T> {
|
pub struct Settings<T> {
|
||||||
#[serde(
|
#[serde(
|
||||||
default,
|
default,
|
||||||
deserialize_with = "deserialize_some",
|
deserialize_with = "deserialize_some",
|
||||||
|
serialize_with = "serialize_with_wildcard",
|
||||||
skip_serializing_if = "Option::is_none"
|
skip_serializing_if = "Option::is_none"
|
||||||
)]
|
)]
|
||||||
pub displayed_attributes: Option<Option<Vec<String>>>,
|
pub displayed_attributes: Option<Option<Vec<String>>>,
|
||||||
@ -30,11 +41,16 @@ pub struct Settings<T> {
|
|||||||
#[serde(
|
#[serde(
|
||||||
default,
|
default,
|
||||||
deserialize_with = "deserialize_some",
|
deserialize_with = "deserialize_some",
|
||||||
|
serialize_with = "serialize_with_wildcard",
|
||||||
skip_serializing_if = "Option::is_none"
|
skip_serializing_if = "Option::is_none"
|
||||||
)]
|
)]
|
||||||
pub searchable_attributes: Option<Option<Vec<String>>>,
|
pub searchable_attributes: Option<Option<Vec<String>>>,
|
||||||
|
|
||||||
#[serde(default)]
|
#[serde(
|
||||||
|
default,
|
||||||
|
deserialize_with = "deserialize_some",
|
||||||
|
skip_serializing_if = "Option::is_none"
|
||||||
|
)]
|
||||||
pub attributes_for_faceting: Option<Option<HashMap<String, String>>>,
|
pub attributes_for_faceting: Option<Option<HashMap<String, String>>>,
|
||||||
|
|
||||||
#[serde(
|
#[serde(
|
||||||
@ -72,6 +88,28 @@ impl Settings<Checked> {
|
|||||||
_kind: PhantomData,
|
_kind: PhantomData,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn into_unchecked(self) -> Settings<Unchecked> {
|
||||||
|
let Self {
|
||||||
|
displayed_attributes,
|
||||||
|
searchable_attributes,
|
||||||
|
attributes_for_faceting,
|
||||||
|
ranking_rules,
|
||||||
|
stop_words,
|
||||||
|
distinct_attribute,
|
||||||
|
..
|
||||||
|
} = self;
|
||||||
|
|
||||||
|
Settings {
|
||||||
|
displayed_attributes,
|
||||||
|
searchable_attributes,
|
||||||
|
attributes_for_faceting,
|
||||||
|
ranking_rules,
|
||||||
|
stop_words,
|
||||||
|
distinct_attribute,
|
||||||
|
_kind: PhantomData,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Settings<Unchecked> {
|
impl Settings<Unchecked> {
|
||||||
@ -118,14 +156,6 @@ pub struct Facets {
|
|||||||
pub min_level_size: Option<NonZeroUsize>,
|
pub min_level_size: Option<NonZeroUsize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn deserialize_some<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error>
|
|
||||||
where
|
|
||||||
T: Deserialize<'de>,
|
|
||||||
D: Deserializer<'de>,
|
|
||||||
{
|
|
||||||
Deserialize::deserialize(deserializer).map(Some)
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Index {
|
impl Index {
|
||||||
pub fn update_documents(
|
pub fn update_documents(
|
||||||
&self,
|
&self,
|
||||||
@ -135,16 +165,36 @@ impl Index {
|
|||||||
update_builder: UpdateBuilder,
|
update_builder: UpdateBuilder,
|
||||||
primary_key: Option<&str>,
|
primary_key: Option<&str>,
|
||||||
) -> anyhow::Result<UpdateResult> {
|
) -> anyhow::Result<UpdateResult> {
|
||||||
info!("performing document addition");
|
let mut txn = self.write_txn()?;
|
||||||
// We must use the write transaction of the update here.
|
let result = self.update_documents_txn(
|
||||||
let mut wtxn = self.write_txn()?;
|
&mut txn,
|
||||||
|
format,
|
||||||
// Set the primary key if not set already, ignore if already set.
|
method,
|
||||||
if let (None, Some(ref primary_key)) = (self.primary_key(&wtxn)?, primary_key) {
|
content,
|
||||||
self.put_primary_key(&mut wtxn, primary_key)?;
|
update_builder,
|
||||||
|
primary_key,
|
||||||
|
)?;
|
||||||
|
txn.commit()?;
|
||||||
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut builder = update_builder.index_documents(&mut wtxn, self);
|
pub fn update_documents_txn<'a, 'b>(
|
||||||
|
&'a self,
|
||||||
|
txn: &mut heed::RwTxn<'a, 'b>,
|
||||||
|
format: UpdateFormat,
|
||||||
|
method: IndexDocumentsMethod,
|
||||||
|
content: Option<impl io::Read>,
|
||||||
|
update_builder: UpdateBuilder,
|
||||||
|
primary_key: Option<&str>,
|
||||||
|
) -> anyhow::Result<UpdateResult> {
|
||||||
|
info!("performing document addition");
|
||||||
|
|
||||||
|
// Set the primary key if not set already, ignore if already set.
|
||||||
|
if let (None, Some(ref primary_key)) = (self.primary_key(txn)?, primary_key) {
|
||||||
|
self.put_primary_key(txn, primary_key)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut builder = update_builder.index_documents(txn, self);
|
||||||
builder.update_format(format);
|
builder.update_format(format);
|
||||||
builder.index_documents_method(method);
|
builder.index_documents_method(method);
|
||||||
|
|
||||||
@ -152,19 +202,17 @@ impl Index {
|
|||||||
|indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step);
|
|indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step);
|
||||||
|
|
||||||
let gzipped = false;
|
let gzipped = false;
|
||||||
let result = match content {
|
let addition = match content {
|
||||||
Some(content) if gzipped => builder.execute(GzDecoder::new(content), indexing_callback),
|
Some(content) if gzipped => {
|
||||||
Some(content) => builder.execute(content, indexing_callback),
|
builder.execute(GzDecoder::new(content), indexing_callback)?
|
||||||
None => builder.execute(std::io::empty(), indexing_callback),
|
}
|
||||||
|
Some(content) => builder.execute(content, indexing_callback)?,
|
||||||
|
None => builder.execute(std::io::empty(), indexing_callback)?,
|
||||||
};
|
};
|
||||||
|
|
||||||
info!("document addition done: {:?}", result);
|
info!("document addition done: {:?}", addition);
|
||||||
|
|
||||||
result.and_then(|addition_result| {
|
Ok(UpdateResult::DocumentsAddition(addition))
|
||||||
wtxn.commit()
|
|
||||||
.and(Ok(UpdateResult::DocumentsAddition(addition_result)))
|
|
||||||
.map_err(Into::into)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn clear_documents(&self, update_builder: UpdateBuilder) -> anyhow::Result<UpdateResult> {
|
pub fn clear_documents(&self, update_builder: UpdateBuilder) -> anyhow::Result<UpdateResult> {
|
||||||
@ -181,14 +229,14 @@ impl Index {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn update_settings(
|
pub fn update_settings_txn<'a, 'b>(
|
||||||
&self,
|
&'a self,
|
||||||
|
txn: &mut heed::RwTxn<'a, 'b>,
|
||||||
settings: &Settings<Checked>,
|
settings: &Settings<Checked>,
|
||||||
update_builder: UpdateBuilder,
|
update_builder: UpdateBuilder,
|
||||||
) -> anyhow::Result<UpdateResult> {
|
) -> anyhow::Result<UpdateResult> {
|
||||||
// We must use the write transaction of the update here.
|
// We must use the write transaction of the update here.
|
||||||
let mut wtxn = self.write_txn()?;
|
let mut builder = update_builder.settings(txn, self);
|
||||||
let mut builder = update_builder.settings(&mut wtxn, self);
|
|
||||||
|
|
||||||
if let Some(ref names) = settings.searchable_attributes {
|
if let Some(ref names) = settings.searchable_attributes {
|
||||||
match names {
|
match names {
|
||||||
@ -230,16 +278,22 @@ impl Index {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let result = builder
|
builder.execute(|indexing_step, update_id| {
|
||||||
.execute(|indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step));
|
info!("update {}: {:?}", update_id, indexing_step)
|
||||||
|
})?;
|
||||||
|
|
||||||
match result {
|
Ok(UpdateResult::Other)
|
||||||
Ok(()) => wtxn
|
|
||||||
.commit()
|
|
||||||
.and(Ok(UpdateResult::Other))
|
|
||||||
.map_err(Into::into),
|
|
||||||
Err(e) => Err(e),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn update_settings(
|
||||||
|
&self,
|
||||||
|
settings: &Settings<Checked>,
|
||||||
|
update_builder: UpdateBuilder,
|
||||||
|
) -> anyhow::Result<UpdateResult> {
|
||||||
|
let mut txn = self.write_txn()?;
|
||||||
|
let result = self.update_settings_txn(&mut txn, settings, update_builder)?;
|
||||||
|
txn.commit()?;
|
||||||
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn delete_documents(
|
pub fn delete_documents(
|
||||||
@ -288,7 +342,10 @@ mod test {
|
|||||||
|
|
||||||
let checked = settings.clone().check();
|
let checked = settings.clone().check();
|
||||||
assert_eq!(settings.displayed_attributes, checked.displayed_attributes);
|
assert_eq!(settings.displayed_attributes, checked.displayed_attributes);
|
||||||
assert_eq!(settings.searchable_attributes, checked.searchable_attributes);
|
assert_eq!(
|
||||||
|
settings.searchable_attributes,
|
||||||
|
checked.searchable_attributes
|
||||||
|
);
|
||||||
|
|
||||||
// test wildcard
|
// test wildcard
|
||||||
// test no changes
|
// test no changes
|
||||||
|
156
meilisearch-http/src/index_controller/dump_actor/actor.rs
Normal file
156
meilisearch-http/src/index_controller/dump_actor/actor.rs
Normal file
@ -0,0 +1,156 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use async_stream::stream;
|
||||||
|
use chrono::Utc;
|
||||||
|
use futures::{lock::Mutex, stream::StreamExt};
|
||||||
|
use log::{error, info};
|
||||||
|
use tokio::sync::{mpsc, oneshot, RwLock};
|
||||||
|
use update_actor::UpdateActorHandle;
|
||||||
|
use uuid_resolver::UuidResolverHandle;
|
||||||
|
|
||||||
|
use super::{DumpError, DumpInfo, DumpMsg, DumpResult, DumpStatus, DumpTask};
|
||||||
|
use crate::index_controller::{update_actor, uuid_resolver};
|
||||||
|
|
||||||
|
/// Limit passed to `for_each_concurrent` in `DumpActor::run`: the maximum number of dump
/// messages handled concurrently.
pub const CONCURRENT_DUMP_MSG: usize = 10;
|
||||||
|
|
||||||
|
/// Actor that receives `DumpMsg` requests, creates dumps, and keeps track of their status.
pub struct DumpActor<UuidResolver, Update> {
|
||||||
|
// Receiving end of the message channel; `run` takes it out of the option.
inbox: Option<mpsc::Receiver<DumpMsg>>,
|
||||||
|
// Handle cloned into every `DumpTask`.
uuid_resolver: UuidResolver,
|
||||||
|
// Handle cloned into every `DumpTask` (as `update_handle`).
update: Update,
|
||||||
|
// Path handed to every `DumpTask` as `path`.
dump_path: PathBuf,
|
||||||
|
// `try_lock`ed by `handle_create_dump`; a failed attempt is answered with
// `DumpError::DumpAlreadyRunning`.
lock: Arc<Mutex<()>>,
|
||||||
|
// In-memory registry of dump infos, keyed by dump uid.
dump_infos: Arc<RwLock<HashMap<String, DumpInfo>>>,
|
||||||
|
// Forwarded unchanged to every `DumpTask`.
update_db_size: usize,
|
||||||
|
// Forwarded unchanged to every `DumpTask`.
index_db_size: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Generate uid from creation date
|
||||||
|
fn generate_uid() -> String {
|
||||||
|
Utc::now().format("%Y%m%d-%H%M%S%3f").to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<UuidResolver, Update> DumpActor<UuidResolver, Update>
|
||||||
|
where
|
||||||
|
UuidResolver: UuidResolverHandle + Send + Sync + Clone + 'static,
|
||||||
|
Update: UpdateActorHandle + Send + Sync + Clone + 'static,
|
||||||
|
{
|
||||||
|
pub fn new(
|
||||||
|
inbox: mpsc::Receiver<DumpMsg>,
|
||||||
|
uuid_resolver: UuidResolver,
|
||||||
|
update: Update,
|
||||||
|
dump_path: impl AsRef<Path>,
|
||||||
|
index_db_size: usize,
|
||||||
|
update_db_size: usize,
|
||||||
|
) -> Self {
|
||||||
|
let dump_infos = Arc::new(RwLock::new(HashMap::new()));
|
||||||
|
let lock = Arc::new(Mutex::new(()));
|
||||||
|
Self {
|
||||||
|
inbox: Some(inbox),
|
||||||
|
uuid_resolver,
|
||||||
|
update,
|
||||||
|
dump_path: dump_path.as_ref().into(),
|
||||||
|
dump_infos,
|
||||||
|
lock,
|
||||||
|
index_db_size,
|
||||||
|
update_db_size,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn run(mut self) {
|
||||||
|
info!("Started dump actor.");
|
||||||
|
|
||||||
|
let mut inbox = self
|
||||||
|
.inbox
|
||||||
|
.take()
|
||||||
|
.expect("Dump Actor must have a inbox at this point.");
|
||||||
|
|
||||||
|
let stream = stream! {
|
||||||
|
loop {
|
||||||
|
match inbox.recv().await {
|
||||||
|
Some(msg) => yield msg,
|
||||||
|
None => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
stream
|
||||||
|
.for_each_concurrent(Some(CONCURRENT_DUMP_MSG), |msg| self.handle_message(msg))
|
||||||
|
.await;
|
||||||
|
|
||||||
|
error!("Dump actor stopped.");
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_message(&self, msg: DumpMsg) {
|
||||||
|
use DumpMsg::*;
|
||||||
|
|
||||||
|
match msg {
|
||||||
|
CreateDump { ret } => {
|
||||||
|
let _ = self.handle_create_dump(ret).await;
|
||||||
|
}
|
||||||
|
DumpInfo { ret, uid } => {
|
||||||
|
let _ = ret.send(self.handle_dump_info(uid).await);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_create_dump(&self, ret: oneshot::Sender<DumpResult<DumpInfo>>) {
|
||||||
|
let uid = generate_uid();
|
||||||
|
let info = DumpInfo::new(uid.clone(), DumpStatus::InProgress);
|
||||||
|
|
||||||
|
let _lock = match self.lock.try_lock() {
|
||||||
|
Some(lock) => lock,
|
||||||
|
None => {
|
||||||
|
ret.send(Err(DumpError::DumpAlreadyRunning))
|
||||||
|
.expect("Dump actor is dead");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
self.dump_infos
|
||||||
|
.write()
|
||||||
|
.await
|
||||||
|
.insert(uid.clone(), info.clone());
|
||||||
|
|
||||||
|
ret.send(Ok(info)).expect("Dump actor is dead");
|
||||||
|
|
||||||
|
let task = DumpTask {
|
||||||
|
path: self.dump_path.clone(),
|
||||||
|
uuid_resolver: self.uuid_resolver.clone(),
|
||||||
|
update_handle: self.update.clone(),
|
||||||
|
uid: uid.clone(),
|
||||||
|
update_db_size: self.update_db_size,
|
||||||
|
index_db_size: self.index_db_size,
|
||||||
|
};
|
||||||
|
|
||||||
|
let task_result = tokio::task::spawn(task.run()).await;
|
||||||
|
|
||||||
|
let mut dump_infos = self.dump_infos.write().await;
|
||||||
|
let dump_infos = dump_infos
|
||||||
|
.get_mut(&uid)
|
||||||
|
.expect("dump entry deleted while lock was acquired");
|
||||||
|
|
||||||
|
match task_result {
|
||||||
|
Ok(Ok(())) => {
|
||||||
|
dump_infos.done();
|
||||||
|
info!("Dump succeed");
|
||||||
|
}
|
||||||
|
Ok(Err(e)) => {
|
||||||
|
dump_infos.with_error(e.to_string());
|
||||||
|
error!("Dump failed: {}", e);
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
dump_infos.with_error("Unexpected error while performing dump.".to_string());
|
||||||
|
error!("Dump panicked. Dump status set to failed");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_dump_info(&self, uid: String) -> DumpResult<DumpInfo> {
|
||||||
|
match self.dump_infos.read().await.get(&uid) {
|
||||||
|
Some(info) => Ok(info.clone()),
|
||||||
|
_ => Err(DumpError::DumpDoesNotExist(uid)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,52 @@
|
|||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
use actix_web::web::Bytes;
|
||||||
|
use tokio::sync::{mpsc, oneshot};
|
||||||
|
|
||||||
|
use super::{DumpActor, DumpActorHandle, DumpInfo, DumpMsg, DumpResult};
|
||||||
|
|
||||||
|
/// Cloneable handle that talks to a spawned `DumpActor` over an mpsc channel.
#[derive(Clone)]
|
||||||
|
pub struct DumpActorHandleImpl {
|
||||||
|
// Sending end of the channel whose receiver is owned by the actor.
sender: mpsc::Sender<DumpMsg>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait::async_trait]
|
||||||
|
impl DumpActorHandle for DumpActorHandleImpl {
|
||||||
|
async fn create_dump(&self) -> DumpResult<DumpInfo> {
|
||||||
|
let (ret, receiver) = oneshot::channel();
|
||||||
|
let msg = DumpMsg::CreateDump { ret };
|
||||||
|
let _ = self.sender.send(msg).await;
|
||||||
|
receiver.await.expect("IndexActor has been killed")
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn dump_info(&self, uid: String) -> DumpResult<DumpInfo> {
|
||||||
|
let (ret, receiver) = oneshot::channel();
|
||||||
|
let msg = DumpMsg::DumpInfo { ret, uid };
|
||||||
|
let _ = self.sender.send(msg).await;
|
||||||
|
receiver.await.expect("IndexActor has been killed")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DumpActorHandleImpl {
|
||||||
|
pub fn new(
|
||||||
|
path: impl AsRef<Path>,
|
||||||
|
uuid_resolver: crate::index_controller::uuid_resolver::UuidResolverHandleImpl,
|
||||||
|
update: crate::index_controller::update_actor::UpdateActorHandleImpl<Bytes>,
|
||||||
|
index_db_size: usize,
|
||||||
|
update_db_size: usize,
|
||||||
|
) -> anyhow::Result<Self> {
|
||||||
|
let (sender, receiver) = mpsc::channel(10);
|
||||||
|
let actor = DumpActor::new(
|
||||||
|
receiver,
|
||||||
|
uuid_resolver,
|
||||||
|
update,
|
||||||
|
path,
|
||||||
|
index_db_size,
|
||||||
|
update_db_size,
|
||||||
|
);
|
||||||
|
|
||||||
|
tokio::task::spawn(actor.run());
|
||||||
|
|
||||||
|
Ok(Self { sender })
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,2 @@
|
|||||||
|
pub mod v1;
|
||||||
|
pub mod v2;
|
183
meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs
Normal file
183
meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs
Normal file
@ -0,0 +1,183 @@
|
|||||||
|
use std::collections::{BTreeMap, BTreeSet};
|
||||||
|
use std::fs::{create_dir_all, File};
|
||||||
|
use std::io::BufRead;
|
||||||
|
use std::marker::PhantomData;
|
||||||
|
use std::path::Path;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use heed::EnvOpenOptions;
|
||||||
|
use log::{error, info, warn};
|
||||||
|
use milli::update::{IndexDocumentsMethod, UpdateFormat};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use crate::index_controller::{self, uuid_resolver::HeedUuidStore, IndexMetadata};
|
||||||
|
use crate::{
|
||||||
|
index::{deserialize_some, update_handler::UpdateHandler, Index, Unchecked},
|
||||||
|
option::IndexerOpts,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Metadata of a V1 dump, (de)serialized with camelCase keys.
#[derive(Serialize, Deserialize, Debug)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct MetadataV1 {
|
||||||
|
// Database version recorded in the dump; only logged by `load_dump`.
db_version: String,
|
||||||
|
// One entry per index in the dump; `load_dump` loads them one by one.
indexes: Vec<IndexMetadata>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MetadataV1 {
|
||||||
|
pub fn load_dump(
|
||||||
|
self,
|
||||||
|
src: impl AsRef<Path>,
|
||||||
|
dst: impl AsRef<Path>,
|
||||||
|
size: usize,
|
||||||
|
indexer_options: &IndexerOpts,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
info!(
|
||||||
|
"Loading dump, dump database version: {}, dump version: V1",
|
||||||
|
self.db_version
|
||||||
|
);
|
||||||
|
|
||||||
|
let uuid_store = HeedUuidStore::new(&dst)?;
|
||||||
|
for index in self.indexes {
|
||||||
|
let uuid = Uuid::new_v4();
|
||||||
|
uuid_store.insert(index.uid.clone(), uuid)?;
|
||||||
|
let src = src.as_ref().join(index.uid);
|
||||||
|
load_index(
|
||||||
|
&src,
|
||||||
|
&dst,
|
||||||
|
uuid,
|
||||||
|
index.meta.primary_key.as_deref(),
|
||||||
|
size,
|
||||||
|
indexer_options,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// These are the settings used in legacy meilisearch (<v0.21.0).
//
// Every field is an `Option<Option<_>>` deserialized through `deserialize_some` with
// `default`, so a missing key stays `None` while a present key is always `Some(..)`.
// Unknown keys are rejected (`deny_unknown_fields`).
|
||||||
|
#[derive(Default, Clone, Serialize, Deserialize, Debug)]
|
||||||
|
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||||
|
struct Settings {
|
||||||
|
#[serde(default, deserialize_with = "deserialize_some")]
|
||||||
|
pub ranking_rules: Option<Option<Vec<String>>>,
|
||||||
|
#[serde(default, deserialize_with = "deserialize_some")]
|
||||||
|
pub distinct_attribute: Option<Option<String>>,
|
||||||
|
#[serde(default, deserialize_with = "deserialize_some")]
|
||||||
|
pub searchable_attributes: Option<Option<Vec<String>>>,
|
||||||
|
#[serde(default, deserialize_with = "deserialize_some")]
|
||||||
|
// A set here, unlike the other attribute lists of this struct, which are `Vec`s.
pub displayed_attributes: Option<Option<BTreeSet<String>>>,
|
||||||
|
#[serde(default, deserialize_with = "deserialize_some")]
|
||||||
|
pub stop_words: Option<Option<BTreeSet<String>>>,
|
||||||
|
#[serde(default, deserialize_with = "deserialize_some")]
|
||||||
|
// Accepted on input, but the conversion to the current settings ignores synonyms.
pub synonyms: Option<Option<BTreeMap<String, Vec<String>>>>,
|
||||||
|
#[serde(default, deserialize_with = "deserialize_some")]
|
||||||
|
pub attributes_for_faceting: Option<Option<Vec<String>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn load_index(
|
||||||
|
src: impl AsRef<Path>,
|
||||||
|
dst: impl AsRef<Path>,
|
||||||
|
uuid: Uuid,
|
||||||
|
primary_key: Option<&str>,
|
||||||
|
size: usize,
|
||||||
|
indexer_options: &IndexerOpts,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
let index_path = dst.as_ref().join(&format!("indexes/index-{}", uuid));
|
||||||
|
|
||||||
|
create_dir_all(&index_path)?;
|
||||||
|
let mut options = EnvOpenOptions::new();
|
||||||
|
options.map_size(size);
|
||||||
|
let index = milli::Index::new(options, index_path)?;
|
||||||
|
let index = Index(Arc::new(index));
|
||||||
|
|
||||||
|
// extract `settings.json` file and import content
|
||||||
|
let settings = import_settings(&src)?;
|
||||||
|
let settings: index_controller::Settings<Unchecked> = settings.into();
|
||||||
|
|
||||||
|
let mut txn = index.write_txn()?;
|
||||||
|
|
||||||
|
let handler = UpdateHandler::new(&indexer_options)?;
|
||||||
|
|
||||||
|
index.update_settings_txn(&mut txn, &settings.check(), handler.update_builder(0))?;
|
||||||
|
|
||||||
|
let file = File::open(&src.as_ref().join("documents.jsonl"))?;
|
||||||
|
let mut reader = std::io::BufReader::new(file);
|
||||||
|
reader.fill_buf()?;
|
||||||
|
if !reader.buffer().is_empty() {
|
||||||
|
index.update_documents_txn(
|
||||||
|
&mut txn,
|
||||||
|
UpdateFormat::JsonStream,
|
||||||
|
IndexDocumentsMethod::ReplaceDocuments,
|
||||||
|
Some(reader),
|
||||||
|
handler.update_builder(0),
|
||||||
|
primary_key,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
txn.commit()?;
|
||||||
|
|
||||||
|
// Finaly, we extract the original milli::Index and close it
|
||||||
|
Arc::try_unwrap(index.0)
|
||||||
|
.map_err(|_e| "Couldn't close the index properly")
|
||||||
|
.unwrap()
|
||||||
|
.prepare_for_closing()
|
||||||
|
.wait();
|
||||||
|
|
||||||
|
// Updates are ignored in dumps V1.
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// we need to **always** be able to convert the old settings to the settings currently being used
|
||||||
|
impl From<Settings> for index_controller::Settings<Unchecked> {
|
||||||
|
fn from(settings: Settings) -> Self {
|
||||||
|
if settings.synonyms.flatten().is_some() {
|
||||||
|
error!("`synonyms` are not yet implemented and thus will be ignored");
|
||||||
|
}
|
||||||
|
Self {
|
||||||
|
distinct_attribute: settings.distinct_attribute,
|
||||||
|
// we need to convert the old `Vec<String>` into a `BTreeSet<String>`
|
||||||
|
displayed_attributes: settings.displayed_attributes.map(|o| o.map(|vec| vec.into_iter().collect())),
|
||||||
|
searchable_attributes: settings.searchable_attributes,
|
||||||
|
// we previously had a `Vec<String>` but now we have a `HashMap<String, String>`
|
||||||
|
// representing the name of the faceted field + the type of the field. Since the type
|
||||||
|
// was not known in the V1 of the dump we are just going to assume everything is a
|
||||||
|
// String
|
||||||
|
attributes_for_faceting: settings.attributes_for_faceting.map(|o| o.map(|vec| vec.into_iter().map(|key| (key, String::from("string"))).collect())),
|
||||||
|
// we need to convert the old `Vec<String>` into a `BTreeSet<String>`
|
||||||
|
ranking_rules: settings.ranking_rules.map(|o| o.map(|vec| vec.into_iter().filter_map(|criterion| {
|
||||||
|
match criterion.as_str() {
|
||||||
|
"words" | "typo" | "proximity" | "attribute" => Some(criterion),
|
||||||
|
s if s.starts_with("asc") || s.starts_with("desc") => Some(criterion),
|
||||||
|
"wordsPosition" => {
|
||||||
|
warn!("The criteria `words` and `wordsPosition` have been merged into a single criterion `words` so `wordsPositon` will be ignored");
|
||||||
|
Some(String::from("words"))
|
||||||
|
}
|
||||||
|
"exactness" => {
|
||||||
|
error!("The criterion `{}` is not implemented currently and thus will be ignored", criterion);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
s => {
|
||||||
|
error!("Unknown criterion found in the dump: `{}`, it will be ignored", s);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}).collect())),
|
||||||
|
// we need to convert the old `Vec<String>` into a `BTreeSet<String>`
|
||||||
|
stop_words: settings.stop_words.map(|o| o.map(|vec| vec.into_iter().collect())),
|
||||||
|
_kind: PhantomData,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extract Settings from `settings.json` file present at provided `dir_path`
|
||||||
|
fn import_settings(dir_path: impl AsRef<Path>) -> anyhow::Result<Settings> {
|
||||||
|
let path = dir_path.as_ref().join("settings.json");
|
||||||
|
let file = File::open(path)?;
|
||||||
|
let reader = std::io::BufReader::new(file);
|
||||||
|
let metadata = serde_json::from_reader(reader)?;
|
||||||
|
|
||||||
|
Ok(metadata)
|
||||||
|
}
|
@ -0,0 +1,59 @@
|
|||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
use chrono::{DateTime, Utc};
|
||||||
|
use log::info;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::index::Index;
|
||||||
|
use crate::index_controller::{update_actor::UpdateStore, uuid_resolver::HeedUuidStore};
|
||||||
|
use crate::option::IndexerOpts;
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct MetadataV2 {
|
||||||
|
db_version: String,
|
||||||
|
index_db_size: usize,
|
||||||
|
update_db_size: usize,
|
||||||
|
dump_date: DateTime<Utc>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MetadataV2 {
|
||||||
|
pub fn new(index_db_size: usize, update_db_size: usize) -> Self {
|
||||||
|
Self {
|
||||||
|
db_version: env!("CARGO_PKG_VERSION").to_string(),
|
||||||
|
index_db_size,
|
||||||
|
update_db_size,
|
||||||
|
dump_date: Utc::now(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn load_dump(
|
||||||
|
self,
|
||||||
|
src: impl AsRef<Path>,
|
||||||
|
dst: impl AsRef<Path>,
|
||||||
|
index_db_size: usize,
|
||||||
|
update_db_size: usize,
|
||||||
|
indexing_options: &IndexerOpts,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
info!(
|
||||||
|
"Loading dump from {}, dump database version: {}, dump version: V2",
|
||||||
|
self.dump_date, self.db_version
|
||||||
|
);
|
||||||
|
|
||||||
|
info!("Loading index database.");
|
||||||
|
HeedUuidStore::load_dump(src.as_ref(), &dst)?;
|
||||||
|
|
||||||
|
info!("Loading updates.");
|
||||||
|
UpdateStore::load_dump(&src, &dst, update_db_size)?;
|
||||||
|
|
||||||
|
info!("Loading indexes.");
|
||||||
|
let indexes_path = src.as_ref().join("indexes");
|
||||||
|
let indexes = indexes_path.read_dir()?;
|
||||||
|
for index in indexes {
|
||||||
|
let index = index?;
|
||||||
|
Index::load_dump(&index.path(), &dst, index_db_size, indexing_options)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
13
meilisearch-http/src/index_controller/dump_actor/message.rs
Normal file
13
meilisearch-http/src/index_controller/dump_actor/message.rs
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
use tokio::sync::oneshot;
|
||||||
|
|
||||||
|
use super::{DumpInfo, DumpResult};
|
||||||
|
|
||||||
|
pub enum DumpMsg {
|
||||||
|
CreateDump {
|
||||||
|
ret: oneshot::Sender<DumpResult<DumpInfo>>,
|
||||||
|
},
|
||||||
|
DumpInfo {
|
||||||
|
uid: String,
|
||||||
|
ret: oneshot::Sender<DumpResult<DumpInfo>>,
|
||||||
|
},
|
||||||
|
}
|
214
meilisearch-http/src/index_controller/dump_actor/mod.rs
Normal file
214
meilisearch-http/src/index_controller/dump_actor/mod.rs
Normal file
@ -0,0 +1,214 @@
|
|||||||
|
use std::fs::File;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
|
use anyhow::Context;
|
||||||
|
use chrono::{DateTime, Utc};
|
||||||
|
use log::{error, info, warn};
|
||||||
|
#[cfg(test)]
|
||||||
|
use mockall::automock;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use thiserror::Error;
|
||||||
|
use tokio::fs::create_dir_all;
|
||||||
|
|
||||||
|
use loaders::v1::MetadataV1;
|
||||||
|
use loaders::v2::MetadataV2;
|
||||||
|
|
||||||
|
pub use actor::DumpActor;
|
||||||
|
pub use handle_impl::*;
|
||||||
|
pub use message::DumpMsg;
|
||||||
|
|
||||||
|
use super::{update_actor::UpdateActorHandle, uuid_resolver::UuidResolverHandle};
|
||||||
|
use crate::{helpers::compression, option::IndexerOpts};
|
||||||
|
|
||||||
|
mod actor;
|
||||||
|
mod handle_impl;
|
||||||
|
mod loaders;
|
||||||
|
mod message;
|
||||||
|
|
||||||
|
const META_FILE_NAME: &str = "metadata.json";
|
||||||
|
|
||||||
|
pub type DumpResult<T> = std::result::Result<T, DumpError>;
|
||||||
|
|
||||||
|
#[derive(Error, Debug)]
|
||||||
|
pub enum DumpError {
|
||||||
|
#[error("error with index: {0}")]
|
||||||
|
Error(#[from] anyhow::Error),
|
||||||
|
#[error("Heed error: {0}")]
|
||||||
|
HeedError(#[from] heed::Error),
|
||||||
|
#[error("dump already running")]
|
||||||
|
DumpAlreadyRunning,
|
||||||
|
#[error("dump `{0}` does not exist")]
|
||||||
|
DumpDoesNotExist(String),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait::async_trait]
|
||||||
|
#[cfg_attr(test, automock)]
|
||||||
|
pub trait DumpActorHandle {
|
||||||
|
/// Start the creation of a dump
|
||||||
|
/// Implementation: [handle_impl::DumpActorHandleImpl::create_dump]
|
||||||
|
async fn create_dump(&self) -> DumpResult<DumpInfo>;
|
||||||
|
|
||||||
|
/// Return the status of an already created dump
|
||||||
|
/// Implementation: [handle_impl::DumpActorHandleImpl::dump_status]
|
||||||
|
async fn dump_info(&self, uid: String) -> DumpResult<DumpInfo>;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
|
#[serde(tag = "dumpVersion")]
|
||||||
|
pub enum Metadata {
|
||||||
|
V1(MetadataV1),
|
||||||
|
V2(MetadataV2),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Metadata {
|
||||||
|
pub fn new_v2(index_db_size: usize, update_db_size: usize) -> Self {
|
||||||
|
let meta = MetadataV2::new(index_db_size, update_db_size);
|
||||||
|
Self::V2(meta)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
|
||||||
|
#[serde(rename_all = "snake_case")]
|
||||||
|
pub enum DumpStatus {
|
||||||
|
Done,
|
||||||
|
InProgress,
|
||||||
|
Failed,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize, Clone)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct DumpInfo {
|
||||||
|
pub uid: String,
|
||||||
|
pub status: DumpStatus,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub error: Option<String>,
|
||||||
|
started_at: DateTime<Utc>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
finished_at: Option<DateTime<Utc>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DumpInfo {
|
||||||
|
pub fn new(uid: String, status: DumpStatus) -> Self {
|
||||||
|
Self {
|
||||||
|
uid,
|
||||||
|
status,
|
||||||
|
error: None,
|
||||||
|
started_at: Utc::now(),
|
||||||
|
finished_at: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_error(&mut self, error: String) {
|
||||||
|
self.status = DumpStatus::Failed;
|
||||||
|
self.finished_at = Some(Utc::now());
|
||||||
|
self.error = Some(error);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn done(&mut self) {
|
||||||
|
self.finished_at = Some(Utc::now());
|
||||||
|
self.status = DumpStatus::Done;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn dump_already_in_progress(&self) -> bool {
|
||||||
|
self.status == DumpStatus::InProgress
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn load_dump(
|
||||||
|
dst_path: impl AsRef<Path>,
|
||||||
|
src_path: impl AsRef<Path>,
|
||||||
|
index_db_size: usize,
|
||||||
|
update_db_size: usize,
|
||||||
|
indexer_opts: &IndexerOpts,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
let tmp_src = tempfile::tempdir_in(".")?;
|
||||||
|
let tmp_src_path = tmp_src.path();
|
||||||
|
|
||||||
|
compression::from_tar_gz(&src_path, tmp_src_path)?;
|
||||||
|
|
||||||
|
let meta_path = tmp_src_path.join(META_FILE_NAME);
|
||||||
|
let mut meta_file = File::open(&meta_path)?;
|
||||||
|
let meta: Metadata = serde_json::from_reader(&mut meta_file)?;
|
||||||
|
|
||||||
|
let dst_dir = dst_path
|
||||||
|
.as_ref()
|
||||||
|
.parent()
|
||||||
|
.with_context(|| format!("Invalid db path: {}", dst_path.as_ref().display()))?;
|
||||||
|
|
||||||
|
let tmp_dst = tempfile::tempdir_in(dst_dir)?;
|
||||||
|
|
||||||
|
match meta {
|
||||||
|
Metadata::V1(meta) => {
|
||||||
|
meta.load_dump(&tmp_src_path, tmp_dst.path(), index_db_size, indexer_opts)?
|
||||||
|
}
|
||||||
|
Metadata::V2(meta) => meta.load_dump(
|
||||||
|
&tmp_src_path,
|
||||||
|
tmp_dst.path(),
|
||||||
|
index_db_size,
|
||||||
|
update_db_size,
|
||||||
|
indexer_opts,
|
||||||
|
)?,
|
||||||
|
}
|
||||||
|
// Persist and atomically rename the db
|
||||||
|
let persisted_dump = tmp_dst.into_path();
|
||||||
|
if dst_path.as_ref().exists() {
|
||||||
|
warn!("Overwriting database at {}", dst_path.as_ref().display());
|
||||||
|
std::fs::remove_dir_all(&dst_path)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::fs::rename(&persisted_dump, &dst_path)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
struct DumpTask<U, P> {
|
||||||
|
path: PathBuf,
|
||||||
|
uuid_resolver: U,
|
||||||
|
update_handle: P,
|
||||||
|
uid: String,
|
||||||
|
update_db_size: usize,
|
||||||
|
index_db_size: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<U, P> DumpTask<U, P>
|
||||||
|
where
|
||||||
|
U: UuidResolverHandle + Send + Sync + Clone + 'static,
|
||||||
|
P: UpdateActorHandle + Send + Sync + Clone + 'static,
|
||||||
|
{
|
||||||
|
async fn run(self) -> anyhow::Result<()> {
|
||||||
|
info!("Performing dump.");
|
||||||
|
|
||||||
|
create_dir_all(&self.path).await?;
|
||||||
|
|
||||||
|
let path_clone = self.path.clone();
|
||||||
|
let temp_dump_dir =
|
||||||
|
tokio::task::spawn_blocking(|| tempfile::TempDir::new_in(path_clone)).await??;
|
||||||
|
let temp_dump_path = temp_dump_dir.path().to_owned();
|
||||||
|
|
||||||
|
let meta = Metadata::new_v2(self.index_db_size, self.update_db_size);
|
||||||
|
let meta_path = temp_dump_path.join(META_FILE_NAME);
|
||||||
|
let mut meta_file = File::create(&meta_path)?;
|
||||||
|
serde_json::to_writer(&mut meta_file, &meta)?;
|
||||||
|
|
||||||
|
let uuids = self.uuid_resolver.dump(temp_dump_path.clone()).await?;
|
||||||
|
|
||||||
|
self.update_handle
|
||||||
|
.dump(uuids, temp_dump_path.clone())
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let dump_path = tokio::task::spawn_blocking(move || -> anyhow::Result<PathBuf> {
|
||||||
|
let temp_dump_file = tempfile::NamedTempFile::new_in(&self.path)?;
|
||||||
|
compression::to_tar_gz(temp_dump_path, temp_dump_file.path())?;
|
||||||
|
|
||||||
|
let dump_path = self.path.join(self.uid).with_extension("dump");
|
||||||
|
temp_dump_file.persist(&dump_path)?;
|
||||||
|
|
||||||
|
Ok(dump_path)
|
||||||
|
})
|
||||||
|
.await??;
|
||||||
|
|
||||||
|
info!("Created dump in {:?}.", dump_path);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
@ -6,14 +6,15 @@ use async_stream::stream;
|
|||||||
use futures::stream::StreamExt;
|
use futures::stream::StreamExt;
|
||||||
use heed::CompactionOption;
|
use heed::CompactionOption;
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use tokio::sync::mpsc;
|
|
||||||
use tokio::task::spawn_blocking;
|
use tokio::task::spawn_blocking;
|
||||||
|
use tokio::{fs, sync::mpsc};
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings};
|
use crate::index::{
|
||||||
|
update_handler::UpdateHandler, Checked, Document, SearchQuery, SearchResult, Settings,
|
||||||
|
};
|
||||||
use crate::index_controller::{
|
use crate::index_controller::{
|
||||||
get_arc_ownership_blocking, update_handler::UpdateHandler, Failed, IndexStats, Processed,
|
get_arc_ownership_blocking, Failed, IndexStats, Processed, Processing,
|
||||||
Processing,
|
|
||||||
};
|
};
|
||||||
use crate::option::IndexerOpts;
|
use crate::option::IndexerOpts;
|
||||||
|
|
||||||
@ -30,12 +31,19 @@ pub struct IndexActor<S> {
|
|||||||
impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
||||||
pub fn new(receiver: mpsc::Receiver<IndexMsg>, store: S) -> IndexResult<Self> {
|
pub fn new(receiver: mpsc::Receiver<IndexMsg>, store: S) -> IndexResult<Self> {
|
||||||
let options = IndexerOpts::default();
|
let options = IndexerOpts::default();
|
||||||
let update_handler = UpdateHandler::new(&options).map_err(IndexError::Error)?;
|
let update_handler = UpdateHandler::new(&options)?;
|
||||||
let update_handler = Arc::new(update_handler);
|
let update_handler = Arc::new(update_handler);
|
||||||
let receiver = Some(receiver);
|
let receiver = Some(receiver);
|
||||||
Ok(Self { receiver, update_handler, store })
|
Ok(Self {
|
||||||
|
receiver,
|
||||||
|
update_handler,
|
||||||
|
store,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// `run` poll the write_receiver and read_receiver concurrently, but while messages send
|
||||||
|
/// through the read channel are processed concurrently, the messages sent through the write
|
||||||
|
/// channel are processed one at a time.
|
||||||
pub async fn run(mut self) {
|
pub async fn run(mut self) {
|
||||||
let mut receiver = self
|
let mut receiver = self
|
||||||
.receiver
|
.receiver
|
||||||
@ -119,6 +127,9 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
|||||||
Snapshot { uuid, path, ret } => {
|
Snapshot { uuid, path, ret } => {
|
||||||
let _ = ret.send(self.handle_snapshot(uuid, path).await);
|
let _ = ret.send(self.handle_snapshot(uuid, path).await);
|
||||||
}
|
}
|
||||||
|
Dump { uuid, path, ret } => {
|
||||||
|
let _ = ret.send(self.handle_dump(uuid, path).await);
|
||||||
|
}
|
||||||
GetStats { uuid, ret } => {
|
GetStats { uuid, ret } => {
|
||||||
let _ = ret.send(self.handle_get_stats(uuid).await);
|
let _ = ret.send(self.handle_get_stats(uuid).await);
|
||||||
}
|
}
|
||||||
@ -140,9 +151,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
|||||||
primary_key: Option<String>,
|
primary_key: Option<String>,
|
||||||
) -> IndexResult<IndexMeta> {
|
) -> IndexResult<IndexMeta> {
|
||||||
let index = self.store.create(uuid, primary_key).await?;
|
let index = self.store.create(uuid, primary_key).await?;
|
||||||
let meta = spawn_blocking(move || IndexMeta::new(&index))
|
let meta = spawn_blocking(move || IndexMeta::new(&index)).await??;
|
||||||
.await
|
|
||||||
.map_err(|e| IndexError::Error(e.into()))??;
|
|
||||||
Ok(meta)
|
Ok(meta)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -159,9 +168,9 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
|||||||
None => self.store.create(uuid, None).await?,
|
None => self.store.create(uuid, None).await?,
|
||||||
};
|
};
|
||||||
|
|
||||||
spawn_blocking(move || update_handler.handle_update(meta, data, index))
|
let result =
|
||||||
.await
|
spawn_blocking(move || update_handler.handle_update(meta, data, index)).await?;
|
||||||
.map_err(|e| IndexError::Error(e.into()))
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn handle_settings(&self, uuid: Uuid) -> IndexResult<Settings<Checked>> {
|
async fn handle_settings(&self, uuid: Uuid) -> IndexResult<Settings<Checked>> {
|
||||||
@ -170,9 +179,8 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
|||||||
.get(uuid)
|
.get(uuid)
|
||||||
.await?
|
.await?
|
||||||
.ok_or(IndexError::UnexistingIndex)?;
|
.ok_or(IndexError::UnexistingIndex)?;
|
||||||
spawn_blocking(move || index.settings().map_err(IndexError::Error))
|
let result = spawn_blocking(move || index.settings()).await??;
|
||||||
.await
|
Ok(result)
|
||||||
.map_err(|e| IndexError::Error(e.into()))?
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn handle_fetch_documents(
|
async fn handle_fetch_documents(
|
||||||
@ -187,13 +195,11 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
|||||||
.get(uuid)
|
.get(uuid)
|
||||||
.await?
|
.await?
|
||||||
.ok_or(IndexError::UnexistingIndex)?;
|
.ok_or(IndexError::UnexistingIndex)?;
|
||||||
spawn_blocking(move || {
|
let result =
|
||||||
index
|
spawn_blocking(move || index.retrieve_documents(offset, limit, attributes_to_retrieve))
|
||||||
.retrieve_documents(offset, limit, attributes_to_retrieve)
|
.await??;
|
||||||
.map_err(IndexError::Error)
|
|
||||||
})
|
Ok(result)
|
||||||
.await
|
|
||||||
.map_err(|e| IndexError::Error(e.into()))?
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn handle_fetch_document(
|
async fn handle_fetch_document(
|
||||||
@ -207,13 +213,12 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
|||||||
.get(uuid)
|
.get(uuid)
|
||||||
.await?
|
.await?
|
||||||
.ok_or(IndexError::UnexistingIndex)?;
|
.ok_or(IndexError::UnexistingIndex)?;
|
||||||
spawn_blocking(move || {
|
|
||||||
index
|
let result =
|
||||||
.retrieve_document(doc_id, attributes_to_retrieve)
|
spawn_blocking(move || index.retrieve_document(doc_id, attributes_to_retrieve))
|
||||||
.map_err(IndexError::Error)
|
.await??;
|
||||||
})
|
|
||||||
.await
|
Ok(result)
|
||||||
.map_err(|e| IndexError::Error(e.into()))?
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn handle_delete(&self, uuid: Uuid) -> IndexResult<()> {
|
async fn handle_delete(&self, uuid: Uuid) -> IndexResult<()> {
|
||||||
@ -236,9 +241,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
|||||||
async fn handle_get_meta(&self, uuid: Uuid) -> IndexResult<IndexMeta> {
|
async fn handle_get_meta(&self, uuid: Uuid) -> IndexResult<IndexMeta> {
|
||||||
match self.store.get(uuid).await? {
|
match self.store.get(uuid).await? {
|
||||||
Some(index) => {
|
Some(index) => {
|
||||||
let meta = spawn_blocking(move || IndexMeta::new(&index))
|
let meta = spawn_blocking(move || IndexMeta::new(&index)).await??;
|
||||||
.await
|
|
||||||
.map_err(|e| IndexError::Error(e.into()))??;
|
|
||||||
Ok(meta)
|
Ok(meta)
|
||||||
}
|
}
|
||||||
None => Err(IndexError::UnexistingIndex),
|
None => Err(IndexError::UnexistingIndex),
|
||||||
@ -256,7 +259,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
|||||||
.await?
|
.await?
|
||||||
.ok_or(IndexError::UnexistingIndex)?;
|
.ok_or(IndexError::UnexistingIndex)?;
|
||||||
|
|
||||||
spawn_blocking(move || match index_settings.primary_key {
|
let result = spawn_blocking(move || match index_settings.primary_key {
|
||||||
Some(ref primary_key) => {
|
Some(ref primary_key) => {
|
||||||
let mut txn = index.write_txn()?;
|
let mut txn = index.write_txn()?;
|
||||||
if index.primary_key(&txn)?.is_some() {
|
if index.primary_key(&txn)?.is_some() {
|
||||||
@ -272,23 +275,22 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
|||||||
Ok(meta)
|
Ok(meta)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.await
|
.await??;
|
||||||
.map_err(|e| IndexError::Error(e.into()))?
|
|
||||||
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn handle_snapshot(&self, uuid: Uuid, mut path: PathBuf) -> IndexResult<()> {
|
async fn handle_snapshot(&self, uuid: Uuid, mut path: PathBuf) -> IndexResult<()> {
|
||||||
use tokio::fs::create_dir_all;
|
use tokio::fs::create_dir_all;
|
||||||
|
|
||||||
path.push("indexes");
|
path.push("indexes");
|
||||||
create_dir_all(&path)
|
create_dir_all(&path).await?;
|
||||||
.await
|
|
||||||
.map_err(|e| IndexError::Error(e.into()))?;
|
|
||||||
|
|
||||||
if let Some(index) = self.store.get(uuid).await? {
|
if let Some(index) = self.store.get(uuid).await? {
|
||||||
let mut index_path = path.join(format!("index-{}", uuid));
|
let mut index_path = path.join(format!("index-{}", uuid));
|
||||||
create_dir_all(&index_path)
|
|
||||||
.await
|
create_dir_all(&index_path).await?;
|
||||||
.map_err(|e| IndexError::Error(e.into()))?;
|
|
||||||
index_path.push("data.mdb");
|
index_path.push("data.mdb");
|
||||||
spawn_blocking(move || -> anyhow::Result<()> {
|
spawn_blocking(move || -> anyhow::Result<()> {
|
||||||
// Get write txn to wait for ongoing write transaction before snapshot.
|
// Get write txn to wait for ongoing write transaction before snapshot.
|
||||||
@ -298,14 +300,29 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
|||||||
.copy_to_path(index_path, CompactionOption::Enabled)?;
|
.copy_to_path(index_path, CompactionOption::Enabled)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
})
|
})
|
||||||
.await
|
.await??;
|
||||||
.map_err(|e| IndexError::Error(e.into()))?
|
|
||||||
.map_err(IndexError::Error)?;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Create a `documents.jsonl` and a `settings.json` in `path/uid/` with a dump of all the
|
||||||
|
/// documents and all the settings.
|
||||||
|
async fn handle_dump(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()> {
|
||||||
|
let index = self
|
||||||
|
.store
|
||||||
|
.get(uuid)
|
||||||
|
.await?
|
||||||
|
.ok_or(IndexError::UnexistingIndex)?;
|
||||||
|
|
||||||
|
let path = path.join(format!("indexes/index-{}/", uuid));
|
||||||
|
fs::create_dir_all(&path).await?;
|
||||||
|
|
||||||
|
tokio::task::spawn_blocking(move || index.dump(path)).await??;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
async fn handle_get_stats(&self, uuid: Uuid) -> IndexResult<IndexStats> {
|
async fn handle_get_stats(&self, uuid: Uuid) -> IndexResult<IndexStats> {
|
||||||
let index = self
|
let index = self
|
||||||
.store
|
.store
|
||||||
@ -323,7 +340,6 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
|||||||
fields_distribution: index.fields_distribution(&rtxn)?,
|
fields_distribution: index.fields_distribution(&rtxn)?,
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
.await
|
.await?
|
||||||
.map_err(|e| IndexError::Error(e.into()))?
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3,7 +3,10 @@ use std::path::{Path, PathBuf};
|
|||||||
use tokio::sync::{mpsc, oneshot};
|
use tokio::sync::{mpsc, oneshot};
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
use crate::{index::Checked, index_controller::{IndexSettings, IndexStats, Processing}};
|
use crate::{
|
||||||
|
index::Checked,
|
||||||
|
index_controller::{IndexSettings, IndexStats, Processing},
|
||||||
|
};
|
||||||
use crate::{
|
use crate::{
|
||||||
index::{Document, SearchQuery, SearchResult, Settings},
|
index::{Document, SearchQuery, SearchResult, Settings},
|
||||||
index_controller::{Failed, Processed},
|
index_controller::{Failed, Processed},
|
||||||
@ -136,6 +139,13 @@ impl IndexActorHandle for IndexActorHandleImpl {
|
|||||||
Ok(receiver.await.expect("IndexActor has been killed")?)
|
Ok(receiver.await.expect("IndexActor has been killed")?)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn dump(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()> {
|
||||||
|
let (ret, receiver) = oneshot::channel();
|
||||||
|
let msg = IndexMsg::Dump { uuid, path, ret };
|
||||||
|
let _ = self.sender.send(msg).await;
|
||||||
|
Ok(receiver.await.expect("IndexActor has been killed")?)
|
||||||
|
}
|
||||||
|
|
||||||
async fn get_index_stats(&self, uuid: Uuid) -> IndexResult<IndexStats> {
|
async fn get_index_stats(&self, uuid: Uuid) -> IndexResult<IndexStats> {
|
||||||
let (ret, receiver) = oneshot::channel();
|
let (ret, receiver) = oneshot::channel();
|
||||||
let msg = IndexMsg::GetStats { uuid, ret };
|
let msg = IndexMsg::GetStats { uuid, ret };
|
||||||
|
@ -3,7 +3,7 @@ use std::path::PathBuf;
|
|||||||
use tokio::sync::oneshot;
|
use tokio::sync::oneshot;
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
use crate::index::{Document, SearchQuery, SearchResult, Settings, Checked};
|
use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings};
|
||||||
use crate::index_controller::{Failed, IndexStats, Processed, Processing};
|
use crate::index_controller::{Failed, IndexStats, Processed, Processing};
|
||||||
|
|
||||||
use super::{IndexMeta, IndexResult, IndexSettings};
|
use super::{IndexMeta, IndexResult, IndexSettings};
|
||||||
@ -60,6 +60,11 @@ pub enum IndexMsg {
|
|||||||
path: PathBuf,
|
path: PathBuf,
|
||||||
ret: oneshot::Sender<IndexResult<()>>,
|
ret: oneshot::Sender<IndexResult<()>>,
|
||||||
},
|
},
|
||||||
|
Dump {
|
||||||
|
uuid: Uuid,
|
||||||
|
path: PathBuf,
|
||||||
|
ret: oneshot::Sender<IndexResult<()>>,
|
||||||
|
},
|
||||||
GetStats {
|
GetStats {
|
||||||
uuid: Uuid,
|
uuid: Uuid,
|
||||||
ret: oneshot::Sender<IndexResult<IndexStats>>,
|
ret: oneshot::Sender<IndexResult<IndexStats>>,
|
||||||
|
@ -15,7 +15,7 @@ use message::IndexMsg;
|
|||||||
use store::{IndexStore, MapIndexStore};
|
use store::{IndexStore, MapIndexStore};
|
||||||
|
|
||||||
use crate::index::{Checked, Document, Index, SearchQuery, SearchResult, Settings};
|
use crate::index::{Checked, Document, Index, SearchQuery, SearchResult, Settings};
|
||||||
use crate::index_controller::{Failed, Processed, Processing, IndexStats};
|
use crate::index_controller::{Failed, IndexStats, Processed, Processing};
|
||||||
|
|
||||||
use super::IndexSettings;
|
use super::IndexSettings;
|
||||||
|
|
||||||
@ -31,7 +31,7 @@ pub type IndexResult<T> = std::result::Result<T, IndexError>;
|
|||||||
pub struct IndexMeta {
|
pub struct IndexMeta {
|
||||||
created_at: DateTime<Utc>,
|
created_at: DateTime<Utc>,
|
||||||
pub updated_at: DateTime<Utc>,
|
pub updated_at: DateTime<Utc>,
|
||||||
primary_key: Option<String>,
|
pub primary_key: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl IndexMeta {
|
impl IndexMeta {
|
||||||
@ -44,24 +44,45 @@ impl IndexMeta {
|
|||||||
let created_at = index.created_at(&txn)?;
|
let created_at = index.created_at(&txn)?;
|
||||||
let updated_at = index.updated_at(&txn)?;
|
let updated_at = index.updated_at(&txn)?;
|
||||||
let primary_key = index.primary_key(&txn)?.map(String::from);
|
let primary_key = index.primary_key(&txn)?.map(String::from);
|
||||||
Ok(Self { created_at, updated_at, primary_key })
|
Ok(Self {
|
||||||
|
created_at,
|
||||||
|
updated_at,
|
||||||
|
primary_key,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Error, Debug)]
|
#[derive(Error, Debug)]
|
||||||
pub enum IndexError {
|
pub enum IndexError {
|
||||||
#[error("error with index: {0}")]
|
|
||||||
Error(#[from] anyhow::Error),
|
|
||||||
#[error("index already exists")]
|
#[error("index already exists")]
|
||||||
IndexAlreadyExists,
|
IndexAlreadyExists,
|
||||||
#[error("Index doesn't exists")]
|
#[error("Index doesn't exists")]
|
||||||
UnexistingIndex,
|
UnexistingIndex,
|
||||||
#[error("Heed error: {0}")]
|
|
||||||
HeedError(#[from] heed::Error),
|
|
||||||
#[error("Existing primary key")]
|
#[error("Existing primary key")]
|
||||||
ExistingPrimaryKey,
|
ExistingPrimaryKey,
|
||||||
|
#[error("Internal Index Error: {0}")]
|
||||||
|
Internal(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
macro_rules! internal_error {
|
||||||
|
($($other:path), *) => {
|
||||||
|
$(
|
||||||
|
impl From<$other> for IndexError {
|
||||||
|
fn from(other: $other) -> Self {
|
||||||
|
Self::Internal(other.to_string())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)*
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
internal_error!(
|
||||||
|
anyhow::Error,
|
||||||
|
heed::Error,
|
||||||
|
tokio::task::JoinError,
|
||||||
|
std::io::Error
|
||||||
|
);
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
#[cfg_attr(test, automock)]
|
#[cfg_attr(test, automock)]
|
||||||
pub trait IndexActorHandle {
|
pub trait IndexActorHandle {
|
||||||
@ -97,6 +118,7 @@ pub trait IndexActorHandle {
|
|||||||
index_settings: IndexSettings,
|
index_settings: IndexSettings,
|
||||||
) -> IndexResult<IndexMeta>;
|
) -> IndexResult<IndexMeta>;
|
||||||
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()>;
|
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()>;
|
||||||
|
async fn dump(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()>;
|
||||||
async fn get_index_stats(&self, uuid: Uuid) -> IndexResult<IndexStats>;
|
async fn get_index_stats(&self, uuid: Uuid) -> IndexResult<IndexStats>;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -177,6 +199,10 @@ mod test {
|
|||||||
self.as_ref().snapshot(uuid, path).await
|
self.as_ref().snapshot(uuid, path).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn dump(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()> {
|
||||||
|
self.as_ref().dump(uuid, path).await
|
||||||
|
}
|
||||||
|
|
||||||
async fn get_index_stats(&self, uuid: Uuid) -> IndexResult<IndexStats> {
|
async fn get_index_stats(&self, uuid: Uuid) -> IndexResult<IndexStats> {
|
||||||
self.as_ref().get_index_stats(uuid).await
|
self.as_ref().get_index_stats(uuid).await
|
||||||
}
|
}
|
||||||
|
@ -2,7 +2,6 @@ use std::collections::HashMap;
|
|||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use heed::EnvOpenOptions;
|
|
||||||
use tokio::fs;
|
use tokio::fs;
|
||||||
use tokio::sync::RwLock;
|
use tokio::sync::RwLock;
|
||||||
use tokio::task::spawn_blocking;
|
use tokio::task::spawn_blocking;
|
||||||
@ -48,7 +47,7 @@ impl IndexStore for MapIndexStore {
|
|||||||
|
|
||||||
let index_size = self.index_size;
|
let index_size = self.index_size;
|
||||||
let index = spawn_blocking(move || -> IndexResult<Index> {
|
let index = spawn_blocking(move || -> IndexResult<Index> {
|
||||||
let index = open_index(&path, index_size)?;
|
let index = Index::open(path, index_size)?;
|
||||||
if let Some(primary_key) = primary_key {
|
if let Some(primary_key) = primary_key {
|
||||||
let mut txn = index.write_txn()?;
|
let mut txn = index.write_txn()?;
|
||||||
index.put_primary_key(&mut txn, &primary_key)?;
|
index.put_primary_key(&mut txn, &primary_key)?;
|
||||||
@ -56,8 +55,7 @@ impl IndexStore for MapIndexStore {
|
|||||||
}
|
}
|
||||||
Ok(index)
|
Ok(index)
|
||||||
})
|
})
|
||||||
.await
|
.await??;
|
||||||
.map_err(|e| IndexError::Error(e.into()))??;
|
|
||||||
|
|
||||||
self.index_store.write().await.insert(uuid, index.clone());
|
self.index_store.write().await.insert(uuid, index.clone());
|
||||||
|
|
||||||
@ -77,9 +75,7 @@ impl IndexStore for MapIndexStore {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let index_size = self.index_size;
|
let index_size = self.index_size;
|
||||||
let index = spawn_blocking(move || open_index(path, index_size))
|
let index = spawn_blocking(move || Index::open(path, index_size)).await??;
|
||||||
.await
|
|
||||||
.map_err(|e| IndexError::Error(e.into()))??;
|
|
||||||
self.index_store.write().await.insert(uuid, index.clone());
|
self.index_store.write().await.insert(uuid, index.clone());
|
||||||
Ok(Some(index))
|
Ok(Some(index))
|
||||||
}
|
}
|
||||||
@ -88,18 +84,8 @@ impl IndexStore for MapIndexStore {
|
|||||||
|
|
||||||
async fn delete(&self, uuid: Uuid) -> IndexResult<Option<Index>> {
|
async fn delete(&self, uuid: Uuid) -> IndexResult<Option<Index>> {
|
||||||
let db_path = self.path.join(format!("index-{}", uuid));
|
let db_path = self.path.join(format!("index-{}", uuid));
|
||||||
fs::remove_dir_all(db_path)
|
fs::remove_dir_all(db_path).await?;
|
||||||
.await
|
|
||||||
.map_err(|e| IndexError::Error(e.into()))?;
|
|
||||||
let index = self.index_store.write().await.remove(&uuid);
|
let index = self.index_store.write().await.remove(&uuid);
|
||||||
Ok(index)
|
Ok(index)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn open_index(path: impl AsRef<Path>, size: usize) -> IndexResult<Index> {
|
|
||||||
std::fs::create_dir_all(&path).map_err(|e| IndexError::Error(e.into()))?;
|
|
||||||
let mut options = EnvOpenOptions::new();
|
|
||||||
options.map_size(size);
|
|
||||||
let index = milli::Index::new(options, &path).map_err(IndexError::Error)?;
|
|
||||||
Ok(Index(Arc::new(index)))
|
|
||||||
}
|
|
||||||
|
@ -14,19 +14,23 @@ use tokio::sync::mpsc;
|
|||||||
use tokio::time::sleep;
|
use tokio::time::sleep;
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
pub use updates::*;
|
use dump_actor::DumpActorHandle;
|
||||||
|
pub use dump_actor::{DumpInfo, DumpStatus};
|
||||||
use index_actor::IndexActorHandle;
|
use index_actor::IndexActorHandle;
|
||||||
use snapshot::{SnapshotService, load_snapshot};
|
use snapshot::{load_snapshot, SnapshotService};
|
||||||
use update_actor::UpdateActorHandle;
|
use update_actor::UpdateActorHandle;
|
||||||
use uuid_resolver::{UuidError, UuidResolverHandle};
|
pub use updates::*;
|
||||||
|
use uuid_resolver::{UuidResolverError, UuidResolverHandle};
|
||||||
|
|
||||||
use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings};
|
use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings};
|
||||||
use crate::option::Opt;
|
use crate::option::Opt;
|
||||||
|
|
||||||
|
use self::dump_actor::load_dump;
|
||||||
|
|
||||||
|
mod dump_actor;
|
||||||
mod index_actor;
|
mod index_actor;
|
||||||
mod snapshot;
|
mod snapshot;
|
||||||
mod update_actor;
|
mod update_actor;
|
||||||
mod update_handler;
|
|
||||||
mod updates;
|
mod updates;
|
||||||
mod uuid_resolver;
|
mod uuid_resolver;
|
||||||
|
|
||||||
@ -60,10 +64,12 @@ pub struct IndexStats {
|
|||||||
pub fields_distribution: FieldsDistribution,
|
pub fields_distribution: FieldsDistribution,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
pub struct IndexController {
|
pub struct IndexController {
|
||||||
uuid_resolver: uuid_resolver::UuidResolverHandleImpl,
|
uuid_resolver: uuid_resolver::UuidResolverHandleImpl,
|
||||||
index_handle: index_actor::IndexActorHandleImpl,
|
index_handle: index_actor::IndexActorHandleImpl,
|
||||||
update_handle: update_actor::UpdateActorHandleImpl<Bytes>,
|
update_handle: update_actor::UpdateActorHandleImpl<Bytes>,
|
||||||
|
dump_handle: dump_actor::DumpActorHandleImpl,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
@ -87,6 +93,14 @@ impl IndexController {
|
|||||||
options.ignore_snapshot_if_db_exists,
|
options.ignore_snapshot_if_db_exists,
|
||||||
options.ignore_missing_snapshot,
|
options.ignore_missing_snapshot,
|
||||||
)?;
|
)?;
|
||||||
|
} else if let Some(ref src_path) = options.import_dump {
|
||||||
|
load_dump(
|
||||||
|
&options.db_path,
|
||||||
|
src_path,
|
||||||
|
options.max_mdb_size.get_bytes() as usize,
|
||||||
|
options.max_udb_size.get_bytes() as usize,
|
||||||
|
&options.indexer_options,
|
||||||
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::fs::create_dir_all(&path)?;
|
std::fs::create_dir_all(&path)?;
|
||||||
@ -98,6 +112,13 @@ impl IndexController {
|
|||||||
&path,
|
&path,
|
||||||
update_store_size,
|
update_store_size,
|
||||||
)?;
|
)?;
|
||||||
|
let dump_handle = dump_actor::DumpActorHandleImpl::new(
|
||||||
|
&options.dumps_dir,
|
||||||
|
uuid_resolver.clone(),
|
||||||
|
update_handle.clone(),
|
||||||
|
options.max_mdb_size.get_bytes() as usize,
|
||||||
|
options.max_udb_size.get_bytes() as usize,
|
||||||
|
)?;
|
||||||
|
|
||||||
if options.schedule_snapshot {
|
if options.schedule_snapshot {
|
||||||
let snapshot_service = SnapshotService::new(
|
let snapshot_service = SnapshotService::new(
|
||||||
@ -119,6 +140,7 @@ impl IndexController {
|
|||||||
uuid_resolver,
|
uuid_resolver,
|
||||||
index_handle,
|
index_handle,
|
||||||
update_handle,
|
update_handle,
|
||||||
|
dump_handle,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -143,11 +165,6 @@ impl IndexController {
|
|||||||
// registered and the update_actor that waits for the the payload to be sent to it.
|
// registered and the update_actor that waits for the the payload to be sent to it.
|
||||||
tokio::task::spawn_local(async move {
|
tokio::task::spawn_local(async move {
|
||||||
payload
|
payload
|
||||||
.map(|bytes| {
|
|
||||||
bytes.map_err(|e| {
|
|
||||||
Box::new(e) as Box<dyn std::error::Error + Sync + Send + 'static>
|
|
||||||
})
|
|
||||||
})
|
|
||||||
.for_each(|r| async {
|
.for_each(|r| async {
|
||||||
let _ = sender.send(r).await;
|
let _ = sender.send(r).await;
|
||||||
})
|
})
|
||||||
@ -160,7 +177,7 @@ impl IndexController {
|
|||||||
|
|
||||||
match self.uuid_resolver.get(uid).await {
|
match self.uuid_resolver.get(uid).await {
|
||||||
Ok(uuid) => Ok(perform_update(uuid).await?),
|
Ok(uuid) => Ok(perform_update(uuid).await?),
|
||||||
Err(UuidError::UnexistingIndex(name)) => {
|
Err(UuidResolverError::UnexistingIndex(name)) => {
|
||||||
let uuid = Uuid::new_v4();
|
let uuid = Uuid::new_v4();
|
||||||
let status = perform_update(uuid).await?;
|
let status = perform_update(uuid).await?;
|
||||||
// ignore if index creation fails now, since it may already have been created
|
// ignore if index creation fails now, since it may already have been created
|
||||||
@ -206,7 +223,7 @@ impl IndexController {
|
|||||||
create: bool,
|
create: bool,
|
||||||
) -> anyhow::Result<UpdateStatus> {
|
) -> anyhow::Result<UpdateStatus> {
|
||||||
let perform_udpate = |uuid| async move {
|
let perform_udpate = |uuid| async move {
|
||||||
let meta = UpdateMeta::Settings(settings);
|
let meta = UpdateMeta::Settings(settings.into_unchecked());
|
||||||
// Nothing so send, drop the sender right away, as not to block the update actor.
|
// Nothing so send, drop the sender right away, as not to block the update actor.
|
||||||
let (_, receiver) = mpsc::channel(1);
|
let (_, receiver) = mpsc::channel(1);
|
||||||
self.update_handle.update(meta, receiver, uuid).await
|
self.update_handle.update(meta, receiver, uuid).await
|
||||||
@ -214,7 +231,7 @@ impl IndexController {
|
|||||||
|
|
||||||
match self.uuid_resolver.get(uid).await {
|
match self.uuid_resolver.get(uid).await {
|
||||||
Ok(uuid) => Ok(perform_udpate(uuid).await?),
|
Ok(uuid) => Ok(perform_udpate(uuid).await?),
|
||||||
Err(UuidError::UnexistingIndex(name)) if create => {
|
Err(UuidResolverError::UnexistingIndex(name)) if create => {
|
||||||
let uuid = Uuid::new_v4();
|
let uuid = Uuid::new_v4();
|
||||||
let status = perform_udpate(uuid).await?;
|
let status = perform_udpate(uuid).await?;
|
||||||
// ignore if index creation fails now, since it may already have been created
|
// ignore if index creation fails now, since it may already have been created
|
||||||
@ -393,6 +410,14 @@ impl IndexController {
|
|||||||
indexes,
|
indexes,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Asks the dump handle to create a dump and returns the `DumpInfo` it reports.
///
/// Any error coming back from the handle is converted into an `anyhow::Error`.
pub async fn create_dump(&self) -> anyhow::Result<DumpInfo> {
    let info = self.dump_handle.create_dump().await?;
    Ok(info)
}
|
||||||
|
|
||||||
|
/// Looks up the `DumpInfo` of the dump identified by `uid` through the dump handle.
///
/// Any error coming back from the handle is converted into an `anyhow::Error`.
pub async fn dump_info(&self, uid: String) -> anyhow::Result<DumpInfo> {
    let info = self.dump_handle.dump_info(uid).await?;
    Ok(info)
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T {
|
pub async fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T {
|
||||||
|
@ -144,7 +144,7 @@ mod test {
|
|||||||
use crate::index_controller::update_actor::{
|
use crate::index_controller::update_actor::{
|
||||||
MockUpdateActorHandle, UpdateActorHandleImpl, UpdateError,
|
MockUpdateActorHandle, UpdateActorHandleImpl, UpdateError,
|
||||||
};
|
};
|
||||||
use crate::index_controller::uuid_resolver::{MockUuidResolverHandle, UuidError};
|
use crate::index_controller::uuid_resolver::{MockUuidResolverHandle, UuidResolverError};
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn test_normal() {
|
async fn test_normal() {
|
||||||
@ -193,7 +193,7 @@ mod test {
|
|||||||
.expect_snapshot()
|
.expect_snapshot()
|
||||||
.times(1)
|
.times(1)
|
||||||
// abitrary error
|
// abitrary error
|
||||||
.returning(|_| Box::pin(err(UuidError::NameAlreadyExist)));
|
.returning(|_| Box::pin(err(UuidResolverError::NameAlreadyExist)));
|
||||||
|
|
||||||
let update_handle = MockUpdateActorHandle::new();
|
let update_handle = MockUpdateActorHandle::new();
|
||||||
|
|
||||||
@ -248,7 +248,7 @@ mod test {
|
|||||||
// we expect the funtion to be called between 2 and 3 time in the given interval.
|
// we expect the funtion to be called between 2 and 3 time in the given interval.
|
||||||
.times(2..4)
|
.times(2..4)
|
||||||
// abitrary error, to short-circuit the function
|
// abitrary error, to short-circuit the function
|
||||||
.returning(move |_| Box::pin(err(UuidError::NameAlreadyExist)));
|
.returning(move |_| Box::pin(err(UuidResolverError::NameAlreadyExist)));
|
||||||
|
|
||||||
let update_handle = MockUpdateActorHandle::new();
|
let update_handle = MockUpdateActorHandle::new();
|
||||||
|
|
||||||
|
@ -11,7 +11,7 @@ use tokio::sync::mpsc;
|
|||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
use super::{PayloadData, Result, UpdateError, UpdateMsg, UpdateStore, UpdateStoreInfo};
|
use super::{PayloadData, Result, UpdateError, UpdateMsg, UpdateStore, UpdateStoreInfo};
|
||||||
use crate::index_controller::index_actor::{IndexActorHandle};
|
use crate::index_controller::index_actor::IndexActorHandle;
|
||||||
use crate::index_controller::{UpdateMeta, UpdateStatus};
|
use crate::index_controller::{UpdateMeta, UpdateStatus};
|
||||||
|
|
||||||
pub struct UpdateActor<D, I> {
|
pub struct UpdateActor<D, I> {
|
||||||
@ -42,7 +42,12 @@ where
|
|||||||
let store = UpdateStore::open(options, &path, index_handle.clone())?;
|
let store = UpdateStore::open(options, &path, index_handle.clone())?;
|
||||||
std::fs::create_dir_all(path.join("update_files"))?;
|
std::fs::create_dir_all(path.join("update_files"))?;
|
||||||
assert!(path.exists());
|
assert!(path.exists());
|
||||||
Ok(Self { path, store, inbox, index_handle })
|
Ok(Self {
|
||||||
|
path,
|
||||||
|
store,
|
||||||
|
inbox,
|
||||||
|
index_handle,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn run(mut self) {
|
pub async fn run(mut self) {
|
||||||
@ -72,6 +77,9 @@ where
|
|||||||
Some(Snapshot { uuids, path, ret }) => {
|
Some(Snapshot { uuids, path, ret }) => {
|
||||||
let _ = ret.send(self.handle_snapshot(uuids, path).await);
|
let _ = ret.send(self.handle_snapshot(uuids, path).await);
|
||||||
}
|
}
|
||||||
|
Some(Dump { uuids, path, ret }) => {
|
||||||
|
let _ = ret.send(self.handle_dump(uuids, path).await);
|
||||||
|
}
|
||||||
Some(GetInfo { ret }) => {
|
Some(GetInfo { ret }) => {
|
||||||
let _ = ret.send(self.handle_get_info().await);
|
let _ = ret.send(self.handle_get_info().await);
|
||||||
}
|
}
|
||||||
@ -86,11 +94,8 @@ where
|
|||||||
meta: UpdateMeta,
|
meta: UpdateMeta,
|
||||||
mut payload: mpsc::Receiver<PayloadData<D>>,
|
mut payload: mpsc::Receiver<PayloadData<D>>,
|
||||||
) -> Result<UpdateStatus> {
|
) -> Result<UpdateStatus> {
|
||||||
|
|
||||||
let file_path = match meta {
|
let file_path = match meta {
|
||||||
UpdateMeta::DocumentsAddition { .. }
|
UpdateMeta::DocumentsAddition { .. } | UpdateMeta::DeleteDocuments => {
|
||||||
| UpdateMeta::DeleteDocuments => {
|
|
||||||
|
|
||||||
let update_file_id = uuid::Uuid::new_v4();
|
let update_file_id = uuid::Uuid::new_v4();
|
||||||
let path = self
|
let path = self
|
||||||
.path
|
.path
|
||||||
@ -100,39 +105,26 @@ where
|
|||||||
.write(true)
|
.write(true)
|
||||||
.create(true)
|
.create(true)
|
||||||
.open(&path)
|
.open(&path)
|
||||||
.await
|
.await?;
|
||||||
.map_err(|e| UpdateError::Error(Box::new(e)))?;
|
|
||||||
|
|
||||||
let mut file_len = 0;
|
let mut file_len = 0;
|
||||||
while let Some(bytes) = payload.recv().await {
|
while let Some(bytes) = payload.recv().await {
|
||||||
match bytes {
|
let bytes = bytes?;
|
||||||
Ok(bytes) => {
|
|
||||||
file_len += bytes.as_ref().len();
|
file_len += bytes.as_ref().len();
|
||||||
file.write_all(bytes.as_ref())
|
file.write_all(bytes.as_ref()).await?;
|
||||||
.await
|
|
||||||
.map_err(|e| UpdateError::Error(Box::new(e)))?;
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
return Err(UpdateError::Error(e));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if file_len != 0 {
|
if file_len != 0 {
|
||||||
file.flush()
|
file.flush().await?;
|
||||||
.await
|
|
||||||
.map_err(|e| UpdateError::Error(Box::new(e)))?;
|
|
||||||
let file = file.into_std().await;
|
let file = file.into_std().await;
|
||||||
Some((file, path))
|
Some((file, update_file_id))
|
||||||
} else {
|
} else {
|
||||||
// empty update, delete the empty file.
|
// empty update, delete the empty file.
|
||||||
fs::remove_file(&path)
|
fs::remove_file(&path).await?;
|
||||||
.await
|
|
||||||
.map_err(|e| UpdateError::Error(Box::new(e)))?;
|
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => None
|
_ => None,
|
||||||
};
|
};
|
||||||
|
|
||||||
let update_store = self.store.clone();
|
let update_store = self.store.clone();
|
||||||
@ -141,52 +133,45 @@ where
|
|||||||
use std::io::{copy, sink, BufReader, Seek};
|
use std::io::{copy, sink, BufReader, Seek};
|
||||||
|
|
||||||
// If the payload is empty, ignore the check.
|
// If the payload is empty, ignore the check.
|
||||||
let path = if let Some((mut file, path)) = file_path {
|
let update_uuid = if let Some((mut file, uuid)) = file_path {
|
||||||
// set the file back to the beginning
|
// set the file back to the beginning
|
||||||
file.seek(SeekFrom::Start(0)).map_err(|e| UpdateError::Error(Box::new(e)))?;
|
file.seek(SeekFrom::Start(0))?;
|
||||||
// Check that the json payload is valid:
|
// Check that the json payload is valid:
|
||||||
let reader = BufReader::new(&mut file);
|
let reader = BufReader::new(&mut file);
|
||||||
let mut checker = JsonChecker::new(reader);
|
let mut checker = JsonChecker::new(reader);
|
||||||
|
|
||||||
if copy(&mut checker, &mut sink()).is_err() || checker.finish().is_err() {
|
if copy(&mut checker, &mut sink()).is_err() || checker.finish().is_err() {
|
||||||
// The json file is invalid, we use Serde to get a nice error message:
|
// The json file is invalid, we use Serde to get a nice error message:
|
||||||
file.seek(SeekFrom::Start(0))
|
file.seek(SeekFrom::Start(0))?;
|
||||||
.map_err(|e| UpdateError::Error(Box::new(e)))?;
|
let _: serde_json::Value = serde_json::from_reader(file)?;
|
||||||
let _: serde_json::Value = serde_json::from_reader(file)
|
|
||||||
.map_err(|e| UpdateError::Error(Box::new(e)))?;
|
|
||||||
}
|
}
|
||||||
Some(path)
|
Some(uuid)
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
};
|
};
|
||||||
|
|
||||||
// The payload is valid, we can register it to the update store.
|
// The payload is valid, we can register it to the update store.
|
||||||
update_store
|
let status = update_store
|
||||||
.register_update(meta, path, uuid)
|
.register_update(meta, update_uuid, uuid)
|
||||||
.map(UpdateStatus::Enqueued)
|
.map(UpdateStatus::Enqueued)?;
|
||||||
.map_err(|e| UpdateError::Error(Box::new(e)))
|
Ok(status)
|
||||||
})
|
})
|
||||||
.await
|
.await?
|
||||||
.map_err(|e| UpdateError::Error(Box::new(e)))?
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn handle_list_updates(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>> {
|
async fn handle_list_updates(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>> {
|
||||||
let update_store = self.store.clone();
|
let update_store = self.store.clone();
|
||||||
tokio::task::spawn_blocking(move || {
|
tokio::task::spawn_blocking(move || {
|
||||||
let result = update_store
|
let result = update_store.list(uuid)?;
|
||||||
.list(uuid)
|
|
||||||
.map_err(|e| UpdateError::Error(e.into()))?;
|
|
||||||
Ok(result)
|
Ok(result)
|
||||||
})
|
})
|
||||||
.await
|
.await?
|
||||||
.map_err(|e| UpdateError::Error(Box::new(e)))?
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn handle_get_update(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus> {
|
async fn handle_get_update(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus> {
|
||||||
let store = self.store.clone();
|
let store = self.store.clone();
|
||||||
let result = store
|
let result = store
|
||||||
.meta(uuid, id)
|
.meta(uuid, id)?
|
||||||
.map_err(|e| UpdateError::Error(Box::new(e)))?
|
|
||||||
.ok_or(UpdateError::UnexistingUpdate(id))?;
|
.ok_or(UpdateError::UnexistingUpdate(id))?;
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
@ -194,10 +179,7 @@ where
|
|||||||
async fn handle_delete(&self, uuid: Uuid) -> Result<()> {
|
async fn handle_delete(&self, uuid: Uuid) -> Result<()> {
|
||||||
let store = self.store.clone();
|
let store = self.store.clone();
|
||||||
|
|
||||||
tokio::task::spawn_blocking(move || store.delete_all(uuid))
|
tokio::task::spawn_blocking(move || store.delete_all(uuid)).await??;
|
||||||
.await
|
|
||||||
.map_err(|e| UpdateError::Error(e.into()))?
|
|
||||||
.map_err(|e| UpdateError::Error(e.into()))?;
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@ -207,9 +189,20 @@ where
|
|||||||
let update_store = self.store.clone();
|
let update_store = self.store.clone();
|
||||||
|
|
||||||
tokio::task::spawn_blocking(move || update_store.snapshot(&uuids, &path, index_handle))
|
tokio::task::spawn_blocking(move || update_store.snapshot(&uuids, &path, index_handle))
|
||||||
.await
|
.await??;
|
||||||
.map_err(|e| UpdateError::Error(e.into()))?
|
|
||||||
.map_err(|e| UpdateError::Error(e.into()))?;
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_dump(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
|
||||||
|
let index_handle = self.index_handle.clone();
|
||||||
|
let update_store = self.store.clone();
|
||||||
|
|
||||||
|
tokio::task::spawn_blocking(move || -> anyhow::Result<()> {
|
||||||
|
update_store.dump(&uuids, path.to_path_buf(), index_handle)?;
|
||||||
|
Ok(())
|
||||||
|
})
|
||||||
|
.await??;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@ -220,9 +213,7 @@ where
|
|||||||
let info = update_store.get_info()?;
|
let info = update_store.get_info()?;
|
||||||
Ok(info)
|
Ok(info)
|
||||||
})
|
})
|
||||||
.await
|
.await??;
|
||||||
.map_err(|e| UpdateError::Error(e.into()))?
|
|
||||||
.map_err(|e| UpdateError::Error(e.into()))?;
|
|
||||||
|
|
||||||
Ok(info)
|
Ok(info)
|
||||||
}
|
}
|
||||||
|
@ -71,6 +71,13 @@ where
|
|||||||
receiver.await.expect("update actor killed.")
|
receiver.await.expect("update actor killed.")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn dump(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
|
||||||
|
let (ret, receiver) = oneshot::channel();
|
||||||
|
let msg = UpdateMsg::Dump { uuids, path, ret };
|
||||||
|
let _ = self.sender.send(msg).await;
|
||||||
|
receiver.await.expect("update actor killed.")
|
||||||
|
}
|
||||||
|
|
||||||
async fn get_info(&self) -> Result<UpdateStoreInfo> {
|
async fn get_info(&self) -> Result<UpdateStoreInfo> {
|
||||||
let (ret, receiver) = oneshot::channel();
|
let (ret, receiver) = oneshot::channel();
|
||||||
let msg = UpdateMsg::GetInfo { ret };
|
let msg = UpdateMsg::GetInfo { ret };
|
||||||
|
@ -31,6 +31,11 @@ pub enum UpdateMsg<D> {
|
|||||||
path: PathBuf,
|
path: PathBuf,
|
||||||
ret: oneshot::Sender<Result<()>>,
|
ret: oneshot::Sender<Result<()>>,
|
||||||
},
|
},
|
||||||
|
Dump {
|
||||||
|
uuids: HashSet<Uuid>,
|
||||||
|
path: PathBuf,
|
||||||
|
ret: oneshot::Sender<Result<()>>,
|
||||||
|
},
|
||||||
GetInfo {
|
GetInfo {
|
||||||
ret: oneshot::Sender<Result<UpdateStoreInfo>>,
|
ret: oneshot::Sender<Result<UpdateStoreInfo>>,
|
||||||
},
|
},
|
||||||
|
@ -1,10 +1,11 @@
|
|||||||
mod actor;
|
mod actor;
|
||||||
mod handle_impl;
|
mod handle_impl;
|
||||||
mod message;
|
mod message;
|
||||||
mod update_store;
|
pub mod store;
|
||||||
|
|
||||||
use std::{collections::HashSet, path::PathBuf};
|
use std::{collections::HashSet, path::PathBuf};
|
||||||
|
|
||||||
|
use actix_http::error::PayloadError;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
use tokio::sync::mpsc;
|
use tokio::sync::mpsc;
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
@ -13,25 +14,45 @@ use crate::index_controller::{UpdateMeta, UpdateStatus};
|
|||||||
|
|
||||||
use actor::UpdateActor;
|
use actor::UpdateActor;
|
||||||
use message::UpdateMsg;
|
use message::UpdateMsg;
|
||||||
use update_store::UpdateStore;
|
|
||||||
pub use update_store::UpdateStoreInfo;
|
|
||||||
|
|
||||||
pub use handle_impl::UpdateActorHandleImpl;
|
pub use handle_impl::UpdateActorHandleImpl;
|
||||||
|
pub use store::{UpdateStore, UpdateStoreInfo};
|
||||||
|
|
||||||
pub type Result<T> = std::result::Result<T, UpdateError>;
|
pub type Result<T> = std::result::Result<T, UpdateError>;
|
||||||
type PayloadData<D> = std::result::Result<D, Box<dyn std::error::Error + Sync + Send + 'static>>;
|
type PayloadData<D> = std::result::Result<D, PayloadError>;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use mockall::automock;
|
use mockall::automock;
|
||||||
|
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum UpdateError {
|
pub enum UpdateError {
|
||||||
#[error("error with update: {0}")]
|
|
||||||
Error(Box<dyn std::error::Error + Sync + Send + 'static>),
|
|
||||||
#[error("Update {0} doesn't exist.")]
|
#[error("Update {0} doesn't exist.")]
|
||||||
UnexistingUpdate(u64),
|
UnexistingUpdate(u64),
|
||||||
|
#[error("Internal error processing update: {0}")]
|
||||||
|
Internal(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Generates an `impl From<E> for UpdateError` for every listed error type.
///
/// Each source error is folded into `UpdateError::Internal`, keeping only the
/// text produced by its `to_string()`.
macro_rules! internal_error {
    ($($source:path), *) => {
        $(
            impl From<$source> for UpdateError {
                fn from(err: $source) -> Self {
                    UpdateError::Internal(err.to_string())
                }
            }
        )*
    }
}
|
||||||
|
|
||||||
|
internal_error!(
|
||||||
|
heed::Error,
|
||||||
|
std::io::Error,
|
||||||
|
serde_json::Error,
|
||||||
|
PayloadError,
|
||||||
|
tokio::task::JoinError,
|
||||||
|
anyhow::Error
|
||||||
|
);
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
#[cfg_attr(test, automock(type Data=Vec<u8>;))]
|
#[cfg_attr(test, automock(type Data=Vec<u8>;))]
|
||||||
pub trait UpdateActorHandle {
|
pub trait UpdateActorHandle {
|
||||||
@ -40,7 +61,8 @@ pub trait UpdateActorHandle {
|
|||||||
async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>>;
|
async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>>;
|
||||||
async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus>;
|
async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus>;
|
||||||
async fn delete(&self, uuid: Uuid) -> Result<()>;
|
async fn delete(&self, uuid: Uuid) -> Result<()>;
|
||||||
async fn snapshot(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()>;
|
async fn snapshot(&self, uuid: HashSet<Uuid>, path: PathBuf) -> Result<()>;
|
||||||
|
async fn dump(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()>;
|
||||||
async fn get_info(&self) -> Result<UpdateStoreInfo>;
|
async fn get_info(&self) -> Result<UpdateStoreInfo>;
|
||||||
async fn update(
|
async fn update(
|
||||||
&self,
|
&self,
|
||||||
|
@ -0,0 +1,86 @@
|
|||||||
|
use std::{borrow::Cow, convert::TryInto, mem::size_of};
|
||||||
|
|
||||||
|
use heed::{BytesDecode, BytesEncode};
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
pub struct NextIdCodec;
|
||||||
|
|
||||||
|
pub enum NextIdKey {
|
||||||
|
Global,
|
||||||
|
Index(Uuid),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> BytesEncode<'a> for NextIdCodec {
|
||||||
|
type EItem = NextIdKey;
|
||||||
|
|
||||||
|
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||||
|
match item {
|
||||||
|
NextIdKey::Global => Some(Cow::Borrowed(b"__global__")),
|
||||||
|
NextIdKey::Index(ref uuid) => Some(Cow::Borrowed(uuid.as_bytes())),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct PendingKeyCodec;
|
||||||
|
|
||||||
|
impl<'a> BytesEncode<'a> for PendingKeyCodec {
|
||||||
|
type EItem = (u64, Uuid, u64);
|
||||||
|
|
||||||
|
fn bytes_encode((global_id, uuid, update_id): &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||||
|
let mut bytes = Vec::with_capacity(size_of::<Self::EItem>());
|
||||||
|
bytes.extend_from_slice(&global_id.to_be_bytes());
|
||||||
|
bytes.extend_from_slice(uuid.as_bytes());
|
||||||
|
bytes.extend_from_slice(&update_id.to_be_bytes());
|
||||||
|
Some(Cow::Owned(bytes))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> BytesDecode<'a> for PendingKeyCodec {
|
||||||
|
type DItem = (u64, Uuid, u64);
|
||||||
|
|
||||||
|
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||||
|
let global_id_bytes = bytes.get(0..size_of::<u64>())?.try_into().ok()?;
|
||||||
|
let global_id = u64::from_be_bytes(global_id_bytes);
|
||||||
|
|
||||||
|
let uuid_bytes = bytes
|
||||||
|
.get(size_of::<u64>()..(size_of::<u64>() + size_of::<Uuid>()))?
|
||||||
|
.try_into()
|
||||||
|
.ok()?;
|
||||||
|
let uuid = Uuid::from_bytes(uuid_bytes);
|
||||||
|
|
||||||
|
let update_id_bytes = bytes
|
||||||
|
.get((size_of::<u64>() + size_of::<Uuid>())..)?
|
||||||
|
.try_into()
|
||||||
|
.ok()?;
|
||||||
|
let update_id = u64::from_be_bytes(update_id_bytes);
|
||||||
|
|
||||||
|
Some((global_id, uuid, update_id))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct UpdateKeyCodec;
|
||||||
|
|
||||||
|
impl<'a> BytesEncode<'a> for UpdateKeyCodec {
|
||||||
|
type EItem = (Uuid, u64);
|
||||||
|
|
||||||
|
fn bytes_encode((uuid, update_id): &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||||
|
let mut bytes = Vec::with_capacity(size_of::<Self::EItem>());
|
||||||
|
bytes.extend_from_slice(uuid.as_bytes());
|
||||||
|
bytes.extend_from_slice(&update_id.to_be_bytes());
|
||||||
|
Some(Cow::Owned(bytes))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> BytesDecode<'a> for UpdateKeyCodec {
|
||||||
|
type DItem = (Uuid, u64);
|
||||||
|
|
||||||
|
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||||
|
let uuid_bytes = bytes.get(0..size_of::<Uuid>())?.try_into().ok()?;
|
||||||
|
let uuid = Uuid::from_bytes(uuid_bytes);
|
||||||
|
|
||||||
|
let update_id_bytes = bytes.get(size_of::<Uuid>()..)?.try_into().ok()?;
|
||||||
|
let update_id = u64::from_be_bytes(update_id_bytes);
|
||||||
|
|
||||||
|
Some((uuid, update_id))
|
||||||
|
}
|
||||||
|
}
|
189
meilisearch-http/src/index_controller/update_actor/store/dump.rs
Normal file
189
meilisearch-http/src/index_controller/update_actor/store/dump.rs
Normal file
@ -0,0 +1,189 @@
|
|||||||
|
use std::{
|
||||||
|
collections::HashSet,
|
||||||
|
fs::{create_dir_all, File},
|
||||||
|
io::{BufRead, BufReader, Write},
|
||||||
|
path::{Path, PathBuf},
|
||||||
|
};
|
||||||
|
|
||||||
|
use heed::{EnvOpenOptions, RoTxn};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use super::UpdateStore;
|
||||||
|
use super::{codec::UpdateKeyCodec, State};
|
||||||
|
use crate::index_controller::{
|
||||||
|
index_actor::IndexActorHandle, update_actor::store::update_uuid_to_file_path, Enqueued,
|
||||||
|
UpdateStatus,
|
||||||
|
};
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize)]
|
||||||
|
struct UpdateEntry {
|
||||||
|
uuid: Uuid,
|
||||||
|
update: UpdateStatus,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl UpdateStore {
|
||||||
|
pub fn dump(
|
||||||
|
&self,
|
||||||
|
uuids: &HashSet<Uuid>,
|
||||||
|
path: PathBuf,
|
||||||
|
handle: impl IndexActorHandle,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
let state_lock = self.state.write();
|
||||||
|
state_lock.swap(State::Dumping);
|
||||||
|
|
||||||
|
// txn must *always* be acquired after state lock, or it will dead lock.
|
||||||
|
let txn = self.env.write_txn()?;
|
||||||
|
|
||||||
|
let dump_path = path.join("updates");
|
||||||
|
create_dir_all(&dump_path)?;
|
||||||
|
|
||||||
|
self.dump_updates(&txn, uuids, &dump_path)?;
|
||||||
|
|
||||||
|
let fut = dump_indexes(uuids, handle, &path);
|
||||||
|
tokio::runtime::Handle::current().block_on(fut)?;
|
||||||
|
|
||||||
|
state_lock.swap(State::Idle);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dump_updates(
|
||||||
|
&self,
|
||||||
|
txn: &RoTxn,
|
||||||
|
uuids: &HashSet<Uuid>,
|
||||||
|
path: impl AsRef<Path>,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
let dump_data_path = path.as_ref().join("data.jsonl");
|
||||||
|
let mut dump_data_file = File::create(dump_data_path)?;
|
||||||
|
|
||||||
|
let update_files_path = path.as_ref().join(super::UPDATE_DIR);
|
||||||
|
create_dir_all(&update_files_path)?;
|
||||||
|
|
||||||
|
self.dump_pending(&txn, uuids, &mut dump_data_file, &path)?;
|
||||||
|
self.dump_completed(&txn, uuids, &mut dump_data_file)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dump_pending(
|
||||||
|
&self,
|
||||||
|
txn: &RoTxn,
|
||||||
|
uuids: &HashSet<Uuid>,
|
||||||
|
mut file: &mut File,
|
||||||
|
dst_path: impl AsRef<Path>,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
let pendings = self.pending_queue.iter(txn)?.lazily_decode_data();
|
||||||
|
|
||||||
|
for pending in pendings {
|
||||||
|
let ((_, uuid, _), data) = pending?;
|
||||||
|
if uuids.contains(&uuid) {
|
||||||
|
let update = data.decode()?;
|
||||||
|
|
||||||
|
if let Some(ref update_uuid) = update.content {
|
||||||
|
let src = super::update_uuid_to_file_path(&self.path, *update_uuid);
|
||||||
|
let dst = super::update_uuid_to_file_path(&dst_path, *update_uuid);
|
||||||
|
std::fs::copy(src, dst)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let update_json = UpdateEntry {
|
||||||
|
uuid,
|
||||||
|
update: update.into(),
|
||||||
|
};
|
||||||
|
|
||||||
|
serde_json::to_writer(&mut file, &update_json)?;
|
||||||
|
file.write_all(b"\n")?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dump_completed(
|
||||||
|
&self,
|
||||||
|
txn: &RoTxn,
|
||||||
|
uuids: &HashSet<Uuid>,
|
||||||
|
mut file: &mut File,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
let updates = self
|
||||||
|
.updates
|
||||||
|
.iter(txn)?
|
||||||
|
.remap_key_type::<UpdateKeyCodec>()
|
||||||
|
.lazily_decode_data();
|
||||||
|
|
||||||
|
for update in updates {
|
||||||
|
let ((uuid, _), data) = update?;
|
||||||
|
if uuids.contains(&uuid) {
|
||||||
|
let update = data.decode()?;
|
||||||
|
|
||||||
|
let update_json = UpdateEntry { uuid, update };
|
||||||
|
|
||||||
|
serde_json::to_writer(&mut file, &update_json)?;
|
||||||
|
file.write_all(b"\n")?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn load_dump(
|
||||||
|
src: impl AsRef<Path>,
|
||||||
|
dst: impl AsRef<Path>,
|
||||||
|
db_size: usize,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
let dst_update_path = dst.as_ref().join("updates/");
|
||||||
|
create_dir_all(&dst_update_path)?;
|
||||||
|
|
||||||
|
let mut options = EnvOpenOptions::new();
|
||||||
|
options.map_size(db_size as usize);
|
||||||
|
let (store, _) = UpdateStore::new(options, &dst_update_path)?;
|
||||||
|
|
||||||
|
let src_update_path = src.as_ref().join("updates");
|
||||||
|
let update_data = File::open(&src_update_path.join("data.jsonl"))?;
|
||||||
|
let mut update_data = BufReader::new(update_data);
|
||||||
|
|
||||||
|
std::fs::create_dir_all(dst_update_path.join("update_files/"))?;
|
||||||
|
|
||||||
|
let mut wtxn = store.env.write_txn()?;
|
||||||
|
let mut line = String::new();
|
||||||
|
loop {
|
||||||
|
match update_data.read_line(&mut line) {
|
||||||
|
Ok(0) => break,
|
||||||
|
Ok(_) => {
|
||||||
|
let UpdateEntry { uuid, update } = serde_json::from_str(&line)?;
|
||||||
|
store.register_raw_updates(&mut wtxn, &update, uuid)?;
|
||||||
|
|
||||||
|
// Copy ascociated update path if it exists
|
||||||
|
if let UpdateStatus::Enqueued(Enqueued {
|
||||||
|
content: Some(uuid),
|
||||||
|
..
|
||||||
|
}) = update
|
||||||
|
{
|
||||||
|
let src = update_uuid_to_file_path(&src_update_path, uuid);
|
||||||
|
let dst = update_uuid_to_file_path(&dst_update_path, uuid);
|
||||||
|
std::fs::copy(src, dst)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => break,
|
||||||
|
}
|
||||||
|
|
||||||
|
line.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
wtxn.commit()?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn dump_indexes(
|
||||||
|
uuids: &HashSet<Uuid>,
|
||||||
|
handle: impl IndexActorHandle,
|
||||||
|
path: impl AsRef<Path>,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
for uuid in uuids {
|
||||||
|
handle.dump(*uuid, path.as_ref().to_owned()).await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
@ -1,38 +1,35 @@
|
|||||||
use std::borrow::Cow;
|
mod codec;
|
||||||
use std::collections::{BTreeMap, HashSet};
|
pub mod dump;
|
||||||
use std::convert::TryInto;
|
|
||||||
use std::fs::{copy, create_dir_all, remove_file, File};
|
use std::fs::{copy, create_dir_all, remove_file, File};
|
||||||
use std::mem::size_of;
|
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
use std::{
|
||||||
|
collections::{BTreeMap, HashSet},
|
||||||
|
path::PathBuf,
|
||||||
|
};
|
||||||
|
|
||||||
use anyhow::Context;
|
|
||||||
use arc_swap::ArcSwap;
|
use arc_swap::ArcSwap;
|
||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
use heed::types::{ByteSlice, OwnedType, SerdeJson};
|
use heed::types::{ByteSlice, OwnedType, SerdeJson};
|
||||||
use heed::zerocopy::U64;
|
use heed::zerocopy::U64;
|
||||||
use heed::{BytesDecode, BytesEncode, CompactionOption, Database, Env, EnvOpenOptions};
|
use heed::{CompactionOption, Database, Env, EnvOpenOptions};
|
||||||
|
use log::error;
|
||||||
use parking_lot::{Mutex, MutexGuard};
|
use parking_lot::{Mutex, MutexGuard};
|
||||||
use tokio::runtime::Handle;
|
use tokio::runtime::Handle;
|
||||||
use tokio::sync::mpsc;
|
use tokio::sync::mpsc;
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use codec::*;
|
||||||
|
|
||||||
use super::UpdateMeta;
|
use super::UpdateMeta;
|
||||||
use crate::index_controller::{updates::*, IndexActorHandle};
|
use crate::index_controller::{index_actor::CONCURRENT_INDEX_MSG, updates::*, IndexActorHandle};
|
||||||
use crate::{
|
use crate::{helpers::EnvSizer, index_controller::index_actor::IndexResult};
|
||||||
helpers::EnvSizer,
|
|
||||||
index_controller::index_actor::{IndexResult, CONCURRENT_INDEX_MSG},
|
|
||||||
};
|
|
||||||
|
|
||||||
#[allow(clippy::upper_case_acronyms)]
|
#[allow(clippy::upper_case_acronyms)]
|
||||||
type BEU64 = U64<heed::byteorder::BE>;
|
type BEU64 = U64<heed::byteorder::BE>;
|
||||||
|
|
||||||
struct NextIdCodec;
|
const UPDATE_DIR: &str = "update_files";
|
||||||
|
|
||||||
enum NextIdKey {
|
|
||||||
Global,
|
|
||||||
Index(Uuid),
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct UpdateStoreInfo {
|
pub struct UpdateStoreInfo {
|
||||||
/// Size of the update store in bytes.
|
/// Size of the update store in bytes.
|
||||||
@ -47,13 +44,13 @@ pub struct StateLock {
|
|||||||
data: ArcSwap<State>,
|
data: ArcSwap<State>,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct StateLockGuard<'a> {
|
pub struct StateLockGuard<'a> {
|
||||||
_lock: MutexGuard<'a, ()>,
|
_lock: MutexGuard<'a, ()>,
|
||||||
state: &'a StateLock,
|
state: &'a StateLock,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl StateLockGuard<'_> {
|
impl StateLockGuard<'_> {
|
||||||
fn swap(&self, state: State) -> Arc<State> {
|
pub fn swap(&self, state: State) -> Arc<State> {
|
||||||
self.state.data.swap(Arc::new(state))
|
self.state.data.swap(Arc::new(state))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -65,11 +62,11 @@ impl StateLock {
|
|||||||
Self { lock, data }
|
Self { lock, data }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn read(&self) -> Arc<State> {
|
pub fn read(&self) -> Arc<State> {
|
||||||
self.data.load().clone()
|
self.data.load().clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write(&self) -> StateLockGuard {
|
pub fn write(&self) -> StateLockGuard {
|
||||||
let _lock = self.lock.lock();
|
let _lock = self.lock.lock();
|
||||||
let state = &self;
|
let state = &self;
|
||||||
StateLockGuard { _lock, state }
|
StateLockGuard { _lock, state }
|
||||||
@ -81,81 +78,7 @@ pub enum State {
|
|||||||
Idle,
|
Idle,
|
||||||
Processing(Uuid, Processing),
|
Processing(Uuid, Processing),
|
||||||
Snapshoting,
|
Snapshoting,
|
||||||
}
|
Dumping,
|
||||||
|
|
||||||
impl<'a> BytesEncode<'a> for NextIdCodec {
|
|
||||||
type EItem = NextIdKey;
|
|
||||||
|
|
||||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
|
||||||
match item {
|
|
||||||
NextIdKey::Global => Some(Cow::Borrowed(b"__global__")),
|
|
||||||
NextIdKey::Index(ref uuid) => Some(Cow::Borrowed(uuid.as_bytes())),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct PendingKeyCodec;
|
|
||||||
|
|
||||||
impl<'a> BytesEncode<'a> for PendingKeyCodec {
|
|
||||||
type EItem = (u64, Uuid, u64);
|
|
||||||
|
|
||||||
fn bytes_encode((global_id, uuid, update_id): &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
|
||||||
let mut bytes = Vec::with_capacity(size_of::<Self::EItem>());
|
|
||||||
bytes.extend_from_slice(&global_id.to_be_bytes());
|
|
||||||
bytes.extend_from_slice(uuid.as_bytes());
|
|
||||||
bytes.extend_from_slice(&update_id.to_be_bytes());
|
|
||||||
Some(Cow::Owned(bytes))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> BytesDecode<'a> for PendingKeyCodec {
|
|
||||||
type DItem = (u64, Uuid, u64);
|
|
||||||
|
|
||||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
|
||||||
let global_id_bytes = bytes.get(0..size_of::<u64>())?.try_into().ok()?;
|
|
||||||
let global_id = u64::from_be_bytes(global_id_bytes);
|
|
||||||
|
|
||||||
let uuid_bytes = bytes
|
|
||||||
.get(size_of::<u64>()..(size_of::<u64>() + size_of::<Uuid>()))?
|
|
||||||
.try_into()
|
|
||||||
.ok()?;
|
|
||||||
let uuid = Uuid::from_bytes(uuid_bytes);
|
|
||||||
|
|
||||||
let update_id_bytes = bytes
|
|
||||||
.get((size_of::<u64>() + size_of::<Uuid>())..)?
|
|
||||||
.try_into()
|
|
||||||
.ok()?;
|
|
||||||
let update_id = u64::from_be_bytes(update_id_bytes);
|
|
||||||
|
|
||||||
Some((global_id, uuid, update_id))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct UpdateKeyCodec;
|
|
||||||
|
|
||||||
impl<'a> BytesEncode<'a> for UpdateKeyCodec {
|
|
||||||
type EItem = (Uuid, u64);
|
|
||||||
|
|
||||||
fn bytes_encode((uuid, update_id): &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
|
||||||
let mut bytes = Vec::with_capacity(size_of::<Self::EItem>());
|
|
||||||
bytes.extend_from_slice(uuid.as_bytes());
|
|
||||||
bytes.extend_from_slice(&update_id.to_be_bytes());
|
|
||||||
Some(Cow::Owned(bytes))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> BytesDecode<'a> for UpdateKeyCodec {
|
|
||||||
type DItem = (Uuid, u64);
|
|
||||||
|
|
||||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
|
||||||
let uuid_bytes = bytes.get(0..size_of::<Uuid>())?.try_into().ok()?;
|
|
||||||
let uuid = Uuid::from_bytes(uuid_bytes);
|
|
||||||
|
|
||||||
let update_id_bytes = bytes.get(size_of::<Uuid>()..)?.try_into().ok()?;
|
|
||||||
let update_id = u64::from_be_bytes(update_id_bytes);
|
|
||||||
|
|
||||||
Some((uuid, update_id))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
@ -175,45 +98,60 @@ pub struct UpdateStore {
|
|||||||
/// | 16-bytes | 8-bytes |
|
/// | 16-bytes | 8-bytes |
|
||||||
updates: Database<ByteSlice, SerdeJson<UpdateStatus>>,
|
updates: Database<ByteSlice, SerdeJson<UpdateStatus>>,
|
||||||
/// Indicates the current state of the update store,
|
/// Indicates the current state of the update store,
|
||||||
state: Arc<StateLock>,
|
pub state: Arc<StateLock>,
|
||||||
/// Wake up the loop when a new event occurs.
|
/// Wake up the loop when a new event occurs.
|
||||||
notification_sender: mpsc::Sender<()>,
|
notification_sender: mpsc::Sender<()>,
|
||||||
|
path: PathBuf,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl UpdateStore {
|
impl UpdateStore {
|
||||||
pub fn open(
|
fn new(
|
||||||
mut options: EnvOpenOptions,
|
mut options: EnvOpenOptions,
|
||||||
path: impl AsRef<Path>,
|
path: impl AsRef<Path>,
|
||||||
index_handle: impl IndexActorHandle + Clone + Sync + Send + 'static,
|
) -> anyhow::Result<(Self, mpsc::Receiver<()>)> {
|
||||||
) -> anyhow::Result<Arc<Self>> {
|
|
||||||
options.max_dbs(5);
|
options.max_dbs(5);
|
||||||
|
|
||||||
let env = options.open(path)?;
|
let env = options.open(&path)?;
|
||||||
let pending_queue = env.create_database(Some("pending-queue"))?;
|
let pending_queue = env.create_database(Some("pending-queue"))?;
|
||||||
let next_update_id = env.create_database(Some("next-update-id"))?;
|
let next_update_id = env.create_database(Some("next-update-id"))?;
|
||||||
let updates = env.create_database(Some("updates"))?;
|
let updates = env.create_database(Some("updates"))?;
|
||||||
|
|
||||||
let (notification_sender, mut notification_receiver) = mpsc::channel(10);
|
|
||||||
// Send a first notification to trigger the process.
|
|
||||||
let _ = notification_sender.send(());
|
|
||||||
|
|
||||||
let state = Arc::new(StateLock::from_state(State::Idle));
|
let state = Arc::new(StateLock::from_state(State::Idle));
|
||||||
|
|
||||||
// Init update loop to perform any pending updates at launch.
|
let (notification_sender, notification_receiver) = mpsc::channel(10);
|
||||||
// Since we just launched the update store, and we still own the receiving end of the
|
|
||||||
// channel, this call is guaranteed to succeed.
|
|
||||||
notification_sender
|
|
||||||
.try_send(())
|
|
||||||
.expect("Failed to init update store");
|
|
||||||
|
|
||||||
let update_store = Arc::new(UpdateStore {
|
Ok((
|
||||||
|
Self {
|
||||||
env,
|
env,
|
||||||
pending_queue,
|
pending_queue,
|
||||||
next_update_id,
|
next_update_id,
|
||||||
updates,
|
updates,
|
||||||
state,
|
state,
|
||||||
notification_sender,
|
notification_sender,
|
||||||
});
|
path: path.as_ref().to_owned(),
|
||||||
|
},
|
||||||
|
notification_receiver,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn open(
|
||||||
|
options: EnvOpenOptions,
|
||||||
|
path: impl AsRef<Path>,
|
||||||
|
index_handle: impl IndexActorHandle + Clone + Sync + Send + 'static,
|
||||||
|
) -> anyhow::Result<Arc<Self>> {
|
||||||
|
let (update_store, mut notification_receiver) = Self::new(options, path)?;
|
||||||
|
let update_store = Arc::new(update_store);
|
||||||
|
|
||||||
|
// Send a first notification to trigger the process.
|
||||||
|
let _ = update_store.notification_sender.send(());
|
||||||
|
|
||||||
|
// Init update loop to perform any pending updates at launch.
|
||||||
|
// Since we just launched the update store, and we still own the receiving end of the
|
||||||
|
// channel, this call is guaranteed to succeed.
|
||||||
|
update_store
|
||||||
|
.notification_sender
|
||||||
|
.try_send(())
|
||||||
|
.expect("Failed to init update store");
|
||||||
|
|
||||||
// We need a weak reference so we can take ownership on the arc later when we
|
// We need a weak reference so we can take ownership on the arc later when we
|
||||||
// want to close the index.
|
// want to close the index.
|
||||||
@ -233,7 +171,7 @@ impl UpdateStore {
|
|||||||
match res {
|
match res {
|
||||||
Ok(Some(_)) => (),
|
Ok(Some(_)) => (),
|
||||||
Ok(None) => break,
|
Ok(None) => break,
|
||||||
Err(e) => eprintln!("error while processing update: {}", e),
|
Err(e) => error!("error while processing update: {}", e),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// the ownership on the arc has been taken, we need to exit.
|
// the ownership on the arc has been taken, we need to exit.
|
||||||
@ -253,21 +191,31 @@ impl UpdateStore {
|
|||||||
.get(txn, &NextIdKey::Global)?
|
.get(txn, &NextIdKey::Global)?
|
||||||
.map(U64::get)
|
.map(U64::get)
|
||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
self.next_update_id
|
||||||
|
.put(txn, &NextIdKey::Global, &BEU64::new(global_id + 1))?;
|
||||||
|
|
||||||
|
let update_id = self.next_update_id_raw(txn, index_uuid)?;
|
||||||
|
|
||||||
|
Ok((global_id, update_id))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the next update id for a given `index_uuid` without
|
||||||
|
/// incrementing the global update id. This is useful for the dumps.
|
||||||
|
fn next_update_id_raw(&self, txn: &mut heed::RwTxn, index_uuid: Uuid) -> heed::Result<u64> {
|
||||||
let update_id = self
|
let update_id = self
|
||||||
.next_update_id
|
.next_update_id
|
||||||
.get(txn, &NextIdKey::Index(index_uuid))?
|
.get(txn, &NextIdKey::Index(index_uuid))?
|
||||||
.map(U64::get)
|
.map(U64::get)
|
||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
|
|
||||||
self.next_update_id
|
|
||||||
.put(txn, &NextIdKey::Global, &BEU64::new(global_id + 1))?;
|
|
||||||
self.next_update_id.put(
|
self.next_update_id.put(
|
||||||
txn,
|
txn,
|
||||||
&NextIdKey::Index(index_uuid),
|
&NextIdKey::Index(index_uuid),
|
||||||
&BEU64::new(update_id + 1),
|
&BEU64::new(update_id + 1),
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
Ok((global_id, update_id))
|
Ok(update_id)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Registers the update content in the pending store and the meta
|
/// Registers the update content in the pending store and the meta
|
||||||
@ -275,13 +223,13 @@ impl UpdateStore {
|
|||||||
pub fn register_update(
|
pub fn register_update(
|
||||||
&self,
|
&self,
|
||||||
meta: UpdateMeta,
|
meta: UpdateMeta,
|
||||||
content: Option<impl AsRef<Path>>,
|
content: Option<Uuid>,
|
||||||
index_uuid: Uuid,
|
index_uuid: Uuid,
|
||||||
) -> heed::Result<Enqueued> {
|
) -> heed::Result<Enqueued> {
|
||||||
let mut txn = self.env.write_txn()?;
|
let mut txn = self.env.write_txn()?;
|
||||||
|
|
||||||
let (global_id, update_id) = self.next_update_id(&mut txn, index_uuid)?;
|
let (global_id, update_id) = self.next_update_id(&mut txn, index_uuid)?;
|
||||||
let meta = Enqueued::new(meta, update_id, content.map(|p| p.as_ref().to_owned()));
|
let meta = Enqueued::new(meta, update_id, content);
|
||||||
|
|
||||||
self.pending_queue
|
self.pending_queue
|
||||||
.put(&mut txn, &(global_id, index_uuid, update_id), &meta)?;
|
.put(&mut txn, &(global_id, index_uuid, update_id), &meta)?;
|
||||||
@ -294,6 +242,35 @@ impl UpdateStore {
|
|||||||
Ok(meta)
|
Ok(meta)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Push already processed update in the UpdateStore without triggering the notification
|
||||||
|
/// process. This is useful for the dumps.
|
||||||
|
pub fn register_raw_updates(
|
||||||
|
&self,
|
||||||
|
wtxn: &mut heed::RwTxn,
|
||||||
|
update: &UpdateStatus,
|
||||||
|
index_uuid: Uuid,
|
||||||
|
) -> heed::Result<()> {
|
||||||
|
match update {
|
||||||
|
UpdateStatus::Enqueued(enqueued) => {
|
||||||
|
let (global_id, _update_id) = self.next_update_id(wtxn, index_uuid)?;
|
||||||
|
self.pending_queue.remap_key_type::<PendingKeyCodec>().put(
|
||||||
|
wtxn,
|
||||||
|
&(global_id, index_uuid, enqueued.id()),
|
||||||
|
&enqueued,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
let _update_id = self.next_update_id_raw(wtxn, index_uuid)?;
|
||||||
|
self.updates.remap_key_type::<UpdateKeyCodec>().put(
|
||||||
|
wtxn,
|
||||||
|
&(index_uuid, update.id()),
|
||||||
|
&update,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
/// Executes the user provided function on the next pending update (the one with the lowest id).
|
/// Executes the user provided function on the next pending update (the one with the lowest id).
|
||||||
/// This is asynchronous as it let the user process the update with a read-only txn and
|
/// This is asynchronous as it let the user process the update with a read-only txn and
|
||||||
/// only writing the result meta to the processed-meta store *after* it has been processed.
|
/// only writing the result meta to the processed-meta store *after* it has been processed.
|
||||||
@ -314,13 +291,14 @@ impl UpdateStore {
|
|||||||
let processing = pending.processing();
|
let processing = pending.processing();
|
||||||
|
|
||||||
// Acquire the state lock and set the current state to processing.
|
// Acquire the state lock and set the current state to processing.
|
||||||
|
// txn must *always* be acquired after state lock, or it will deadlock.
|
||||||
let state = self.state.write();
|
let state = self.state.write();
|
||||||
state.swap(State::Processing(index_uuid, processing.clone()));
|
state.swap(State::Processing(index_uuid, processing.clone()));
|
||||||
|
|
||||||
let file = match content_path {
|
let file = match content_path {
|
||||||
Some(ref path) => {
|
Some(uuid) => {
|
||||||
let file = File::open(path)
|
let path = update_uuid_to_file_path(&self.path, uuid);
|
||||||
.with_context(|| format!("file at path: {:?}", &content_path))?;
|
let file = File::open(path)?;
|
||||||
Some(file)
|
Some(file)
|
||||||
}
|
}
|
||||||
None => None,
|
None => None,
|
||||||
@ -336,7 +314,8 @@ impl UpdateStore {
|
|||||||
self.pending_queue
|
self.pending_queue
|
||||||
.delete(&mut wtxn, &(global_id, index_uuid, update_id))?;
|
.delete(&mut wtxn, &(global_id, index_uuid, update_id))?;
|
||||||
|
|
||||||
if let Some(path) = content_path {
|
if let Some(uuid) = content_path {
|
||||||
|
let path = update_uuid_to_file_path(&self.path, uuid);
|
||||||
remove_file(&path)?;
|
remove_file(&path)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -436,7 +415,7 @@ impl UpdateStore {
|
|||||||
pub fn delete_all(&self, index_uuid: Uuid) -> anyhow::Result<()> {
|
pub fn delete_all(&self, index_uuid: Uuid) -> anyhow::Result<()> {
|
||||||
let mut txn = self.env.write_txn()?;
|
let mut txn = self.env.write_txn()?;
|
||||||
// Contains all the content file paths that we need to be removed if the deletion was successful.
|
// Contains all the content file paths that we need to be removed if the deletion was successful.
|
||||||
let mut paths_to_remove = Vec::new();
|
let mut uuids_to_remove = Vec::new();
|
||||||
|
|
||||||
let mut pendings = self.pending_queue.iter_mut(&mut txn)?.lazily_decode_data();
|
let mut pendings = self.pending_queue.iter_mut(&mut txn)?.lazily_decode_data();
|
||||||
|
|
||||||
@ -444,8 +423,8 @@ impl UpdateStore {
|
|||||||
if uuid == index_uuid {
|
if uuid == index_uuid {
|
||||||
pendings.del_current()?;
|
pendings.del_current()?;
|
||||||
let mut pending = pending.decode()?;
|
let mut pending = pending.decode()?;
|
||||||
if let Some(path) = pending.content.take() {
|
if let Some(update_uuid) = pending.content.take() {
|
||||||
paths_to_remove.push(path);
|
uuids_to_remove.push(update_uuid);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -465,7 +444,10 @@ impl UpdateStore {
|
|||||||
|
|
||||||
txn.commit()?;
|
txn.commit()?;
|
||||||
|
|
||||||
paths_to_remove.iter().for_each(|path| {
|
uuids_to_remove
|
||||||
|
.iter()
|
||||||
|
.map(|uuid| update_uuid_to_file_path(&self.path, *uuid))
|
||||||
|
.for_each(|path| {
|
||||||
let _ = remove_file(path);
|
let _ = remove_file(path);
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -496,7 +478,7 @@ impl UpdateStore {
|
|||||||
// create db snapshot
|
// create db snapshot
|
||||||
self.env.copy_to_path(&db_path, CompactionOption::Enabled)?;
|
self.env.copy_to_path(&db_path, CompactionOption::Enabled)?;
|
||||||
|
|
||||||
let update_files_path = update_path.join("update_files");
|
let update_files_path = update_path.join(UPDATE_DIR);
|
||||||
create_dir_all(&update_files_path)?;
|
create_dir_all(&update_files_path)?;
|
||||||
|
|
||||||
let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data();
|
let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data();
|
||||||
@ -504,10 +486,13 @@ impl UpdateStore {
|
|||||||
for entry in pendings {
|
for entry in pendings {
|
||||||
let ((_, uuid, _), pending) = entry?;
|
let ((_, uuid, _), pending) = entry?;
|
||||||
if uuids.contains(&uuid) {
|
if uuids.contains(&uuid) {
|
||||||
if let Some(path) = pending.decode()?.content_path() {
|
if let Enqueued {
|
||||||
let name = path.file_name().unwrap();
|
content: Some(uuid),
|
||||||
let to = update_files_path.join(name);
|
..
|
||||||
copy(path, to)?;
|
} = pending.decode()?
|
||||||
|
{
|
||||||
|
let path = update_uuid_to_file_path(&self.path, uuid);
|
||||||
|
copy(path, &update_files_path)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -533,14 +518,17 @@ impl UpdateStore {
|
|||||||
pub fn get_info(&self) -> anyhow::Result<UpdateStoreInfo> {
|
pub fn get_info(&self) -> anyhow::Result<UpdateStoreInfo> {
|
||||||
let mut size = self.env.size();
|
let mut size = self.env.size();
|
||||||
let txn = self.env.read_txn()?;
|
let txn = self.env.read_txn()?;
|
||||||
|
|
||||||
for entry in self.pending_queue.iter(&txn)? {
|
for entry in self.pending_queue.iter(&txn)? {
|
||||||
let (_, pending) = entry?;
|
let (_, pending) = entry?;
|
||||||
if let Some(path) = pending.content_path() {
|
if let Enqueued {
|
||||||
|
content: Some(uuid),
|
||||||
|
..
|
||||||
|
} = pending
|
||||||
|
{
|
||||||
|
let path = update_uuid_to_file_path(&self.path, uuid);
|
||||||
size += File::open(path)?.metadata()?.len();
|
size += File::open(path)?.metadata()?.len();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let processing = match *self.state.read() {
|
let processing = match *self.state.read() {
|
||||||
State::Processing(uuid, _) => Some(uuid),
|
State::Processing(uuid, _) => Some(uuid),
|
||||||
_ => None,
|
_ => None,
|
||||||
@ -550,6 +538,12 @@ impl UpdateStore {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn update_uuid_to_file_path(root: impl AsRef<Path>, uuid: Uuid) -> PathBuf {
|
||||||
|
root.as_ref()
|
||||||
|
.join(UPDATE_DIR)
|
||||||
|
.join(format!("update_{}", uuid))
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use super::*;
|
use super::*;
|
||||||
@ -595,9 +589,7 @@ mod test {
|
|||||||
let uuid = Uuid::new_v4();
|
let uuid = Uuid::new_v4();
|
||||||
let store_clone = update_store.clone();
|
let store_clone = update_store.clone();
|
||||||
tokio::task::spawn_blocking(move || {
|
tokio::task::spawn_blocking(move || {
|
||||||
store_clone
|
store_clone.register_update(meta, None, uuid).unwrap();
|
||||||
.register_update(meta, Some("here"), uuid)
|
|
||||||
.unwrap();
|
|
||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
@ -1,10 +1,9 @@
|
|||||||
use std::path::{Path, PathBuf};
|
|
||||||
|
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod, UpdateFormat};
|
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod, UpdateFormat};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
use crate::index::{Checked, Settings};
|
use crate::index::{Unchecked, Settings};
|
||||||
|
|
||||||
pub type UpdateError = String;
|
pub type UpdateError = String;
|
||||||
|
|
||||||
@ -25,7 +24,7 @@ pub enum UpdateMeta {
|
|||||||
},
|
},
|
||||||
ClearDocuments,
|
ClearDocuments,
|
||||||
DeleteDocuments,
|
DeleteDocuments,
|
||||||
Settings(Settings<Checked>),
|
Settings(Settings<Unchecked>),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||||
@ -34,11 +33,11 @@ pub struct Enqueued {
|
|||||||
pub update_id: u64,
|
pub update_id: u64,
|
||||||
pub meta: UpdateMeta,
|
pub meta: UpdateMeta,
|
||||||
pub enqueued_at: DateTime<Utc>,
|
pub enqueued_at: DateTime<Utc>,
|
||||||
pub content: Option<PathBuf>,
|
pub content: Option<Uuid>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Enqueued {
|
impl Enqueued {
|
||||||
pub fn new(meta: UpdateMeta, update_id: u64, content: Option<PathBuf>) -> Self {
|
pub fn new(meta: UpdateMeta, update_id: u64, content: Option<Uuid>) -> Self {
|
||||||
Self {
|
Self {
|
||||||
enqueued_at: Utc::now(),
|
enqueued_at: Utc::now(),
|
||||||
meta,
|
meta,
|
||||||
@ -68,10 +67,6 @@ impl Enqueued {
|
|||||||
pub fn id(&self) -> u64 {
|
pub fn id(&self) -> u64 {
|
||||||
self.update_id
|
self.update_id
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn content_path(&self) -> Option<&Path> {
|
|
||||||
self.content.as_deref()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||||
@ -152,7 +147,7 @@ impl Failed {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize)]
|
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||||
#[serde(tag = "status", rename_all = "camelCase")]
|
#[serde(tag = "status", rename_all = "camelCase")]
|
||||||
pub enum UpdateStatus {
|
pub enum UpdateStatus {
|
||||||
Processing(Processing),
|
Processing(Processing),
|
||||||
|
@ -4,7 +4,7 @@ use log::{info, warn};
|
|||||||
use tokio::sync::mpsc;
|
use tokio::sync::mpsc;
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
use super::{Result, UuidError, UuidResolveMsg, UuidStore};
|
use super::{Result, UuidResolveMsg, UuidResolverError, UuidStore};
|
||||||
|
|
||||||
pub struct UuidResolverActor<S> {
|
pub struct UuidResolverActor<S> {
|
||||||
inbox: mpsc::Receiver<UuidResolveMsg>,
|
inbox: mpsc::Receiver<UuidResolveMsg>,
|
||||||
@ -44,6 +44,9 @@ impl<S: UuidStore> UuidResolverActor<S> {
|
|||||||
Some(GetSize { ret }) => {
|
Some(GetSize { ret }) => {
|
||||||
let _ = ret.send(self.handle_get_size().await);
|
let _ = ret.send(self.handle_get_size().await);
|
||||||
}
|
}
|
||||||
|
Some(DumpRequest { path, ret }) => {
|
||||||
|
let _ = ret.send(self.handle_dump(path).await);
|
||||||
|
}
|
||||||
// all senders have been dropped, need to quit.
|
// all senders have been dropped, need to quit.
|
||||||
None => break,
|
None => break,
|
||||||
}
|
}
|
||||||
@ -54,7 +57,7 @@ impl<S: UuidStore> UuidResolverActor<S> {
|
|||||||
|
|
||||||
async fn handle_create(&self, uid: String) -> Result<Uuid> {
|
async fn handle_create(&self, uid: String) -> Result<Uuid> {
|
||||||
if !is_index_uid_valid(&uid) {
|
if !is_index_uid_valid(&uid) {
|
||||||
return Err(UuidError::BadlyFormatted(uid));
|
return Err(UuidResolverError::BadlyFormatted(uid));
|
||||||
}
|
}
|
||||||
self.store.create_uuid(uid, true).await
|
self.store.create_uuid(uid, true).await
|
||||||
}
|
}
|
||||||
@ -63,14 +66,14 @@ impl<S: UuidStore> UuidResolverActor<S> {
|
|||||||
self.store
|
self.store
|
||||||
.get_uuid(uid.clone())
|
.get_uuid(uid.clone())
|
||||||
.await?
|
.await?
|
||||||
.ok_or(UuidError::UnexistingIndex(uid))
|
.ok_or(UuidResolverError::UnexistingIndex(uid))
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn handle_delete(&self, uid: String) -> Result<Uuid> {
|
async fn handle_delete(&self, uid: String) -> Result<Uuid> {
|
||||||
self.store
|
self.store
|
||||||
.delete(uid.clone())
|
.delete(uid.clone())
|
||||||
.await?
|
.await?
|
||||||
.ok_or(UuidError::UnexistingIndex(uid))
|
.ok_or(UuidResolverError::UnexistingIndex(uid))
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn handle_list(&self) -> Result<Vec<(String, Uuid)>> {
|
async fn handle_list(&self) -> Result<Vec<(String, Uuid)>> {
|
||||||
@ -82,9 +85,13 @@ impl<S: UuidStore> UuidResolverActor<S> {
|
|||||||
self.store.snapshot(path).await
|
self.store.snapshot(path).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Forwards a dump request to the store and returns the set of uuids it reports.
async fn handle_dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
    let uuids = self.store.dump(path).await?;
    Ok(uuids)
}
|
||||||
|
|
||||||
async fn handle_insert(&self, uid: String, uuid: Uuid) -> Result<()> {
|
async fn handle_insert(&self, uid: String, uuid: Uuid) -> Result<()> {
|
||||||
if !is_index_uid_valid(&uid) {
|
if !is_index_uid_valid(&uid) {
|
||||||
return Err(UuidError::BadlyFormatted(uid));
|
return Err(UuidResolverError::BadlyFormatted(uid));
|
||||||
}
|
}
|
||||||
self.store.insert(uid, uuid).await?;
|
self.store.insert(uid, uuid).await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -85,4 +85,12 @@ impl UuidResolverHandle for UuidResolverHandleImpl {
|
|||||||
.await
|
.await
|
||||||
.expect("Uuid resolver actor has been killed")?)
|
.expect("Uuid resolver actor has been killed")?)
|
||||||
}
|
}
|
||||||
|
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||||
|
let (ret, receiver) = oneshot::channel();
|
||||||
|
let msg = UuidResolveMsg::DumpRequest { ret, path };
|
||||||
|
let _ = self.sender.send(msg).await;
|
||||||
|
Ok(receiver
|
||||||
|
.await
|
||||||
|
.expect("Uuid resolver actor has been killed")?)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -34,4 +34,8 @@ pub enum UuidResolveMsg {
|
|||||||
GetSize {
|
GetSize {
|
||||||
ret: oneshot::Sender<Result<u64>>,
|
ret: oneshot::Sender<Result<u64>>,
|
||||||
},
|
},
|
||||||
|
DumpRequest {
|
||||||
|
path: PathBuf,
|
||||||
|
ret: oneshot::Sender<Result<HashSet<Uuid>>>,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
mod actor;
|
mod actor;
|
||||||
mod handle_impl;
|
mod handle_impl;
|
||||||
mod message;
|
mod message;
|
||||||
mod store;
|
pub mod store;
|
||||||
|
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
@ -11,16 +11,17 @@ use uuid::Uuid;
|
|||||||
|
|
||||||
use actor::UuidResolverActor;
|
use actor::UuidResolverActor;
|
||||||
use message::UuidResolveMsg;
|
use message::UuidResolveMsg;
|
||||||
use store::{HeedUuidStore, UuidStore};
|
use store::UuidStore;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use mockall::automock;
|
use mockall::automock;
|
||||||
|
|
||||||
pub use handle_impl::UuidResolverHandleImpl;
|
pub use handle_impl::UuidResolverHandleImpl;
|
||||||
|
pub use store::HeedUuidStore;
|
||||||
|
|
||||||
const UUID_STORE_SIZE: usize = 1_073_741_824; //1GiB
|
const UUID_STORE_SIZE: usize = 1_073_741_824; //1GiB
|
||||||
|
|
||||||
pub type Result<T> = std::result::Result<T, UuidError>;
|
pub type Result<T> = std::result::Result<T, UuidResolverError>;
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
#[cfg_attr(test, automock)]
|
#[cfg_attr(test, automock)]
|
||||||
@ -32,20 +33,37 @@ pub trait UuidResolverHandle {
|
|||||||
async fn list(&self) -> anyhow::Result<Vec<(String, Uuid)>>;
|
async fn list(&self) -> anyhow::Result<Vec<(String, Uuid)>>;
|
||||||
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
|
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
|
||||||
async fn get_size(&self) -> Result<u64>;
|
async fn get_size(&self) -> Result<u64>;
|
||||||
|
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum UuidError {
|
pub enum UuidResolverError {
|
||||||
#[error("Name already exist.")]
|
#[error("Name already exist.")]
|
||||||
NameAlreadyExist,
|
NameAlreadyExist,
|
||||||
#[error("Index \"{0}\" doesn't exist.")]
|
#[error("Index \"{0}\" doesn't exist.")]
|
||||||
UnexistingIndex(String),
|
UnexistingIndex(String),
|
||||||
#[error("Error performing task: {0}")]
|
|
||||||
TokioTask(#[from] tokio::task::JoinError),
|
|
||||||
#[error("Database error: {0}")]
|
|
||||||
Heed(#[from] heed::Error),
|
|
||||||
#[error("Uuid error: {0}")]
|
|
||||||
Uuid(#[from] uuid::Error),
|
|
||||||
#[error("Badly formatted index uid: {0}")]
|
#[error("Badly formatted index uid: {0}")]
|
||||||
BadlyFormatted(String),
|
BadlyFormatted(String),
|
||||||
|
#[error("Internal error resolving index uid: {0}")]
|
||||||
|
Internal(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
macro_rules! internal_error {
|
||||||
|
($($other:path), *) => {
|
||||||
|
$(
|
||||||
|
impl From<$other> for UuidResolverError {
|
||||||
|
fn from(other: $other) -> Self {
|
||||||
|
Self::Internal(other.to_string())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)*
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
internal_error!(
|
||||||
|
heed::Error,
|
||||||
|
uuid::Error,
|
||||||
|
std::io::Error,
|
||||||
|
tokio::task::JoinError,
|
||||||
|
serde_json::Error
|
||||||
|
);
|
||||||
|
@ -1,18 +1,26 @@
|
|||||||
use std::path::{Path, PathBuf};
|
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::fs::create_dir_all;
|
use std::fs::{create_dir_all, File};
|
||||||
|
use std::io::{BufRead, BufReader, Write};
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
use heed::{
|
use heed::types::{ByteSlice, Str};
|
||||||
types::{ByteSlice, Str},
|
use heed::{CompactionOption, Database, Env, EnvOpenOptions};
|
||||||
CompactionOption, Database, Env, EnvOpenOptions,
|
use serde::{Deserialize, Serialize};
|
||||||
};
|
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
use super::{Result, UuidError, UUID_STORE_SIZE};
|
use super::{Result, UuidResolverError, UUID_STORE_SIZE};
|
||||||
use crate::helpers::EnvSizer;
|
use crate::helpers::EnvSizer;
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize)]
|
||||||
|
struct DumpEntry {
|
||||||
|
uuid: Uuid,
|
||||||
|
uid: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
const UUIDS_DB_PATH: &str = "index_uuids";
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
pub trait UuidStore {
|
pub trait UuidStore: Sized {
|
||||||
// Create a new entry for `name`. Return an error if `err` and the entry already exists, return
|
// Create a new entry for `name`. Return an error if `err` and the entry already exists, return
|
||||||
// the uuid otherwise.
|
// the uuid otherwise.
|
||||||
async fn create_uuid(&self, uid: String, err: bool) -> Result<Uuid>;
|
async fn create_uuid(&self, uid: String, err: bool) -> Result<Uuid>;
|
||||||
@ -22,8 +30,10 @@ pub trait UuidStore {
|
|||||||
async fn insert(&self, name: String, uuid: Uuid) -> Result<()>;
|
async fn insert(&self, name: String, uuid: Uuid) -> Result<()>;
|
||||||
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
|
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
|
||||||
async fn get_size(&self) -> Result<u64>;
|
async fn get_size(&self) -> Result<u64>;
|
||||||
|
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
pub struct HeedUuidStore {
|
pub struct HeedUuidStore {
|
||||||
env: Env,
|
env: Env,
|
||||||
db: Database<Str, ByteSlice>,
|
db: Database<Str, ByteSlice>,
|
||||||
@ -31,7 +41,7 @@ pub struct HeedUuidStore {
|
|||||||
|
|
||||||
impl HeedUuidStore {
|
impl HeedUuidStore {
|
||||||
pub fn new(path: impl AsRef<Path>) -> anyhow::Result<Self> {
|
pub fn new(path: impl AsRef<Path>) -> anyhow::Result<Self> {
|
||||||
let path = path.as_ref().join("index_uuids");
|
let path = path.as_ref().join(UUIDS_DB_PATH);
|
||||||
create_dir_all(&path)?;
|
create_dir_all(&path)?;
|
||||||
let mut options = EnvOpenOptions::new();
|
let mut options = EnvOpenOptions::new();
|
||||||
options.map_size(UUID_STORE_SIZE); // 1GB
|
options.map_size(UUID_STORE_SIZE); // 1GB
|
||||||
@ -39,19 +49,15 @@ impl HeedUuidStore {
|
|||||||
let db = env.create_database(None)?;
|
let db = env.create_database(None)?;
|
||||||
Ok(Self { env, db })
|
Ok(Self { env, db })
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
pub fn create_uuid(&self, name: String, err: bool) -> Result<Uuid> {
|
||||||
impl UuidStore for HeedUuidStore {
|
|
||||||
async fn create_uuid(&self, name: String, err: bool) -> Result<Uuid> {
|
|
||||||
let env = self.env.clone();
|
let env = self.env.clone();
|
||||||
let db = self.db;
|
let db = self.db;
|
||||||
tokio::task::spawn_blocking(move || {
|
|
||||||
let mut txn = env.write_txn()?;
|
let mut txn = env.write_txn()?;
|
||||||
match db.get(&txn, &name)? {
|
match db.get(&txn, &name)? {
|
||||||
Some(uuid) => {
|
Some(uuid) => {
|
||||||
if err {
|
if err {
|
||||||
Err(UuidError::NameAlreadyExist)
|
Err(UuidResolverError::NameAlreadyExist)
|
||||||
} else {
|
} else {
|
||||||
let uuid = Uuid::from_slice(uuid)?;
|
let uuid = Uuid::from_slice(uuid)?;
|
||||||
Ok(uuid)
|
Ok(uuid)
|
||||||
@ -64,14 +70,10 @@ impl UuidStore for HeedUuidStore {
|
|||||||
Ok(uuid)
|
Ok(uuid)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
|
||||||
.await?
|
|
||||||
}
|
}
|
||||||
|
pub fn get_uuid(&self, name: String) -> Result<Option<Uuid>> {
|
||||||
async fn get_uuid(&self, name: String) -> Result<Option<Uuid>> {
|
|
||||||
let env = self.env.clone();
|
let env = self.env.clone();
|
||||||
let db = self.db;
|
let db = self.db;
|
||||||
tokio::task::spawn_blocking(move || {
|
|
||||||
let txn = env.read_txn()?;
|
let txn = env.read_txn()?;
|
||||||
match db.get(&txn, &name)? {
|
match db.get(&txn, &name)? {
|
||||||
Some(uuid) => {
|
Some(uuid) => {
|
||||||
@ -80,14 +82,11 @@ impl UuidStore for HeedUuidStore {
|
|||||||
}
|
}
|
||||||
None => Ok(None),
|
None => Ok(None),
|
||||||
}
|
}
|
||||||
})
|
|
||||||
.await?
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn delete(&self, uid: String) -> Result<Option<Uuid>> {
|
pub fn delete(&self, uid: String) -> Result<Option<Uuid>> {
|
||||||
let env = self.env.clone();
|
let env = self.env.clone();
|
||||||
let db = self.db;
|
let db = self.db;
|
||||||
tokio::task::spawn_blocking(move || {
|
|
||||||
let mut txn = env.write_txn()?;
|
let mut txn = env.write_txn()?;
|
||||||
match db.get(&txn, &uid)? {
|
match db.get(&txn, &uid)? {
|
||||||
Some(uuid) => {
|
Some(uuid) => {
|
||||||
@ -98,14 +97,11 @@ impl UuidStore for HeedUuidStore {
|
|||||||
}
|
}
|
||||||
None => Ok(None),
|
None => Ok(None),
|
||||||
}
|
}
|
||||||
})
|
|
||||||
.await?
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn list(&self) -> Result<Vec<(String, Uuid)>> {
|
pub fn list(&self) -> Result<Vec<(String, Uuid)>> {
|
||||||
let env = self.env.clone();
|
let env = self.env.clone();
|
||||||
let db = self.db;
|
let db = self.db;
|
||||||
tokio::task::spawn_blocking(move || {
|
|
||||||
let txn = env.read_txn()?;
|
let txn = env.read_txn()?;
|
||||||
let mut entries = Vec::new();
|
let mut entries = Vec::new();
|
||||||
for entry in db.iter(&txn)? {
|
for entry in db.iter(&txn)? {
|
||||||
@ -114,26 +110,20 @@ impl UuidStore for HeedUuidStore {
|
|||||||
entries.push((name.to_owned(), uuid))
|
entries.push((name.to_owned(), uuid))
|
||||||
}
|
}
|
||||||
Ok(entries)
|
Ok(entries)
|
||||||
})
|
|
||||||
.await?
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn insert(&self, name: String, uuid: Uuid) -> Result<()> {
|
pub fn insert(&self, name: String, uuid: Uuid) -> Result<()> {
|
||||||
let env = self.env.clone();
|
let env = self.env.clone();
|
||||||
let db = self.db;
|
let db = self.db;
|
||||||
tokio::task::spawn_blocking(move || {
|
|
||||||
let mut txn = env.write_txn()?;
|
let mut txn = env.write_txn()?;
|
||||||
db.put(&mut txn, &name, uuid.as_bytes())?;
|
db.put(&mut txn, &name, uuid.as_bytes())?;
|
||||||
txn.commit()?;
|
txn.commit()?;
|
||||||
Ok(())
|
Ok(())
|
||||||
})
|
|
||||||
.await?
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn snapshot(&self, mut path: PathBuf) -> Result<HashSet<Uuid>> {
|
pub fn snapshot(&self, mut path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||||
let env = self.env.clone();
|
let env = self.env.clone();
|
||||||
let db = self.db;
|
let db = self.db;
|
||||||
tokio::task::spawn_blocking(move || {
|
|
||||||
// Write transaction to acquire a lock on the database.
|
// Write transaction to acquire a lock on the database.
|
||||||
let txn = env.write_txn()?;
|
let txn = env.write_txn()?;
|
||||||
let mut entries = HashSet::new();
|
let mut entries = HashSet::new();
|
||||||
@ -145,17 +135,112 @@ impl UuidStore for HeedUuidStore {
|
|||||||
|
|
||||||
// only perform snapshot if there are indexes
|
// only perform snapshot if there are indexes
|
||||||
if !entries.is_empty() {
|
if !entries.is_empty() {
|
||||||
path.push("index_uuids");
|
path.push(UUIDS_DB_PATH);
|
||||||
create_dir_all(&path).unwrap();
|
create_dir_all(&path).unwrap();
|
||||||
path.push("data.mdb");
|
path.push("data.mdb");
|
||||||
env.copy_to_path(path, CompactionOption::Enabled)?;
|
env.copy_to_path(path, CompactionOption::Enabled)?;
|
||||||
}
|
}
|
||||||
Ok(entries)
|
Ok(entries)
|
||||||
})
|
}
|
||||||
.await?
|
|
||||||
|
pub fn get_size(&self) -> Result<u64> {
|
||||||
|
Ok(self.env.size())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||||
|
let dump_path = path.join(UUIDS_DB_PATH);
|
||||||
|
create_dir_all(&dump_path)?;
|
||||||
|
let dump_file_path = dump_path.join("data.jsonl");
|
||||||
|
let mut dump_file = File::create(&dump_file_path)?;
|
||||||
|
let mut uuids = HashSet::new();
|
||||||
|
|
||||||
|
let txn = self.env.read_txn()?;
|
||||||
|
for entry in self.db.iter(&txn)? {
|
||||||
|
let (uid, uuid) = entry?;
|
||||||
|
let uid = uid.to_string();
|
||||||
|
let uuid = Uuid::from_slice(uuid)?;
|
||||||
|
|
||||||
|
let entry = DumpEntry { uuid, uid };
|
||||||
|
serde_json::to_writer(&mut dump_file, &entry)?;
|
||||||
|
dump_file.write_all(b"\n").unwrap();
|
||||||
|
|
||||||
|
uuids.insert(uuid);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(uuids)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> anyhow::Result<()> {
|
||||||
|
let uuid_resolver_path = dst.as_ref().join(UUIDS_DB_PATH);
|
||||||
|
std::fs::create_dir_all(&uuid_resolver_path)?;
|
||||||
|
|
||||||
|
let src_indexes = src.as_ref().join(UUIDS_DB_PATH).join("data.jsonl");
|
||||||
|
let indexes = File::open(&src_indexes)?;
|
||||||
|
let mut indexes = BufReader::new(indexes);
|
||||||
|
let mut line = String::new();
|
||||||
|
|
||||||
|
let db = Self::new(dst)?;
|
||||||
|
let mut txn = db.env.write_txn()?;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
match indexes.read_line(&mut line) {
|
||||||
|
Ok(0) => break,
|
||||||
|
Ok(_) => {
|
||||||
|
let DumpEntry { uuid, uid } = serde_json::from_str(&line)?;
|
||||||
|
println!("importing {} {}", uid, uuid);
|
||||||
|
db.db.put(&mut txn, &uid, uuid.as_bytes())?;
|
||||||
|
}
|
||||||
|
Err(e) => return Err(e.into()),
|
||||||
|
}
|
||||||
|
|
||||||
|
line.clear();
|
||||||
|
}
|
||||||
|
txn.commit()?;
|
||||||
|
|
||||||
|
db.env.prepare_for_closing().wait();
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait::async_trait]
|
||||||
|
impl UuidStore for HeedUuidStore {
|
||||||
|
async fn create_uuid(&self, name: String, err: bool) -> Result<Uuid> {
|
||||||
|
let this = self.clone();
|
||||||
|
tokio::task::spawn_blocking(move || this.create_uuid(name, err)).await?
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_uuid(&self, name: String) -> Result<Option<Uuid>> {
|
||||||
|
let this = self.clone();
|
||||||
|
tokio::task::spawn_blocking(move || this.get_uuid(name)).await?
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn delete(&self, uid: String) -> Result<Option<Uuid>> {
|
||||||
|
let this = self.clone();
|
||||||
|
tokio::task::spawn_blocking(move || this.delete(uid)).await?
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn list(&self) -> Result<Vec<(String, Uuid)>> {
|
||||||
|
let this = self.clone();
|
||||||
|
tokio::task::spawn_blocking(move || this.list()).await?
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn insert(&self, name: String, uuid: Uuid) -> Result<()> {
|
||||||
|
let this = self.clone();
|
||||||
|
tokio::task::spawn_blocking(move || this.insert(name, uuid)).await?
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||||
|
let this = self.clone();
|
||||||
|
tokio::task::spawn_blocking(move || this.snapshot(path)).await?
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn get_size(&self) -> Result<u64> {
|
async fn get_size(&self) -> Result<u64> {
|
||||||
Ok(self.env.size())
|
self.get_size()
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||||
|
let this = self.clone();
|
||||||
|
tokio::task::spawn_blocking(move || this.dump(path)).await?
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -46,8 +46,8 @@ macro_rules! create_app {
|
|||||||
.configure(synonym::services)
|
.configure(synonym::services)
|
||||||
.configure(health::services)
|
.configure(health::services)
|
||||||
.configure(stats::services)
|
.configure(stats::services)
|
||||||
.configure(key::services);
|
.configure(key::services)
|
||||||
//.configure(routes::dump::services);
|
.configure(dump::services);
|
||||||
#[cfg(feature = "mini-dashboard")]
|
#[cfg(feature = "mini-dashboard")]
|
||||||
let app = if $enable_frontend {
|
let app = if $enable_frontend {
|
||||||
let generated = dashboard::generate();
|
let generated = dashboard::generate();
|
||||||
@ -66,7 +66,7 @@ macro_rules! create_app {
|
|||||||
.allowed_headers(vec!["content-type", "x-meili-api-key"])
|
.allowed_headers(vec!["content-type", "x-meili-api-key"])
|
||||||
.allow_any_origin()
|
.allow_any_origin()
|
||||||
.allow_any_method()
|
.allow_any_method()
|
||||||
.max_age(86_400) // 24h
|
.max_age(86_400), // 24h
|
||||||
)
|
)
|
||||||
.wrap(middleware::Logger::default())
|
.wrap(middleware::Logger::default())
|
||||||
.wrap(middleware::Compress::default())
|
.wrap(middleware::Compress::default())
|
||||||
|
@ -202,10 +202,6 @@ pub struct Opt {
|
|||||||
#[structopt(long, conflicts_with = "import-snapshot")]
|
#[structopt(long, conflicts_with = "import-snapshot")]
|
||||||
pub import_dump: Option<PathBuf>,
|
pub import_dump: Option<PathBuf>,
|
||||||
|
|
||||||
/// The batch size used in the importation process, the bigger it is the faster the dump is created.
|
|
||||||
#[structopt(long, env = "MEILI_DUMP_BATCH_SIZE", default_value = "1024")]
|
|
||||||
pub dump_batch_size: usize,
|
|
||||||
|
|
||||||
#[structopt(flatten)]
|
#[structopt(flatten)]
|
||||||
pub indexer_options: IndexerOpts,
|
pub indexer_options: IndexerOpts,
|
||||||
}
|
}
|
||||||
|
@ -1,25 +1,20 @@
|
|||||||
use std::fs::File;
|
use actix_web::HttpResponse;
|
||||||
use std::path::Path;
|
use actix_web::{get, post, web};
|
||||||
|
|
||||||
use actix_web::{get, post};
|
|
||||||
use actix_web::{HttpResponse, web};
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::dump::{DumpInfo, DumpStatus, compressed_dumps_dir, init_dump_process};
|
use crate::error::ResponseError;
|
||||||
use crate::Data;
|
|
||||||
use crate::error::{Error, ResponseError};
|
|
||||||
use crate::helpers::Authentication;
|
use crate::helpers::Authentication;
|
||||||
|
use crate::Data;
|
||||||
|
|
||||||
pub fn services(cfg: &mut web::ServiceConfig) {
|
pub fn services(cfg: &mut web::ServiceConfig) {
|
||||||
cfg.service(trigger_dump)
|
cfg.service(create_dump).service(get_dump_status);
|
||||||
.service(get_dump_status);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[post("/dumps", wrap = "Authentication::Private")]
|
#[post("/dumps", wrap = "Authentication::Private")]
|
||||||
async fn trigger_dump(
|
async fn create_dump(data: web::Data<Data>) -> Result<HttpResponse, ResponseError> {
|
||||||
data: web::Data<Data>,
|
let res = data.create_dump().await?;
|
||||||
) -> Result<HttpResponse, ResponseError> {
|
|
||||||
todo!()
|
Ok(HttpResponse::Accepted().json(res))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize)]
|
||||||
@ -38,5 +33,7 @@ async fn get_dump_status(
|
|||||||
data: web::Data<Data>,
|
data: web::Data<Data>,
|
||||||
path: web::Path<DumpParam>,
|
path: web::Path<DumpParam>,
|
||||||
) -> Result<HttpResponse, ResponseError> {
|
) -> Result<HttpResponse, ResponseError> {
|
||||||
todo!()
|
let res = data.dump_status(path.dump_uid.clone()).await?;
|
||||||
|
|
||||||
|
Ok(HttpResponse::Ok().json(res))
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
use actix_web::{delete, get, post, put};
|
use actix_web::{delete, get, post, put};
|
||||||
use actix_web::{web, HttpResponse};
|
use actix_web::{web, HttpResponse};
|
||||||
use serde::Deserialize;
|
use chrono::{DateTime, Utc};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::error::ResponseError;
|
use crate::error::ResponseError;
|
||||||
use crate::helpers::Authentication;
|
use crate::helpers::Authentication;
|
||||||
@ -68,6 +69,16 @@ struct UpdateIndexRequest {
|
|||||||
primary_key: Option<String>,
|
primary_key: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct UpdateIndexResponse {
|
||||||
|
name: String,
|
||||||
|
uid: String,
|
||||||
|
created_at: DateTime<Utc>,
|
||||||
|
updated_at: DateTime<Utc>,
|
||||||
|
primary_key: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
#[put("/indexes/{index_uid}", wrap = "Authentication::Private")]
|
#[put("/indexes/{index_uid}", wrap = "Authentication::Private")]
|
||||||
async fn update_index(
|
async fn update_index(
|
||||||
data: web::Data<Data>,
|
data: web::Data<Data>,
|
||||||
|
@ -2,6 +2,7 @@ use actix_web::{get, HttpResponse};
|
|||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
pub mod document;
|
pub mod document;
|
||||||
|
pub mod dump;
|
||||||
pub mod health;
|
pub mod health;
|
||||||
pub mod index;
|
pub mod index;
|
||||||
pub mod key;
|
pub mod key;
|
||||||
@ -9,7 +10,6 @@ pub mod search;
|
|||||||
pub mod settings;
|
pub mod settings;
|
||||||
pub mod stats;
|
pub mod stats;
|
||||||
pub mod synonym;
|
pub mod synonym;
|
||||||
//pub mod dump;
|
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
pub struct IndexParam {
|
pub struct IndexParam {
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
use actix_web::{delete, get, post, web, HttpResponse};
|
use actix_web::{delete, get, post, web, HttpResponse};
|
||||||
|
|
||||||
use crate::{error::ResponseError, index::Unchecked};
|
|
||||||
use crate::helpers::Authentication;
|
use crate::helpers::Authentication;
|
||||||
use crate::index::Settings;
|
use crate::index::Settings;
|
||||||
use crate::Data;
|
use crate::Data;
|
||||||
|
use crate::{error::ResponseError, index::Unchecked};
|
||||||
|
|
||||||
#[macro_export]
|
#[macro_export]
|
||||||
macro_rules! make_setting_route {
|
macro_rules! make_setting_route {
|
||||||
|
@ -68,7 +68,6 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
|
|||||||
Opt {
|
Opt {
|
||||||
db_path: dir.as_ref().join("db"),
|
db_path: dir.as_ref().join("db"),
|
||||||
dumps_dir: dir.as_ref().join("dump"),
|
dumps_dir: dir.as_ref().join("dump"),
|
||||||
dump_batch_size: 16,
|
|
||||||
http_addr: "127.0.0.1:7700".to_owned(),
|
http_addr: "127.0.0.1:7700".to_owned(),
|
||||||
master_key: None,
|
master_key: None,
|
||||||
env: "development".to_owned(),
|
env: "development".to_owned(),
|
||||||
|
@ -73,7 +73,7 @@ async fn reset_all_settings() {
|
|||||||
let server = Server::new().await;
|
let server = Server::new().await;
|
||||||
let index = server.index("test");
|
let index = server.index("test");
|
||||||
index
|
index
|
||||||
.update_settings(json!({"displayedAttributes": ["foo"], "searchableAttributes": ["bar"], "stopWords": ["the"] }))
|
.update_settings(json!({"displayedAttributes": ["foo"], "searchableAttributes": ["bar"], "stopWords": ["the"], "attributesForFaceting": { "toto": "string" } }))
|
||||||
.await;
|
.await;
|
||||||
index.wait_update_id(0).await;
|
index.wait_update_id(0).await;
|
||||||
let (response, code) = index.settings().await;
|
let (response, code) = index.settings().await;
|
||||||
@ -81,6 +81,7 @@ async fn reset_all_settings() {
|
|||||||
assert_eq!(response["displayedAttributes"], json!(["foo"]));
|
assert_eq!(response["displayedAttributes"], json!(["foo"]));
|
||||||
assert_eq!(response["searchableAttributes"], json!(["bar"]));
|
assert_eq!(response["searchableAttributes"], json!(["bar"]));
|
||||||
assert_eq!(response["stopWords"], json!(["the"]));
|
assert_eq!(response["stopWords"], json!(["the"]));
|
||||||
|
assert_eq!(response["attributesForFaceting"], json!({"toto": "string"}));
|
||||||
|
|
||||||
index.delete_settings().await;
|
index.delete_settings().await;
|
||||||
index.wait_update_id(1).await;
|
index.wait_update_id(1).await;
|
||||||
@ -90,6 +91,7 @@ async fn reset_all_settings() {
|
|||||||
assert_eq!(response["displayedAttributes"], json!(["*"]));
|
assert_eq!(response["displayedAttributes"], json!(["*"]));
|
||||||
assert_eq!(response["searchableAttributes"], json!(["*"]));
|
assert_eq!(response["searchableAttributes"], json!(["*"]));
|
||||||
assert_eq!(response["stopWords"], json!([]));
|
assert_eq!(response["stopWords"], json!([]));
|
||||||
|
assert_eq!(response["attributesForFaceting"], json!({}));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user