Marin Postma 2021-12-02 16:03:26 +01:00
parent c9f3726447
commit a30e02c18c
88 changed files with 5553 additions and 4496 deletions

View file

@ -9,18 +9,16 @@ use log::{error, trace};
use tokio::sync::{mpsc, oneshot, RwLock};
use super::error::{DumpActorError, Result};
use super::{DumpInfo, DumpMsg, DumpStatus, DumpTask};
use crate::index_controller::index_resolver::index_store::IndexStore;
use crate::index_controller::index_resolver::uuid_store::UuidStore;
use crate::index_controller::index_resolver::IndexResolver;
use crate::index_controller::updates::UpdateSender;
use super::{DumpInfo, DumpJob, DumpMsg, DumpStatus};
use crate::tasks::TaskStore;
use crate::update_file_store::UpdateFileStore;
pub const CONCURRENT_DUMP_MSG: usize = 10;
pub struct DumpActor<U, I> {
pub struct DumpActor {
inbox: Option<mpsc::Receiver<DumpMsg>>,
index_resolver: Arc<IndexResolver<U, I>>,
update: UpdateSender,
update_file_store: UpdateFileStore,
task_store: TaskStore,
dump_path: PathBuf,
analytics_path: PathBuf,
lock: Arc<Mutex<()>>,
@ -34,15 +32,11 @@ fn generate_uid() -> String {
Utc::now().format("%Y%m%d-%H%M%S%3f").to_string()
}
impl<U, I> DumpActor<U, I>
where
U: UuidStore + Sync + Send + 'static,
I: IndexStore + Sync + Send + 'static,
{
impl DumpActor {
pub fn new(
inbox: mpsc::Receiver<DumpMsg>,
index_resolver: Arc<IndexResolver<U, I>>,
update: UpdateSender,
update_file_store: UpdateFileStore,
task_store: TaskStore,
dump_path: impl AsRef<Path>,
analytics_path: impl AsRef<Path>,
index_db_size: usize,
@ -52,8 +46,8 @@ where
let lock = Arc::new(Mutex::new(()));
Self {
inbox: Some(inbox),
index_resolver,
update,
task_store,
update_file_store,
dump_path: dump_path.as_ref().into(),
analytics_path: analytics_path.as_ref().into(),
dump_infos,
@ -120,11 +114,11 @@ where
ret.send(Ok(info)).expect("Dump actor is dead");
let task = DumpTask {
let task = DumpJob {
dump_path: self.dump_path.clone(),
db_path: self.analytics_path.clone(),
index_resolver: self.index_resolver.clone(),
update_sender: self.update.clone(),
update_file_store: self.update_file_store.clone(),
task_store: self.task_store.clone(),
uid: uid.clone(),
update_db_size: self.update_db_size,
index_db_size: self.index_db_size,

View file

@ -0,0 +1,16 @@
pub mod v2;
pub mod v3;
/// Parses the v1 version of the Asc ranking rules `asc(price)`and returns the field name.
pub fn asc_ranking_rule(text: &str) -> Option<&str> {
text.split_once("asc(")
.and_then(|(_, tail)| tail.rsplit_once(")"))
.map(|(field, _)| field)
}
/// Parses the v1 version of the Desc ranking rules `desc(price)`and returns the field name.
pub fn desc_ranking_rule(text: &str) -> Option<&str> {
text.split_once("desc(")
.and_then(|(_, tail)| tail.rsplit_once(")"))
.map(|(field, _)| field)
}

View file

@ -0,0 +1,147 @@
use anyhow::bail;
use chrono::{DateTime, Utc};
use meilisearch_error::Code;
use milli::update::IndexDocumentsMethod;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::index::{Settings, Unchecked};
#[derive(Serialize, Deserialize)]
pub struct UpdateEntry {
pub uuid: Uuid,
pub update: UpdateStatus,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateFormat {
Json,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct DocumentAdditionResult {
pub nb_documents: usize,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateResult {
DocumentsAddition(DocumentAdditionResult),
DocumentDeletion { deleted: u64 },
Other,
}
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum UpdateMeta {
DocumentsAddition {
method: IndexDocumentsMethod,
format: UpdateFormat,
primary_key: Option<String>,
},
ClearDocuments,
DeleteDocuments {
ids: Vec<String>,
},
Settings(Settings<Unchecked>),
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Enqueued {
pub update_id: u64,
pub meta: UpdateMeta,
pub enqueued_at: DateTime<Utc>,
pub content: Option<Uuid>,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Processed {
pub success: UpdateResult,
pub processed_at: DateTime<Utc>,
#[serde(flatten)]
pub from: Processing,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Processing {
#[serde(flatten)]
pub from: Enqueued,
pub started_processing_at: DateTime<Utc>,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Aborted {
#[serde(flatten)]
pub from: Enqueued,
pub aborted_at: DateTime<Utc>,
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Failed {
#[serde(flatten)]
pub from: Processing,
pub error: ResponseError,
pub failed_at: DateTime<Utc>,
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "status", rename_all = "camelCase")]
pub enum UpdateStatus {
Processing(Processing),
Enqueued(Enqueued),
Processed(Processed),
Aborted(Aborted),
Failed(Failed),
}
type StatusCode = ();
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ResponseError {
#[serde(skip)]
pub code: StatusCode,
pub message: String,
pub error_code: String,
pub error_type: String,
pub error_link: String,
}
pub fn error_code_from_str(s: &str) -> anyhow::Result<Code> {
let code = match s {
"index_creation_failed" => Code::CreateIndex,
"index_already_exists" => Code::IndexAlreadyExists,
"index_not_found" => Code::IndexNotFound,
"invalid_index_uid" => Code::InvalidIndexUid,
"invalid_state" => Code::InvalidState,
"missing_primary_key" => Code::MissingPrimaryKey,
"primary_key_already_present" => Code::PrimaryKeyAlreadyPresent,
"invalid_request" => Code::InvalidRankingRule,
"max_fields_limit_exceeded" => Code::MaxFieldsLimitExceeded,
"missing_document_id" => Code::MissingDocumentId,
"invalid_facet" => Code::Filter,
"invalid_filter" => Code::Filter,
"invalid_sort" => Code::Sort,
"bad_parameter" => Code::BadParameter,
"bad_request" => Code::BadRequest,
"document_not_found" => Code::DocumentNotFound,
"internal" => Code::Internal,
"invalid_geo_field" => Code::InvalidGeoField,
"invalid_token" => Code::InvalidToken,
"missing_authorization_header" => Code::MissingAuthorizationHeader,
"payload_too_large" => Code::PayloadTooLarge,
"unretrievable_document" => Code::RetrieveDocument,
"search_error" => Code::SearchDocuments,
"unsupported_media_type" => Code::UnsupportedMediaType,
"dump_already_in_progress" => Code::DumpAlreadyInProgress,
"dump_process_failed" => Code::DumpProcessFailed,
_ => bail!("unknow error code."),
};
Ok(code)
}

View file

@ -0,0 +1,198 @@
use chrono::{DateTime, Utc};
use meilisearch_error::{Code, ResponseError};
use milli::update::IndexDocumentsMethod;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::index::{Settings, Unchecked};
use crate::index_resolver::IndexUid;
use crate::tasks::task::{DocumentDeletion, Task, TaskContent, TaskEvent, TaskId, TaskResult};
use super::v2;
#[derive(Serialize, Deserialize)]
pub struct DumpEntry {
pub uuid: Uuid,
pub uid: String,
}
#[derive(Serialize, Deserialize)]
pub struct UpdateEntry {
pub uuid: Uuid,
pub update: UpdateStatus,
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "status", rename_all = "camelCase")]
pub enum UpdateStatus {
Processing(Processing),
Enqueued(Enqueued),
Processed(Processed),
Failed(Failed),
}
impl From<v2::UpdateResult> for TaskResult {
fn from(other: v2::UpdateResult) -> Self {
match other {
v2::UpdateResult::DocumentsAddition(result) => TaskResult::DocumentAddition {
indexed_documents: result.nb_documents as u64,
},
v2::UpdateResult::DocumentDeletion { deleted } => TaskResult::DocumentDeletion {
deleted_documents: deleted,
},
v2::UpdateResult::Other => TaskResult::Other,
}
}
}
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Update {
DeleteDocuments(Vec<String>),
DocumentAddition {
primary_key: Option<String>,
method: IndexDocumentsMethod,
content_uuid: Uuid,
},
Settings(Settings<Unchecked>),
ClearDocuments,
}
impl From<Update> for TaskContent {
fn from(other: Update) -> Self {
match other {
Update::DeleteDocuments(ids) => {
TaskContent::DocumentDeletion(DocumentDeletion::Ids(ids))
}
Update::DocumentAddition {
primary_key,
method,
..
} => TaskContent::DocumentAddition {
content_uuid: Uuid::default(),
merge_strategy: method,
primary_key,
// document count is unknown for legacy updates
documents_count: 0,
},
Update::Settings(settings) => TaskContent::SettingsUpdate {
settings,
// There is no way to know now, so we assume it isn't
is_deletion: false,
},
Update::ClearDocuments => TaskContent::DocumentDeletion(DocumentDeletion::Clear),
}
}
}
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum UpdateMeta {
DocumentsAddition {
method: IndexDocumentsMethod,
primary_key: Option<String>,
},
ClearDocuments,
DeleteDocuments {
ids: Vec<String>,
},
Settings(Settings<Unchecked>),
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Enqueued {
pub update_id: u64,
pub meta: Update,
pub enqueued_at: DateTime<Utc>,
}
impl Enqueued {
fn update_task(self, task: &mut Task) {
// we do not erase the `TaskId` that was given to us.
task.content = self.meta.into();
task.events.push(TaskEvent::Created(self.enqueued_at));
}
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Processed {
pub success: v2::UpdateResult,
pub processed_at: DateTime<Utc>,
#[serde(flatten)]
pub from: Processing,
}
impl Processed {
fn update_task(self, task: &mut Task) {
self.from.update_task(task);
let event = TaskEvent::Succeded {
result: TaskResult::from(self.success),
timestamp: self.processed_at,
};
task.events.push(event);
}
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Processing {
#[serde(flatten)]
pub from: Enqueued,
pub started_processing_at: DateTime<Utc>,
}
impl Processing {
fn update_task(self, task: &mut Task) {
self.from.update_task(task);
let event = TaskEvent::Processing(self.started_processing_at);
task.events.push(event);
}
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Failed {
#[serde(flatten)]
pub from: Processing,
pub msg: String,
pub code: Code,
pub failed_at: DateTime<Utc>,
}
impl Failed {
fn update_task(self, task: &mut Task) {
self.from.update_task(task);
let event = TaskEvent::Failed {
error: ResponseError::from_msg(self.msg, self.code),
timestamp: self.failed_at,
};
task.events.push(event);
}
}
impl From<(UpdateStatus, String, TaskId)> for Task {
fn from((update, uid, task_id): (UpdateStatus, String, TaskId)) -> Self {
// Dummy task
let mut task = Task {
id: task_id,
index_uid: IndexUid::new(uid).unwrap(),
content: TaskContent::IndexDeletion,
events: Vec::new(),
};
match update {
UpdateStatus::Processing(u) => u.update_task(&mut task),
UpdateStatus::Enqueued(u) => u.update_task(&mut task),
UpdateStatus::Processed(u) => u.update_task(&mut task),
UpdateStatus::Failed(u) => u.update_task(&mut task),
}
task
}
}

View file

@ -1,7 +1,6 @@
use meilisearch_error::{Code, ErrorCode};
use crate::index_controller::index_resolver::error::IndexResolverError;
use crate::index_controller::updates::error::UpdateLoopError;
use crate::{index_resolver::error::IndexResolverError, tasks::error::TaskError};
pub type Result<T> = std::result::Result<T, DumpActorError>;
@ -15,8 +14,6 @@ pub enum DumpActorError {
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
#[error("{0}")]
IndexResolver(#[from] IndexResolverError),
#[error("{0}")]
UpdateLoop(#[from] UpdateLoopError),
}
macro_rules! internal_error {
@ -35,8 +32,11 @@ internal_error!(
heed::Error,
std::io::Error,
tokio::task::JoinError,
tokio::sync::oneshot::error::RecvError,
serde_json::error::Error,
tempfile::PersistError
tempfile::PersistError,
fs_extra::error::Error,
TaskError
);
impl ErrorCode for DumpActorError {
@ -46,7 +46,6 @@ impl ErrorCode for DumpActorError {
DumpActorError::DumpDoesNotExist(_) => Code::DumpNotFound,
DumpActorError::Internal(_) => Code::Internal,
DumpActorError::IndexResolver(e) => e.error_code(),
DumpActorError::UpdateLoop(e) => e.error_code(),
}
}
}

View file

@ -1,16 +1,11 @@
use std::path::Path;
use std::sync::Arc;
use tokio::sync::{mpsc, oneshot};
use crate::index_controller::index_resolver::HardStateIndexResolver;
use super::error::Result;
use super::{DumpActor, DumpActorHandle, DumpInfo, DumpMsg};
use super::{DumpActorHandle, DumpInfo, DumpMsg};
#[derive(Clone)]
pub struct DumpActorHandleImpl {
sender: mpsc::Sender<DumpMsg>,
pub sender: mpsc::Sender<DumpMsg>,
}
#[async_trait::async_trait]
@ -29,29 +24,3 @@ impl DumpActorHandle for DumpActorHandleImpl {
receiver.await.expect("IndexActor has been killed")
}
}
impl DumpActorHandleImpl {
pub fn new(
path: impl AsRef<Path>,
analytics_path: impl AsRef<Path>,
index_resolver: Arc<HardStateIndexResolver>,
update: crate::index_controller::updates::UpdateSender,
index_db_size: usize,
update_db_size: usize,
) -> anyhow::Result<Self> {
let (sender, receiver) = mpsc::channel(10);
let actor = DumpActor::new(
receiver,
index_resolver,
update,
path,
analytics_path,
index_db_size,
update_db_size,
);
tokio::task::spawn(actor.run());
Ok(Self { sender })
}
}

View file

@ -1,19 +1,4 @@
pub mod v1;
pub mod v2;
pub mod v3;
mod compat {
/// Parses the v1 version of the Asc ranking rules `asc(price)`and returns the field name.
pub fn asc_ranking_rule(text: &str) -> Option<&str> {
text.split_once("asc(")
.and_then(|(_, tail)| tail.rsplit_once(")"))
.map(|(field, _)| field)
}
/// Parses the v1 version of the Desc ranking rules `desc(price)`and returns the field name.
pub fn desc_ranking_rule(text: &str) -> Option<&str> {
text.split_once("desc(")
.and_then(|(_, tail)| tail.rsplit_once(")"))
.map(|(field, _)| field)
}
}
pub mod v4;

View file

@ -14,8 +14,7 @@ use uuid::Uuid;
use crate::document_formats::read_ndjson;
use crate::index::apply_settings_to_builder;
use crate::index::update_handler::UpdateHandler;
use crate::index_controller::dump_actor::loaders::compat::{asc_ranking_rule, desc_ranking_rule};
use crate::index_controller::index_resolver::uuid_store::HeedUuidStore;
use crate::index_controller::dump_actor::compat;
use crate::index_controller::{self, IndexMetadata};
use crate::{index::Unchecked, options::IndexerOpts};
@ -27,6 +26,7 @@ pub struct MetadataV1 {
}
impl MetadataV1 {
#[allow(dead_code, unreachable_code, unused_variables)]
pub fn load_dump(
self,
src: impl AsRef<Path>,
@ -34,22 +34,29 @@ impl MetadataV1 {
size: usize,
indexer_options: &IndexerOpts,
) -> anyhow::Result<()> {
let uuid_store = HeedUuidStore::new(&dst)?;
for index in self.indexes {
let uuid = Uuid::new_v4();
uuid_store.insert(index.uid.clone(), uuid)?;
let src = src.as_ref().join(index.uid);
load_index(
&src,
&dst,
uuid,
index.meta.primary_key.as_deref(),
size,
indexer_options,
)?;
}
unreachable!("dump v1 not implemented");
// log::info!("Patching dump V2 to dump V3...");
// let uuid_store = todo!(); // HeedMetaStore::new(&dst)?;
// for index in self.indexes {
// let uuid = Uuid::new_v4();
// // Since we don't know when the index was created, we assume it's from 0
// let meta = IndexMeta {
// uuid,
// creation_task_id: 0,
// };
// // uuid_store.insert(index.uid.clone(), meta)?;
// let src = src.as_ref().join(index.uid);
// load_index(
// &src,
// &dst,
// uuid,
// index.meta.primary_key.as_deref(),
// size,
// indexer_options,
// )?;
// }
Ok(())
// Ok(())
}
}
@ -81,6 +88,7 @@ struct Settings {
pub attributes_for_faceting: Option<Option<Vec<String>>>,
}
#[allow(dead_code)]
fn load_index(
src: impl AsRef<Path>,
dst: impl AsRef<Path>,
@ -105,7 +113,7 @@ fn load_index(
let handler = UpdateHandler::new(indexer_options)?;
let mut builder = handler.update_builder(0).settings(&mut txn, &index);
let mut builder = handler.update_builder().settings(&mut txn, &index);
if let Some(primary_key) = primary_key {
builder.set_primary_key(primary_key.to_string());
@ -113,7 +121,7 @@ fn load_index(
apply_settings_to_builder(&settings.check(), &mut builder);
builder.execute(|_, _| ())?;
builder.execute(|_| ())?;
let reader = BufReader::new(File::open(&src.as_ref().join("documents.jsonl"))?);
@ -129,9 +137,9 @@ fn load_index(
//a primary key error to be thrown.
if !documents_reader.is_empty() {
let builder = update_handler
.update_builder(0)
.update_builder()
.index_documents(&mut txn, &index);
builder.execute(documents_reader, |_, _| ())?;
builder.execute(documents_reader, |_| ())?;
}
txn.commit()?;
@ -174,8 +182,8 @@ impl From<Settings> for index_controller::Settings<Unchecked> {
Some(Some(ranking_rules)) => Setting::Set(ranking_rules.into_iter().filter_map(|criterion| {
match criterion.as_str() {
"words" | "typo" | "proximity" | "attribute" | "exactness" => Some(criterion),
s if s.starts_with("asc") => asc_ranking_rule(s).map(|f| format!("{}:asc", f)),
s if s.starts_with("desc") => desc_ranking_rule(s).map(|f| format!("{}:desc", f)),
s if s.starts_with("asc") => compat::asc_ranking_rule(s).map(|f| format!("{}:asc", f)),
s if s.starts_with("desc") => compat::desc_ranking_rule(s).map(|f| format!("{}:desc", f)),
"wordsPosition" => {
warn!("The criteria `attribute` and `wordsPosition` have been merged \
into a single criterion `attribute` so `wordsPositon` will be \

View file

@ -5,17 +5,10 @@ use std::path::{Path, PathBuf};
use serde_json::{Deserializer, Value};
use tempfile::NamedTempFile;
use crate::index_controller::dump_actor::loaders::compat::{asc_ranking_rule, desc_ranking_rule};
use crate::index_controller::dump_actor::compat::{self, v2, v3};
use crate::index_controller::dump_actor::Metadata;
use crate::index_controller::updates::status::{
Aborted, Enqueued, Failed, Processed, Processing, UpdateResult, UpdateStatus,
};
use crate::index_controller::updates::store::dump::UpdateEntry;
use crate::index_controller::updates::store::Update;
use crate::options::IndexerOpts;
use super::v3;
/// The dump v2 reads the dump folder and patches all the needed file to make it compatible with a
/// dump v3, then calls the dump v3 to actually handle the dump.
pub fn load_dump(
@ -26,6 +19,7 @@ pub fn load_dump(
update_db_size: usize,
indexing_options: &IndexerOpts,
) -> anyhow::Result<()> {
log::info!("Patching dump V2 to dump V3...");
let indexes_path = src.as_ref().join("indexes");
let dir_entries = std::fs::read_dir(indexes_path)?;
@ -47,7 +41,7 @@ pub fn load_dump(
let update_path = update_dir.join("data.jsonl");
patch_updates(update_dir, update_path)?;
v3::load_dump(
super::v3::load_dump(
meta,
src,
dst,
@ -84,12 +78,12 @@ fn patch_updates(dir: impl AsRef<Path>, path: impl AsRef<Path>) -> anyhow::Resul
let mut output_update_file = NamedTempFile::new_in(&dir)?;
let update_file = File::open(&path)?;
let stream = Deserializer::from_reader(update_file).into_iter::<compat::UpdateEntry>();
let stream = Deserializer::from_reader(update_file).into_iter::<v2::UpdateEntry>();
for update in stream {
let update_entry = update?;
let update_entry = UpdateEntry::from(update_entry);
let update_entry = v3::UpdateEntry::from(update_entry);
serde_json::to_writer(&mut output_update_file, &update_entry)?;
output_update_file.write_all(b"\n")?;
@ -110,10 +104,10 @@ fn patch_custom_ranking_rules(ranking_rules: &mut Value) {
Value::Array(values) => values
.into_iter()
.filter_map(|value| match value {
Value::String(s) if s.starts_with("asc") => asc_ranking_rule(&s)
Value::String(s) if s.starts_with("asc") => compat::asc_ranking_rule(&s)
.map(|f| format!("{}:asc", f))
.map(Value::String),
Value::String(s) if s.starts_with("desc") => desc_ranking_rule(&s)
Value::String(s) if s.starts_with("desc") => compat::desc_ranking_rule(&s)
.map(|f| format!("{}:desc", f))
.map(Value::String),
otherwise => Some(otherwise),
@ -123,23 +117,23 @@ fn patch_custom_ranking_rules(ranking_rules: &mut Value) {
}
}
impl From<compat::UpdateEntry> for UpdateEntry {
fn from(compat::UpdateEntry { uuid, update }: compat::UpdateEntry) -> Self {
impl From<v2::UpdateEntry> for v3::UpdateEntry {
fn from(v2::UpdateEntry { uuid, update }: v2::UpdateEntry) -> Self {
let update = match update {
compat::UpdateStatus::Processing(meta) => UpdateStatus::Processing(meta.into()),
compat::UpdateStatus::Enqueued(meta) => UpdateStatus::Enqueued(meta.into()),
compat::UpdateStatus::Processed(meta) => UpdateStatus::Processed(meta.into()),
compat::UpdateStatus::Aborted(meta) => UpdateStatus::Aborted(meta.into()),
compat::UpdateStatus::Failed(meta) => UpdateStatus::Failed(meta.into()),
v2::UpdateStatus::Processing(meta) => v3::UpdateStatus::Processing(meta.into()),
v2::UpdateStatus::Enqueued(meta) => v3::UpdateStatus::Enqueued(meta.into()),
v2::UpdateStatus::Processed(meta) => v3::UpdateStatus::Processed(meta.into()),
v2::UpdateStatus::Aborted(_) => unreachable!("Updates could never be aborted."),
v2::UpdateStatus::Failed(meta) => v3::UpdateStatus::Failed(meta.into()),
};
Self { uuid, update }
}
}
impl From<compat::Failed> for Failed {
fn from(other: compat::Failed) -> Self {
let compat::Failed {
impl From<v2::Failed> for v3::Failed {
fn from(other: v2::Failed) -> Self {
let v2::Failed {
from,
error,
failed_at,
@ -148,27 +142,16 @@ impl From<compat::Failed> for Failed {
Self {
from: from.into(),
msg: error.message,
code: compat::error_code_from_str(&error.error_code)
code: v2::error_code_from_str(&error.error_code)
.expect("Invalid update: Invalid error code"),
failed_at,
}
}
}
impl From<compat::Aborted> for Aborted {
fn from(other: compat::Aborted) -> Self {
let compat::Aborted { from, aborted_at } = other;
Self {
from: from.into(),
aborted_at,
}
}
}
impl From<compat::Processing> for Processing {
fn from(other: compat::Processing) -> Self {
let compat::Processing {
impl From<v2::Processing> for v3::Processing {
fn from(other: v2::Processing) -> Self {
let v2::Processing {
from,
started_processing_at,
} = other;
@ -180,9 +163,9 @@ impl From<compat::Processing> for Processing {
}
}
impl From<compat::Enqueued> for Enqueued {
fn from(other: compat::Enqueued) -> Self {
let compat::Enqueued {
impl From<v2::Enqueued> for v3::Enqueued {
fn from(other: v2::Enqueued) -> Self {
let v2::Enqueued {
update_id,
meta,
enqueued_at,
@ -190,12 +173,12 @@ impl From<compat::Enqueued> for Enqueued {
} = other;
let meta = match meta {
compat::UpdateMeta::DocumentsAddition {
v2::UpdateMeta::DocumentsAddition {
method,
primary_key,
..
} => {
Update::DocumentAddition {
v3::Update::DocumentAddition {
primary_key,
method,
// Just ignore if the uuid is no present. If it is needed later, an error will
@ -203,9 +186,9 @@ impl From<compat::Enqueued> for Enqueued {
content_uuid: content.unwrap_or_default(),
}
}
compat::UpdateMeta::ClearDocuments => Update::ClearDocuments,
compat::UpdateMeta::DeleteDocuments { ids } => Update::DeleteDocuments(ids),
compat::UpdateMeta::Settings(settings) => Update::Settings(settings),
v2::UpdateMeta::ClearDocuments => v3::Update::ClearDocuments,
v2::UpdateMeta::DeleteDocuments { ids } => v3::Update::DeleteDocuments(ids),
v2::UpdateMeta::Settings(settings) => v3::Update::Settings(settings),
};
Self {
@ -216,176 +199,18 @@ impl From<compat::Enqueued> for Enqueued {
}
}
impl From<compat::Processed> for Processed {
fn from(other: compat::Processed) -> Self {
let compat::Processed {
impl From<v2::Processed> for v3::Processed {
fn from(other: v2::Processed) -> Self {
let v2::Processed {
from,
success,
processed_at,
} = other;
Self {
success: success.into(),
success,
processed_at,
from: from.into(),
}
}
}
impl From<compat::UpdateResult> for UpdateResult {
fn from(other: compat::UpdateResult) -> Self {
match other {
compat::UpdateResult::DocumentsAddition(r) => Self::DocumentsAddition(r),
compat::UpdateResult::DocumentDeletion { deleted } => {
Self::DocumentDeletion { deleted }
}
compat::UpdateResult::Other => Self::Other,
}
}
}
/// compat structure from pre-dumpv3 meilisearch
mod compat {
use anyhow::bail;
use chrono::{DateTime, Utc};
use meilisearch_error::Code;
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::index::{Settings, Unchecked};
#[derive(Serialize, Deserialize)]
pub struct UpdateEntry {
pub uuid: Uuid,
pub update: UpdateStatus,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateFormat {
Json,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateResult {
DocumentsAddition(DocumentAdditionResult),
DocumentDeletion { deleted: u64 },
Other,
}
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum UpdateMeta {
DocumentsAddition {
method: IndexDocumentsMethod,
format: UpdateFormat,
primary_key: Option<String>,
},
ClearDocuments,
DeleteDocuments {
ids: Vec<String>,
},
Settings(Settings<Unchecked>),
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Enqueued {
pub update_id: u64,
pub meta: UpdateMeta,
pub enqueued_at: DateTime<Utc>,
pub content: Option<Uuid>,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Processed {
pub success: UpdateResult,
pub processed_at: DateTime<Utc>,
#[serde(flatten)]
pub from: Processing,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Processing {
#[serde(flatten)]
pub from: Enqueued,
pub started_processing_at: DateTime<Utc>,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Aborted {
#[serde(flatten)]
pub from: Enqueued,
pub aborted_at: DateTime<Utc>,
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Failed {
#[serde(flatten)]
pub from: Processing,
pub error: ResponseError,
pub failed_at: DateTime<Utc>,
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "status", rename_all = "camelCase")]
pub enum UpdateStatus {
Processing(Processing),
Enqueued(Enqueued),
Processed(Processed),
Aborted(Aborted),
Failed(Failed),
}
type StatusCode = ();
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ResponseError {
#[serde(skip)]
pub code: StatusCode,
pub message: String,
pub error_code: String,
pub error_type: String,
pub error_link: String,
}
pub fn error_code_from_str(s: &str) -> anyhow::Result<Code> {
let code = match s {
"index_creation_failed" => Code::CreateIndex,
"index_already_exists" => Code::IndexAlreadyExists,
"index_not_found" => Code::IndexNotFound,
"invalid_index_uid" => Code::InvalidIndexUid,
"invalid_state" => Code::InvalidState,
"missing_primary_key" => Code::MissingPrimaryKey,
"primary_key_already_present" => Code::PrimaryKeyAlreadyPresent,
"invalid_request" => Code::InvalidRankingRule,
"max_fields_limit_exceeded" => Code::MaxFieldsLimitExceeded,
"missing_document_id" => Code::MissingDocumentId,
"invalid_facet" => Code::Filter,
"invalid_filter" => Code::Filter,
"invalid_sort" => Code::Sort,
"bad_parameter" => Code::BadParameter,
"bad_request" => Code::BadRequest,
"document_not_found" => Code::DocumentNotFound,
"internal" => Code::Internal,
"invalid_geo_field" => Code::InvalidGeoField,
"invalid_token" => Code::InvalidToken,
"missing_authorization_header" => Code::MissingAuthorizationHeader,
"payload_too_large" => Code::PayloadTooLarge,
"unretrievable_document" => Code::RetrieveDocument,
"search_error" => Code::SearchDocuments,
"unsupported_media_type" => Code::UnsupportedMediaType,
"dump_already_in_progress" => Code::DumpAlreadyInProgress,
"dump_process_failed" => Code::DumpProcessFailed,
_ => bail!("unknow error code."),
};
Ok(code)
}
}

View file

@ -1,33 +1,136 @@
use std::collections::HashMap;
use std::fs::{self, File};
use std::io::{BufReader, BufWriter, Write};
use std::path::Path;
use anyhow::Context;
use fs_extra::dir::{self, CopyOptions};
use log::info;
use tempfile::tempdir;
use uuid::Uuid;
use crate::analytics;
use crate::index_controller::dump_actor::compat::v3;
use crate::index_controller::dump_actor::Metadata;
use crate::index_controller::index_resolver::IndexResolver;
use crate::index_controller::update_file_store::UpdateFileStore;
use crate::index_controller::updates::store::UpdateStore;
use crate::index_resolver::meta_store::{DumpEntry, IndexMeta};
use crate::options::IndexerOpts;
use crate::tasks::task::{Task, TaskId};
/// dump structure for V3:
/// .
/// ├── indexes
/// │   └── 25f10bb8-6ea8-42f0-bd48-ad5857f77648
/// │   ├── documents.jsonl
/// │   └── meta.json
/// ├── index_uuids
/// │   └── data.jsonl
/// ├── metadata.json
/// └── updates
/// └── data.jsonl
pub fn load_dump(
meta: Metadata,
src: impl AsRef<Path>,
dst: impl AsRef<Path>,
index_db_size: usize,
update_db_size: usize,
meta_env_size: usize,
indexing_options: &IndexerOpts,
) -> anyhow::Result<()> {
info!(
"Loading dump from {}, dump database version: {}, dump version: V3",
meta.dump_date, meta.db_version
);
info!("Patching dump V3 to dump V4...");
IndexResolver::load_dump(src.as_ref(), &dst, index_db_size, indexing_options)?;
UpdateFileStore::load_dump(src.as_ref(), &dst)?;
UpdateStore::load_dump(&src, &dst, update_db_size)?;
analytics::copy_user_id(src.as_ref(), dst.as_ref());
let patched_dir = tempdir()?;
info!("Loading indexes.");
let options = CopyOptions::default();
dir::copy(src.as_ref().join("indexes"), patched_dir.path(), &options)?;
dir::copy(
src.as_ref().join("index_uuids"),
patched_dir.path(),
&options,
)?;
let uuid_map = patch_index_meta(
src.as_ref().join("index_uuids/data.jsonl"),
patched_dir.path(),
)?;
fs::copy(
src.as_ref().join("metadata.json"),
patched_dir.path().join("metadata.json"),
)?;
patch_updates(&src, patched_dir.path(), uuid_map)?;
super::v4::load_dump(
meta,
patched_dir.path(),
dst,
index_db_size,
meta_env_size,
indexing_options,
)
}
fn patch_index_meta(
path: impl AsRef<Path>,
dst: impl AsRef<Path>,
) -> anyhow::Result<HashMap<Uuid, String>> {
let file = BufReader::new(File::open(path)?);
let dst = dst.as_ref().join("index_uuids");
fs::create_dir_all(&dst)?;
let mut dst_file = File::create(dst.join("data.jsonl"))?;
let map = serde_json::Deserializer::from_reader(file)
.into_iter::<v3::DumpEntry>()
.try_fold(HashMap::new(), |mut map, entry| -> anyhow::Result<_> {
let entry = entry?;
map.insert(entry.uuid, entry.uid.clone());
let meta = IndexMeta {
uuid: entry.uuid,
// This is lost information, we patch it to 0;
creation_task_id: 0,
};
let entry = DumpEntry {
uid: entry.uid,
index_meta: meta,
};
serde_json::to_writer(&mut dst_file, &entry)?;
dst_file.write_all(b"\n")?;
Ok(map)
})?;
dst_file.flush()?;
Ok(map)
}
fn patch_updates(
src: impl AsRef<Path>,
dst: impl AsRef<Path>,
uuid_map: HashMap<Uuid, String>,
) -> anyhow::Result<()> {
let dst = dst.as_ref().join("updates");
fs::create_dir_all(&dst)?;
let mut dst_file = BufWriter::new(File::create(dst.join("data.jsonl"))?);
let src_file = BufReader::new(File::open(src.as_ref().join("updates/data.jsonl"))?);
serde_json::Deserializer::from_reader(src_file)
.into_iter::<v3::UpdateEntry>()
.enumerate()
.try_for_each(|(task_id, entry)| -> anyhow::Result<()> {
let entry = entry?;
let name = uuid_map
.get(&entry.uuid)
.with_context(|| format!("Unknown index uuid: {}", entry.uuid))?
.clone();
serde_json::to_writer(
&mut dst_file,
&Task::from((entry.update, name, task_id as TaskId)),
)?;
dst_file.write_all(b"\n")?;
Ok(())
})?;
dst_file.flush()?;
Ok(())
}

View file

@ -0,0 +1,45 @@
use std::path::Path;
use heed::EnvOpenOptions;
use log::info;
use crate::analytics;
use crate::index_controller::dump_actor::Metadata;
use crate::index_resolver::IndexResolver;
use crate::options::IndexerOpts;
use crate::tasks::TaskStore;
use crate::update_file_store::UpdateFileStore;
pub fn load_dump(
meta: Metadata,
src: impl AsRef<Path>,
dst: impl AsRef<Path>,
index_db_size: usize,
meta_env_size: usize,
indexing_options: &IndexerOpts,
) -> anyhow::Result<()> {
info!(
"Loading dump from {}, dump database version: {}, dump version: V4",
meta.dump_date, meta.db_version
);
let mut options = EnvOpenOptions::new();
options.map_size(meta_env_size);
options.max_dbs(100);
let env = options.open(&dst)?;
IndexResolver::load_dump(
src.as_ref(),
&dst,
index_db_size,
env.clone(),
indexing_options,
)?;
UpdateFileStore::load_dump(src.as_ref(), &dst)?;
TaskStore::load_dump(&src, env)?;
analytics::copy_user_id(src.as_ref(), dst.as_ref());
info!("Loading indexes.");
Ok(())
}

View file

@ -1,31 +1,30 @@
use std::fs::File;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use chrono::{DateTime, Utc};
use log::{info, trace, warn};
use serde::{Deserialize, Serialize};
use tokio::fs::create_dir_all;
use loaders::v1::MetadataV1;
pub use actor::DumpActor;
pub use handle_impl::*;
pub use message::DumpMsg;
use tokio::fs::create_dir_all;
use tokio::sync::oneshot;
use super::index_resolver::index_store::IndexStore;
use super::index_resolver::uuid_store::UuidStore;
use super::index_resolver::IndexResolver;
use super::updates::UpdateSender;
use crate::analytics;
use crate::compression::{from_tar_gz, to_tar_gz};
use crate::index_controller::dump_actor::error::DumpActorError;
use crate::index_controller::dump_actor::loaders::{v2, v3};
use crate::index_controller::updates::UpdateMsg;
use crate::index_controller::dump_actor::loaders::{v2, v3, v4};
use crate::options::IndexerOpts;
use crate::tasks::task::Job;
use crate::tasks::TaskStore;
use crate::update_file_store::UpdateFileStore;
use error::Result;
mod actor;
mod compat;
pub mod error;
mod handle_impl;
mod loaders;
@ -71,18 +70,19 @@ pub enum MetadataVersion {
V1(MetadataV1),
V2(Metadata),
V3(Metadata),
V4(Metadata),
}
impl MetadataVersion {
pub fn new_v3(index_db_size: usize, update_db_size: usize) -> Self {
pub fn new_v4(index_db_size: usize, update_db_size: usize) -> Self {
let meta = Metadata::new(index_db_size, update_db_size);
Self::V3(meta)
Self::V4(meta)
}
pub fn db_version(&self) -> &str {
match self {
Self::V1(meta) => &meta.db_version,
Self::V2(meta) | Self::V3(meta) => &meta.db_version,
Self::V2(meta) | Self::V3(meta) | Self::V4(meta) => &meta.db_version,
}
}
@ -91,13 +91,16 @@ impl MetadataVersion {
MetadataVersion::V1(_) => "V1",
MetadataVersion::V2(_) => "V2",
MetadataVersion::V3(_) => "V3",
MetadataVersion::V4(_) => "V4",
}
}
pub fn dump_date(&self) -> Option<&DateTime<Utc>> {
match self {
MetadataVersion::V1(_) => None,
MetadataVersion::V2(meta) | MetadataVersion::V3(meta) => Some(&meta.dump_date),
MetadataVersion::V2(meta) | MetadataVersion::V3(meta) | MetadataVersion::V4(meta) => {
Some(&meta.dump_date)
}
}
}
}
@ -190,8 +193,9 @@ pub fn load_dump(
);
match meta {
MetadataVersion::V1(meta) => {
meta.load_dump(&tmp_src_path, tmp_dst.path(), index_db_size, indexer_opts)?
MetadataVersion::V1(_meta) => {
anyhow::bail!("This version (v1) of the dump is too old to be imported.")
// meta.load_dump(&tmp_src_path, tmp_dst.path(), index_db_size, indexer _opts)?
}
MetadataVersion::V2(meta) => v2::load_dump(
meta,
@ -209,6 +213,14 @@ pub fn load_dump(
update_db_size,
indexer_opts,
)?,
MetadataVersion::V4(meta) => v4::load_dump(
meta,
&tmp_src_path,
tmp_dst.path(),
index_db_size,
update_db_size,
indexer_opts,
)?,
}
// Persist and atomically rename the db
let persisted_dump = tmp_dst.into_path();
@ -222,21 +234,17 @@ pub fn load_dump(
Ok(())
}
struct DumpTask<U, I> {
struct DumpJob {
dump_path: PathBuf,
db_path: PathBuf,
index_resolver: Arc<IndexResolver<U, I>>,
update_sender: UpdateSender,
update_file_store: UpdateFileStore,
task_store: TaskStore,
uid: String,
update_db_size: usize,
index_db_size: usize,
}
impl<U, I> DumpTask<U, I>
where
U: UuidStore + Sync + Send + 'static,
I: IndexStore + Sync + Send + 'static,
{
impl DumpJob {
async fn run(self) -> Result<()> {
trace!("Performing dump.");
@ -245,18 +253,32 @@ where
let temp_dump_dir = tokio::task::spawn_blocking(tempfile::TempDir::new).await??;
let temp_dump_path = temp_dump_dir.path().to_owned();
let meta = MetadataVersion::new_v3(self.index_db_size, self.update_db_size);
let meta = MetadataVersion::new_v4(self.index_db_size, self.update_db_size);
let meta_path = temp_dump_path.join(META_FILE_NAME);
let mut meta_file = File::create(&meta_path)?;
serde_json::to_writer(&mut meta_file, &meta)?;
analytics::copy_user_id(&self.db_path, &temp_dump_path);
create_dir_all(&temp_dump_path.join("indexes")).await?;
let uuids = self.index_resolver.dump(temp_dump_path.clone()).await?;
UpdateMsg::dump(&self.update_sender, uuids, temp_dump_path.clone()).await?;
let (sender, receiver) = oneshot::channel();
self.task_store
.register_job(Job::Dump {
ret: sender,
path: temp_dump_path.clone(),
})
.await;
receiver.await??;
self.task_store
.dump(&temp_dump_path, self.update_file_store.clone())
.await?;
let dump_path = tokio::task::spawn_blocking(move || -> Result<PathBuf> {
// for now we simply copy the updates/updates_files
// FIXME: We may copy more files than necessary, if new files are added while we are
// performing the dump. We need a way to filter them out.
let temp_dump_file = tempfile::NamedTempFile::new_in(&self.dump_path)?;
to_tar_gz(temp_dump_path, temp_dump_file.path())
.map_err(|e| DumpActorError::Internal(e.into()))?;
@ -279,17 +301,17 @@ mod test {
use std::collections::HashSet;
use futures::future::{err, ok};
use nelson::Mocker;
use once_cell::sync::Lazy;
use uuid::Uuid;
use super::*;
use crate::index::error::Result as IndexResult;
use crate::index::test::Mocker;
use crate::index::Index;
use crate::index_controller::index_resolver::error::IndexResolverError;
use crate::index_controller::index_resolver::index_store::MockIndexStore;
use crate::index_controller::index_resolver::uuid_store::MockUuidStore;
use crate::index_controller::updates::create_update_handler;
use crate::index_resolver::error::IndexResolverError;
use crate::index_resolver::index_store::MockIndexStore;
use crate::index_resolver::meta_store::MockIndexMetaStore;
use crate::update_file_store::UpdateFileStore;
fn setup() {
static SETUP: Lazy<()> = Lazy::new(|| {
@ -305,6 +327,7 @@ mod test {
}
#[actix_rt::test]
#[ignore]
async fn test_dump_normal() {
setup();
@ -313,12 +336,11 @@ mod test {
let uuids = std::iter::repeat_with(Uuid::new_v4)
.take(4)
.collect::<HashSet<_>>();
let mut uuid_store = MockUuidStore::new();
let uuids_cloned = uuids.clone();
let mut uuid_store = MockIndexMetaStore::new();
uuid_store
.expect_dump()
.once()
.returning(move |_| Box::pin(ok(uuids_cloned.clone())));
.returning(move |_| Box::pin(ok(())));
let mut index_store = MockIndexStore::new();
index_store.expect_get().times(4).returning(move |uuid| {
@ -332,20 +354,25 @@ mod test {
.when::<&Path, IndexResult<()>>("dump")
.once()
.then(move |_| Ok(()));
Box::pin(ok(Some(Index::faux(mocker))))
Box::pin(ok(Some(Index::mock(mocker))))
});
let index_resolver = Arc::new(IndexResolver::new(uuid_store, index_store));
let mocker = Mocker::default();
let update_file_store = UpdateFileStore::mock(mocker);
let update_sender =
create_update_handler(index_resolver.clone(), tmp.path(), 4096 * 100).unwrap();
//let update_sender =
// create_update_handler(index_resolver.clone(), tmp.path(), 4096 * 100).unwrap();
let task = DumpTask {
//TODO: fix dump tests
let mocker = Mocker::default();
let task_store = TaskStore::mock(mocker);
let task = DumpJob {
dump_path: tmp.path().into(),
// this should do nothing
update_file_store,
db_path: tmp.path().into(),
index_resolver,
update_sender,
task_store,
uid: String::from("test"),
update_db_size: 4096 * 10,
index_db_size: 4096 * 10,
@ -355,27 +382,28 @@ mod test {
}
#[actix_rt::test]
#[ignore]
async fn error_performing_dump() {
let tmp = tempfile::tempdir().unwrap();
let mut uuid_store = MockUuidStore::new();
let mut uuid_store = MockIndexMetaStore::new();
uuid_store
.expect_dump()
.once()
.returning(move |_| Box::pin(err(IndexResolverError::ExistingPrimaryKey)));
let index_store = MockIndexStore::new();
let index_resolver = Arc::new(IndexResolver::new(uuid_store, index_store));
let mocker = Mocker::default();
let file_store = UpdateFileStore::mock(mocker);
let update_sender =
create_update_handler(index_resolver.clone(), tmp.path(), 4096 * 100).unwrap();
let mocker = Mocker::default();
let task_store = TaskStore::mock(mocker);
let task = DumpTask {
let task = DumpJob {
dump_path: tmp.path().into(),
// this should do nothing
db_path: tmp.path().into(),
index_resolver,
update_sender,
update_file_store: file_store,
task_store,
uid: String::from("test"),
update_db_size: 4096 * 10,
index_db_size: 4096 * 10,

View file

@ -4,11 +4,14 @@ use meilisearch_error::Code;
use meilisearch_error::ErrorCode;
use tokio::task::JoinError;
use super::DocumentAdditionFormat;
use crate::document_formats::DocumentFormatError;
use crate::index::error::IndexError;
use crate::tasks::error::TaskError;
use crate::update_file_store::UpdateFileStoreError;
use super::dump_actor::error::DumpActorError;
use super::index_resolver::error::IndexResolverError;
use super::updates::error::UpdateLoopError;
use crate::index_resolver::error::IndexResolverError;
pub type Result<T> = std::result::Result<T, IndexControllerError>;
@ -19,26 +22,47 @@ pub enum IndexControllerError {
#[error("{0}")]
IndexResolver(#[from] IndexResolverError),
#[error("{0}")]
UpdateLoop(#[from] UpdateLoopError),
#[error("{0}")]
DumpActor(#[from] DumpActorError),
#[error("{0}")]
IndexError(#[from] IndexError),
#[error("An internal error has occurred. `{0}`.")]
Internal(Box<dyn Error + Send + Sync + 'static>),
#[error("{0}")]
TaskError(#[from] TaskError),
#[error("{0}")]
DumpError(#[from] DumpActorError),
#[error("{0}")]
DocumentFormatError(#[from] DocumentFormatError),
#[error("A {0} payload is missing.")]
MissingPayload(DocumentAdditionFormat),
#[error("The provided payload reached the size limit.")]
PayloadTooLarge,
}
internal_error!(IndexControllerError: JoinError);
internal_error!(IndexControllerError: JoinError, UpdateFileStoreError);
impl From<actix_web::error::PayloadError> for IndexControllerError {
fn from(other: actix_web::error::PayloadError) -> Self {
match other {
actix_web::error::PayloadError::Overflow => Self::PayloadTooLarge,
_ => Self::Internal(Box::new(other)),
}
}
}
impl ErrorCode for IndexControllerError {
fn error_code(&self) -> Code {
match self {
IndexControllerError::MissingUid => Code::BadRequest,
IndexControllerError::IndexResolver(e) => e.error_code(),
IndexControllerError::UpdateLoop(e) => e.error_code(),
IndexControllerError::DumpActor(e) => e.error_code(),
IndexControllerError::IndexError(e) => e.error_code(),
IndexControllerError::Internal(_) => Code::Internal,
IndexControllerError::TaskError(e) => e.error_code(),
IndexControllerError::DocumentFormatError(e) => e.error_code(),
IndexControllerError::MissingPayload(_) => Code::MissingPayload,
IndexControllerError::PayloadTooLarge => Code::PayloadTooLarge,
IndexControllerError::DumpError(DumpActorError::DumpAlreadyRunning) => {
Code::DumpAlreadyInProgress
}
IndexControllerError::DumpError(_) => Code::DumpProcessFailed,
}
}
}

View file

@ -1,68 +0,0 @@
use std::fmt;
use meilisearch_error::{Code, ErrorCode};
use tokio::sync::mpsc::error::SendError as MpscSendError;
use tokio::sync::oneshot::error::RecvError as OneshotRecvError;
use uuid::Uuid;
use crate::{error::MilliError, index::error::IndexError};
pub type Result<T> = std::result::Result<T, IndexResolverError>;
#[derive(thiserror::Error, Debug)]
pub enum IndexResolverError {
#[error("{0}")]
IndexError(#[from] IndexError),
#[error("Index `{0}` already exists.")]
IndexAlreadyExists(String),
#[error("Index `{0}` not found.")]
UnexistingIndex(String),
#[error("A primary key is already present. It's impossible to update it")]
ExistingPrimaryKey,
#[error("An internal error has occurred. `{0}`.")]
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
#[error("The creation of the `{0}` index has failed due to `Index uuid is already assigned`.")]
UuidAlreadyExists(Uuid),
#[error("{0}")]
Milli(#[from] milli::Error),
#[error("`{0}` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).")]
BadlyFormatted(String),
}
impl<T> From<MpscSendError<T>> for IndexResolverError
where
T: Send + Sync + 'static + fmt::Debug,
{
fn from(other: tokio::sync::mpsc::error::SendError<T>) -> Self {
Self::Internal(Box::new(other))
}
}
impl From<OneshotRecvError> for IndexResolverError {
fn from(other: tokio::sync::oneshot::error::RecvError) -> Self {
Self::Internal(Box::new(other))
}
}
internal_error!(
IndexResolverError: heed::Error,
uuid::Error,
std::io::Error,
tokio::task::JoinError,
serde_json::Error
);
impl ErrorCode for IndexResolverError {
fn error_code(&self) -> Code {
match self {
IndexResolverError::IndexError(e) => e.error_code(),
IndexResolverError::IndexAlreadyExists(_) => Code::IndexAlreadyExists,
IndexResolverError::UnexistingIndex(_) => Code::IndexNotFound,
IndexResolverError::ExistingPrimaryKey => Code::PrimaryKeyAlreadyPresent,
IndexResolverError::Internal(_) => Code::Internal,
IndexResolverError::UuidAlreadyExists(_) => Code::CreateIndex,
IndexResolverError::Milli(e) => MilliError(e).error_code(),
IndexResolverError::BadlyFormatted(_) => Code::InvalidIndexUid,
}
}
}

View file

@ -1,125 +0,0 @@
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use milli::update::UpdateBuilder;
use tokio::fs;
use tokio::sync::RwLock;
use tokio::task::spawn_blocking;
use uuid::Uuid;
use super::error::{IndexResolverError, Result};
use crate::index::update_handler::UpdateHandler;
use crate::index::Index;
use crate::index_controller::update_file_store::UpdateFileStore;
use crate::options::IndexerOpts;
type AsyncMap<K, V> = Arc<RwLock<HashMap<K, V>>>;
#[async_trait::async_trait]
#[cfg_attr(test, mockall::automock)]
pub trait IndexStore {
async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> Result<Index>;
async fn get(&self, uuid: Uuid) -> Result<Option<Index>>;
async fn delete(&self, uuid: Uuid) -> Result<Option<Index>>;
}
pub struct MapIndexStore {
index_store: AsyncMap<Uuid, Index>,
path: PathBuf,
index_size: usize,
update_file_store: Arc<UpdateFileStore>,
update_handler: Arc<UpdateHandler>,
}
impl MapIndexStore {
pub fn new(
path: impl AsRef<Path>,
index_size: usize,
indexer_opts: &IndexerOpts,
) -> anyhow::Result<Self> {
let update_handler = Arc::new(UpdateHandler::new(indexer_opts)?);
let update_file_store = Arc::new(UpdateFileStore::new(path.as_ref()).unwrap());
let path = path.as_ref().join("indexes/");
let index_store = Arc::new(RwLock::new(HashMap::new()));
Ok(Self {
index_store,
path,
index_size,
update_file_store,
update_handler,
})
}
}
#[async_trait::async_trait]
impl IndexStore for MapIndexStore {
async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> Result<Index> {
// We need to keep the lock until we are sure the db file has been opened correclty, to
// ensure that another db is not created at the same time.
let mut lock = self.index_store.write().await;
if let Some(index) = lock.get(&uuid) {
return Ok(index.clone());
}
let path = self.path.join(format!("{}", uuid));
if path.exists() {
return Err(IndexResolverError::UuidAlreadyExists(uuid));
}
let index_size = self.index_size;
let file_store = self.update_file_store.clone();
let update_handler = self.update_handler.clone();
let index = spawn_blocking(move || -> Result<Index> {
let index = Index::open(path, index_size, file_store, uuid, update_handler)?;
if let Some(primary_key) = primary_key {
let inner = index.inner();
let mut txn = inner.write_txn()?;
let mut builder = UpdateBuilder::new(0).settings(&mut txn, index.inner());
builder.set_primary_key(primary_key);
builder.execute(|_, _| ())?;
txn.commit()?;
}
Ok(index)
})
.await??;
lock.insert(uuid, index.clone());
Ok(index)
}
async fn get(&self, uuid: Uuid) -> Result<Option<Index>> {
let guard = self.index_store.read().await;
match guard.get(&uuid) {
Some(index) => Ok(Some(index.clone())),
None => {
// drop the guard here so we can perform the write after without deadlocking;
drop(guard);
let path = self.path.join(format!("{}", uuid));
if !path.exists() {
return Ok(None);
}
let index_size = self.index_size;
let file_store = self.update_file_store.clone();
let update_handler = self.update_handler.clone();
let index = spawn_blocking(move || {
Index::open(path, index_size, file_store, uuid, update_handler)
})
.await??;
self.index_store.write().await.insert(uuid, index.clone());
Ok(Some(index))
}
}
}
async fn delete(&self, uuid: Uuid) -> Result<Option<Index>> {
let db_path = self.path.join(format!("{}", uuid));
fs::remove_dir_all(db_path).await?;
let index = self.index_store.write().await.remove(&uuid);
Ok(index)
}
}

View file

@ -1,37 +0,0 @@
use std::{collections::HashSet, path::PathBuf};
use tokio::sync::oneshot;
use uuid::Uuid;
use crate::index::Index;
use super::error::Result;
pub enum IndexResolverMsg {
Get {
uid: String,
ret: oneshot::Sender<Result<Index>>,
},
Delete {
uid: String,
ret: oneshot::Sender<Result<Index>>,
},
List {
ret: oneshot::Sender<Result<Vec<(String, Index)>>>,
},
Insert {
uuid: Uuid,
name: String,
ret: oneshot::Sender<Result<()>>,
},
SnapshotRequest {
path: PathBuf,
ret: oneshot::Sender<Result<HashSet<Index>>>,
},
GetSize {
ret: oneshot::Sender<Result<u64>>,
},
DumpRequest {
path: PathBuf,
ret: oneshot::Sender<Result<HashSet<Index>>>,
},
}

View file

@ -1,185 +0,0 @@
pub mod error;
pub mod index_store;
pub mod uuid_store;
use std::path::Path;
use error::{IndexResolverError, Result};
use index_store::{IndexStore, MapIndexStore};
use log::error;
use uuid::Uuid;
use uuid_store::{HeedUuidStore, UuidStore};
use crate::{
index::{update_handler::UpdateHandler, Index},
options::IndexerOpts,
};
pub type HardStateIndexResolver = IndexResolver<HeedUuidStore, MapIndexStore>;
pub fn create_index_resolver(
path: impl AsRef<Path>,
index_size: usize,
indexer_opts: &IndexerOpts,
) -> anyhow::Result<HardStateIndexResolver> {
let uuid_store = HeedUuidStore::new(&path)?;
let index_store = MapIndexStore::new(&path, index_size, indexer_opts)?;
Ok(IndexResolver::new(uuid_store, index_store))
}
pub struct IndexResolver<U, I> {
index_uuid_store: U,
index_store: I,
}
impl IndexResolver<HeedUuidStore, MapIndexStore> {
pub fn load_dump(
src: impl AsRef<Path>,
dst: impl AsRef<Path>,
index_db_size: usize,
indexer_opts: &IndexerOpts,
) -> anyhow::Result<()> {
HeedUuidStore::load_dump(&src, &dst)?;
let indexes_path = src.as_ref().join("indexes");
let indexes = indexes_path.read_dir()?;
let update_handler = UpdateHandler::new(indexer_opts)?;
for index in indexes {
let index = index?;
Index::load_dump(&index.path(), &dst, index_db_size, &update_handler)?;
}
Ok(())
}
}
impl<U, I> IndexResolver<U, I>
where
U: UuidStore,
I: IndexStore,
{
pub fn new(index_uuid_store: U, index_store: I) -> Self {
Self {
index_uuid_store,
index_store,
}
}
pub async fn dump(&self, path: impl AsRef<Path>) -> Result<Vec<Index>> {
let uuids = self.index_uuid_store.dump(path.as_ref().to_owned()).await?;
let mut indexes = Vec::new();
for uuid in uuids {
indexes.push(self.get_index_by_uuid(uuid).await?);
}
Ok(indexes)
}
pub async fn get_uuids_size(&self) -> Result<u64> {
Ok(self.index_uuid_store.get_size().await?)
}
pub async fn snapshot(&self, path: impl AsRef<Path>) -> Result<Vec<Index>> {
let uuids = self
.index_uuid_store
.snapshot(path.as_ref().to_owned())
.await?;
let mut indexes = Vec::new();
for uuid in uuids {
indexes.push(self.get_index_by_uuid(uuid).await?);
}
Ok(indexes)
}
pub async fn create_index(&self, uid: String, primary_key: Option<String>) -> Result<Index> {
if !is_index_uid_valid(&uid) {
return Err(IndexResolverError::BadlyFormatted(uid));
}
let uuid = Uuid::new_v4();
let index = self.index_store.create(uuid, primary_key).await?;
match self.index_uuid_store.insert(uid, uuid).await {
Err(e) => {
match self.index_store.delete(uuid).await {
Ok(Some(index)) => {
index.inner().clone().prepare_for_closing();
}
Ok(None) => (),
Err(e) => error!("Error while deleting index: {:?}", e),
}
Err(e)
}
Ok(()) => Ok(index),
}
}
pub async fn list(&self) -> Result<Vec<(String, Index)>> {
let uuids = self.index_uuid_store.list().await?;
let mut indexes = Vec::new();
for (name, uuid) in uuids {
match self.index_store.get(uuid).await? {
Some(index) => indexes.push((name, index)),
None => {
// we found an unexisting index, we remove it from the uuid store
let _ = self.index_uuid_store.delete(name).await;
}
}
}
Ok(indexes)
}
pub async fn delete_index(&self, uid: String) -> Result<Uuid> {
match self.index_uuid_store.delete(uid.clone()).await? {
Some(uuid) => {
match self.index_store.delete(uuid).await {
Ok(Some(index)) => {
index.inner().clone().prepare_for_closing();
}
Ok(None) => (),
Err(e) => error!("Error while deleting index: {:?}", e),
}
Ok(uuid)
}
None => Err(IndexResolverError::UnexistingIndex(uid)),
}
}
pub async fn get_index_by_uuid(&self, uuid: Uuid) -> Result<Index> {
// TODO: Handle this error better.
self.index_store
.get(uuid)
.await?
.ok_or_else(|| IndexResolverError::UnexistingIndex(String::new()))
}
pub async fn get_index(&self, uid: String) -> Result<Index> {
match self.index_uuid_store.get_uuid(uid).await? {
(name, Some(uuid)) => {
match self.index_store.get(uuid).await? {
Some(index) => Ok(index),
None => {
// For some reason we got a uuid to an unexisting index, we return an error,
// and remove the uuid from the uuid store.
let _ = self.index_uuid_store.delete(name.clone()).await;
Err(IndexResolverError::UnexistingIndex(name))
}
}
}
(name, _) => Err(IndexResolverError::UnexistingIndex(name)),
}
}
pub async fn get_uuid(&self, uid: String) -> Result<Uuid> {
match self.index_uuid_store.get_uuid(uid).await? {
(_, Some(uuid)) => Ok(uuid),
(name, _) => Err(IndexResolverError::UnexistingIndex(name)),
}
}
}
fn is_index_uid_valid(uid: &str) -> bool {
uid.chars()
.all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_')
}

View file

@ -1,227 +0,0 @@
use std::collections::HashSet;
use std::fs::{create_dir_all, File};
use std::io::{BufRead, BufReader, Write};
use std::path::{Path, PathBuf};
use heed::types::{ByteSlice, Str};
use heed::{CompactionOption, Database, Env, EnvOpenOptions};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use super::error::{IndexResolverError, Result};
use crate::EnvSizer;
const UUID_STORE_SIZE: usize = 1_073_741_824; //1GiB
#[derive(Serialize, Deserialize)]
struct DumpEntry {
uuid: Uuid,
uid: String,
}
const UUIDS_DB_PATH: &str = "index_uuids";
#[async_trait::async_trait]
#[cfg_attr(test, mockall::automock)]
pub trait UuidStore: Sized {
// Create a new entry for `name`. Return an error if `err` and the entry already exists, return
// the uuid otherwise.
async fn get_uuid(&self, uid: String) -> Result<(String, Option<Uuid>)>;
async fn delete(&self, uid: String) -> Result<Option<Uuid>>;
async fn list(&self) -> Result<Vec<(String, Uuid)>>;
async fn insert(&self, name: String, uuid: Uuid) -> Result<()>;
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
async fn get_size(&self) -> Result<u64>;
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
}
#[derive(Clone)]
pub struct HeedUuidStore {
env: Env,
db: Database<Str, ByteSlice>,
}
impl HeedUuidStore {
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
let path = path.as_ref().join(UUIDS_DB_PATH);
create_dir_all(&path)?;
let mut options = EnvOpenOptions::new();
options.map_size(UUID_STORE_SIZE); // 1GB
options.max_dbs(1);
let env = options.open(path)?;
let db = env.create_database(Some("uuids"))?;
Ok(Self { env, db })
}
pub fn get_uuid(&self, name: &str) -> Result<Option<Uuid>> {
let env = self.env.clone();
let db = self.db;
let txn = env.read_txn()?;
match db.get(&txn, name)? {
Some(uuid) => {
let uuid = Uuid::from_slice(uuid)?;
Ok(Some(uuid))
}
None => Ok(None),
}
}
pub fn delete(&self, uid: String) -> Result<Option<Uuid>> {
let env = self.env.clone();
let db = self.db;
let mut txn = env.write_txn()?;
match db.get(&txn, &uid)? {
Some(uuid) => {
let uuid = Uuid::from_slice(uuid)?;
db.delete(&mut txn, &uid)?;
txn.commit()?;
Ok(Some(uuid))
}
None => Ok(None),
}
}
pub fn list(&self) -> Result<Vec<(String, Uuid)>> {
let env = self.env.clone();
let db = self.db;
let txn = env.read_txn()?;
let mut entries = Vec::new();
for entry in db.iter(&txn)? {
let (name, uuid) = entry?;
let uuid = Uuid::from_slice(uuid)?;
entries.push((name.to_owned(), uuid))
}
Ok(entries)
}
pub fn insert(&self, name: String, uuid: Uuid) -> Result<()> {
let env = self.env.clone();
let db = self.db;
let mut txn = env.write_txn()?;
if db.get(&txn, &name)?.is_some() {
return Err(IndexResolverError::IndexAlreadyExists(name));
}
db.put(&mut txn, &name, uuid.as_bytes())?;
txn.commit()?;
Ok(())
}
pub fn snapshot(&self, mut path: PathBuf) -> Result<HashSet<Uuid>> {
let env = self.env.clone();
let db = self.db;
// Write transaction to acquire a lock on the database.
let txn = env.write_txn()?;
let mut entries = HashSet::new();
for entry in db.iter(&txn)? {
let (_, uuid) = entry?;
let uuid = Uuid::from_slice(uuid)?;
entries.insert(uuid);
}
// only perform snapshot if there are indexes
if !entries.is_empty() {
path.push(UUIDS_DB_PATH);
create_dir_all(&path).unwrap();
path.push("data.mdb");
env.copy_to_path(path, CompactionOption::Enabled)?;
}
Ok(entries)
}
pub fn get_size(&self) -> Result<u64> {
Ok(self.env.size())
}
pub fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
let dump_path = path.join(UUIDS_DB_PATH);
create_dir_all(&dump_path)?;
let dump_file_path = dump_path.join("data.jsonl");
let mut dump_file = File::create(&dump_file_path)?;
let mut uuids = HashSet::new();
let txn = self.env.read_txn()?;
for entry in self.db.iter(&txn)? {
let (uid, uuid) = entry?;
let uid = uid.to_string();
let uuid = Uuid::from_slice(uuid)?;
let entry = DumpEntry { uuid, uid };
serde_json::to_writer(&mut dump_file, &entry)?;
dump_file.write_all(b"\n").unwrap();
uuids.insert(uuid);
}
Ok(uuids)
}
pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> Result<()> {
let uuid_resolver_path = dst.as_ref().join(UUIDS_DB_PATH);
std::fs::create_dir_all(&uuid_resolver_path)?;
let src_indexes = src.as_ref().join(UUIDS_DB_PATH).join("data.jsonl");
let indexes = File::open(&src_indexes)?;
let mut indexes = BufReader::new(indexes);
let mut line = String::new();
let db = Self::new(dst)?;
let mut txn = db.env.write_txn()?;
loop {
match indexes.read_line(&mut line) {
Ok(0) => break,
Ok(_) => {
let DumpEntry { uuid, uid } = serde_json::from_str(&line)?;
db.db.put(&mut txn, &uid, uuid.as_bytes())?;
}
Err(e) => return Err(e.into()),
}
line.clear();
}
txn.commit()?;
db.env.prepare_for_closing().wait();
Ok(())
}
}
#[async_trait::async_trait]
impl UuidStore for HeedUuidStore {
async fn get_uuid(&self, name: String) -> Result<(String, Option<Uuid>)> {
let this = self.clone();
tokio::task::spawn_blocking(move || this.get_uuid(&name).map(|res| (name, res))).await?
}
async fn delete(&self, uid: String) -> Result<Option<Uuid>> {
let this = self.clone();
tokio::task::spawn_blocking(move || this.delete(uid)).await?
}
async fn list(&self) -> Result<Vec<(String, Uuid)>> {
let this = self.clone();
tokio::task::spawn_blocking(move || this.list()).await?
}
async fn insert(&self, name: String, uuid: Uuid) -> Result<()> {
let this = self.clone();
tokio::task::spawn_blocking(move || this.insert(name, uuid)).await?
}
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
let this = self.clone();
tokio::task::spawn_blocking(move || this.snapshot(path)).await?
}
async fn get_size(&self) -> Result<u64> {
self.get_size()
}
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
let this = self.clone();
tokio::task::spawn_blocking(move || this.dump(path)).await?
}
}

View file

@ -1,5 +1,6 @@
use std::collections::BTreeMap;
use std::fmt;
use std::io::Cursor;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
@ -8,44 +9,39 @@ use actix_web::error::PayloadError;
use bytes::Bytes;
use chrono::{DateTime, Utc};
use futures::Stream;
use log::info;
use futures::StreamExt;
use milli::update::IndexDocumentsMethod;
use serde::{Deserialize, Serialize};
use tokio::sync::mpsc;
use tokio::task::spawn_blocking;
use tokio::time::sleep;
use uuid::Uuid;
use dump_actor::DumpActorHandle;
pub use dump_actor::{DumpInfo, DumpStatus};
use snapshot::load_snapshot;
use crate::index::error::Result as IndexResult;
use crate::document_formats::{read_csv, read_json, read_ndjson};
use crate::index::{
Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked,
};
use crate::index_controller::index_resolver::create_index_resolver;
use crate::index_controller::snapshot::SnapshotService;
use crate::index_controller::dump_actor::{load_dump, DumpActor, DumpActorHandleImpl};
use crate::options::IndexerOpts;
use crate::snapshot::{load_snapshot, SnapshotService};
use crate::tasks::create_task_store;
use crate::tasks::error::TaskError;
use crate::tasks::task::{DocumentDeletion, Task, TaskContent, TaskId};
use crate::tasks::{TaskFilter, TaskStore};
use error::Result;
use self::dump_actor::load_dump;
use self::index_resolver::error::IndexResolverError;
use self::index_resolver::index_store::{IndexStore, MapIndexStore};
use self::index_resolver::uuid_store::{HeedUuidStore, UuidStore};
use self::index_resolver::IndexResolver;
use self::updates::status::UpdateStatus;
use self::updates::UpdateMsg;
use self::dump_actor::{DumpActorHandle, DumpInfo};
use self::error::IndexControllerError;
use crate::index_resolver::index_store::{IndexStore, MapIndexStore};
use crate::index_resolver::meta_store::{HeedMetaStore, IndexMetaStore};
use crate::index_resolver::{create_index_resolver, IndexResolver, IndexUid};
use crate::update_file_store::UpdateFileStore;
mod dump_actor;
pub mod error;
mod index_resolver;
mod snapshot;
pub mod update_file_store;
pub mod updates;
/// Concrete implementation of the IndexController, exposed by meilisearch-lib
pub type MeiliSearch =
IndexController<HeedUuidStore, MapIndexStore, dump_actor::DumpActorHandleImpl>;
pub type MeiliSearch = IndexController<HeedMetaStore, MapIndexStore>;
pub type Payload = Box<
dyn Stream<Item = std::result::Result<Bytes, PayloadError>> + Send + Sync + 'static + Unpin,
@ -68,6 +64,25 @@ pub struct IndexSettings {
pub primary_key: Option<String>,
}
pub struct IndexController<U, I> {
index_resolver: Arc<IndexResolver<U, I>>,
task_store: TaskStore,
dump_handle: dump_actor::DumpActorHandleImpl,
update_file_store: UpdateFileStore,
}
/// Need a custom implementation for clone because deriving require that U and I are clone.
impl<U, I> Clone for IndexController<U, I> {
fn clone(&self) -> Self {
Self {
index_resolver: self.index_resolver.clone(),
task_store: self.task_store.clone(),
dump_handle: self.dump_handle.clone(),
update_file_store: self.update_file_store.clone(),
}
}
}
#[derive(Debug)]
pub enum DocumentAdditionFormat {
Json,
@ -99,7 +114,11 @@ pub struct Stats {
pub enum Update {
DeleteDocuments(Vec<String>),
ClearDocuments,
Settings(Settings<Unchecked>),
Settings {
settings: Settings<Unchecked>,
/// Indicates whether the update was a deletion
is_deletion: bool,
},
DocumentAddition {
#[derivative(Debug = "ignore")]
payload: Payload,
@ -107,12 +126,19 @@ pub enum Update {
method: IndexDocumentsMethod,
format: DocumentAdditionFormat,
},
DeleteIndex,
CreateIndex {
primary_key: Option<String>,
},
UpdateIndex {
primary_key: Option<String>,
},
}
#[derive(Default, Debug)]
pub struct IndexControllerBuilder {
max_index_size: Option<usize>,
max_update_store_size: Option<usize>,
max_task_store_size: Option<usize>,
snapshot_dir: Option<PathBuf>,
import_snapshot: Option<PathBuf>,
snapshot_interval: Option<Duration>,
@ -132,12 +158,12 @@ impl IndexControllerBuilder {
let index_size = self
.max_index_size
.ok_or_else(|| anyhow::anyhow!("Missing index size"))?;
let update_store_size = self
.max_index_size
let task_store_size = self
.max_task_store_size
.ok_or_else(|| anyhow::anyhow!("Missing update database size"))?;
if let Some(ref path) = self.import_snapshot {
info!("Loading from snapshot {:?}", path);
log::info!("Loading from snapshot {:?}", path);
load_snapshot(
db_path.as_ref(),
path,
@ -149,67 +175,84 @@ impl IndexControllerBuilder {
db_path.as_ref(),
src_path,
index_size,
update_store_size,
task_store_size,
&indexer_options,
)?;
}
std::fs::create_dir_all(db_path.as_ref())?;
let mut options = heed::EnvOpenOptions::new();
options.map_size(task_store_size);
options.max_dbs(20);
let meta_env = options.open(&db_path)?;
let update_file_store = UpdateFileStore::new(&db_path)?;
let index_resolver = Arc::new(create_index_resolver(
&db_path,
index_size,
&indexer_options,
meta_env.clone(),
update_file_store.clone(),
)?);
#[allow(unreachable_code)]
let update_sender =
updates::create_update_handler(index_resolver.clone(), &db_path, update_store_size)?;
let task_store =
create_task_store(meta_env, index_resolver.clone()).map_err(|e| anyhow::anyhow!(e))?;
let dump_path = self
.dump_dst
.ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?;
let analytics_path = db_path.as_ref().join("instance-uid");
let dump_handle = dump_actor::DumpActorHandleImpl::new(
dump_path,
analytics_path,
index_resolver.clone(),
update_sender.clone(),
index_size,
update_store_size,
)?;
let dump_handle = {
let analytics_path = &db_path;
let (sender, receiver) = mpsc::channel(10);
let actor = DumpActor::new(
receiver,
update_file_store.clone(),
task_store.clone(),
dump_path,
analytics_path,
index_size,
task_store_size,
);
let dump_handle = Arc::new(dump_handle);
tokio::task::spawn(actor.run());
DumpActorHandleImpl { sender }
};
if self.schedule_snapshot {
let snapshot_service = SnapshotService::new(
index_resolver.clone(),
update_sender.clone(),
self.snapshot_interval
.ok_or_else(|| anyhow::anyhow!("Snapshot interval not provided."))?,
self.snapshot_dir
.ok_or_else(|| anyhow::anyhow!("Snapshot path not provided."))?,
db_path.as_ref().into(),
db_path
.as_ref()
.file_name()
.map(|n| n.to_owned().into_string().expect("invalid path"))
.unwrap_or_else(|| String::from("data.ms")),
);
let snapshot_period = self
.snapshot_interval
.ok_or_else(|| anyhow::anyhow!("Snapshot interval not provided."))?;
let snapshot_path = self
.snapshot_dir
.ok_or_else(|| anyhow::anyhow!("Snapshot path not provided."))?;
let snapshot_service = SnapshotService {
db_path: db_path.as_ref().to_path_buf(),
snapshot_period,
snapshot_path,
index_size,
meta_env_size: task_store_size,
task_store: task_store.clone(),
};
tokio::task::spawn(snapshot_service.run());
}
Ok(IndexController {
index_resolver,
update_sender,
task_store,
dump_handle,
update_file_store,
})
}
/// Set the index controller builder's max update store size.
pub fn set_max_update_store_size(&mut self, max_update_store_size: usize) -> &mut Self {
self.max_update_store_size.replace(max_update_store_size);
pub fn set_max_task_store_size(&mut self, max_update_store_size: usize) -> &mut Self {
self.max_task_store_size.replace(max_update_store_size);
self
}
@ -270,61 +313,133 @@ impl IndexControllerBuilder {
}
}
// We are using derivative here to derive Clone, because U, I and D do not necessarily implement
// Clone themselves.
#[derive(derivative::Derivative)]
#[derivative(Clone(bound = ""))]
pub struct IndexController<U, I, D> {
index_resolver: Arc<IndexResolver<U, I>>,
update_sender: updates::UpdateSender,
dump_handle: Arc<D>,
}
impl<U, I, D> IndexController<U, I, D>
impl<U, I> IndexController<U, I>
where
U: UuidStore + Sync + Send + 'static,
I: IndexStore + Sync + Send + 'static,
D: DumpActorHandle + Send + Sync,
U: IndexMetaStore,
I: IndexStore,
{
pub fn builder() -> IndexControllerBuilder {
IndexControllerBuilder::default()
}
pub async fn register_update(
&self,
uid: String,
update: Update,
create_index: bool,
) -> Result<UpdateStatus> {
match self.index_resolver.get_uuid(uid).await {
Ok(uuid) => {
let update_result = UpdateMsg::update(&self.update_sender, uuid, update).await?;
Ok(update_result)
pub async fn register_update(&self, uid: String, update: Update) -> Result<Task> {
let uid = IndexUid::new(uid)?;
let content = match update {
Update::DeleteDocuments(ids) => {
TaskContent::DocumentDeletion(DocumentDeletion::Ids(ids))
}
Err(IndexResolverError::UnexistingIndex(name)) => {
if create_index {
let index = self.index_resolver.create_index(name, None).await?;
let update_result =
UpdateMsg::update(&self.update_sender, index.uuid(), update).await?;
Ok(update_result)
} else {
Err(IndexResolverError::UnexistingIndex(name).into())
Update::ClearDocuments => TaskContent::DocumentDeletion(DocumentDeletion::Clear),
Update::Settings {
settings,
is_deletion,
} => TaskContent::SettingsUpdate {
settings,
is_deletion,
},
Update::DocumentAddition {
mut payload,
primary_key,
format,
method,
} => {
let mut buffer = Vec::new();
while let Some(bytes) = payload.next().await {
let bytes = bytes?;
buffer.extend_from_slice(&bytes);
}
let (content_uuid, mut update_file) = self.update_file_store.new_update()?;
let documents_count = tokio::task::spawn_blocking(move || -> Result<_> {
// check if the payload is empty, and return an error
if buffer.is_empty() {
return Err(IndexControllerError::MissingPayload(format));
}
let reader = Cursor::new(buffer);
let count = match format {
DocumentAdditionFormat::Json => read_json(reader, &mut *update_file)?,
DocumentAdditionFormat::Csv => read_csv(reader, &mut *update_file)?,
DocumentAdditionFormat::Ndjson => read_ndjson(reader, &mut *update_file)?,
};
update_file.persist()?;
Ok(count)
})
.await??;
TaskContent::DocumentAddition {
content_uuid,
merge_strategy: method,
primary_key,
documents_count,
}
}
Err(e) => Err(e.into()),
Update::DeleteIndex => TaskContent::IndexDeletion,
Update::CreateIndex { primary_key } => TaskContent::IndexCreation { primary_key },
Update::UpdateIndex { primary_key } => TaskContent::IndexUpdate { primary_key },
};
let task = self.task_store.register(uid, content).await?;
Ok(task)
}
pub async fn get_task(&self, id: TaskId, filter: Option<TaskFilter>) -> Result<Task> {
let task = self.task_store.get_task(id, filter).await?;
Ok(task)
}
pub async fn get_index_task(&self, index_uid: String, task_id: TaskId) -> Result<Task> {
let creation_task_id = self
.index_resolver
.get_index_creation_task_id(index_uid.clone())
.await?;
if task_id < creation_task_id {
return Err(TaskError::UnexistingTask(task_id).into());
}
let mut filter = TaskFilter::default();
filter.filter_index(index_uid);
let task = self.task_store.get_task(task_id, Some(filter)).await?;
Ok(task)
}
pub async fn update_status(&self, uid: String, id: u64) -> Result<UpdateStatus> {
let uuid = self.index_resolver.get_uuid(uid).await?;
let result = UpdateMsg::get_update(&self.update_sender, uuid, id).await?;
Ok(result)
pub async fn list_tasks(
&self,
filter: Option<TaskFilter>,
limit: Option<usize>,
offset: Option<TaskId>,
) -> Result<Vec<Task>> {
let tasks = self.task_store.list_tasks(offset, filter, limit).await?;
Ok(tasks)
}
pub async fn all_update_status(&self, uid: String) -> Result<Vec<UpdateStatus>> {
let uuid = self.index_resolver.get_uuid(uid).await?;
let result = UpdateMsg::list_updates(&self.update_sender, uuid).await?;
Ok(result)
pub async fn list_index_task(
&self,
index_uid: String,
limit: Option<usize>,
offset: Option<TaskId>,
) -> Result<Vec<Task>> {
let task_id = self
.index_resolver
.get_index_creation_task_id(index_uid.clone())
.await?;
let mut filter = TaskFilter::default();
filter.filter_index(index_uid);
let tasks = self
.task_store
.list_tasks(
Some(offset.unwrap_or_default() + task_id),
Some(filter),
limit,
)
.await?;
Ok(tasks)
}
pub async fn list_indexes(&self) -> Result<Vec<IndexMetadata>> {
@ -377,28 +492,8 @@ where
Ok(document)
}
pub async fn update_index(
&self,
uid: String,
mut index_settings: IndexSettings,
) -> Result<IndexMetadata> {
index_settings.uid.take();
let index = self.index_resolver.get_index(uid.clone()).await?;
let uuid = index.uuid();
let meta =
spawn_blocking(move || index.update_primary_key(index_settings.primary_key)).await??;
let meta = IndexMetadata {
uuid,
name: uid.clone(),
uid,
meta,
};
Ok(meta)
}
pub async fn search(&self, uid: String, query: SearchQuery) -> Result<SearchResult> {
let index = self.index_resolver.get_index(uid.clone()).await?;
let index = self.index_resolver.get_index(uid).await?;
let result = spawn_blocking(move || index.perform_search(query)).await??;
Ok(result)
}
@ -417,45 +512,50 @@ where
}
pub async fn get_index_stats(&self, uid: String) -> Result<IndexStats> {
let update_infos = UpdateMsg::get_info(&self.update_sender).await?;
let index = self.index_resolver.get_index(uid).await?;
let uuid = index.uuid();
let mut stats = spawn_blocking(move || index.stats()).await??;
let last_task = self.task_store.get_processing_task().await?;
// Check if the currently indexing update is from our index.
stats.is_indexing = Some(Some(uuid) == update_infos.processing);
let is_indexing = last_task
.map(|task| task.index_uid.into_inner() == uid)
.unwrap_or_default();
let index = self.index_resolver.get_index(uid).await?;
let mut stats = spawn_blocking(move || index.stats()).await??;
stats.is_indexing = Some(is_indexing);
Ok(stats)
}
pub async fn get_all_stats(&self) -> Result<Stats> {
let update_infos = UpdateMsg::get_info(&self.update_sender).await?;
let mut database_size = self.index_resolver.get_uuids_size().await? + update_infos.size;
let mut last_update: Option<DateTime<_>> = None;
let mut last_task: Option<DateTime<_>> = None;
let mut indexes = BTreeMap::new();
let mut database_size = 0;
let processing_task = self.task_store.get_processing_task().await?;
for (index_uid, index) in self.index_resolver.list().await? {
let uuid = index.uuid();
let (mut stats, meta) = spawn_blocking::<_, IndexResult<_>>(move || {
let stats = index.stats()?;
let meta = index.meta()?;
Ok((stats, meta))
})
.await??;
let (mut stats, meta) =
spawn_blocking::<_, Result<(IndexStats, IndexMeta)>>(move || {
Ok((index.stats()?, index.meta()?))
})
.await??;
database_size += stats.size;
last_update = last_update.map_or(Some(meta.updated_at), |last| {
last_task = last_task.map_or(Some(meta.updated_at), |last| {
Some(last.max(meta.updated_at))
});
// Check if the currently indexing update is from our index.
stats.is_indexing = Some(Some(uuid) == update_infos.processing);
stats.is_indexing = processing_task
.as_ref()
.map(|p| p.index_uid.as_str() == index_uid)
.or(Some(false));
indexes.insert(index_uid, stats);
}
Ok(Stats {
database_size,
last_update,
last_update: last_task,
indexes,
})
}
@ -467,41 +567,6 @@ where
pub async fn dump_info(&self, uid: String) -> Result<DumpInfo> {
Ok(self.dump_handle.dump_info(uid).await?)
}
pub async fn create_index(
&self,
uid: String,
primary_key: Option<String>,
) -> Result<IndexMetadata> {
let index = self
.index_resolver
.create_index(uid.clone(), primary_key)
.await?;
let meta = spawn_blocking(move || -> IndexResult<_> {
let meta = index.meta()?;
let meta = IndexMetadata {
uuid: index.uuid(),
uid: uid.clone(),
name: uid,
meta,
};
Ok(meta)
})
.await??;
Ok(meta)
}
pub async fn delete_index(&self, uid: String) -> Result<()> {
let uuid = self.index_resolver.delete_index(uid).await?;
let update_sender = self.update_sender.clone();
tokio::spawn(async move {
let _ = UpdateMsg::delete(&update_sender, uuid).await;
});
Ok(())
}
}
pub async fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T {
@ -521,28 +586,28 @@ pub async fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T {
mod test {
use futures::future::ok;
use mockall::predicate::eq;
use tokio::sync::mpsc;
use nelson::Mocker;
use crate::index::error::Result as IndexResult;
use crate::index::test::Mocker;
use crate::index::Index;
use crate::index_controller::dump_actor::MockDumpActorHandle;
use crate::index_controller::index_resolver::index_store::MockIndexStore;
use crate::index_controller::index_resolver::uuid_store::MockUuidStore;
use crate::index_resolver::index_store::MockIndexStore;
use crate::index_resolver::meta_store::MockIndexMetaStore;
use crate::index_resolver::IndexResolver;
use super::updates::UpdateSender;
use super::*;
impl<D: DumpActorHandle> IndexController<MockUuidStore, MockIndexStore, D> {
impl IndexController<MockIndexMetaStore, MockIndexStore> {
pub fn mock(
index_resolver: IndexResolver<MockUuidStore, MockIndexStore>,
update_sender: UpdateSender,
dump_handle: D,
index_resolver: IndexResolver<MockIndexMetaStore, MockIndexStore>,
task_store: TaskStore,
update_file_store: UpdateFileStore,
dump_handle: DumpActorHandleImpl,
) -> Self {
IndexController {
index_resolver: Arc::new(index_resolver),
update_sender,
dump_handle: Arc::new(dump_handle),
task_store,
dump_handle,
update_file_store,
}
}
}
@ -577,11 +642,19 @@ mod test {
exhaustive_facets_count: Some(true),
};
let mut uuid_store = MockUuidStore::new();
let mut uuid_store = MockIndexMetaStore::new();
uuid_store
.expect_get_uuid()
.expect_get()
.with(eq(index_uid.to_owned()))
.returning(move |s| Box::pin(ok((s, Some(index_uuid)))));
.returning(move |s| {
Box::pin(ok((
s,
Some(crate::index_resolver::meta_store::IndexMeta {
uuid: index_uuid,
creation_task_id: 0,
}),
)))
});
let mut index_store = MockIndexStore::new();
let result_clone = result.clone();
@ -600,14 +673,20 @@ mod test {
assert_eq!(&q, &query);
Ok(result.clone())
});
let index = Index::faux(mocker);
let index = Index::mock(mocker);
Box::pin(ok(Some(index)))
});
let index_resolver = IndexResolver::new(uuid_store, index_store);
let (update_sender, _) = mpsc::channel(1);
let dump_actor = MockDumpActorHandle::new();
let index_controller = IndexController::mock(index_resolver, update_sender, dump_actor);
let task_store_mocker = nelson::Mocker::default();
let mocker = Mocker::default();
let update_file_store = UpdateFileStore::mock(mocker);
let index_resolver = IndexResolver::new(uuid_store, index_store, update_file_store.clone());
let task_store = TaskStore::mock(task_store_mocker);
// let dump_actor = MockDumpActorHandle::new();
let (sender, _) = mpsc::channel(1);
let dump_handle = DumpActorHandleImpl { sender };
let index_controller =
IndexController::mock(index_resolver, task_store, update_file_store, dump_handle);
let r = index_controller
.search(index_uid.to_owned(), query.clone())

View file

@ -1,312 +0,0 @@
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
use anyhow::bail;
use log::{error, info, trace};
use tokio::fs;
use tokio::task::spawn_blocking;
use tokio::time::sleep;
use crate::analytics;
use crate::compression::from_tar_gz;
use crate::index_controller::updates::UpdateMsg;
use super::index_resolver::index_store::IndexStore;
use super::index_resolver::uuid_store::UuidStore;
use super::index_resolver::IndexResolver;
use super::updates::UpdateSender;
pub struct SnapshotService<U, I> {
index_resolver: Arc<IndexResolver<U, I>>,
update_sender: UpdateSender,
snapshot_period: Duration,
snapshot_path: PathBuf,
db_path: PathBuf,
db_name: String,
}
impl<U, I> SnapshotService<U, I>
where
U: UuidStore + Sync + Send + 'static,
I: IndexStore + Sync + Send + 'static,
{
pub fn new(
index_resolver: Arc<IndexResolver<U, I>>,
update_sender: UpdateSender,
snapshot_period: Duration,
snapshot_path: PathBuf,
db_path: PathBuf,
db_name: String,
) -> Self {
Self {
index_resolver,
update_sender,
snapshot_period,
snapshot_path,
db_path,
db_name,
}
}
pub async fn run(self) {
info!(
"Snapshot scheduled every {}s.",
self.snapshot_period.as_secs()
);
loop {
if let Err(e) = self.perform_snapshot().await {
error!("Error while performing snapshot: {}", e);
}
sleep(self.snapshot_period).await;
}
}
async fn perform_snapshot(&self) -> anyhow::Result<()> {
trace!("Performing snapshot.");
let snapshot_dir = self.snapshot_path.clone();
fs::create_dir_all(&snapshot_dir).await?;
let temp_snapshot_dir = spawn_blocking(tempfile::tempdir).await??;
let temp_snapshot_path = temp_snapshot_dir.path().to_owned();
let indexes = self
.index_resolver
.snapshot(temp_snapshot_path.clone())
.await?;
analytics::copy_user_id(&self.db_path, &temp_snapshot_path.clone());
if indexes.is_empty() {
return Ok(());
}
UpdateMsg::snapshot(&self.update_sender, temp_snapshot_path.clone(), indexes).await?;
let snapshot_path = self
.snapshot_path
.join(format!("{}.snapshot", self.db_name));
let snapshot_path = spawn_blocking(move || -> anyhow::Result<PathBuf> {
let temp_snapshot_file = tempfile::NamedTempFile::new_in(&snapshot_dir)?;
let temp_snapshot_file_path = temp_snapshot_file.path().to_owned();
crate::compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?;
temp_snapshot_file.persist(&snapshot_path)?;
Ok(snapshot_path)
})
.await??;
trace!("Created snapshot in {:?}.", snapshot_path);
Ok(())
}
}
pub fn load_snapshot(
db_path: impl AsRef<Path>,
snapshot_path: impl AsRef<Path>,
ignore_snapshot_if_db_exists: bool,
ignore_missing_snapshot: bool,
) -> anyhow::Result<()> {
if !db_path.as_ref().exists() && snapshot_path.as_ref().exists() {
match from_tar_gz(snapshot_path, &db_path) {
Ok(()) => Ok(()),
Err(e) => {
//clean created db folder
std::fs::remove_dir_all(&db_path)?;
Err(e)
}
}
} else if db_path.as_ref().exists() && !ignore_snapshot_if_db_exists {
bail!(
"database already exists at {:?}, try to delete it or rename it",
db_path
.as_ref()
.canonicalize()
.unwrap_or_else(|_| db_path.as_ref().to_owned())
)
} else if !snapshot_path.as_ref().exists() && !ignore_missing_snapshot {
bail!(
"snapshot doesn't exist at {:?}",
snapshot_path
.as_ref()
.canonicalize()
.unwrap_or_else(|_| snapshot_path.as_ref().to_owned())
)
} else {
Ok(())
}
}
#[cfg(test)]
mod test {
use std::{collections::HashSet, sync::Arc};
use futures::future::{err, ok};
use once_cell::sync::Lazy;
use rand::Rng;
use uuid::Uuid;
use crate::index::error::IndexError;
use crate::index::test::Mocker;
use crate::index::{error::Result as IndexResult, Index};
use crate::index_controller::index_resolver::error::IndexResolverError;
use crate::index_controller::index_resolver::index_store::MockIndexStore;
use crate::index_controller::index_resolver::uuid_store::MockUuidStore;
use crate::index_controller::index_resolver::IndexResolver;
use crate::index_controller::updates::create_update_handler;
use super::*;
fn setup() {
static SETUP: Lazy<()> = Lazy::new(|| {
if cfg!(windows) {
std::env::set_var("TMP", ".");
} else {
std::env::set_var("TMPDIR", ".");
}
});
// just deref to make sure the env is setup
*SETUP
}
#[actix_rt::test]
async fn test_normal() {
setup();
let mut rng = rand::thread_rng();
let uuids_num: usize = rng.gen_range(5..10);
let uuids = (0..uuids_num)
.map(|_| Uuid::new_v4())
.collect::<HashSet<_>>();
let mut uuid_store = MockUuidStore::new();
let uuids_clone = uuids.clone();
uuid_store
.expect_snapshot()
.times(1)
.returning(move |_| Box::pin(ok(uuids_clone.clone())));
let mut indexes = uuids.clone().into_iter().map(|uuid| {
let mocker = Mocker::default();
mocker
.when("snapshot")
.times(1)
.then(|_: &Path| -> IndexResult<()> { Ok(()) });
mocker.when("uuid").then(move |_: ()| uuid);
Index::faux(mocker)
});
let uuids_clone = uuids.clone();
let mut index_store = MockIndexStore::new();
index_store
.expect_get()
.withf(move |uuid| uuids_clone.contains(uuid))
.times(uuids_num)
.returning(move |_| Box::pin(ok(Some(indexes.next().unwrap()))));
let index_resolver = Arc::new(IndexResolver::new(uuid_store, index_store));
let dir = tempfile::tempdir().unwrap();
let update_sender =
create_update_handler(index_resolver.clone(), dir.path(), 4096 * 100).unwrap();
let snapshot_path = tempfile::tempdir().unwrap();
let snapshot_service = SnapshotService::new(
index_resolver,
update_sender,
Duration::from_millis(100),
snapshot_path.path().to_owned(),
// this should do nothing
snapshot_path.path().to_owned(),
"data.ms".to_string(),
);
snapshot_service.perform_snapshot().await.unwrap();
}
#[actix_rt::test]
async fn error_performing_uuid_snapshot() {
setup();
let mut uuid_store = MockUuidStore::new();
uuid_store.expect_snapshot().once().returning(move |_| {
Box::pin(err(IndexResolverError::IndexAlreadyExists(
"test".to_string(),
)))
});
let mut index_store = MockIndexStore::new();
index_store.expect_get().never();
let index_resolver = Arc::new(IndexResolver::new(uuid_store, index_store));
let dir = tempfile::tempdir().unwrap();
let update_sender =
create_update_handler(index_resolver.clone(), dir.path(), 4096 * 100).unwrap();
let snapshot_path = tempfile::tempdir().unwrap();
let snapshot_service = SnapshotService::new(
index_resolver,
update_sender,
Duration::from_millis(100),
snapshot_path.path().to_owned(),
// this should do nothing
snapshot_path.path().to_owned(),
"data.ms".to_string(),
);
assert!(snapshot_service.perform_snapshot().await.is_err());
}
#[actix_rt::test]
async fn error_performing_index_snapshot() {
setup();
let uuids: HashSet<Uuid> = vec![Uuid::new_v4()].into_iter().collect();
let mut uuid_store = MockUuidStore::new();
let uuids_clone = uuids.clone();
uuid_store
.expect_snapshot()
.once()
.returning(move |_| Box::pin(ok(uuids_clone.clone())));
let mut indexes = uuids.clone().into_iter().map(|uuid| {
let mocker = Mocker::default();
// index returns random error
mocker.when("snapshot").then(|_: &Path| -> IndexResult<()> {
Err(IndexError::DocumentNotFound("1".to_string()))
});
mocker.when("uuid").then(move |_: ()| uuid);
Index::faux(mocker)
});
let uuids_clone = uuids.clone();
let mut index_store = MockIndexStore::new();
index_store
.expect_get()
.withf(move |uuid| uuids_clone.contains(uuid))
.once()
.returning(move |_| Box::pin(ok(Some(indexes.next().unwrap()))));
let index_resolver = Arc::new(IndexResolver::new(uuid_store, index_store));
let dir = tempfile::tempdir().unwrap();
let update_sender =
create_update_handler(index_resolver.clone(), dir.path(), 4096 * 100).unwrap();
let snapshot_path = tempfile::tempdir().unwrap();
let snapshot_service = SnapshotService::new(
index_resolver,
update_sender,
Duration::from_millis(100),
snapshot_path.path().to_owned(),
// this should do nothing
snapshot_path.path().to_owned(),
"data.ms".to_string(),
);
assert!(snapshot_service.perform_snapshot().await.is_err());
}
}

View file

@ -1,177 +0,0 @@
use std::fs::{create_dir_all, File};
use std::io::{self, BufReader, BufWriter, Write};
use std::ops::{Deref, DerefMut};
use std::path::{Path, PathBuf};
use milli::documents::DocumentBatchReader;
use serde_json::Map;
use tempfile::{NamedTempFile, PersistError};
use uuid::Uuid;
const UPDATE_FILES_PATH: &str = "updates/updates_files";
use crate::document_formats::read_ndjson;
pub struct UpdateFile {
path: PathBuf,
file: NamedTempFile,
}
#[derive(Debug, thiserror::Error)]
#[error("Error while persisting update to disk: {0}")]
pub struct UpdateFileStoreError(Box<dyn std::error::Error + Sync + Send + 'static>);
type Result<T> = std::result::Result<T, UpdateFileStoreError>;
macro_rules! into_update_store_error {
($($other:path),*) => {
$(
impl From<$other> for UpdateFileStoreError {
fn from(other: $other) -> Self {
Self(Box::new(other))
}
}
)*
};
}
into_update_store_error!(
PersistError,
io::Error,
serde_json::Error,
milli::documents::Error
);
impl UpdateFile {
pub fn persist(self) -> Result<()> {
self.file.persist(&self.path)?;
Ok(())
}
}
impl Deref for UpdateFile {
type Target = NamedTempFile;
fn deref(&self) -> &Self::Target {
&self.file
}
}
impl DerefMut for UpdateFile {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.file
}
}
#[derive(Clone, Debug)]
pub struct UpdateFileStore {
path: PathBuf,
}
impl UpdateFileStore {
pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> anyhow::Result<()> {
let src_update_files_path = src.as_ref().join(UPDATE_FILES_PATH);
let dst_update_files_path = dst.as_ref().join(UPDATE_FILES_PATH);
// No update files to load
if !src_update_files_path.exists() {
return Ok(());
}
create_dir_all(&dst_update_files_path)?;
let entries = std::fs::read_dir(src_update_files_path)?;
for entry in entries {
let entry = entry?;
let update_file = BufReader::new(File::open(entry.path())?);
let file_uuid = entry.file_name();
let file_uuid = file_uuid
.to_str()
.ok_or_else(|| anyhow::anyhow!("invalid update file name"))?;
let dst_path = dst_update_files_path.join(file_uuid);
let dst_file = BufWriter::new(File::create(dst_path)?);
read_ndjson(update_file, dst_file)?;
}
Ok(())
}
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
let path = path.as_ref().join(UPDATE_FILES_PATH);
std::fs::create_dir_all(&path)?;
Ok(Self { path })
}
/// Creates a new temporary update file.
///
/// A call to `persist` is needed to persist the file in the database.
pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> {
let file = NamedTempFile::new_in(&self.path)?;
let uuid = Uuid::new_v4();
let path = self.path.join(uuid.to_string());
let update_file = UpdateFile { file, path };
Ok((uuid, update_file))
}
/// Returns the file corresponding to the requested uuid.
pub fn get_update(&self, uuid: Uuid) -> Result<File> {
let path = self.path.join(uuid.to_string());
let file = File::open(path)?;
Ok(file)
}
/// Copies the content of the update file pointed to by `uuid` to the `dst` directory.
pub fn snapshot(&self, uuid: Uuid, dst: impl AsRef<Path>) -> Result<()> {
let src = self.path.join(uuid.to_string());
let mut dst = dst.as_ref().join(UPDATE_FILES_PATH);
std::fs::create_dir_all(&dst)?;
dst.push(uuid.to_string());
std::fs::copy(src, dst)?;
Ok(())
}
/// Peforms a dump of the given update file uuid into the provided dump path.
pub fn dump(&self, uuid: Uuid, dump_path: impl AsRef<Path>) -> Result<()> {
let uuid_string = uuid.to_string();
let update_file_path = self.path.join(&uuid_string);
let mut dst = dump_path.as_ref().join(UPDATE_FILES_PATH);
std::fs::create_dir_all(&dst)?;
dst.push(&uuid_string);
let update_file = File::open(update_file_path)?;
let mut dst_file = NamedTempFile::new_in(&dump_path)?;
let mut document_reader = DocumentBatchReader::from_reader(update_file)?;
let mut document_buffer = Map::new();
// TODO: we need to find a way to do this more efficiently. (create a custom serializer
// for jsonl for example...)
while let Some((index, document)) = document_reader.next_document_with_index()? {
for (field_id, content) in document.iter() {
if let Some(field_name) = index.name(field_id) {
let content = serde_json::from_slice(content)?;
document_buffer.insert(field_name.to_string(), content);
}
}
serde_json::to_writer(&mut dst_file, &document_buffer)?;
dst_file.write_all(b"\n")?;
document_buffer.clear();
}
dst_file.persist(dst)?;
Ok(())
}
pub fn get_size(&self, uuid: Uuid) -> Result<u64> {
Ok(self.get_update(uuid)?.metadata()?.len())
}
pub fn delete(&self, uuid: Uuid) -> Result<()> {
let path = self.path.join(uuid.to_string());
std::fs::remove_file(path)?;
Ok(())
}
}

View file

@ -1,113 +0,0 @@
use std::path::PathBuf;
use tokio::sync::{mpsc, oneshot};
use uuid::Uuid;
use crate::index::Index;
use super::error::Result;
use super::{Update, UpdateStatus, UpdateStoreInfo};
#[derive(Debug)]
pub enum UpdateMsg {
Update {
uuid: Uuid,
update: Update,
ret: oneshot::Sender<Result<UpdateStatus>>,
},
ListUpdates {
uuid: Uuid,
ret: oneshot::Sender<Result<Vec<UpdateStatus>>>,
},
GetUpdate {
uuid: Uuid,
ret: oneshot::Sender<Result<UpdateStatus>>,
id: u64,
},
DeleteIndex {
uuid: Uuid,
ret: oneshot::Sender<Result<()>>,
},
Snapshot {
indexes: Vec<Index>,
path: PathBuf,
ret: oneshot::Sender<Result<()>>,
},
Dump {
indexes: Vec<Index>,
path: PathBuf,
ret: oneshot::Sender<Result<()>>,
},
GetInfo {
ret: oneshot::Sender<Result<UpdateStoreInfo>>,
},
}
impl UpdateMsg {
pub async fn snapshot(
sender: &mpsc::Sender<Self>,
path: PathBuf,
indexes: Vec<Index>,
) -> Result<()> {
let (ret, rcv) = oneshot::channel();
let msg = Self::Snapshot { path, indexes, ret };
sender.send(msg).await?;
rcv.await?
}
pub async fn dump(
sender: &mpsc::Sender<Self>,
indexes: Vec<Index>,
path: PathBuf,
) -> Result<()> {
let (ret, rcv) = oneshot::channel();
let msg = Self::Dump { path, indexes, ret };
sender.send(msg).await?;
rcv.await?
}
pub async fn update(
sender: &mpsc::Sender<Self>,
uuid: Uuid,
update: Update,
) -> Result<UpdateStatus> {
let (ret, rcv) = oneshot::channel();
let msg = Self::Update { uuid, update, ret };
sender.send(msg).await?;
rcv.await?
}
pub async fn get_update(
sender: &mpsc::Sender<Self>,
uuid: Uuid,
id: u64,
) -> Result<UpdateStatus> {
let (ret, rcv) = oneshot::channel();
let msg = Self::GetUpdate { uuid, id, ret };
sender.send(msg).await?;
rcv.await?
}
pub async fn list_updates(
sender: &mpsc::Sender<Self>,
uuid: Uuid,
) -> Result<Vec<UpdateStatus>> {
let (ret, rcv) = oneshot::channel();
let msg = Self::ListUpdates { uuid, ret };
sender.send(msg).await?;
rcv.await?
}
pub async fn get_info(sender: &mpsc::Sender<Self>) -> Result<UpdateStoreInfo> {
let (ret, rcv) = oneshot::channel();
let msg = Self::GetInfo { ret };
sender.send(msg).await?;
rcv.await?
}
pub async fn delete(sender: &mpsc::Sender<Self>, uuid: Uuid) -> Result<()> {
let (ret, rcv) = oneshot::channel();
let msg = Self::DeleteIndex { ret, uuid };
sender.send(msg).await?;
rcv.await?
}
}

View file

@ -1,266 +0,0 @@
pub mod error;
mod message;
pub mod status;
pub mod store;
use std::io::Cursor;
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
use async_stream::stream;
use futures::StreamExt;
use log::trace;
use milli::update::IndexDocumentsMethod;
use serde::{Deserialize, Serialize};
use tokio::sync::mpsc;
use uuid::Uuid;
use self::error::{Result, UpdateLoopError};
pub use self::message::UpdateMsg;
use self::store::{UpdateStore, UpdateStoreInfo};
use crate::document_formats::{read_csv, read_json, read_ndjson};
use crate::index::{Index, Settings, Unchecked};
use crate::index_controller::update_file_store::UpdateFileStore;
use status::UpdateStatus;
use super::index_resolver::index_store::IndexStore;
use super::index_resolver::uuid_store::UuidStore;
use super::index_resolver::IndexResolver;
use super::{DocumentAdditionFormat, Update};
pub type UpdateSender = mpsc::Sender<UpdateMsg>;
pub fn create_update_handler<U, I>(
index_resolver: Arc<IndexResolver<U, I>>,
db_path: impl AsRef<Path>,
update_store_size: usize,
) -> anyhow::Result<UpdateSender>
where
U: UuidStore + Sync + Send + 'static,
I: IndexStore + Sync + Send + 'static,
{
let path = db_path.as_ref().to_owned();
let (sender, receiver) = mpsc::channel(100);
let actor = UpdateLoop::new(update_store_size, receiver, path, index_resolver)?;
tokio::task::spawn(actor.run());
Ok(sender)
}
pub struct UpdateLoop {
store: Arc<UpdateStore>,
inbox: Option<mpsc::Receiver<UpdateMsg>>,
update_file_store: UpdateFileStore,
must_exit: Arc<AtomicBool>,
}
impl UpdateLoop {
pub fn new<U, I>(
update_db_size: usize,
inbox: mpsc::Receiver<UpdateMsg>,
path: impl AsRef<Path>,
index_resolver: Arc<IndexResolver<U, I>>,
) -> anyhow::Result<Self>
where
U: UuidStore + Sync + Send + 'static,
I: IndexStore + Sync + Send + 'static,
{
let path = path.as_ref().to_owned();
std::fs::create_dir_all(&path)?;
let mut options = heed::EnvOpenOptions::new();
options.map_size(update_db_size);
let must_exit = Arc::new(AtomicBool::new(false));
let update_file_store = UpdateFileStore::new(&path).unwrap();
let store = UpdateStore::open(
options,
&path,
index_resolver,
must_exit.clone(),
update_file_store.clone(),
)?;
let inbox = Some(inbox);
Ok(Self {
store,
inbox,
must_exit,
update_file_store,
})
}
pub async fn run(mut self) {
use UpdateMsg::*;
trace!("Started update actor.");
let mut inbox = self
.inbox
.take()
.expect("A receiver should be present by now.");
let must_exit = self.must_exit.clone();
let stream = stream! {
loop {
let msg = inbox.recv().await;
if must_exit.load(std::sync::atomic::Ordering::Relaxed) {
break;
}
match msg {
Some(msg) => yield msg,
None => break,
}
}
};
stream
.for_each_concurrent(Some(10), |msg| async {
match msg {
Update { uuid, update, ret } => {
let _ = ret.send(self.handle_update(uuid, update).await);
}
ListUpdates { uuid, ret } => {
let _ = ret.send(self.handle_list_updates(uuid).await);
}
GetUpdate { uuid, ret, id } => {
let _ = ret.send(self.handle_get_update(uuid, id).await);
}
DeleteIndex { uuid, ret } => {
let _ = ret.send(self.handle_delete(uuid).await);
}
Snapshot { indexes, path, ret } => {
let _ = ret.send(self.handle_snapshot(indexes, path).await);
}
GetInfo { ret } => {
let _ = ret.send(self.handle_get_info().await);
}
Dump { indexes, path, ret } => {
let _ = ret.send(self.handle_dump(indexes, path).await);
}
}
})
.await;
}
async fn handle_update(&self, index_uuid: Uuid, update: Update) -> Result<UpdateStatus> {
let registration = match update {
Update::DocumentAddition {
mut payload,
primary_key,
method,
format,
} => {
let mut buffer = Vec::new();
while let Some(bytes) = payload.next().await {
match bytes {
Ok(bytes) => {
buffer.extend_from_slice(&bytes);
}
Err(e) => return Err(e.into()),
}
}
let (content_uuid, mut update_file) = self.update_file_store.new_update()?;
tokio::task::spawn_blocking(move || -> Result<_> {
// check if the payload is empty, and return an error
if buffer.is_empty() {
return Err(UpdateLoopError::MissingPayload(format));
}
let reader = Cursor::new(buffer);
match format {
DocumentAdditionFormat::Json => read_json(reader, &mut *update_file)?,
DocumentAdditionFormat::Csv => read_csv(reader, &mut *update_file)?,
DocumentAdditionFormat::Ndjson => read_ndjson(reader, &mut *update_file)?,
}
update_file.persist()?;
Ok(())
})
.await??;
store::Update::DocumentAddition {
primary_key,
method,
content_uuid,
}
}
Update::Settings(settings) => store::Update::Settings(settings),
Update::ClearDocuments => store::Update::ClearDocuments,
Update::DeleteDocuments(ids) => store::Update::DeleteDocuments(ids),
};
let store = self.store.clone();
let status =
tokio::task::spawn_blocking(move || store.register_update(index_uuid, registration))
.await??;
Ok(status.into())
}
async fn handle_list_updates(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>> {
let update_store = self.store.clone();
tokio::task::spawn_blocking(move || {
let result = update_store.list(uuid)?;
Ok(result)
})
.await?
}
async fn handle_get_update(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus> {
let store = self.store.clone();
tokio::task::spawn_blocking(move || {
let result = store
.meta(uuid, id)?
.ok_or(UpdateLoopError::UnexistingUpdate(id))?;
Ok(result)
})
.await?
}
async fn handle_delete(&self, uuid: Uuid) -> Result<()> {
let store = self.store.clone();
tokio::task::spawn_blocking(move || store.delete_all(uuid)).await??;
Ok(())
}
async fn handle_snapshot(&self, indexes: Vec<Index>, path: PathBuf) -> Result<()> {
let update_store = self.store.clone();
tokio::task::spawn_blocking(move || update_store.snapshot(indexes, path)).await??;
Ok(())
}
async fn handle_dump(&self, indexes: Vec<Index>, path: PathBuf) -> Result<()> {
let update_store = self.store.clone();
tokio::task::spawn_blocking(move || -> Result<()> {
update_store.dump(&indexes, path.to_path_buf())?;
Ok(())
})
.await??;
Ok(())
}
async fn handle_get_info(&self) -> Result<UpdateStoreInfo> {
let update_store = self.store.clone();
let info = tokio::task::spawn_blocking(move || -> Result<UpdateStoreInfo> {
let info = update_store.get_info()?;
Ok(info)
})
.await??;
Ok(info)
}
}

View file

@ -1,251 +0,0 @@
use std::{error::Error, fmt::Display};
use chrono::{DateTime, Utc};
use meilisearch_error::{Code, ErrorCode};
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod};
use serde::{Deserialize, Serialize};
use crate::{
index::{Settings, Unchecked},
Update,
};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateResult {
DocumentsAddition(DocumentAdditionResult),
DocumentDeletion { deleted: u64 },
Other,
}
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum UpdateMeta {
DocumentsAddition {
method: IndexDocumentsMethod,
primary_key: Option<String>,
},
ClearDocuments,
DeleteDocuments {
ids: Vec<String>,
},
Settings(Settings<Unchecked>),
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Enqueued {
pub update_id: u64,
pub meta: Update,
pub enqueued_at: DateTime<Utc>,
}
impl Enqueued {
pub fn new(meta: Update, update_id: u64) -> Self {
Self {
enqueued_at: Utc::now(),
meta,
update_id,
}
}
pub fn processing(self) -> Processing {
Processing {
from: self,
started_processing_at: Utc::now(),
}
}
pub fn abort(self) -> Aborted {
Aborted {
from: self,
aborted_at: Utc::now(),
}
}
pub fn meta(&self) -> &Update {
&self.meta
}
pub fn id(&self) -> u64 {
self.update_id
}
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Processed {
pub success: UpdateResult,
pub processed_at: DateTime<Utc>,
#[serde(flatten)]
pub from: Processing,
}
impl Processed {
pub fn id(&self) -> u64 {
self.from.id()
}
pub fn meta(&self) -> &Update {
self.from.meta()
}
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Processing {
#[serde(flatten)]
pub from: Enqueued,
pub started_processing_at: DateTime<Utc>,
}
impl Processing {
pub fn id(&self) -> u64 {
self.from.id()
}
pub fn meta(&self) -> &Update {
self.from.meta()
}
pub fn process(self, success: UpdateResult) -> Processed {
Processed {
success,
from: self,
processed_at: Utc::now(),
}
}
pub fn fail(self, error: impl ErrorCode) -> Failed {
let msg = error.to_string();
let code = error.error_code();
Failed {
from: self,
msg,
code,
failed_at: Utc::now(),
}
}
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Aborted {
#[serde(flatten)]
pub from: Enqueued,
pub aborted_at: DateTime<Utc>,
}
impl Aborted {
pub fn id(&self) -> u64 {
self.from.id()
}
pub fn meta(&self) -> &Update {
self.from.meta()
}
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Failed {
#[serde(flatten)]
pub from: Processing,
pub msg: String,
pub code: Code,
pub failed_at: DateTime<Utc>,
}
impl Display for Failed {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.msg.fmt(f)
}
}
impl Error for Failed {}
impl ErrorCode for Failed {
fn error_code(&self) -> Code {
self.code
}
}
impl Failed {
pub fn id(&self) -> u64 {
self.from.id()
}
pub fn meta(&self) -> &Update {
self.from.meta()
}
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "status", rename_all = "camelCase")]
pub enum UpdateStatus {
Processing(Processing),
Enqueued(Enqueued),
Processed(Processed),
Aborted(Aborted),
Failed(Failed),
}
impl UpdateStatus {
pub fn id(&self) -> u64 {
match self {
UpdateStatus::Processing(u) => u.id(),
UpdateStatus::Enqueued(u) => u.id(),
UpdateStatus::Processed(u) => u.id(),
UpdateStatus::Aborted(u) => u.id(),
UpdateStatus::Failed(u) => u.id(),
}
}
pub fn meta(&self) -> &Update {
match self {
UpdateStatus::Processing(u) => u.meta(),
UpdateStatus::Enqueued(u) => u.meta(),
UpdateStatus::Processed(u) => u.meta(),
UpdateStatus::Aborted(u) => u.meta(),
UpdateStatus::Failed(u) => u.meta(),
}
}
pub fn processed(&self) -> Option<&Processed> {
match self {
UpdateStatus::Processed(p) => Some(p),
_ => None,
}
}
}
impl From<Enqueued> for UpdateStatus {
fn from(other: Enqueued) -> Self {
Self::Enqueued(other)
}
}
impl From<Aborted> for UpdateStatus {
fn from(other: Aborted) -> Self {
Self::Aborted(other)
}
}
impl From<Processed> for UpdateStatus {
fn from(other: Processed) -> Self {
Self::Processed(other)
}
}
impl From<Processing> for UpdateStatus {
fn from(other: Processing) -> Self {
Self::Processing(other)
}
}
impl From<Failed> for UpdateStatus {
fn from(other: Failed) -> Self {
Self::Failed(other)
}
}

View file

@ -1,86 +0,0 @@
use std::{borrow::Cow, convert::TryInto, mem::size_of};
use heed::{BytesDecode, BytesEncode};
use uuid::Uuid;
pub struct NextIdCodec;
pub enum NextIdKey {
Global,
Index(Uuid),
}
impl<'a> BytesEncode<'a> for NextIdCodec {
type EItem = NextIdKey;
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
match item {
NextIdKey::Global => Some(Cow::Borrowed(b"__global__")),
NextIdKey::Index(ref uuid) => Some(Cow::Borrowed(uuid.as_bytes())),
}
}
}
pub struct PendingKeyCodec;
impl<'a> BytesEncode<'a> for PendingKeyCodec {
type EItem = (u64, Uuid, u64);
fn bytes_encode((global_id, uuid, update_id): &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
let mut bytes = Vec::with_capacity(size_of::<Self::EItem>());
bytes.extend_from_slice(&global_id.to_be_bytes());
bytes.extend_from_slice(uuid.as_bytes());
bytes.extend_from_slice(&update_id.to_be_bytes());
Some(Cow::Owned(bytes))
}
}
impl<'a> BytesDecode<'a> for PendingKeyCodec {
type DItem = (u64, Uuid, u64);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let global_id_bytes = bytes.get(0..size_of::<u64>())?.try_into().ok()?;
let global_id = u64::from_be_bytes(global_id_bytes);
let uuid_bytes = bytes
.get(size_of::<u64>()..(size_of::<u64>() + size_of::<Uuid>()))?
.try_into()
.ok()?;
let uuid = Uuid::from_bytes(uuid_bytes);
let update_id_bytes = bytes
.get((size_of::<u64>() + size_of::<Uuid>())..)?
.try_into()
.ok()?;
let update_id = u64::from_be_bytes(update_id_bytes);
Some((global_id, uuid, update_id))
}
}
pub struct UpdateKeyCodec;
impl<'a> BytesEncode<'a> for UpdateKeyCodec {
type EItem = (Uuid, u64);
fn bytes_encode((uuid, update_id): &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
let mut bytes = Vec::with_capacity(size_of::<Self::EItem>());
bytes.extend_from_slice(uuid.as_bytes());
bytes.extend_from_slice(&update_id.to_be_bytes());
Some(Cow::Owned(bytes))
}
}
impl<'a> BytesDecode<'a> for UpdateKeyCodec {
type DItem = (Uuid, u64);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let uuid_bytes = bytes.get(0..size_of::<Uuid>())?.try_into().ok()?;
let uuid = Uuid::from_bytes(uuid_bytes);
let update_id_bytes = bytes.get(size_of::<Uuid>()..)?.try_into().ok()?;
let update_id = u64::from_be_bytes(update_id_bytes);
Some((uuid, update_id))
}
}

View file

@ -1,157 +0,0 @@
use std::collections::HashSet;
use std::fs::{create_dir_all, File};
use std::io::{BufReader, Write};
use std::path::{Path, PathBuf};
use heed::{EnvOpenOptions, RoTxn};
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use serde_json::Deserializer;
use tempfile::{NamedTempFile, TempDir};
use uuid::Uuid;
use super::{Result, State, UpdateStore};
use crate::{
index::Index,
index_controller::{
update_file_store::UpdateFileStore,
updates::status::{Enqueued, UpdateStatus},
},
Update,
};
#[derive(Serialize, Deserialize)]
pub struct UpdateEntry {
pub uuid: Uuid,
pub update: UpdateStatus,
}
impl UpdateStore {
pub fn dump(&self, indexes: &[Index], path: PathBuf) -> Result<()> {
let state_lock = self.state.write();
state_lock.swap(State::Dumping);
// txn must *always* be acquired after state lock, or it will dead lock.
let txn = self.env.write_txn()?;
let uuids = indexes.iter().map(|i| i.uuid()).collect();
self.dump_updates(&txn, &uuids, &path)?;
indexes
.par_iter()
.try_for_each(|index| index.dump(&path))
.unwrap();
Ok(())
}
fn dump_updates(
&self,
txn: &RoTxn,
uuids: &HashSet<Uuid>,
path: impl AsRef<Path>,
) -> Result<()> {
let mut dump_data_file = NamedTempFile::new_in(&path)?;
self.dump_pending(txn, uuids, &mut dump_data_file, &path)?;
self.dump_completed(txn, uuids, &mut dump_data_file)?;
let mut dst_path = path.as_ref().join("updates");
create_dir_all(&dst_path)?;
dst_path.push("data.jsonl");
dump_data_file.persist(dst_path).unwrap();
Ok(())
}
fn dump_pending(
&self,
txn: &RoTxn,
uuids: &HashSet<Uuid>,
mut file: impl Write,
dst_path: impl AsRef<Path>,
) -> Result<()> {
let pendings = self.pending_queue.iter(txn)?.lazily_decode_data();
for pending in pendings {
let ((_, uuid, _), data) = pending?;
if uuids.contains(&uuid) {
let update = data.decode()?;
if let Enqueued {
meta: Update::DocumentAddition { content_uuid, .. },
..
} = update
{
self.update_file_store
.dump(content_uuid, &dst_path)
.unwrap();
}
let update_json = UpdateEntry {
uuid,
update: update.into(),
};
serde_json::to_writer(&mut file, &update_json)?;
file.write_all(b"\n")?;
}
}
Ok(())
}
fn dump_completed(
&self,
txn: &RoTxn,
uuids: &HashSet<Uuid>,
mut file: impl Write,
) -> Result<()> {
let updates = self.updates.iter(txn)?.lazily_decode_data();
for update in updates {
let ((uuid, _), data) = update?;
if uuids.contains(&uuid) {
let update = data.decode()?;
let update_json = UpdateEntry { uuid, update };
serde_json::to_writer(&mut file, &update_json)?;
file.write_all(b"\n")?;
}
}
Ok(())
}
pub fn load_dump(
src: impl AsRef<Path>,
dst: impl AsRef<Path>,
db_size: usize,
) -> anyhow::Result<()> {
let mut options = EnvOpenOptions::new();
options.map_size(db_size as usize);
// create a dummy update fiel store, since it is not needed right now.
let tmp = TempDir::new().unwrap();
let update_file_store = UpdateFileStore::new(tmp.path()).unwrap();
let (store, _) = UpdateStore::new(options, &dst, update_file_store)?;
let src_update_path = src.as_ref().join("updates");
let update_data = File::open(&src_update_path.join("data.jsonl"))?;
let update_data = BufReader::new(update_data);
let stream = Deserializer::from_reader(update_data).into_iter::<UpdateEntry>();
let mut wtxn = store.env.write_txn()?;
for entry in stream {
let UpdateEntry { uuid, update } = entry?;
store.register_raw_updates(&mut wtxn, &update, uuid)?;
}
wtxn.commit()?;
Ok(())
}
}

View file

@ -1,784 +0,0 @@
mod codec;
pub mod dump;
use std::fs::create_dir_all;
use std::path::Path;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::{
collections::{BTreeMap, HashSet},
path::PathBuf,
time::Duration,
};
use arc_swap::ArcSwap;
use heed::types::{ByteSlice, OwnedType, SerdeJson};
use heed::zerocopy::U64;
use heed::{CompactionOption, Database, Env, EnvOpenOptions};
use log::error;
use parking_lot::{Mutex, MutexGuard};
use rayon::prelude::*;
use tokio::runtime::Handle;
use tokio::sync::mpsc;
use tokio::sync::mpsc::error::TrySendError;
use tokio::time::timeout;
use uuid::Uuid;
use codec::*;
use super::error::Result;
use super::status::{Enqueued, Processing};
use crate::index::Index;
use crate::index_controller::index_resolver::index_store::IndexStore;
use crate::index_controller::index_resolver::uuid_store::UuidStore;
use crate::index_controller::updates::*;
use crate::EnvSizer;
#[allow(clippy::upper_case_acronyms)]
type BEU64 = U64<heed::byteorder::BE>;
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Update {
DeleteDocuments(Vec<String>),
DocumentAddition {
primary_key: Option<String>,
method: IndexDocumentsMethod,
content_uuid: Uuid,
},
Settings(Settings<Unchecked>),
ClearDocuments,
}
#[derive(Debug)]
pub struct UpdateStoreInfo {
/// Size of the update store in bytes.
pub size: u64,
/// Uuid of the currently processing update if it exists
pub processing: Option<Uuid>,
}
/// A data structure that allows concurrent reads AND exactly one writer.
pub struct StateLock {
lock: Mutex<()>,
data: ArcSwap<State>,
}
pub struct StateLockGuard<'a> {
_lock: MutexGuard<'a, ()>,
state: &'a StateLock,
}
impl StateLockGuard<'_> {
pub fn swap(&self, state: State) -> Arc<State> {
self.state.data.swap(Arc::new(state))
}
}
impl StateLock {
fn from_state(state: State) -> Self {
let lock = Mutex::new(());
let data = ArcSwap::from(Arc::new(state));
Self { lock, data }
}
pub fn read(&self) -> Arc<State> {
self.data.load().clone()
}
pub fn write(&self) -> StateLockGuard {
let _lock = self.lock.lock();
let state = &self;
StateLockGuard { _lock, state }
}
}
#[allow(clippy::large_enum_variant)]
pub enum State {
Idle,
Processing(Uuid, Processing),
Snapshoting,
Dumping,
}
#[derive(Clone)]
pub struct UpdateStore {
pub env: Env,
/// A queue containing the updates to process, ordered by arrival.
/// The key are built as follow:
/// | global_update_id | index_uuid | update_id |
/// | 8-bytes | 16-bytes | 8-bytes |
pending_queue: Database<PendingKeyCodec, SerdeJson<Enqueued>>,
/// Map indexes to the next available update id. If NextIdKey::Global is queried, then the next
/// global update id is returned
next_update_id: Database<NextIdCodec, OwnedType<BEU64>>,
/// Contains all the performed updates meta, be they failed, aborted, or processed.
/// The keys are built as follow:
/// | Uuid | id |
/// | 16-bytes | 8-bytes |
updates: Database<UpdateKeyCodec, SerdeJson<UpdateStatus>>,
/// Indicates the current state of the update store,
state: Arc<StateLock>,
/// Wake up the loop when a new event occurs.
notification_sender: mpsc::Sender<()>,
update_file_store: UpdateFileStore,
path: PathBuf,
}
impl UpdateStore {
fn new(
mut options: EnvOpenOptions,
path: impl AsRef<Path>,
update_file_store: UpdateFileStore,
) -> anyhow::Result<(Self, mpsc::Receiver<()>)> {
options.max_dbs(5);
let update_path = path.as_ref().join("updates");
std::fs::create_dir_all(&update_path)?;
let env = options.open(update_path)?;
let pending_queue = env.create_database(Some("pending-queue"))?;
let next_update_id = env.create_database(Some("next-update-id"))?;
let updates = env.create_database(Some("updates"))?;
let state = Arc::new(StateLock::from_state(State::Idle));
let (notification_sender, notification_receiver) = mpsc::channel(1);
Ok((
Self {
env,
pending_queue,
next_update_id,
updates,
state,
notification_sender,
path: path.as_ref().to_owned(),
update_file_store,
},
notification_receiver,
))
}
pub fn open<U, I>(
options: EnvOpenOptions,
path: impl AsRef<Path>,
index_resolver: Arc<IndexResolver<U, I>>,
must_exit: Arc<AtomicBool>,
update_file_store: UpdateFileStore,
) -> anyhow::Result<Arc<Self>>
where
U: UuidStore + Sync + Send + 'static,
I: IndexStore + Sync + Send + 'static,
{
let (update_store, mut notification_receiver) =
Self::new(options, path, update_file_store)?;
let update_store = Arc::new(update_store);
// Send a first notification to trigger the process.
if let Err(TrySendError::Closed(())) = update_store.notification_sender.try_send(()) {
panic!("Failed to init update store");
}
// We need a weak reference so we can take ownership on the arc later when we
// want to close the index.
let duration = Duration::from_secs(10 * 60); // 10 minutes
let update_store_weak = Arc::downgrade(&update_store);
tokio::task::spawn_local(async move {
// Block and wait for something to process with a timeout. The timeout
// function returns a Result and we must just unlock the loop on Result.
'outer: while timeout(duration, notification_receiver.recv())
.await
.map_or(true, |o| o.is_some())
{
loop {
match update_store_weak.upgrade() {
Some(update_store) => {
let handler = index_resolver.clone();
let res = tokio::task::spawn_blocking(move || {
update_store.process_pending_update(handler)
})
.await
.expect("Fatal error processing update.");
match res {
Ok(Some(_)) => (),
Ok(None) => break,
Err(e) => {
error!("Fatal error while processing an update that requires the update store to shutdown: {}", e);
must_exit.store(true, Ordering::SeqCst);
break 'outer;
}
}
}
// the ownership on the arc has been taken, we need to exit.
None => break 'outer,
}
}
}
error!("Update store loop exited.");
});
Ok(update_store)
}
/// Returns the next global update id and the next update id for a given `index_uuid`.
fn next_update_id(&self, txn: &mut heed::RwTxn, index_uuid: Uuid) -> heed::Result<(u64, u64)> {
let global_id = self
.next_update_id
.get(txn, &NextIdKey::Global)?
.map(U64::get)
.unwrap_or_default();
self.next_update_id
.put(txn, &NextIdKey::Global, &BEU64::new(global_id + 1))?;
let update_id = self.next_update_id_raw(txn, index_uuid)?;
Ok((global_id, update_id))
}
/// Returns the next next update id for a given `index_uuid` without
/// incrementing the global update id. This is useful for the dumps.
fn next_update_id_raw(&self, txn: &mut heed::RwTxn, index_uuid: Uuid) -> heed::Result<u64> {
let update_id = self
.next_update_id
.get(txn, &NextIdKey::Index(index_uuid))?
.map(U64::get)
.unwrap_or_default();
self.next_update_id.put(
txn,
&NextIdKey::Index(index_uuid),
&BEU64::new(update_id + 1),
)?;
Ok(update_id)
}
/// Registers the update content in the pending store and the meta
/// into the pending-meta store. Returns the new unique update id.
pub fn register_update(&self, index_uuid: Uuid, update: Update) -> heed::Result<Enqueued> {
let mut txn = self.env.write_txn()?;
let (global_id, update_id) = self.next_update_id(&mut txn, index_uuid)?;
let meta = Enqueued::new(update, update_id);
self.pending_queue
.put(&mut txn, &(global_id, index_uuid, update_id), &meta)?;
txn.commit()?;
if let Err(TrySendError::Closed(())) = self.notification_sender.try_send(()) {
panic!("Update store loop exited");
}
Ok(meta)
}
/// Push already processed update in the UpdateStore without triggering the notification
/// process. This is useful for the dumps.
pub fn register_raw_updates(
&self,
wtxn: &mut heed::RwTxn,
update: &UpdateStatus,
index_uuid: Uuid,
) -> heed::Result<()> {
match update {
UpdateStatus::Enqueued(enqueued) => {
let (global_id, _update_id) = self.next_update_id(wtxn, index_uuid)?;
self.pending_queue.remap_key_type::<PendingKeyCodec>().put(
wtxn,
&(global_id, index_uuid, enqueued.id()),
enqueued,
)?;
}
_ => {
let _update_id = self.next_update_id_raw(wtxn, index_uuid)?;
self.updates.put(wtxn, &(index_uuid, update.id()), update)?;
}
}
Ok(())
}
/// Executes the user provided function on the next pending update (the one with the lowest id).
/// This is asynchronous as it let the user process the update with a read-only txn and
/// only writing the result meta to the processed-meta store *after* it has been processed.
fn process_pending_update<U, I>(
&self,
index_resolver: Arc<IndexResolver<U, I>>,
) -> Result<Option<()>>
where
U: UuidStore + Sync + Send + 'static,
I: IndexStore + Sync + Send + 'static,
{
// Create a read transaction to be able to retrieve the pending update in order.
let rtxn = self.env.read_txn()?;
let first_meta = self.pending_queue.first(&rtxn)?;
drop(rtxn);
// If there is a pending update we process and only keep
// a reader while processing it, not a writer.
match first_meta {
Some(((global_id, index_uuid, _), pending)) => {
let processing = pending.processing();
// Acquire the state lock and set the current state to processing.
// txn must *always* be acquired after state lock, or it will dead lock.
let state = self.state.write();
state.swap(State::Processing(index_uuid, processing.clone()));
let result = self.perform_update(processing, index_resolver, index_uuid, global_id);
state.swap(State::Idle);
result
}
None => Ok(None),
}
}
fn perform_update<U, I>(
&self,
processing: Processing,
index_resolver: Arc<IndexResolver<U, I>>,
index_uuid: Uuid,
global_id: u64,
) -> Result<Option<()>>
where
U: UuidStore + Sync + Send + 'static,
I: IndexStore + Sync + Send + 'static,
{
// Process the pending update using the provided user function.
let handle = Handle::current();
let update_id = processing.id();
//IndexMsg::update(index_resolver, index_uuid, processing.clone()
let result = match handle.block_on(index_resolver.get_index_by_uuid(index_uuid)) {
Ok(index) => index.handle_update(processing),
Err(e) => Err(processing.fail(e)),
};
// Once the pending update have been successfully processed
// we must remove the content from the pending and processing stores and
// write the *new* meta to the processed-meta store and commit.
let mut wtxn = self.env.write_txn()?;
self.pending_queue
.delete(&mut wtxn, &(global_id, index_uuid, update_id))?;
let result = match result {
Ok(res) => res.into(),
Err(res) => res.into(),
};
self.updates
.put(&mut wtxn, &(index_uuid, update_id), &result)?;
wtxn.commit()?;
Ok(Some(()))
}
/// List the updates for `index_uuid`.
pub fn list(&self, index_uuid: Uuid) -> Result<Vec<UpdateStatus>> {
let mut update_list = BTreeMap::<u64, UpdateStatus>::new();
let txn = self.env.read_txn()?;
let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data();
for entry in pendings {
let ((_, uuid, id), pending) = entry?;
if uuid == index_uuid {
update_list.insert(id, pending.decode()?.into());
}
}
let updates = self
.updates
.remap_key_type::<ByteSlice>()
.prefix_iter(&txn, index_uuid.as_bytes())?;
for entry in updates {
let (_, update) = entry?;
update_list.insert(update.id(), update);
}
// If the currently processing update is from this index, replace the corresponding pending update with this one.
match *self.state.read() {
State::Processing(uuid, ref processing) if uuid == index_uuid => {
update_list.insert(processing.id(), processing.clone().into());
}
_ => (),
}
Ok(update_list.into_iter().map(|(_, v)| v).collect())
}
/// Returns the update associated meta or `None` if the update doesn't exist.
pub fn meta(&self, index_uuid: Uuid, update_id: u64) -> heed::Result<Option<UpdateStatus>> {
// Check if the update is the one currently processing
match *self.state.read() {
State::Processing(uuid, ref processing)
if uuid == index_uuid && processing.id() == update_id =>
{
return Ok(Some(processing.clone().into()));
}
_ => (),
}
let txn = self.env.read_txn()?;
// Else, check if it is in the updates database:
let update = self.updates.get(&txn, &(index_uuid, update_id))?;
if let Some(update) = update {
return Ok(Some(update));
}
// If nothing was found yet, we resolve to iterate over the pending queue.
let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data();
for entry in pendings {
let ((_, uuid, id), pending) = entry?;
if uuid == index_uuid && id == update_id {
return Ok(Some(pending.decode()?.into()));
}
}
// No update was found.
Ok(None)
}
/// Delete all updates for an index from the update store. If the currently processing update
/// is for `index_uuid`, the call will block until the update is terminated.
pub fn delete_all(&self, index_uuid: Uuid) -> Result<()> {
let mut txn = self.env.write_txn()?;
// Contains all the content file paths that we need to be removed if the deletion was successful.
let mut uuids_to_remove = Vec::new();
let mut pendings = self.pending_queue.iter_mut(&mut txn)?.lazily_decode_data();
while let Some(Ok(((_, uuid, _), pending))) = pendings.next() {
if uuid == index_uuid {
let pending = pending.decode()?;
if let Update::DocumentAddition { content_uuid, .. } = pending.meta() {
uuids_to_remove.push(*content_uuid);
}
//Invariant check: we can only delete the current entry when we don't hold
//references to it anymore. This must be done after we have retrieved its content.
unsafe {
pendings.del_current()?;
}
}
}
drop(pendings);
let mut updates = self
.updates
.remap_key_type::<ByteSlice>()
.prefix_iter_mut(&mut txn, index_uuid.as_bytes())?
.lazily_decode_data();
while let Some(_) = updates.next() {
unsafe {
updates.del_current()?;
}
}
drop(updates);
txn.commit()?;
// If the currently processing update is from our index, we wait until it is
// finished before returning. This ensure that no write to the index occurs after we delete it.
if let State::Processing(uuid, _) = *self.state.read() {
if uuid == index_uuid {
// wait for a write lock, do nothing with it.
self.state.write();
}
}
// Finally, remove any outstanding update files. This must be done after waiting for the
// last update to ensure that the update files are not deleted before the update needs
// them.
uuids_to_remove.iter().for_each(|uuid| {
let _ = self.update_file_store.delete(*uuid);
});
Ok(())
}
pub fn snapshot(&self, indexes: Vec<Index>, path: impl AsRef<Path>) -> Result<()> {
let state_lock = self.state.write();
state_lock.swap(State::Snapshoting);
let txn = self.env.write_txn()?;
let update_path = path.as_ref().join("updates");
create_dir_all(&update_path)?;
// acquire write lock to prevent further writes during snapshot
create_dir_all(&update_path)?;
let db_path = update_path.join("data.mdb");
// create db snapshot
self.env.copy_to_path(&db_path, CompactionOption::Enabled)?;
let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data();
let uuids: HashSet<_> = indexes.iter().map(|i| i.uuid()).collect();
for entry in pendings {
let ((_, uuid, _), pending) = entry?;
if uuids.contains(&uuid) {
if let Enqueued {
meta: Update::DocumentAddition { content_uuid, .. },
..
} = pending.decode()?
{
self.update_file_store.snapshot(content_uuid, &path)?;
}
}
}
let path = path.as_ref().to_owned();
indexes
.par_iter()
.try_for_each(|index| index.snapshot(&path))?;
Ok(())
}
pub fn get_info(&self) -> Result<UpdateStoreInfo> {
let mut size = self.env.size();
let txn = self.env.read_txn()?;
for entry in self.pending_queue.iter(&txn)? {
let (_, pending) = entry?;
if let Enqueued {
meta: store::Update::DocumentAddition { content_uuid, .. },
..
} = pending
{
let len = self.update_file_store.get_size(content_uuid)?;
size += len;
}
}
let processing = match *self.state.read() {
State::Processing(uuid, _) => Some(uuid),
_ => None,
};
Ok(UpdateStoreInfo { size, processing })
}
}
#[cfg(test)]
mod test {
use futures::future::ok;
use mockall::predicate::eq;
use crate::index::error::IndexError;
use crate::index::test::Mocker;
use crate::index_controller::index_resolver::index_store::MockIndexStore;
use crate::index_controller::index_resolver::uuid_store::MockUuidStore;
use crate::index_controller::updates::status::{Failed, Processed};
use super::*;
#[actix_rt::test]
async fn test_next_id() {
let dir = tempfile::tempdir_in(".").unwrap();
let mut options = EnvOpenOptions::new();
let index_store = MockIndexStore::new();
let uuid_store = MockUuidStore::new();
let index_resolver = IndexResolver::new(uuid_store, index_store);
let update_file_store = UpdateFileStore::new(dir.path()).unwrap();
options.map_size(4096 * 100);
let update_store = UpdateStore::open(
options,
dir.path(),
Arc::new(index_resolver),
Arc::new(AtomicBool::new(false)),
update_file_store,
)
.unwrap();
let index1_uuid = Uuid::new_v4();
let index2_uuid = Uuid::new_v4();
let mut txn = update_store.env.write_txn().unwrap();
let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap();
txn.commit().unwrap();
assert_eq!((0, 0), ids);
let mut txn = update_store.env.write_txn().unwrap();
let ids = update_store.next_update_id(&mut txn, index2_uuid).unwrap();
txn.commit().unwrap();
assert_eq!((1, 0), ids);
let mut txn = update_store.env.write_txn().unwrap();
let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap();
txn.commit().unwrap();
assert_eq!((2, 1), ids);
}
#[actix_rt::test]
async fn test_register_update() {
let dir = tempfile::tempdir_in(".").unwrap();
let index_store = MockIndexStore::new();
let uuid_store = MockUuidStore::new();
let index_resolver = IndexResolver::new(uuid_store, index_store);
let update_file_store = UpdateFileStore::new(dir.path()).unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(4096 * 100);
let update_store = UpdateStore::open(
options,
dir.path(),
Arc::new(index_resolver),
Arc::new(AtomicBool::new(false)),
update_file_store,
)
.unwrap();
let update = Update::ClearDocuments;
let uuid = Uuid::new_v4();
let store_clone = update_store.clone();
tokio::task::spawn_blocking(move || {
store_clone.register_update(uuid, update).unwrap();
})
.await
.unwrap();
let txn = update_store.env.read_txn().unwrap();
assert!(update_store
.pending_queue
.get(&txn, &(0, uuid, 0))
.unwrap()
.is_some());
}
#[actix_rt::test]
async fn test_process_update_success() {
let dir = tempfile::tempdir_in(".").unwrap();
let index_uuid = Uuid::new_v4();
let mut index_store = MockIndexStore::new();
index_store
.expect_get()
.with(eq(index_uuid))
.returning(|_uuid| {
let mocker = Mocker::default();
mocker
.when::<Processing, std::result::Result<Processed, Failed>>("handle_update")
.once()
.then(|update| Ok(update.process(status::UpdateResult::Other)));
Box::pin(ok(Some(Index::faux(mocker))))
});
let uuid_store = MockUuidStore::new();
let index_resolver = Arc::new(IndexResolver::new(uuid_store, index_store));
let update_file_store = UpdateFileStore::new(dir.path()).unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(4096 * 100);
let store = UpdateStore::open(
options,
dir.path(),
index_resolver.clone(),
Arc::new(AtomicBool::new(false)),
update_file_store,
)
.unwrap();
// wait a bit for the event loop exit.
tokio::time::sleep(std::time::Duration::from_millis(50)).await;
let mut txn = store.env.write_txn().unwrap();
let update = Enqueued::new(Update::ClearDocuments, 0);
store
.pending_queue
.put(&mut txn, &(0, index_uuid, 0), &update)
.unwrap();
txn.commit().unwrap();
// Process the pending, and check that it has been moved to the update databases, and
// removed from the pending database.
let store_clone = store.clone();
tokio::task::spawn_blocking(move || {
store_clone.process_pending_update(index_resolver).unwrap();
})
.await
.unwrap();
let txn = store.env.read_txn().unwrap();
assert!(store.pending_queue.first(&txn).unwrap().is_none());
let update = store.updates.get(&txn, &(index_uuid, 0)).unwrap().unwrap();
assert!(matches!(update, UpdateStatus::Processed(_)));
}
#[actix_rt::test]
async fn test_process_update_failure() {
let dir = tempfile::tempdir_in(".").unwrap();
let index_uuid = Uuid::new_v4();
let mut index_store = MockIndexStore::new();
index_store
.expect_get()
.with(eq(index_uuid))
.returning(|_uuid| {
let mocker = Mocker::default();
mocker
.when::<Processing, std::result::Result<Processed, Failed>>("handle_update")
.once()
.then(|update| Err(update.fail(IndexError::DocumentNotFound("1".to_string()))));
Box::pin(ok(Some(Index::faux(mocker))))
});
let uuid_store = MockUuidStore::new();
let index_resolver = Arc::new(IndexResolver::new(uuid_store, index_store));
let update_file_store = UpdateFileStore::new(dir.path()).unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(4096 * 100);
let store = UpdateStore::open(
options,
dir.path(),
index_resolver.clone(),
Arc::new(AtomicBool::new(false)),
update_file_store,
)
.unwrap();
// wait a bit for the event loop exit.
tokio::time::sleep(std::time::Duration::from_millis(50)).await;
let mut txn = store.env.write_txn().unwrap();
let update = Enqueued::new(Update::ClearDocuments, 0);
store
.pending_queue
.put(&mut txn, &(0, index_uuid, 0), &update)
.unwrap();
txn.commit().unwrap();
// Process the pending, and check that it has been moved to the update databases, and
// removed from the pending database.
let store_clone = store.clone();
tokio::task::spawn_blocking(move || {
store_clone.process_pending_update(index_resolver).unwrap();
})
.await
.unwrap();
let txn = store.env.read_txn().unwrap();
assert!(store.pending_queue.first(&txn).unwrap().is_none());
let update = store.updates.get(&txn, &(index_uuid, 0)).unwrap().unwrap();
assert!(matches!(update, UpdateStatus::Failed(_)));
}
}