MeiliSearch/meilisearch-http/src/index_controller/update_actor.rs

370 lines
12 KiB
Rust
Raw Normal View History

2021-03-05 18:34:04 +01:00
use std::collections::{hash_map::Entry, HashMap};
2021-03-06 12:57:56 +01:00
use std::fs::{create_dir_all, remove_dir_all};
2021-03-03 10:57:13 +01:00
use std::path::{Path, PathBuf};
use std::sync::Arc;
2021-03-06 12:57:56 +01:00
use super::index_actor::IndexActorHandle;
2021-03-11 20:58:51 +01:00
use log::info;
2021-02-26 17:14:11 +01:00
use thiserror::Error;
use tokio::fs::File;
2021-03-03 10:57:13 +01:00
use tokio::io::AsyncWriteExt;
use tokio::sync::{mpsc, oneshot, RwLock};
use uuid::Uuid;
2021-03-03 10:57:13 +01:00
2021-03-11 20:58:51 +01:00
use super::get_arc_ownership_blocking;
2021-03-04 11:56:32 +01:00
use crate::index::UpdateResult;
use crate::index_controller::{UpdateMeta, UpdateStatus};
2021-02-26 17:14:11 +01:00
/// Result type used throughout the update actor; the error is always an [`UpdateError`].
pub type Result<T> = std::result::Result<T, UpdateError>;
/// The concrete update store used by this module, parameterized with the update metadata
/// ([`UpdateMeta`]), the update result ([`UpdateResult`]) and a `String` as third parameter.
type UpdateStore = super::update_store::UpdateStore<UpdateMeta, UpdateResult, String>;
/// A single item of an update payload stream: either a chunk of data `D` or the error that
/// interrupted the stream.
type PayloadData<D> = std::result::Result<D, Box<dyn std::error::Error + Sync + Send + 'static>>;
2021-02-26 17:14:11 +01:00
#[derive(Debug, Error)]
pub enum UpdateError {
#[error("error with update: {0}")]
Error(Box<dyn std::error::Error + Sync + Send + 'static>),
2021-03-05 18:34:04 +01:00
#[error("Index {0} doesn't exist.")]
UnexistingIndex(Uuid),
2021-03-15 16:52:05 +01:00
#[error("Update {0} doesn't exist.")]
UnexistingUpdate(u64),
}
2021-02-26 09:10:36 +01:00
2021-03-03 10:57:13 +01:00
enum UpdateMsg<D> {
2021-02-26 17:14:11 +01:00
Update {
uuid: Uuid,
meta: UpdateMeta,
data: mpsc::Receiver<PayloadData<D>>,
ret: oneshot::Sender<Result<UpdateStatus>>,
2021-03-04 17:25:02 +01:00
},
ListUpdates {
uuid: Uuid,
ret: oneshot::Sender<Result<Vec<UpdateStatus>>>,
},
2021-03-06 10:51:52 +01:00
GetUpdate {
uuid: Uuid,
2021-03-15 16:52:05 +01:00
ret: oneshot::Sender<Result<UpdateStatus>>,
2021-03-06 10:51:52 +01:00
id: u64,
2021-03-06 12:57:56 +01:00
},
Delete {
uuid: Uuid,
ret: oneshot::Sender<Result<()>>,
},
Create {
uuid: Uuid,
ret: oneshot::Sender<Result<()>>,
2021-03-15 18:11:10 +01:00
},
2021-02-26 09:10:36 +01:00
}
struct UpdateActor<D, S> {
2021-03-03 10:57:13 +01:00
path: PathBuf,
store: S,
2021-03-03 10:57:13 +01:00
inbox: mpsc::Receiver<UpdateMsg<D>>,
2021-02-26 17:14:11 +01:00
}
/// Abstraction over a collection of update stores, addressed by index uuid.
#[async_trait::async_trait]
trait UpdateStoreStore {
    /// Returns the update store associated with `uuid`, creating it when there is none yet.
    async fn get_or_create(&self, uuid: Uuid) -> Result<Arc<UpdateStore>>;

    /// Removes the update store associated with `uuid` and returns it if it was known.
    async fn delete(&self, uuid: Uuid) -> Result<Option<Arc<UpdateStore>>>;

    /// Returns the update store associated with `uuid`, or `None` when there is none.
    async fn get(&self, uuid: Uuid) -> Result<Option<Arc<UpdateStore>>>;
}
impl<D, S> UpdateActor<D, S>
where
D: AsRef<[u8]> + Sized + 'static,
S: UpdateStoreStore,
2021-03-03 10:57:13 +01:00
{
2021-03-11 20:58:51 +01:00
fn new(
store: S,
inbox: mpsc::Receiver<UpdateMsg<D>>,
path: impl AsRef<Path>,
) -> anyhow::Result<Self> {
2021-03-03 10:57:13 +01:00
let path = path.as_ref().to_owned().join("update_files");
2021-03-11 17:59:47 +01:00
create_dir_all(&path)?;
assert!(path.exists());
Ok(Self { store, inbox, path })
2021-02-26 17:14:11 +01:00
}
async fn run(mut self) {
2021-03-04 17:25:02 +01:00
use UpdateMsg::*;
2021-03-05 18:34:04 +01:00
info!("Started update actor.");
2021-02-26 17:14:11 +01:00
loop {
match self.inbox.recv().await {
Some(Update {
uuid,
meta,
data,
ret,
2021-03-11 20:58:51 +01:00
}) => {
2021-03-05 18:34:04 +01:00
let _ = ret.send(self.handle_update(uuid, meta, data).await);
}
Some(ListUpdates { uuid, ret }) => {
let _ = ret.send(self.handle_list_updates(uuid).await);
2021-03-11 20:58:51 +01:00
}
2021-03-06 10:51:52 +01:00
Some(GetUpdate { uuid, ret, id }) => {
let _ = ret.send(self.handle_get_update(uuid, id).await);
}
2021-03-06 12:57:56 +01:00
Some(Delete { uuid, ret }) => {
let _ = ret.send(self.handle_delete(uuid).await);
}
Some(Create { uuid, ret }) => {
let _ = ret.send(self.handle_create(uuid).await);
}
None => break,
2021-02-26 17:14:11 +01:00
}
}
}
async fn handle_update(
&self,
uuid: Uuid,
meta: UpdateMeta,
mut payload: mpsc::Receiver<PayloadData<D>>,
2021-03-05 18:34:04 +01:00
) -> Result<UpdateStatus> {
let update_store = self.store.get_or_create(uuid).await?;
2021-03-03 10:57:13 +01:00
let update_file_id = uuid::Uuid::new_v4();
let path = self.path.join(format!("update_{}", update_file_id));
2021-03-11 20:58:51 +01:00
let mut file = File::create(&path)
.await
2021-03-05 18:34:04 +01:00
.map_err(|e| UpdateError::Error(Box::new(e)))?;
2021-03-03 10:57:13 +01:00
while let Some(bytes) = payload.recv().await {
match bytes {
Ok(bytes) => {
2021-03-11 20:58:51 +01:00
file.write_all(bytes.as_ref())
.await
2021-03-05 18:34:04 +01:00
.map_err(|e| UpdateError::Error(Box::new(e)))?;
}
Err(e) => {
2021-03-05 18:34:04 +01:00
return Err(UpdateError::Error(e));
}
}
2021-03-03 10:57:13 +01:00
}
2021-03-11 20:58:51 +01:00
file.flush()
.await
2021-03-05 18:34:04 +01:00
.map_err(|e| UpdateError::Error(Box::new(e)))?;
2021-03-03 10:57:13 +01:00
2021-03-05 18:34:04 +01:00
tokio::task::spawn_blocking(move || {
2021-03-15 16:52:05 +01:00
update_store
.register_update(meta, path, uuid)
2021-03-15 16:52:05 +01:00
.map(UpdateStatus::Pending)
.map_err(|e| UpdateError::Error(Box::new(e)))
})
2021-03-05 18:34:04 +01:00
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?
2021-02-26 17:14:11 +01:00
}
2021-03-04 17:25:02 +01:00
2021-03-11 20:58:51 +01:00
async fn handle_list_updates(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>> {
2021-03-15 16:52:05 +01:00
let update_store = self.store.get(uuid).await?;
2021-03-05 18:34:04 +01:00
tokio::task::spawn_blocking(move || {
2021-03-11 20:58:51 +01:00
let result = update_store
.ok_or(UpdateError::UnexistingIndex(uuid))?
.list()
.map_err(|e| UpdateError::Error(e.into()))?;
Ok(result)
})
.await
2021-03-05 18:34:04 +01:00
.map_err(|e| UpdateError::Error(Box::new(e)))?
2021-03-04 17:25:02 +01:00
}
2021-03-06 10:51:52 +01:00
2021-03-15 16:52:05 +01:00
async fn handle_get_update(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus> {
2021-03-11 20:58:51 +01:00
let store = self
.store
2021-03-15 16:52:05 +01:00
.get(uuid)
2021-03-06 10:51:52 +01:00
.await?
.ok_or(UpdateError::UnexistingIndex(uuid))?;
2021-03-11 20:58:51 +01:00
let result = store
.meta(id)
2021-03-15 16:52:05 +01:00
.map_err(|e| UpdateError::Error(Box::new(e)))?
.ok_or(UpdateError::UnexistingUpdate(id))?;
2021-03-06 10:51:52 +01:00
Ok(result)
}
2021-03-06 12:57:56 +01:00
async fn handle_delete(&self, uuid: Uuid) -> Result<()> {
2021-03-15 16:52:05 +01:00
let store = self.store.delete(uuid).await?;
2021-03-06 12:57:56 +01:00
if let Some(store) = store {
tokio::task::spawn(async move {
let store = get_arc_ownership_blocking(store).await;
tokio::task::spawn_blocking(move || {
store.prepare_for_closing().wait();
info!("Update store {} was closed.", uuid);
});
});
}
Ok(())
}
async fn handle_create(&self, uuid: Uuid) -> Result<()> {
let _ = self.store.get_or_create(uuid).await?;
Ok(())
}
2021-02-26 17:14:11 +01:00
}
#[derive(Clone)]
2021-03-03 10:57:13 +01:00
pub struct UpdateActorHandle<D> {
sender: mpsc::Sender<UpdateMsg<D>>,
2021-02-26 17:14:11 +01:00
}
2021-03-03 10:57:13 +01:00
impl<D> UpdateActorHandle<D>
where
2021-03-04 19:30:13 +01:00
D: AsRef<[u8]> + Sized + 'static + Sync + Send,
2021-03-03 10:57:13 +01:00
{
2021-03-15 18:11:10 +01:00
pub fn new(
index_handle: IndexActorHandle,
path: impl AsRef<Path>,
update_store_size: usize,
) -> anyhow::Result<Self> {
let path = path.as_ref().to_owned().join("updates");
2021-02-26 17:14:11 +01:00
let (sender, receiver) = mpsc::channel(100);
2021-03-13 10:09:10 +01:00
let store = MapUpdateStoreStore::new(index_handle, &path, update_store_size);
2021-03-11 17:59:47 +01:00
let actor = UpdateActor::new(store, receiver, path)?;
2021-03-04 19:30:13 +01:00
tokio::task::spawn(actor.run());
2021-03-11 17:59:47 +01:00
Ok(Self { sender })
2021-02-26 17:14:11 +01:00
}
pub async fn update(
&self,
meta: UpdateMeta,
data: mpsc::Receiver<PayloadData<D>>,
uuid: Uuid,
) -> Result<UpdateStatus> {
2021-02-26 17:14:11 +01:00
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::Update {
uuid,
2021-03-03 10:57:13 +01:00
data,
2021-02-26 17:14:11 +01:00
meta,
ret,
};
let _ = self.sender.send(msg).await;
2021-02-26 17:14:11 +01:00
receiver.await.expect("update actor killed.")
}
2021-03-05 18:34:04 +01:00
pub async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::ListUpdates { uuid, ret };
let _ = self.sender.send(msg).await;
receiver.await.expect("update actor killed.")
}
2021-03-06 10:51:52 +01:00
2021-03-15 16:52:05 +01:00
pub async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus> {
2021-03-06 10:51:52 +01:00
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::GetUpdate { uuid, id, ret };
let _ = self.sender.send(msg).await;
receiver.await.expect("update actor killed.")
}
2021-03-06 12:57:56 +01:00
pub async fn delete(&self, uuid: Uuid) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::Delete { uuid, ret };
let _ = self.sender.send(msg).await;
receiver.await.expect("update actor killed.")
}
pub async fn create(&self, uuid: Uuid) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::Create { uuid, ret };
let _ = self.sender.send(msg).await;
receiver.await.expect("update actor killed.")
}
2021-02-26 09:10:36 +01:00
}
/// [`UpdateStoreStore`] implementation keeping the loaded update stores in a shared map.
struct MapUpdateStoreStore {
    /// Loaded update stores, keyed by index uuid.
    db: Arc<RwLock<HashMap<Uuid, Arc<UpdateStore>>>>,
    /// Handle to the index actor, used by the stores to forward updates to the indexes.
    index_handle: IndexActorHandle,
    /// Directory containing one `updates-<uuid>` directory per update store.
    path: PathBuf,
    /// Map size given to `heed::EnvOpenOptions` when opening a store.
    update_store_size: usize,
}
impl MapUpdateStoreStore {
2021-03-15 18:11:10 +01:00
fn new(
index_handle: IndexActorHandle,
path: impl AsRef<Path>,
update_store_size: usize,
) -> Self {
let db = Arc::new(RwLock::new(HashMap::new()));
let path = path.as_ref().to_owned();
2021-03-05 18:34:04 +01:00
Self {
db,
index_handle,
path,
2021-03-13 10:09:10 +01:00
update_store_size,
2021-03-05 18:34:04 +01:00
}
}
}
#[async_trait::async_trait]
impl UpdateStoreStore for MapUpdateStoreStore {
async fn get_or_create(&self, uuid: Uuid) -> Result<Arc<UpdateStore>> {
match self.db.write().await.entry(uuid) {
Entry::Vacant(e) => {
let mut options = heed::EnvOpenOptions::new();
2021-03-13 10:09:10 +01:00
let update_store_size = self.update_store_size;
options.map_size(update_store_size);
let path = self.path.clone().join(format!("updates-{}", e.key()));
create_dir_all(&path).unwrap();
let index_handle = self.index_handle.clone();
let store = UpdateStore::open(options, &path, move |meta, file| {
futures::executor::block_on(index_handle.update(meta, file))
2021-03-05 18:34:04 +01:00
})
.map_err(|e| UpdateError::Error(e.into()))?;
let store = e.insert(store);
Ok(store.clone())
}
2021-03-05 18:34:04 +01:00
Entry::Occupied(e) => Ok(e.get().clone()),
}
}
2021-03-05 18:34:04 +01:00
2021-03-15 16:52:05 +01:00
async fn get(&self, uuid: Uuid) -> Result<Option<Arc<UpdateStore>>> {
let guard = self.db.read().await;
2021-03-15 16:52:05 +01:00
match guard.get(&uuid) {
2021-03-11 14:23:11 +01:00
Some(uuid) => Ok(Some(uuid.clone())),
None => {
// The index is not found in the found in the loaded indexes, so we attempt to load
// it from disk. We need to acquire a write lock **before** attempting to open the
// index, because someone could be trying to open it at the same time as us.
drop(guard);
2021-03-11 14:23:11 +01:00
let path = self.path.clone().join(format!("updates-{}", uuid));
if path.exists() {
let mut guard = self.db.write().await;
2021-03-15 16:52:05 +01:00
match guard.entry(uuid) {
Entry::Vacant(entry) => {
// We can safely load the index
let index_handle = self.index_handle.clone();
let mut options = heed::EnvOpenOptions::new();
2021-03-13 10:09:10 +01:00
let update_store_size = self.update_store_size;
options.map_size(update_store_size);
let store = UpdateStore::open(options, &path, move |meta, file| {
futures::executor::block_on(index_handle.update(meta, file))
})
.map_err(|e| UpdateError::Error(e.into()))?;
2021-03-15 16:52:05 +01:00
let store = entry.insert(store);
Ok(Some(store.clone()))
}
Entry::Occupied(entry) => {
// The index was loaded while we attempted to to iter
Ok(Some(entry.get().clone()))
}
}
2021-03-11 14:23:11 +01:00
} else {
Ok(None)
}
}
}
2021-03-05 18:34:04 +01:00
}
2021-03-06 12:57:56 +01:00
2021-03-15 16:52:05 +01:00
async fn delete(&self, uuid: Uuid) -> Result<Option<Arc<UpdateStore>>> {
2021-03-06 12:57:56 +01:00
let store = self.db.write().await.remove(&uuid);
2021-03-11 14:23:11 +01:00
let path = self.path.clone().join(format!("updates-{}", uuid));
if store.is_some() || path.exists() {
2021-03-06 12:57:56 +01:00
remove_dir_all(path).unwrap();
}
Ok(store)
}
}