fix snapshots

This commit is contained in:
Marin Postma 2021-04-14 17:53:12 +02:00
parent 2b154524bb
commit 33830d5ecf
No known key found for this signature in database
GPG Key ID: D5241F0C0C865F30
15 changed files with 109 additions and 101 deletions

View File

@ -127,7 +127,7 @@ impl Data {
stats.database_size += index_stats.size;
stats.database_size += self
.index_controller
.get_updates_size(index.uid.clone())
.get_updates_size()
.await?;
stats.last_update = Some(match stats.last_update {

View File

@ -6,9 +6,9 @@ use anyhow::{bail, Context};
use milli::obkv_to_json;
use serde_json::{Map, Value};
use crate::helpers::EnvSizer;
pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
pub use updates::{Facets, Settings, UpdateResult};
use crate::helpers::EnvSizer;
mod search;
mod updates;

View File

@ -19,6 +19,8 @@ use crate::option::IndexerOpts;
use super::{IndexError, IndexMeta, IndexMsg, IndexSettings, IndexStore, Result, UpdateResult};
pub const CONCURRENT_INDEX_MSG: usize = 10;
pub struct IndexActor<S> {
receiver: Option<mpsc::Receiver<IndexMsg>>,
update_handler: Arc<UpdateHandler>,
@ -27,10 +29,7 @@ pub struct IndexActor<S> {
}
impl<S: IndexStore + Sync + Send> IndexActor<S> {
pub fn new(
receiver: mpsc::Receiver<IndexMsg>,
store: S,
) -> Result<Self> {
pub fn new(receiver: mpsc::Receiver<IndexMsg>, store: S) -> Result<Self> {
let options = IndexerOpts::default();
let update_handler = UpdateHandler::new(&options).map_err(IndexError::Error)?;
let update_handler = Arc::new(update_handler);
@ -40,7 +39,6 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
store,
update_handler,
processing: RwLock::new(None),
store,
})
}
@ -62,7 +60,9 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
}
};
stream.for_each_concurrent(Some(10), |msg| self.handle_message(msg)).await;
stream
.for_each_concurrent(Some(CONCURRENT_INDEX_MSG), |msg| self.handle_message(msg))
.await;
}
async fn handle_message(&self, msg: IndexMsg) {
@ -75,7 +75,12 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
} => {
let _ = ret.send(self.handle_create_index(uuid, primary_key).await);
}
Update { ret, meta, data, uuid } => {
Update {
ret,
meta,
data,
uuid,
} => {
let _ = ret.send(self.handle_update(uuid, meta, data).await);
}
Search { ret, query, uuid } => {

View File

@ -36,7 +36,12 @@ impl IndexActorHandle for IndexActorHandleImpl {
data: std::fs::File,
) -> anyhow::Result<UpdateResult> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Update { ret, meta, data, uuid };
let msg = IndexMsg::Update {
ret,
meta,
data,
uuid,
};
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
@ -126,7 +131,7 @@ impl IndexActorHandle for IndexActorHandleImpl {
async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::GetStats { uuid, ret };
let _ = self.read_sender.send(msg).await;
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
}
@ -138,8 +143,6 @@ impl IndexActorHandleImpl {
let store = MapIndexStore::new(path, index_size);
let actor = IndexActor::new(receiver, store)?;
tokio::task::spawn(actor.run());
Ok(Self {
sender,
})
Ok(Self { sender })
}
}

View File

@ -8,6 +8,7 @@ use thiserror::Error;
use uuid::Uuid;
use actor::IndexActor;
pub use actor::CONCURRENT_INDEX_MSG;
pub use handle_impl::IndexActorHandleImpl;
use message::IndexMsg;
use store::{IndexStore, MapIndexStore};

View File

@ -356,10 +356,8 @@ impl IndexController {
Ok(self.index_handle.get_index_stats(uuid).await?)
}
pub async fn get_updates_size(&self, uid: String) -> anyhow::Result<u64> {
let uuid = self.uuid_resolver.get(uid.clone()).await?;
Ok(self.update_handle.get_size(uuid).await?)
pub async fn get_updates_size(&self) -> anyhow::Result<u64> {
Ok(self.update_handle.get_size().await?)
}
pub async fn get_uuids_size(&self) -> anyhow::Result<u64> {

View File

@ -71,15 +71,10 @@ where
return Ok(());
}
let tasks = uuids
.iter()
.map(|&uuid| {
self.update_handle
.snapshot(uuid, temp_snapshot_path.clone())
})
.collect::<Vec<_>>();
futures::future::try_join_all(tasks).await?;
self.update_handle
.snapshot(uuids, temp_snapshot_path.clone())
.await?;
let snapshot_dir = self.snapshot_path.clone();
let snapshot_path = self

View File

@ -8,12 +8,12 @@ use tokio::fs;
use tokio::io::{AsyncSeekExt, AsyncWriteExt};
use tokio::sync::mpsc;
use uuid::Uuid;
use futures::StreamExt;
use super::{PayloadData, Result, UpdateError, UpdateMsg, UpdateStore};
use crate::index_controller::index_actor::IndexActorHandle;
use crate::index_controller::index_actor::{IndexActorHandle, CONCURRENT_INDEX_MSG};
use crate::index_controller::{UpdateMeta, UpdateStatus};
pub struct UpdateActor<D, I> {
path: PathBuf,
store: Arc<UpdateStore>,
@ -32,10 +32,7 @@ where
path: impl AsRef<Path>,
index_handle: I,
) -> anyhow::Result<Self> {
let path = path
.as_ref()
.to_owned()
.join("updates");
let path = path.as_ref().to_owned().join("updates");
std::fs::create_dir_all(&path)?;
@ -81,11 +78,11 @@ where
Some(Delete { uuid, ret }) => {
let _ = ret.send(self.handle_delete(uuid).await);
}
Some(Snapshot { uuid, path, ret }) => {
let _ = ret.send(self.handle_snapshot(uuid, path).await);
Some(Snapshot { uuids, path, ret }) => {
let _ = ret.send(self.handle_snapshot(uuids, path).await);
}
Some(GetSize { uuid, ret }) => {
let _ = ret.send(self.handle_get_size(uuid).await);
Some(GetSize { ret }) => {
let _ = ret.send(self.handle_get_size().await);
}
None => break,
}
@ -200,7 +197,7 @@ where
Ok(())
}
async fn handle_snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
async fn handle_snapshot(&self, uuids: Vec<Uuid>, path: PathBuf) -> Result<()> {
let index_handle = self.index_handle.clone();
let update_store = self.store.clone();
tokio::task::spawn_blocking(move || -> anyhow::Result<()> {
@ -210,32 +207,41 @@ where
let mut txn = update_store.env.write_txn()?;
// create db snapshot
update_store.snapshot(&mut txn, &path, uuid)?;
update_store.snapshot(&mut txn, &path)?;
futures::executor::block_on(
async move { index_handle.snapshot(uuid, path).await },
)?;
Ok(())
// Perform the snapshot of each index concurrently. Only a third of the index actor's capacity
// is used at a time, so as not to put too much pressure on the index actor.
let path = &path;
let handle = &index_handle;
let mut stream = futures::stream::iter(uuids.iter())
.map(|&uuid| handle.snapshot(uuid, path.clone()))
.buffer_unordered(CONCURRENT_INDEX_MSG / 3);
futures::executor::block_on(async {
while let Some(res) = stream.next().await {
res?;
}
Ok(())
})
})
.await
.map_err(|e| UpdateError::Error(e.into()))?
.map_err(|e| UpdateError::Error(e.into()))?;
.map_err(|e| UpdateError::Error(e.into()))?
.map_err(|e| UpdateError::Error(e.into()))?;
Ok(())
}
async fn handle_get_size(&self, uuid: Uuid) -> Result<u64> {
let size = match self.store.get(uuid).await? {
Some(update_store) => tokio::task::spawn_blocking(move || -> anyhow::Result<u64> {
let txn = update_store.env.read_txn()?;
async fn handle_get_size(&self) -> Result<u64> {
let update_store = self.store.clone();
let size = tokio::task::spawn_blocking(move || -> anyhow::Result<u64> {
let txn = update_store.env.read_txn()?;
update_store.get_size(&txn)
})
.await
.map_err(|e| UpdateError::Error(e.into()))?
.map_err(|e| UpdateError::Error(e.into()))?,
None => 0,
};
update_store.get_size(&txn)
})
.await
.map_err(|e| UpdateError::Error(e.into()))?
.map_err(|e| UpdateError::Error(e.into()))?;
Ok(size)
}

View File

@ -6,8 +6,7 @@ use uuid::Uuid;
use crate::index_controller::IndexActorHandle;
use super::{
PayloadData, Result, UpdateActor, UpdateActorHandle, UpdateMeta,
UpdateMsg, UpdateStatus,
PayloadData, Result, UpdateActor, UpdateActorHandle, UpdateMeta, UpdateMsg, UpdateStatus,
};
#[derive(Clone)]
@ -27,7 +26,7 @@ where
where
I: IndexActorHandle + Clone + Send + Sync + 'static,
{
let path = path.as_ref().to_owned().join("updates");
let path = path.as_ref().to_owned();
let (sender, receiver) = mpsc::channel(100);
let actor = UpdateActor::new(update_store_size, receiver, path, index_handle)?;
@ -64,16 +63,16 @@ where
receiver.await.expect("update actor killed.")
}
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
async fn snapshot(&self, uuids: Vec<Uuid>, path: PathBuf) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::Snapshot { uuid, path, ret };
let msg = UpdateMsg::Snapshot { uuids, path, ret };
let _ = self.sender.send(msg).await;
receiver.await.expect("update actor killed.")
}
async fn get_size(&self, uuid: Uuid) -> Result<u64> {
async fn get_size(&self) -> Result<u64> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::GetSize { uuid, ret };
let msg = UpdateMsg::GetSize { ret };
let _ = self.sender.send(msg).await;
receiver.await.expect("update actor killed.")
}

View File

@ -26,12 +26,11 @@ pub enum UpdateMsg<D> {
ret: oneshot::Sender<Result<()>>,
},
Snapshot {
uuid: Uuid,
uuids: Vec<Uuid>,
path: PathBuf,
ret: oneshot::Sender<Result<()>>,
},
GetSize {
uuid: Uuid,
ret: oneshot::Sender<Result<u64>>,
},
}

View File

@ -40,8 +40,8 @@ pub trait UpdateActorHandle {
async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>>;
async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus>;
async fn delete(&self, uuid: Uuid) -> Result<()>;
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()>;
async fn get_size(&self, uuid: Uuid) -> Result<u64>;
async fn snapshot(&self, uuids: Vec<Uuid>, path: PathBuf) -> Result<()>;
async fn get_size(&self) -> Result<u64>;
async fn update(
&self,
meta: UpdateMeta,

View File

@ -5,6 +5,7 @@ use std::mem::size_of;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use anyhow::Context;
use bytemuck::{Pod, Zeroable};
use heed::types::{ByteSlice, DecodeIgnore, SerdeJson};
use heed::{BytesDecode, BytesEncode, CompactionOption, Database, Env, EnvOpenOptions};
@ -106,7 +107,7 @@ where
mut options: EnvOpenOptions,
path: P,
update_handler: U,
) -> heed::Result<Arc<Self>>
) -> anyhow::Result<Arc<Self>>
where
P: AsRef<Path>,
U: HandleUpdate<M, N, E> + Sync + Clone + Send + 'static,
@ -127,6 +128,11 @@ where
let update_lock = Arc::new(Mutex::new(()));
// Init update loop to perform any pending updates at launch.
// Since we just launched the update store, and we still own the receiving end of the
// channel, this call is guaranteed to succeed.
notification_sender.try_send(()).expect("Failed to init update store");
let update_store = Arc::new(UpdateStore {
env,
pending,
@ -277,8 +283,11 @@ where
// to the update handler. Processing store is non-persistent to be able to recover
// from a failure
let processing = pending.processing();
self.processing.write().replace((index_uuid, processing.clone()));
let file = File::open(&content_path)?;
self.processing
.write()
.replace((index_uuid, processing.clone()));
let file = File::open(&content_path)
.with_context(|| format!("file at path: {:?}", &content_path))?;
// Process the pending update using the provided user function.
let result = handler.handle_update(index_uuid, processing, file)?;
drop(rtxn);
@ -521,9 +530,10 @@ where
fn delete_all<A>(
txn: &mut heed::RwTxn,
uuid: Uuid,
db: Database<ByteSlice, A>
db: Database<ByteSlice, A>,
) -> anyhow::Result<()>
where A: for<'a> heed::BytesDecode<'a>
where
A: for<'a> heed::BytesDecode<'a>,
{
let mut iter = db.prefix_iter_mut(txn, uuid.as_bytes())?;
while let Some(_) = iter.next() {
@ -553,19 +563,17 @@ where
&self,
txn: &mut heed::RwTxn,
path: impl AsRef<Path>,
uuid: Uuid,
) -> anyhow::Result<()> {
let update_path = path.as_ref().join("updates");
create_dir_all(&update_path)?;
let mut snapshot_path = update_path.join(format!("update-{}", uuid));
// acquire write lock to prevent further writes during snapshot
create_dir_all(&snapshot_path)?;
snapshot_path.push("data.mdb");
create_dir_all(&update_path)?;
let db_path = update_path.join("data.mdb");
// create db snapshot
self.env
.copy_to_path(&snapshot_path, CompactionOption::Enabled)?;
.copy_to_path(&db_path, CompactionOption::Enabled)?;
let update_files_path = update_path.join("update_files");
create_dir_all(&update_files_path)?;

View File

@ -84,9 +84,9 @@ async fn delete_document(
.delete_documents(path.index_uid.clone(), vec![path.document_id.clone()])
.await
{
Ok(update_status) => {
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
}
Ok(update_status) => Ok(
HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))
),
Err(e) => {
Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() })))
}
@ -163,9 +163,9 @@ async fn add_documents(
.await;
match addition_result {
Ok(update_status) => {
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
}
Ok(update_status) => Ok(
HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))
),
Err(e) => {
Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() })))
}
@ -242,9 +242,9 @@ async fn delete_documents(
.collect();
match data.delete_documents(path.index_uid.clone(), ids).await {
Ok(update_status) => {
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
}
Ok(update_status) => Ok(
HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))
),
Err(e) => {
Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() })))
}
@ -258,9 +258,9 @@ async fn clear_all_documents(
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
match data.clear_documents(path.index_uid.clone()).await {
Ok(update_status) => {
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
}
Ok(update_status) => Ok(
HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))
),
Err(e) => {
Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() })))
}

View File

@ -143,9 +143,9 @@ async fn update_all(
.update_settings(index_uid.into_inner(), body.into_inner(), true)
.await
{
Ok(update_result) => {
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_result.id() })))
}
Ok(update_result) => Ok(
HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_result.id() }))
),
Err(e) => {
Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() })))
}
@ -175,9 +175,9 @@ async fn delete_all(
.update_settings(index_uid.into_inner(), settings, false)
.await
{
Ok(update_result) => {
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_result.id() })))
}
Ok(update_result) => Ok(
HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_result.id() }))
),
Err(e) => {
Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() })))
}

View File

@ -23,13 +23,7 @@ async fn get_settings() {
assert_eq!(settings["distinctAttribute"], json!(null));
assert_eq!(
settings["rankingRules"],
json!([
"words",
"typo",
"proximity",
"attribute",
"exactness"
])
json!(["words", "typo", "proximity", "attribute", "exactness"])
);
assert_eq!(settings["stopWords"], json!([]));
}