[WIP] rebase on main

This commit is contained in:
tamo 2021-05-10 20:24:14 +02:00
parent 1b5fc61eb6
commit 0275b36fb0
No known key found for this signature in database
GPG Key ID: 20CD8020AFA88D69
20 changed files with 93 additions and 174 deletions

View File

@ -5,17 +5,12 @@ use std::marker::PhantomData;
use flate2::read::GzDecoder;
use log::info;
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
use serde::{Deserialize, Serialize};
use super::{deserialize_some, deserialize_wildcard, Index};
use crate::index_controller::UpdateResult;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateResult {
DocumentsAddition(DocumentAdditionResult),
DocumentDeletion { deleted: u64 },
Other,
}
#[derive(Clone, Default, Debug)]
pub struct Checked;

View File

@ -9,8 +9,6 @@ use log::{error, info};
use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
use serde::{Deserialize, Serialize};
use tempfile::TempDir;
use tokio::fs;
use tokio::task::spawn_blocking;
use super::update_actor::UpdateActorHandle;
use super::uuid_resolver::UuidResolverHandle;
@ -109,6 +107,7 @@ where
}
async fn perform_dump(&self) -> anyhow::Result<()> {
/*
info!("Performing dump.");
let dump_dir = self.dump_path.clone();
@ -144,6 +143,7 @@ where
.await??;
info!("Created dump in {:?}.", dump_path);
*/
Ok(())
}

View File

@ -29,13 +29,11 @@ struct Settings {
/// we need to **always** be able to convert the old settings to the settings currently being used
impl From<Settings> for index_controller::Settings {
fn from(settings: Settings) -> Self {
if settings.distinct_attribute.flatten().is_some() {
error!("`distinct_attribute` are not yet implemented and thus will be ignored");
}
if settings.synonyms.flatten().is_some() {
error!("`synonyms` are not yet implemented and thus will be ignored");
}
Self {
distinct_attribute: settings.distinct_attribute,
// we need to convert the old `Vec<String>` into a `BTreeSet<String>`
displayed_attributes: settings.displayed_attributes.map(|o| o.map(|vec| vec.into_iter().collect())),
searchable_attributes: settings.searchable_attributes,
@ -109,7 +107,7 @@ pub fn import_index(size: usize, dump_path: &Path, index_path: &Path) -> anyhow:
index.update_documents(
UpdateFormat::JsonStream,
IndexDocumentsMethod::ReplaceDocuments,
reader,
Some(reader),
update_builder,
None,
)?;

View File

@ -34,7 +34,7 @@ pub fn import_index(size: usize, dump_path: &Path, index_path: &Path) -> anyhow:
index.update_documents(
UpdateFormat::JsonStream,
IndexDocumentsMethod::ReplaceDocuments,
reader,
Some(reader),
update_builder,
None,
)?;

View File

@ -312,7 +312,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
Ok(())
}
async fn handle_dump(&self, uuid: Uuid, mut path: PathBuf) -> Result<()> {
async fn handle_dump(&self, uuid: Uuid, mut path: PathBuf) -> IndexResult<()> {
use tokio::fs::create_dir_all;
path.push("indexes");
@ -340,7 +340,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
Ok(())
}
async fn handle_get_stats(&self, uuid: Uuid) -> Result<IndexStats> {
async fn handle_get_stats(&self, uuid: Uuid) -> IndexResult<IndexStats> {
let index = self
.store
.get(uuid)

View File

@ -136,14 +136,14 @@ impl IndexActorHandle for IndexActorHandleImpl {
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
async fn dump(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Dump { uuid, path, ret };
let _ = self.read_sender.send(msg).await;
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats> {
async fn get_index_stats(&self, uuid: Uuid) -> IndexResult<IndexStats> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::GetStats { uuid, ret };
let _ = self.sender.send(msg).await;

View File

@ -63,7 +63,7 @@ pub enum IndexMsg {
Dump {
uuid: Uuid,
path: PathBuf,
ret: oneshot::Sender<Result<()>>,
ret: oneshot::Sender<IndexResult<()>>,
},
GetStats {
uuid: Uuid,

View File

@ -97,6 +97,7 @@ pub trait IndexActorHandle {
index_settings: IndexSettings,
) -> IndexResult<IndexMeta>;
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()>;
async fn dump(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()>;
async fn get_index_stats(&self, uuid: Uuid) -> IndexResult<IndexStats>;
}
@ -180,4 +181,5 @@ mod test {
async fn get_index_stats(&self, uuid: Uuid) -> IndexResult<IndexStats> {
self.as_ref().get_index_stats(uuid).await
}
}
}

View File

@ -5,6 +5,7 @@ use std::time::Duration;
use actix_web::web::{Bytes, Payload};
use anyhow::bail;
use chrono::{DateTime, Utc};
use futures::stream::StreamExt;
use log::info;
use milli::FieldsDistribution;
@ -22,6 +23,8 @@ use uuid_resolver::{UuidError, UuidResolverHandle};
use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings};
use crate::option::Opt;
use self::dump::load_dump;
mod index_actor;
mod snapshot;
mod dump;

View File

@ -13,7 +13,7 @@ use tokio::sync::mpsc;
use uuid::Uuid;
use super::{PayloadData, Result, UpdateError, UpdateMsg, UpdateStore, UpdateStoreInfo};
use crate::index_controller::index_actor::{IndexActorHandle, CONCURRENT_INDEX_MSG};
use crate::index_controller::{index_actor::{IndexActorHandle, CONCURRENT_INDEX_MSG}};
use crate::index_controller::{UpdateMeta, UpdateStatus};
pub struct UpdateActor<D, I> {
@ -71,16 +71,14 @@ where
Some(Delete { uuid, ret }) => {
let _ = ret.send(self.handle_delete(uuid).await);
}
Some(Snapshot { uuid, path, ret }) => {
let _ = ret.send(self.handle_snapshot(uuid, path).await);
Some(Snapshot { uuids, path, ret }) => {
let _ = ret.send(self.handle_snapshot(uuids, path).await);
}
Some(Dump { uuid, path, ret }) => {
let _ = ret.send(self.handle_dump(uuid, path).await);
Some(Dump { uuids, path, ret }) => {
let _ = ret.send(self.handle_dump(uuids, path).await);
}
Some(GetInfo { ret }) => {
let _ = ret.send(self.handle_get_info().await);
Some(GetSize { uuid, ret }) => {
let _ = ret.send(self.handle_get_size(uuid).await);
}
None => break,
}
@ -199,51 +197,9 @@ where
}
async fn handle_delete(&self, uuid: Uuid) -> Result<()> {
let store = self.store.delete(uuid).await?;
let store = self.store.clone();
if let Some(store) = store {
tokio::task::spawn(async move {
let store = get_arc_ownership_blocking(store).await;
tokio::task::spawn_blocking(move || {
store.prepare_for_closing().wait();
info!("Update store {} was closed.", uuid);
});
});
}
Ok(())
}
async fn handle_create(&self, uuid: Uuid) -> Result<()> {
let _ = self.store.get_or_create(uuid).await?;
Ok(())
}
Ok(())
}
async fn handle_create(&self, uuid: Uuid) -> Result<()> {
let _ = self.store.get_or_create(uuid).await?;
Ok(())
}
async fn handle_snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
let index_handle = self.index_handle.clone();
if let Some(update_store) = self.store.get(uuid).await? {
tokio::task::spawn_blocking(move || -> anyhow::Result<()> {
// acquire write lock to prevent further writes during snapshot
// the update lock must be acquired BEFORE the write lock to prevent dead lock
let _lock = update_store.update_lock.lock();
let mut txn = update_store.env.write_txn()?;
// create db snapshot
update_store.snapshot(&mut txn, &path, uuid)?;
futures::executor::block_on(
async move { index_handle.snapshot(uuid, path).await },
)?;
Ok(())
})
tokio::task::spawn_blocking(move || store.delete_all(uuid))
.await
.map_err(|e| UpdateError::Error(e.into()))?
.map_err(|e| UpdateError::Error(e.into()))?;
@ -280,6 +236,35 @@ where
Ok(())
}
async fn handle_dump(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
let index_handle = self.index_handle.clone();
let update_store = self.store.clone();
tokio::task::spawn_blocking(move || -> anyhow::Result<()> {
update_store.dump(&uuids, &path)?;
// Perform the snapshot of each index concurently. Only a third of the capabilities of
// the index actor at a time not to put too much pressure on the index actor
let path = &path;
let handle = &index_handle;
let mut stream = futures::stream::iter(uuids.iter())
.map(|&uuid| handle.dump(uuid, path.clone()))
.buffer_unordered(CONCURRENT_INDEX_MSG / 3);
Handle::current().block_on(async {
while let Some(res) = stream.next().await {
res?;
}
Ok(())
})
})
.await
.map_err(|e| UpdateError::Error(e.into()))?
.map_err(|e| UpdateError::Error(e.into()))?;
Ok(())
}
async fn handle_get_info(&self) -> Result<UpdateStoreInfo> {
let update_store = self.store.clone();
let info = tokio::task::spawn_blocking(move || -> anyhow::Result<UpdateStoreInfo> {
@ -292,42 +277,4 @@ where
Ok(info)
}
async fn handle_dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
let index_handle = self.index_handle.clone();
if let Some(update_store) = self.store.get(uuid).await? {
tokio::task::spawn_blocking(move || -> anyhow::Result<()> {
// acquire write lock to prevent further writes during the dump
// the update lock must be acquired BEFORE the write lock to prevent dead lock
let _lock = update_store.update_lock.lock();
let mut txn = update_store.env.write_txn()?;
// create db dump
update_store.dump(&mut txn, &path, uuid)?;
futures::executor::block_on(
async move { index_handle.dump(uuid, path).await },
)?;
Ok(())
})
.await
.map_err(|e| UpdateError::Error(e.into()))?
.map_err(|e| UpdateError::Error(e.into()))?;
}
Ok(())
}
async fn handle_get_size(&self, uuid: Uuid) -> Result<u64> {
let size = match self.store.get(uuid).await? {
Some(update_store) => tokio::task::spawn_blocking(move || -> anyhow::Result<u64> {
let txn = update_store.env.read_txn()?;
update_store.get_size(&txn)
})
.await
.map_err(|e| UpdateError::Error(e.into()))?
.map_err(|e| UpdateError::Error(e.into()))?,
None => 0,
};
}

View File

@ -78,16 +78,9 @@ where
receiver.await.expect("update actor killed.")
}
async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
async fn dump(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::Dump { uuid, path, ret };
let _ = self.sender.send(msg).await;
receiver.await.expect("update actor killed.")
}
async fn get_size(&self, uuid: Uuid) -> Result<u64> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::GetSize { uuid, ret };
let msg = UpdateMsg::Dump { uuids, path, ret };
let _ = self.sender.send(msg).await;
receiver.await.expect("update actor killed.")
}

View File

@ -32,15 +32,11 @@ pub enum UpdateMsg<D> {
ret: oneshot::Sender<Result<()>>,
},
Dump {
uuid: Uuid,
uuids: HashSet<Uuid>,
path: PathBuf,
ret: oneshot::Sender<Result<()>>,
},
GetInfo {
ret: oneshot::Sender<Result<UpdateStoreInfo>>,
},
GetSize {
uuid: Uuid,
ret: oneshot::Sender<Result<u64>>,
},
}

View File

@ -40,11 +40,9 @@ pub trait UpdateActorHandle {
async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>>;
async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus>;
async fn delete(&self, uuid: Uuid) -> Result<()>;
async fn create(&self, uuid: Uuid) -> Result<()>;
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()>;
async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()>;
async fn snapshot(&self, uuid: HashSet<Uuid>, path: PathBuf) -> Result<()>;
async fn dump(&self, uuid: HashSet<Uuid>, path: PathBuf) -> Result<()>;
async fn get_info(&self) -> Result<UpdateStoreInfo>;
async fn get_size(&self, uuid: Uuid) -> Result<u64>;
async fn update(
&self,
meta: UpdateMeta,

View File

@ -499,31 +499,37 @@ impl UpdateStore {
Ok(())
}
pub fn dump(
&self,
txn: &mut heed::RwTxn,
path: impl AsRef<Path>,
uuid: Uuid,
) -> anyhow::Result<()> {
pub fn dump(&self, uuids: &HashSet<Uuid>, path: impl AsRef<Path>) -> anyhow::Result<()> {
let state_lock = self.state.write();
state_lock.swap(State::Snapshoting); // TODO: rename the state
let txn = self.env.write_txn()?;
let update_path = path.as_ref().join("updates");
create_dir_all(&update_path)?;
let mut dump_path = update_path.join(format!("update-{}", uuid));
// acquire write lock to prevent further writes during dump
create_dir_all(&dump_path)?;
dump_path.push("data.mdb");
create_dir_all(&update_path)?;
let db_path = update_path.join("data.mdb");
// TODO: everything
// create db dump
self.env.copy_to_path(&dump_path, CompactionOption::Enabled)?;
self.env.copy_to_path(&db_path, CompactionOption::Enabled)?;
let update_files_path = update_path.join("update_files");
create_dir_all(&update_files_path)?;
for path in self.pending.iter(&txn)? {
let (_, path) = path?;
let name = path.file_name().unwrap();
let to = update_files_path.join(name);
copy(path, to)?;
let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data();
for entry in pendings {
let ((_, uuid, _), pending) = entry?;
if uuids.contains(&uuid) {
if let Some(path) = pending.decode()?.content_path() {
let name = path.file_name().unwrap();
let to = update_files_path.join(name);
copy(path, to)?;
}
}
}
Ok(())
@ -545,25 +551,6 @@ impl UpdateStore {
Ok(UpdateStoreInfo { size, processing })
}
pub fn get_size(&self, txn: &heed::RoTxn) -> anyhow::Result<u64> {
let mut size = self.env.size();
let txn = self.env.read_txn()?;
for entry in self.pending_queue.iter(&txn)? {
let (_, pending) = entry?;
if let Some(path) = pending.content_path() {
size += File::open(path)?.metadata()?.len();
}
}
let processing = match *self.state.read() {
State::Processing(uuid, _) => Some(uuid),
_ => None,
};
Ok(UpdateStoreInfo { size, processing })
}
}
#[cfg(test)]

View File

@ -85,7 +85,7 @@ impl<S: UuidStore> UuidResolverActor<S> {
self.store.snapshot(path).await
}
async fn handle_dump(&self, path: PathBuf) -> Result<Vec<Uuid>> {
async fn handle_dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
self.store.dump(path).await
}

View File

@ -78,7 +78,7 @@ impl UuidResolverHandle for UuidResolverHandleImpl {
.expect("Uuid resolver actor has been killed")?)
}
async fn dump(&self, path: PathBuf) -> Result<Vec<Uuid>> {
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::DumpRequest { path, ret };
let _ = self.sender.send(msg).await;

View File

@ -33,7 +33,7 @@ pub enum UuidResolveMsg {
},
DumpRequest {
path: PathBuf,
ret: oneshot::Sender<Result<Vec<Uuid>>>,
ret: oneshot::Sender<Result<HashSet<Uuid>>>,
},
GetSize {
ret: oneshot::Sender<Result<u64>>,

View File

@ -32,7 +32,7 @@ pub trait UuidResolverHandle {
async fn delete(&self, name: String) -> anyhow::Result<Uuid>;
async fn list(&self) -> anyhow::Result<Vec<(String, Uuid)>>;
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
async fn dump(&self, path: PathBuf) -> Result<Vec<Uuid>>;
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
async fn get_size(&self) -> Result<u64>;
}

View File

@ -21,7 +21,7 @@ pub trait UuidStore {
async fn list(&self) -> Result<Vec<(String, Uuid)>>;
async fn insert(&self, name: String, uuid: Uuid) -> Result<()>;
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
async fn dump(&self, path: PathBuf) -> Result<Vec<Uuid>>;
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
async fn get_size(&self) -> Result<u64>;
}
@ -116,7 +116,7 @@ impl HeedUuidStore {
// TODO: we should merge this function and the following function for the dump. it's exactly
// the same code
pub fn snapshot(&self, mut path: PathBuf) -> Result<Vec<Uuid>> {
pub fn snapshot(&self, mut path: PathBuf) -> Result<HashSet<Uuid>> {
let env = self.env.clone();
let db = self.db;
// Write transaction to acquire a lock on the database.
@ -138,16 +138,16 @@ impl HeedUuidStore {
Ok(entries)
}
pub fn dump(&self, mut path: PathBuf) -> Result<Vec<Uuid>> {
pub fn dump(&self, mut path: PathBuf) -> Result<HashSet<Uuid>> {
let env = self.env.clone();
let db = self.db;
// Write transaction to acquire a lock on the database.
let txn = env.write_txn()?;
let mut entries = Vec::new();
let mut entries = HashSet::new();
for entry in db.iter(&txn)? {
let (_, uuid) = entry?;
let uuid = Uuid::from_slice(uuid)?;
entries.push(uuid)
entries.insert(uuid);
}
// only perform dump if there are indexes
@ -192,12 +192,12 @@ impl UuidStore for HeedUuidStore {
tokio::task::spawn_blocking(move || this.insert(name, uuid)).await?
}
async fn snapshot(&self, path: PathBuf) -> Result<Vec<Uuid>> {
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
let this = self.clone();
tokio::task::spawn_blocking(move || this.snapshot(path)).await?
}
async fn dump(&self, path: PathBuf) -> Result<Vec<Uuid>> {
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
let this = self.clone();
tokio::task::spawn_blocking(move || this.dump(path)).await?
}

View File

@ -1,7 +1,7 @@
use actix_web::{delete, get, post, put};
use actix_web::{web, HttpResponse};
use chrono::DateTime;
use serde::Deserialize;
use chrono::{DateTime, Utc};
use serde::{Serialize, Deserialize};
use crate::error::ResponseError;
use crate::helpers::Authentication;