mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 05:14:27 +01:00
fix snapshots
This commit is contained in:
parent
2b154524bb
commit
33830d5ecf
@ -127,7 +127,7 @@ impl Data {
|
||||
stats.database_size += index_stats.size;
|
||||
stats.database_size += self
|
||||
.index_controller
|
||||
.get_updates_size(index.uid.clone())
|
||||
.get_updates_size()
|
||||
.await?;
|
||||
|
||||
stats.last_update = Some(match stats.last_update {
|
||||
|
@ -6,9 +6,9 @@ use anyhow::{bail, Context};
|
||||
use milli::obkv_to_json;
|
||||
use serde_json::{Map, Value};
|
||||
|
||||
use crate::helpers::EnvSizer;
|
||||
pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
|
||||
pub use updates::{Facets, Settings, UpdateResult};
|
||||
use crate::helpers::EnvSizer;
|
||||
|
||||
mod search;
|
||||
mod updates;
|
||||
|
@ -19,6 +19,8 @@ use crate::option::IndexerOpts;
|
||||
|
||||
use super::{IndexError, IndexMeta, IndexMsg, IndexSettings, IndexStore, Result, UpdateResult};
|
||||
|
||||
pub const CONCURRENT_INDEX_MSG: usize = 10;
|
||||
|
||||
pub struct IndexActor<S> {
|
||||
receiver: Option<mpsc::Receiver<IndexMsg>>,
|
||||
update_handler: Arc<UpdateHandler>,
|
||||
@ -27,10 +29,7 @@ pub struct IndexActor<S> {
|
||||
}
|
||||
|
||||
impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
||||
pub fn new(
|
||||
receiver: mpsc::Receiver<IndexMsg>,
|
||||
store: S,
|
||||
) -> Result<Self> {
|
||||
pub fn new(receiver: mpsc::Receiver<IndexMsg>, store: S) -> Result<Self> {
|
||||
let options = IndexerOpts::default();
|
||||
let update_handler = UpdateHandler::new(&options).map_err(IndexError::Error)?;
|
||||
let update_handler = Arc::new(update_handler);
|
||||
@ -40,7 +39,6 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
||||
store,
|
||||
update_handler,
|
||||
processing: RwLock::new(None),
|
||||
store,
|
||||
})
|
||||
}
|
||||
|
||||
@ -62,7 +60,9 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
||||
}
|
||||
};
|
||||
|
||||
stream.for_each_concurrent(Some(10), |msg| self.handle_message(msg)).await;
|
||||
stream
|
||||
.for_each_concurrent(Some(CONCURRENT_INDEX_MSG), |msg| self.handle_message(msg))
|
||||
.await;
|
||||
}
|
||||
|
||||
async fn handle_message(&self, msg: IndexMsg) {
|
||||
@ -75,7 +75,12 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
||||
} => {
|
||||
let _ = ret.send(self.handle_create_index(uuid, primary_key).await);
|
||||
}
|
||||
Update { ret, meta, data, uuid } => {
|
||||
Update {
|
||||
ret,
|
||||
meta,
|
||||
data,
|
||||
uuid,
|
||||
} => {
|
||||
let _ = ret.send(self.handle_update(uuid, meta, data).await);
|
||||
}
|
||||
Search { ret, query, uuid } => {
|
||||
|
@ -36,7 +36,12 @@ impl IndexActorHandle for IndexActorHandleImpl {
|
||||
data: std::fs::File,
|
||||
) -> anyhow::Result<UpdateResult> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = IndexMsg::Update { ret, meta, data, uuid };
|
||||
let msg = IndexMsg::Update {
|
||||
ret,
|
||||
meta,
|
||||
data,
|
||||
uuid,
|
||||
};
|
||||
let _ = self.sender.send(msg).await;
|
||||
Ok(receiver.await.expect("IndexActor has been killed")?)
|
||||
}
|
||||
@ -126,7 +131,7 @@ impl IndexActorHandle for IndexActorHandleImpl {
|
||||
async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = IndexMsg::GetStats { uuid, ret };
|
||||
let _ = self.read_sender.send(msg).await;
|
||||
let _ = self.sender.send(msg).await;
|
||||
Ok(receiver.await.expect("IndexActor has been killed")?)
|
||||
}
|
||||
}
|
||||
@ -138,8 +143,6 @@ impl IndexActorHandleImpl {
|
||||
let store = MapIndexStore::new(path, index_size);
|
||||
let actor = IndexActor::new(receiver, store)?;
|
||||
tokio::task::spawn(actor.run());
|
||||
Ok(Self {
|
||||
sender,
|
||||
})
|
||||
Ok(Self { sender })
|
||||
}
|
||||
}
|
||||
|
@ -8,6 +8,7 @@ use thiserror::Error;
|
||||
use uuid::Uuid;
|
||||
|
||||
use actor::IndexActor;
|
||||
pub use actor::CONCURRENT_INDEX_MSG;
|
||||
pub use handle_impl::IndexActorHandleImpl;
|
||||
use message::IndexMsg;
|
||||
use store::{IndexStore, MapIndexStore};
|
||||
|
@ -356,10 +356,8 @@ impl IndexController {
|
||||
Ok(self.index_handle.get_index_stats(uuid).await?)
|
||||
}
|
||||
|
||||
pub async fn get_updates_size(&self, uid: String) -> anyhow::Result<u64> {
|
||||
let uuid = self.uuid_resolver.get(uid.clone()).await?;
|
||||
|
||||
Ok(self.update_handle.get_size(uuid).await?)
|
||||
pub async fn get_updates_size(&self) -> anyhow::Result<u64> {
|
||||
Ok(self.update_handle.get_size().await?)
|
||||
}
|
||||
|
||||
pub async fn get_uuids_size(&self) -> anyhow::Result<u64> {
|
||||
|
@ -71,15 +71,10 @@ where
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let tasks = uuids
|
||||
.iter()
|
||||
.map(|&uuid| {
|
||||
self.update_handle
|
||||
.snapshot(uuid, temp_snapshot_path.clone())
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
futures::future::try_join_all(tasks).await?;
|
||||
self.update_handle
|
||||
.snapshot(uuids, temp_snapshot_path.clone())
|
||||
.await?;
|
||||
|
||||
let snapshot_dir = self.snapshot_path.clone();
|
||||
let snapshot_path = self
|
||||
|
@ -8,12 +8,12 @@ use tokio::fs;
|
||||
use tokio::io::{AsyncSeekExt, AsyncWriteExt};
|
||||
use tokio::sync::mpsc;
|
||||
use uuid::Uuid;
|
||||
use futures::StreamExt;
|
||||
|
||||
use super::{PayloadData, Result, UpdateError, UpdateMsg, UpdateStore};
|
||||
use crate::index_controller::index_actor::IndexActorHandle;
|
||||
use crate::index_controller::index_actor::{IndexActorHandle, CONCURRENT_INDEX_MSG};
|
||||
use crate::index_controller::{UpdateMeta, UpdateStatus};
|
||||
|
||||
|
||||
pub struct UpdateActor<D, I> {
|
||||
path: PathBuf,
|
||||
store: Arc<UpdateStore>,
|
||||
@ -32,10 +32,7 @@ where
|
||||
path: impl AsRef<Path>,
|
||||
index_handle: I,
|
||||
) -> anyhow::Result<Self> {
|
||||
let path = path
|
||||
.as_ref()
|
||||
.to_owned()
|
||||
.join("updates");
|
||||
let path = path.as_ref().to_owned().join("updates");
|
||||
|
||||
std::fs::create_dir_all(&path)?;
|
||||
|
||||
@ -81,11 +78,11 @@ where
|
||||
Some(Delete { uuid, ret }) => {
|
||||
let _ = ret.send(self.handle_delete(uuid).await);
|
||||
}
|
||||
Some(Snapshot { uuid, path, ret }) => {
|
||||
let _ = ret.send(self.handle_snapshot(uuid, path).await);
|
||||
Some(Snapshot { uuids, path, ret }) => {
|
||||
let _ = ret.send(self.handle_snapshot(uuids, path).await);
|
||||
}
|
||||
Some(GetSize { uuid, ret }) => {
|
||||
let _ = ret.send(self.handle_get_size(uuid).await);
|
||||
Some(GetSize { ret }) => {
|
||||
let _ = ret.send(self.handle_get_size().await);
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
@ -200,7 +197,7 @@ where
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
|
||||
async fn handle_snapshot(&self, uuids: Vec<Uuid>, path: PathBuf) -> Result<()> {
|
||||
let index_handle = self.index_handle.clone();
|
||||
let update_store = self.store.clone();
|
||||
tokio::task::spawn_blocking(move || -> anyhow::Result<()> {
|
||||
@ -210,13 +207,24 @@ where
|
||||
let mut txn = update_store.env.write_txn()?;
|
||||
|
||||
// create db snapshot
|
||||
update_store.snapshot(&mut txn, &path, uuid)?;
|
||||
update_store.snapshot(&mut txn, &path)?;
|
||||
|
||||
futures::executor::block_on(
|
||||
async move { index_handle.snapshot(uuid, path).await },
|
||||
)?;
|
||||
// Perform the snapshot of each index concurently. Only a third of the capabilities of
|
||||
// the index actor at a time not to put too much pressure on the index actor
|
||||
let path = &path;
|
||||
let handle = &index_handle;
|
||||
|
||||
let mut stream = futures::stream::iter(uuids.iter())
|
||||
.map(|&uuid| handle.snapshot(uuid, path.clone()))
|
||||
.buffer_unordered(CONCURRENT_INDEX_MSG / 3);
|
||||
|
||||
futures::executor::block_on(async {
|
||||
while let Some(res) = stream.next().await {
|
||||
res?;
|
||||
}
|
||||
Ok(())
|
||||
})
|
||||
})
|
||||
.await
|
||||
.map_err(|e| UpdateError::Error(e.into()))?
|
||||
.map_err(|e| UpdateError::Error(e.into()))?;
|
||||
@ -224,18 +232,16 @@ where
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_get_size(&self, uuid: Uuid) -> Result<u64> {
|
||||
let size = match self.store.get(uuid).await? {
|
||||
Some(update_store) => tokio::task::spawn_blocking(move || -> anyhow::Result<u64> {
|
||||
async fn handle_get_size(&self) -> Result<u64> {
|
||||
let update_store = self.store.clone();
|
||||
let size = tokio::task::spawn_blocking(move || -> anyhow::Result<u64> {
|
||||
let txn = update_store.env.read_txn()?;
|
||||
|
||||
update_store.get_size(&txn)
|
||||
})
|
||||
.await
|
||||
.map_err(|e| UpdateError::Error(e.into()))?
|
||||
.map_err(|e| UpdateError::Error(e.into()))?,
|
||||
None => 0,
|
||||
};
|
||||
.map_err(|e| UpdateError::Error(e.into()))?;
|
||||
|
||||
Ok(size)
|
||||
}
|
||||
|
@ -6,8 +6,7 @@ use uuid::Uuid;
|
||||
use crate::index_controller::IndexActorHandle;
|
||||
|
||||
use super::{
|
||||
PayloadData, Result, UpdateActor, UpdateActorHandle, UpdateMeta,
|
||||
UpdateMsg, UpdateStatus,
|
||||
PayloadData, Result, UpdateActor, UpdateActorHandle, UpdateMeta, UpdateMsg, UpdateStatus,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
@ -27,7 +26,7 @@ where
|
||||
where
|
||||
I: IndexActorHandle + Clone + Send + Sync + 'static,
|
||||
{
|
||||
let path = path.as_ref().to_owned().join("updates");
|
||||
let path = path.as_ref().to_owned();
|
||||
let (sender, receiver) = mpsc::channel(100);
|
||||
let actor = UpdateActor::new(update_store_size, receiver, path, index_handle)?;
|
||||
|
||||
@ -64,16 +63,16 @@ where
|
||||
receiver.await.expect("update actor killed.")
|
||||
}
|
||||
|
||||
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
|
||||
async fn snapshot(&self, uuids: Vec<Uuid>, path: PathBuf) -> Result<()> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = UpdateMsg::Snapshot { uuid, path, ret };
|
||||
let msg = UpdateMsg::Snapshot { uuids, path, ret };
|
||||
let _ = self.sender.send(msg).await;
|
||||
receiver.await.expect("update actor killed.")
|
||||
}
|
||||
|
||||
async fn get_size(&self, uuid: Uuid) -> Result<u64> {
|
||||
async fn get_size(&self) -> Result<u64> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = UpdateMsg::GetSize { uuid, ret };
|
||||
let msg = UpdateMsg::GetSize { ret };
|
||||
let _ = self.sender.send(msg).await;
|
||||
receiver.await.expect("update actor killed.")
|
||||
}
|
||||
|
@ -26,12 +26,11 @@ pub enum UpdateMsg<D> {
|
||||
ret: oneshot::Sender<Result<()>>,
|
||||
},
|
||||
Snapshot {
|
||||
uuid: Uuid,
|
||||
uuids: Vec<Uuid>,
|
||||
path: PathBuf,
|
||||
ret: oneshot::Sender<Result<()>>,
|
||||
},
|
||||
GetSize {
|
||||
uuid: Uuid,
|
||||
ret: oneshot::Sender<Result<u64>>,
|
||||
},
|
||||
}
|
||||
|
@ -40,8 +40,8 @@ pub trait UpdateActorHandle {
|
||||
async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>>;
|
||||
async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus>;
|
||||
async fn delete(&self, uuid: Uuid) -> Result<()>;
|
||||
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()>;
|
||||
async fn get_size(&self, uuid: Uuid) -> Result<u64>;
|
||||
async fn snapshot(&self, uuids: Vec<Uuid>, path: PathBuf) -> Result<()>;
|
||||
async fn get_size(&self) -> Result<u64>;
|
||||
async fn update(
|
||||
&self,
|
||||
meta: UpdateMeta,
|
||||
|
@ -5,6 +5,7 @@ use std::mem::size_of;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Context;
|
||||
use bytemuck::{Pod, Zeroable};
|
||||
use heed::types::{ByteSlice, DecodeIgnore, SerdeJson};
|
||||
use heed::{BytesDecode, BytesEncode, CompactionOption, Database, Env, EnvOpenOptions};
|
||||
@ -106,7 +107,7 @@ where
|
||||
mut options: EnvOpenOptions,
|
||||
path: P,
|
||||
update_handler: U,
|
||||
) -> heed::Result<Arc<Self>>
|
||||
) -> anyhow::Result<Arc<Self>>
|
||||
where
|
||||
P: AsRef<Path>,
|
||||
U: HandleUpdate<M, N, E> + Sync + Clone + Send + 'static,
|
||||
@ -127,6 +128,11 @@ where
|
||||
|
||||
let update_lock = Arc::new(Mutex::new(()));
|
||||
|
||||
// Init update loop to perform any pending updates at launch.
|
||||
// Since we just launched the update store, and we still own the receiving end of the
|
||||
// channel, this call is guarenteed to succeed.
|
||||
notification_sender.try_send(()).expect("Failed to init update store");
|
||||
|
||||
let update_store = Arc::new(UpdateStore {
|
||||
env,
|
||||
pending,
|
||||
@ -277,8 +283,11 @@ where
|
||||
// to the update handler. Processing store is non persistent to be able recover
|
||||
// from a failure
|
||||
let processing = pending.processing();
|
||||
self.processing.write().replace((index_uuid, processing.clone()));
|
||||
let file = File::open(&content_path)?;
|
||||
self.processing
|
||||
.write()
|
||||
.replace((index_uuid, processing.clone()));
|
||||
let file = File::open(&content_path)
|
||||
.with_context(|| format!("file at path: {:?}", &content_path))?;
|
||||
// Process the pending update using the provided user function.
|
||||
let result = handler.handle_update(index_uuid, processing, file)?;
|
||||
drop(rtxn);
|
||||
@ -521,9 +530,10 @@ where
|
||||
fn delete_all<A>(
|
||||
txn: &mut heed::RwTxn,
|
||||
uuid: Uuid,
|
||||
db: Database<ByteSlice, A>
|
||||
db: Database<ByteSlice, A>,
|
||||
) -> anyhow::Result<()>
|
||||
where A: for<'a> heed::BytesDecode<'a>
|
||||
where
|
||||
A: for<'a> heed::BytesDecode<'a>,
|
||||
{
|
||||
let mut iter = db.prefix_iter_mut(txn, uuid.as_bytes())?;
|
||||
while let Some(_) = iter.next() {
|
||||
@ -553,19 +563,17 @@ where
|
||||
&self,
|
||||
txn: &mut heed::RwTxn,
|
||||
path: impl AsRef<Path>,
|
||||
uuid: Uuid,
|
||||
) -> anyhow::Result<()> {
|
||||
let update_path = path.as_ref().join("updates");
|
||||
create_dir_all(&update_path)?;
|
||||
|
||||
let mut snapshot_path = update_path.join(format!("update-{}", uuid));
|
||||
// acquire write lock to prevent further writes during snapshot
|
||||
create_dir_all(&snapshot_path)?;
|
||||
snapshot_path.push("data.mdb");
|
||||
create_dir_all(&update_path)?;
|
||||
let db_path = update_path.join("data.mdb");
|
||||
|
||||
// create db snapshot
|
||||
self.env
|
||||
.copy_to_path(&snapshot_path, CompactionOption::Enabled)?;
|
||||
.copy_to_path(&db_path, CompactionOption::Enabled)?;
|
||||
|
||||
let update_files_path = update_path.join("update_files");
|
||||
create_dir_all(&update_files_path)?;
|
||||
|
@ -84,9 +84,9 @@ async fn delete_document(
|
||||
.delete_documents(path.index_uid.clone(), vec![path.document_id.clone()])
|
||||
.await
|
||||
{
|
||||
Ok(update_status) => {
|
||||
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
|
||||
}
|
||||
Ok(update_status) => Ok(
|
||||
HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))
|
||||
),
|
||||
Err(e) => {
|
||||
Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() })))
|
||||
}
|
||||
@ -163,9 +163,9 @@ async fn add_documents(
|
||||
.await;
|
||||
|
||||
match addition_result {
|
||||
Ok(update_status) => {
|
||||
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
|
||||
}
|
||||
Ok(update_status) => Ok(
|
||||
HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))
|
||||
),
|
||||
Err(e) => {
|
||||
Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() })))
|
||||
}
|
||||
@ -242,9 +242,9 @@ async fn delete_documents(
|
||||
.collect();
|
||||
|
||||
match data.delete_documents(path.index_uid.clone(), ids).await {
|
||||
Ok(update_status) => {
|
||||
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
|
||||
}
|
||||
Ok(update_status) => Ok(
|
||||
HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))
|
||||
),
|
||||
Err(e) => {
|
||||
Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() })))
|
||||
}
|
||||
@ -258,9 +258,9 @@ async fn clear_all_documents(
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
match data.clear_documents(path.index_uid.clone()).await {
|
||||
Ok(update_status) => {
|
||||
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
|
||||
}
|
||||
Ok(update_status) => Ok(
|
||||
HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))
|
||||
),
|
||||
Err(e) => {
|
||||
Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() })))
|
||||
}
|
||||
|
@ -143,9 +143,9 @@ async fn update_all(
|
||||
.update_settings(index_uid.into_inner(), body.into_inner(), true)
|
||||
.await
|
||||
{
|
||||
Ok(update_result) => {
|
||||
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_result.id() })))
|
||||
}
|
||||
Ok(update_result) => Ok(
|
||||
HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_result.id() }))
|
||||
),
|
||||
Err(e) => {
|
||||
Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() })))
|
||||
}
|
||||
@ -175,9 +175,9 @@ async fn delete_all(
|
||||
.update_settings(index_uid.into_inner(), settings, false)
|
||||
.await
|
||||
{
|
||||
Ok(update_result) => {
|
||||
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_result.id() })))
|
||||
}
|
||||
Ok(update_result) => Ok(
|
||||
HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_result.id() }))
|
||||
),
|
||||
Err(e) => {
|
||||
Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() })))
|
||||
}
|
||||
|
@ -23,13 +23,7 @@ async fn get_settings() {
|
||||
assert_eq!(settings["distinctAttribute"], json!(null));
|
||||
assert_eq!(
|
||||
settings["rankingRules"],
|
||||
json!([
|
||||
"words",
|
||||
"typo",
|
||||
"proximity",
|
||||
"attribute",
|
||||
"exactness"
|
||||
])
|
||||
json!(["words", "typo", "proximity", "attribute", "exactness"])
|
||||
);
|
||||
assert_eq!(settings["stopWords"], json!([]));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user