mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-08 20:44:30 +01:00
multi index store
Create two channels for the index handler, one for writes and one for reads, so that writes are processed one at a time while reads are processed in parallel.
This commit is contained in:
parent
6a0a9fec6b
commit
f090f42e7a
4
Cargo.lock
generated
4
Cargo.lock
generated
@ -1,5 +1,7 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "actix-codec"
|
||||
version = "0.3.0"
|
||||
@ -428,7 +430,7 @@ checksum = "afddf7f520a80dbf76e6f50a35bca42a2331ef227a28b3b6dc5c2e2338d114b1"
|
||||
[[package]]
|
||||
name = "assert-json-diff"
|
||||
version = "1.0.1"
|
||||
source = "git+https://github.com/qdequele/assert-json-diff#9012a0c8866d0f2db0ef9a6242e4a19d1e8c67e4"
|
||||
source = "git+https://github.com/qdequele/assert-json-diff?branch=master#9012a0c8866d0f2db0ef9a6242e4a19d1e8c67e4"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
@ -11,6 +11,8 @@ use log::info;
|
||||
use thiserror::Error;
|
||||
use tokio::sync::{mpsc, oneshot, RwLock};
|
||||
use uuid::Uuid;
|
||||
use std::future::Future;
|
||||
use futures::pin_mut;
|
||||
|
||||
use super::update_handler::UpdateHandler;
|
||||
use crate::index::UpdateResult as UResult;
|
||||
@ -61,7 +63,8 @@ enum IndexMsg {
|
||||
}
|
||||
|
||||
struct IndexActor<S> {
|
||||
inbox: Option<mpsc::Receiver<IndexMsg>>,
|
||||
read_receiver: Option<mpsc::Receiver<IndexMsg>>,
|
||||
write_receiver: Option<mpsc::Receiver<IndexMsg>>,
|
||||
update_handler: Arc<UpdateHandler>,
|
||||
store: S,
|
||||
}
|
||||
@ -82,60 +85,96 @@ trait IndexStore {
|
||||
}
|
||||
|
||||
impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
||||
fn new(inbox: mpsc::Receiver<IndexMsg>, store: S) -> Self {
|
||||
fn new(
|
||||
read_receiver: mpsc::Receiver<IndexMsg>,
|
||||
write_receiver: mpsc::Receiver<IndexMsg>,
|
||||
store: S
|
||||
) -> Self {
|
||||
let options = IndexerOpts::default();
|
||||
let update_handler = UpdateHandler::new(&options).unwrap();
|
||||
let update_handler = Arc::new(update_handler);
|
||||
let inbox = Some(inbox);
|
||||
let read_receiver = Some(read_receiver);
|
||||
let write_receiver = Some(write_receiver);
|
||||
Self {
|
||||
inbox,
|
||||
read_receiver,
|
||||
write_receiver,
|
||||
store,
|
||||
update_handler,
|
||||
}
|
||||
}
|
||||
|
||||
/// `run` polls the write_receiver and read_receiver concurrently, but while messages sent
|
||||
/// through the read channel are processed concurrently, the messages sent through the write
|
||||
/// channel are processed one at a time.
|
||||
async fn run(mut self) {
|
||||
use IndexMsg::*;
|
||||
|
||||
let mut inbox = self
|
||||
.inbox
|
||||
let mut read_receiver = self
|
||||
.read_receiver
|
||||
.take()
|
||||
.expect("Index Actor must have a inbox at this point.");
|
||||
|
||||
let stream = stream! {
|
||||
let read_stream = stream! {
|
||||
loop {
|
||||
match inbox.recv().await {
|
||||
match read_receiver.recv().await {
|
||||
Some(msg) => yield msg,
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let fut = stream.for_each_concurrent(Some(10), |msg| async {
|
||||
match msg {
|
||||
CreateIndex {
|
||||
uuid,
|
||||
primary_key,
|
||||
ret,
|
||||
} => self.handle_create_index(uuid, primary_key, ret).await,
|
||||
Update { ret, meta, data } => self.handle_update(meta, data, ret).await,
|
||||
Search { ret, query, uuid } => self.handle_search(uuid, query, ret).await,
|
||||
Settings { ret, uuid } => self.handle_settings(uuid, ret).await,
|
||||
Documents {
|
||||
ret,
|
||||
uuid,
|
||||
attributes_to_retrieve,
|
||||
offset,
|
||||
limit,
|
||||
} => {
|
||||
self.handle_fetch_documents(uuid, offset, limit, attributes_to_retrieve, ret)
|
||||
.await
|
||||
}
|
||||
Document { uuid, attributes_to_retrieve, doc_id, ret } => self.handle_fetch_document(uuid, doc_id, attributes_to_retrieve, ret).await,
|
||||
}
|
||||
});
|
||||
let mut write_receiver = self
|
||||
.write_receiver
|
||||
.take()
|
||||
.expect("Index Actor must have a inbox at this point.");
|
||||
|
||||
fut.await;
|
||||
let write_stream = stream! {
|
||||
loop {
|
||||
match write_receiver.recv().await {
|
||||
Some(msg) => yield msg,
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
pin_mut!(write_stream);
|
||||
pin_mut!(read_stream);
|
||||
|
||||
let fut1 = read_stream.for_each_concurrent(Some(10), |msg| self.handle_message(msg));
|
||||
let fut2 = write_stream.for_each_concurrent(Some(1), |msg| self.handle_message(msg));
|
||||
|
||||
let fut1: Box<dyn Future<Output = ()> + Unpin + Send> = Box::new(fut1);
|
||||
let fut2: Box<dyn Future<Output = ()> + Unpin + Send> = Box::new(fut2);
|
||||
|
||||
//let futures = futures::stream::futures_unordered::FuturesUnordered::new();
|
||||
//futures.push(fut1);
|
||||
//futures.push(fut2);
|
||||
//futures.for_each(f)
|
||||
tokio::join!(fut1, fut2);
|
||||
|
||||
}
|
||||
|
||||
async fn handle_message(&self, msg: IndexMsg) {
|
||||
use IndexMsg::*;
|
||||
match msg {
|
||||
CreateIndex {
|
||||
uuid,
|
||||
primary_key,
|
||||
ret,
|
||||
} => self.handle_create_index(uuid, primary_key, ret).await,
|
||||
Update { ret, meta, data } => self.handle_update(meta, data, ret).await,
|
||||
Search { ret, query, uuid } => self.handle_search(uuid, query, ret).await,
|
||||
Settings { ret, uuid } => self.handle_settings(uuid, ret).await,
|
||||
Documents {
|
||||
ret,
|
||||
uuid,
|
||||
attributes_to_retrieve,
|
||||
offset,
|
||||
limit,
|
||||
} => {
|
||||
self.handle_fetch_documents(uuid, offset, limit, attributes_to_retrieve, ret)
|
||||
.await
|
||||
}
|
||||
Document { uuid, attributes_to_retrieve, doc_id, ret } => self.handle_fetch_document(uuid, doc_id, attributes_to_retrieve, ret).await,
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_search(
|
||||
@ -221,17 +260,19 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct IndexActorHandle {
|
||||
sender: mpsc::Sender<IndexMsg>,
|
||||
read_sender: mpsc::Sender<IndexMsg>,
|
||||
write_sender: mpsc::Sender<IndexMsg>,
|
||||
}
|
||||
|
||||
impl IndexActorHandle {
|
||||
pub fn new(path: impl AsRef<Path>) -> Self {
|
||||
let (sender, receiver) = mpsc::channel(100);
|
||||
let (read_sender, read_receiver) = mpsc::channel(100);
|
||||
let (write_sender, write_receiver) = mpsc::channel(100);
|
||||
|
||||
let store = MapIndexStore::new(path);
|
||||
let actor = IndexActor::new(receiver, store);
|
||||
let actor = IndexActor::new(read_receiver, write_receiver, store);
|
||||
tokio::task::spawn(actor.run());
|
||||
Self { sender }
|
||||
Self { read_sender, write_sender }
|
||||
}
|
||||
|
||||
pub async fn create_index(
|
||||
@ -245,28 +286,28 @@ impl IndexActorHandle {
|
||||
uuid,
|
||||
primary_key,
|
||||
};
|
||||
let _ = self.sender.send(msg).await;
|
||||
let _ = self.read_sender.send(msg).await;
|
||||
receiver.await.expect("IndexActor has been killed")
|
||||
}
|
||||
|
||||
pub async fn update(&self, meta: Processing<UpdateMeta>, data: std::fs::File) -> UpdateResult {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = IndexMsg::Update { ret, meta, data };
|
||||
let _ = self.sender.send(msg).await;
|
||||
let _ = self.read_sender.send(msg).await;
|
||||
receiver.await.expect("IndexActor has been killed")
|
||||
}
|
||||
|
||||
pub async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = IndexMsg::Search { uuid, query, ret };
|
||||
let _ = self.sender.send(msg).await;
|
||||
let _ = self.read_sender.send(msg).await;
|
||||
Ok(receiver.await.expect("IndexActor has been killed")?)
|
||||
}
|
||||
|
||||
pub async fn settings(&self, uuid: Uuid) -> Result<Settings> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = IndexMsg::Settings { uuid, ret };
|
||||
let _ = self.sender.send(msg).await;
|
||||
let _ = self.read_sender.send(msg).await;
|
||||
Ok(receiver.await.expect("IndexActor has been killed")?)
|
||||
}
|
||||
|
||||
@ -285,7 +326,7 @@ impl IndexActorHandle {
|
||||
attributes_to_retrieve,
|
||||
limit,
|
||||
};
|
||||
let _ = self.sender.send(msg).await;
|
||||
let _ = self.read_sender.send(msg).await;
|
||||
Ok(receiver.await.expect("IndexActor has been killed")?)
|
||||
}
|
||||
|
||||
@ -302,7 +343,7 @@ impl IndexActorHandle {
|
||||
doc_id,
|
||||
attributes_to_retrieve,
|
||||
};
|
||||
let _ = self.sender.send(msg).await;
|
||||
let _ = self.read_sender.send(msg).await;
|
||||
Ok(receiver.await.expect("IndexActor has been killed")?)
|
||||
}
|
||||
}
|
||||
|
@ -1,17 +1,18 @@
|
||||
use std::fs::create_dir_all;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
use std::collections::{HashMap, hash_map::Entry};
|
||||
|
||||
use log::info;
|
||||
use super::index_actor::IndexActorHandle;
|
||||
use log::info;
|
||||
use thiserror::Error;
|
||||
use tokio::sync::{mpsc, oneshot};
|
||||
use uuid::Uuid;
|
||||
use tokio::fs::File;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tokio::sync::{mpsc, oneshot, RwLock};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::index_controller::{UpdateMeta, UpdateStatus};
|
||||
use crate::index::UpdateResult;
|
||||
use crate::index_controller::{UpdateMeta, UpdateStatus};
|
||||
|
||||
pub type Result<T> = std::result::Result<T, UpdateError>;
|
||||
type UpdateStore = super::update_store::UpdateStore<UpdateMeta, UpdateResult, String>;
|
||||
@ -28,33 +29,42 @@ enum UpdateMsg<D> {
|
||||
uuid: Uuid,
|
||||
meta: UpdateMeta,
|
||||
data: mpsc::Receiver<PayloadData<D>>,
|
||||
ret: oneshot::Sender<Result<UpdateStatus>>
|
||||
ret: oneshot::Sender<Result<UpdateStatus>>,
|
||||
},
|
||||
ListUpdates {
|
||||
uuid: Uuid,
|
||||
ret: oneshot::Sender<Result<Vec<UpdateStatus>>>,
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
struct UpdateActor<D> {
|
||||
struct UpdateActor<D, S> {
|
||||
path: PathBuf,
|
||||
store: Arc<UpdateStore>,
|
||||
store: S,
|
||||
inbox: mpsc::Receiver<UpdateMsg<D>>,
|
||||
index_handle: IndexActorHandle,
|
||||
}
|
||||
|
||||
impl<D> UpdateActor<D>
|
||||
where D: AsRef<[u8]> + Sized + 'static,
|
||||
#[async_trait::async_trait]
|
||||
trait UpdateStoreStore {
|
||||
async fn get_or_create(&self, uuid: Uuid) -> Result<Arc<UpdateStore>>;
|
||||
}
|
||||
|
||||
impl<D, S> UpdateActor<D, S>
|
||||
where
|
||||
D: AsRef<[u8]> + Sized + 'static,
|
||||
S: UpdateStoreStore,
|
||||
{
|
||||
fn new(
|
||||
store: Arc<UpdateStore>,
|
||||
store: S,
|
||||
inbox: mpsc::Receiver<UpdateMsg<D>>,
|
||||
index_handle: IndexActorHandle,
|
||||
path: impl AsRef<Path>,
|
||||
) -> Self {
|
||||
) -> Self {
|
||||
let path = path.as_ref().to_owned().join("update_files");
|
||||
create_dir_all(&path).unwrap();
|
||||
Self { store, inbox, index_handle, path }
|
||||
Self {
|
||||
store,
|
||||
inbox,
|
||||
path,
|
||||
}
|
||||
}
|
||||
|
||||
async fn run(mut self) {
|
||||
@ -64,15 +74,26 @@ where D: AsRef<[u8]> + Sized + 'static,
|
||||
|
||||
loop {
|
||||
match self.inbox.recv().await {
|
||||
Some(Update { uuid, meta, data, ret }) => self.handle_update(uuid, meta, data, ret).await,
|
||||
Some(Update {
|
||||
uuid,
|
||||
meta,
|
||||
data,
|
||||
ret,
|
||||
}) => self.handle_update(uuid, meta, data, ret).await,
|
||||
Some(ListUpdates { uuid, ret }) => self.handle_list_updates(uuid, ret).await,
|
||||
None => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_update(&self, uuid: Uuid, meta: UpdateMeta, mut payload: mpsc::Receiver<PayloadData<D>>, ret: oneshot::Sender<Result<UpdateStatus>>) {
|
||||
let store = self.store.clone();
|
||||
async fn handle_update(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
meta: UpdateMeta,
|
||||
mut payload: mpsc::Receiver<PayloadData<D>>,
|
||||
ret: oneshot::Sender<Result<UpdateStatus>>,
|
||||
) {
|
||||
let update_store = self.store.get_or_create(uuid).await.unwrap();
|
||||
let update_file_id = uuid::Uuid::new_v4();
|
||||
let path = self.path.join(format!("update_{}", update_file_id));
|
||||
let mut file = File::create(&path).await.unwrap();
|
||||
@ -84,7 +105,7 @@ where D: AsRef<[u8]> + Sized + 'static,
|
||||
}
|
||||
Err(e) => {
|
||||
ret.send(Err(UpdateError::Error(e)));
|
||||
return
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -94,15 +115,20 @@ where D: AsRef<[u8]> + Sized + 'static,
|
||||
let file = file.into_std().await;
|
||||
|
||||
let result = tokio::task::spawn_blocking(move || {
|
||||
let result = store
|
||||
let result = update_store
|
||||
.register_update(meta, path, uuid)
|
||||
.map(|pending| UpdateStatus::Pending(pending))
|
||||
.map_err(|e| UpdateError::Error(Box::new(e)));
|
||||
let _ = ret.send(result);
|
||||
}).await;
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
async fn handle_list_updates(&self, uuid: Uuid, ret: oneshot::Sender<Result<Vec<UpdateStatus>>>) {
|
||||
async fn handle_list_updates(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
ret: oneshot::Sender<Result<Vec<UpdateStatus>>>,
|
||||
) {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
@ -113,29 +139,26 @@ pub struct UpdateActorHandle<D> {
|
||||
}
|
||||
|
||||
impl<D> UpdateActorHandle<D>
|
||||
where D: AsRef<[u8]> + Sized + 'static,
|
||||
where
|
||||
D: AsRef<[u8]> + Sized + 'static,
|
||||
{
|
||||
pub fn new(index_handle: IndexActorHandle, path: impl AsRef<Path>) -> Self {
|
||||
let path = path.as_ref().to_owned().join("updates");
|
||||
let (sender, receiver) = mpsc::channel(100);
|
||||
let mut options = heed::EnvOpenOptions::new();
|
||||
options.map_size(4096 * 100_000);
|
||||
let store = MapUpdateStoreStore::new(index_handle, &path);
|
||||
let actor = UpdateActor::new(store, receiver, path);
|
||||
|
||||
let path = path
|
||||
.as_ref()
|
||||
.to_owned()
|
||||
.join("updates");
|
||||
|
||||
create_dir_all(&path).unwrap();
|
||||
let index_handle_clone = index_handle.clone();
|
||||
let store = UpdateStore::open(options, &path, move |meta, file| {
|
||||
futures::executor::block_on(index_handle_clone.update(meta, file))
|
||||
}).unwrap();
|
||||
let actor = UpdateActor::new(store, receiver, index_handle, path);
|
||||
tokio::task::spawn_local(actor.run());
|
||||
|
||||
Self { sender }
|
||||
}
|
||||
|
||||
pub async fn update(&self, meta: UpdateMeta, data: mpsc::Receiver<PayloadData<D>>, uuid: Uuid) -> Result<UpdateStatus> {
|
||||
pub async fn update(
|
||||
&self,
|
||||
meta: UpdateMeta,
|
||||
data: mpsc::Receiver<PayloadData<D>>,
|
||||
uuid: Uuid,
|
||||
) -> Result<UpdateStatus> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = UpdateMsg::Update {
|
||||
uuid,
|
||||
@ -143,7 +166,46 @@ where D: AsRef<[u8]> + Sized + 'static,
|
||||
meta,
|
||||
ret,
|
||||
};
|
||||
let _ = self.sender.send(msg).await;
|
||||
let _ = self.sender.send(msg).await;
|
||||
receiver.await.expect("update actor killed.")
|
||||
}
|
||||
}
|
||||
|
||||
struct MapUpdateStoreStore {
|
||||
db: Arc<RwLock<HashMap<Uuid, Arc<UpdateStore>>>>,
|
||||
index_handle: IndexActorHandle,
|
||||
path: PathBuf,
|
||||
}
|
||||
|
||||
impl MapUpdateStoreStore {
|
||||
fn new(index_handle: IndexActorHandle, path: impl AsRef<Path>) -> Self {
|
||||
let db = Arc::new(RwLock::new(HashMap::new()));
|
||||
let path = path.as_ref().to_owned();
|
||||
Self { db, index_handle, path }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl UpdateStoreStore for MapUpdateStoreStore {
|
||||
async fn get_or_create(&self, uuid: Uuid) -> Result<Arc<UpdateStore>> {
|
||||
match self.db.write().await.entry(uuid) {
|
||||
Entry::Vacant(e) => {
|
||||
let mut options = heed::EnvOpenOptions::new();
|
||||
options.map_size(4096 * 100_000);
|
||||
let path = self.path.clone().join(format!("updates-{}", e.key()));
|
||||
create_dir_all(&path).unwrap();
|
||||
let index_handle = self.index_handle.clone();
|
||||
let store = UpdateStore::open(options, &path, move |meta, file| {
|
||||
futures::executor::block_on(index_handle.update(meta, file))
|
||||
}).unwrap();
|
||||
let store = e.insert(store);
|
||||
Ok(store.clone())
|
||||
}
|
||||
Entry::Occupied(e) => {
|
||||
Ok(e.get().clone())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user