2021-03-23 11:00:50 +01:00
|
|
|
use std::fs::File;
|
|
|
|
use std::path::PathBuf;
|
|
|
|
use std::sync::Arc;
|
|
|
|
|
|
|
|
use async_stream::stream;
|
|
|
|
use futures::stream::StreamExt;
|
|
|
|
use heed::CompactionOption;
|
|
|
|
use log::debug;
|
2021-04-14 18:55:04 +02:00
|
|
|
use tokio::sync::mpsc;
|
2021-03-23 11:00:50 +01:00
|
|
|
use tokio::task::spawn_blocking;
|
|
|
|
use uuid::Uuid;
|
|
|
|
|
2021-05-10 17:30:09 +02:00
|
|
|
use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings};
|
2021-04-01 16:44:42 +02:00
|
|
|
use crate::index_controller::{
|
2021-04-22 10:14:29 +02:00
|
|
|
get_arc_ownership_blocking, update_handler::UpdateHandler, Failed, IndexStats, Processed,
|
|
|
|
Processing,
|
2021-04-01 16:44:42 +02:00
|
|
|
};
|
2021-03-23 11:00:50 +01:00
|
|
|
use crate::option::IndexerOpts;
|
|
|
|
|
2021-04-22 10:14:29 +02:00
|
|
|
use super::{IndexError, IndexMeta, IndexMsg, IndexResult, IndexSettings, IndexStore};
|
2021-04-01 16:44:42 +02:00
|
|
|
|
/// Maximum number of `IndexMsg` messages `IndexActor::run` processes concurrently.
pub const CONCURRENT_INDEX_MSG: usize = 10;
|
|
|
|
|
/// Actor that owns an index store and serves `IndexMsg` requests received
/// over an mpsc channel (see `IndexActor::run`).
pub struct IndexActor<S> {
    // Inbox for incoming messages. Wrapped in `Option` so that `run` can
    // `take()` it exactly once; `run` panics if it was already taken.
    receiver: Option<mpsc::Receiver<IndexMsg>>,
    // Shared update handler, cloned into `spawn_blocking` closures so
    // updates are applied off the async runtime threads.
    update_handler: Arc<UpdateHandler>,
    // Backing store used to create/get/delete indexes by `Uuid`.
    store: S,
}
|
|
|
|
|
|
|
|
impl<S: IndexStore + Sync + Send> IndexActor<S> {
    /// Builds a new actor around `receiver` and `store`, creating its
    /// `UpdateHandler` from the default `IndexerOpts`.
    ///
    /// Returns an error if the update handler fails to initialize.
    pub fn new(receiver: mpsc::Receiver<IndexMsg>, store: S) -> IndexResult<Self> {
        let options = IndexerOpts::default();
        let update_handler = UpdateHandler::new(&options).map_err(IndexError::Error)?;
        let update_handler = Arc::new(update_handler);
        // Stored as `Some` so `run` can take ownership of the inbox later.
        let receiver = Some(receiver);
        Ok(Self { receiver, update_handler, store })
    }

    /// Drains the actor's inbox, dispatching up to `CONCURRENT_INDEX_MSG`
    /// messages to `handle_message` concurrently. Returns when the channel
    /// is closed (all senders dropped).
    ///
    /// # Panics
    /// Panics if called more than once: the inbox is `take()`n on entry.
    pub async fn run(mut self) {
        let mut receiver = self
            .receiver
            .take()
            .expect("Index Actor must have a inbox at this point.");

        // Adapt the mpsc receiver into a `Stream` so we can use
        // `for_each_concurrent` below.
        let stream = stream! {
            loop {
                match receiver.recv().await {
                    Some(msg) => yield msg,
                    None => break,
                }
            }
        };

        stream
            .for_each_concurrent(Some(CONCURRENT_INDEX_MSG), |msg| self.handle_message(msg))
            .await;
    }

    /// Dispatches one message to its handler and sends the result back on
    /// the message's `ret` oneshot channel. Send errors (receiver dropped)
    /// are deliberately ignored via `let _ =`.
    async fn handle_message(&self, msg: IndexMsg) {
        use IndexMsg::*;
        match msg {
            CreateIndex {
                uuid,
                primary_key,
                ret,
            } => {
                let _ = ret.send(self.handle_create_index(uuid, primary_key).await);
            }
            Update {
                ret,
                meta,
                data,
                uuid,
            } => {
                let _ = ret.send(self.handle_update(uuid, meta, data).await);
            }
            Search { ret, query, uuid } => {
                let _ = ret.send(self.handle_search(uuid, query).await);
            }
            Settings { ret, uuid } => {
                let _ = ret.send(self.handle_settings(uuid).await);
            }
            Documents {
                ret,
                uuid,
                attributes_to_retrieve,
                offset,
                limit,
            } => {
                let _ = ret.send(
                    self.handle_fetch_documents(uuid, offset, limit, attributes_to_retrieve)
                        .await,
                );
            }
            Document {
                uuid,
                attributes_to_retrieve,
                doc_id,
                ret,
            } => {
                let _ = ret.send(
                    self.handle_fetch_document(uuid, doc_id, attributes_to_retrieve)
                        .await,
                );
            }
            Delete { uuid, ret } => {
                let _ = ret.send(self.handle_delete(uuid).await);
            }
            GetMeta { uuid, ret } => {
                let _ = ret.send(self.handle_get_meta(uuid).await);
            }
            UpdateIndex {
                uuid,
                index_settings,
                ret,
            } => {
                let _ = ret.send(self.handle_update_index(uuid, index_settings).await);
            }
            Snapshot { uuid, path, ret } => {
                let _ = ret.send(self.handle_snapshot(uuid, path).await);
            }
            Dump { uuid, path, ret } => {
                let _ = ret.send(self.handle_dump(uuid, path).await);
            }
            GetStats { uuid, ret } => {
                let _ = ret.send(self.handle_get_stats(uuid).await);
            }
        }
    }

    /// Runs `query` against the index identified by `uuid` on a blocking
    /// thread. Errors if the index does not exist.
    // NOTE(review): returns `anyhow::Result` unlike the sibling handlers,
    // which return `IndexResult` — presumably matching the `Search` message's
    // channel type; confirm before unifying.
    async fn handle_search(&self, uuid: Uuid, query: SearchQuery) -> anyhow::Result<SearchResult> {
        let index = self
            .store
            .get(uuid)
            .await?
            .ok_or(IndexError::UnexistingIndex)?;
        spawn_blocking(move || index.perform_search(query)).await?
    }

    /// Creates a new index for `uuid` (optionally with a primary key) and
    /// returns its freshly computed metadata.
    async fn handle_create_index(
        &self,
        uuid: Uuid,
        primary_key: Option<String>,
    ) -> IndexResult<IndexMeta> {
        let index = self.store.create(uuid, primary_key).await?;
        // `??`: first `?` unwraps the JoinError mapping, second the
        // `IndexMeta::new` result itself.
        let meta = spawn_blocking(move || IndexMeta::new(&index))
            .await
            .map_err(|e| IndexError::Error(e.into()))??;
        Ok(meta)
    }

    /// Applies the update described by `meta` (with optional payload `data`)
    /// to the index for `uuid`, creating the index on the fly if it does not
    /// exist yet. The inner `Result<Processed, Failed>` is the update's own
    /// outcome, distinct from actor-level errors.
    async fn handle_update(
        &self,
        uuid: Uuid,
        meta: Processing,
        data: Option<File>,
    ) -> IndexResult<Result<Processed, Failed>> {
        debug!("Processing update {}", meta.id());
        let update_handler = self.update_handler.clone();
        // Updates auto-create the index (without a primary key) when missing.
        let index = match self.store.get(uuid).await? {
            Some(index) => index,
            None => self.store.create(uuid, None).await?,
        };

        spawn_blocking(move || update_handler.handle_update(meta, data, index))
            .await
            .map_err(|e| IndexError::Error(e.into()))
    }

    /// Fetches the checked settings of the index for `uuid`.
    /// Errors if the index does not exist.
    async fn handle_settings(&self, uuid: Uuid) -> IndexResult<Settings<Checked>> {
        let index = self
            .store
            .get(uuid)
            .await?
            .ok_or(IndexError::UnexistingIndex)?;
        spawn_blocking(move || index.settings().map_err(IndexError::Error))
            .await
            .map_err(|e| IndexError::Error(e.into()))?
    }

    /// Retrieves a page of documents (`offset`/`limit`) from the index for
    /// `uuid`, optionally projected to `attributes_to_retrieve`.
    async fn handle_fetch_documents(
        &self,
        uuid: Uuid,
        offset: usize,
        limit: usize,
        attributes_to_retrieve: Option<Vec<String>>,
    ) -> IndexResult<Vec<Document>> {
        let index = self
            .store
            .get(uuid)
            .await?
            .ok_or(IndexError::UnexistingIndex)?;
        spawn_blocking(move || {
            index
                .retrieve_documents(offset, limit, attributes_to_retrieve)
                .map_err(IndexError::Error)
        })
        .await
        .map_err(|e| IndexError::Error(e.into()))?
    }

    /// Retrieves the single document `doc_id` from the index for `uuid`,
    /// optionally projected to `attributes_to_retrieve`.
    async fn handle_fetch_document(
        &self,
        uuid: Uuid,
        doc_id: String,
        attributes_to_retrieve: Option<Vec<String>>,
    ) -> IndexResult<Document> {
        let index = self
            .store
            .get(uuid)
            .await?
            .ok_or(IndexError::UnexistingIndex)?;
        spawn_blocking(move || {
            index
                .retrieve_document(doc_id, attributes_to_retrieve)
                .map_err(IndexError::Error)
        })
        .await
        .map_err(|e| IndexError::Error(e.into()))?
    }

    /// Removes the index for `uuid` from the store. The underlying
    /// environment is closed asynchronously once the last `Arc` reference is
    /// released, so this returns without waiting for the close to finish.
    async fn handle_delete(&self, uuid: Uuid) -> IndexResult<()> {
        let index = self.store.delete(uuid).await?;

        if let Some(index) = index {
            tokio::task::spawn(async move {
                let index = index.0;
                // Wait until we hold the sole reference before closing.
                let store = get_arc_ownership_blocking(index).await;
                spawn_blocking(move || {
                    store.prepare_for_closing().wait();
                    debug!("Index closed");
                });
            });
        }

        Ok(())
    }

    /// Returns the metadata of the index for `uuid`, or
    /// `IndexError::UnexistingIndex` if it is not in the store.
    async fn handle_get_meta(&self, uuid: Uuid) -> IndexResult<IndexMeta> {
        match self.store.get(uuid).await? {
            Some(index) => {
                let meta = spawn_blocking(move || IndexMeta::new(&index))
                    .await
                    .map_err(|e| IndexError::Error(e.into()))??;
                Ok(meta)
            }
            None => Err(IndexError::UnexistingIndex),
        }
    }

    /// Applies `index_settings` to the index for `uuid` and returns its
    /// updated metadata. Setting a primary key fails with
    /// `IndexError::ExistingPrimaryKey` if one is already defined; with no
    /// primary key in the settings this is effectively a metadata read.
    async fn handle_update_index(
        &self,
        uuid: Uuid,
        index_settings: IndexSettings,
    ) -> IndexResult<IndexMeta> {
        let index = self
            .store
            .get(uuid)
            .await?
            .ok_or(IndexError::UnexistingIndex)?;

        spawn_blocking(move || match index_settings.primary_key {
            Some(ref primary_key) => {
                let mut txn = index.write_txn()?;
                if index.primary_key(&txn)?.is_some() {
                    return Err(IndexError::ExistingPrimaryKey);
                }
                index.put_primary_key(&mut txn, primary_key)?;
                // Read the meta inside the same txn so it reflects the new key.
                let meta = IndexMeta::new_txn(&index, &txn)?;
                txn.commit()?;
                Ok(meta)
            }
            None => {
                let meta = IndexMeta::new(&index)?;
                Ok(meta)
            }
        })
        .await
        .map_err(|e| IndexError::Error(e.into()))?
    }

    /// Snapshots the index for `uuid` into `path`/indexes/index-`uuid`/data.mdb
    /// using a compacting LMDB copy. A no-op (after directory creation) if the
    /// index does not exist.
    async fn handle_snapshot(&self, uuid: Uuid, mut path: PathBuf) -> IndexResult<()> {
        use tokio::fs::create_dir_all;

        path.push("indexes");
        create_dir_all(&path)
            .await
            .map_err(|e| IndexError::Error(e.into()))?;

        if let Some(index) = self.store.get(uuid).await? {
            let mut index_path = path.join(format!("index-{}", uuid));
            create_dir_all(&index_path)
                .await
                .map_err(|e| IndexError::Error(e.into()))?;
            index_path.push("data.mdb");
            spawn_blocking(move || -> anyhow::Result<()> {
                // Get write txn to wait for ongoing write transaction before snapshot.
                let _txn = index.write_txn()?;
                index
                    .env
                    .copy_to_path(index_path, CompactionOption::Enabled)?;
                Ok(())
            })
            .await
            // First map_err: task join failure; trailing map_err: the
            // closure's own `anyhow` error.
            .map_err(|e| IndexError::Error(e.into()))?
            .map_err(IndexError::Error)?;
        }

        Ok(())
    }

    /// Dumps the index for `uuid` into `path`/indexes/index-`uuid`/data.mdb.
    /// Same mechanics as `handle_snapshot`: compacting LMDB copy behind a
    /// write txn; a no-op (after directory creation) if the index is missing.
    async fn handle_dump(&self, uuid: Uuid, mut path: PathBuf) -> IndexResult<()> {
        use tokio::fs::create_dir_all;

        path.push("indexes");
        create_dir_all(&path)
            .await
            .map_err(|e| IndexError::Error(e.into()))?;

        if let Some(index) = self.store.get(uuid).await? {
            let mut index_path = path.join(format!("index-{}", uuid));
            create_dir_all(&index_path)
                .await
                .map_err(|e| IndexError::Error(e.into()))?;
            index_path.push("data.mdb");
            spawn_blocking(move || -> anyhow::Result<()> {
                // Get write txn to wait for ongoing write transaction before dump.
                let _txn = index.write_txn()?;
                index.env.copy_to_path(index_path, CompactionOption::Enabled)?;
                Ok(())
            })
            .await
            .map_err(|e| IndexError::Error(e.into()))?
            .map_err(IndexError::Error)?;
        }

        Ok(())
    }

    /// Computes size/document-count/field-distribution stats for the index
    /// identified by `uuid`. `is_indexing` is left `None` here — presumably
    /// filled in by a caller that knows the update queue state; confirm.
    async fn handle_get_stats(&self, uuid: Uuid) -> IndexResult<IndexStats> {
        let index = self
            .store
            .get(uuid)
            .await?
            .ok_or(IndexError::UnexistingIndex)?;

        spawn_blocking(move || {
            let rtxn = index.read_txn()?;

            Ok(IndexStats {
                size: index.size(),
                number_of_documents: index.number_of_documents(&rtxn)?,
                is_indexing: None,
                fields_distribution: index.fields_distribution(&rtxn)?,
            })
        })
        .await
        .map_err(|e| IndexError::Error(e.into()))?
    }
}
|