MeiliSearch/meilisearch-http/src/index_controller/index_actor/actor.rs

352 lines
10 KiB
Rust
Raw Normal View History

2021-03-23 11:00:50 +01:00
use std::fs::File;
use std::path::PathBuf;
use std::sync::Arc;
use async_stream::stream;
use futures::stream::StreamExt;
use heed::CompactionOption;
use log::debug;
2021-06-16 17:15:56 +02:00
use milli::update::UpdateBuilder;
2021-03-23 11:00:50 +01:00
use tokio::task::spawn_blocking;
2021-05-31 16:03:39 +02:00
use tokio::{fs, sync::mpsc};
2021-03-23 11:00:50 +01:00
use uuid::Uuid;
2021-05-31 16:03:39 +02:00
use crate::index::{
update_handler::UpdateHandler, Checked, Document, SearchQuery, SearchResult, Settings,
};
2021-04-01 16:44:42 +02:00
use crate::index_controller::{
2021-05-31 16:03:39 +02:00
get_arc_ownership_blocking, Failed, IndexStats, Processed, Processing,
2021-04-01 16:44:42 +02:00
};
2021-03-23 11:00:50 +01:00
use crate::option::IndexerOpts;
2021-06-15 17:39:07 +02:00
use super::error::{IndexActorError, Result};
use super::{IndexMeta, IndexMsg, IndexSettings, IndexStore};
2021-04-01 16:44:42 +02:00
2021-04-14 17:53:12 +02:00
/// Upper bound on the number of messages that `IndexActor::run` handles concurrently.
pub const CONCURRENT_INDEX_MSG: usize = 10;
2021-03-23 11:00:50 +01:00
pub struct IndexActor<S> {
2021-04-13 17:46:39 +02:00
receiver: Option<mpsc::Receiver<IndexMsg>>,
2021-03-23 11:00:50 +01:00
update_handler: Arc<UpdateHandler>,
store: S,
}
impl<S: IndexStore + Sync + Send> IndexActor<S> {
pub fn new(
receiver: mpsc::Receiver<IndexMsg>,
store: S,
options: &IndexerOpts,
) -> anyhow::Result<Self> {
let update_handler = UpdateHandler::new(options)?;
2021-03-23 11:00:50 +01:00
let update_handler = Arc::new(update_handler);
2021-04-13 17:46:39 +02:00
let receiver = Some(receiver);
2021-05-24 17:20:44 +02:00
Ok(Self {
receiver,
update_handler,
store,
})
2021-03-23 11:00:50 +01:00
}
2021-04-28 16:43:49 +02:00
/// `run` poll the write_receiver and read_receiver concurrently, but while messages send
/// through the read channel are processed concurrently, the messages sent through the write
/// channel are processed one at a time.
2021-03-23 11:00:50 +01:00
pub async fn run(mut self) {
2021-04-13 17:46:39 +02:00
let mut receiver = self
.receiver
2021-03-23 11:00:50 +01:00
.take()
.expect("Index Actor must have a inbox at this point.");
2021-04-13 17:46:39 +02:00
let stream = stream! {
2021-03-23 11:00:50 +01:00
loop {
2021-04-13 17:46:39 +02:00
match receiver.recv().await {
2021-03-23 11:00:50 +01:00
Some(msg) => yield msg,
None => break,
}
}
};
2021-04-14 17:53:12 +02:00
stream
.for_each_concurrent(Some(CONCURRENT_INDEX_MSG), |msg| self.handle_message(msg))
.await;
2021-03-23 11:00:50 +01:00
}
async fn handle_message(&self, msg: IndexMsg) {
use IndexMsg::*;
match msg {
CreateIndex {
uuid,
primary_key,
ret,
} => {
let _ = ret.send(self.handle_create_index(uuid, primary_key).await);
}
2021-04-14 17:53:12 +02:00
Update {
ret,
meta,
data,
uuid,
} => {
2021-04-13 17:14:02 +02:00
let _ = ret.send(self.handle_update(uuid, meta, data).await);
2021-03-23 11:00:50 +01:00
}
Search { ret, query, uuid } => {
let _ = ret.send(self.handle_search(uuid, query).await);
}
Settings { ret, uuid } => {
let _ = ret.send(self.handle_settings(uuid).await);
}
Documents {
ret,
uuid,
attributes_to_retrieve,
offset,
limit,
} => {
let _ = ret.send(
self.handle_fetch_documents(uuid, offset, limit, attributes_to_retrieve)
.await,
);
}
Document {
uuid,
attributes_to_retrieve,
doc_id,
ret,
} => {
let _ = ret.send(
self.handle_fetch_document(uuid, doc_id, attributes_to_retrieve)
.await,
);
}
Delete { uuid, ret } => {
let _ = ret.send(self.handle_delete(uuid).await);
}
GetMeta { uuid, ret } => {
let _ = ret.send(self.handle_get_meta(uuid).await);
}
UpdateIndex {
uuid,
index_settings,
ret,
} => {
let _ = ret.send(self.handle_update_index(uuid, index_settings).await);
}
Snapshot { uuid, path, ret } => {
let _ = ret.send(self.handle_snapshot(uuid, path).await);
}
2021-05-24 18:16:35 +02:00
Dump { uuid, path, ret } => {
let _ = ret.send(self.handle_dump(uuid, path).await);
2021-04-28 16:43:49 +02:00
}
2021-04-01 16:44:42 +02:00
GetStats { uuid, ret } => {
let _ = ret.send(self.handle_get_stats(uuid).await);
}
2021-03-23 11:00:50 +01:00
}
}
async fn handle_search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> {
2021-03-23 11:00:50 +01:00
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexActorError::UnexistingIndex)?;
let result = spawn_blocking(move || index.perform_search(query)).await??;
Ok(result)
2021-03-23 11:00:50 +01:00
}
async fn handle_create_index(
&self,
uuid: Uuid,
primary_key: Option<String>,
) -> Result<IndexMeta> {
2021-03-23 11:00:50 +01:00
let index = self.store.create(uuid, primary_key).await?;
2021-05-24 17:20:44 +02:00
let meta = spawn_blocking(move || IndexMeta::new(&index)).await??;
2021-03-23 11:00:50 +01:00
Ok(meta)
}
async fn handle_update(
&self,
2021-04-13 17:14:02 +02:00
uuid: Uuid,
2021-04-22 10:14:29 +02:00
meta: Processing,
data: Option<File>,
) -> Result<std::result::Result<Processed, Failed>> {
2021-04-19 09:47:43 +02:00
debug!("Processing update {}", meta.id());
let update_handler = self.update_handler.clone();
let index = match self.store.get(uuid).await? {
Some(index) => index,
None => self.store.create(uuid, None).await?,
2021-04-13 17:14:02 +02:00
};
2021-06-02 15:20:32 +02:00
Ok(spawn_blocking(move || update_handler.handle_update(meta, data, index)).await?)
2021-03-23 11:00:50 +01:00
}
async fn handle_settings(&self, uuid: Uuid) -> Result<Settings<Checked>> {
2021-03-23 11:00:50 +01:00
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexActorError::UnexistingIndex)?;
2021-05-24 17:20:44 +02:00
let result = spawn_blocking(move || index.settings()).await??;
Ok(result)
2021-03-23 11:00:50 +01:00
}
async fn handle_fetch_documents(
&self,
uuid: Uuid,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>> {
2021-03-23 11:00:50 +01:00
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexActorError::UnexistingIndex)?;
2021-05-24 17:20:44 +02:00
let result =
spawn_blocking(move || index.retrieve_documents(offset, limit, attributes_to_retrieve))
.await??;
Ok(result)
2021-03-23 11:00:50 +01:00
}
async fn handle_fetch_document(
&self,
uuid: Uuid,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document> {
2021-03-23 11:00:50 +01:00
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexActorError::UnexistingIndex)?;
2021-05-24 17:20:44 +02:00
let result =
spawn_blocking(move || index.retrieve_document(doc_id, attributes_to_retrieve))
.await??;
Ok(result)
2021-03-23 11:00:50 +01:00
}
async fn handle_delete(&self, uuid: Uuid) -> Result<()> {
2021-03-23 11:00:50 +01:00
let index = self.store.delete(uuid).await?;
if let Some(index) = index {
tokio::task::spawn(async move {
let index = index.0;
let store = get_arc_ownership_blocking(index).await;
spawn_blocking(move || {
store.prepare_for_closing().wait();
debug!("Index closed");
});
});
}
Ok(())
}
async fn handle_get_meta(&self, uuid: Uuid) -> Result<IndexMeta> {
2021-03-23 11:00:50 +01:00
match self.store.get(uuid).await? {
Some(index) => {
2021-05-24 17:20:44 +02:00
let meta = spawn_blocking(move || IndexMeta::new(&index)).await??;
2021-03-23 11:00:50 +01:00
Ok(meta)
}
None => Err(IndexActorError::UnexistingIndex),
2021-03-23 11:00:50 +01:00
}
}
async fn handle_update_index(
&self,
uuid: Uuid,
index_settings: IndexSettings,
) -> Result<IndexMeta> {
2021-03-23 11:00:50 +01:00
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexActorError::UnexistingIndex)?;
2021-03-23 11:00:50 +01:00
2021-05-24 17:20:44 +02:00
let result = spawn_blocking(move || match index_settings.primary_key {
2021-06-16 17:15:56 +02:00
Some(primary_key) => {
2021-03-23 11:00:50 +01:00
let mut txn = index.write_txn()?;
if index.primary_key(&txn)?.is_some() {
return Err(IndexActorError::ExistingPrimaryKey);
2021-03-23 11:00:50 +01:00
}
2021-06-16 17:15:56 +02:00
let mut builder = UpdateBuilder::new(0).settings(&mut txn, &index);
builder.set_primary_key(primary_key);
2021-06-17 14:36:32 +02:00
builder.execute(|_, _| ())?;
2021-03-23 11:00:50 +01:00
let meta = IndexMeta::new_txn(&index, &txn)?;
txn.commit()?;
Ok(meta)
}
None => {
let meta = IndexMeta::new(&index)?;
Ok(meta)
}
})
2021-05-24 17:20:44 +02:00
.await??;
Ok(result)
2021-03-23 11:00:50 +01:00
}
async fn handle_snapshot(&self, uuid: Uuid, mut path: PathBuf) -> Result<()> {
2021-03-23 11:00:50 +01:00
use tokio::fs::create_dir_all;
path.push("indexes");
2021-05-24 17:20:44 +02:00
create_dir_all(&path).await?;
2021-03-23 11:00:50 +01:00
if let Some(index) = self.store.get(uuid).await? {
let mut index_path = path.join(format!("index-{}", uuid));
2021-05-24 17:20:44 +02:00
create_dir_all(&index_path).await?;
2021-03-23 11:00:50 +01:00
index_path.push("data.mdb");
spawn_blocking(move || -> Result<()> {
2021-03-23 11:00:50 +01:00
// Get write txn to wait for ongoing write transaction before snapshot.
let _txn = index.write_txn()?;
index
.env
.copy_to_path(index_path, CompactionOption::Enabled)?;
Ok(())
})
2021-05-24 17:20:44 +02:00
.await??;
2021-03-23 11:00:50 +01:00
}
Ok(())
}
2021-04-01 16:44:42 +02:00
2021-05-05 14:11:56 +02:00
/// Create a `documents.jsonl` and a `settings.json` in `path/uid/` with a dump of all the
/// documents and all the settings.
async fn handle_dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
2021-05-24 18:16:35 +02:00
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexActorError::UnexistingIndex)?;
2021-05-05 14:11:56 +02:00
2021-05-24 18:16:35 +02:00
let path = path.join(format!("indexes/index-{}/", uuid));
fs::create_dir_all(&path).await?;
2021-05-05 14:11:56 +02:00
2021-05-24 18:16:35 +02:00
tokio::task::spawn_blocking(move || index.dump(path)).await??;
2021-04-28 16:43:49 +02:00
Ok(())
}
async fn handle_get_stats(&self, uuid: Uuid) -> Result<IndexStats> {
2021-04-01 16:44:42 +02:00
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexActorError::UnexistingIndex)?;
2021-04-01 16:44:42 +02:00
spawn_blocking(move || {
let rtxn = index.read_txn()?;
Ok(IndexStats {
size: index.size(),
2021-06-17 14:36:32 +02:00
number_of_documents: index.number_of_documents(&rtxn)?,
2021-04-14 18:55:04 +02:00
is_indexing: None,
2021-06-21 16:59:27 +02:00
field_distribution: index.field_distribution(&rtxn)?,
2021-04-01 16:44:42 +02:00
})
})
2021-05-24 17:20:44 +02:00
.await?
2021-04-01 16:44:42 +02:00
}
2021-03-23 11:00:50 +01:00
}