mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 04:17:10 +02:00
split meilisearch-http and meilisearch-lib
This commit is contained in:
parent
09d4e37044
commit
60518449fc
63 changed files with 608 additions and 324 deletions
|
@ -4,8 +4,8 @@ use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
|
|||
use log::debug;
|
||||
use serde::Serialize;
|
||||
use siphasher::sip::SipHasher;
|
||||
use meilisearch_lib::MeiliSearch;
|
||||
|
||||
use crate::Data;
|
||||
use crate::Opt;
|
||||
|
||||
const AMPLITUDE_API_KEY: &str = "f7fba398780e06d8fe6666a9be7e3d47";
|
||||
|
@ -18,8 +18,8 @@ struct EventProperties {
|
|||
}
|
||||
|
||||
impl EventProperties {
|
||||
async fn from(data: Data) -> anyhow::Result<EventProperties> {
|
||||
let stats = data.index_controller.get_all_stats().await?;
|
||||
async fn from(data: MeiliSearch) -> anyhow::Result<EventProperties> {
|
||||
let stats = data.get_all_stats().await?;
|
||||
|
||||
let database_size = stats.database_size;
|
||||
let last_update_timestamp = stats.last_update.map(|u| u.timestamp());
|
||||
|
@ -62,7 +62,7 @@ struct AmplitudeRequest<'a> {
|
|||
events: Vec<Event<'a>>,
|
||||
}
|
||||
|
||||
pub async fn analytics_sender(data: Data, opt: Opt) {
|
||||
pub async fn analytics_sender(data: MeiliSearch, opt: Opt) {
|
||||
let username = whoami::username();
|
||||
let hostname = whoami::hostname();
|
||||
let platform = whoami::platform();
|
||||
|
|
|
@ -1,86 +0,0 @@
|
|||
use std::ops::Deref;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::index::{Checked, Settings};
|
||||
use crate::index_controller::{
|
||||
error::Result, DumpInfo, IndexController, IndexMetadata, IndexStats, Stats,
|
||||
};
|
||||
use crate::option::Opt;
|
||||
|
||||
pub mod search;
|
||||
mod updates;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Data {
|
||||
inner: Arc<DataInner>,
|
||||
}
|
||||
|
||||
impl Deref for Data {
|
||||
type Target = DataInner;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.inner
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DataInner {
|
||||
pub index_controller: IndexController,
|
||||
//pub api_keys: ApiKeys,
|
||||
}
|
||||
|
||||
impl Data {
|
||||
pub fn new(options: Opt) -> anyhow::Result<Data> {
|
||||
let path = options.db_path.clone();
|
||||
|
||||
let index_controller = IndexController::new(&path, &options)?;
|
||||
|
||||
let inner = DataInner {
|
||||
index_controller,
|
||||
};
|
||||
let inner = Arc::new(inner);
|
||||
|
||||
Ok(Data { inner })
|
||||
}
|
||||
|
||||
pub async fn settings(&self, uid: String) -> Result<Settings<Checked>> {
|
||||
self.index_controller.settings(uid).await
|
||||
}
|
||||
|
||||
pub async fn list_indexes(&self) -> Result<Vec<IndexMetadata>> {
|
||||
self.index_controller.list_indexes().await
|
||||
}
|
||||
|
||||
pub async fn index(&self, uid: String) -> Result<IndexMetadata> {
|
||||
self.index_controller.get_index(uid).await
|
||||
}
|
||||
|
||||
//pub async fn create_index(
|
||||
//&self,
|
||||
//uid: String,
|
||||
//primary_key: Option<String>,
|
||||
//) -> Result<IndexMetadata> {
|
||||
//let settings = IndexSettings {
|
||||
//uid: Some(uid),
|
||||
//primary_key,
|
||||
//};
|
||||
|
||||
//let meta = self.index_controller.create_index(settings).await?;
|
||||
//Ok(meta)
|
||||
//}
|
||||
|
||||
pub async fn get_index_stats(&self, uid: String) -> Result<IndexStats> {
|
||||
Ok(self.index_controller.get_index_stats(uid).await?)
|
||||
}
|
||||
|
||||
pub async fn get_all_stats(&self) -> Result<Stats> {
|
||||
Ok(self.index_controller.get_all_stats().await?)
|
||||
}
|
||||
|
||||
pub async fn create_dump(&self) -> Result<DumpInfo> {
|
||||
Ok(self.index_controller.create_dump().await?)
|
||||
}
|
||||
|
||||
pub async fn dump_status(&self, uid: String) -> Result<DumpInfo> {
|
||||
Ok(self.index_controller.dump_info(uid).await?)
|
||||
}
|
||||
}
|
|
@ -1,34 +0,0 @@
|
|||
use serde_json::{Map, Value};
|
||||
|
||||
use super::Data;
|
||||
use crate::index::{SearchQuery, SearchResult};
|
||||
use crate::index_controller::error::Result;
|
||||
|
||||
impl Data {
|
||||
pub async fn search(&self, index: String, search_query: SearchQuery) -> Result<SearchResult> {
|
||||
self.index_controller.search(index, search_query).await
|
||||
}
|
||||
|
||||
pub async fn retrieve_documents(
|
||||
&self,
|
||||
index: String,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
attributes_to_retrieve: Option<Vec<String>>,
|
||||
) -> Result<Vec<Map<String, Value>>> {
|
||||
self.index_controller
|
||||
.documents(index, offset, limit, attributes_to_retrieve)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn retrieve_document(
|
||||
&self,
|
||||
index: String,
|
||||
document_id: String,
|
||||
attributes_to_retrieve: Option<Vec<String>>,
|
||||
) -> Result<Map<String, Value>> {
|
||||
self.index_controller
|
||||
.document(index, document_id, attributes_to_retrieve)
|
||||
.await
|
||||
}
|
||||
}
|
|
@ -1,32 +0,0 @@
|
|||
use crate::index_controller::Update;
|
||||
use crate::index_controller::{error::Result, IndexMetadata, IndexSettings, UpdateStatus};
|
||||
use crate::Data;
|
||||
|
||||
impl Data {
|
||||
pub async fn register_update(&self, index_uid: &str, update: Update) -> Result<UpdateStatus> {
|
||||
let status = self.index_controller.register_update(index_uid, update).await?;
|
||||
Ok(status)
|
||||
}
|
||||
|
||||
pub async fn get_update_status(&self, index: String, uid: u64) -> Result<UpdateStatus> {
|
||||
self.index_controller.update_status(index, uid).await
|
||||
}
|
||||
|
||||
pub async fn get_updates_status(&self, index: String) -> Result<Vec<UpdateStatus>> {
|
||||
self.index_controller.all_update_status(index).await
|
||||
}
|
||||
|
||||
pub async fn update_index(
|
||||
&self,
|
||||
uid: String,
|
||||
primary_key: Option<String>,
|
||||
new_uid: Option<String>,
|
||||
) -> Result<IndexMetadata> {
|
||||
let settings = IndexSettings {
|
||||
uid: new_uid,
|
||||
primary_key,
|
||||
};
|
||||
|
||||
self.index_controller.update_index(uid, settings).await
|
||||
}
|
||||
}
|
|
@ -55,18 +55,6 @@ impl aweb::error::ResponseError for ResponseError {
|
|||
}
|
||||
}
|
||||
|
||||
macro_rules! internal_error {
|
||||
($target:ty : $($other:path), *) => {
|
||||
$(
|
||||
impl From<$other> for $target {
|
||||
fn from(other: $other) -> Self {
|
||||
Self::Internal(Box::new(other))
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct MilliError<'a>(pub &'a milli::Error);
|
||||
|
||||
|
|
|
@ -1,166 +0,0 @@
|
|||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
|
||||
use heed::RoTxn;
|
||||
use indexmap::IndexMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::option::IndexerOpts;
|
||||
|
||||
use super::error::Result;
|
||||
use super::{Index, Settings, Unchecked};
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
struct DumpMeta {
|
||||
settings: Settings<Unchecked>,
|
||||
primary_key: Option<String>,
|
||||
}
|
||||
|
||||
const META_FILE_NAME: &str = "meta.json";
|
||||
const DATA_FILE_NAME: &str = "documents.jsonl";
|
||||
|
||||
impl Index {
|
||||
pub fn dump(&self, path: impl AsRef<Path>) -> Result<()> {
|
||||
// acquire write txn make sure any ongoing write is finished before we start.
|
||||
let txn = self.env.write_txn()?;
|
||||
|
||||
self.dump_documents(&txn, &path)?;
|
||||
self.dump_meta(&txn, &path)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn dump_documents(&self, txn: &RoTxn, path: impl AsRef<Path>) -> Result<()> {
|
||||
let document_file_path = path.as_ref().join(DATA_FILE_NAME);
|
||||
let mut document_file = File::create(&document_file_path)?;
|
||||
|
||||
let documents = self.all_documents(txn)?;
|
||||
let fields_ids_map = self.fields_ids_map(txn)?;
|
||||
|
||||
// dump documents
|
||||
let mut json_map = IndexMap::new();
|
||||
for document in documents {
|
||||
let (_, reader) = document?;
|
||||
|
||||
for (fid, bytes) in reader.iter() {
|
||||
if let Some(name) = fields_ids_map.name(fid) {
|
||||
json_map.insert(name, serde_json::from_slice::<serde_json::Value>(bytes)?);
|
||||
}
|
||||
}
|
||||
|
||||
serde_json::to_writer(&mut document_file, &json_map)?;
|
||||
document_file.write_all(b"\n")?;
|
||||
|
||||
json_map.clear();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn dump_meta(&self, txn: &RoTxn, path: impl AsRef<Path>) -> Result<()> {
|
||||
let meta_file_path = path.as_ref().join(META_FILE_NAME);
|
||||
let mut meta_file = File::create(&meta_file_path)?;
|
||||
|
||||
let settings = self.settings_txn(txn)?.into_unchecked();
|
||||
let primary_key = self.primary_key(txn)?.map(String::from);
|
||||
let meta = DumpMeta {
|
||||
settings,
|
||||
primary_key,
|
||||
};
|
||||
|
||||
serde_json::to_writer(&mut meta_file, &meta)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn load_dump(
|
||||
_src: impl AsRef<Path>,
|
||||
_dst: impl AsRef<Path>,
|
||||
_size: usize,
|
||||
_indexing_options: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
//let dir_name = src
|
||||
//.as_ref()
|
||||
//.file_name()
|
||||
//.with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?;
|
||||
|
||||
//let dst_dir_path = dst.as_ref().join("indexes").join(dir_name);
|
||||
//create_dir_all(&dst_dir_path)?;
|
||||
|
||||
//let meta_path = src.as_ref().join(META_FILE_NAME);
|
||||
//let mut meta_file = File::open(meta_path)?;
|
||||
|
||||
//// We first deserialize the dump meta into a serde_json::Value and change
|
||||
//// the custom ranking rules settings from the old format to the new format.
|
||||
//let mut meta: Value = serde_json::from_reader(&mut meta_file)?;
|
||||
//if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") {
|
||||
//convert_custom_ranking_rules(ranking_rules);
|
||||
//}
|
||||
|
||||
//// Then we serialize it back into a vec to deserialize it
|
||||
//// into a `DumpMeta` struct with the newly patched `rankingRules` format.
|
||||
//let patched_meta = serde_json::to_vec(&meta)?;
|
||||
|
||||
//let DumpMeta {
|
||||
//settings,
|
||||
//primary_key,
|
||||
//} = serde_json::from_slice(&patched_meta)?;
|
||||
//let settings = settings.check();
|
||||
//let index = Self::open(&dst_dir_path, size)?;
|
||||
//let mut txn = index.write_txn()?;
|
||||
|
||||
//let handler = UpdateHandler::new(indexing_options)?;
|
||||
|
||||
//index.update_settings_txn(&mut txn, &settings, handler.update_builder(0))?;
|
||||
|
||||
//let document_file_path = src.as_ref().join(DATA_FILE_NAME);
|
||||
//let reader = File::open(&document_file_path)?;
|
||||
//let mut reader = BufReader::new(reader);
|
||||
//reader.fill_buf()?;
|
||||
// If the document file is empty, we don't perform the document addition, to prevent
|
||||
// a primary key error to be thrown.
|
||||
|
||||
todo!("fix obk document dumps")
|
||||
//if !reader.buffer().is_empty() {
|
||||
//index.update_documents_txn(
|
||||
//&mut txn,
|
||||
//IndexDocumentsMethod::UpdateDocuments,
|
||||
//Some(reader),
|
||||
//handler.update_builder(0),
|
||||
//primary_key.as_deref(),
|
||||
//)?;
|
||||
//}
|
||||
|
||||
//txn.commit()?;
|
||||
|
||||
//match Arc::try_unwrap(index.0) {
|
||||
//Ok(inner) => inner.prepare_for_closing().wait(),
|
||||
//Err(_) => bail!("Could not close index properly."),
|
||||
//}
|
||||
|
||||
//Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// /// Converts the ranking rules from the format `asc(_)`, `desc(_)` to the format `_:asc`, `_:desc`.
|
||||
// ///
|
||||
// /// This is done for compatibility reasons, and to avoid a new dump version,
|
||||
// /// since the new syntax was introduced soon after the new dump version.
|
||||
//fn convert_custom_ranking_rules(ranking_rules: &mut Value) {
|
||||
//*ranking_rules = match ranking_rules.take() {
|
||||
//Value::Array(values) => values
|
||||
//.into_iter()
|
||||
//.filter_map(|value| match value {
|
||||
//Value::String(s) if s.starts_with("asc") => asc_ranking_rule(&s)
|
||||
//.map(|f| format!("{}:asc", f))
|
||||
//.map(Value::String),
|
||||
//Value::String(s) if s.starts_with("desc") => desc_ranking_rule(&s)
|
||||
//.map(|f| format!("{}:desc", f))
|
||||
//.map(Value::String),
|
||||
//otherwise => Some(otherwise),
|
||||
//})
|
||||
//.collect(),
|
||||
//otherwise => otherwise,
|
||||
//}
|
||||
//}
|
|
@ -1,52 +0,0 @@
|
|||
use std::error::Error;
|
||||
|
||||
use meilisearch_error::{Code, ErrorCode};
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::error::MilliError;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, IndexError>;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum IndexError {
|
||||
#[error("Internal error: {0}")]
|
||||
Internal(Box<dyn Error + Send + Sync + 'static>),
|
||||
#[error("Document with id {0} not found.")]
|
||||
DocumentNotFound(String),
|
||||
#[error("{0}")]
|
||||
Facet(#[from] FacetError),
|
||||
#[error("{0}")]
|
||||
Milli(#[from] milli::Error),
|
||||
}
|
||||
|
||||
internal_error!(
|
||||
IndexError: std::io::Error,
|
||||
heed::Error,
|
||||
fst::Error,
|
||||
serde_json::Error
|
||||
);
|
||||
|
||||
impl ErrorCode for IndexError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
IndexError::Internal(_) => Code::Internal,
|
||||
IndexError::DocumentNotFound(_) => Code::DocumentNotFound,
|
||||
IndexError::Facet(e) => e.error_code(),
|
||||
IndexError::Milli(e) => MilliError(e).error_code(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum FacetError {
|
||||
#[error("Invalid facet expression, expected {}, found: {1}", .0.join(", "))]
|
||||
InvalidExpression(&'static [&'static str], Value),
|
||||
}
|
||||
|
||||
impl ErrorCode for FacetError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
FacetError::InvalidExpression(_, _) => Code::Facet,
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,202 +0,0 @@
|
|||
use std::collections::{BTreeSet, HashSet};
|
||||
use std::fs::create_dir_all;
|
||||
use std::marker::PhantomData;
|
||||
use std::ops::Deref;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use heed::{EnvOpenOptions, RoTxn};
|
||||
use milli::update::Setting;
|
||||
use milli::{obkv_to_json, FieldId};
|
||||
use serde_json::{Map, Value};
|
||||
|
||||
use error::Result;
|
||||
pub use search::{default_crop_length, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
|
||||
pub use updates::{Checked, Facets, Settings, Unchecked};
|
||||
|
||||
use crate::helpers::EnvSizer;
|
||||
use crate::index_controller::update_file_store::UpdateFileStore;
|
||||
|
||||
use self::error::IndexError;
|
||||
|
||||
pub mod error;
|
||||
pub mod update_handler;
|
||||
|
||||
mod dump;
|
||||
mod search;
|
||||
mod updates;
|
||||
|
||||
pub type Document = Map<String, Value>;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Index {
|
||||
pub inner: Arc<milli::Index>,
|
||||
update_file_store: Arc<UpdateFileStore>,
|
||||
}
|
||||
|
||||
impl Deref for Index {
|
||||
type Target = milli::Index;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.inner.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
impl Index {
|
||||
pub fn open(path: impl AsRef<Path>, size: usize, update_file_store: Arc<UpdateFileStore>) -> Result<Self> {
|
||||
create_dir_all(&path)?;
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(size);
|
||||
let inner = Arc::new(milli::Index::new(options, &path)?);
|
||||
Ok(Index { inner, update_file_store })
|
||||
}
|
||||
|
||||
pub fn settings(&self) -> Result<Settings<Checked>> {
|
||||
let txn = self.read_txn()?;
|
||||
self.settings_txn(&txn)
|
||||
}
|
||||
|
||||
pub fn settings_txn(&self, txn: &RoTxn) -> Result<Settings<Checked>> {
|
||||
let displayed_attributes = self
|
||||
.displayed_fields(txn)?
|
||||
.map(|fields| fields.into_iter().map(String::from).collect());
|
||||
|
||||
let searchable_attributes = self
|
||||
.searchable_fields(txn)?
|
||||
.map(|fields| fields.into_iter().map(String::from).collect());
|
||||
|
||||
let filterable_attributes = self.filterable_fields(txn)?.into_iter().collect();
|
||||
|
||||
let sortable_attributes = self.sortable_fields(txn)?.into_iter().collect();
|
||||
|
||||
let criteria = self
|
||||
.criteria(txn)?
|
||||
.into_iter()
|
||||
.map(|c| c.to_string())
|
||||
.collect();
|
||||
|
||||
let stop_words = self
|
||||
.stop_words(txn)?
|
||||
.map(|stop_words| -> Result<BTreeSet<_>> {
|
||||
Ok(stop_words.stream().into_strs()?.into_iter().collect())
|
||||
})
|
||||
.transpose()?
|
||||
.unwrap_or_else(BTreeSet::new);
|
||||
let distinct_field = self.distinct_field(txn)?.map(String::from);
|
||||
|
||||
// in milli each word in the synonyms map were split on their separator. Since we lost
|
||||
// this information we are going to put space between words.
|
||||
let synonyms = self
|
||||
.synonyms(txn)?
|
||||
.iter()
|
||||
.map(|(key, values)| {
|
||||
(
|
||||
key.join(" "),
|
||||
values.iter().map(|value| value.join(" ")).collect(),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(Settings {
|
||||
displayed_attributes: match displayed_attributes {
|
||||
Some(attrs) => Setting::Set(attrs),
|
||||
None => Setting::Reset,
|
||||
},
|
||||
searchable_attributes: match searchable_attributes {
|
||||
Some(attrs) => Setting::Set(attrs),
|
||||
None => Setting::Reset,
|
||||
},
|
||||
filterable_attributes: Setting::Set(filterable_attributes),
|
||||
sortable_attributes: Setting::Set(sortable_attributes),
|
||||
ranking_rules: Setting::Set(criteria),
|
||||
stop_words: Setting::Set(stop_words),
|
||||
distinct_attribute: match distinct_field {
|
||||
Some(field) => Setting::Set(field),
|
||||
None => Setting::Reset,
|
||||
},
|
||||
synonyms: Setting::Set(synonyms),
|
||||
_kind: PhantomData,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn retrieve_documents<S: AsRef<str>>(
|
||||
&self,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
attributes_to_retrieve: Option<Vec<S>>,
|
||||
) -> Result<Vec<Map<String, Value>>> {
|
||||
let txn = self.read_txn()?;
|
||||
|
||||
let fields_ids_map = self.fields_ids_map(&txn)?;
|
||||
let fields_to_display =
|
||||
self.fields_to_display(&txn, &attributes_to_retrieve, &fields_ids_map)?;
|
||||
|
||||
let iter = self.documents.range(&txn, &(..))?.skip(offset).take(limit);
|
||||
|
||||
let mut documents = Vec::new();
|
||||
|
||||
for entry in iter {
|
||||
let (_id, obkv) = entry?;
|
||||
let object = obkv_to_json(&fields_to_display, &fields_ids_map, obkv)?;
|
||||
documents.push(object);
|
||||
}
|
||||
|
||||
Ok(documents)
|
||||
}
|
||||
|
||||
pub fn retrieve_document<S: AsRef<str>>(
|
||||
&self,
|
||||
doc_id: String,
|
||||
attributes_to_retrieve: Option<Vec<S>>,
|
||||
) -> Result<Map<String, Value>> {
|
||||
let txn = self.read_txn()?;
|
||||
|
||||
let fields_ids_map = self.fields_ids_map(&txn)?;
|
||||
|
||||
let fields_to_display =
|
||||
self.fields_to_display(&txn, &attributes_to_retrieve, &fields_ids_map)?;
|
||||
|
||||
let internal_id = self
|
||||
.external_documents_ids(&txn)?
|
||||
.get(doc_id.as_bytes())
|
||||
.ok_or_else(|| IndexError::DocumentNotFound(doc_id.clone()))?;
|
||||
|
||||
let document = self
|
||||
.documents(&txn, std::iter::once(internal_id))?
|
||||
.into_iter()
|
||||
.next()
|
||||
.map(|(_, d)| d)
|
||||
.ok_or(IndexError::DocumentNotFound(doc_id))?;
|
||||
|
||||
let document = obkv_to_json(&fields_to_display, &fields_ids_map, document)?;
|
||||
|
||||
Ok(document)
|
||||
}
|
||||
|
||||
pub fn size(&self) -> u64 {
|
||||
self.env.size()
|
||||
}
|
||||
|
||||
fn fields_to_display<S: AsRef<str>>(
|
||||
&self,
|
||||
txn: &heed::RoTxn,
|
||||
attributes_to_retrieve: &Option<Vec<S>>,
|
||||
fields_ids_map: &milli::FieldsIdsMap,
|
||||
) -> Result<Vec<FieldId>> {
|
||||
let mut displayed_fields_ids = match self.displayed_fields_ids(txn)? {
|
||||
Some(ids) => ids.into_iter().collect::<Vec<_>>(),
|
||||
None => fields_ids_map.iter().map(|(id, _)| id).collect(),
|
||||
};
|
||||
|
||||
let attributes_to_retrieve_ids = match attributes_to_retrieve {
|
||||
Some(attrs) => attrs
|
||||
.iter()
|
||||
.filter_map(|f| fields_ids_map.id(f.as_ref()))
|
||||
.collect::<HashSet<_>>(),
|
||||
None => fields_ids_map.iter().map(|(id, _)| id).collect(),
|
||||
};
|
||||
|
||||
displayed_fields_ids.retain(|fid| attributes_to_retrieve_ids.contains(fid));
|
||||
Ok(displayed_fields_ids)
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load diff
|
@ -1,72 +0,0 @@
|
|||
use crate::index::Index;
|
||||
use milli::update::UpdateBuilder;
|
||||
use milli::CompressionType;
|
||||
use rayon::ThreadPool;
|
||||
|
||||
use crate::index_controller::update_actor::RegisterUpdate;
|
||||
use crate::index_controller::{Failed, Processed, Processing};
|
||||
use crate::option::IndexerOpts;
|
||||
|
||||
pub struct UpdateHandler {
|
||||
max_nb_chunks: Option<usize>,
|
||||
chunk_compression_level: Option<u32>,
|
||||
thread_pool: ThreadPool,
|
||||
log_frequency: usize,
|
||||
max_memory: Option<usize>,
|
||||
chunk_compression_type: CompressionType,
|
||||
}
|
||||
|
||||
impl UpdateHandler {
|
||||
pub fn new(opt: &IndexerOpts) -> anyhow::Result<Self> {
|
||||
let thread_pool = rayon::ThreadPoolBuilder::new()
|
||||
.num_threads(opt.indexing_jobs.unwrap_or(num_cpus::get() / 2))
|
||||
.build()?;
|
||||
|
||||
Ok(Self {
|
||||
max_nb_chunks: opt.max_nb_chunks,
|
||||
chunk_compression_level: opt.chunk_compression_level,
|
||||
thread_pool,
|
||||
log_frequency: opt.log_every_n,
|
||||
max_memory: opt.max_memory.map(|m| m.get_bytes() as usize),
|
||||
chunk_compression_type: opt.chunk_compression_type,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn update_builder(&self, update_id: u64) -> UpdateBuilder {
|
||||
// We prepare the update by using the update builder.
|
||||
let mut update_builder = UpdateBuilder::new(update_id);
|
||||
if let Some(max_nb_chunks) = self.max_nb_chunks {
|
||||
update_builder.max_nb_chunks(max_nb_chunks);
|
||||
}
|
||||
if let Some(chunk_compression_level) = self.chunk_compression_level {
|
||||
update_builder.chunk_compression_level(chunk_compression_level);
|
||||
}
|
||||
update_builder.thread_pool(&self.thread_pool);
|
||||
update_builder.log_every_n(self.log_frequency);
|
||||
if let Some(max_memory) = self.max_memory {
|
||||
update_builder.max_memory(max_memory);
|
||||
}
|
||||
update_builder.chunk_compression_type(self.chunk_compression_type);
|
||||
update_builder
|
||||
}
|
||||
|
||||
pub fn handle_update(
|
||||
&self,
|
||||
index: Index,
|
||||
meta: Processing,
|
||||
) -> Result<Processed, Failed> {
|
||||
let update_id = meta.id();
|
||||
let update_builder = self.update_builder(update_id);
|
||||
|
||||
let result = match meta.meta() {
|
||||
RegisterUpdate::DocumentAddition { primary_key, content_uuid, method } => {
|
||||
index.update_documents(*method, *content_uuid, update_builder, primary_key.as_deref())
|
||||
}
|
||||
};
|
||||
|
||||
match result {
|
||||
Ok(result) => Ok(meta.process(result)),
|
||||
Err(e) => Err(meta.fail(e.into())),
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,366 +0,0 @@
|
|||
use std::collections::{BTreeMap, BTreeSet};
|
||||
use std::marker::PhantomData;
|
||||
use std::num::NonZeroUsize;
|
||||
|
||||
use log::{debug, info, trace};
|
||||
use milli::documents::DocumentBatchReader;
|
||||
use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder};
|
||||
use serde::{Deserialize, Serialize, Serializer};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::index_controller::UpdateResult;
|
||||
|
||||
use super::Index;
|
||||
use super::error::Result;
|
||||
|
||||
fn serialize_with_wildcard<S>(
|
||||
field: &Setting<Vec<String>>,
|
||||
s: S,
|
||||
) -> std::result::Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
let wildcard = vec!["*".to_string()];
|
||||
match field {
|
||||
Setting::Set(value) => Some(value),
|
||||
Setting::Reset => Some(&wildcard),
|
||||
Setting::NotSet => None,
|
||||
}
|
||||
.serialize(s)
|
||||
}
|
||||
|
||||
#[derive(Clone, Default, Debug, Serialize)]
|
||||
pub struct Checked;
|
||||
|
||||
#[derive(Clone, Default, Debug, Serialize, Deserialize)]
|
||||
pub struct Unchecked;
|
||||
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))]
|
||||
pub struct Settings<T> {
|
||||
#[serde(
|
||||
default,
|
||||
serialize_with = "serialize_with_wildcard",
|
||||
skip_serializing_if = "Setting::is_not_set"
|
||||
)]
|
||||
pub displayed_attributes: Setting<Vec<String>>,
|
||||
|
||||
#[serde(
|
||||
default,
|
||||
serialize_with = "serialize_with_wildcard",
|
||||
skip_serializing_if = "Setting::is_not_set"
|
||||
)]
|
||||
pub searchable_attributes: Setting<Vec<String>>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
pub filterable_attributes: Setting<BTreeSet<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
pub sortable_attributes: Setting<BTreeSet<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
pub ranking_rules: Setting<Vec<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
pub stop_words: Setting<BTreeSet<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
pub distinct_attribute: Setting<String>,
|
||||
|
||||
#[serde(skip)]
|
||||
pub _kind: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl Settings<Checked> {
|
||||
pub fn cleared() -> Settings<Checked> {
|
||||
Settings {
|
||||
displayed_attributes: Setting::Reset,
|
||||
searchable_attributes: Setting::Reset,
|
||||
filterable_attributes: Setting::Reset,
|
||||
sortable_attributes: Setting::Reset,
|
||||
ranking_rules: Setting::Reset,
|
||||
stop_words: Setting::Reset,
|
||||
synonyms: Setting::Reset,
|
||||
distinct_attribute: Setting::Reset,
|
||||
_kind: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_unchecked(self) -> Settings<Unchecked> {
|
||||
let Self {
|
||||
displayed_attributes,
|
||||
searchable_attributes,
|
||||
filterable_attributes,
|
||||
sortable_attributes,
|
||||
ranking_rules,
|
||||
stop_words,
|
||||
synonyms,
|
||||
distinct_attribute,
|
||||
..
|
||||
} = self;
|
||||
|
||||
Settings {
|
||||
displayed_attributes,
|
||||
searchable_attributes,
|
||||
filterable_attributes,
|
||||
sortable_attributes,
|
||||
ranking_rules,
|
||||
stop_words,
|
||||
synonyms,
|
||||
distinct_attribute,
|
||||
_kind: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Settings<Unchecked> {
|
||||
pub fn check(self) -> Settings<Checked> {
|
||||
let displayed_attributes = match self.displayed_attributes {
|
||||
Setting::Set(fields) => {
|
||||
if fields.iter().any(|f| f == "*") {
|
||||
Setting::Reset
|
||||
} else {
|
||||
Setting::Set(fields)
|
||||
}
|
||||
}
|
||||
otherwise => otherwise,
|
||||
};
|
||||
|
||||
let searchable_attributes = match self.searchable_attributes {
|
||||
Setting::Set(fields) => {
|
||||
if fields.iter().any(|f| f == "*") {
|
||||
Setting::Reset
|
||||
} else {
|
||||
Setting::Set(fields)
|
||||
}
|
||||
}
|
||||
otherwise => otherwise,
|
||||
};
|
||||
|
||||
Settings {
|
||||
displayed_attributes,
|
||||
searchable_attributes,
|
||||
filterable_attributes: self.filterable_attributes,
|
||||
sortable_attributes: self.sortable_attributes,
|
||||
ranking_rules: self.ranking_rules,
|
||||
stop_words: self.stop_words,
|
||||
synonyms: self.synonyms,
|
||||
distinct_attribute: self.distinct_attribute,
|
||||
_kind: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Facets {
|
||||
pub level_group_size: Option<NonZeroUsize>,
|
||||
pub min_level_size: Option<NonZeroUsize>,
|
||||
}
|
||||
|
||||
impl Index {
|
||||
pub fn update_documents(
|
||||
&self,
|
||||
method: IndexDocumentsMethod,
|
||||
content_uuid: Uuid,
|
||||
update_builder: UpdateBuilder,
|
||||
primary_key: Option<&str>,
|
||||
) -> Result<UpdateResult> {
|
||||
let mut txn = self.write_txn()?;
|
||||
let result = self.update_documents_txn(&mut txn, method, content_uuid, update_builder, primary_key)?;
|
||||
txn.commit()?;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn update_documents_txn<'a, 'b>(
|
||||
&'a self,
|
||||
txn: &mut heed::RwTxn<'a, 'b>,
|
||||
method: IndexDocumentsMethod,
|
||||
content_uuid: Uuid,
|
||||
update_builder: UpdateBuilder,
|
||||
primary_key: Option<&str>,
|
||||
) -> Result<UpdateResult> {
|
||||
trace!("performing document addition");
|
||||
|
||||
// Set the primary key if not set already, ignore if already set.
|
||||
if let (None, Some(primary_key)) = (self.primary_key(txn)?, primary_key) {
|
||||
let mut builder = UpdateBuilder::new(0).settings(txn, self);
|
||||
builder.set_primary_key(primary_key.to_string());
|
||||
builder.execute(|_, _| ())?;
|
||||
}
|
||||
|
||||
let indexing_callback =
|
||||
|indexing_step, update_id| debug!("update {}: {:?}", update_id, indexing_step);
|
||||
|
||||
let content_file = self.update_file_store.get_update(content_uuid).unwrap();
|
||||
let reader = DocumentBatchReader::from_reader(content_file).unwrap();
|
||||
|
||||
let mut builder = update_builder.index_documents(txn, self);
|
||||
builder.index_documents_method(method);
|
||||
let addition = builder.execute(reader, indexing_callback)?;
|
||||
|
||||
info!("document addition done: {:?}", addition);
|
||||
|
||||
Ok(UpdateResult::DocumentsAddition(addition))
|
||||
}
|
||||
|
||||
//pub fn clear_documents(&self, update_builder: UpdateBuilder) -> Result<UpdateResult> {
|
||||
//// We must use the write transaction of the update here.
|
||||
//let mut wtxn = self.write_txn()?;
|
||||
//let builder = update_builder.clear_documents(&mut wtxn, self);
|
||||
|
||||
//let _count = builder.execute()?;
|
||||
|
||||
//wtxn.commit()
|
||||
//.and(Ok(UpdateResult::Other))
|
||||
//.map_err(Into::into)
|
||||
//}
|
||||
|
||||
//pub fn update_settings_txn<'a, 'b>(
|
||||
//&'a self,
|
||||
//txn: &mut heed::RwTxn<'a, 'b>,
|
||||
//settings: &Settings<Checked>,
|
||||
//update_builder: UpdateBuilder,
|
||||
//) -> Result<UpdateResult> {
|
||||
//// We must use the write transaction of the update here.
|
||||
//let mut builder = update_builder.settings(txn, self);
|
||||
|
||||
//match settings.searchable_attributes {
|
||||
//Setting::Set(ref names) => builder.set_searchable_fields(names.clone()),
|
||||
//Setting::Reset => builder.reset_searchable_fields(),
|
||||
//Setting::NotSet => (),
|
||||
//}
|
||||
|
||||
//match settings.displayed_attributes {
|
||||
//Setting::Set(ref names) => builder.set_displayed_fields(names.clone()),
|
||||
//Setting::Reset => builder.reset_displayed_fields(),
|
||||
//Setting::NotSet => (),
|
||||
//}
|
||||
|
||||
//match settings.filterable_attributes {
|
||||
//Setting::Set(ref facets) => {
|
||||
//builder.set_filterable_fields(facets.clone().into_iter().collect())
|
||||
//}
|
||||
//Setting::Reset => builder.reset_filterable_fields(),
|
||||
//Setting::NotSet => (),
|
||||
//}
|
||||
|
||||
//match settings.sortable_attributes {
|
||||
//Setting::Set(ref fields) => {
|
||||
//builder.set_sortable_fields(fields.iter().cloned().collect())
|
||||
//}
|
||||
//Setting::Reset => builder.reset_sortable_fields(),
|
||||
//Setting::NotSet => (),
|
||||
//}
|
||||
|
||||
//match settings.ranking_rules {
|
||||
//Setting::Set(ref criteria) => builder.set_criteria(criteria.clone()),
|
||||
//Setting::Reset => builder.reset_criteria(),
|
||||
//Setting::NotSet => (),
|
||||
//}
|
||||
|
||||
//match settings.stop_words {
|
||||
//Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()),
|
||||
//Setting::Reset => builder.reset_stop_words(),
|
||||
//Setting::NotSet => (),
|
||||
//}
|
||||
|
||||
//match settings.synonyms {
|
||||
//Setting::Set(ref synonyms) => {
|
||||
//builder.set_synonyms(synonyms.clone().into_iter().collect())
|
||||
//}
|
||||
//Setting::Reset => builder.reset_synonyms(),
|
||||
//Setting::NotSet => (),
|
||||
//}
|
||||
|
||||
//match settings.distinct_attribute {
|
||||
//Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()),
|
||||
//Setting::Reset => builder.reset_distinct_field(),
|
||||
//Setting::NotSet => (),
|
||||
//}
|
||||
|
||||
//builder.execute(|indexing_step, update_id| {
|
||||
//debug!("update {}: {:?}", update_id, indexing_step)
|
||||
//})?;
|
||||
|
||||
//Ok(UpdateResult::Other)
|
||||
//}
|
||||
|
||||
//pub fn update_settings(
|
||||
//&self,
|
||||
//settings: &Settings<Checked>,
|
||||
//update_builder: UpdateBuilder,
|
||||
//) -> Result<UpdateResult> {
|
||||
//let mut txn = self.write_txn()?;
|
||||
//let result = self.update_settings_txn(&mut txn, settings, update_builder)?;
|
||||
//txn.commit()?;
|
||||
//Ok(result)
|
||||
//}
|
||||
|
||||
//pub fn delete_documents(
|
||||
//&self,
|
||||
//document_ids: &[String],
|
||||
//update_builder: UpdateBuilder,
|
||||
//) -> Result<UpdateResult> {
|
||||
//let mut txn = self.write_txn()?;
|
||||
//let mut builder = update_builder.delete_documents(&mut txn, self)?;
|
||||
|
||||
//// We ignore unexisting document ids
|
||||
//document_ids.iter().for_each(|id| {
|
||||
//builder.delete_external_id(id);
|
||||
//});
|
||||
|
||||
//let deleted = builder.execute()?;
|
||||
//txn.commit()
|
||||
//.and(Ok(UpdateResult::DocumentDeletion { deleted }))
|
||||
//.map_err(Into::into)
|
||||
//}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_setting_check() {
|
||||
// test no changes
|
||||
let settings = Settings {
|
||||
displayed_attributes: Setting::Set(vec![String::from("hello")]),
|
||||
searchable_attributes: Setting::Set(vec![String::from("hello")]),
|
||||
filterable_attributes: Setting::NotSet,
|
||||
sortable_attributes: Setting::NotSet,
|
||||
ranking_rules: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
distinct_attribute: Setting::NotSet,
|
||||
_kind: PhantomData::<Unchecked>,
|
||||
};
|
||||
|
||||
let checked = settings.clone().check();
|
||||
assert_eq!(settings.displayed_attributes, checked.displayed_attributes);
|
||||
assert_eq!(
|
||||
settings.searchable_attributes,
|
||||
checked.searchable_attributes
|
||||
);
|
||||
|
||||
// test wildcard
|
||||
// test no changes
|
||||
let settings = Settings {
|
||||
displayed_attributes: Setting::Set(vec![String::from("*")]),
|
||||
searchable_attributes: Setting::Set(vec![String::from("hello"), String::from("*")]),
|
||||
filterable_attributes: Setting::NotSet,
|
||||
sortable_attributes: Setting::NotSet,
|
||||
ranking_rules: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
distinct_attribute: Setting::NotSet,
|
||||
_kind: PhantomData::<Unchecked>,
|
||||
};
|
||||
|
||||
let checked = settings.check();
|
||||
assert_eq!(checked.displayed_attributes, Setting::Reset);
|
||||
assert_eq!(checked.searchable_attributes, Setting::Reset);
|
||||
}
|
||||
}
|
|
@ -1,157 +0,0 @@
|
|||
use std::collections::HashMap;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_stream::stream;
|
||||
use chrono::Utc;
|
||||
use futures::{lock::Mutex, stream::StreamExt};
|
||||
use log::{error, trace};
|
||||
use tokio::sync::{mpsc, oneshot, RwLock};
|
||||
use update_actor::UpdateActorHandle;
|
||||
use uuid_resolver::UuidResolverHandle;
|
||||
|
||||
use super::error::{DumpActorError, Result};
|
||||
use super::{DumpInfo, DumpMsg, DumpStatus, DumpTask};
|
||||
use crate::index_controller::{update_actor, uuid_resolver};
|
||||
|
||||
pub const CONCURRENT_DUMP_MSG: usize = 10;
|
||||
|
||||
pub struct DumpActor<UuidResolver, Update> {
|
||||
inbox: Option<mpsc::Receiver<DumpMsg>>,
|
||||
uuid_resolver: UuidResolver,
|
||||
update: Update,
|
||||
dump_path: PathBuf,
|
||||
lock: Arc<Mutex<()>>,
|
||||
dump_infos: Arc<RwLock<HashMap<String, DumpInfo>>>,
|
||||
update_db_size: usize,
|
||||
index_db_size: usize,
|
||||
}
|
||||
|
||||
/// Generate uid from creation date
|
||||
fn generate_uid() -> String {
|
||||
Utc::now().format("%Y%m%d-%H%M%S%3f").to_string()
|
||||
}
|
||||
|
||||
impl<UuidResolver, Update> DumpActor<UuidResolver, Update>
|
||||
where
|
||||
UuidResolver: UuidResolverHandle + Send + Sync + Clone + 'static,
|
||||
Update: UpdateActorHandle + Send + Sync + Clone + 'static,
|
||||
{
|
||||
pub fn new(
|
||||
inbox: mpsc::Receiver<DumpMsg>,
|
||||
uuid_resolver: UuidResolver,
|
||||
update: Update,
|
||||
dump_path: impl AsRef<Path>,
|
||||
index_db_size: usize,
|
||||
update_db_size: usize,
|
||||
) -> Self {
|
||||
let dump_infos = Arc::new(RwLock::new(HashMap::new()));
|
||||
let lock = Arc::new(Mutex::new(()));
|
||||
Self {
|
||||
inbox: Some(inbox),
|
||||
uuid_resolver,
|
||||
update,
|
||||
dump_path: dump_path.as_ref().into(),
|
||||
dump_infos,
|
||||
lock,
|
||||
index_db_size,
|
||||
update_db_size,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run(mut self) {
|
||||
trace!("Started dump actor.");
|
||||
|
||||
let mut inbox = self
|
||||
.inbox
|
||||
.take()
|
||||
.expect("Dump Actor must have a inbox at this point.");
|
||||
|
||||
let stream = stream! {
|
||||
loop {
|
||||
match inbox.recv().await {
|
||||
Some(msg) => yield msg,
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
stream
|
||||
.for_each_concurrent(Some(CONCURRENT_DUMP_MSG), |msg| self.handle_message(msg))
|
||||
.await;
|
||||
|
||||
error!("Dump actor stopped.");
|
||||
}
|
||||
|
||||
async fn handle_message(&self, msg: DumpMsg) {
|
||||
use DumpMsg::*;
|
||||
|
||||
match msg {
|
||||
CreateDump { ret } => {
|
||||
let _ = self.handle_create_dump(ret).await;
|
||||
}
|
||||
DumpInfo { ret, uid } => {
|
||||
let _ = ret.send(self.handle_dump_info(uid).await);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_create_dump(&self, ret: oneshot::Sender<Result<DumpInfo>>) {
|
||||
let uid = generate_uid();
|
||||
let info = DumpInfo::new(uid.clone(), DumpStatus::InProgress);
|
||||
|
||||
let _lock = match self.lock.try_lock() {
|
||||
Some(lock) => lock,
|
||||
None => {
|
||||
ret.send(Err(DumpActorError::DumpAlreadyRunning))
|
||||
.expect("Dump actor is dead");
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
self.dump_infos
|
||||
.write()
|
||||
.await
|
||||
.insert(uid.clone(), info.clone());
|
||||
|
||||
ret.send(Ok(info)).expect("Dump actor is dead");
|
||||
|
||||
let task = DumpTask {
|
||||
path: self.dump_path.clone(),
|
||||
uuid_resolver: self.uuid_resolver.clone(),
|
||||
update_handle: self.update.clone(),
|
||||
uid: uid.clone(),
|
||||
update_db_size: self.update_db_size,
|
||||
index_db_size: self.index_db_size,
|
||||
};
|
||||
|
||||
let task_result = tokio::task::spawn(task.run()).await;
|
||||
|
||||
let mut dump_infos = self.dump_infos.write().await;
|
||||
let dump_infos = dump_infos
|
||||
.get_mut(&uid)
|
||||
.expect("dump entry deleted while lock was acquired");
|
||||
|
||||
match task_result {
|
||||
Ok(Ok(())) => {
|
||||
dump_infos.done();
|
||||
trace!("Dump succeed");
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
dump_infos.with_error(e.to_string());
|
||||
error!("Dump failed: {}", e);
|
||||
}
|
||||
Err(_) => {
|
||||
dump_infos.with_error("Unexpected error while performing dump.".to_string());
|
||||
error!("Dump panicked. Dump status set to failed");
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
async fn handle_dump_info(&self, uid: String) -> Result<DumpInfo> {
|
||||
match self.dump_infos.read().await.get(&uid) {
|
||||
Some(info) => Ok(info.clone()),
|
||||
_ => Err(DumpActorError::DumpDoesNotExist(uid)),
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,52 +0,0 @@
|
|||
use meilisearch_error::{Code, ErrorCode};
|
||||
|
||||
use crate::index_controller::update_actor::error::UpdateActorError;
|
||||
use crate::index_controller::uuid_resolver::error::UuidResolverError;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, DumpActorError>;
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum DumpActorError {
|
||||
#[error("Another dump is already in progress")]
|
||||
DumpAlreadyRunning,
|
||||
#[error("Dump `{0}` not found")]
|
||||
DumpDoesNotExist(String),
|
||||
#[error("Internal error: {0}")]
|
||||
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
|
||||
#[error("{0}")]
|
||||
UuidResolver(#[from] UuidResolverError),
|
||||
#[error("{0}")]
|
||||
UpdateActor(#[from] UpdateActorError),
|
||||
}
|
||||
|
||||
macro_rules! internal_error {
|
||||
($($other:path), *) => {
|
||||
$(
|
||||
impl From<$other> for DumpActorError {
|
||||
fn from(other: $other) -> Self {
|
||||
Self::Internal(Box::new(other))
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
internal_error!(
|
||||
heed::Error,
|
||||
std::io::Error,
|
||||
tokio::task::JoinError,
|
||||
serde_json::error::Error,
|
||||
tempfile::PersistError
|
||||
);
|
||||
|
||||
impl ErrorCode for DumpActorError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
DumpActorError::DumpAlreadyRunning => Code::DumpAlreadyInProgress,
|
||||
DumpActorError::DumpDoesNotExist(_) => Code::NotFound,
|
||||
DumpActorError::Internal(_) => Code::Internal,
|
||||
DumpActorError::UuidResolver(e) => e.error_code(),
|
||||
DumpActorError::UpdateActor(e) => e.error_code(),
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,52 +0,0 @@
|
|||
use std::path::Path;
|
||||
|
||||
use tokio::sync::{mpsc, oneshot};
|
||||
|
||||
use super::error::Result;
|
||||
use super::{DumpActor, DumpActorHandle, DumpInfo, DumpMsg};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DumpActorHandleImpl {
|
||||
sender: mpsc::Sender<DumpMsg>,
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl DumpActorHandle for DumpActorHandleImpl {
|
||||
async fn create_dump(&self) -> Result<DumpInfo> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = DumpMsg::CreateDump { ret };
|
||||
let _ = self.sender.send(msg).await;
|
||||
receiver.await.expect("IndexActor has been killed")
|
||||
}
|
||||
|
||||
async fn dump_info(&self, uid: String) -> Result<DumpInfo> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = DumpMsg::DumpInfo { ret, uid };
|
||||
let _ = self.sender.send(msg).await;
|
||||
receiver.await.expect("IndexActor has been killed")
|
||||
}
|
||||
}
|
||||
|
||||
impl DumpActorHandleImpl {
|
||||
pub fn new(
|
||||
path: impl AsRef<Path>,
|
||||
uuid_resolver: crate::index_controller::uuid_resolver::UuidResolverHandleImpl,
|
||||
update: crate::index_controller::update_actor::UpdateActorHandleImpl,
|
||||
index_db_size: usize,
|
||||
update_db_size: usize,
|
||||
) -> anyhow::Result<Self> {
|
||||
let (sender, receiver) = mpsc::channel(10);
|
||||
let actor = DumpActor::new(
|
||||
receiver,
|
||||
uuid_resolver,
|
||||
update,
|
||||
path,
|
||||
index_db_size,
|
||||
update_db_size,
|
||||
);
|
||||
|
||||
tokio::task::spawn(actor.run());
|
||||
|
||||
Ok(Self { sender })
|
||||
}
|
||||
}
|
|
@ -1,2 +0,0 @@
|
|||
pub mod v1;
|
||||
pub mod v2;
|
|
@ -1,224 +0,0 @@
|
|||
use std::collections::{BTreeMap, BTreeSet};
|
||||
use std::marker::PhantomData;
|
||||
use std::path::Path;
|
||||
|
||||
use log::{error, info, warn};
|
||||
use milli::update::Setting;
|
||||
use serde::{Deserialize, Deserializer, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::index_controller::{self, uuid_resolver::HeedUuidStore, IndexMetadata};
|
||||
use crate::index_controller::{asc_ranking_rule, desc_ranking_rule};
|
||||
use crate::{
|
||||
index::Unchecked,
|
||||
option::IndexerOpts,
|
||||
};
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct MetadataV1 {
|
||||
db_version: String,
|
||||
indexes: Vec<IndexMetadata>,
|
||||
}
|
||||
|
||||
impl MetadataV1 {
|
||||
pub fn load_dump(
|
||||
self,
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
size: usize,
|
||||
indexer_options: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
info!(
|
||||
"Loading dump, dump database version: {}, dump version: V1",
|
||||
self.db_version
|
||||
);
|
||||
|
||||
let uuid_store = HeedUuidStore::new(&dst)?;
|
||||
for index in self.indexes {
|
||||
let uuid = Uuid::new_v4();
|
||||
uuid_store.insert(index.uid.clone(), uuid)?;
|
||||
let src = src.as_ref().join(index.uid);
|
||||
load_index(
|
||||
&src,
|
||||
&dst,
|
||||
uuid,
|
||||
index.meta.primary_key.as_deref(),
|
||||
size,
|
||||
indexer_options,
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn deserialize_some<'de, T, D>(deserializer: D) -> std::result::Result<Option<T>, D::Error>
|
||||
where
|
||||
T: Deserialize<'de>,
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
Deserialize::deserialize(deserializer).map(Some)
|
||||
}
|
||||
|
||||
// These are the settings used in legacy meilisearch (<v0.21.0).
|
||||
#[derive(Default, Clone, Serialize, Deserialize, Debug)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
struct Settings {
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub ranking_rules: Option<Option<Vec<String>>>,
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub distinct_attribute: Option<Option<String>>,
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub searchable_attributes: Option<Option<Vec<String>>>,
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub displayed_attributes: Option<Option<BTreeSet<String>>>,
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub stop_words: Option<Option<BTreeSet<String>>>,
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub synonyms: Option<Option<BTreeMap<String, Vec<String>>>>,
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub attributes_for_faceting: Option<Option<Vec<String>>>,
|
||||
}
|
||||
|
||||
fn load_index(
|
||||
_src: impl AsRef<Path>,
|
||||
_dst: impl AsRef<Path>,
|
||||
_uuid: Uuid,
|
||||
_primary_key: Option<&str>,
|
||||
_size: usize,
|
||||
_indexer_options: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
todo!("fix dump obkv documents")
|
||||
//let index_path = dst.as_ref().join(&format!("indexes/index-{}", uuid));
|
||||
|
||||
//create_dir_all(&index_path)?;
|
||||
//let mut options = EnvOpenOptions::new();
|
||||
//options.map_size(size);
|
||||
//let index = milli::Index::new(options, index_path)?;
|
||||
//let index = Index(Arc::new(index));
|
||||
|
||||
//// extract `settings.json` file and import content
|
||||
//let settings = import_settings(&src)?;
|
||||
//let settings: index_controller::Settings<Unchecked> = settings.into();
|
||||
|
||||
//let mut txn = index.write_txn()?;
|
||||
|
||||
//let handler = UpdateHandler::new(indexer_options)?;
|
||||
|
||||
//index.update_settings_txn(&mut txn, &settings.check(), handler.update_builder(0))?;
|
||||
|
||||
//let file = File::open(&src.as_ref().join("documents.jsonl"))?;
|
||||
//let mut reader = std::io::BufReader::new(file);
|
||||
//reader.fill_buf()?;
|
||||
//if !reader.buffer().is_empty() {
|
||||
//index.update_documents_txn(
|
||||
//&mut txn,
|
||||
//IndexDocumentsMethod::ReplaceDocuments,
|
||||
//Some(reader),
|
||||
//handler.update_builder(0),
|
||||
//primary_key,
|
||||
//)?;
|
||||
//}
|
||||
|
||||
//txn.commit()?;
|
||||
|
||||
//// Finaly, we extract the original milli::Index and close it
|
||||
//Arc::try_unwrap(index.0)
|
||||
//.map_err(|_e| "Couldn't close the index properly")
|
||||
//.unwrap()
|
||||
//.prepare_for_closing()
|
||||
//.wait();
|
||||
|
||||
//// Updates are ignored in dumps V1.
|
||||
|
||||
//Ok(())
|
||||
}
|
||||
|
||||
/// we need to **always** be able to convert the old settings to the settings currently being used
|
||||
impl From<Settings> for index_controller::Settings<Unchecked> {
|
||||
fn from(settings: Settings) -> Self {
|
||||
Self {
|
||||
distinct_attribute: match settings.distinct_attribute {
|
||||
Some(Some(attr)) => Setting::Set(attr),
|
||||
Some(None) => Setting::Reset,
|
||||
None => Setting::NotSet
|
||||
},
|
||||
// we need to convert the old `Vec<String>` into a `BTreeSet<String>`
|
||||
displayed_attributes: match settings.displayed_attributes {
|
||||
Some(Some(attrs)) => Setting::Set(attrs.into_iter().collect()),
|
||||
Some(None) => Setting::Reset,
|
||||
None => Setting::NotSet
|
||||
},
|
||||
searchable_attributes: match settings.searchable_attributes {
|
||||
Some(Some(attrs)) => Setting::Set(attrs),
|
||||
Some(None) => Setting::Reset,
|
||||
None => Setting::NotSet
|
||||
},
|
||||
filterable_attributes: match settings.attributes_for_faceting {
|
||||
Some(Some(attrs)) => Setting::Set(attrs.into_iter().collect()),
|
||||
Some(None) => Setting::Reset,
|
||||
None => Setting::NotSet
|
||||
},
|
||||
sortable_attributes: Setting::NotSet,
|
||||
ranking_rules: match settings.ranking_rules {
|
||||
Some(Some(ranking_rules)) => Setting::Set(ranking_rules.into_iter().filter_map(|criterion| {
|
||||
match criterion.as_str() {
|
||||
"words" | "typo" | "proximity" | "attribute" | "exactness" => Some(criterion),
|
||||
s if s.starts_with("asc") => asc_ranking_rule(s).map(|f| format!("{}:asc", f)),
|
||||
s if s.starts_with("desc") => desc_ranking_rule(s).map(|f| format!("{}:desc", f)),
|
||||
"wordsPosition" => {
|
||||
warn!("The criteria `attribute` and `wordsPosition` have been merged \
|
||||
into a single criterion `attribute` so `wordsPositon` will be \
|
||||
ignored");
|
||||
None
|
||||
}
|
||||
s => {
|
||||
error!("Unknown criterion found in the dump: `{}`, it will be ignored", s);
|
||||
None
|
||||
}
|
||||
}
|
||||
}).collect()),
|
||||
Some(None) => Setting::Reset,
|
||||
None => Setting::NotSet
|
||||
},
|
||||
// we need to convert the old `Vec<String>` into a `BTreeSet<String>`
|
||||
stop_words: match settings.stop_words {
|
||||
Some(Some(stop_words)) => Setting::Set(stop_words.into_iter().collect()),
|
||||
Some(None) => Setting::Reset,
|
||||
None => Setting::NotSet
|
||||
},
|
||||
// we need to convert the old `Vec<String>` into a `BTreeMap<String>`
|
||||
synonyms: match settings.synonyms {
|
||||
Some(Some(synonyms)) => Setting::Set(synonyms.into_iter().collect()),
|
||||
Some(None) => Setting::Reset,
|
||||
None => Setting::NotSet
|
||||
},
|
||||
_kind: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// /// Extract Settings from `settings.json` file present at provided `dir_path`
|
||||
//fn import_settings(dir_path: impl AsRef<Path>) -> anyhow::Result<Settings> {
|
||||
//let path = dir_path.as_ref().join("settings.json");
|
||||
//let file = File::open(path)?;
|
||||
//let reader = std::io::BufReader::new(file);
|
||||
//let metadata = serde_json::from_reader(reader)?;
|
||||
|
||||
//Ok(metadata)
|
||||
//}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn settings_format_regression() {
|
||||
let settings = Settings::default();
|
||||
assert_eq!(
|
||||
r##"{"rankingRules":null,"distinctAttribute":null,"searchableAttributes":null,"displayedAttributes":null,"stopWords":null,"synonyms":null,"attributesForFaceting":null}"##,
|
||||
serde_json::to_string(&settings).unwrap()
|
||||
);
|
||||
}
|
||||
}
|
|
@ -1,59 +0,0 @@
|
|||
use std::path::Path;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use log::info;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::index::Index;
|
||||
use crate::index_controller::{update_actor::UpdateStore, uuid_resolver::HeedUuidStore};
|
||||
use crate::option::IndexerOpts;
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct MetadataV2 {
|
||||
db_version: String,
|
||||
index_db_size: usize,
|
||||
update_db_size: usize,
|
||||
dump_date: DateTime<Utc>,
|
||||
}
|
||||
|
||||
impl MetadataV2 {
|
||||
pub fn new(index_db_size: usize, update_db_size: usize) -> Self {
|
||||
Self {
|
||||
db_version: env!("CARGO_PKG_VERSION").to_string(),
|
||||
index_db_size,
|
||||
update_db_size,
|
||||
dump_date: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load_dump(
|
||||
self,
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
index_db_size: usize,
|
||||
update_db_size: usize,
|
||||
indexing_options: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
info!(
|
||||
"Loading dump from {}, dump database version: {}, dump version: V2",
|
||||
self.dump_date, self.db_version
|
||||
);
|
||||
|
||||
info!("Loading index database.");
|
||||
HeedUuidStore::load_dump(src.as_ref(), &dst)?;
|
||||
|
||||
info!("Loading updates.");
|
||||
UpdateStore::load_dump(&src, &dst, update_db_size)?;
|
||||
|
||||
info!("Loading indexes.");
|
||||
let indexes_path = src.as_ref().join("indexes");
|
||||
let indexes = indexes_path.read_dir()?;
|
||||
for index in indexes {
|
||||
let index = index?;
|
||||
Index::load_dump(&index.path(), &dst, index_db_size, indexing_options)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
|
@ -1,14 +0,0 @@
|
|||
use tokio::sync::oneshot;
|
||||
|
||||
use super::error::Result;
|
||||
use super::DumpInfo;
|
||||
|
||||
pub enum DumpMsg {
|
||||
CreateDump {
|
||||
ret: oneshot::Sender<Result<DumpInfo>>,
|
||||
},
|
||||
DumpInfo {
|
||||
uid: String,
|
||||
ret: oneshot::Sender<Result<DumpInfo>>,
|
||||
},
|
||||
}
|
|
@ -1,203 +0,0 @@
|
|||
use std::fs::File;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use anyhow::Context;
|
||||
use chrono::{DateTime, Utc};
|
||||
use log::{info, trace, warn};
|
||||
#[cfg(test)]
|
||||
use mockall::automock;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::fs::create_dir_all;
|
||||
|
||||
use loaders::v1::MetadataV1;
|
||||
use loaders::v2::MetadataV2;
|
||||
|
||||
pub use actor::DumpActor;
|
||||
pub use handle_impl::*;
|
||||
pub use message::DumpMsg;
|
||||
|
||||
use super::{update_actor::UpdateActorHandle, uuid_resolver::UuidResolverHandle};
|
||||
use crate::index_controller::dump_actor::error::DumpActorError;
|
||||
use crate::{helpers::compression, option::IndexerOpts};
|
||||
use error::Result;
|
||||
|
||||
mod actor;
|
||||
pub mod error;
|
||||
mod handle_impl;
|
||||
mod loaders;
|
||||
mod message;
|
||||
|
||||
const META_FILE_NAME: &str = "metadata.json";
|
||||
|
||||
#[async_trait::async_trait]
|
||||
#[cfg_attr(test, automock)]
|
||||
pub trait DumpActorHandle {
|
||||
/// Start the creation of a dump
|
||||
/// Implementation: [handle_impl::DumpActorHandleImpl::create_dump]
|
||||
async fn create_dump(&self) -> Result<DumpInfo>;
|
||||
|
||||
/// Return the status of an already created dump
|
||||
/// Implementation: [handle_impl::DumpActorHandleImpl::dump_info]
|
||||
async fn dump_info(&self, uid: String) -> Result<DumpInfo>;
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(tag = "dumpVersion")]
|
||||
pub enum Metadata {
|
||||
V1(MetadataV1),
|
||||
V2(MetadataV2),
|
||||
}
|
||||
|
||||
impl Metadata {
|
||||
pub fn new_v2(index_db_size: usize, update_db_size: usize) -> Self {
|
||||
let meta = MetadataV2::new(index_db_size, update_db_size);
|
||||
Self::V2(meta)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum DumpStatus {
|
||||
Done,
|
||||
InProgress,
|
||||
Failed,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct DumpInfo {
|
||||
pub uid: String,
|
||||
pub status: DumpStatus,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub error: Option<String>,
|
||||
started_at: DateTime<Utc>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
finished_at: Option<DateTime<Utc>>,
|
||||
}
|
||||
|
||||
impl DumpInfo {
|
||||
pub fn new(uid: String, status: DumpStatus) -> Self {
|
||||
Self {
|
||||
uid,
|
||||
status,
|
||||
error: None,
|
||||
started_at: Utc::now(),
|
||||
finished_at: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_error(&mut self, error: String) {
|
||||
self.status = DumpStatus::Failed;
|
||||
self.finished_at = Some(Utc::now());
|
||||
self.error = Some(error);
|
||||
}
|
||||
|
||||
pub fn done(&mut self) {
|
||||
self.finished_at = Some(Utc::now());
|
||||
self.status = DumpStatus::Done;
|
||||
}
|
||||
|
||||
pub fn dump_already_in_progress(&self) -> bool {
|
||||
self.status == DumpStatus::InProgress
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load_dump(
|
||||
dst_path: impl AsRef<Path>,
|
||||
src_path: impl AsRef<Path>,
|
||||
index_db_size: usize,
|
||||
update_db_size: usize,
|
||||
indexer_opts: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
let tmp_src = tempfile::tempdir_in(".")?;
|
||||
let tmp_src_path = tmp_src.path();
|
||||
|
||||
compression::from_tar_gz(&src_path, tmp_src_path)?;
|
||||
|
||||
let meta_path = tmp_src_path.join(META_FILE_NAME);
|
||||
let mut meta_file = File::open(&meta_path)?;
|
||||
let meta: Metadata = serde_json::from_reader(&mut meta_file)?;
|
||||
|
||||
let dst_dir = dst_path
|
||||
.as_ref()
|
||||
.parent()
|
||||
.with_context(|| format!("Invalid db path: {}", dst_path.as_ref().display()))?;
|
||||
|
||||
let tmp_dst = tempfile::tempdir_in(dst_dir)?;
|
||||
|
||||
match meta {
|
||||
Metadata::V1(meta) => {
|
||||
meta.load_dump(&tmp_src_path, tmp_dst.path(), index_db_size, indexer_opts)?
|
||||
}
|
||||
Metadata::V2(meta) => meta.load_dump(
|
||||
&tmp_src_path,
|
||||
tmp_dst.path(),
|
||||
index_db_size,
|
||||
update_db_size,
|
||||
indexer_opts,
|
||||
)?,
|
||||
}
|
||||
// Persist and atomically rename the db
|
||||
let persisted_dump = tmp_dst.into_path();
|
||||
if dst_path.as_ref().exists() {
|
||||
warn!("Overwriting database at {}", dst_path.as_ref().display());
|
||||
std::fs::remove_dir_all(&dst_path)?;
|
||||
}
|
||||
|
||||
std::fs::rename(&persisted_dump, &dst_path)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
struct DumpTask<U, P> {
|
||||
path: PathBuf,
|
||||
uuid_resolver: U,
|
||||
update_handle: P,
|
||||
uid: String,
|
||||
update_db_size: usize,
|
||||
index_db_size: usize,
|
||||
}
|
||||
|
||||
impl<U, P> DumpTask<U, P>
|
||||
where
|
||||
U: UuidResolverHandle + Send + Sync + Clone + 'static,
|
||||
P: UpdateActorHandle + Send + Sync + Clone + 'static,
|
||||
{
|
||||
async fn run(self) -> Result<()> {
|
||||
trace!("Performing dump.");
|
||||
|
||||
create_dir_all(&self.path).await?;
|
||||
|
||||
let path_clone = self.path.clone();
|
||||
let temp_dump_dir =
|
||||
tokio::task::spawn_blocking(|| tempfile::TempDir::new_in(path_clone)).await??;
|
||||
let temp_dump_path = temp_dump_dir.path().to_owned();
|
||||
|
||||
let meta = Metadata::new_v2(self.index_db_size, self.update_db_size);
|
||||
let meta_path = temp_dump_path.join(META_FILE_NAME);
|
||||
let mut meta_file = File::create(&meta_path)?;
|
||||
serde_json::to_writer(&mut meta_file, &meta)?;
|
||||
|
||||
let uuids = self.uuid_resolver.dump(temp_dump_path.clone()).await?;
|
||||
|
||||
self.update_handle
|
||||
.dump(uuids, temp_dump_path.clone())
|
||||
.await?;
|
||||
|
||||
let dump_path = tokio::task::spawn_blocking(move || -> Result<PathBuf> {
|
||||
let temp_dump_file = tempfile::NamedTempFile::new_in(&self.path)?;
|
||||
compression::to_tar_gz(temp_dump_path, temp_dump_file.path())
|
||||
.map_err(|e| DumpActorError::Internal(e.into()))?;
|
||||
|
||||
let dump_path = self.path.join(self.uid).with_extension("dump");
|
||||
temp_dump_file.persist(&dump_path)?;
|
||||
|
||||
Ok(dump_path)
|
||||
})
|
||||
.await??;
|
||||
|
||||
info!("Created dump in {:?}.", dump_path);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
|
@ -1,40 +0,0 @@
|
|||
use meilisearch_error::Code;
|
||||
use meilisearch_error::ErrorCode;
|
||||
|
||||
use crate::index::error::IndexError;
|
||||
|
||||
use super::dump_actor::error::DumpActorError;
|
||||
use super::index_actor::error::IndexActorError;
|
||||
use super::update_actor::error::UpdateActorError;
|
||||
use super::uuid_resolver::error::UuidResolverError;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, IndexControllerError>;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum IndexControllerError {
|
||||
#[error("Index creation must have an uid")]
|
||||
MissingUid,
|
||||
#[error("{0}")]
|
||||
Uuid(#[from] UuidResolverError),
|
||||
#[error("{0}")]
|
||||
IndexActor(#[from] IndexActorError),
|
||||
#[error("{0}")]
|
||||
UpdateActor(#[from] UpdateActorError),
|
||||
#[error("{0}")]
|
||||
DumpActor(#[from] DumpActorError),
|
||||
#[error("{0}")]
|
||||
IndexError(#[from] IndexError),
|
||||
}
|
||||
|
||||
impl ErrorCode for IndexControllerError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
IndexControllerError::MissingUid => Code::BadRequest,
|
||||
IndexControllerError::Uuid(e) => e.error_code(),
|
||||
IndexControllerError::IndexActor(e) => e.error_code(),
|
||||
IndexControllerError::UpdateActor(e) => e.error_code(),
|
||||
IndexControllerError::DumpActor(e) => e.error_code(),
|
||||
IndexControllerError::IndexError(e) => e.error_code(),
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,349 +0,0 @@
|
|||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_stream::stream;
|
||||
use futures::stream::StreamExt;
|
||||
use heed::CompactionOption;
|
||||
use log::debug;
|
||||
use milli::update::UpdateBuilder;
|
||||
use tokio::task::spawn_blocking;
|
||||
use tokio::{fs, sync::mpsc};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::index::{
|
||||
update_handler::UpdateHandler, Checked, Document, SearchQuery, SearchResult, Settings,
|
||||
};
|
||||
use crate::index_controller::{
|
||||
get_arc_ownership_blocking, Failed, IndexStats, Processed, Processing,
|
||||
};
|
||||
use crate::option::IndexerOpts;
|
||||
|
||||
use super::error::{IndexActorError, Result};
|
||||
use super::{IndexMeta, IndexMsg, IndexSettings, IndexStore};
|
||||
|
||||
pub const CONCURRENT_INDEX_MSG: usize = 10;
|
||||
|
||||
pub struct IndexActor<S> {
|
||||
receiver: Option<mpsc::Receiver<IndexMsg>>,
|
||||
update_handler: Arc<UpdateHandler>,
|
||||
store: S,
|
||||
}
|
||||
|
||||
impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
||||
pub fn new(
|
||||
receiver: mpsc::Receiver<IndexMsg>,
|
||||
store: S,
|
||||
options: &IndexerOpts,
|
||||
) -> anyhow::Result<Self> {
|
||||
let update_handler = UpdateHandler::new(options)?;
|
||||
let update_handler = Arc::new(update_handler);
|
||||
let receiver = Some(receiver);
|
||||
|
||||
Ok(Self {
|
||||
receiver,
|
||||
update_handler,
|
||||
store,
|
||||
})
|
||||
}
|
||||
|
||||
/// `run` poll the write_receiver and read_receiver concurrently, but while messages send
|
||||
/// through the read channel are processed concurrently, the messages sent through the write
|
||||
/// channel are processed one at a time.
|
||||
pub async fn run(mut self) {
|
||||
let mut receiver = self
|
||||
.receiver
|
||||
.take()
|
||||
.expect("Index Actor must have a inbox at this point.");
|
||||
|
||||
let stream = stream! {
|
||||
loop {
|
||||
match receiver.recv().await {
|
||||
Some(msg) => yield msg,
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
stream
|
||||
.for_each_concurrent(Some(CONCURRENT_INDEX_MSG), |msg| self.handle_message(msg))
|
||||
.await;
|
||||
}
|
||||
|
||||
async fn handle_message(&self, msg: IndexMsg) {
|
||||
use IndexMsg::*;
|
||||
match msg {
|
||||
CreateIndex {
|
||||
uuid,
|
||||
primary_key,
|
||||
ret,
|
||||
} => {
|
||||
let _ = ret.send(self.handle_create_index(uuid, primary_key).await);
|
||||
}
|
||||
Update {
|
||||
ret,
|
||||
meta,
|
||||
uuid,
|
||||
} => {
|
||||
let _ = ret.send(self.handle_update(uuid, meta).await);
|
||||
}
|
||||
Search { ret, query, uuid } => {
|
||||
let _ = ret.send(self.handle_search(uuid, query).await);
|
||||
}
|
||||
Settings { ret, uuid } => {
|
||||
let _ = ret.send(self.handle_settings(uuid).await);
|
||||
}
|
||||
Documents {
|
||||
ret,
|
||||
uuid,
|
||||
attributes_to_retrieve,
|
||||
offset,
|
||||
limit,
|
||||
} => {
|
||||
let _ = ret.send(
|
||||
self.handle_fetch_documents(uuid, offset, limit, attributes_to_retrieve)
|
||||
.await,
|
||||
);
|
||||
}
|
||||
Document {
|
||||
uuid,
|
||||
attributes_to_retrieve,
|
||||
doc_id,
|
||||
ret,
|
||||
} => {
|
||||
let _ = ret.send(
|
||||
self.handle_fetch_document(uuid, doc_id, attributes_to_retrieve)
|
||||
.await,
|
||||
);
|
||||
}
|
||||
Delete { uuid, ret } => {
|
||||
let _ = ret.send(self.handle_delete(uuid).await);
|
||||
}
|
||||
GetMeta { uuid, ret } => {
|
||||
let _ = ret.send(self.handle_get_meta(uuid).await);
|
||||
}
|
||||
UpdateIndex {
|
||||
uuid,
|
||||
index_settings,
|
||||
ret,
|
||||
} => {
|
||||
let _ = ret.send(self.handle_update_index(uuid, index_settings).await);
|
||||
}
|
||||
Snapshot { uuid, path, ret } => {
|
||||
let _ = ret.send(self.handle_snapshot(uuid, path).await);
|
||||
}
|
||||
Dump { uuid, path, ret } => {
|
||||
let _ = ret.send(self.handle_dump(uuid, path).await);
|
||||
}
|
||||
GetStats { uuid, ret } => {
|
||||
let _ = ret.send(self.handle_get_stats(uuid).await);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> {
|
||||
let index = self
|
||||
.store
|
||||
.get(uuid)
|
||||
.await?
|
||||
.ok_or(IndexActorError::UnexistingIndex)?;
|
||||
let result = spawn_blocking(move || index.perform_search(query)).await??;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn handle_create_index(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
primary_key: Option<String>,
|
||||
) -> Result<IndexMeta> {
|
||||
let index = self.store.create(uuid, primary_key).await?;
|
||||
let meta = spawn_blocking(move || IndexMeta::new(&index)).await??;
|
||||
Ok(meta)
|
||||
}
|
||||
|
||||
async fn handle_update(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
meta: Processing,
|
||||
) -> Result<std::result::Result<Processed, Failed>> {
|
||||
debug!("Processing update {}", meta.id());
|
||||
let update_handler = self.update_handler.clone();
|
||||
let index = match self.store.get(uuid).await? {
|
||||
Some(index) => index,
|
||||
None => self.store.create(uuid, None).await?,
|
||||
};
|
||||
|
||||
Ok(spawn_blocking(move || update_handler.handle_update(index, meta)).await?)
|
||||
}
|
||||
|
||||
async fn handle_settings(&self, uuid: Uuid) -> Result<Settings<Checked>> {
|
||||
let index = self
|
||||
.store
|
||||
.get(uuid)
|
||||
.await?
|
||||
.ok_or(IndexActorError::UnexistingIndex)?;
|
||||
let result = spawn_blocking(move || index.settings()).await??;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn handle_fetch_documents(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
attributes_to_retrieve: Option<Vec<String>>,
|
||||
) -> Result<Vec<Document>> {
|
||||
let index = self
|
||||
.store
|
||||
.get(uuid)
|
||||
.await?
|
||||
.ok_or(IndexActorError::UnexistingIndex)?;
|
||||
let result =
|
||||
spawn_blocking(move || index.retrieve_documents(offset, limit, attributes_to_retrieve))
|
||||
.await??;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn handle_fetch_document(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
doc_id: String,
|
||||
attributes_to_retrieve: Option<Vec<String>>,
|
||||
) -> Result<Document> {
|
||||
let index = self
|
||||
.store
|
||||
.get(uuid)
|
||||
.await?
|
||||
.ok_or(IndexActorError::UnexistingIndex)?;
|
||||
|
||||
let result =
|
||||
spawn_blocking(move || index.retrieve_document(doc_id, attributes_to_retrieve))
|
||||
.await??;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn handle_delete(&self, uuid: Uuid) -> Result<()> {
|
||||
let index = self.store.delete(uuid).await?;
|
||||
|
||||
if let Some(index) = index {
|
||||
tokio::task::spawn(async move {
|
||||
let index = index.inner;
|
||||
let store = get_arc_ownership_blocking(index).await;
|
||||
spawn_blocking(move || {
|
||||
store.prepare_for_closing().wait();
|
||||
debug!("Index closed");
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_get_meta(&self, uuid: Uuid) -> Result<IndexMeta> {
|
||||
match self.store.get(uuid).await? {
|
||||
Some(index) => {
|
||||
let meta = spawn_blocking(move || IndexMeta::new(&index)).await??;
|
||||
Ok(meta)
|
||||
}
|
||||
None => Err(IndexActorError::UnexistingIndex),
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_update_index(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
index_settings: IndexSettings,
|
||||
) -> Result<IndexMeta> {
|
||||
let index = self
|
||||
.store
|
||||
.get(uuid)
|
||||
.await?
|
||||
.ok_or(IndexActorError::UnexistingIndex)?;
|
||||
|
||||
let result = spawn_blocking(move || match index_settings.primary_key {
|
||||
Some(primary_key) => {
|
||||
let mut txn = index.write_txn()?;
|
||||
if index.primary_key(&txn)?.is_some() {
|
||||
return Err(IndexActorError::ExistingPrimaryKey);
|
||||
}
|
||||
let mut builder = UpdateBuilder::new(0).settings(&mut txn, &index);
|
||||
builder.set_primary_key(primary_key);
|
||||
builder.execute(|_, _| ())?;
|
||||
let meta = IndexMeta::new_txn(&index, &txn)?;
|
||||
txn.commit()?;
|
||||
Ok(meta)
|
||||
}
|
||||
None => {
|
||||
let meta = IndexMeta::new(&index)?;
|
||||
Ok(meta)
|
||||
}
|
||||
})
|
||||
.await??;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn handle_snapshot(&self, uuid: Uuid, mut path: PathBuf) -> Result<()> {
|
||||
use tokio::fs::create_dir_all;
|
||||
|
||||
path.push("indexes");
|
||||
create_dir_all(&path).await?;
|
||||
|
||||
if let Some(index) = self.store.get(uuid).await? {
|
||||
let mut index_path = path.join(format!("index-{}", uuid));
|
||||
|
||||
create_dir_all(&index_path).await?;
|
||||
|
||||
index_path.push("data.mdb");
|
||||
spawn_blocking(move || -> Result<()> {
|
||||
// Get write txn to wait for ongoing write transaction before snapshot.
|
||||
let _txn = index.write_txn()?;
|
||||
index
|
||||
.env
|
||||
.copy_to_path(index_path, CompactionOption::Enabled)?;
|
||||
Ok(())
|
||||
})
|
||||
.await??;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Create a `documents.jsonl` and a `settings.json` in `path/uid/` with a dump of all the
|
||||
/// documents and all the settings.
|
||||
async fn handle_dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
|
||||
let index = self
|
||||
.store
|
||||
.get(uuid)
|
||||
.await?
|
||||
.ok_or(IndexActorError::UnexistingIndex)?;
|
||||
|
||||
let path = path.join(format!("indexes/index-{}/", uuid));
|
||||
fs::create_dir_all(&path).await?;
|
||||
|
||||
tokio::task::spawn_blocking(move || index.dump(path)).await??;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_get_stats(&self, uuid: Uuid) -> Result<IndexStats> {
|
||||
let index = self
|
||||
.store
|
||||
.get(uuid)
|
||||
.await?
|
||||
.ok_or(IndexActorError::UnexistingIndex)?;
|
||||
|
||||
spawn_blocking(move || {
|
||||
let rtxn = index.read_txn()?;
|
||||
|
||||
Ok(IndexStats {
|
||||
size: index.size(),
|
||||
number_of_documents: index.number_of_documents(&rtxn)?,
|
||||
is_indexing: None,
|
||||
field_distribution: index.field_distribution(&rtxn)?,
|
||||
})
|
||||
})
|
||||
.await?
|
||||
}
|
||||
}
|
|
@ -1,48 +0,0 @@
|
|||
use meilisearch_error::{Code, ErrorCode};
|
||||
|
||||
use crate::{error::MilliError, index::error::IndexError};
|
||||
|
||||
pub type Result<T> = std::result::Result<T, IndexActorError>;
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum IndexActorError {
|
||||
#[error("{0}")]
|
||||
IndexError(#[from] IndexError),
|
||||
#[error("Index already exists")]
|
||||
IndexAlreadyExists,
|
||||
#[error("Index not found")]
|
||||
UnexistingIndex,
|
||||
#[error("A primary key is already present. It's impossible to update it")]
|
||||
ExistingPrimaryKey,
|
||||
#[error("Internal Error: {0}")]
|
||||
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
|
||||
#[error("{0}")]
|
||||
Milli(#[from] milli::Error),
|
||||
}
|
||||
|
||||
macro_rules! internal_error {
|
||||
($($other:path), *) => {
|
||||
$(
|
||||
impl From<$other> for IndexActorError {
|
||||
fn from(other: $other) -> Self {
|
||||
Self::Internal(Box::new(other))
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
internal_error!(heed::Error, tokio::task::JoinError, std::io::Error);
|
||||
|
||||
impl ErrorCode for IndexActorError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
IndexActorError::IndexError(e) => e.error_code(),
|
||||
IndexActorError::IndexAlreadyExists => Code::IndexAlreadyExists,
|
||||
IndexActorError::UnexistingIndex => Code::IndexNotFound,
|
||||
IndexActorError::ExistingPrimaryKey => Code::PrimaryKeyAlreadyPresent,
|
||||
IndexActorError::Internal(_) => Code::Internal,
|
||||
IndexActorError::Milli(e) => MilliError(e).error_code(),
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,162 +0,0 @@
|
|||
use crate::option::IndexerOpts;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use tokio::sync::{mpsc, oneshot};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{
|
||||
index::Checked,
|
||||
index_controller::{IndexSettings, IndexStats, Processing},
|
||||
};
|
||||
use crate::{
|
||||
index::{Document, SearchQuery, SearchResult, Settings},
|
||||
index_controller::{Failed, Processed},
|
||||
};
|
||||
|
||||
use super::error::Result;
|
||||
use super::{IndexActor, IndexActorHandle, IndexMeta, IndexMsg, MapIndexStore};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct IndexActorHandleImpl {
|
||||
sender: mpsc::Sender<IndexMsg>,
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl IndexActorHandle for IndexActorHandleImpl {
|
||||
async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = IndexMsg::CreateIndex {
|
||||
ret,
|
||||
uuid,
|
||||
primary_key,
|
||||
};
|
||||
let _ = self.sender.send(msg).await;
|
||||
receiver.await.expect("IndexActor has been killed")
|
||||
}
|
||||
|
||||
async fn update(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
meta: Processing,
|
||||
) -> Result<std::result::Result<Processed, Failed>> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = IndexMsg::Update {
|
||||
ret,
|
||||
meta,
|
||||
uuid,
|
||||
};
|
||||
let _ = self.sender.send(msg).await;
|
||||
Ok(receiver.await.expect("IndexActor has been killed")?)
|
||||
}
|
||||
|
||||
async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = IndexMsg::Search { uuid, query, ret };
|
||||
let _ = self.sender.send(msg).await;
|
||||
Ok(receiver.await.expect("IndexActor has been killed")?)
|
||||
}
|
||||
|
||||
async fn settings(&self, uuid: Uuid) -> Result<Settings<Checked>> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = IndexMsg::Settings { uuid, ret };
|
||||
let _ = self.sender.send(msg).await;
|
||||
Ok(receiver.await.expect("IndexActor has been killed")?)
|
||||
}
|
||||
|
||||
async fn documents(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
attributes_to_retrieve: Option<Vec<String>>,
|
||||
) -> Result<Vec<Document>> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = IndexMsg::Documents {
|
||||
uuid,
|
||||
ret,
|
||||
offset,
|
||||
attributes_to_retrieve,
|
||||
limit,
|
||||
};
|
||||
let _ = self.sender.send(msg).await;
|
||||
Ok(receiver.await.expect("IndexActor has been killed")?)
|
||||
}
|
||||
|
||||
async fn document(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
doc_id: String,
|
||||
attributes_to_retrieve: Option<Vec<String>>,
|
||||
) -> Result<Document> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = IndexMsg::Document {
|
||||
uuid,
|
||||
ret,
|
||||
doc_id,
|
||||
attributes_to_retrieve,
|
||||
};
|
||||
let _ = self.sender.send(msg).await;
|
||||
Ok(receiver.await.expect("IndexActor has been killed")?)
|
||||
}
|
||||
|
||||
async fn delete(&self, uuid: Uuid) -> Result<()> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = IndexMsg::Delete { uuid, ret };
|
||||
let _ = self.sender.send(msg).await;
|
||||
Ok(receiver.await.expect("IndexActor has been killed")?)
|
||||
}
|
||||
|
||||
async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = IndexMsg::GetMeta { uuid, ret };
|
||||
let _ = self.sender.send(msg).await;
|
||||
Ok(receiver.await.expect("IndexActor has been killed")?)
|
||||
}
|
||||
|
||||
async fn update_index(&self, uuid: Uuid, index_settings: IndexSettings) -> Result<IndexMeta> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = IndexMsg::UpdateIndex {
|
||||
uuid,
|
||||
index_settings,
|
||||
ret,
|
||||
};
|
||||
let _ = self.sender.send(msg).await;
|
||||
Ok(receiver.await.expect("IndexActor has been killed")?)
|
||||
}
|
||||
|
||||
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = IndexMsg::Snapshot { uuid, path, ret };
|
||||
let _ = self.sender.send(msg).await;
|
||||
Ok(receiver.await.expect("IndexActor has been killed")?)
|
||||
}
|
||||
|
||||
async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = IndexMsg::Dump { uuid, path, ret };
|
||||
let _ = self.sender.send(msg).await;
|
||||
Ok(receiver.await.expect("IndexActor has been killed")?)
|
||||
}
|
||||
|
||||
async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = IndexMsg::GetStats { uuid, ret };
|
||||
let _ = self.sender.send(msg).await;
|
||||
Ok(receiver.await.expect("IndexActor has been killed")?)
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexActorHandleImpl {
|
||||
pub fn new(
|
||||
path: impl AsRef<Path>,
|
||||
index_size: usize,
|
||||
options: &IndexerOpts,
|
||||
) -> anyhow::Result<Self> {
|
||||
let (sender, receiver) = mpsc::channel(100);
|
||||
|
||||
let store = MapIndexStore::new(&path, index_size);
|
||||
let actor = IndexActor::new(receiver, store, options)?;
|
||||
tokio::task::spawn(actor.run());
|
||||
Ok(Self { sender })
|
||||
}
|
||||
}
|
|
@ -1,73 +0,0 @@
|
|||
use std::path::PathBuf;
|
||||
|
||||
use tokio::sync::oneshot;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::error::Result as IndexResult;
|
||||
use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings};
|
||||
use crate::index_controller::{Failed, IndexStats, Processed, Processing};
|
||||
|
||||
use super::{IndexMeta, IndexSettings};
|
||||
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum IndexMsg {
|
||||
CreateIndex {
|
||||
uuid: Uuid,
|
||||
primary_key: Option<String>,
|
||||
ret: oneshot::Sender<IndexResult<IndexMeta>>,
|
||||
},
|
||||
Update {
|
||||
uuid: Uuid,
|
||||
meta: Processing,
|
||||
ret: oneshot::Sender<IndexResult<Result<Processed, Failed>>>,
|
||||
},
|
||||
Search {
|
||||
uuid: Uuid,
|
||||
query: SearchQuery,
|
||||
ret: oneshot::Sender<IndexResult<SearchResult>>,
|
||||
},
|
||||
Settings {
|
||||
uuid: Uuid,
|
||||
ret: oneshot::Sender<IndexResult<Settings<Checked>>>,
|
||||
},
|
||||
Documents {
|
||||
uuid: Uuid,
|
||||
attributes_to_retrieve: Option<Vec<String>>,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
ret: oneshot::Sender<IndexResult<Vec<Document>>>,
|
||||
},
|
||||
Document {
|
||||
uuid: Uuid,
|
||||
attributes_to_retrieve: Option<Vec<String>>,
|
||||
doc_id: String,
|
||||
ret: oneshot::Sender<IndexResult<Document>>,
|
||||
},
|
||||
Delete {
|
||||
uuid: Uuid,
|
||||
ret: oneshot::Sender<IndexResult<()>>,
|
||||
},
|
||||
GetMeta {
|
||||
uuid: Uuid,
|
||||
ret: oneshot::Sender<IndexResult<IndexMeta>>,
|
||||
},
|
||||
UpdateIndex {
|
||||
uuid: Uuid,
|
||||
index_settings: IndexSettings,
|
||||
ret: oneshot::Sender<IndexResult<IndexMeta>>,
|
||||
},
|
||||
Snapshot {
|
||||
uuid: Uuid,
|
||||
path: PathBuf,
|
||||
ret: oneshot::Sender<IndexResult<()>>,
|
||||
},
|
||||
Dump {
|
||||
uuid: Uuid,
|
||||
path: PathBuf,
|
||||
ret: oneshot::Sender<IndexResult<()>>,
|
||||
},
|
||||
GetStats {
|
||||
uuid: Uuid,
|
||||
ret: oneshot::Sender<IndexResult<IndexStats>>,
|
||||
},
|
||||
}
|
|
@ -1,167 +0,0 @@
|
|||
use std::path::PathBuf;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
#[cfg(test)]
|
||||
use mockall::automock;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
use actor::IndexActor;
|
||||
pub use actor::CONCURRENT_INDEX_MSG;
|
||||
pub use handle_impl::IndexActorHandleImpl;
|
||||
use message::IndexMsg;
|
||||
use store::{IndexStore, MapIndexStore};
|
||||
|
||||
use crate::index::{Checked, Document, Index, SearchQuery, SearchResult, Settings};
|
||||
use crate::index_controller::{Failed, IndexStats, Processed, Processing};
|
||||
use error::Result;
|
||||
|
||||
use super::IndexSettings;
|
||||
|
||||
mod actor;
|
||||
pub mod error;
|
||||
mod handle_impl;
|
||||
mod message;
|
||||
mod store;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct IndexMeta {
|
||||
created_at: DateTime<Utc>,
|
||||
pub updated_at: DateTime<Utc>,
|
||||
pub primary_key: Option<String>,
|
||||
}
|
||||
|
||||
impl IndexMeta {
|
||||
fn new(index: &Index) -> Result<Self> {
|
||||
let txn = index.read_txn()?;
|
||||
Self::new_txn(index, &txn)
|
||||
}
|
||||
|
||||
fn new_txn(index: &Index, txn: &heed::RoTxn) -> Result<Self> {
|
||||
let created_at = index.created_at(txn)?;
|
||||
let updated_at = index.updated_at(txn)?;
|
||||
let primary_key = index.primary_key(txn)?.map(String::from);
|
||||
Ok(Self {
|
||||
created_at,
|
||||
updated_at,
|
||||
primary_key,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
#[cfg_attr(test, automock)]
|
||||
pub trait IndexActorHandle {
|
||||
async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta>;
|
||||
async fn update(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
meta: Processing,
|
||||
) -> Result<std::result::Result<Processed, Failed>>;
|
||||
async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult>;
|
||||
async fn settings(&self, uuid: Uuid) -> Result<Settings<Checked>>;
|
||||
|
||||
async fn documents(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
attributes_to_retrieve: Option<Vec<String>>,
|
||||
) -> Result<Vec<Document>>;
|
||||
async fn document(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
doc_id: String,
|
||||
attributes_to_retrieve: Option<Vec<String>>,
|
||||
) -> Result<Document>;
|
||||
async fn delete(&self, uuid: Uuid) -> Result<()>;
|
||||
async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta>;
|
||||
async fn update_index(&self, uuid: Uuid, index_settings: IndexSettings) -> Result<IndexMeta>;
|
||||
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()>;
|
||||
async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()>;
|
||||
async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats>;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
/// Useful for passing around an `Arc<MockIndexActorHandle>` in tests.
|
||||
impl IndexActorHandle for Arc<MockIndexActorHandle> {
|
||||
async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta> {
|
||||
self.as_ref().create_index(uuid, primary_key).await
|
||||
}
|
||||
|
||||
async fn update(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
meta: Processing,
|
||||
data: Option<std::fs::File>,
|
||||
) -> Result<std::result::Result<Processed, Failed>> {
|
||||
self.as_ref().update(uuid, meta, data).await
|
||||
}
|
||||
|
||||
async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> {
|
||||
self.as_ref().search(uuid, query).await
|
||||
}
|
||||
|
||||
async fn settings(&self, uuid: Uuid) -> Result<Settings<Checked>> {
|
||||
self.as_ref().settings(uuid).await
|
||||
}
|
||||
|
||||
async fn documents(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
attributes_to_retrieve: Option<Vec<String>>,
|
||||
) -> Result<Vec<Document>> {
|
||||
self.as_ref()
|
||||
.documents(uuid, offset, limit, attributes_to_retrieve)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn document(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
doc_id: String,
|
||||
attributes_to_retrieve: Option<Vec<String>>,
|
||||
) -> Result<Document> {
|
||||
self.as_ref()
|
||||
.document(uuid, doc_id, attributes_to_retrieve)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn delete(&self, uuid: Uuid) -> Result<()> {
|
||||
self.as_ref().delete(uuid).await
|
||||
}
|
||||
|
||||
async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta> {
|
||||
self.as_ref().get_index_meta(uuid).await
|
||||
}
|
||||
|
||||
async fn update_index(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
index_settings: IndexSettings,
|
||||
) -> Result<IndexMeta> {
|
||||
self.as_ref().update_index(uuid, index_settings).await
|
||||
}
|
||||
|
||||
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
|
||||
self.as_ref().snapshot(uuid, path).await
|
||||
}
|
||||
|
||||
async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
|
||||
self.as_ref().dump(uuid, path).await
|
||||
}
|
||||
|
||||
async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats> {
|
||||
self.as_ref().get_index_stats(uuid).await
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,109 +0,0 @@
|
|||
use std::collections::HashMap;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use milli::update::UpdateBuilder;
|
||||
use tokio::fs;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio::task::spawn_blocking;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::error::{IndexActorError, Result};
|
||||
use crate::index::Index;
|
||||
use crate::index_controller::update_file_store::UpdateFileStore;
|
||||
|
||||
type AsyncMap<K, V> = Arc<RwLock<HashMap<K, V>>>;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
pub trait IndexStore {
|
||||
async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> Result<Index>;
|
||||
async fn get(&self, uuid: Uuid) -> Result<Option<Index>>;
|
||||
async fn delete(&self, uuid: Uuid) -> Result<Option<Index>>;
|
||||
}
|
||||
|
||||
pub struct MapIndexStore {
|
||||
index_store: AsyncMap<Uuid, Index>,
|
||||
path: PathBuf,
|
||||
index_size: usize,
|
||||
update_file_store: Arc<UpdateFileStore>,
|
||||
}
|
||||
|
||||
impl MapIndexStore {
|
||||
pub fn new(path: impl AsRef<Path>, index_size: usize) -> Self {
|
||||
let update_file_store = Arc::new(UpdateFileStore::new(path.as_ref()).unwrap());
|
||||
let path = path.as_ref().join("indexes/");
|
||||
let index_store = Arc::new(RwLock::new(HashMap::new()));
|
||||
Self {
|
||||
index_store,
|
||||
path,
|
||||
index_size,
|
||||
update_file_store,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl IndexStore for MapIndexStore {
|
||||
async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> Result<Index> {
|
||||
// We need to keep the lock until we are sure the db file has been opened correclty, to
|
||||
// ensure that another db is not created at the same time.
|
||||
let mut lock = self.index_store.write().await;
|
||||
|
||||
if let Some(index) = lock.get(&uuid) {
|
||||
return Ok(index.clone());
|
||||
}
|
||||
let path = self.path.join(format!("index-{}", uuid));
|
||||
if path.exists() {
|
||||
return Err(IndexActorError::IndexAlreadyExists);
|
||||
}
|
||||
|
||||
let index_size = self.index_size;
|
||||
let file_store = self.update_file_store.clone();
|
||||
let index = spawn_blocking(move || -> Result<Index> {
|
||||
let index = Index::open(path, index_size, file_store)?;
|
||||
if let Some(primary_key) = primary_key {
|
||||
let mut txn = index.write_txn()?;
|
||||
|
||||
let mut builder = UpdateBuilder::new(0).settings(&mut txn, &index);
|
||||
builder.set_primary_key(primary_key);
|
||||
builder.execute(|_, _| ())?;
|
||||
|
||||
txn.commit()?;
|
||||
}
|
||||
Ok(index)
|
||||
})
|
||||
.await??;
|
||||
|
||||
lock.insert(uuid, index.clone());
|
||||
|
||||
Ok(index)
|
||||
}
|
||||
|
||||
async fn get(&self, uuid: Uuid) -> Result<Option<Index>> {
|
||||
let guard = self.index_store.read().await;
|
||||
match guard.get(&uuid) {
|
||||
Some(index) => Ok(Some(index.clone())),
|
||||
None => {
|
||||
// drop the guard here so we can perform the write after without deadlocking;
|
||||
drop(guard);
|
||||
let path = self.path.join(format!("index-{}", uuid));
|
||||
if !path.exists() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let index_size = self.index_size;
|
||||
let file_store = self.update_file_store.clone();
|
||||
let index = spawn_blocking(move || Index::open(path, index_size, file_store)).await??;
|
||||
self.index_store.write().await.insert(uuid, index.clone());
|
||||
Ok(Some(index))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn delete(&self, uuid: Uuid) -> Result<Option<Index>> {
|
||||
let db_path = self.path.join(format!("index-{}", uuid));
|
||||
fs::remove_dir_all(db_path).await?;
|
||||
let index = self.index_store.write().await.remove(&uuid);
|
||||
Ok(index)
|
||||
}
|
||||
}
|
|
@ -1,491 +0,0 @@
|
|||
use std::collections::BTreeMap;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use actix_web::error::PayloadError;
|
||||
use bytes::Bytes;
|
||||
use chrono::{DateTime, Utc};
|
||||
use futures::Stream;
|
||||
use log::info;
|
||||
use milli::FieldDistribution;
|
||||
use milli::update::IndexDocumentsMethod;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::time::sleep;
|
||||
use uuid::Uuid;
|
||||
|
||||
use dump_actor::DumpActorHandle;
|
||||
pub use dump_actor::{DumpInfo, DumpStatus};
|
||||
use index_actor::IndexActorHandle;
|
||||
use snapshot::load_snapshot;
|
||||
use update_actor::UpdateActorHandle;
|
||||
pub use updates::*;
|
||||
use uuid_resolver::{error::UuidResolverError, UuidResolverHandle};
|
||||
|
||||
use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings};
|
||||
use crate::option::Opt;
|
||||
use error::Result;
|
||||
|
||||
use self::dump_actor::load_dump;
|
||||
|
||||
mod dump_actor;
|
||||
pub mod error;
|
||||
pub mod index_actor;
|
||||
mod snapshot;
|
||||
pub mod update_actor;
|
||||
mod updates;
|
||||
mod uuid_resolver;
|
||||
pub mod update_file_store;
|
||||
|
||||
pub type Payload = Box<dyn Stream<Item = std::result::Result<Bytes, PayloadError>> + Send + Sync + 'static + Unpin>;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct IndexMetadata {
|
||||
#[serde(skip)]
|
||||
pub uuid: Uuid,
|
||||
pub uid: String,
|
||||
name: String,
|
||||
#[serde(flatten)]
|
||||
pub meta: index_actor::IndexMeta,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct IndexSettings {
|
||||
pub uid: Option<String>,
|
||||
pub primary_key: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Debug)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct IndexStats {
|
||||
#[serde(skip)]
|
||||
pub size: u64,
|
||||
pub number_of_documents: u64,
|
||||
/// Whether the current index is performing an update. It is initially `None` when the
|
||||
/// index returns it, since it is the `UpdateStore` that knows what index is currently indexing. It is
|
||||
/// later set to either true or false, we we retrieve the information from the `UpdateStore`
|
||||
pub is_indexing: Option<bool>,
|
||||
pub field_distribution: FieldDistribution,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct IndexController {
|
||||
uuid_resolver: uuid_resolver::UuidResolverHandleImpl,
|
||||
index_handle: index_actor::IndexActorHandleImpl,
|
||||
update_handle: update_actor::UpdateActorHandleImpl,
|
||||
dump_handle: dump_actor::DumpActorHandleImpl,
|
||||
}
|
||||
|
||||
pub enum DocumentAdditionFormat {
|
||||
Json,
|
||||
}
|
||||
|
||||
|
||||
#[derive(Serialize, Debug)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Stats {
|
||||
pub database_size: u64,
|
||||
pub last_update: Option<DateTime<Utc>>,
|
||||
pub indexes: BTreeMap<String, IndexStats>,
|
||||
}
|
||||
|
||||
pub enum Update {
|
||||
DocumentAddition {
|
||||
payload: Payload,
|
||||
primary_key: Option<String>,
|
||||
method: IndexDocumentsMethod,
|
||||
format: DocumentAdditionFormat,
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexController {
|
||||
pub fn new(path: impl AsRef<Path>, options: &Opt) -> anyhow::Result<Self> {
|
||||
let index_size = options.max_index_size.get_bytes() as usize;
|
||||
let update_store_size = options.max_index_size.get_bytes() as usize;
|
||||
|
||||
if let Some(ref path) = options.import_snapshot {
|
||||
info!("Loading from snapshot {:?}", path);
|
||||
load_snapshot(
|
||||
&options.db_path,
|
||||
path,
|
||||
options.ignore_snapshot_if_db_exists,
|
||||
options.ignore_missing_snapshot,
|
||||
)?;
|
||||
} else if let Some(ref src_path) = options.import_dump {
|
||||
load_dump(
|
||||
&options.db_path,
|
||||
src_path,
|
||||
options.max_index_size.get_bytes() as usize,
|
||||
options.max_udb_size.get_bytes() as usize,
|
||||
&options.indexer_options,
|
||||
)?;
|
||||
}
|
||||
|
||||
std::fs::create_dir_all(&path)?;
|
||||
|
||||
let uuid_resolver = uuid_resolver::UuidResolverHandleImpl::new(&path)?;
|
||||
let index_handle =
|
||||
index_actor::IndexActorHandleImpl::new(&path, index_size, &options.indexer_options)?;
|
||||
let update_handle = update_actor::UpdateActorHandleImpl::new(
|
||||
index_handle.clone(),
|
||||
&path,
|
||||
update_store_size,
|
||||
)?;
|
||||
let dump_handle = dump_actor::DumpActorHandleImpl::new(
|
||||
&options.dumps_dir,
|
||||
uuid_resolver.clone(),
|
||||
update_handle.clone(),
|
||||
options.max_index_size.get_bytes() as usize,
|
||||
options.max_udb_size.get_bytes() as usize,
|
||||
)?;
|
||||
|
||||
//if options.schedule_snapshot {
|
||||
//let snapshot_service = SnapshotService::new(
|
||||
//uuid_resolver.clone(),
|
||||
//update_handle.clone(),
|
||||
//Duration::from_secs(options.snapshot_interval_sec),
|
||||
//options.snapshot_dir.clone(),
|
||||
//options
|
||||
//.db_path
|
||||
//.file_name()
|
||||
//.map(|n| n.to_owned().into_string().expect("invalid path"))
|
||||
//.unwrap_or_else(|| String::from("data.ms")),
|
||||
//);
|
||||
|
||||
//tokio::task::spawn(snapshot_service.run());
|
||||
//}
|
||||
|
||||
Ok(Self {
|
||||
uuid_resolver,
|
||||
index_handle,
|
||||
update_handle,
|
||||
dump_handle,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn register_update(&self, uid: &str, update: Update) -> Result<UpdateStatus> {
|
||||
match self.uuid_resolver.get(uid.to_string()).await {
|
||||
Ok(uuid) => {
|
||||
let update_result = self.update_handle.update(uuid, update).await?;
|
||||
Ok(update_result)
|
||||
},
|
||||
Err(UuidResolverError::UnexistingIndex(name)) => {
|
||||
let uuid = Uuid::new_v4();
|
||||
let update_result = self.update_handle.update(uuid, update).await?;
|
||||
// ignore if index creation fails now, since it may already have been created
|
||||
let _ = self.index_handle.create_index(uuid, None).await;
|
||||
self.uuid_resolver.insert(name, uuid).await?;
|
||||
Ok(update_result)
|
||||
}
|
||||
Err(e) => Err(e.into()),
|
||||
}
|
||||
}
|
||||
|
||||
//pub async fn add_documents(
|
||||
//&self,
|
||||
//uid: String,
|
||||
//method: milli::update::IndexDocumentsMethod,
|
||||
//payload: Payload,
|
||||
//primary_key: Option<String>,
|
||||
//) -> Result<UpdateStatus> {
|
||||
//let perform_update = |uuid| async move {
|
||||
//let meta = UpdateMeta::DocumentsAddition {
|
||||
//method,
|
||||
//primary_key,
|
||||
//};
|
||||
//let (sender, receiver) = mpsc::channel(10);
|
||||
|
||||
//// It is necessary to spawn a local task to send the payload to the update handle to
|
||||
//// prevent dead_locking between the update_handle::update that waits for the update to be
|
||||
//// registered and the update_actor that waits for the the payload to be sent to it.
|
||||
//tokio::task::spawn_local(async move {
|
||||
//payload
|
||||
//.for_each(|r| async {
|
||||
//let _ = sender.send(r).await;
|
||||
//})
|
||||
//.await
|
||||
//});
|
||||
|
||||
//// This must be done *AFTER* spawning the task.
|
||||
//self.update_handle.update(meta, receiver, uuid).await
|
||||
//};
|
||||
|
||||
//match self.uuid_resolver.get(uid).await {
|
||||
//Ok(uuid) => Ok(perform_update(uuid).await?),
|
||||
//Err(UuidResolverError::UnexistingIndex(name)) => {
|
||||
//let uuid = Uuid::new_v4();
|
||||
//let status = perform_update(uuid).await?;
|
||||
//// ignore if index creation fails now, since it may already have been created
|
||||
//let _ = self.index_handle.create_index(uuid, None).await;
|
||||
//self.uuid_resolver.insert(name, uuid).await?;
|
||||
//Ok(status)
|
||||
//}
|
||||
//Err(e) => Err(e.into()),
|
||||
//}
|
||||
//}
|
||||
|
||||
//pub async fn clear_documents(&self, uid: String) -> Result<UpdateStatus> {
|
||||
//let uuid = self.uuid_resolver.get(uid).await?;
|
||||
//let meta = UpdateMeta::ClearDocuments;
|
||||
//let (_, receiver) = mpsc::channel(1);
|
||||
//let status = self.update_handle.update(meta, receiver, uuid).await?;
|
||||
//Ok(status)
|
||||
//}
|
||||
|
||||
//pub async fn delete_documents(
|
||||
//&self,
|
||||
//uid: String,
|
||||
//documents: Vec<String>,
|
||||
//) -> Result<UpdateStatus> {
|
||||
//let uuid = self.uuid_resolver.get(uid).await?;
|
||||
//let meta = UpdateMeta::DeleteDocuments { ids: documents };
|
||||
//let (_, receiver) = mpsc::channel(1);
|
||||
//let status = self.update_handle.update(meta, receiver, uuid).await?;
|
||||
//Ok(status)
|
||||
//}
|
||||
|
||||
//pub async fn update_settings(
|
||||
//&self,
|
||||
//uid: String,
|
||||
//settings: Settings<Checked>,
|
||||
//create: bool,
|
||||
//) -> Result<UpdateStatus> {
|
||||
//let perform_udpate = |uuid| async move {
|
||||
//let meta = UpdateMeta::Settings(settings.into_unchecked());
|
||||
//// Nothing so send, drop the sender right away, as not to block the update actor.
|
||||
//let (_, receiver) = mpsc::channel(1);
|
||||
//self.update_handle.update(meta, receiver, uuid).await
|
||||
//};
|
||||
|
||||
//match self.uuid_resolver.get(uid).await {
|
||||
//Ok(uuid) => Ok(perform_udpate(uuid).await?),
|
||||
//Err(UuidResolverError::UnexistingIndex(name)) if create => {
|
||||
//let uuid = Uuid::new_v4();
|
||||
//let status = perform_udpate(uuid).await?;
|
||||
//// ignore if index creation fails now, since it may already have been created
|
||||
//let _ = self.index_handle.create_index(uuid, None).await;
|
||||
//self.uuid_resolver.insert(name, uuid).await?;
|
||||
//Ok(status)
|
||||
//}
|
||||
//Err(e) => Err(e.into()),
|
||||
//}
|
||||
//}
|
||||
|
||||
//pub async fn create_index(&self, index_settings: IndexSettings) -> Result<IndexMetadata> {
|
||||
//let IndexSettings { uid, primary_key } = index_settings;
|
||||
//let uid = uid.ok_or(IndexControllerError::MissingUid)?;
|
||||
//let uuid = Uuid::new_v4();
|
||||
//let meta = self.index_handle.create_index(uuid, primary_key).await?;
|
||||
//self.uuid_resolver.insert(uid.clone(), uuid).await?;
|
||||
//let meta = IndexMetadata {
|
||||
//uuid,
|
||||
//name: uid.clone(),
|
||||
//uid,
|
||||
//meta,
|
||||
//};
|
||||
|
||||
//Ok(meta)
|
||||
//}
|
||||
|
||||
//pub async fn delete_index(&self, uid: String) -> Result<()> {
|
||||
//let uuid = self.uuid_resolver.delete(uid).await?;
|
||||
|
||||
//// We remove the index from the resolver synchronously, and effectively perform the index
|
||||
//// deletion as a background task.
|
||||
//let update_handle = self.update_handle.clone();
|
||||
//let index_handle = self.index_handle.clone();
|
||||
//tokio::spawn(async move {
|
||||
//if let Err(e) = update_handle.delete(uuid).await {
|
||||
//error!("Error while deleting index: {}", e);
|
||||
//}
|
||||
//if let Err(e) = index_handle.delete(uuid).await {
|
||||
//error!("Error while deleting index: {}", e);
|
||||
//}
|
||||
//});
|
||||
|
||||
//Ok(())
|
||||
//}
|
||||
|
||||
pub async fn update_status(&self, uid: String, id: u64) -> Result<UpdateStatus> {
|
||||
let uuid = self.uuid_resolver.get(uid).await?;
|
||||
let result = self.update_handle.update_status(uuid, id).await?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub async fn all_update_status(&self, uid: String) -> Result<Vec<UpdateStatus>> {
|
||||
let uuid = self.uuid_resolver.get(uid).await?;
|
||||
let result = self.update_handle.get_all_updates_status(uuid).await?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub async fn list_indexes(&self) -> Result<Vec<IndexMetadata>> {
|
||||
let uuids = self.uuid_resolver.list().await?;
|
||||
|
||||
let mut ret = Vec::new();
|
||||
|
||||
for (uid, uuid) in uuids {
|
||||
let meta = self.index_handle.get_index_meta(uuid).await?;
|
||||
let meta = IndexMetadata {
|
||||
uuid,
|
||||
name: uid.clone(),
|
||||
uid,
|
||||
meta,
|
||||
};
|
||||
ret.push(meta);
|
||||
}
|
||||
|
||||
Ok(ret)
|
||||
}
|
||||
|
||||
pub async fn settings(&self, uid: String) -> Result<Settings<Checked>> {
|
||||
let uuid = self.uuid_resolver.get(uid.clone()).await?;
|
||||
let settings = self.index_handle.settings(uuid).await?;
|
||||
Ok(settings)
|
||||
}
|
||||
|
||||
pub async fn documents(
|
||||
&self,
|
||||
uid: String,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
attributes_to_retrieve: Option<Vec<String>>,
|
||||
) -> Result<Vec<Document>> {
|
||||
let uuid = self.uuid_resolver.get(uid.clone()).await?;
|
||||
let documents = self
|
||||
.index_handle
|
||||
.documents(uuid, offset, limit, attributes_to_retrieve)
|
||||
.await?;
|
||||
Ok(documents)
|
||||
}
|
||||
|
||||
pub async fn document(
|
||||
&self,
|
||||
uid: String,
|
||||
doc_id: String,
|
||||
attributes_to_retrieve: Option<Vec<String>>,
|
||||
) -> Result<Document> {
|
||||
let uuid = self.uuid_resolver.get(uid.clone()).await?;
|
||||
let document = self
|
||||
.index_handle
|
||||
.document(uuid, doc_id, attributes_to_retrieve)
|
||||
.await?;
|
||||
Ok(document)
|
||||
}
|
||||
|
||||
pub async fn update_index(
|
||||
&self,
|
||||
uid: String,
|
||||
mut index_settings: IndexSettings,
|
||||
) -> Result<IndexMetadata> {
|
||||
if index_settings.uid.is_some() {
|
||||
index_settings.uid.take();
|
||||
}
|
||||
|
||||
let uuid = self.uuid_resolver.get(uid.clone()).await?;
|
||||
let meta = self.index_handle.update_index(uuid, index_settings).await?;
|
||||
let meta = IndexMetadata {
|
||||
uuid,
|
||||
name: uid.clone(),
|
||||
uid,
|
||||
meta,
|
||||
};
|
||||
Ok(meta)
|
||||
}
|
||||
|
||||
pub async fn search(&self, uid: String, query: SearchQuery) -> Result<SearchResult> {
|
||||
let uuid = self.uuid_resolver.get(uid).await?;
|
||||
let result = self.index_handle.search(uuid, query).await?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub async fn get_index(&self, uid: String) -> Result<IndexMetadata> {
|
||||
let uuid = self.uuid_resolver.get(uid.clone()).await?;
|
||||
let meta = self.index_handle.get_index_meta(uuid).await?;
|
||||
let meta = IndexMetadata {
|
||||
uuid,
|
||||
name: uid.clone(),
|
||||
uid,
|
||||
meta,
|
||||
};
|
||||
Ok(meta)
|
||||
}
|
||||
|
||||
pub async fn get_uuids_size(&self) -> Result<u64> {
|
||||
Ok(self.uuid_resolver.get_size().await?)
|
||||
}
|
||||
|
||||
pub async fn get_index_stats(&self, uid: String) -> Result<IndexStats> {
|
||||
let uuid = self.uuid_resolver.get(uid).await?;
|
||||
let update_infos = self.update_handle.get_info().await?;
|
||||
let mut stats = self.index_handle.get_index_stats(uuid).await?;
|
||||
// Check if the currently indexing update is from out index.
|
||||
stats.is_indexing = Some(Some(uuid) == update_infos.processing);
|
||||
Ok(stats)
|
||||
}
|
||||
|
||||
pub async fn get_all_stats(&self) -> Result<Stats> {
|
||||
let update_infos = self.update_handle.get_info().await?;
|
||||
let mut database_size = self.get_uuids_size().await? + update_infos.size;
|
||||
let mut last_update: Option<DateTime<_>> = None;
|
||||
let mut indexes = BTreeMap::new();
|
||||
|
||||
for index in self.list_indexes().await? {
|
||||
let mut index_stats = self.index_handle.get_index_stats(index.uuid).await?;
|
||||
database_size += index_stats.size;
|
||||
|
||||
last_update = last_update.map_or(Some(index.meta.updated_at), |last| {
|
||||
Some(last.max(index.meta.updated_at))
|
||||
});
|
||||
|
||||
index_stats.is_indexing = Some(Some(index.uuid) == update_infos.processing);
|
||||
|
||||
indexes.insert(index.uid, index_stats);
|
||||
}
|
||||
|
||||
Ok(Stats {
|
||||
database_size,
|
||||
last_update,
|
||||
indexes,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn create_dump(&self) -> Result<DumpInfo> {
|
||||
Ok(self.dump_handle.create_dump().await?)
|
||||
}
|
||||
|
||||
pub async fn dump_info(&self, uid: String) -> Result<DumpInfo> {
|
||||
Ok(self.dump_handle.dump_info(uid).await?)
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T {
|
||||
loop {
|
||||
match Arc::try_unwrap(item) {
|
||||
Ok(item) => return item,
|
||||
Err(item_arc) => {
|
||||
item = item_arc;
|
||||
sleep(Duration::from_millis(100)).await;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses the v1 version of the Asc ranking rules `asc(price)`and returns the field name.
|
||||
pub fn asc_ranking_rule(text: &str) -> Option<&str> {
|
||||
text.split_once("asc(")
|
||||
.and_then(|(_, tail)| tail.rsplit_once(")"))
|
||||
.map(|(field, _)| field)
|
||||
}
|
||||
|
||||
/// Parses the v1 version of the Desc ranking rules `asc(price)`and returns the field name.
|
||||
pub fn desc_ranking_rule(text: &str) -> Option<&str> {
|
||||
text.split_once("desc(")
|
||||
.and_then(|(_, tail)| tail.rsplit_once(")"))
|
||||
.map(|(field, _)| field)
|
||||
}
|
||||
|
||||
fn update_files_path(path: impl AsRef<Path>) -> PathBuf {
|
||||
path.as_ref().join("updates/updates_files")
|
||||
}
|
|
@ -1,261 +0,0 @@
|
|||
use std::path::Path;
|
||||
|
||||
use anyhow::bail;
|
||||
|
||||
use crate::helpers::compression;
|
||||
|
||||
//pub struct SnapshotService<U, R> {
|
||||
//uuid_resolver_handle: R,
|
||||
//update_handle: U,
|
||||
//snapshot_period: Duration,
|
||||
//snapshot_path: PathBuf,
|
||||
//db_name: String,
|
||||
//}
|
||||
|
||||
//impl<U, R> SnapshotService<U, R>
|
||||
//where
|
||||
//U: UpdateActorHandle,
|
||||
//R: UuidResolverHandle,
|
||||
//{
|
||||
//pub fn new(
|
||||
//uuid_resolver_handle: R,
|
||||
//update_handle: U,
|
||||
//snapshot_period: Duration,
|
||||
//snapshot_path: PathBuf,
|
||||
//db_name: String,
|
||||
//) -> Self {
|
||||
//Self {
|
||||
//uuid_resolver_handle,
|
||||
//update_handle,
|
||||
//snapshot_period,
|
||||
//snapshot_path,
|
||||
//db_name,
|
||||
//}
|
||||
//}
|
||||
|
||||
//pub async fn run(self) {
|
||||
//info!(
|
||||
//"Snapshot scheduled every {}s.",
|
||||
//self.snapshot_period.as_secs()
|
||||
//);
|
||||
//loop {
|
||||
//if let Err(e) = self.perform_snapshot().await {
|
||||
//error!("Error while performing snapshot: {}", e);
|
||||
//}
|
||||
//sleep(self.snapshot_period).await;
|
||||
//}
|
||||
//}
|
||||
|
||||
//async fn perform_snapshot(&self) -> anyhow::Result<()> {
|
||||
//trace!("Performing snapshot.");
|
||||
|
||||
//let snapshot_dir = self.snapshot_path.clone();
|
||||
//fs::create_dir_all(&snapshot_dir).await?;
|
||||
//let temp_snapshot_dir =
|
||||
//spawn_blocking(move || tempfile::tempdir_in(snapshot_dir)).await??;
|
||||
//let temp_snapshot_path = temp_snapshot_dir.path().to_owned();
|
||||
|
||||
//let uuids = self
|
||||
//.uuid_resolver_handle
|
||||
//.snapshot(temp_snapshot_path.clone())
|
||||
//.await?;
|
||||
|
||||
//if uuids.is_empty() {
|
||||
//return Ok(());
|
||||
//}
|
||||
|
||||
//self.update_handle
|
||||
//.snapshot(uuids, temp_snapshot_path.clone())
|
||||
//.await?;
|
||||
//let snapshot_dir = self.snapshot_path.clone();
|
||||
//let snapshot_path = self
|
||||
//.snapshot_path
|
||||
//.join(format!("{}.snapshot", self.db_name));
|
||||
//let snapshot_path = spawn_blocking(move || -> anyhow::Result<PathBuf> {
|
||||
//let temp_snapshot_file = tempfile::NamedTempFile::new_in(snapshot_dir)?;
|
||||
//let temp_snapshot_file_path = temp_snapshot_file.path().to_owned();
|
||||
//compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?;
|
||||
//temp_snapshot_file.persist(&snapshot_path)?;
|
||||
//Ok(snapshot_path)
|
||||
//})
|
||||
//.await??;
|
||||
|
||||
//trace!("Created snapshot in {:?}.", snapshot_path);
|
||||
|
||||
//Ok(())
|
||||
//}
|
||||
//}
|
||||
|
||||
pub fn load_snapshot(
|
||||
db_path: impl AsRef<Path>,
|
||||
snapshot_path: impl AsRef<Path>,
|
||||
ignore_snapshot_if_db_exists: bool,
|
||||
ignore_missing_snapshot: bool,
|
||||
) -> anyhow::Result<()> {
|
||||
if !db_path.as_ref().exists() && snapshot_path.as_ref().exists() {
|
||||
match compression::from_tar_gz(snapshot_path, &db_path) {
|
||||
Ok(()) => Ok(()),
|
||||
Err(e) => {
|
||||
// clean created db folder
|
||||
std::fs::remove_dir_all(&db_path)?;
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
} else if db_path.as_ref().exists() && !ignore_snapshot_if_db_exists {
|
||||
bail!(
|
||||
"database already exists at {:?}, try to delete it or rename it",
|
||||
db_path
|
||||
.as_ref()
|
||||
.canonicalize()
|
||||
.unwrap_or_else(|_| db_path.as_ref().to_owned())
|
||||
)
|
||||
} else if !snapshot_path.as_ref().exists() && !ignore_missing_snapshot {
|
||||
bail!(
|
||||
"snapshot doesn't exist at {:?}",
|
||||
snapshot_path
|
||||
.as_ref()
|
||||
.canonicalize()
|
||||
.unwrap_or_else(|_| snapshot_path.as_ref().to_owned())
|
||||
)
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::iter::FromIterator;
|
||||
use std::{collections::HashSet, sync::Arc};
|
||||
|
||||
use futures::future::{err, ok};
|
||||
use rand::Rng;
|
||||
use tokio::time::timeout;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::*;
|
||||
use crate::index_controller::index_actor::MockIndexActorHandle;
|
||||
use crate::index_controller::update_actor::{
|
||||
error::UpdateActorError, MockUpdateActorHandle, UpdateActorHandleImpl,
|
||||
};
|
||||
use crate::index_controller::uuid_resolver::{
|
||||
error::UuidResolverError, MockUuidResolverHandle,
|
||||
};
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_normal() {
|
||||
let mut rng = rand::thread_rng();
|
||||
let uuids_num: usize = rng.gen_range(5..10);
|
||||
let uuids = (0..uuids_num)
|
||||
.map(|_| Uuid::new_v4())
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
let mut uuid_resolver = MockUuidResolverHandle::new();
|
||||
let uuids_clone = uuids.clone();
|
||||
uuid_resolver
|
||||
.expect_snapshot()
|
||||
.times(1)
|
||||
.returning(move |_| Box::pin(ok(uuids_clone.clone())));
|
||||
|
||||
let uuids_clone = uuids.clone();
|
||||
let mut index_handle = MockIndexActorHandle::new();
|
||||
index_handle
|
||||
.expect_snapshot()
|
||||
.withf(move |uuid, _path| uuids_clone.contains(uuid))
|
||||
.times(uuids_num)
|
||||
.returning(move |_, _| Box::pin(ok(())));
|
||||
|
||||
let dir = tempfile::tempdir_in(".").unwrap();
|
||||
let handle = Arc::new(index_handle);
|
||||
let update_handle =
|
||||
UpdateActorHandleImpl::<Vec<u8>>::new(handle.clone(), dir.path(), 4096 * 100).unwrap();
|
||||
|
||||
let snapshot_path = tempfile::tempdir_in(".").unwrap();
|
||||
let snapshot_service = SnapshotService::new(
|
||||
uuid_resolver,
|
||||
update_handle,
|
||||
Duration::from_millis(100),
|
||||
snapshot_path.path().to_owned(),
|
||||
"data.ms".to_string(),
|
||||
);
|
||||
|
||||
snapshot_service.perform_snapshot().await.unwrap();
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn error_performing_uuid_snapshot() {
|
||||
let mut uuid_resolver = MockUuidResolverHandle::new();
|
||||
uuid_resolver
|
||||
.expect_snapshot()
|
||||
.times(1)
|
||||
// abitrary error
|
||||
.returning(|_| Box::pin(err(UuidResolverError::NameAlreadyExist)));
|
||||
|
||||
let update_handle = MockUpdateActorHandle::new();
|
||||
|
||||
let snapshot_path = tempfile::tempdir_in(".").unwrap();
|
||||
let snapshot_service = SnapshotService::new(
|
||||
uuid_resolver,
|
||||
update_handle,
|
||||
Duration::from_millis(100),
|
||||
snapshot_path.path().to_owned(),
|
||||
"data.ms".to_string(),
|
||||
);
|
||||
|
||||
assert!(snapshot_service.perform_snapshot().await.is_err());
|
||||
// Nothing was written to the file
|
||||
assert!(!snapshot_path.path().join("data.ms.snapshot").exists());
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn error_performing_index_snapshot() {
|
||||
let uuid = Uuid::new_v4();
|
||||
let mut uuid_resolver = MockUuidResolverHandle::new();
|
||||
uuid_resolver
|
||||
.expect_snapshot()
|
||||
.times(1)
|
||||
.returning(move |_| Box::pin(ok(HashSet::from_iter(Some(uuid)))));
|
||||
|
||||
let mut update_handle = MockUpdateActorHandle::new();
|
||||
update_handle
|
||||
.expect_snapshot()
|
||||
// abitrary error
|
||||
.returning(|_, _| Box::pin(err(UpdateActorError::UnexistingUpdate(0))));
|
||||
|
||||
let snapshot_path = tempfile::tempdir_in(".").unwrap();
|
||||
let snapshot_service = SnapshotService::new(
|
||||
uuid_resolver,
|
||||
update_handle,
|
||||
Duration::from_millis(100),
|
||||
snapshot_path.path().to_owned(),
|
||||
"data.ms".to_string(),
|
||||
);
|
||||
|
||||
assert!(snapshot_service.perform_snapshot().await.is_err());
|
||||
// Nothing was written to the file
|
||||
assert!(!snapshot_path.path().join("data.ms.snapshot").exists());
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_loop() {
|
||||
let mut uuid_resolver = MockUuidResolverHandle::new();
|
||||
uuid_resolver
|
||||
.expect_snapshot()
|
||||
// we expect the funtion to be called between 2 and 3 time in the given interval.
|
||||
.times(2..4)
|
||||
// abitrary error, to short-circuit the function
|
||||
.returning(move |_| Box::pin(err(UuidResolverError::NameAlreadyExist)));
|
||||
|
||||
let update_handle = MockUpdateActorHandle::new();
|
||||
|
||||
let snapshot_path = tempfile::tempdir_in(".").unwrap();
|
||||
let snapshot_service = SnapshotService::new(
|
||||
uuid_resolver,
|
||||
update_handle,
|
||||
Duration::from_millis(100),
|
||||
snapshot_path.path().to_owned(),
|
||||
"data.ms".to_string(),
|
||||
);
|
||||
|
||||
let _ = timeout(Duration::from_millis(300), snapshot_service.run()).await;
|
||||
}
|
||||
}
|
|
@ -1,260 +0,0 @@
|
|||
use std::collections::HashSet;
|
||||
use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::Arc;
|
||||
|
||||
use actix_web::error::PayloadError;
|
||||
use async_stream::stream;
|
||||
use bytes::Bytes;
|
||||
use futures::{Stream, StreamExt};
|
||||
use log::trace;
|
||||
use milli::documents::DocumentBatchBuilder;
|
||||
use serde_json::{Map, Value};
|
||||
use tokio::sync::mpsc;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::error::{Result, UpdateActorError};
|
||||
use super::RegisterUpdate;
|
||||
use super::{UpdateMsg, UpdateStore, UpdateStoreInfo, Update};
|
||||
use crate::index_controller::index_actor::IndexActorHandle;
|
||||
use crate::index_controller::update_file_store::UpdateFileStore;
|
||||
use crate::index_controller::{DocumentAdditionFormat, Payload, UpdateStatus};
|
||||
|
||||
pub struct UpdateActor<I> {
|
||||
store: Arc<UpdateStore>,
|
||||
inbox: Option<mpsc::Receiver<UpdateMsg>>,
|
||||
update_file_store: UpdateFileStore,
|
||||
index_handle: I,
|
||||
must_exit: Arc<AtomicBool>,
|
||||
}
|
||||
|
||||
struct StreamReader<S> {
|
||||
stream: S,
|
||||
current: Option<Bytes>,
|
||||
}
|
||||
|
||||
impl<S> StreamReader<S> {
|
||||
fn new(stream: S) -> Self {
|
||||
Self { stream, current: None }
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
impl<S: Stream<Item = std::result::Result<Bytes, PayloadError>> + Unpin> io::Read for StreamReader<S> {
|
||||
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
||||
match self.current.take() {
|
||||
Some(mut bytes) => {
|
||||
let copied = bytes.split_to(buf.len());
|
||||
buf.copy_from_slice(&copied);
|
||||
if !bytes.is_empty() {
|
||||
self.current.replace(bytes);
|
||||
}
|
||||
Ok(copied.len())
|
||||
}
|
||||
None => {
|
||||
match tokio::runtime::Handle::current().block_on(self.stream.next()) {
|
||||
Some(Ok(bytes)) => {
|
||||
self.current.replace(bytes);
|
||||
self.read(buf)
|
||||
},
|
||||
Some(Err(e)) => Err(io::Error::new(io::ErrorKind::BrokenPipe, e)),
|
||||
None => return Ok(0),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<I> UpdateActor<I>
|
||||
where
|
||||
I: IndexActorHandle + Clone + Sync + Send + 'static,
|
||||
{
|
||||
pub fn new(
|
||||
update_db_size: usize,
|
||||
inbox: mpsc::Receiver<UpdateMsg>,
|
||||
path: impl AsRef<Path>,
|
||||
index_handle: I,
|
||||
) -> anyhow::Result<Self> {
|
||||
let path = path.as_ref().to_owned();
|
||||
std::fs::create_dir_all(&path)?;
|
||||
|
||||
let mut options = heed::EnvOpenOptions::new();
|
||||
options.map_size(update_db_size);
|
||||
|
||||
let must_exit = Arc::new(AtomicBool::new(false));
|
||||
|
||||
let store = UpdateStore::open(options, &path, index_handle.clone(), must_exit.clone())?;
|
||||
|
||||
let inbox = Some(inbox);
|
||||
|
||||
let update_file_store = UpdateFileStore::new(&path).unwrap();
|
||||
|
||||
Ok(Self {
|
||||
store,
|
||||
inbox,
|
||||
index_handle,
|
||||
must_exit,
|
||||
update_file_store
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn run(mut self) {
|
||||
use UpdateMsg::*;
|
||||
|
||||
trace!("Started update actor.");
|
||||
|
||||
let mut inbox = self
|
||||
.inbox
|
||||
.take()
|
||||
.expect("A receiver should be present by now.");
|
||||
|
||||
let must_exit = self.must_exit.clone();
|
||||
let stream = stream! {
|
||||
loop {
|
||||
let msg = inbox.recv().await;
|
||||
|
||||
if must_exit.load(std::sync::atomic::Ordering::Relaxed) {
|
||||
break;
|
||||
}
|
||||
|
||||
match msg {
|
||||
Some(msg) => yield msg,
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
stream
|
||||
.for_each_concurrent(Some(10), |msg| async {
|
||||
match msg {
|
||||
Update {
|
||||
uuid,
|
||||
update,
|
||||
ret,
|
||||
} => {
|
||||
let _ = ret.send(self.handle_update(uuid, update).await);
|
||||
}
|
||||
ListUpdates { uuid, ret } => {
|
||||
let _ = ret.send(self.handle_list_updates(uuid).await);
|
||||
}
|
||||
GetUpdate { uuid, ret, id } => {
|
||||
let _ = ret.send(self.handle_get_update(uuid, id).await);
|
||||
}
|
||||
Delete { uuid, ret } => {
|
||||
let _ = ret.send(self.handle_delete(uuid).await);
|
||||
}
|
||||
Snapshot { uuids, path, ret } => {
|
||||
let _ = ret.send(self.handle_snapshot(uuids, path).await);
|
||||
}
|
||||
GetInfo { ret } => {
|
||||
let _ = ret.send(self.handle_get_info().await);
|
||||
}
|
||||
Dump { uuids, path, ret } => {
|
||||
let _ = ret.send(self.handle_dump(uuids, path).await);
|
||||
}
|
||||
}
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
async fn handle_update(
|
||||
&self,
|
||||
index_uuid: Uuid,
|
||||
update: Update,
|
||||
) -> Result<UpdateStatus> {
|
||||
let registration = match update {
|
||||
Update::DocumentAddition { payload, primary_key, method, format } => {
|
||||
let content_uuid = match format {
|
||||
DocumentAdditionFormat::Json => self.documents_from_json(payload).await?,
|
||||
};
|
||||
|
||||
RegisterUpdate::DocumentAddition { primary_key, method, content_uuid }
|
||||
}
|
||||
};
|
||||
|
||||
let store = self.store.clone();
|
||||
let status = tokio::task::spawn_blocking(move || store.register_update(index_uuid, registration)).await??;
|
||||
|
||||
Ok(status.into())
|
||||
}
|
||||
|
||||
async fn documents_from_json(&self, payload: Payload) -> Result<Uuid> {
|
||||
let file_store = self.update_file_store.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let (uuid, mut file) = file_store.new_update().unwrap();
|
||||
let mut builder = DocumentBatchBuilder::new(&mut *file).unwrap();
|
||||
|
||||
let documents: Vec<Map<String, Value>> = serde_json::from_reader(StreamReader::new(payload))?;
|
||||
builder.add_documents(documents).unwrap();
|
||||
builder.finish().unwrap();
|
||||
|
||||
file.persist();
|
||||
|
||||
Ok(uuid)
|
||||
}).await?
|
||||
}
|
||||
|
||||
async fn handle_list_updates(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>> {
|
||||
let update_store = self.store.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let result = update_store.list(uuid)?;
|
||||
Ok(result)
|
||||
})
|
||||
.await?
|
||||
}
|
||||
|
||||
async fn handle_get_update(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus> {
|
||||
let store = self.store.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let result = store
|
||||
.meta(uuid, id)?
|
||||
.ok_or(UpdateActorError::UnexistingUpdate(id))?;
|
||||
Ok(result)
|
||||
})
|
||||
.await?
|
||||
}
|
||||
|
||||
async fn handle_delete(&self, uuid: Uuid) -> Result<()> {
|
||||
let store = self.store.clone();
|
||||
|
||||
tokio::task::spawn_blocking(move || store.delete_all(uuid)).await??;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_snapshot(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
|
||||
let index_handle = self.index_handle.clone();
|
||||
let update_store = self.store.clone();
|
||||
|
||||
tokio::task::spawn_blocking(move || update_store.snapshot(&uuids, &path, index_handle))
|
||||
.await??;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_dump(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
|
||||
let index_handle = self.index_handle.clone();
|
||||
let update_store = self.store.clone();
|
||||
|
||||
tokio::task::spawn_blocking(move || -> Result<()> {
|
||||
update_store.dump(&uuids, path.to_path_buf(), index_handle)?;
|
||||
Ok(())
|
||||
})
|
||||
.await??;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_get_info(&self) -> Result<UpdateStoreInfo> {
|
||||
let update_store = self.store.clone();
|
||||
let info = tokio::task::spawn_blocking(move || -> Result<UpdateStoreInfo> {
|
||||
let info = update_store.get_info()?;
|
||||
Ok(info)
|
||||
})
|
||||
.await??;
|
||||
|
||||
Ok(info)
|
||||
}
|
||||
|
||||
}
|
|
@ -1,61 +0,0 @@
|
|||
use std::error::Error;
|
||||
|
||||
use meilisearch_error::{Code, ErrorCode};
|
||||
|
||||
use crate::index_controller::index_actor::error::IndexActorError;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, UpdateActorError>;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum UpdateActorError {
|
||||
#[error("Update {0} not found.")]
|
||||
UnexistingUpdate(u64),
|
||||
#[error("Internal error: {0}")]
|
||||
Internal(Box<dyn Error + Send + Sync + 'static>),
|
||||
#[error("{0}")]
|
||||
IndexActor(#[from] IndexActorError),
|
||||
#[error(
|
||||
"update store was shut down due to a fatal error, please check your logs for more info."
|
||||
)]
|
||||
FatalUpdateStoreError,
|
||||
#[error("{0}")]
|
||||
InvalidPayload(Box<dyn Error + Send + Sync + 'static>),
|
||||
#[error("{0}")]
|
||||
PayloadError(#[from] actix_web::error::PayloadError),
|
||||
}
|
||||
|
||||
impl<T> From<tokio::sync::mpsc::error::SendError<T>> for UpdateActorError {
|
||||
fn from(_: tokio::sync::mpsc::error::SendError<T>) -> Self {
|
||||
Self::FatalUpdateStoreError
|
||||
}
|
||||
}
|
||||
|
||||
impl From<tokio::sync::oneshot::error::RecvError> for UpdateActorError {
|
||||
fn from(_: tokio::sync::oneshot::error::RecvError) -> Self {
|
||||
Self::FatalUpdateStoreError
|
||||
}
|
||||
}
|
||||
|
||||
internal_error!(
|
||||
UpdateActorError: heed::Error,
|
||||
std::io::Error,
|
||||
serde_json::Error,
|
||||
tokio::task::JoinError
|
||||
);
|
||||
|
||||
impl ErrorCode for UpdateActorError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
UpdateActorError::UnexistingUpdate(_) => Code::NotFound,
|
||||
UpdateActorError::Internal(_) => Code::Internal,
|
||||
UpdateActorError::IndexActor(e) => e.error_code(),
|
||||
UpdateActorError::FatalUpdateStoreError => Code::Internal,
|
||||
UpdateActorError::InvalidPayload(_) => Code::BadRequest,
|
||||
UpdateActorError::PayloadError(error) => match error {
|
||||
actix_web::error::PayloadError::Overflow => Code::PayloadTooLarge,
|
||||
_ => Code::Internal,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,93 +0,0 @@
|
|||
use std::collections::HashSet;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use tokio::sync::{mpsc, oneshot};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::index_controller::{IndexActorHandle, Update, UpdateStatus};
|
||||
|
||||
use super::error::Result;
|
||||
use super::{UpdateActor, UpdateActorHandle, UpdateMsg, UpdateStoreInfo};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct UpdateActorHandleImpl {
|
||||
sender: mpsc::Sender<UpdateMsg>,
|
||||
}
|
||||
|
||||
impl UpdateActorHandleImpl {
|
||||
pub fn new<I>(
|
||||
index_handle: I,
|
||||
path: impl AsRef<Path>,
|
||||
update_store_size: usize,
|
||||
) -> anyhow::Result<Self>
|
||||
where
|
||||
I: IndexActorHandle + Clone + Sync + Send +'static,
|
||||
{
|
||||
let path = path.as_ref().to_owned();
|
||||
let (sender, receiver) = mpsc::channel(100);
|
||||
let actor = UpdateActor::new(update_store_size, receiver, path, index_handle)?;
|
||||
|
||||
tokio::task::spawn_local(actor.run());
|
||||
|
||||
Ok(Self { sender })
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl UpdateActorHandle for UpdateActorHandleImpl {
|
||||
async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = UpdateMsg::ListUpdates { uuid, ret };
|
||||
self.sender.send(msg).await?;
|
||||
receiver.await?
|
||||
}
|
||||
async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = UpdateMsg::GetUpdate { uuid, id, ret };
|
||||
self.sender.send(msg).await?;
|
||||
receiver.await?
|
||||
}
|
||||
|
||||
async fn delete(&self, uuid: Uuid) -> Result<()> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = UpdateMsg::Delete { uuid, ret };
|
||||
self.sender.send(msg).await?;
|
||||
receiver.await?
|
||||
}
|
||||
|
||||
async fn snapshot(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = UpdateMsg::Snapshot { uuids, path, ret };
|
||||
self.sender.send(msg).await?;
|
||||
receiver.await?
|
||||
}
|
||||
|
||||
async fn dump(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = UpdateMsg::Dump { uuids, path, ret };
|
||||
self.sender.send(msg).await?;
|
||||
receiver.await?
|
||||
}
|
||||
|
||||
async fn get_info(&self) -> Result<UpdateStoreInfo> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = UpdateMsg::GetInfo { ret };
|
||||
self.sender.send(msg).await?;
|
||||
receiver.await?
|
||||
}
|
||||
|
||||
async fn update(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
update: Update,
|
||||
) -> Result<UpdateStatus> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = UpdateMsg::Update {
|
||||
uuid,
|
||||
update,
|
||||
ret,
|
||||
};
|
||||
self.sender.send(msg).await?;
|
||||
receiver.await?
|
||||
}
|
||||
}
|
|
@ -1,42 +0,0 @@
|
|||
use std::collections::HashSet;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use tokio::sync::oneshot;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::error::Result;
|
||||
use super::{UpdateStatus, UpdateStoreInfo, Update};
|
||||
|
||||
pub enum UpdateMsg {
|
||||
Update {
|
||||
uuid: Uuid,
|
||||
update: Update,
|
||||
ret: oneshot::Sender<Result<UpdateStatus>>,
|
||||
},
|
||||
ListUpdates {
|
||||
uuid: Uuid,
|
||||
ret: oneshot::Sender<Result<Vec<UpdateStatus>>>,
|
||||
},
|
||||
GetUpdate {
|
||||
uuid: Uuid,
|
||||
ret: oneshot::Sender<Result<UpdateStatus>>,
|
||||
id: u64,
|
||||
},
|
||||
Delete {
|
||||
uuid: Uuid,
|
||||
ret: oneshot::Sender<Result<()>>,
|
||||
},
|
||||
Snapshot {
|
||||
uuids: HashSet<Uuid>,
|
||||
path: PathBuf,
|
||||
ret: oneshot::Sender<Result<()>>,
|
||||
},
|
||||
Dump {
|
||||
uuids: HashSet<Uuid>,
|
||||
path: PathBuf,
|
||||
ret: oneshot::Sender<Result<()>>,
|
||||
},
|
||||
GetInfo {
|
||||
ret: oneshot::Sender<Result<UpdateStoreInfo>>,
|
||||
},
|
||||
}
|
|
@ -1,49 +0,0 @@
|
|||
use std::{collections::HashSet, path::PathBuf};
|
||||
|
||||
use milli::update::IndexDocumentsMethod;
|
||||
use uuid::Uuid;
|
||||
use serde::{Serialize, Deserialize};
|
||||
|
||||
use crate::index_controller::UpdateStatus;
|
||||
use super::Update;
|
||||
|
||||
use actor::UpdateActor;
|
||||
use error::Result;
|
||||
use message::UpdateMsg;
|
||||
|
||||
pub use handle_impl::UpdateActorHandleImpl;
|
||||
pub use store::{UpdateStore, UpdateStoreInfo};
|
||||
|
||||
mod actor;
|
||||
pub mod error;
|
||||
mod handle_impl;
|
||||
mod message;
|
||||
pub mod store;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum RegisterUpdate {
|
||||
DocumentAddition {
|
||||
primary_key: Option<String>,
|
||||
method: IndexDocumentsMethod,
|
||||
content_uuid: Uuid,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
use mockall::automock;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
pub trait UpdateActorHandle {
|
||||
async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>>;
|
||||
async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus>;
|
||||
async fn delete(&self, uuid: Uuid) -> Result<()>;
|
||||
async fn snapshot(&self, uuid: HashSet<Uuid>, path: PathBuf) -> Result<()>;
|
||||
async fn dump(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()>;
|
||||
async fn get_info(&self) -> Result<UpdateStoreInfo>;
|
||||
async fn update(
|
||||
&self,
|
||||
uuid: Uuid,
|
||||
update: Update,
|
||||
) -> Result<UpdateStatus>;
|
||||
}
|
|
@ -1,86 +0,0 @@
|
|||
use std::{borrow::Cow, convert::TryInto, mem::size_of};
|
||||
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
use uuid::Uuid;
|
||||
|
||||
pub struct NextIdCodec;
|
||||
|
||||
pub enum NextIdKey {
|
||||
Global,
|
||||
Index(Uuid),
|
||||
}
|
||||
|
||||
impl<'a> BytesEncode<'a> for NextIdCodec {
|
||||
type EItem = NextIdKey;
|
||||
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
match item {
|
||||
NextIdKey::Global => Some(Cow::Borrowed(b"__global__")),
|
||||
NextIdKey::Index(ref uuid) => Some(Cow::Borrowed(uuid.as_bytes())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct PendingKeyCodec;
|
||||
|
||||
impl<'a> BytesEncode<'a> for PendingKeyCodec {
|
||||
type EItem = (u64, Uuid, u64);
|
||||
|
||||
fn bytes_encode((global_id, uuid, update_id): &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
let mut bytes = Vec::with_capacity(size_of::<Self::EItem>());
|
||||
bytes.extend_from_slice(&global_id.to_be_bytes());
|
||||
bytes.extend_from_slice(uuid.as_bytes());
|
||||
bytes.extend_from_slice(&update_id.to_be_bytes());
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BytesDecode<'a> for PendingKeyCodec {
|
||||
type DItem = (u64, Uuid, u64);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let global_id_bytes = bytes.get(0..size_of::<u64>())?.try_into().ok()?;
|
||||
let global_id = u64::from_be_bytes(global_id_bytes);
|
||||
|
||||
let uuid_bytes = bytes
|
||||
.get(size_of::<u64>()..(size_of::<u64>() + size_of::<Uuid>()))?
|
||||
.try_into()
|
||||
.ok()?;
|
||||
let uuid = Uuid::from_bytes(uuid_bytes);
|
||||
|
||||
let update_id_bytes = bytes
|
||||
.get((size_of::<u64>() + size_of::<Uuid>())..)?
|
||||
.try_into()
|
||||
.ok()?;
|
||||
let update_id = u64::from_be_bytes(update_id_bytes);
|
||||
|
||||
Some((global_id, uuid, update_id))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct UpdateKeyCodec;
|
||||
|
||||
impl<'a> BytesEncode<'a> for UpdateKeyCodec {
|
||||
type EItem = (Uuid, u64);
|
||||
|
||||
fn bytes_encode((uuid, update_id): &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
let mut bytes = Vec::with_capacity(size_of::<Self::EItem>());
|
||||
bytes.extend_from_slice(uuid.as_bytes());
|
||||
bytes.extend_from_slice(&update_id.to_be_bytes());
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BytesDecode<'a> for UpdateKeyCodec {
|
||||
type DItem = (Uuid, u64);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let uuid_bytes = bytes.get(0..size_of::<Uuid>())?.try_into().ok()?;
|
||||
let uuid = Uuid::from_bytes(uuid_bytes);
|
||||
|
||||
let update_id_bytes = bytes.get(size_of::<Uuid>()..)?.try_into().ok()?;
|
||||
let update_id = u64::from_be_bytes(update_id_bytes);
|
||||
|
||||
Some((uuid, update_id))
|
||||
}
|
||||
}
|
|
@ -1,186 +0,0 @@
|
|||
use std::{
|
||||
collections::HashSet,
|
||||
fs::{create_dir_all, File},
|
||||
io::Write,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use heed::RoTxn;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::{Result, State, UpdateStore};
|
||||
use crate::index_controller::{
|
||||
index_actor::IndexActorHandle,
|
||||
UpdateStatus,
|
||||
};
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
struct UpdateEntry {
|
||||
uuid: Uuid,
|
||||
update: UpdateStatus,
|
||||
}
|
||||
|
||||
impl UpdateStore {
|
||||
pub fn dump(
|
||||
&self,
|
||||
uuids: &HashSet<Uuid>,
|
||||
path: PathBuf,
|
||||
handle: impl IndexActorHandle,
|
||||
) -> Result<()> {
|
||||
let state_lock = self.state.write();
|
||||
state_lock.swap(State::Dumping);
|
||||
|
||||
// txn must *always* be acquired after state lock, or it will dead lock.
|
||||
let txn = self.env.write_txn()?;
|
||||
|
||||
let dump_path = path.join("updates");
|
||||
create_dir_all(&dump_path)?;
|
||||
|
||||
self.dump_updates(&txn, uuids, &dump_path)?;
|
||||
|
||||
let fut = dump_indexes(uuids, handle, &path);
|
||||
tokio::runtime::Handle::current().block_on(fut)?;
|
||||
|
||||
state_lock.swap(State::Idle);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn dump_updates(
|
||||
&self,
|
||||
txn: &RoTxn,
|
||||
uuids: &HashSet<Uuid>,
|
||||
path: impl AsRef<Path>,
|
||||
) -> Result<()> {
|
||||
let dump_data_path = path.as_ref().join("data.jsonl");
|
||||
let mut dump_data_file = File::create(dump_data_path)?;
|
||||
|
||||
let update_files_path = path.as_ref().join(super::UPDATE_DIR);
|
||||
create_dir_all(&update_files_path)?;
|
||||
|
||||
self.dump_pending(txn, uuids, &mut dump_data_file, &path)?;
|
||||
self.dump_completed(txn, uuids, &mut dump_data_file)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn dump_pending(
|
||||
&self,
|
||||
_txn: &RoTxn,
|
||||
_uuids: &HashSet<Uuid>,
|
||||
_file: &mut File,
|
||||
_dst_path: impl AsRef<Path>,
|
||||
) -> Result<()> {
|
||||
todo!()
|
||||
//let pendings = self.pending_queue.iter(txn)?.lazily_decode_data();
|
||||
|
||||
//for pending in pendings {
|
||||
//let ((_, uuid, _), data) = pending?;
|
||||
//if uuids.contains(&uuid) {
|
||||
//let update = data.decode()?;
|
||||
|
||||
//if let Some(ref update_uuid) = update.content {
|
||||
//let src = super::update_uuid_to_file_path(&self.path, *update_uuid);
|
||||
//let dst = super::update_uuid_to_file_path(&dst_path, *update_uuid);
|
||||
//std::fs::copy(src, dst)?;
|
||||
//}
|
||||
|
||||
//let update_json = UpdateEntry {
|
||||
//uuid,
|
||||
//update: update.into(),
|
||||
//};
|
||||
|
||||
//serde_json::to_writer(&mut file, &update_json)?;
|
||||
//file.write_all(b"\n")?;
|
||||
//}
|
||||
//}
|
||||
|
||||
//Ok(())
|
||||
}
|
||||
|
||||
fn dump_completed(
|
||||
&self,
|
||||
txn: &RoTxn,
|
||||
uuids: &HashSet<Uuid>,
|
||||
mut file: &mut File,
|
||||
) -> Result<()> {
|
||||
let updates = self.updates.iter(txn)?.lazily_decode_data();
|
||||
|
||||
for update in updates {
|
||||
let ((uuid, _), data) = update?;
|
||||
if uuids.contains(&uuid) {
|
||||
let update = data.decode()?;
|
||||
|
||||
let update_json = UpdateEntry { uuid, update };
|
||||
|
||||
serde_json::to_writer(&mut file, &update_json)?;
|
||||
file.write_all(b"\n")?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn load_dump(
|
||||
_src: impl AsRef<Path>,
|
||||
_dst: impl AsRef<Path>,
|
||||
_db_size: usize,
|
||||
) -> anyhow::Result<()> {
|
||||
todo!()
|
||||
//let dst_update_path = dst.as_ref().join("updates/");
|
||||
//create_dir_all(&dst_update_path)?;
|
||||
|
||||
//let mut options = EnvOpenOptions::new();
|
||||
//options.map_size(db_size as usize);
|
||||
//let (store, _) = UpdateStore::new(options, &dst_update_path)?;
|
||||
|
||||
//let src_update_path = src.as_ref().join("updates");
|
||||
//let update_data = File::open(&src_update_path.join("data.jsonl"))?;
|
||||
//let mut update_data = BufReader::new(update_data);
|
||||
|
||||
//std::fs::create_dir_all(dst_update_path.join("update_files/"))?;
|
||||
|
||||
//let mut wtxn = store.env.write_txn()?;
|
||||
//let mut line = String::new();
|
||||
//loop {
|
||||
//match update_data.read_line(&mut line) {
|
||||
//Ok(0) => break,
|
||||
//Ok(_) => {
|
||||
//let UpdateEntry { uuid, update } = serde_json::from_str(&line)?;
|
||||
//store.register_raw_updates(&mut wtxn, &update, uuid)?;
|
||||
|
||||
//// Copy ascociated update path if it exists
|
||||
//if let UpdateStatus::Enqueued(Enqueued {
|
||||
//content: Some(uuid),
|
||||
//..
|
||||
//}) = update
|
||||
//{
|
||||
//let src = update_uuid_to_file_path(&src_update_path, uuid);
|
||||
//let dst = update_uuid_to_file_path(&dst_update_path, uuid);
|
||||
//std::fs::copy(src, dst)?;
|
||||
//}
|
||||
//}
|
||||
//_ => break,
|
||||
//}
|
||||
|
||||
//line.clear();
|
||||
//}
|
||||
|
||||
//wtxn.commit()?;
|
||||
|
||||
//Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
async fn dump_indexes(
|
||||
uuids: &HashSet<Uuid>,
|
||||
handle: impl IndexActorHandle,
|
||||
path: impl AsRef<Path>,
|
||||
) -> Result<()> {
|
||||
for uuid in uuids {
|
||||
handle.dump(*uuid, path.as_ref().to_owned()).await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
|
@ -1,708 +0,0 @@
|
|||
mod codec;
|
||||
pub mod dump;
|
||||
|
||||
use std::fs::{create_dir_all, remove_file};
|
||||
use std::path::Path;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::{
|
||||
collections::{BTreeMap, HashSet},
|
||||
path::PathBuf,
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use arc_swap::ArcSwap;
|
||||
use futures::StreamExt;
|
||||
use heed::types::{ByteSlice, OwnedType, SerdeJson};
|
||||
use heed::zerocopy::U64;
|
||||
use heed::{CompactionOption, Database, Env, EnvOpenOptions};
|
||||
use log::error;
|
||||
use parking_lot::{Mutex, MutexGuard};
|
||||
use tokio::runtime::Handle;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::sync::mpsc::error::TrySendError;
|
||||
use tokio::time::timeout;
|
||||
use uuid::Uuid;
|
||||
|
||||
use codec::*;
|
||||
|
||||
use super::RegisterUpdate;
|
||||
use super::error::Result;
|
||||
use crate::helpers::EnvSizer;
|
||||
use crate::index_controller::update_files_path;
|
||||
use crate::index_controller::{index_actor::CONCURRENT_INDEX_MSG, updates::*, IndexActorHandle};
|
||||
|
||||
#[allow(clippy::upper_case_acronyms)]
|
||||
type BEU64 = U64<heed::byteorder::BE>;
|
||||
|
||||
const UPDATE_DIR: &str = "update_files";
|
||||
|
||||
pub struct UpdateStoreInfo {
|
||||
/// Size of the update store in bytes.
|
||||
pub size: u64,
|
||||
/// Uuid of the currently processing update if it exists
|
||||
pub processing: Option<Uuid>,
|
||||
}
|
||||
|
||||
/// A data structure that allows concurrent reads AND exactly one writer.
|
||||
pub struct StateLock {
|
||||
lock: Mutex<()>,
|
||||
data: ArcSwap<State>,
|
||||
}
|
||||
|
||||
pub struct StateLockGuard<'a> {
|
||||
_lock: MutexGuard<'a, ()>,
|
||||
state: &'a StateLock,
|
||||
}
|
||||
|
||||
impl StateLockGuard<'_> {
|
||||
pub fn swap(&self, state: State) -> Arc<State> {
|
||||
self.state.data.swap(Arc::new(state))
|
||||
}
|
||||
}
|
||||
|
||||
impl StateLock {
|
||||
fn from_state(state: State) -> Self {
|
||||
let lock = Mutex::new(());
|
||||
let data = ArcSwap::from(Arc::new(state));
|
||||
Self { lock, data }
|
||||
}
|
||||
|
||||
pub fn read(&self) -> Arc<State> {
|
||||
self.data.load().clone()
|
||||
}
|
||||
|
||||
pub fn write(&self) -> StateLockGuard {
|
||||
let _lock = self.lock.lock();
|
||||
let state = &self;
|
||||
StateLockGuard { _lock, state }
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum State {
|
||||
Idle,
|
||||
Processing(Uuid, Processing),
|
||||
Snapshoting,
|
||||
Dumping,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct UpdateStore {
|
||||
pub env: Env,
|
||||
/// A queue containing the updates to process, ordered by arrival.
|
||||
/// The key are built as follow:
|
||||
/// | global_update_id | index_uuid | update_id |
|
||||
/// | 8-bytes | 16-bytes | 8-bytes |
|
||||
pending_queue: Database<PendingKeyCodec, SerdeJson<Enqueued>>,
|
||||
/// Map indexes to the next available update id. If NextIdKey::Global is queried, then the next
|
||||
/// global update id is returned
|
||||
next_update_id: Database<NextIdCodec, OwnedType<BEU64>>,
|
||||
/// Contains all the performed updates meta, be they failed, aborted, or processed.
|
||||
/// The keys are built as follow:
|
||||
/// | Uuid | id |
|
||||
/// | 16-bytes | 8-bytes |
|
||||
updates: Database<UpdateKeyCodec, SerdeJson<UpdateStatus>>,
|
||||
/// Indicates the current state of the update store,
|
||||
state: Arc<StateLock>,
|
||||
/// Wake up the loop when a new event occurs.
|
||||
notification_sender: mpsc::Sender<()>,
|
||||
path: PathBuf,
|
||||
}
|
||||
|
||||
impl UpdateStore {
|
||||
fn new(
|
||||
mut options: EnvOpenOptions,
|
||||
path: impl AsRef<Path>,
|
||||
) -> anyhow::Result<(Self, mpsc::Receiver<()>)> {
|
||||
options.max_dbs(5);
|
||||
|
||||
let update_path = path.as_ref().join("updates");
|
||||
std::fs::create_dir_all(&update_path)?;
|
||||
let env = options.open(update_path)?;
|
||||
let pending_queue = env.create_database(Some("pending-queue"))?;
|
||||
let next_update_id = env.create_database(Some("next-update-id"))?;
|
||||
let updates = env.create_database(Some("updates"))?;
|
||||
|
||||
let state = Arc::new(StateLock::from_state(State::Idle));
|
||||
|
||||
let (notification_sender, notification_receiver) = mpsc::channel(1);
|
||||
|
||||
Ok((
|
||||
Self {
|
||||
env,
|
||||
pending_queue,
|
||||
next_update_id,
|
||||
updates,
|
||||
state,
|
||||
notification_sender,
|
||||
path: path.as_ref().to_owned(),
|
||||
},
|
||||
notification_receiver,
|
||||
))
|
||||
}
|
||||
|
||||
pub fn open(
|
||||
options: EnvOpenOptions,
|
||||
path: impl AsRef<Path>,
|
||||
index_handle: impl IndexActorHandle + Clone + Sync + Send + 'static,
|
||||
must_exit: Arc<AtomicBool>,
|
||||
) -> anyhow::Result<Arc<Self>> {
|
||||
let (update_store, mut notification_receiver) = Self::new(options, path)?;
|
||||
let update_store = Arc::new(update_store);
|
||||
|
||||
// Send a first notification to trigger the process.
|
||||
if let Err(TrySendError::Closed(())) = update_store.notification_sender.try_send(()) {
|
||||
panic!("Failed to init update store");
|
||||
}
|
||||
|
||||
// We need a weak reference so we can take ownership on the arc later when we
|
||||
// want to close the index.
|
||||
let duration = Duration::from_secs(10 * 60); // 10 minutes
|
||||
let update_store_weak = Arc::downgrade(&update_store);
|
||||
tokio::task::spawn_local(async move {
|
||||
// Block and wait for something to process with a timeout. The timeout
|
||||
// function returns a Result and we must just unlock the loop on Result.
|
||||
'outer: while timeout(duration, notification_receiver.recv())
|
||||
.await
|
||||
.map_or(true, |o| o.is_some())
|
||||
{
|
||||
loop {
|
||||
match update_store_weak.upgrade() {
|
||||
Some(update_store) => {
|
||||
let handler = index_handle.clone();
|
||||
let res = tokio::task::spawn_blocking(move || {
|
||||
update_store.process_pending_update(handler)
|
||||
})
|
||||
.await
|
||||
.expect("Fatal error processing update.");
|
||||
match res {
|
||||
Ok(Some(_)) => (),
|
||||
Ok(None) => break,
|
||||
Err(e) => {
|
||||
error!("Fatal error while processing an update that requires the update store to shutdown: {}", e);
|
||||
must_exit.store(true, Ordering::SeqCst);
|
||||
break 'outer;
|
||||
}
|
||||
}
|
||||
}
|
||||
// the ownership on the arc has been taken, we need to exit.
|
||||
None => break 'outer,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
error!("Update store loop exited.");
|
||||
});
|
||||
|
||||
Ok(update_store)
|
||||
}
|
||||
|
||||
/// Returns the next global update id and the next update id for a given `index_uuid`.
|
||||
fn next_update_id(&self, txn: &mut heed::RwTxn, index_uuid: Uuid) -> heed::Result<(u64, u64)> {
|
||||
let global_id = self
|
||||
.next_update_id
|
||||
.get(txn, &NextIdKey::Global)?
|
||||
.map(U64::get)
|
||||
.unwrap_or_default();
|
||||
|
||||
self.next_update_id
|
||||
.put(txn, &NextIdKey::Global, &BEU64::new(global_id + 1))?;
|
||||
|
||||
let update_id = self.next_update_id_raw(txn, index_uuid)?;
|
||||
|
||||
Ok((global_id, update_id))
|
||||
}
|
||||
|
||||
/// Returns the next next update id for a given `index_uuid` without
|
||||
/// incrementing the global update id. This is useful for the dumps.
|
||||
fn next_update_id_raw(&self, txn: &mut heed::RwTxn, index_uuid: Uuid) -> heed::Result<u64> {
|
||||
let update_id = self
|
||||
.next_update_id
|
||||
.get(txn, &NextIdKey::Index(index_uuid))?
|
||||
.map(U64::get)
|
||||
.unwrap_or_default();
|
||||
|
||||
self.next_update_id.put(
|
||||
txn,
|
||||
&NextIdKey::Index(index_uuid),
|
||||
&BEU64::new(update_id + 1),
|
||||
)?;
|
||||
|
||||
Ok(update_id)
|
||||
}
|
||||
|
||||
/// Registers the update content in the pending store and the meta
|
||||
/// into the pending-meta store. Returns the new unique update id.
|
||||
pub fn register_update(
|
||||
&self,
|
||||
index_uuid: Uuid,
|
||||
update: RegisterUpdate,
|
||||
) -> heed::Result<Enqueued> {
|
||||
let mut txn = self.env.write_txn()?;
|
||||
let (global_id, update_id) = self.next_update_id(&mut txn, index_uuid)?;
|
||||
let meta = Enqueued::new(update, update_id);
|
||||
|
||||
self.pending_queue
|
||||
.put(&mut txn, &(global_id, index_uuid, update_id), &meta)?;
|
||||
|
||||
txn.commit()?;
|
||||
|
||||
if let Err(TrySendError::Closed(())) = self.notification_sender.try_send(()) {
|
||||
panic!("Update store loop exited");
|
||||
}
|
||||
|
||||
Ok(meta)
|
||||
}
|
||||
|
||||
// /// Push already processed update in the UpdateStore without triggering the notification
|
||||
// /// process. This is useful for the dumps.
|
||||
//pub fn register_raw_updates(
|
||||
//&self,
|
||||
//wtxn: &mut heed::RwTxn,
|
||||
//update: &UpdateStatus,
|
||||
//index_uuid: Uuid,
|
||||
//) -> heed::Result<()> {
|
||||
//match update {
|
||||
//UpdateStatus::Enqueued(enqueued) => {
|
||||
//let (global_id, _update_id) = self.next_update_id(wtxn, index_uuid)?;
|
||||
//self.pending_queue.remap_key_type::<PendingKeyCodec>().put(
|
||||
//wtxn,
|
||||
//&(global_id, index_uuid, enqueued.id()),
|
||||
//enqueued,
|
||||
//)?;
|
||||
//}
|
||||
//_ => {
|
||||
//let _update_id = self.next_update_id_raw(wtxn, index_uuid)?;
|
||||
//self.updates.put(wtxn, &(index_uuid, update.id()), update)?;
|
||||
//}
|
||||
//}
|
||||
//Ok(())
|
||||
//}
|
||||
|
||||
/// Executes the user provided function on the next pending update (the one with the lowest id).
|
||||
/// This is asynchronous as it let the user process the update with a read-only txn and
|
||||
/// only writing the result meta to the processed-meta store *after* it has been processed.
|
||||
fn process_pending_update(&self, index_handle: impl IndexActorHandle) -> Result<Option<()>> {
|
||||
// Create a read transaction to be able to retrieve the pending update in order.
|
||||
let rtxn = self.env.read_txn()?;
|
||||
let first_meta = self.pending_queue.first(&rtxn)?;
|
||||
drop(rtxn);
|
||||
|
||||
// If there is a pending update we process and only keep
|
||||
// a reader while processing it, not a writer.
|
||||
match first_meta {
|
||||
Some(((global_id, index_uuid, _), pending)) => {
|
||||
let processing = pending.processing();
|
||||
// Acquire the state lock and set the current state to processing.
|
||||
// txn must *always* be acquired after state lock, or it will dead lock.
|
||||
let state = self.state.write();
|
||||
state.swap(State::Processing(index_uuid, processing.clone()));
|
||||
|
||||
let result =
|
||||
self.perform_update(processing, index_handle, index_uuid, global_id);
|
||||
|
||||
state.swap(State::Idle);
|
||||
|
||||
result
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
fn perform_update(
|
||||
&self,
|
||||
processing: Processing,
|
||||
index_handle: impl IndexActorHandle,
|
||||
index_uuid: Uuid,
|
||||
global_id: u64,
|
||||
) -> Result<Option<()>> {
|
||||
// Process the pending update using the provided user function.
|
||||
let handle = Handle::current();
|
||||
let update_id = processing.id();
|
||||
let result =
|
||||
match handle.block_on(index_handle.update(index_uuid, processing.clone())) {
|
||||
Ok(result) => result,
|
||||
Err(e) => Err(processing.fail(e.into())),
|
||||
};
|
||||
|
||||
// Once the pending update have been successfully processed
|
||||
// we must remove the content from the pending and processing stores and
|
||||
// write the *new* meta to the processed-meta store and commit.
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
self.pending_queue
|
||||
.delete(&mut wtxn, &(global_id, index_uuid, update_id))?;
|
||||
|
||||
let result = match result {
|
||||
Ok(res) => res.into(),
|
||||
Err(res) => res.into(),
|
||||
};
|
||||
|
||||
self.updates
|
||||
.put(&mut wtxn, &(index_uuid, update_id), &result)?;
|
||||
|
||||
wtxn.commit()?;
|
||||
|
||||
Ok(Some(()))
|
||||
}
|
||||
|
||||
/// List the updates for `index_uuid`.
|
||||
pub fn list(&self, index_uuid: Uuid) -> Result<Vec<UpdateStatus>> {
|
||||
let mut update_list = BTreeMap::<u64, UpdateStatus>::new();
|
||||
|
||||
let txn = self.env.read_txn()?;
|
||||
|
||||
let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data();
|
||||
for entry in pendings {
|
||||
let ((_, uuid, id), pending) = entry?;
|
||||
if uuid == index_uuid {
|
||||
update_list.insert(id, pending.decode()?.into());
|
||||
}
|
||||
}
|
||||
|
||||
let updates = self
|
||||
.updates
|
||||
.remap_key_type::<ByteSlice>()
|
||||
.prefix_iter(&txn, index_uuid.as_bytes())?;
|
||||
|
||||
for entry in updates {
|
||||
let (_, update) = entry?;
|
||||
update_list.insert(update.id(), update);
|
||||
}
|
||||
|
||||
// If the currently processing update is from this index, replace the corresponding pending update with this one.
|
||||
match *self.state.read() {
|
||||
State::Processing(uuid, ref processing) if uuid == index_uuid => {
|
||||
update_list.insert(processing.id(), processing.clone().into());
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
Ok(update_list.into_iter().map(|(_, v)| v).collect())
|
||||
}
|
||||
|
||||
/// Returns the update associated meta or `None` if the update doesn't exist.
|
||||
pub fn meta(&self, index_uuid: Uuid, update_id: u64) -> heed::Result<Option<UpdateStatus>> {
|
||||
// Check if the update is the one currently processing
|
||||
match *self.state.read() {
|
||||
State::Processing(uuid, ref processing)
|
||||
if uuid == index_uuid && processing.id() == update_id =>
|
||||
{
|
||||
return Ok(Some(processing.clone().into()));
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
let txn = self.env.read_txn()?;
|
||||
// Else, check if it is in the updates database:
|
||||
let update = self.updates.get(&txn, &(index_uuid, update_id))?;
|
||||
|
||||
if let Some(update) = update {
|
||||
return Ok(Some(update));
|
||||
}
|
||||
|
||||
// If nothing was found yet, we resolve to iterate over the pending queue.
|
||||
let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data();
|
||||
|
||||
for entry in pendings {
|
||||
let ((_, uuid, id), pending) = entry?;
|
||||
if uuid == index_uuid && id == update_id {
|
||||
return Ok(Some(pending.decode()?.into()));
|
||||
}
|
||||
}
|
||||
|
||||
// No update was found.
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
/// Delete all updates for an index from the update store. If the currently processing update
|
||||
/// is for `index_uuid`, the call will block until the update is terminated.
|
||||
pub fn delete_all(&self, index_uuid: Uuid) -> Result<()> {
|
||||
let mut txn = self.env.write_txn()?;
|
||||
// Contains all the content file paths that we need to be removed if the deletion was successful.
|
||||
let uuids_to_remove = Vec::new();
|
||||
|
||||
let mut pendings = self.pending_queue.iter_mut(&mut txn)?.lazily_decode_data();
|
||||
|
||||
while let Some(Ok(((_, uuid, _), pending))) = pendings.next() {
|
||||
if uuid == index_uuid {
|
||||
let mut _pending = pending.decode()?;
|
||||
//if let Some(update_uuid) = pending.content.take() {
|
||||
//uuids_to_remove.push(update_uuid);
|
||||
//}
|
||||
|
||||
// Invariant check: we can only delete the current entry when we don't hold
|
||||
// references to it anymore. This must be done after we have retrieved its content.
|
||||
unsafe {
|
||||
pendings.del_current()?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
drop(pendings);
|
||||
|
||||
let mut updates = self
|
||||
.updates
|
||||
.remap_key_type::<ByteSlice>()
|
||||
.prefix_iter_mut(&mut txn, index_uuid.as_bytes())?
|
||||
.lazily_decode_data();
|
||||
|
||||
while let Some(_) = updates.next() {
|
||||
unsafe {
|
||||
updates.del_current()?;
|
||||
}
|
||||
}
|
||||
|
||||
drop(updates);
|
||||
|
||||
txn.commit()?;
|
||||
|
||||
// If the currently processing update is from our index, we wait until it is
|
||||
// finished before returning. This ensure that no write to the index occurs after we delete it.
|
||||
if let State::Processing(uuid, _) = *self.state.read() {
|
||||
if uuid == index_uuid {
|
||||
// wait for a write lock, do nothing with it.
|
||||
self.state.write();
|
||||
}
|
||||
}
|
||||
|
||||
// Finally, remove any outstanding update files. This must be done after waiting for the
|
||||
// last update to ensure that the update files are not deleted before the update needs
|
||||
// them.
|
||||
uuids_to_remove
|
||||
.iter()
|
||||
.map(|uuid: &Uuid| update_files_path(&self.path).join(uuid.to_string()))
|
||||
.for_each(|path| {
|
||||
let _ = remove_file(path);
|
||||
});
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn snapshot(
|
||||
&self,
|
||||
uuids: &HashSet<Uuid>,
|
||||
path: impl AsRef<Path>,
|
||||
handle: impl IndexActorHandle + Clone,
|
||||
) -> Result<()> {
|
||||
let state_lock = self.state.write();
|
||||
state_lock.swap(State::Snapshoting);
|
||||
|
||||
let txn = self.env.write_txn()?;
|
||||
|
||||
let update_path = path.as_ref().join("updates");
|
||||
create_dir_all(&update_path)?;
|
||||
|
||||
// acquire write lock to prevent further writes during snapshot
|
||||
create_dir_all(&update_path)?;
|
||||
let db_path = update_path.join("data.mdb");
|
||||
|
||||
// create db snapshot
|
||||
self.env.copy_to_path(&db_path, CompactionOption::Enabled)?;
|
||||
|
||||
let update_files_path = update_path.join(UPDATE_DIR);
|
||||
create_dir_all(&update_files_path)?;
|
||||
|
||||
let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data();
|
||||
|
||||
for entry in pendings {
|
||||
let ((_, _uuid, _), _pending) = entry?;
|
||||
//if uuids.contains(&uuid) {
|
||||
//if let Enqueued {
|
||||
//content: Some(uuid),
|
||||
//..
|
||||
//} = pending.decode()?
|
||||
//{
|
||||
//let path = update_uuid_to_file_path(&self.path, uuid);
|
||||
//copy(path, &update_files_path)?;
|
||||
//}
|
||||
//}
|
||||
}
|
||||
|
||||
let path = &path.as_ref().to_path_buf();
|
||||
let handle = &handle;
|
||||
// Perform the snapshot of each index concurently. Only a third of the capabilities of
|
||||
// the index actor at a time not to put too much pressure on the index actor
|
||||
let mut stream = futures::stream::iter(uuids.iter())
|
||||
.map(move |uuid| handle.snapshot(*uuid, path.clone()))
|
||||
.buffer_unordered(CONCURRENT_INDEX_MSG / 3);
|
||||
|
||||
Handle::current().block_on(async {
|
||||
while let Some(res) = stream.next().await {
|
||||
res?;
|
||||
}
|
||||
Ok(()) as Result<()>
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_info(&self) -> Result<UpdateStoreInfo> {
|
||||
let size = self.env.size();
|
||||
let txn = self.env.read_txn()?;
|
||||
for entry in self.pending_queue.iter(&txn)? {
|
||||
let (_, _pending) = entry?;
|
||||
//if let Enqueued {
|
||||
//content: Some(uuid),
|
||||
//..
|
||||
//} = pending
|
||||
//{
|
||||
//let path = update_uuid_to_file_path(&self.path, uuid);
|
||||
//size += File::open(path)?.metadata()?.len();
|
||||
//}
|
||||
}
|
||||
let processing = match *self.state.read() {
|
||||
State::Processing(uuid, _) => Some(uuid),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
Ok(UpdateStoreInfo { size, processing })
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::index_controller::{
|
||||
index_actor::{error::IndexActorError, MockIndexActorHandle},
|
||||
UpdateResult,
|
||||
};
|
||||
|
||||
use futures::future::ok;
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_next_id() {
|
||||
let dir = tempfile::tempdir_in(".").unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
let handle = Arc::new(MockIndexActorHandle::new());
|
||||
options.map_size(4096 * 100);
|
||||
let update_store = UpdateStore::open(
|
||||
options,
|
||||
dir.path(),
|
||||
handle,
|
||||
Arc::new(AtomicBool::new(false)),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let index1_uuid = Uuid::new_v4();
|
||||
let index2_uuid = Uuid::new_v4();
|
||||
|
||||
let mut txn = update_store.env.write_txn().unwrap();
|
||||
let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap();
|
||||
txn.commit().unwrap();
|
||||
assert_eq!((0, 0), ids);
|
||||
|
||||
let mut txn = update_store.env.write_txn().unwrap();
|
||||
let ids = update_store.next_update_id(&mut txn, index2_uuid).unwrap();
|
||||
txn.commit().unwrap();
|
||||
assert_eq!((1, 0), ids);
|
||||
|
||||
let mut txn = update_store.env.write_txn().unwrap();
|
||||
let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap();
|
||||
txn.commit().unwrap();
|
||||
assert_eq!((2, 1), ids);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_register_update() {
|
||||
let dir = tempfile::tempdir_in(".").unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
let handle = Arc::new(MockIndexActorHandle::new());
|
||||
options.map_size(4096 * 100);
|
||||
let update_store = UpdateStore::open(
|
||||
options,
|
||||
dir.path(),
|
||||
handle,
|
||||
Arc::new(AtomicBool::new(false)),
|
||||
)
|
||||
.unwrap();
|
||||
let meta = UpdateMeta::ClearDocuments;
|
||||
let uuid = Uuid::new_v4();
|
||||
let store_clone = update_store.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
store_clone.register_update(meta, None, uuid).unwrap();
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let txn = update_store.env.read_txn().unwrap();
|
||||
assert!(update_store
|
||||
.pending_queue
|
||||
.get(&txn, &(0, uuid, 0))
|
||||
.unwrap()
|
||||
.is_some());
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_process_update() {
|
||||
let dir = tempfile::tempdir_in(".").unwrap();
|
||||
let mut handle = MockIndexActorHandle::new();
|
||||
|
||||
handle
|
||||
.expect_update()
|
||||
.times(2)
|
||||
.returning(|_index_uuid, processing, _file| {
|
||||
if processing.id() == 0 {
|
||||
Box::pin(ok(Ok(processing.process(UpdateResult::Other))))
|
||||
} else {
|
||||
Box::pin(ok(Err(
|
||||
processing.fail(IndexActorError::ExistingPrimaryKey.into())
|
||||
)))
|
||||
}
|
||||
});
|
||||
|
||||
let handle = Arc::new(handle);
|
||||
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(4096 * 100);
|
||||
let store = UpdateStore::open(
|
||||
options,
|
||||
dir.path(),
|
||||
handle.clone(),
|
||||
Arc::new(AtomicBool::new(false)),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// wait a bit for the event loop exit.
|
||||
tokio::time::sleep(std::time::Duration::from_millis(50)).await;
|
||||
|
||||
let mut txn = store.env.write_txn().unwrap();
|
||||
|
||||
let update = Enqueued::new(UpdateMeta::ClearDocuments, 0, None);
|
||||
let uuid = Uuid::new_v4();
|
||||
|
||||
store
|
||||
.pending_queue
|
||||
.put(&mut txn, &(0, uuid, 0), &update)
|
||||
.unwrap();
|
||||
|
||||
let update = Enqueued::new(UpdateMeta::ClearDocuments, 1, None);
|
||||
|
||||
store
|
||||
.pending_queue
|
||||
.put(&mut txn, &(1, uuid, 1), &update)
|
||||
.unwrap();
|
||||
|
||||
txn.commit().unwrap();
|
||||
|
||||
// Process the pending, and check that it has been moved to the update databases, and
|
||||
// removed from the pending database.
|
||||
let store_clone = store.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
store_clone.process_pending_update(handle.clone()).unwrap();
|
||||
store_clone.process_pending_update(handle).unwrap();
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let txn = store.env.read_txn().unwrap();
|
||||
|
||||
assert!(store.pending_queue.first(&txn).unwrap().is_none());
|
||||
let update = store.updates.get(&txn, &(uuid, 0)).unwrap().unwrap();
|
||||
|
||||
assert!(matches!(update, UpdateStatus::Processed(_)));
|
||||
let update = store.updates.get(&txn, &(uuid, 1)).unwrap().unwrap();
|
||||
|
||||
assert!(matches!(update, UpdateStatus::Failed(_)));
|
||||
}
|
||||
}
|
|
@ -1,63 +0,0 @@
|
|||
use std::fs::File;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::ops::{Deref, DerefMut};
|
||||
|
||||
use tempfile::NamedTempFile;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::error::Result;
|
||||
|
||||
pub struct UpdateFile {
|
||||
path: PathBuf,
|
||||
file: NamedTempFile,
|
||||
}
|
||||
|
||||
impl UpdateFile {
|
||||
pub fn persist(self) {
|
||||
println!("persisting in {}", self.path.display());
|
||||
self.file.persist(&self.path).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for UpdateFile {
|
||||
type Target = NamedTempFile;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.file
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for UpdateFile {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.file
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct UpdateFileStore {
|
||||
path: PathBuf,
|
||||
}
|
||||
|
||||
impl UpdateFileStore {
|
||||
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
|
||||
let path = path.as_ref().join("updates/updates_files");
|
||||
std::fs::create_dir_all(&path).unwrap();
|
||||
Ok(Self { path })
|
||||
}
|
||||
|
||||
pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> {
|
||||
let file = NamedTempFile::new().unwrap();
|
||||
let uuid = Uuid::new_v4();
|
||||
let path = self.path.join(uuid.to_string());
|
||||
let update_file = UpdateFile { file, path };
|
||||
|
||||
Ok((uuid, update_file))
|
||||
}
|
||||
|
||||
pub fn get_update(&self, uuid: Uuid) -> Result<File> {
|
||||
let path = self.path.join(uuid.to_string());
|
||||
println!("reading in {}", path.display());
|
||||
let file = File::open(path).unwrap();
|
||||
Ok(file)
|
||||
}
|
||||
}
|
|
@ -1,231 +0,0 @@
|
|||
use chrono::{DateTime, Utc};
|
||||
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{
|
||||
error::ResponseError,
|
||||
index::{Settings, Unchecked},
|
||||
};
|
||||
|
||||
use super::update_actor::RegisterUpdate;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum UpdateResult {
|
||||
DocumentsAddition(DocumentAdditionResult),
|
||||
DocumentDeletion { deleted: u64 },
|
||||
Other,
|
||||
}
|
||||
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum UpdateMeta {
|
||||
DocumentsAddition {
|
||||
method: IndexDocumentsMethod,
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
ClearDocuments,
|
||||
DeleteDocuments {
|
||||
ids: Vec<String>,
|
||||
},
|
||||
Settings(Settings<Unchecked>),
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Enqueued {
|
||||
pub update_id: u64,
|
||||
pub meta: RegisterUpdate,
|
||||
pub enqueued_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
impl Enqueued {
|
||||
pub fn new(meta: RegisterUpdate, update_id: u64) -> Self {
|
||||
Self {
|
||||
enqueued_at: Utc::now(),
|
||||
meta,
|
||||
update_id,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn processing(self) -> Processing {
|
||||
Processing {
|
||||
from: self,
|
||||
started_processing_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn abort(self) -> Aborted {
|
||||
Aborted {
|
||||
from: self,
|
||||
aborted_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn meta(&self) -> &RegisterUpdate {
|
||||
&self.meta
|
||||
}
|
||||
|
||||
pub fn id(&self) -> u64 {
|
||||
self.update_id
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Processed {
|
||||
pub success: UpdateResult,
|
||||
pub processed_at: DateTime<Utc>,
|
||||
#[serde(flatten)]
|
||||
pub from: Processing,
|
||||
}
|
||||
|
||||
impl Processed {
|
||||
pub fn id(&self) -> u64 {
|
||||
self.from.id()
|
||||
}
|
||||
|
||||
pub fn meta(&self) -> &RegisterUpdate {
|
||||
self.from.meta()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Processing {
|
||||
#[serde(flatten)]
|
||||
pub from: Enqueued,
|
||||
pub started_processing_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
impl Processing {
|
||||
pub fn id(&self) -> u64 {
|
||||
self.from.id()
|
||||
}
|
||||
|
||||
pub fn meta(&self) -> &RegisterUpdate {
|
||||
self.from.meta()
|
||||
}
|
||||
|
||||
pub fn process(self, success: UpdateResult) -> Processed {
|
||||
Processed {
|
||||
success,
|
||||
from: self,
|
||||
processed_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn fail(self, error: ResponseError) -> Failed {
|
||||
Failed {
|
||||
from: self,
|
||||
error,
|
||||
failed_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Aborted {
|
||||
#[serde(flatten)]
|
||||
from: Enqueued,
|
||||
aborted_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
impl Aborted {
|
||||
pub fn id(&self) -> u64 {
|
||||
self.from.id()
|
||||
}
|
||||
|
||||
pub fn meta(&self) -> &RegisterUpdate {
|
||||
self.from.meta()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Failed {
|
||||
#[serde(flatten)]
|
||||
pub from: Processing,
|
||||
pub error: ResponseError,
|
||||
pub failed_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
impl Failed {
|
||||
pub fn id(&self) -> u64 {
|
||||
self.from.id()
|
||||
}
|
||||
|
||||
pub fn meta(&self) -> &RegisterUpdate {
|
||||
self.from.meta()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(tag = "status", rename_all = "camelCase")]
|
||||
pub enum UpdateStatus {
|
||||
Processing(Processing),
|
||||
Enqueued(Enqueued),
|
||||
Processed(Processed),
|
||||
Aborted(Aborted),
|
||||
Failed(Failed),
|
||||
}
|
||||
|
||||
impl UpdateStatus {
|
||||
pub fn id(&self) -> u64 {
|
||||
match self {
|
||||
UpdateStatus::Processing(u) => u.id(),
|
||||
UpdateStatus::Enqueued(u) => u.id(),
|
||||
UpdateStatus::Processed(u) => u.id(),
|
||||
UpdateStatus::Aborted(u) => u.id(),
|
||||
UpdateStatus::Failed(u) => u.id(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn meta(&self) -> &RegisterUpdate {
|
||||
match self {
|
||||
UpdateStatus::Processing(u) => u.meta(),
|
||||
UpdateStatus::Enqueued(u) => u.meta(),
|
||||
UpdateStatus::Processed(u) => u.meta(),
|
||||
UpdateStatus::Aborted(u) => u.meta(),
|
||||
UpdateStatus::Failed(u) => u.meta(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn processed(&self) -> Option<&Processed> {
|
||||
match self {
|
||||
UpdateStatus::Processed(p) => Some(p),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Enqueued> for UpdateStatus {
|
||||
fn from(other: Enqueued) -> Self {
|
||||
Self::Enqueued(other)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Aborted> for UpdateStatus {
|
||||
fn from(other: Aborted) -> Self {
|
||||
Self::Aborted(other)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Processed> for UpdateStatus {
|
||||
fn from(other: Processed) -> Self {
|
||||
Self::Processed(other)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Processing> for UpdateStatus {
|
||||
fn from(other: Processing) -> Self {
|
||||
Self::Processing(other)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Failed> for UpdateStatus {
|
||||
fn from(other: Failed) -> Self {
|
||||
Self::Failed(other)
|
||||
}
|
||||
}
|
|
@ -1,98 +0,0 @@
|
|||
use std::{collections::HashSet, path::PathBuf};
|
||||
|
||||
use log::{trace, warn};
|
||||
use tokio::sync::mpsc;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::{error::UuidResolverError, Result, UuidResolveMsg, UuidStore};
|
||||
|
||||
pub struct UuidResolverActor<S> {
|
||||
inbox: mpsc::Receiver<UuidResolveMsg>,
|
||||
store: S,
|
||||
}
|
||||
|
||||
impl<S: UuidStore> UuidResolverActor<S> {
|
||||
pub fn new(inbox: mpsc::Receiver<UuidResolveMsg>, store: S) -> Self {
|
||||
Self { inbox, store }
|
||||
}
|
||||
|
||||
pub async fn run(mut self) {
|
||||
use UuidResolveMsg::*;
|
||||
|
||||
trace!("uuid resolver started");
|
||||
|
||||
loop {
|
||||
match self.inbox.recv().await {
|
||||
Some(Get { uid: name, ret }) => {
|
||||
let _ = ret.send(self.handle_get(name).await);
|
||||
}
|
||||
Some(Delete { uid: name, ret }) => {
|
||||
let _ = ret.send(self.handle_delete(name).await);
|
||||
}
|
||||
Some(List { ret }) => {
|
||||
let _ = ret.send(self.handle_list().await);
|
||||
}
|
||||
Some(Insert { ret, uuid, name }) => {
|
||||
let _ = ret.send(self.handle_insert(name, uuid).await);
|
||||
}
|
||||
Some(SnapshotRequest { path, ret }) => {
|
||||
let _ = ret.send(self.handle_snapshot(path).await);
|
||||
}
|
||||
Some(GetSize { ret }) => {
|
||||
let _ = ret.send(self.handle_get_size().await);
|
||||
}
|
||||
Some(DumpRequest { path, ret }) => {
|
||||
let _ = ret.send(self.handle_dump(path).await);
|
||||
}
|
||||
// all senders have been dropped, need to quit.
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
|
||||
warn!("exiting uuid resolver loop");
|
||||
}
|
||||
|
||||
async fn handle_get(&self, uid: String) -> Result<Uuid> {
|
||||
self.store
|
||||
.get_uuid(uid.clone())
|
||||
.await?
|
||||
.ok_or(UuidResolverError::UnexistingIndex(uid))
|
||||
}
|
||||
|
||||
async fn handle_delete(&self, uid: String) -> Result<Uuid> {
|
||||
self.store
|
||||
.delete(uid.clone())
|
||||
.await?
|
||||
.ok_or(UuidResolverError::UnexistingIndex(uid))
|
||||
}
|
||||
|
||||
async fn handle_list(&self) -> Result<Vec<(String, Uuid)>> {
|
||||
let result = self.store.list().await?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn handle_snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||
self.store.snapshot(path).await
|
||||
}
|
||||
|
||||
async fn handle_dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||
self.store.dump(path).await
|
||||
}
|
||||
|
||||
async fn handle_insert(&self, uid: String, uuid: Uuid) -> Result<()> {
|
||||
if !is_index_uid_valid(&uid) {
|
||||
return Err(UuidResolverError::BadlyFormatted(uid));
|
||||
}
|
||||
self.store.insert(uid, uuid).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_get_size(&self) -> Result<u64> {
|
||||
self.store.get_size().await
|
||||
}
|
||||
}
|
||||
|
||||
fn is_index_uid_valid(uid: &str) -> bool {
|
||||
uid.chars()
|
||||
.all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_')
|
||||
}
|
|
@ -1,34 +0,0 @@
|
|||
use meilisearch_error::{Code, ErrorCode};
|
||||
|
||||
pub type Result<T> = std::result::Result<T, UuidResolverError>;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum UuidResolverError {
|
||||
#[error("Index already exists.")]
|
||||
NameAlreadyExist,
|
||||
#[error("Index \"{0}\" not found.")]
|
||||
UnexistingIndex(String),
|
||||
#[error("Index must have a valid uid; Index uid can be of type integer or string only composed of alphanumeric characters, hyphens (-) and underscores (_).")]
|
||||
BadlyFormatted(String),
|
||||
#[error("Internal error: {0}")]
|
||||
Internal(Box<dyn std::error::Error + Sync + Send + 'static>),
|
||||
}
|
||||
|
||||
internal_error!(
|
||||
UuidResolverError: heed::Error,
|
||||
uuid::Error,
|
||||
std::io::Error,
|
||||
tokio::task::JoinError,
|
||||
serde_json::Error
|
||||
);
|
||||
|
||||
impl ErrorCode for UuidResolverError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
UuidResolverError::NameAlreadyExist => Code::IndexAlreadyExists,
|
||||
UuidResolverError::UnexistingIndex(_) => Code::IndexNotFound,
|
||||
UuidResolverError::BadlyFormatted(_) => Code::InvalidIndexUid,
|
||||
UuidResolverError::Internal(_) => Code::Internal,
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,87 +0,0 @@
|
|||
use std::collections::HashSet;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use tokio::sync::{mpsc, oneshot};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::{HeedUuidStore, Result, UuidResolveMsg, UuidResolverActor, UuidResolverHandle};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct UuidResolverHandleImpl {
|
||||
sender: mpsc::Sender<UuidResolveMsg>,
|
||||
}
|
||||
|
||||
impl UuidResolverHandleImpl {
|
||||
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
|
||||
let (sender, reveiver) = mpsc::channel(100);
|
||||
let store = HeedUuidStore::new(path)?;
|
||||
let actor = UuidResolverActor::new(reveiver, store);
|
||||
tokio::spawn(actor.run());
|
||||
Ok(Self { sender })
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl UuidResolverHandle for UuidResolverHandleImpl {
|
||||
async fn get(&self, name: String) -> Result<Uuid> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = UuidResolveMsg::Get { uid: name, ret };
|
||||
let _ = self.sender.send(msg).await;
|
||||
Ok(receiver
|
||||
.await
|
||||
.expect("Uuid resolver actor has been killed")?)
|
||||
}
|
||||
|
||||
async fn delete(&self, name: String) -> Result<Uuid> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = UuidResolveMsg::Delete { uid: name, ret };
|
||||
let _ = self.sender.send(msg).await;
|
||||
Ok(receiver
|
||||
.await
|
||||
.expect("Uuid resolver actor has been killed")?)
|
||||
}
|
||||
|
||||
async fn list(&self) -> Result<Vec<(String, Uuid)>> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = UuidResolveMsg::List { ret };
|
||||
let _ = self.sender.send(msg).await;
|
||||
Ok(receiver
|
||||
.await
|
||||
.expect("Uuid resolver actor has been killed")?)
|
||||
}
|
||||
|
||||
async fn insert(&self, name: String, uuid: Uuid) -> Result<()> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = UuidResolveMsg::Insert { ret, name, uuid };
|
||||
let _ = self.sender.send(msg).await;
|
||||
Ok(receiver
|
||||
.await
|
||||
.expect("Uuid resolver actor has been killed")?)
|
||||
}
|
||||
|
||||
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = UuidResolveMsg::SnapshotRequest { path, ret };
|
||||
let _ = self.sender.send(msg).await;
|
||||
Ok(receiver
|
||||
.await
|
||||
.expect("Uuid resolver actor has been killed")?)
|
||||
}
|
||||
|
||||
async fn get_size(&self) -> Result<u64> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = UuidResolveMsg::GetSize { ret };
|
||||
let _ = self.sender.send(msg).await;
|
||||
Ok(receiver
|
||||
.await
|
||||
.expect("Uuid resolver actor has been killed")?)
|
||||
}
|
||||
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = UuidResolveMsg::DumpRequest { ret, path };
|
||||
let _ = self.sender.send(msg).await;
|
||||
Ok(receiver
|
||||
.await
|
||||
.expect("Uuid resolver actor has been killed")?)
|
||||
}
|
||||
}
|
|
@ -1,37 +0,0 @@
|
|||
use std::collections::HashSet;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use tokio::sync::oneshot;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::Result;
|
||||
|
||||
pub enum UuidResolveMsg {
|
||||
Get {
|
||||
uid: String,
|
||||
ret: oneshot::Sender<Result<Uuid>>,
|
||||
},
|
||||
Delete {
|
||||
uid: String,
|
||||
ret: oneshot::Sender<Result<Uuid>>,
|
||||
},
|
||||
List {
|
||||
ret: oneshot::Sender<Result<Vec<(String, Uuid)>>>,
|
||||
},
|
||||
Insert {
|
||||
uuid: Uuid,
|
||||
name: String,
|
||||
ret: oneshot::Sender<Result<()>>,
|
||||
},
|
||||
SnapshotRequest {
|
||||
path: PathBuf,
|
||||
ret: oneshot::Sender<Result<HashSet<Uuid>>>,
|
||||
},
|
||||
GetSize {
|
||||
ret: oneshot::Sender<Result<u64>>,
|
||||
},
|
||||
DumpRequest {
|
||||
path: PathBuf,
|
||||
ret: oneshot::Sender<Result<HashSet<Uuid>>>,
|
||||
},
|
||||
}
|
|
@ -1,35 +0,0 @@
|
|||
mod actor;
|
||||
pub mod error;
|
||||
mod handle_impl;
|
||||
mod message;
|
||||
pub mod store;
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use uuid::Uuid;
|
||||
|
||||
use actor::UuidResolverActor;
|
||||
use error::Result;
|
||||
use message::UuidResolveMsg;
|
||||
use store::UuidStore;
|
||||
|
||||
#[cfg(test)]
|
||||
use mockall::automock;
|
||||
|
||||
pub use handle_impl::UuidResolverHandleImpl;
|
||||
pub use store::HeedUuidStore;
|
||||
|
||||
const UUID_STORE_SIZE: usize = 1_073_741_824; //1GiB
|
||||
|
||||
#[async_trait::async_trait]
|
||||
#[cfg_attr(test, automock)]
|
||||
pub trait UuidResolverHandle {
|
||||
async fn get(&self, name: String) -> Result<Uuid>;
|
||||
async fn insert(&self, name: String, uuid: Uuid) -> Result<()>;
|
||||
async fn delete(&self, name: String) -> Result<Uuid>;
|
||||
async fn list(&self) -> Result<Vec<(String, Uuid)>>;
|
||||
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
|
||||
async fn get_size(&self) -> Result<u64>;
|
||||
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
|
||||
}
|
|
@ -1,224 +0,0 @@
|
|||
use std::collections::HashSet;
|
||||
use std::fs::{create_dir_all, File};
|
||||
use std::io::{BufRead, BufReader, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use heed::types::{ByteSlice, Str};
|
||||
use heed::{CompactionOption, Database, Env, EnvOpenOptions};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::{error::UuidResolverError, Result, UUID_STORE_SIZE};
|
||||
use crate::helpers::EnvSizer;
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
struct DumpEntry {
|
||||
uuid: Uuid,
|
||||
uid: String,
|
||||
}
|
||||
|
||||
const UUIDS_DB_PATH: &str = "index_uuids";
|
||||
|
||||
#[async_trait::async_trait]
|
||||
pub trait UuidStore: Sized {
|
||||
// Create a new entry for `name`. Return an error if `err` and the entry already exists, return
|
||||
// the uuid otherwise.
|
||||
async fn get_uuid(&self, uid: String) -> Result<Option<Uuid>>;
|
||||
async fn delete(&self, uid: String) -> Result<Option<Uuid>>;
|
||||
async fn list(&self) -> Result<Vec<(String, Uuid)>>;
|
||||
async fn insert(&self, name: String, uuid: Uuid) -> Result<()>;
|
||||
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
|
||||
async fn get_size(&self) -> Result<u64>;
|
||||
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct HeedUuidStore {
|
||||
env: Env,
|
||||
db: Database<Str, ByteSlice>,
|
||||
}
|
||||
|
||||
impl HeedUuidStore {
|
||||
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
|
||||
let path = path.as_ref().join(UUIDS_DB_PATH);
|
||||
create_dir_all(&path)?;
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(UUID_STORE_SIZE); // 1GB
|
||||
let env = options.open(path)?;
|
||||
let db = env.create_database(None)?;
|
||||
Ok(Self { env, db })
|
||||
}
|
||||
|
||||
pub fn get_uuid(&self, name: String) -> Result<Option<Uuid>> {
|
||||
let env = self.env.clone();
|
||||
let db = self.db;
|
||||
let txn = env.read_txn()?;
|
||||
match db.get(&txn, &name)? {
|
||||
Some(uuid) => {
|
||||
let uuid = Uuid::from_slice(uuid)?;
|
||||
Ok(Some(uuid))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn delete(&self, uid: String) -> Result<Option<Uuid>> {
|
||||
let env = self.env.clone();
|
||||
let db = self.db;
|
||||
let mut txn = env.write_txn()?;
|
||||
match db.get(&txn, &uid)? {
|
||||
Some(uuid) => {
|
||||
let uuid = Uuid::from_slice(uuid)?;
|
||||
db.delete(&mut txn, &uid)?;
|
||||
txn.commit()?;
|
||||
Ok(Some(uuid))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn list(&self) -> Result<Vec<(String, Uuid)>> {
|
||||
let env = self.env.clone();
|
||||
let db = self.db;
|
||||
let txn = env.read_txn()?;
|
||||
let mut entries = Vec::new();
|
||||
for entry in db.iter(&txn)? {
|
||||
let (name, uuid) = entry?;
|
||||
let uuid = Uuid::from_slice(uuid)?;
|
||||
entries.push((name.to_owned(), uuid))
|
||||
}
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
pub fn insert(&self, name: String, uuid: Uuid) -> Result<()> {
|
||||
let env = self.env.clone();
|
||||
let db = self.db;
|
||||
let mut txn = env.write_txn()?;
|
||||
|
||||
if db.get(&txn, &name)?.is_some() {
|
||||
return Err(UuidResolverError::NameAlreadyExist);
|
||||
}
|
||||
|
||||
db.put(&mut txn, &name, uuid.as_bytes())?;
|
||||
txn.commit()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn snapshot(&self, mut path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||
let env = self.env.clone();
|
||||
let db = self.db;
|
||||
// Write transaction to acquire a lock on the database.
|
||||
let txn = env.write_txn()?;
|
||||
let mut entries = HashSet::new();
|
||||
for entry in db.iter(&txn)? {
|
||||
let (_, uuid) = entry?;
|
||||
let uuid = Uuid::from_slice(uuid)?;
|
||||
entries.insert(uuid);
|
||||
}
|
||||
|
||||
// only perform snapshot if there are indexes
|
||||
if !entries.is_empty() {
|
||||
path.push(UUIDS_DB_PATH);
|
||||
create_dir_all(&path).unwrap();
|
||||
path.push("data.mdb");
|
||||
env.copy_to_path(path, CompactionOption::Enabled)?;
|
||||
}
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
pub fn get_size(&self) -> Result<u64> {
|
||||
Ok(self.env.size())
|
||||
}
|
||||
|
||||
pub fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||
let dump_path = path.join(UUIDS_DB_PATH);
|
||||
create_dir_all(&dump_path)?;
|
||||
let dump_file_path = dump_path.join("data.jsonl");
|
||||
let mut dump_file = File::create(&dump_file_path)?;
|
||||
let mut uuids = HashSet::new();
|
||||
|
||||
let txn = self.env.read_txn()?;
|
||||
for entry in self.db.iter(&txn)? {
|
||||
let (uid, uuid) = entry?;
|
||||
let uid = uid.to_string();
|
||||
let uuid = Uuid::from_slice(uuid)?;
|
||||
|
||||
let entry = DumpEntry { uuid, uid };
|
||||
serde_json::to_writer(&mut dump_file, &entry)?;
|
||||
dump_file.write_all(b"\n").unwrap();
|
||||
|
||||
uuids.insert(uuid);
|
||||
}
|
||||
|
||||
Ok(uuids)
|
||||
}
|
||||
|
||||
pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> Result<()> {
|
||||
let uuid_resolver_path = dst.as_ref().join(UUIDS_DB_PATH);
|
||||
std::fs::create_dir_all(&uuid_resolver_path)?;
|
||||
|
||||
let src_indexes = src.as_ref().join(UUIDS_DB_PATH).join("data.jsonl");
|
||||
let indexes = File::open(&src_indexes)?;
|
||||
let mut indexes = BufReader::new(indexes);
|
||||
let mut line = String::new();
|
||||
|
||||
let db = Self::new(dst)?;
|
||||
let mut txn = db.env.write_txn()?;
|
||||
|
||||
loop {
|
||||
match indexes.read_line(&mut line) {
|
||||
Ok(0) => break,
|
||||
Ok(_) => {
|
||||
let DumpEntry { uuid, uid } = serde_json::from_str(&line)?;
|
||||
println!("importing {} {}", uid, uuid);
|
||||
db.db.put(&mut txn, &uid, uuid.as_bytes())?;
|
||||
}
|
||||
Err(e) => return Err(e.into()),
|
||||
}
|
||||
|
||||
line.clear();
|
||||
}
|
||||
txn.commit()?;
|
||||
|
||||
db.env.prepare_for_closing().wait();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl UuidStore for HeedUuidStore {
|
||||
async fn get_uuid(&self, name: String) -> Result<Option<Uuid>> {
|
||||
let this = self.clone();
|
||||
tokio::task::spawn_blocking(move || this.get_uuid(name)).await?
|
||||
}
|
||||
|
||||
async fn delete(&self, uid: String) -> Result<Option<Uuid>> {
|
||||
let this = self.clone();
|
||||
tokio::task::spawn_blocking(move || this.delete(uid)).await?
|
||||
}
|
||||
|
||||
async fn list(&self) -> Result<Vec<(String, Uuid)>> {
|
||||
let this = self.clone();
|
||||
tokio::task::spawn_blocking(move || this.list()).await?
|
||||
}
|
||||
|
||||
async fn insert(&self, name: String, uuid: Uuid) -> Result<()> {
|
||||
let this = self.clone();
|
||||
tokio::task::spawn_blocking(move || this.insert(name, uuid)).await?
|
||||
}
|
||||
|
||||
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||
let this = self.clone();
|
||||
tokio::task::spawn_blocking(move || this.snapshot(path)).await?
|
||||
}
|
||||
|
||||
async fn get_size(&self) -> Result<u64> {
|
||||
self.get_size()
|
||||
}
|
||||
|
||||
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||
let this = self.clone();
|
||||
tokio::task::spawn_blocking(move || this.dump(path)).await?
|
||||
}
|
||||
}
|
|
@ -38,7 +38,6 @@
|
|||
//! Most of the routes use [extractors] to handle the authentication.
|
||||
|
||||
#![allow(rustdoc::private_intra_doc_links)]
|
||||
pub mod data;
|
||||
#[macro_use]
|
||||
pub mod error;
|
||||
#[macro_use]
|
||||
|
@ -46,11 +45,8 @@ pub mod extractors;
|
|||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
pub mod analytics;
|
||||
pub mod helpers;
|
||||
mod index;
|
||||
mod index_controller;
|
||||
pub mod option;
|
||||
pub mod routes;
|
||||
pub use self::data::Data;
|
||||
use crate::extractors::authentication::AuthConfig;
|
||||
pub use option::Opt;
|
||||
|
||||
|
@ -58,6 +54,7 @@ use actix_web::web;
|
|||
|
||||
use extractors::authentication::policies::*;
|
||||
use extractors::payload::PayloadConfig;
|
||||
use meilisearch_lib::MeiliSearch;
|
||||
use sha2::Digest;
|
||||
|
||||
#[derive(Clone)]
|
||||
|
@ -86,14 +83,14 @@ impl ApiKeys {
|
|||
|
||||
pub fn configure_data(
|
||||
config: &mut web::ServiceConfig,
|
||||
data: Data,
|
||||
data: MeiliSearch,
|
||||
opt: &Opt,
|
||||
) {
|
||||
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
|
||||
config
|
||||
.app_data(web::Data::new(data.clone()))
|
||||
// TODO!: Why are we passing the data with two different things?
|
||||
.app_data(data)
|
||||
// TODO!: Why are we passing the data with two different things?
|
||||
//.app_data(data)
|
||||
.app_data(
|
||||
web::JsonConfig::default()
|
||||
.limit(http_payload_size_limit)
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
use std::env;
|
||||
|
||||
use actix_web::HttpServer;
|
||||
use meilisearch_http::{create_app, Data, Opt};
|
||||
use meilisearch_http::{create_app, Opt};
|
||||
use meilisearch_lib::MeiliSearch;
|
||||
use structopt::StructOpt;
|
||||
|
||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
|
@ -39,6 +40,26 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn setup_meilisearch(opt: &Opt) -> anyhow::Result<MeiliSearch> {
|
||||
let mut meilisearch = MeiliSearch::builder();
|
||||
meilisearch
|
||||
.set_max_index_size(opt.max_index_size.get_bytes() as usize)
|
||||
.set_max_update_store_size(opt.max_udb_size.get_bytes() as usize)
|
||||
.set_ignore_missing_snapshot(opt.ignore_missing_snapshot)
|
||||
.set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists)
|
||||
.set_dump_dst(opt.dumps_dir.clone())
|
||||
.set_snapshot_dir(opt.snapshot_dir.clone());
|
||||
|
||||
if let Some(ref path) = opt.import_snapshot {
|
||||
meilisearch.set_import_snapshot(path.clone());
|
||||
}
|
||||
if let Some(ref path) = opt.import_dump {
|
||||
meilisearch.set_dump_src(path.clone());
|
||||
}
|
||||
|
||||
meilisearch.build(opt.db_path.clone(), opt.indexer_options.clone())
|
||||
}
|
||||
|
||||
#[actix_web::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let opt = Opt::from_args();
|
||||
|
@ -55,23 +76,23 @@ async fn main() -> anyhow::Result<()> {
|
|||
_ => unreachable!(),
|
||||
}
|
||||
|
||||
let data = Data::new(opt.clone())?;
|
||||
let meilisearch = setup_meilisearch(&opt)?;
|
||||
|
||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
if !opt.no_analytics {
|
||||
let analytics_data = data.clone();
|
||||
let analytics_data = meilisearch.clone();
|
||||
let analytics_opt = opt.clone();
|
||||
tokio::task::spawn(analytics::analytics_sender(analytics_data, analytics_opt));
|
||||
}
|
||||
|
||||
print_launch_resume(&opt);
|
||||
|
||||
run_http(data, opt).await?;
|
||||
run_http(meilisearch, opt).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn run_http(data: Data, opt: Opt) -> anyhow::Result<()> {
|
||||
async fn run_http(data: MeiliSearch, opt: Opt) -> anyhow::Result<()> {
|
||||
let _enable_dashboard = &opt.env == "development";
|
||||
let opt_clone = opt.clone();
|
||||
let http_server = HttpServer::new(move || create_app!(data, _enable_dashboard, opt_clone))
|
||||
|
|
|
@ -8,7 +8,6 @@ use std::sync::Arc;
|
|||
use std::fs;
|
||||
|
||||
use byte_unit::Byte;
|
||||
use milli::CompressionType;
|
||||
use rustls::internal::pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
|
||||
use rustls::{
|
||||
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, NoClientAuth,
|
||||
|
@ -16,56 +15,7 @@ use rustls::{
|
|||
};
|
||||
use structopt::StructOpt;
|
||||
use sysinfo::{RefreshKind, System, SystemExt};
|
||||
|
||||
#[derive(Debug, Clone, StructOpt)]
|
||||
pub struct IndexerOpts {
|
||||
/// The amount of documents to skip before printing
|
||||
/// a log regarding the indexing advancement.
|
||||
#[structopt(long, default_value = "100000")] // 100k
|
||||
pub log_every_n: usize,
|
||||
|
||||
/// Grenad max number of chunks in bytes.
|
||||
#[structopt(long)]
|
||||
pub max_nb_chunks: Option<usize>,
|
||||
|
||||
/// The maximum amount of memory the indexer will use. It defaults to 2/3
|
||||
/// of the available memory. It is recommended to use something like 80%-90%
|
||||
/// of the available memory, no more.
|
||||
///
|
||||
/// In case the engine is unable to retrieve the available memory the engine will
|
||||
/// try to use the memory it needs but without real limit, this can lead to
|
||||
/// Out-Of-Memory issues and it is recommended to specify the amount of memory to use.
|
||||
#[structopt(long, default_value)]
|
||||
pub max_memory: MaxMemory,
|
||||
|
||||
/// The name of the compression algorithm to use when compressing intermediate
|
||||
/// Grenad chunks while indexing documents.
|
||||
///
|
||||
/// Choosing a fast algorithm will make the indexing faster but may consume more memory.
|
||||
#[structopt(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])]
|
||||
pub chunk_compression_type: CompressionType,
|
||||
|
||||
/// The level of compression of the chosen algorithm.
|
||||
#[structopt(long, requires = "chunk-compression-type")]
|
||||
pub chunk_compression_level: Option<u32>,
|
||||
|
||||
/// Number of parallel jobs for indexing, defaults to # of CPUs.
|
||||
#[structopt(long)]
|
||||
pub indexing_jobs: Option<usize>,
|
||||
}
|
||||
|
||||
impl Default for IndexerOpts {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
log_every_n: 100_000,
|
||||
max_nb_chunks: None,
|
||||
max_memory: MaxMemory::default(),
|
||||
chunk_compression_type: CompressionType::None,
|
||||
chunk_compression_level: None,
|
||||
indexing_jobs: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
use meilisearch_lib::options::IndexerOpts;
|
||||
|
||||
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
|
||||
|
||||
|
|
|
@ -1,17 +1,17 @@
|
|||
use actix_web::{web, HttpResponse};
|
||||
use log::debug;
|
||||
use meilisearch_lib::MeiliSearch;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::error::ResponseError;
|
||||
use crate::extractors::authentication::{policies::*, GuardedData};
|
||||
use crate::Data;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::resource("").route(web::post().to(create_dump)))
|
||||
.service(web::resource("/{dump_uid}/status").route(web::get().to(get_dump_status)));
|
||||
}
|
||||
|
||||
pub async fn create_dump(data: GuardedData<Private, Data>) -> Result<HttpResponse, ResponseError> {
|
||||
pub async fn create_dump(data: GuardedData<Private, MeiliSearch>) -> Result<HttpResponse, ResponseError> {
|
||||
let res = data.create_dump().await?;
|
||||
|
||||
debug!("returns: {:?}", res);
|
||||
|
@ -30,10 +30,10 @@ struct DumpParam {
|
|||
}
|
||||
|
||||
async fn get_dump_status(
|
||||
data: GuardedData<Private, Data>,
|
||||
data: GuardedData<Private, MeiliSearch>,
|
||||
path: web::Path<DumpParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let res = data.dump_status(path.dump_uid.clone()).await?;
|
||||
let res = data.dump_info(path.dump_uid.clone()).await?;
|
||||
|
||||
debug!("returns: {:?}", res);
|
||||
Ok(HttpResponse::Ok().json(res))
|
||||
|
|
|
@ -3,6 +3,8 @@ use actix_web::{web, HttpResponse};
|
|||
use actix_web::web::Bytes;
|
||||
use futures::{Stream, StreamExt};
|
||||
use log::debug;
|
||||
use meilisearch_lib::MeiliSearch;
|
||||
use meilisearch_lib::index_controller::{DocumentAdditionFormat, Update};
|
||||
use milli::update::IndexDocumentsMethod;
|
||||
use serde::Deserialize;
|
||||
//use serde_json::Value;
|
||||
|
@ -11,9 +13,7 @@ use tokio::sync::mpsc;
|
|||
use crate::error::ResponseError;
|
||||
use crate::extractors::authentication::{policies::*, GuardedData};
|
||||
use crate::extractors::payload::Payload;
|
||||
use crate::index_controller::{DocumentAdditionFormat, Update};
|
||||
use crate::routes::IndexParam;
|
||||
use crate::Data;
|
||||
|
||||
const DEFAULT_RETRIEVE_DOCUMENTS_OFFSET: usize = 0;
|
||||
const DEFAULT_RETRIEVE_DOCUMENTS_LIMIT: usize = 20;
|
||||
|
@ -88,20 +88,20 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
|||
}
|
||||
|
||||
pub async fn get_document(
|
||||
data: GuardedData<Public, Data>,
|
||||
data: GuardedData<Public, MeiliSearch>,
|
||||
path: web::Path<DocumentParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = path.index_uid.clone();
|
||||
let id = path.document_id.clone();
|
||||
let document = data
|
||||
.retrieve_document(index, id, None as Option<Vec<String>>)
|
||||
.document(index, id, None as Option<Vec<String>>)
|
||||
.await?;
|
||||
debug!("returns: {:?}", document);
|
||||
Ok(HttpResponse::Ok().json(document))
|
||||
}
|
||||
|
||||
//pub async fn delete_document(
|
||||
//data: GuardedData<Private, Data>,
|
||||
//data: GuardedData<Private, MeiliSearch>,
|
||||
//path: web::Path<DocumentParam>,
|
||||
//) -> Result<HttpResponse, ResponseError> {
|
||||
//let update_status = data
|
||||
|
@ -120,7 +120,7 @@ pub struct BrowseQuery {
|
|||
}
|
||||
|
||||
pub async fn get_all_documents(
|
||||
data: GuardedData<Public, Data>,
|
||||
data: GuardedData<Public, MeiliSearch>,
|
||||
path: web::Path<IndexParam>,
|
||||
params: web::Query<BrowseQuery>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
|
@ -137,7 +137,7 @@ pub async fn get_all_documents(
|
|||
});
|
||||
|
||||
let documents = data
|
||||
.retrieve_documents(
|
||||
.documents(
|
||||
path.index_uid.clone(),
|
||||
params.offset.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_OFFSET),
|
||||
params.limit.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_LIMIT),
|
||||
|
@ -157,7 +157,7 @@ pub struct UpdateDocumentsQuery {
|
|||
/// Route used when the payload type is "application/json"
|
||||
/// Used to add or replace documents
|
||||
pub async fn add_documents(
|
||||
data: GuardedData<Private, Data>,
|
||||
data: GuardedData<Private, MeiliSearch>,
|
||||
path: web::Path<IndexParam>,
|
||||
params: web::Query<UpdateDocumentsQuery>,
|
||||
body: Payload,
|
||||
|
@ -180,7 +180,7 @@ pub async fn add_documents(
|
|||
/// Route used when the payload type is "application/json"
|
||||
/// Used to add or replace documents
|
||||
pub async fn update_documents(
|
||||
data: GuardedData<Private, Data>,
|
||||
data: GuardedData<Private, MeiliSearch>,
|
||||
path: web::Path<IndexParam>,
|
||||
params: web::Query<UpdateDocumentsQuery>,
|
||||
body: Payload,
|
||||
|
@ -201,7 +201,7 @@ pub async fn update_documents(
|
|||
}
|
||||
|
||||
//pub async fn delete_documents(
|
||||
//data: GuardedData<Private, Data>,
|
||||
//data: GuardedData<Private, MeiliSearch>,
|
||||
//path: web::Path<IndexParam>,
|
||||
//body: web::Json<Vec<Value>>,
|
||||
//) -> Result<HttpResponse, ResponseError> {
|
||||
|
@ -221,7 +221,7 @@ pub async fn update_documents(
|
|||
//}
|
||||
|
||||
//pub async fn clear_all_documents(
|
||||
//data: GuardedData<Private, Data>,
|
||||
//data: GuardedData<Private, MeiliSearch>,
|
||||
//path: web::Path<IndexParam>,
|
||||
//) -> Result<HttpResponse, ResponseError> {
|
||||
//let update_status = data.clear_documents(path.index_uid.clone()).await?;
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
use actix_web::{web, HttpResponse};
|
||||
use chrono::{DateTime, Utc};
|
||||
use log::debug;
|
||||
use meilisearch_lib::MeiliSearch;
|
||||
use meilisearch_lib::index_controller::IndexSettings;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::error::ResponseError;
|
||||
use crate::extractors::authentication::{policies::*, GuardedData};
|
||||
use crate::routes::IndexParam;
|
||||
use crate::Data;
|
||||
|
||||
pub mod documents;
|
||||
pub mod search;
|
||||
|
@ -35,7 +36,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
|||
);
|
||||
}
|
||||
|
||||
pub async fn list_indexes(data: GuardedData<Private, Data>) -> Result<HttpResponse, ResponseError> {
|
||||
pub async fn list_indexes(data: GuardedData<Private, MeiliSearch>) -> Result<HttpResponse, ResponseError> {
|
||||
let indexes = data.list_indexes().await?;
|
||||
debug!("returns: {:?}", indexes);
|
||||
Ok(HttpResponse::Ok().json(indexes))
|
||||
|
@ -49,7 +50,7 @@ pub struct IndexCreateRequest {
|
|||
}
|
||||
|
||||
//pub async fn create_index(
|
||||
//data: GuardedData<Private, Data>,
|
||||
//data: GuardedData<Private, MeiliSearch>,
|
||||
//body: web::Json<IndexCreateRequest>,
|
||||
//) -> Result<HttpResponse, ResponseError> {
|
||||
//let body = body.into_inner();
|
||||
|
@ -75,30 +76,34 @@ pub struct UpdateIndexResponse {
|
|||
}
|
||||
|
||||
pub async fn get_index(
|
||||
data: GuardedData<Private, Data>,
|
||||
data: GuardedData<Private, MeiliSearch>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let meta = data.index(path.index_uid.clone()).await?;
|
||||
let meta = data.get_index(path.index_uid.clone()).await?;
|
||||
debug!("returns: {:?}", meta);
|
||||
Ok(HttpResponse::Ok().json(meta))
|
||||
}
|
||||
|
||||
pub async fn update_index(
|
||||
data: GuardedData<Private, Data>,
|
||||
data: GuardedData<Private, MeiliSearch>,
|
||||
path: web::Path<IndexParam>,
|
||||
body: web::Json<UpdateIndexRequest>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with params: {:?}", body);
|
||||
let body = body.into_inner();
|
||||
let settings = IndexSettings {
|
||||
uid: body.uid,
|
||||
primary_key: body.primary_key,
|
||||
};
|
||||
let meta = data
|
||||
.update_index(path.into_inner().index_uid, body.primary_key, body.uid)
|
||||
.update_index(path.into_inner().index_uid, settings)
|
||||
.await?;
|
||||
debug!("returns: {:?}", meta);
|
||||
Ok(HttpResponse::Ok().json(meta))
|
||||
}
|
||||
|
||||
//pub async fn delete_index(
|
||||
//data: GuardedData<Private, Data>,
|
||||
//data: GuardedData<Private, MeiliSearch>,
|
||||
//path: web::Path<IndexParam>,
|
||||
//) -> Result<HttpResponse, ResponseError> {
|
||||
//data.delete_index(path.index_uid.clone()).await?;
|
||||
|
@ -106,7 +111,7 @@ pub async fn update_index(
|
|||
//}
|
||||
|
||||
pub async fn get_index_stats(
|
||||
data: GuardedData<Private, Data>,
|
||||
data: GuardedData<Private, MeiliSearch>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let response = data.get_index_stats(path.index_uid.clone()).await?;
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
use actix_web::{web, HttpResponse};
|
||||
use log::debug;
|
||||
use meilisearch_lib::MeiliSearch;
|
||||
use meilisearch_lib::index::{default_crop_length, SearchQuery, DEFAULT_SEARCH_LIMIT};
|
||||
use serde::Deserialize;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::error::ResponseError;
|
||||
use crate::extractors::authentication::{policies::*, GuardedData};
|
||||
use crate::index::{default_crop_length, SearchQuery, DEFAULT_SEARCH_LIMIT};
|
||||
use crate::routes::IndexParam;
|
||||
use crate::Data;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(
|
||||
|
@ -82,7 +82,7 @@ impl From<SearchQueryGet> for SearchQuery {
|
|||
}
|
||||
|
||||
pub async fn search_with_url_query(
|
||||
data: GuardedData<Public, Data>,
|
||||
data: GuardedData<Public, MeiliSearch>,
|
||||
path: web::Path<IndexParam>,
|
||||
params: web::Query<SearchQueryGet>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
|
@ -99,7 +99,7 @@ pub async fn search_with_url_query(
|
|||
}
|
||||
|
||||
pub async fn search_with_post(
|
||||
data: GuardedData<Public, Data>,
|
||||
data: GuardedData<Public, MeiliSearch>,
|
||||
path: web::Path<IndexParam>,
|
||||
params: web::Json<SearchQuery>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
|
|
|
@ -148,7 +148,7 @@
|
|||
//);
|
||||
|
||||
//pub async fn update_all(
|
||||
//data: GuardedData<Private, Data>,
|
||||
//data: GuardedData<Private, MeiliSearch>,
|
||||
//index_uid: web::Path<String>,
|
||||
//body: web::Json<Settings<Unchecked>>,
|
||||
//) -> Result<HttpResponse, ResponseError> {
|
||||
|
@ -162,7 +162,7 @@
|
|||
//}
|
||||
|
||||
//pub async fn get_all(
|
||||
//data: GuardedData<Private, Data>,
|
||||
//data: GuardedData<Private, MeiliSearch>,
|
||||
//index_uid: web::Path<String>,
|
||||
//) -> Result<HttpResponse, ResponseError> {
|
||||
//let settings = data.settings(index_uid.into_inner()).await?;
|
||||
|
@ -171,7 +171,7 @@
|
|||
//}
|
||||
|
||||
//pub async fn delete_all(
|
||||
//data: GuardedData<Private, Data>,
|
||||
//data: GuardedData<Private, MeiliSearch>,
|
||||
//index_uid: web::Path<String>,
|
||||
//) -> Result<HttpResponse, ResponseError> {
|
||||
//let settings = Settings::cleared();
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
use actix_web::{web, HttpResponse};
|
||||
use chrono::{DateTime, Utc};
|
||||
use log::debug;
|
||||
use meilisearch_lib::MeiliSearch;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::error::ResponseError;
|
||||
use crate::extractors::authentication::{policies::*, GuardedData};
|
||||
use crate::routes::{IndexParam, UpdateStatusResponse};
|
||||
use crate::Data;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::resource("").route(web::get().to(get_all_updates_status)))
|
||||
|
@ -37,12 +37,12 @@ pub struct UpdateParam {
|
|||
}
|
||||
|
||||
pub async fn get_update_status(
|
||||
data: GuardedData<Private, Data>,
|
||||
data: GuardedData<Private, MeiliSearch>,
|
||||
path: web::Path<UpdateParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let params = path.into_inner();
|
||||
let meta = data
|
||||
.get_update_status(params.index_uid, params.update_id)
|
||||
.update_status(params.index_uid, params.update_id)
|
||||
.await?;
|
||||
let meta = UpdateStatusResponse::from(meta);
|
||||
debug!("returns: {:?}", meta);
|
||||
|
@ -50,10 +50,10 @@ pub async fn get_update_status(
|
|||
}
|
||||
|
||||
pub async fn get_all_updates_status(
|
||||
data: GuardedData<Private, Data>,
|
||||
data: GuardedData<Private, MeiliSearch>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let metas = data.get_updates_status(path.into_inner().index_uid).await?;
|
||||
let metas = data.all_update_status(path.into_inner().index_uid).await?;
|
||||
let metas = metas
|
||||
.into_iter()
|
||||
.map(UpdateStatusResponse::from)
|
||||
|
|
|
@ -5,12 +5,12 @@ use chrono::{DateTime, Utc};
|
|||
use log::debug;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use meilisearch_lib::{MeiliSearch, UpdateResult, UpdateStatus, RegisterUpdate};
|
||||
use meilisearch_lib::index::{Settings, Unchecked};
|
||||
|
||||
use crate::error::ResponseError;
|
||||
use crate::extractors::authentication::{policies::*, GuardedData};
|
||||
use crate::index::{Settings, Unchecked};
|
||||
use crate::index_controller::update_actor::RegisterUpdate;
|
||||
use crate::index_controller::{UpdateResult, UpdateStatus};
|
||||
use crate::{ApiKeys, Data};
|
||||
use crate::ApiKeys;
|
||||
|
||||
mod dump;
|
||||
mod indexes;
|
||||
|
@ -187,15 +187,17 @@ impl From<UpdateStatus> for UpdateStatusResponse {
|
|||
let duration = Duration::from_millis(duration as u64).as_secs_f64();
|
||||
|
||||
let update_id = failed.id();
|
||||
let response = failed.error;
|
||||
let processed_at = failed.failed_at;
|
||||
let enqueued_at = failed.from.from.enqueued_at;
|
||||
let response = failed.into();
|
||||
|
||||
let content = FailedUpdateResult {
|
||||
update_id,
|
||||
update_type,
|
||||
response,
|
||||
duration,
|
||||
enqueued_at: failed.from.from.enqueued_at,
|
||||
processed_at: failed.failed_at,
|
||||
enqueued_at,
|
||||
processed_at,
|
||||
};
|
||||
UpdateStatusResponse::Failed { content }
|
||||
}
|
||||
|
@ -230,7 +232,7 @@ pub async fn running() -> HttpResponse {
|
|||
HttpResponse::Ok().json(serde_json::json!({ "status": "MeiliSearch is running" }))
|
||||
}
|
||||
|
||||
async fn get_stats(data: GuardedData<Private, Data>) -> Result<HttpResponse, ResponseError> {
|
||||
async fn get_stats(data: GuardedData<Private, MeiliSearch>) -> Result<HttpResponse, ResponseError> {
|
||||
let response = data.get_all_stats().await?;
|
||||
|
||||
debug!("returns: {:?}", response);
|
||||
|
@ -245,7 +247,7 @@ struct VersionResponse {
|
|||
pkg_version: String,
|
||||
}
|
||||
|
||||
async fn get_version(_data: GuardedData<Private, Data>) -> HttpResponse {
|
||||
async fn get_version(_data: GuardedData<Private, MeiliSearch>) -> HttpResponse {
|
||||
let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
|
||||
let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
|
||||
|
||||
|
@ -288,7 +290,7 @@ mod test {
|
|||
macro_rules! impl_is_policy {
|
||||
($($param:ident)*) => {
|
||||
impl<Policy, Func, $($param,)* Res> Is<Policy, (($($param,)*), Res)> for Func
|
||||
where Func: Fn(GuardedData<Policy, Data>, $($param,)*) -> Res {}
|
||||
where Func: Fn(GuardedData<Policy, MeiliSearch>, $($param,)*) -> Res {}
|
||||
|
||||
};
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue