Mirror of https://github.com/meilisearch/MeiliSearch, synced 2025-07-03 11:57:07 +02:00
split meilisearch-http and meilisearch-lib
This commit is contained in:
  parent 09d4e37044
  commit 60518449fc

63 changed files with 608 additions and 324 deletions
62  meilisearch-lib/src/error.rs  (new file)
@@ -0,0 +1,62 @@
use std::error::Error;
use std::fmt;

use meilisearch_error::{Code, ErrorCode};
use milli::UserError;

macro_rules! internal_error {
    ($target:ty : $($other:path), *) => {
        $(
            impl From<$other> for $target {
                fn from(other: $other) -> Self {
                    Self::Internal(Box::new(other))
                }
            }
        )*
    }
}

#[derive(Debug)]
pub struct MilliError<'a>(pub &'a milli::Error);

impl Error for MilliError<'_> {}

impl fmt::Display for MilliError<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.0.fmt(f)
    }
}

impl ErrorCode for MilliError<'_> {
    fn error_code(&self) -> Code {
        match self.0 {
            milli::Error::InternalError(_) => Code::Internal,
            milli::Error::IoError(_) => Code::Internal,
            milli::Error::UserError(ref error) => {
                match error {
                    // TODO: wait for spec for new error codes.
                    | UserError::SerdeJson(_)
                    | UserError::MaxDatabaseSizeReached
                    | UserError::InvalidCriterionName { .. }
                    | UserError::InvalidDocumentId { .. }
                    | UserError::InvalidStoreFile
                    | UserError::NoSpaceLeftOnDevice
                    | UserError::InvalidAscDescSyntax { .. }
                    | UserError::DocumentLimitReached => Code::Internal,
                    UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded,
                    UserError::InvalidFilter(_) => Code::Filter,
                    UserError::InvalidFilterAttribute(_) => Code::Filter,
                    UserError::InvalidSortName { .. } => Code::Sort,
                    UserError::MissingDocumentId { .. } => Code::MissingDocumentId,
                    UserError::MissingPrimaryKey => Code::MissingPrimaryKey,
                    UserError::PrimaryKeyCannotBeChanged => Code::PrimaryKeyAlreadyPresent,
                    UserError::PrimaryKeyCannotBeReset => Code::PrimaryKeyAlreadyPresent,
                    UserError::SortRankingRuleMissing => Code::Sort,
                    UserError::UnknownInternalDocumentId { .. } => Code::DocumentNotFound,
                    UserError::InvalidFacetsDistribution { .. } => Code::BadRequest,
                    UserError::InvalidSortableAttribute { .. } => Code::Sort,
                }
            }
        }
    }
}
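Editorial note (not part of the commit): the `internal_error!` macro above generates `From` impls that box the source error into the target's `Internal` variant, so `?` can lift any listed error type. A minimal self-contained sketch of the expansion, using a hypothetical `MyError` in place of the real target enums invoked later in this diff:

use std::error::Error;

// Toy error enum standing in for e.g. `IndexError` below; hypothetical.
#[derive(Debug)]
enum MyError {
    Internal(Box<dyn Error + Send + Sync + 'static>),
}

// What `internal_error!(MyError: std::io::Error)` would expand to:
impl From<std::io::Error> for MyError {
    fn from(other: std::io::Error) -> Self {
        // Type-erase the concrete error into the single `Internal` variant.
        Self::Internal(Box::new(other))
    }
}

fn read(path: &str) -> Result<String, MyError> {
    // `?` uses the generated `From` impl to convert `std::io::Error`.
    Ok(std::fs::read_to_string(path)?)
}

fn main() {
    match read("/nonexistent") {
        Err(MyError::Internal(e)) => println!("internal: {e}"),
        Ok(_) => (),
    }
}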
166  meilisearch-lib/src/index/dump.rs  (new file)
@@ -0,0 +1,166 @@
use std::fs::File;
use std::io::Write;
use std::path::Path;

use heed::RoTxn;
use indexmap::IndexMap;
use serde::{Deserialize, Serialize};

use crate::options::IndexerOpts;

use super::error::Result;
use super::{Index, Settings, Unchecked};

#[derive(Serialize, Deserialize)]
struct DumpMeta {
    settings: Settings<Unchecked>,
    primary_key: Option<String>,
}

const META_FILE_NAME: &str = "meta.json";
const DATA_FILE_NAME: &str = "documents.jsonl";

impl Index {
    pub fn dump(&self, path: impl AsRef<Path>) -> Result<()> {
        // Acquire a write txn to make sure any ongoing write is finished before we start.
        let txn = self.env.write_txn()?;

        self.dump_documents(&txn, &path)?;
        self.dump_meta(&txn, &path)?;

        Ok(())
    }

    fn dump_documents(&self, txn: &RoTxn, path: impl AsRef<Path>) -> Result<()> {
        let document_file_path = path.as_ref().join(DATA_FILE_NAME);
        let mut document_file = File::create(&document_file_path)?;

        let documents = self.all_documents(txn)?;
        let fields_ids_map = self.fields_ids_map(txn)?;

        // Dump the documents.
        let mut json_map = IndexMap::new();
        for document in documents {
            let (_, reader) = document?;

            for (fid, bytes) in reader.iter() {
                if let Some(name) = fields_ids_map.name(fid) {
                    json_map.insert(name, serde_json::from_slice::<serde_json::Value>(bytes)?);
                }
            }

            serde_json::to_writer(&mut document_file, &json_map)?;
            document_file.write_all(b"\n")?;

            json_map.clear();
        }

        Ok(())
    }

    fn dump_meta(&self, txn: &RoTxn, path: impl AsRef<Path>) -> Result<()> {
        let meta_file_path = path.as_ref().join(META_FILE_NAME);
        let mut meta_file = File::create(&meta_file_path)?;

        let settings = self.settings_txn(txn)?.into_unchecked();
        let primary_key = self.primary_key(txn)?.map(String::from);
        let meta = DumpMeta {
            settings,
            primary_key,
        };

        serde_json::to_writer(&mut meta_file, &meta)?;

        Ok(())
    }

    pub fn load_dump(
        _src: impl AsRef<Path>,
        _dst: impl AsRef<Path>,
        _size: usize,
        _indexing_options: &IndexerOpts,
    ) -> anyhow::Result<()> {
        //let dir_name = src
        //    .as_ref()
        //    .file_name()
        //    .with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?;

        //let dst_dir_path = dst.as_ref().join("indexes").join(dir_name);
        //create_dir_all(&dst_dir_path)?;

        //let meta_path = src.as_ref().join(META_FILE_NAME);
        //let mut meta_file = File::open(meta_path)?;

        //// We first deserialize the dump meta into a serde_json::Value and change
        //// the custom ranking rules settings from the old format to the new format.
        //let mut meta: Value = serde_json::from_reader(&mut meta_file)?;
        //if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") {
        //    convert_custom_ranking_rules(ranking_rules);
        //}

        //// Then we serialize it back into a vec to deserialize it
        //// into a `DumpMeta` struct with the newly patched `rankingRules` format.
        //let patched_meta = serde_json::to_vec(&meta)?;

        //let DumpMeta {
        //    settings,
        //    primary_key,
        //} = serde_json::from_slice(&patched_meta)?;
        //let settings = settings.check();
        //let index = Self::open(&dst_dir_path, size)?;
        //let mut txn = index.write_txn()?;

        //let handler = UpdateHandler::new(indexing_options)?;

        //index.update_settings_txn(&mut txn, &settings, handler.update_builder(0))?;

        //let document_file_path = src.as_ref().join(DATA_FILE_NAME);
        //let reader = File::open(&document_file_path)?;
        //let mut reader = BufReader::new(reader);
        //reader.fill_buf()?;

        // If the document file is empty, we don't perform the document addition,
        // to prevent a primary-key error from being thrown.
        todo!("fix obk document dumps")
        //if !reader.buffer().is_empty() {
        //    index.update_documents_txn(
        //        &mut txn,
        //        IndexDocumentsMethod::UpdateDocuments,
        //        Some(reader),
        //        handler.update_builder(0),
        //        primary_key.as_deref(),
        //    )?;
        //}

        //txn.commit()?;

        //match Arc::try_unwrap(index.0) {
        //    Ok(inner) => inner.prepare_for_closing().wait(),
        //    Err(_) => bail!("Could not close index properly."),
        //}

        //Ok(())
    }
}

// /// Converts the ranking rules from the format `asc(_)`, `desc(_)` to the format `_:asc`, `_:desc`.
// ///
// /// This is done for compatibility reasons, and to avoid a new dump version,
// /// since the new syntax was introduced soon after the new dump version.
//fn convert_custom_ranking_rules(ranking_rules: &mut Value) {
//    *ranking_rules = match ranking_rules.take() {
//        Value::Array(values) => values
//            .into_iter()
//            .filter_map(|value| match value {
//                Value::String(s) if s.starts_with("asc") => asc_ranking_rule(&s)
//                    .map(|f| format!("{}:asc", f))
//                    .map(Value::String),
//                Value::String(s) if s.starts_with("desc") => desc_ranking_rule(&s)
//                    .map(|f| format!("{}:desc", f))
//                    .map(Value::String),
//                otherwise => Some(otherwise),
//            })
//            .collect(),
//        otherwise => otherwise,
//    }
//}
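For orientation (illustrative, not part of the commit): after `index.dump(&dir)?` the directory contains the two files named by the constants above. Field names and values below are hypothetical; the structure follows `DumpMeta` and the JSONL writer in `dump_documents`:

// Resulting layout after `index.dump(&dir)?`:
//
//   <dir>/meta.json        {"settings":{...},"primary_key":"id"}
//   <dir>/documents.jsonl  one JSON object per document, e.g.
//                          {"id":1,"title":"Shazam!"}
//
// `meta.json` follows `DumpMeta` above (the nested settings fields are
// camelCased by the `Settings` serializer); `documents.jsonl` is written
// one line per document by `dump_documents`.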
52  meilisearch-lib/src/index/error.rs  (new file)
@@ -0,0 +1,52 @@
use std::error::Error;

use meilisearch_error::{Code, ErrorCode};
use serde_json::Value;

use crate::error::MilliError;

pub type Result<T> = std::result::Result<T, IndexError>;

#[derive(Debug, thiserror::Error)]
pub enum IndexError {
    #[error("Internal error: {0}")]
    Internal(Box<dyn Error + Send + Sync + 'static>),
    #[error("Document with id {0} not found.")]
    DocumentNotFound(String),
    #[error("{0}")]
    Facet(#[from] FacetError),
    #[error("{0}")]
    Milli(#[from] milli::Error),
}

internal_error!(
    IndexError: std::io::Error,
    heed::Error,
    fst::Error,
    serde_json::Error
);

impl ErrorCode for IndexError {
    fn error_code(&self) -> Code {
        match self {
            IndexError::Internal(_) => Code::Internal,
            IndexError::DocumentNotFound(_) => Code::DocumentNotFound,
            IndexError::Facet(e) => e.error_code(),
            IndexError::Milli(e) => MilliError(e).error_code(),
        }
    }
}

#[derive(Debug, thiserror::Error)]
pub enum FacetError {
    #[error("Invalid facet expression, expected {}, found: {1}", .0.join(", "))]
    InvalidExpression(&'static [&'static str], Value),
}

impl ErrorCode for FacetError {
    fn error_code(&self) -> Code {
        match self {
            FacetError::InvalidExpression(_, _) => Code::Facet,
        }
    }
}
202  meilisearch-lib/src/index/mod.rs  (new file)
@@ -0,0 +1,202 @@
use std::collections::{BTreeSet, HashSet};
use std::fs::create_dir_all;
use std::marker::PhantomData;
use std::ops::Deref;
use std::path::Path;
use std::sync::Arc;

use heed::{EnvOpenOptions, RoTxn};
use milli::update::Setting;
use milli::{obkv_to_json, FieldId};
use serde_json::{Map, Value};

use error::Result;
pub use search::{default_crop_length, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
pub use updates::{Checked, Facets, Settings, Unchecked};

use crate::EnvSizer;
use crate::index_controller::update_file_store::UpdateFileStore;

use self::error::IndexError;

pub mod error;
pub mod update_handler;

mod dump;
mod search;
mod updates;

pub type Document = Map<String, Value>;

#[derive(Clone)]
pub struct Index {
    pub inner: Arc<milli::Index>,
    update_file_store: Arc<UpdateFileStore>,
}

impl Deref for Index {
    type Target = milli::Index;

    fn deref(&self) -> &Self::Target {
        self.inner.as_ref()
    }
}

impl Index {
    pub fn open(
        path: impl AsRef<Path>,
        size: usize,
        update_file_store: Arc<UpdateFileStore>,
    ) -> Result<Self> {
        create_dir_all(&path)?;
        let mut options = EnvOpenOptions::new();
        options.map_size(size);
        let inner = Arc::new(milli::Index::new(options, &path)?);
        Ok(Index {
            inner,
            update_file_store,
        })
    }

    pub fn settings(&self) -> Result<Settings<Checked>> {
        let txn = self.read_txn()?;
        self.settings_txn(&txn)
    }

    pub fn settings_txn(&self, txn: &RoTxn) -> Result<Settings<Checked>> {
        let displayed_attributes = self
            .displayed_fields(txn)?
            .map(|fields| fields.into_iter().map(String::from).collect());

        let searchable_attributes = self
            .searchable_fields(txn)?
            .map(|fields| fields.into_iter().map(String::from).collect());

        let filterable_attributes = self.filterable_fields(txn)?.into_iter().collect();

        let sortable_attributes = self.sortable_fields(txn)?.into_iter().collect();

        let criteria = self
            .criteria(txn)?
            .into_iter()
            .map(|c| c.to_string())
            .collect();

        let stop_words = self
            .stop_words(txn)?
            .map(|stop_words| -> Result<BTreeSet<_>> {
                Ok(stop_words.stream().into_strs()?.into_iter().collect())
            })
            .transpose()?
            .unwrap_or_else(BTreeSet::new);
        let distinct_field = self.distinct_field(txn)?.map(String::from);

        // In milli, each word in the synonyms map was split on its separator.
        // Since we lost this information, we put a space between words.
        let synonyms = self
            .synonyms(txn)?
            .iter()
            .map(|(key, values)| {
                (
                    key.join(" "),
                    values.iter().map(|value| value.join(" ")).collect(),
                )
            })
            .collect();

        Ok(Settings {
            displayed_attributes: match displayed_attributes {
                Some(attrs) => Setting::Set(attrs),
                None => Setting::Reset,
            },
            searchable_attributes: match searchable_attributes {
                Some(attrs) => Setting::Set(attrs),
                None => Setting::Reset,
            },
            filterable_attributes: Setting::Set(filterable_attributes),
            sortable_attributes: Setting::Set(sortable_attributes),
            ranking_rules: Setting::Set(criteria),
            stop_words: Setting::Set(stop_words),
            distinct_attribute: match distinct_field {
                Some(field) => Setting::Set(field),
                None => Setting::Reset,
            },
            synonyms: Setting::Set(synonyms),
            _kind: PhantomData,
        })
    }

    pub fn retrieve_documents<S: AsRef<str>>(
        &self,
        offset: usize,
        limit: usize,
        attributes_to_retrieve: Option<Vec<S>>,
    ) -> Result<Vec<Map<String, Value>>> {
        let txn = self.read_txn()?;

        let fields_ids_map = self.fields_ids_map(&txn)?;
        let fields_to_display =
            self.fields_to_display(&txn, &attributes_to_retrieve, &fields_ids_map)?;

        let iter = self.documents.range(&txn, &(..))?.skip(offset).take(limit);

        let mut documents = Vec::new();

        for entry in iter {
            let (_id, obkv) = entry?;
            let object = obkv_to_json(&fields_to_display, &fields_ids_map, obkv)?;
            documents.push(object);
        }

        Ok(documents)
    }

    pub fn retrieve_document<S: AsRef<str>>(
        &self,
        doc_id: String,
        attributes_to_retrieve: Option<Vec<S>>,
    ) -> Result<Map<String, Value>> {
        let txn = self.read_txn()?;

        let fields_ids_map = self.fields_ids_map(&txn)?;

        let fields_to_display =
            self.fields_to_display(&txn, &attributes_to_retrieve, &fields_ids_map)?;

        let internal_id = self
            .external_documents_ids(&txn)?
            .get(doc_id.as_bytes())
            .ok_or_else(|| IndexError::DocumentNotFound(doc_id.clone()))?;

        let document = self
            .documents(&txn, std::iter::once(internal_id))?
            .into_iter()
            .next()
            .map(|(_, d)| d)
            .ok_or(IndexError::DocumentNotFound(doc_id))?;

        let document = obkv_to_json(&fields_to_display, &fields_ids_map, document)?;

        Ok(document)
    }

    pub fn size(&self) -> u64 {
        self.env.size()
    }

    fn fields_to_display<S: AsRef<str>>(
        &self,
        txn: &heed::RoTxn,
        attributes_to_retrieve: &Option<Vec<S>>,
        fields_ids_map: &milli::FieldsIdsMap,
    ) -> Result<Vec<FieldId>> {
        let mut displayed_fields_ids = match self.displayed_fields_ids(txn)? {
            Some(ids) => ids.into_iter().collect::<Vec<_>>(),
            None => fields_ids_map.iter().map(|(id, _)| id).collect(),
        };

        let attributes_to_retrieve_ids = match attributes_to_retrieve {
            Some(attrs) => attrs
                .iter()
                .filter_map(|f| fields_ids_map.id(f.as_ref()))
                .collect::<HashSet<_>>(),
            None => fields_ids_map.iter().map(|(id, _)| id).collect(),
        };

        displayed_fields_ids.retain(|fid| attributes_to_retrieve_ids.contains(fid));
        Ok(displayed_fields_ids)
    }
}
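Worth noting (illustrative, not part of the diff): because `Index` derefs to `milli::Index`, the wrapper calls milli methods like `read_txn` and `fields_ids_map` directly, as the code above does. A minimal self-contained sketch of the same pattern, with hypothetical stand-in types:

use std::ops::Deref;
use std::sync::Arc;

// `Inner` stands in for `milli::Index`; `Wrapper` for the `Index` above.
struct Inner;
impl Inner {
    fn size(&self) -> u64 { 42 }
}

struct Wrapper(Arc<Inner>);

impl Deref for Wrapper {
    type Target = Inner;
    fn deref(&self) -> &Self::Target {
        self.0.as_ref()
    }
}

fn main() {
    let w = Wrapper(Arc::new(Inner));
    // Methods of `Inner` are reachable through the wrapper via auto-deref,
    // which is how `self.env`, `self.read_txn()`, etc. resolve above.
    assert_eq!(w.size(), 42);
}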
1335  meilisearch-lib/src/index/search.rs  (new file)
File diff suppressed because it is too large.
72  meilisearch-lib/src/index/update_handler.rs  (new file)
@@ -0,0 +1,72 @@
use crate::index::Index;
use milli::update::UpdateBuilder;
use milli::CompressionType;
use rayon::ThreadPool;

use crate::index_controller::update_actor::RegisterUpdate;
use crate::index_controller::{Failed, Processed, Processing};
use crate::options::IndexerOpts;

pub struct UpdateHandler {
    max_nb_chunks: Option<usize>,
    chunk_compression_level: Option<u32>,
    thread_pool: ThreadPool,
    log_frequency: usize,
    max_memory: Option<usize>,
    chunk_compression_type: CompressionType,
}

impl UpdateHandler {
    pub fn new(opt: &IndexerOpts) -> anyhow::Result<Self> {
        let thread_pool = rayon::ThreadPoolBuilder::new()
            .num_threads(opt.indexing_jobs.unwrap_or(num_cpus::get() / 2))
            .build()?;

        Ok(Self {
            max_nb_chunks: opt.max_nb_chunks,
            chunk_compression_level: opt.chunk_compression_level,
            thread_pool,
            log_frequency: opt.log_every_n,
            max_memory: opt.max_memory.map(|m| m.get_bytes() as usize),
            chunk_compression_type: opt.chunk_compression_type,
        })
    }

    pub fn update_builder(&self, update_id: u64) -> UpdateBuilder {
        // We prepare the update by using the update builder.
        let mut update_builder = UpdateBuilder::new(update_id);
        if let Some(max_nb_chunks) = self.max_nb_chunks {
            update_builder.max_nb_chunks(max_nb_chunks);
        }
        if let Some(chunk_compression_level) = self.chunk_compression_level {
            update_builder.chunk_compression_level(chunk_compression_level);
        }
        update_builder.thread_pool(&self.thread_pool);
        update_builder.log_every_n(self.log_frequency);
        if let Some(max_memory) = self.max_memory {
            update_builder.max_memory(max_memory);
        }
        update_builder.chunk_compression_type(self.chunk_compression_type);
        update_builder
    }

    pub fn handle_update(&self, index: Index, meta: Processing) -> Result<Processed, Failed> {
        let update_id = meta.id();
        let update_builder = self.update_builder(update_id);

        let result = match meta.meta() {
            RegisterUpdate::DocumentAddition {
                primary_key,
                content_uuid,
                method,
            } => index.update_documents(
                *method,
                *content_uuid,
                update_builder,
                primary_key.as_deref(),
            ),
        };

        match result {
            Ok(result) => Ok(meta.process(result)),
            Err(e) => Err(meta.fail(e)),
        }
    }
}
366  meilisearch-lib/src/index/updates.rs  (new file)
@@ -0,0 +1,366 @@
use std::collections::{BTreeMap, BTreeSet};
use std::marker::PhantomData;
use std::num::NonZeroUsize;

use log::{debug, info, trace};
use milli::documents::DocumentBatchReader;
use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder};
use serde::{Deserialize, Serialize, Serializer};
use uuid::Uuid;

use crate::index_controller::UpdateResult;

use super::error::Result;
use super::Index;

fn serialize_with_wildcard<S>(
    field: &Setting<Vec<String>>,
    s: S,
) -> std::result::Result<S::Ok, S::Error>
where
    S: Serializer,
{
    let wildcard = vec!["*".to_string()];
    match field {
        Setting::Set(value) => Some(value),
        Setting::Reset => Some(&wildcard),
        Setting::NotSet => None,
    }
    .serialize(s)
}

#[derive(Clone, Default, Debug, Serialize)]
pub struct Checked;

#[derive(Clone, Default, Debug, Serialize, Deserialize)]
pub struct Unchecked;

#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
#[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))]
pub struct Settings<T> {
    #[serde(
        default,
        serialize_with = "serialize_with_wildcard",
        skip_serializing_if = "Setting::is_not_set"
    )]
    pub displayed_attributes: Setting<Vec<String>>,

    #[serde(
        default,
        serialize_with = "serialize_with_wildcard",
        skip_serializing_if = "Setting::is_not_set"
    )]
    pub searchable_attributes: Setting<Vec<String>>,

    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub filterable_attributes: Setting<BTreeSet<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub sortable_attributes: Setting<BTreeSet<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub ranking_rules: Setting<Vec<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub stop_words: Setting<BTreeSet<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub distinct_attribute: Setting<String>,

    #[serde(skip)]
    pub _kind: PhantomData<T>,
}

impl Settings<Checked> {
    pub fn cleared() -> Settings<Checked> {
        Settings {
            displayed_attributes: Setting::Reset,
            searchable_attributes: Setting::Reset,
            filterable_attributes: Setting::Reset,
            sortable_attributes: Setting::Reset,
            ranking_rules: Setting::Reset,
            stop_words: Setting::Reset,
            synonyms: Setting::Reset,
            distinct_attribute: Setting::Reset,
            _kind: PhantomData,
        }
    }

    pub fn into_unchecked(self) -> Settings<Unchecked> {
        let Self {
            displayed_attributes,
            searchable_attributes,
            filterable_attributes,
            sortable_attributes,
            ranking_rules,
            stop_words,
            synonyms,
            distinct_attribute,
            ..
        } = self;

        Settings {
            displayed_attributes,
            searchable_attributes,
            filterable_attributes,
            sortable_attributes,
            ranking_rules,
            stop_words,
            synonyms,
            distinct_attribute,
            _kind: PhantomData,
        }
    }
}

impl Settings<Unchecked> {
    pub fn check(self) -> Settings<Checked> {
        let displayed_attributes = match self.displayed_attributes {
            Setting::Set(fields) => {
                if fields.iter().any(|f| f == "*") {
                    Setting::Reset
                } else {
                    Setting::Set(fields)
                }
            }
            otherwise => otherwise,
        };

        let searchable_attributes = match self.searchable_attributes {
            Setting::Set(fields) => {
                if fields.iter().any(|f| f == "*") {
                    Setting::Reset
                } else {
                    Setting::Set(fields)
                }
            }
            otherwise => otherwise,
        };

        Settings {
            displayed_attributes,
            searchable_attributes,
            filterable_attributes: self.filterable_attributes,
            sortable_attributes: self.sortable_attributes,
            ranking_rules: self.ranking_rules,
            stop_words: self.stop_words,
            synonyms: self.synonyms,
            distinct_attribute: self.distinct_attribute,
            _kind: PhantomData,
        }
    }
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct Facets {
    pub level_group_size: Option<NonZeroUsize>,
    pub min_level_size: Option<NonZeroUsize>,
}

impl Index {
    pub fn update_documents(
        &self,
        method: IndexDocumentsMethod,
        content_uuid: Uuid,
        update_builder: UpdateBuilder,
        primary_key: Option<&str>,
    ) -> Result<UpdateResult> {
        let mut txn = self.write_txn()?;
        let result =
            self.update_documents_txn(&mut txn, method, content_uuid, update_builder, primary_key)?;
        txn.commit()?;

        Ok(result)
    }

    pub fn update_documents_txn<'a, 'b>(
        &'a self,
        txn: &mut heed::RwTxn<'a, 'b>,
        method: IndexDocumentsMethod,
        content_uuid: Uuid,
        update_builder: UpdateBuilder,
        primary_key: Option<&str>,
    ) -> Result<UpdateResult> {
        trace!("performing document addition");

        // Set the primary key if not set already; ignore if already set.
        if let (None, Some(primary_key)) = (self.primary_key(txn)?, primary_key) {
            let mut builder = UpdateBuilder::new(0).settings(txn, self);
            builder.set_primary_key(primary_key.to_string());
            builder.execute(|_, _| ())?;
        }

        let indexing_callback =
            |indexing_step, update_id| debug!("update {}: {:?}", update_id, indexing_step);

        let content_file = self.update_file_store.get_update(content_uuid).unwrap();
        let reader = DocumentBatchReader::from_reader(content_file).unwrap();

        let mut builder = update_builder.index_documents(txn, self);
        builder.index_documents_method(method);
        let addition = builder.execute(reader, indexing_callback)?;

        info!("document addition done: {:?}", addition);

        Ok(UpdateResult::DocumentsAddition(addition))
    }

    //pub fn clear_documents(&self, update_builder: UpdateBuilder) -> Result<UpdateResult> {
    //    // We must use the write transaction of the update here.
    //    let mut wtxn = self.write_txn()?;
    //    let builder = update_builder.clear_documents(&mut wtxn, self);

    //    let _count = builder.execute()?;

    //    wtxn.commit()
    //        .and(Ok(UpdateResult::Other))
    //        .map_err(Into::into)
    //}

    //pub fn update_settings_txn<'a, 'b>(
    //    &'a self,
    //    txn: &mut heed::RwTxn<'a, 'b>,
    //    settings: &Settings<Checked>,
    //    update_builder: UpdateBuilder,
    //) -> Result<UpdateResult> {
    //    // We must use the write transaction of the update here.
    //    let mut builder = update_builder.settings(txn, self);

    //    match settings.searchable_attributes {
    //        Setting::Set(ref names) => builder.set_searchable_fields(names.clone()),
    //        Setting::Reset => builder.reset_searchable_fields(),
    //        Setting::NotSet => (),
    //    }

    //    match settings.displayed_attributes {
    //        Setting::Set(ref names) => builder.set_displayed_fields(names.clone()),
    //        Setting::Reset => builder.reset_displayed_fields(),
    //        Setting::NotSet => (),
    //    }

    //    match settings.filterable_attributes {
    //        Setting::Set(ref facets) => {
    //            builder.set_filterable_fields(facets.clone().into_iter().collect())
    //        }
    //        Setting::Reset => builder.reset_filterable_fields(),
    //        Setting::NotSet => (),
    //    }

    //    match settings.sortable_attributes {
    //        Setting::Set(ref fields) => {
    //            builder.set_sortable_fields(fields.iter().cloned().collect())
    //        }
    //        Setting::Reset => builder.reset_sortable_fields(),
    //        Setting::NotSet => (),
    //    }

    //    match settings.ranking_rules {
    //        Setting::Set(ref criteria) => builder.set_criteria(criteria.clone()),
    //        Setting::Reset => builder.reset_criteria(),
    //        Setting::NotSet => (),
    //    }

    //    match settings.stop_words {
    //        Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()),
    //        Setting::Reset => builder.reset_stop_words(),
    //        Setting::NotSet => (),
    //    }

    //    match settings.synonyms {
    //        Setting::Set(ref synonyms) => {
    //            builder.set_synonyms(synonyms.clone().into_iter().collect())
    //        }
    //        Setting::Reset => builder.reset_synonyms(),
    //        Setting::NotSet => (),
    //    }

    //    match settings.distinct_attribute {
    //        Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()),
    //        Setting::Reset => builder.reset_distinct_field(),
    //        Setting::NotSet => (),
    //    }

    //    builder.execute(|indexing_step, update_id| {
    //        debug!("update {}: {:?}", update_id, indexing_step)
    //    })?;

    //    Ok(UpdateResult::Other)
    //}

    //pub fn update_settings(
    //    &self,
    //    settings: &Settings<Checked>,
    //    update_builder: UpdateBuilder,
    //) -> Result<UpdateResult> {
    //    let mut txn = self.write_txn()?;
    //    let result = self.update_settings_txn(&mut txn, settings, update_builder)?;
    //    txn.commit()?;
    //    Ok(result)
    //}

    //pub fn delete_documents(
    //    &self,
    //    document_ids: &[String],
    //    update_builder: UpdateBuilder,
    //) -> Result<UpdateResult> {
    //    let mut txn = self.write_txn()?;
    //    let mut builder = update_builder.delete_documents(&mut txn, self)?;

    //    // We ignore nonexistent document ids.
    //    document_ids.iter().for_each(|id| {
    //        builder.delete_external_id(id);
    //    });

    //    let deleted = builder.execute()?;
    //    txn.commit()
    //        .and(Ok(UpdateResult::DocumentDeletion { deleted }))
    //        .map_err(Into::into)
    //}
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_setting_check() {
        // test no changes
        let settings = Settings {
            displayed_attributes: Setting::Set(vec![String::from("hello")]),
            searchable_attributes: Setting::Set(vec![String::from("hello")]),
            filterable_attributes: Setting::NotSet,
            sortable_attributes: Setting::NotSet,
            ranking_rules: Setting::NotSet,
            stop_words: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
            _kind: PhantomData::<Unchecked>,
        };

        let checked = settings.clone().check();
        assert_eq!(settings.displayed_attributes, checked.displayed_attributes);
        assert_eq!(
            settings.searchable_attributes,
            checked.searchable_attributes
        );

        // test wildcard
        let settings = Settings {
            displayed_attributes: Setting::Set(vec![String::from("*")]),
            searchable_attributes: Setting::Set(vec![String::from("hello"), String::from("*")]),
            filterable_attributes: Setting::NotSet,
            sortable_attributes: Setting::NotSet,
            ranking_rules: Setting::NotSet,
            stop_words: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
            _kind: PhantomData::<Unchecked>,
        };

        let checked = settings.check();
        assert_eq!(checked.displayed_attributes, Setting::Reset);
        assert_eq!(checked.searchable_attributes, Setting::Reset);
    }
}
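Two details worth calling out (illustrative notes, not part of the diff): `check()` collapses any attribute list containing "*" back to `Setting::Reset` (the test above exercises this), and `serialize_with_wildcard` makes a `Reset` render as `["*"]` on the wire. A minimal self-contained sketch of the latter, assuming `Setting` behaves like the three-state value used above:

use serde::{Serialize, Serializer};

// Hypothetical three-state mirror of `milli::update::Setting`, for illustration.
enum Setting<T> {
    Set(T),
    Reset,
    NotSet,
}

// Same shape as `serialize_with_wildcard` above: `Reset` is shown to API
// clients as the explicit wildcard list `["*"]`, and `NotSet` as `null`.
fn serialize_with_wildcard<S: Serializer>(
    field: &Setting<Vec<String>>,
    s: S,
) -> Result<S::Ok, S::Error> {
    let wildcard = vec!["*".to_string()];
    match field {
        Setting::Set(value) => Some(value),
        Setting::Reset => Some(&wildcard),
        Setting::NotSet => None,
    }
    .serialize(s)
}

// Small helper so we can drive the serializer fn directly.
struct SerHelper(Setting<Vec<String>>);
impl Serialize for SerHelper {
    fn serialize<S: Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
        serialize_with_wildcard(&self.0, s)
    }
}

fn main() {
    let json = serde_json::to_string(&SerHelper(Setting::Reset)).unwrap();
    assert_eq!(json, r#"["*"]"#);
}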
157  meilisearch-lib/src/index_controller/dump_actor/actor.rs  (new file)
@@ -0,0 +1,157 @@
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;

use async_stream::stream;
use chrono::Utc;
use futures::{lock::Mutex, stream::StreamExt};
use log::{error, trace};
use tokio::sync::{mpsc, oneshot, RwLock};
use update_actor::UpdateActorHandle;
use uuid_resolver::UuidResolverHandle;

use super::error::{DumpActorError, Result};
use super::{DumpInfo, DumpMsg, DumpStatus, DumpTask};
use crate::index_controller::{update_actor, uuid_resolver};

pub const CONCURRENT_DUMP_MSG: usize = 10;

pub struct DumpActor<UuidResolver, Update> {
    inbox: Option<mpsc::Receiver<DumpMsg>>,
    uuid_resolver: UuidResolver,
    update: Update,
    dump_path: PathBuf,
    lock: Arc<Mutex<()>>,
    dump_infos: Arc<RwLock<HashMap<String, DumpInfo>>>,
    update_db_size: usize,
    index_db_size: usize,
}

/// Generate a uid from the creation date.
fn generate_uid() -> String {
    Utc::now().format("%Y%m%d-%H%M%S%3f").to_string()
}

impl<UuidResolver, Update> DumpActor<UuidResolver, Update>
where
    UuidResolver: UuidResolverHandle + Send + Sync + Clone + 'static,
    Update: UpdateActorHandle + Send + Sync + Clone + 'static,
{
    pub fn new(
        inbox: mpsc::Receiver<DumpMsg>,
        uuid_resolver: UuidResolver,
        update: Update,
        dump_path: impl AsRef<Path>,
        index_db_size: usize,
        update_db_size: usize,
    ) -> Self {
        let dump_infos = Arc::new(RwLock::new(HashMap::new()));
        let lock = Arc::new(Mutex::new(()));
        Self {
            inbox: Some(inbox),
            uuid_resolver,
            update,
            dump_path: dump_path.as_ref().into(),
            dump_infos,
            lock,
            index_db_size,
            update_db_size,
        }
    }

    pub async fn run(mut self) {
        trace!("Started dump actor.");

        let mut inbox = self
            .inbox
            .take()
            .expect("Dump actor must have an inbox at this point.");

        let stream = stream! {
            loop {
                match inbox.recv().await {
                    Some(msg) => yield msg,
                    None => break,
                }
            }
        };

        stream
            .for_each_concurrent(Some(CONCURRENT_DUMP_MSG), |msg| self.handle_message(msg))
            .await;

        error!("Dump actor stopped.");
    }

    async fn handle_message(&self, msg: DumpMsg) {
        use DumpMsg::*;

        match msg {
            CreateDump { ret } => {
                let _ = self.handle_create_dump(ret).await;
            }
            DumpInfo { ret, uid } => {
                let _ = ret.send(self.handle_dump_info(uid).await);
            }
        }
    }

    async fn handle_create_dump(&self, ret: oneshot::Sender<Result<DumpInfo>>) {
        let uid = generate_uid();
        let info = DumpInfo::new(uid.clone(), DumpStatus::InProgress);

        let _lock = match self.lock.try_lock() {
            Some(lock) => lock,
            None => {
                ret.send(Err(DumpActorError::DumpAlreadyRunning))
                    .expect("Dump actor is dead");
                return;
            }
        };

        self.dump_infos
            .write()
            .await
            .insert(uid.clone(), info.clone());

        ret.send(Ok(info)).expect("Dump actor is dead");

        let task = DumpTask {
            path: self.dump_path.clone(),
            uuid_resolver: self.uuid_resolver.clone(),
            update_handle: self.update.clone(),
            uid: uid.clone(),
            update_db_size: self.update_db_size,
            index_db_size: self.index_db_size,
        };

        let task_result = tokio::task::spawn(task.run()).await;

        let mut dump_infos = self.dump_infos.write().await;
        let dump_infos = dump_infos
            .get_mut(&uid)
            .expect("dump entry deleted while lock was acquired");

        match task_result {
            Ok(Ok(())) => {
                dump_infos.done();
                trace!("Dump succeeded");
            }
            Ok(Err(e)) => {
                dump_infos.with_error(e.to_string());
                error!("Dump failed: {}", e);
            }
            Err(_) => {
                dump_infos.with_error("Unexpected error while performing dump.".to_string());
                error!("Dump panicked. Dump status set to failed");
            }
        };
    }

    async fn handle_dump_info(&self, uid: String) -> Result<DumpInfo> {
        match self.dump_infos.read().await.get(&uid) {
            Some(info) => Ok(info.clone()),
            _ => Err(DumpActorError::DumpDoesNotExist(uid)),
        }
    }
}
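For orientation (not part of the commit), the `%Y%m%d-%H%M%S%3f` format used by `generate_uid` yields dump uids like `20210921-143015123`. A small sketch, assuming the `chrono` crate:

use chrono::Utc;

// Same formatting as `generate_uid` above: date, dash, time with millisecond
// precision, e.g. "20210921-143015123" for 2021-09-21 14:30:15.123 UTC.
fn generate_uid() -> String {
    Utc::now().format("%Y%m%d-%H%M%S%3f").to_string()
}

fn main() {
    let uid = generate_uid();
    // 8 digits of date + '-' + 6 digits of time + 3 digits of millis.
    assert_eq!(uid.len(), 18);
    println!("dump uid: {uid}");
}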
52  meilisearch-lib/src/index_controller/dump_actor/error.rs  (new file)
@@ -0,0 +1,52 @@
use meilisearch_error::{Code, ErrorCode};

use crate::index_controller::update_actor::error::UpdateActorError;
use crate::index_controller::uuid_resolver::error::UuidResolverError;

pub type Result<T> = std::result::Result<T, DumpActorError>;

#[derive(thiserror::Error, Debug)]
pub enum DumpActorError {
    #[error("Another dump is already in progress")]
    DumpAlreadyRunning,
    #[error("Dump `{0}` not found")]
    DumpDoesNotExist(String),
    #[error("Internal error: {0}")]
    Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
    #[error("{0}")]
    UuidResolver(#[from] UuidResolverError),
    #[error("{0}")]
    UpdateActor(#[from] UpdateActorError),
}

macro_rules! internal_error {
    ($($other:path), *) => {
        $(
            impl From<$other> for DumpActorError {
                fn from(other: $other) -> Self {
                    Self::Internal(Box::new(other))
                }
            }
        )*
    }
}

internal_error!(
    heed::Error,
    std::io::Error,
    tokio::task::JoinError,
    serde_json::error::Error,
    tempfile::PersistError
);

impl ErrorCode for DumpActorError {
    fn error_code(&self) -> Code {
        match self {
            DumpActorError::DumpAlreadyRunning => Code::DumpAlreadyInProgress,
            DumpActorError::DumpDoesNotExist(_) => Code::NotFound,
            DumpActorError::Internal(_) => Code::Internal,
            DumpActorError::UuidResolver(e) => e.error_code(),
            DumpActorError::UpdateActor(e) => e.error_code(),
        }
    }
}
@@ -0,0 +1,52 @@
use std::path::Path;

use tokio::sync::{mpsc, oneshot};

use super::error::Result;
use super::{DumpActor, DumpActorHandle, DumpInfo, DumpMsg};

#[derive(Clone)]
pub struct DumpActorHandleImpl {
    sender: mpsc::Sender<DumpMsg>,
}

#[async_trait::async_trait]
impl DumpActorHandle for DumpActorHandleImpl {
    async fn create_dump(&self) -> Result<DumpInfo> {
        let (ret, receiver) = oneshot::channel();
        let msg = DumpMsg::CreateDump { ret };
        let _ = self.sender.send(msg).await;
        receiver.await.expect("IndexActor has been killed")
    }

    async fn dump_info(&self, uid: String) -> Result<DumpInfo> {
        let (ret, receiver) = oneshot::channel();
        let msg = DumpMsg::DumpInfo { ret, uid };
        let _ = self.sender.send(msg).await;
        receiver.await.expect("IndexActor has been killed")
    }
}

impl DumpActorHandleImpl {
    pub fn new(
        path: impl AsRef<Path>,
        uuid_resolver: crate::index_controller::uuid_resolver::UuidResolverHandleImpl,
        update: crate::index_controller::update_actor::UpdateActorHandleImpl,
        index_db_size: usize,
        update_db_size: usize,
    ) -> anyhow::Result<Self> {
        let (sender, receiver) = mpsc::channel(10);
        let actor = DumpActor::new(
            receiver,
            uuid_resolver,
            update,
            path,
            index_db_size,
            update_db_size,
        );

        tokio::task::spawn(actor.run());

        Ok(Self { sender })
    }
}
@@ -0,0 +1,2 @@
pub mod v1;
pub mod v2;
224  meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs  (new file)
@@ -0,0 +1,224 @@
use std::collections::{BTreeMap, BTreeSet};
use std::marker::PhantomData;
use std::path::Path;

use log::{error, info, warn};
use milli::update::Setting;
use serde::{Deserialize, Deserializer, Serialize};
use uuid::Uuid;

use crate::index_controller::{self, uuid_resolver::HeedUuidStore, IndexMetadata};
use crate::index_controller::{asc_ranking_rule, desc_ranking_rule};
use crate::{index::Unchecked, options::IndexerOpts};

#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct MetadataV1 {
    db_version: String,
    indexes: Vec<IndexMetadata>,
}

impl MetadataV1 {
    pub fn load_dump(
        self,
        src: impl AsRef<Path>,
        dst: impl AsRef<Path>,
        size: usize,
        indexer_options: &IndexerOpts,
    ) -> anyhow::Result<()> {
        info!(
            "Loading dump, dump database version: {}, dump version: V1",
            self.db_version
        );

        let uuid_store = HeedUuidStore::new(&dst)?;
        for index in self.indexes {
            let uuid = Uuid::new_v4();
            uuid_store.insert(index.uid.clone(), uuid)?;
            let src = src.as_ref().join(index.uid);
            load_index(
                &src,
                &dst,
                uuid,
                index.meta.primary_key.as_deref(),
                size,
                indexer_options,
            )?;
        }

        Ok(())
    }
}

pub fn deserialize_some<'de, T, D>(deserializer: D) -> std::result::Result<Option<T>, D::Error>
where
    T: Deserialize<'de>,
    D: Deserializer<'de>,
{
    Deserialize::deserialize(deserializer).map(Some)
}

// These are the settings used in legacy meilisearch (<v0.21.0).
#[derive(Default, Clone, Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
struct Settings {
    #[serde(default, deserialize_with = "deserialize_some")]
    pub ranking_rules: Option<Option<Vec<String>>>,
    #[serde(default, deserialize_with = "deserialize_some")]
    pub distinct_attribute: Option<Option<String>>,
    #[serde(default, deserialize_with = "deserialize_some")]
    pub searchable_attributes: Option<Option<Vec<String>>>,
    #[serde(default, deserialize_with = "deserialize_some")]
    pub displayed_attributes: Option<Option<BTreeSet<String>>>,
    #[serde(default, deserialize_with = "deserialize_some")]
    pub stop_words: Option<Option<BTreeSet<String>>>,
    #[serde(default, deserialize_with = "deserialize_some")]
    pub synonyms: Option<Option<BTreeMap<String, Vec<String>>>>,
    #[serde(default, deserialize_with = "deserialize_some")]
    pub attributes_for_faceting: Option<Option<Vec<String>>>,
}

fn load_index(
    _src: impl AsRef<Path>,
    _dst: impl AsRef<Path>,
    _uuid: Uuid,
    _primary_key: Option<&str>,
    _size: usize,
    _indexer_options: &IndexerOpts,
) -> anyhow::Result<()> {
    todo!("fix dump obkv documents")
    //let index_path = dst.as_ref().join(&format!("indexes/index-{}", uuid));

    //create_dir_all(&index_path)?;
    //let mut options = EnvOpenOptions::new();
    //options.map_size(size);
    //let index = milli::Index::new(options, index_path)?;
    //let index = Index(Arc::new(index));

    //// extract `settings.json` file and import content
    //let settings = import_settings(&src)?;
    //let settings: index_controller::Settings<Unchecked> = settings.into();

    //let mut txn = index.write_txn()?;

    //let handler = UpdateHandler::new(indexer_options)?;

    //index.update_settings_txn(&mut txn, &settings.check(), handler.update_builder(0))?;

    //let file = File::open(&src.as_ref().join("documents.jsonl"))?;
    //let mut reader = std::io::BufReader::new(file);
    //reader.fill_buf()?;
    //if !reader.buffer().is_empty() {
    //    index.update_documents_txn(
    //        &mut txn,
    //        IndexDocumentsMethod::ReplaceDocuments,
    //        Some(reader),
    //        handler.update_builder(0),
    //        primary_key,
    //    )?;
    //}

    //txn.commit()?;

    //// Finally, we extract the original milli::Index and close it.
    //Arc::try_unwrap(index.0)
    //    .map_err(|_e| "Couldn't close the index properly")
    //    .unwrap()
    //    .prepare_for_closing()
    //    .wait();

    //// Updates are ignored in dumps V1.

    //Ok(())
}

/// We need to **always** be able to convert the old settings to the settings currently in use.
impl From<Settings> for index_controller::Settings<Unchecked> {
    fn from(settings: Settings) -> Self {
        Self {
            distinct_attribute: match settings.distinct_attribute {
                Some(Some(attr)) => Setting::Set(attr),
                Some(None) => Setting::Reset,
                None => Setting::NotSet,
            },
            // We need to convert the old `Vec<String>` into a `BTreeSet<String>`.
            displayed_attributes: match settings.displayed_attributes {
                Some(Some(attrs)) => Setting::Set(attrs.into_iter().collect()),
                Some(None) => Setting::Reset,
                None => Setting::NotSet,
            },
            searchable_attributes: match settings.searchable_attributes {
                Some(Some(attrs)) => Setting::Set(attrs),
                Some(None) => Setting::Reset,
                None => Setting::NotSet,
            },
            filterable_attributes: match settings.attributes_for_faceting {
                Some(Some(attrs)) => Setting::Set(attrs.into_iter().collect()),
                Some(None) => Setting::Reset,
                None => Setting::NotSet,
            },
            sortable_attributes: Setting::NotSet,
            ranking_rules: match settings.ranking_rules {
                Some(Some(ranking_rules)) => Setting::Set(
                    ranking_rules
                        .into_iter()
                        .filter_map(|criterion| match criterion.as_str() {
                            "words" | "typo" | "proximity" | "attribute" | "exactness" => {
                                Some(criterion)
                            }
                            s if s.starts_with("asc") => {
                                asc_ranking_rule(s).map(|f| format!("{}:asc", f))
                            }
                            s if s.starts_with("desc") => {
                                desc_ranking_rule(s).map(|f| format!("{}:desc", f))
                            }
                            "wordsPosition" => {
                                warn!(
                                    "The criteria `attribute` and `wordsPosition` have been \
                                     merged into a single criterion `attribute`, so \
                                     `wordsPosition` will be ignored"
                                );
                                None
                            }
                            s => {
                                error!(
                                    "Unknown criterion found in the dump: `{}`, it will be ignored",
                                    s
                                );
                                None
                            }
                        })
                        .collect(),
                ),
                Some(None) => Setting::Reset,
                None => Setting::NotSet,
            },
            // We need to convert the old `Vec<String>` into a `BTreeSet<String>`.
            stop_words: match settings.stop_words {
                Some(Some(stop_words)) => Setting::Set(stop_words.into_iter().collect()),
                Some(None) => Setting::Reset,
                None => Setting::NotSet,
            },
            // We need to convert the old map into a `BTreeMap<String, Vec<String>>`.
            synonyms: match settings.synonyms {
                Some(Some(synonyms)) => Setting::Set(synonyms.into_iter().collect()),
                Some(None) => Setting::Reset,
                None => Setting::NotSet,
            },
            _kind: PhantomData,
        }
    }
}

// /// Extract Settings from the `settings.json` file present at the provided `dir_path`.
//fn import_settings(dir_path: impl AsRef<Path>) -> anyhow::Result<Settings> {
//    let path = dir_path.as_ref().join("settings.json");
//    let file = File::open(path)?;
//    let reader = std::io::BufReader::new(file);
//    let metadata = serde_json::from_reader(reader)?;

//    Ok(metadata)
//}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn settings_format_regression() {
        let settings = Settings::default();
        assert_eq!(
            r##"{"rankingRules":null,"distinctAttribute":null,"searchableAttributes":null,"displayedAttributes":null,"stopWords":null,"synonyms":null,"attributesForFaceting":null}"##,
            serde_json::to_string(&settings).unwrap()
        );
    }
}
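To make the legacy conversion above concrete (illustrative, not part of the commit): the old dump syntax `asc(field)` becomes the new `field:asc`. The sketch below uses a hypothetical stand-in for `asc_ranking_rule`, which is defined elsewhere in the crate; it extracts the field name from the old syntax:

// Hypothetical stand-in for `asc_ranking_rule`: pull "price" out of "asc(price)".
fn asc_ranking_rule(text: &str) -> Option<&str> {
    text.strip_prefix("asc(")?.strip_suffix(')')
}

fn main() {
    // Old dump syntax "asc(price)" becomes the new "price:asc" syntax,
    // exactly as the `filter_map` in the `From<Settings>` impl above does.
    let converted = asc_ranking_rule("asc(price)").map(|f| format!("{}:asc", f));
    assert_eq!(converted.as_deref(), Some("price:asc"));
}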
@@ -0,0 +1,59 @@
use std::path::Path;

use chrono::{DateTime, Utc};
use log::info;
use serde::{Deserialize, Serialize};

use crate::index::Index;
use crate::index_controller::{update_actor::UpdateStore, uuid_resolver::HeedUuidStore};
use crate::options::IndexerOpts;

#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct MetadataV2 {
    db_version: String,
    index_db_size: usize,
    update_db_size: usize,
    dump_date: DateTime<Utc>,
}

impl MetadataV2 {
    pub fn new(index_db_size: usize, update_db_size: usize) -> Self {
        Self {
            db_version: env!("CARGO_PKG_VERSION").to_string(),
            index_db_size,
            update_db_size,
            dump_date: Utc::now(),
        }
    }

    pub fn load_dump(
        self,
        src: impl AsRef<Path>,
        dst: impl AsRef<Path>,
        index_db_size: usize,
        update_db_size: usize,
        indexing_options: &IndexerOpts,
    ) -> anyhow::Result<()> {
        info!(
            "Loading dump from {}, dump database version: {}, dump version: V2",
            self.dump_date, self.db_version
        );

        info!("Loading index database.");
        HeedUuidStore::load_dump(src.as_ref(), &dst)?;

        info!("Loading updates.");
        UpdateStore::load_dump(&src, &dst, update_db_size)?;

        info!("Loading indexes.");
        let indexes_path = src.as_ref().join("indexes");
        let indexes = indexes_path.read_dir()?;
        for index in indexes {
            let index = index?;
            Index::load_dump(&index.path(), &dst, index_db_size, indexing_options)?;
        }

        Ok(())
    }
}
14  meilisearch-lib/src/index_controller/dump_actor/message.rs  (new file)
@@ -0,0 +1,14 @@
use tokio::sync::oneshot;

use super::error::Result;
use super::DumpInfo;

pub enum DumpMsg {
    CreateDump {
        ret: oneshot::Sender<Result<DumpInfo>>,
    },
    DumpInfo {
        uid: String,
        ret: oneshot::Sender<Result<DumpInfo>>,
    },
}
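Each message carries a oneshot `ret` channel, which is what turns the actor's mailbox into a request/response API; the handle implementation earlier in this diff does exactly this. A self-contained sketch of the pattern with a toy message type (hypothetical names throughout):

use tokio::sync::{mpsc, oneshot};

// Toy message mirroring the `DumpMsg` shape: a payload plus a oneshot
// sender the actor uses to reply to exactly one caller.
enum Msg {
    Ping { ret: oneshot::Sender<&'static str> },
}

#[tokio::main]
async fn main() {
    let (sender, mut inbox) = mpsc::channel::<Msg>(10);

    // Actor loop: receive a message, reply on its embedded channel.
    tokio::spawn(async move {
        while let Some(Msg::Ping { ret }) = inbox.recv().await {
            let _ = ret.send("pong");
        }
    });

    // Caller side, as in `DumpActorHandleImpl::create_dump`:
    let (ret, receiver) = oneshot::channel();
    let _ = sender.send(Msg::Ping { ret }).await;
    assert_eq!(receiver.await.expect("actor has been killed"), "pong");
}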
204  meilisearch-lib/src/index_controller/dump_actor/mod.rs  (new file)
@@ -0,0 +1,204 @@
use std::fs::File;
use std::path::{Path, PathBuf};

use anyhow::Context;
use chrono::{DateTime, Utc};
use log::{info, trace, warn};
#[cfg(test)]
use mockall::automock;
use serde::{Deserialize, Serialize};
use tokio::fs::create_dir_all;

use loaders::v1::MetadataV1;
use loaders::v2::MetadataV2;

pub use actor::DumpActor;
pub use handle_impl::*;
pub use message::DumpMsg;

use super::{update_actor::UpdateActorHandle, uuid_resolver::UuidResolverHandle};
use crate::index_controller::dump_actor::error::DumpActorError;
use crate::options::IndexerOpts;
use error::Result;

mod actor;
pub mod error;
mod handle_impl;
mod loaders;
mod message;

const META_FILE_NAME: &str = "metadata.json";

#[async_trait::async_trait]
#[cfg_attr(test, automock)]
pub trait DumpActorHandle {
    /// Start the creation of a dump.
    /// Implementation: [handle_impl::DumpActorHandleImpl::create_dump]
    async fn create_dump(&self) -> Result<DumpInfo>;

    /// Return the status of an already created dump.
    /// Implementation: [handle_impl::DumpActorHandleImpl::dump_info]
    async fn dump_info(&self, uid: String) -> Result<DumpInfo>;
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "dumpVersion")]
pub enum Metadata {
    V1(MetadataV1),
    V2(MetadataV2),
}

impl Metadata {
    pub fn new_v2(index_db_size: usize, update_db_size: usize) -> Self {
        let meta = MetadataV2::new(index_db_size, update_db_size);
        Self::V2(meta)
    }
}

#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(rename_all = "snake_case")]
pub enum DumpStatus {
    Done,
    InProgress,
    Failed,
}

#[derive(Debug, Serialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct DumpInfo {
    pub uid: String,
    pub status: DumpStatus,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
    started_at: DateTime<Utc>,
    #[serde(skip_serializing_if = "Option::is_none")]
    finished_at: Option<DateTime<Utc>>,
}

impl DumpInfo {
    pub fn new(uid: String, status: DumpStatus) -> Self {
        Self {
            uid,
            status,
            error: None,
            started_at: Utc::now(),
            finished_at: None,
        }
    }

    pub fn with_error(&mut self, error: String) {
        self.status = DumpStatus::Failed;
        self.finished_at = Some(Utc::now());
        self.error = Some(error);
    }

    pub fn done(&mut self) {
        self.finished_at = Some(Utc::now());
        self.status = DumpStatus::Done;
    }

    pub fn dump_already_in_progress(&self) -> bool {
        self.status == DumpStatus::InProgress
    }
}

pub fn load_dump(
    dst_path: impl AsRef<Path>,
    src_path: impl AsRef<Path>,
    index_db_size: usize,
    update_db_size: usize,
    indexer_opts: &IndexerOpts,
) -> anyhow::Result<()> {
    let tmp_src = tempfile::tempdir_in(".")?;
    let tmp_src_path = tmp_src.path();

    crate::from_tar_gz(&src_path, tmp_src_path)?;

    let meta_path = tmp_src_path.join(META_FILE_NAME);
    let mut meta_file = File::open(&meta_path)?;
    let meta: Metadata = serde_json::from_reader(&mut meta_file)?;

    let dst_dir = dst_path
        .as_ref()
        .parent()
        .with_context(|| format!("Invalid db path: {}", dst_path.as_ref().display()))?;

    let tmp_dst = tempfile::tempdir_in(dst_dir)?;

    match meta {
        Metadata::V1(meta) => {
            meta.load_dump(&tmp_src_path, tmp_dst.path(), index_db_size, indexer_opts)?
        }
        Metadata::V2(meta) => meta.load_dump(
            &tmp_src_path,
            tmp_dst.path(),
            index_db_size,
            update_db_size,
            indexer_opts,
        )?,
    }

    // Persist and atomically rename the db.
    let persisted_dump = tmp_dst.into_path();
    if dst_path.as_ref().exists() {
        warn!("Overwriting database at {}", dst_path.as_ref().display());
        std::fs::remove_dir_all(&dst_path)?;
    }

    std::fs::rename(&persisted_dump, &dst_path)?;

    Ok(())
}

struct DumpTask<U, P> {
    path: PathBuf,
    uuid_resolver: U,
    update_handle: P,
    uid: String,
    update_db_size: usize,
    index_db_size: usize,
}

impl<U, P> DumpTask<U, P>
where
    U: UuidResolverHandle + Send + Sync + Clone + 'static,
    P: UpdateActorHandle + Send + Sync + Clone + 'static,
{
    async fn run(self) -> Result<()> {
        trace!("Performing dump.");

        create_dir_all(&self.path).await?;

        let path_clone = self.path.clone();
        let temp_dump_dir =
            tokio::task::spawn_blocking(|| tempfile::TempDir::new_in(path_clone)).await??;
        let temp_dump_path = temp_dump_dir.path().to_owned();

        let meta = Metadata::new_v2(self.index_db_size, self.update_db_size);
        let meta_path = temp_dump_path.join(META_FILE_NAME);
        let mut meta_file = File::create(&meta_path)?;
        serde_json::to_writer(&mut meta_file, &meta)?;

        let uuids = self.uuid_resolver.dump(temp_dump_path.clone()).await?;

        self.update_handle
            .dump(uuids, temp_dump_path.clone())
            .await?;

        let dump_path = tokio::task::spawn_blocking(move || -> Result<PathBuf> {
            let temp_dump_file = tempfile::NamedTempFile::new_in(&self.path)?;
            crate::to_tar_gz(temp_dump_path, temp_dump_file.path())
                .map_err(|e| DumpActorError::Internal(e.into()))?;

            let dump_path = self.path.join(self.uid).with_extension("dump");
            temp_dump_file.persist(&dump_path)?;

            Ok(dump_path)
        })
        .await??;

        info!("Created dump in {:?}.", dump_path);

        Ok(())
    }
}
40
meilisearch-lib/src/index_controller/error.rs
Normal file
@@ -0,0 +1,40 @@
use meilisearch_error::Code;
use meilisearch_error::ErrorCode;

use crate::index::error::IndexError;

use super::dump_actor::error::DumpActorError;
use super::index_actor::error::IndexActorError;
use super::update_actor::error::UpdateActorError;
use super::uuid_resolver::error::UuidResolverError;

pub type Result<T> = std::result::Result<T, IndexControllerError>;

#[derive(Debug, thiserror::Error)]
pub enum IndexControllerError {
    #[error("Index creation must have an uid")]
    MissingUid,
    #[error("{0}")]
    Uuid(#[from] UuidResolverError),
    #[error("{0}")]
    IndexActor(#[from] IndexActorError),
    #[error("{0}")]
    UpdateActor(#[from] UpdateActorError),
    #[error("{0}")]
    DumpActor(#[from] DumpActorError),
    #[error("{0}")]
    IndexError(#[from] IndexError),
}

impl ErrorCode for IndexControllerError {
    fn error_code(&self) -> Code {
        match self {
            IndexControllerError::MissingUid => Code::BadRequest,
            IndexControllerError::Uuid(e) => e.error_code(),
            IndexControllerError::IndexActor(e) => e.error_code(),
            IndexControllerError::UpdateActor(e) => e.error_code(),
            IndexControllerError::DumpActor(e) => e.error_code(),
            IndexControllerError::IndexError(e) => e.error_code(),
        }
    }
}

351
meilisearch-lib/src/index_controller/index_actor/actor.rs
Normal file
@@ -0,0 +1,351 @@
use std::path::PathBuf;
use std::sync::Arc;

use async_stream::stream;
use futures::stream::StreamExt;
use heed::CompactionOption;
use log::debug;
use milli::update::UpdateBuilder;
use tokio::task::spawn_blocking;
use tokio::{fs, sync::mpsc};
use uuid::Uuid;

use crate::index::{
    update_handler::UpdateHandler, Checked, Document, SearchQuery, SearchResult, Settings,
};
use crate::index_controller::{
    get_arc_ownership_blocking, Failed, IndexStats, Processed, Processing,
};
use crate::options::IndexerOpts;

use super::error::{IndexActorError, Result};
use super::{IndexMeta, IndexMsg, IndexSettings, IndexStore};

pub const CONCURRENT_INDEX_MSG: usize = 10;

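/// Added note: actor that owns the index store and drains `IndexMsg`s from its inbox, handling
/// up to `CONCURRENT_INDEX_MSG` messages at a time.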
pub struct IndexActor<S> {
    receiver: Option<mpsc::Receiver<IndexMsg>>,
    update_handler: Arc<UpdateHandler>,
    store: S,
}

impl<S> IndexActor<S>
where
    S: IndexStore + Sync + Send,
{
    pub fn new(
        receiver: mpsc::Receiver<IndexMsg>,
        store: S,
        options: &IndexerOpts,
    ) -> anyhow::Result<Self> {
        let update_handler = UpdateHandler::new(options)?;
        let update_handler = Arc::new(update_handler);
        let receiver = Some(receiver);

        Ok(Self {
            receiver,
            update_handler,
            store,
        })
    }

    /// `run` polls the write_receiver and read_receiver concurrently, but while messages sent
    /// through the read channel are processed concurrently, the messages sent through the write
    /// channel are processed one at a time.
    pub async fn run(mut self) {
        let mut receiver = self
            .receiver
            .take()
            .expect("Index Actor must have an inbox at this point.");

        let stream = stream! {
            loop {
                match receiver.recv().await {
                    Some(msg) => yield msg,
                    None => break,
                }
            }
        };

        stream
            .for_each_concurrent(Some(CONCURRENT_INDEX_MSG), |msg| self.handle_message(msg))
            .await;
    }

    async fn handle_message(&self, msg: IndexMsg) {
        use IndexMsg::*;
        match msg {
            CreateIndex {
                uuid,
                primary_key,
                ret,
            } => {
                let _ = ret.send(self.handle_create_index(uuid, primary_key).await);
            }
            Update { ret, meta, uuid } => {
                let _ = ret.send(self.handle_update(uuid, meta).await);
            }
            Search { ret, query, uuid } => {
                let _ = ret.send(self.handle_search(uuid, query).await);
            }
            Settings { ret, uuid } => {
                let _ = ret.send(self.handle_settings(uuid).await);
            }
            Documents {
                ret,
                uuid,
                attributes_to_retrieve,
                offset,
                limit,
            } => {
                let _ = ret.send(
                    self.handle_fetch_documents(uuid, offset, limit, attributes_to_retrieve)
                        .await,
                );
            }
            Document {
                uuid,
                attributes_to_retrieve,
                doc_id,
                ret,
            } => {
                let _ = ret.send(
                    self.handle_fetch_document(uuid, doc_id, attributes_to_retrieve)
                        .await,
                );
            }
            Delete { uuid, ret } => {
                let _ = ret.send(self.handle_delete(uuid).await);
            }
            GetMeta { uuid, ret } => {
                let _ = ret.send(self.handle_get_meta(uuid).await);
            }
            UpdateIndex {
                uuid,
                index_settings,
                ret,
            } => {
                let _ = ret.send(self.handle_update_index(uuid, index_settings).await);
            }
            Snapshot { uuid, path, ret } => {
                let _ = ret.send(self.handle_snapshot(uuid, path).await);
            }
            Dump { uuid, path, ret } => {
                let _ = ret.send(self.handle_dump(uuid, path).await);
            }
            GetStats { uuid, ret } => {
                let _ = ret.send(self.handle_get_stats(uuid).await);
            }
        }
    }

    async fn handle_search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> {
        let index = self
            .store
            .get(uuid)
            .await?
            .ok_or(IndexActorError::UnexistingIndex)?;
        let result = spawn_blocking(move || index.perform_search(query)).await??;
        Ok(result)
    }

    async fn handle_create_index(
        &self,
        uuid: Uuid,
        primary_key: Option<String>,
    ) -> Result<IndexMeta> {
        let index = self.store.create(uuid, primary_key).await?;
        let meta = spawn_blocking(move || IndexMeta::new(&index)).await??;
        Ok(meta)
    }

    async fn handle_update(
        &self,
        uuid: Uuid,
        meta: Processing,
    ) -> Result<std::result::Result<Processed, Failed>> {
        debug!("Processing update {}", meta.id());
        let update_handler = self.update_handler.clone();
        let index = match self.store.get(uuid).await? {
            Some(index) => index,
            None => self.store.create(uuid, None).await?,
        };

        Ok(spawn_blocking(move || update_handler.handle_update(index, meta)).await?)
    }

    async fn handle_settings(&self, uuid: Uuid) -> Result<Settings<Checked>> {
        let index = self
            .store
            .get(uuid)
            .await?
            .ok_or(IndexActorError::UnexistingIndex)?;
        let result = spawn_blocking(move || index.settings()).await??;
        Ok(result)
    }

    async fn handle_fetch_documents(
        &self,
        uuid: Uuid,
        offset: usize,
        limit: usize,
        attributes_to_retrieve: Option<Vec<String>>,
    ) -> Result<Vec<Document>> {
        let index = self
            .store
            .get(uuid)
            .await?
            .ok_or(IndexActorError::UnexistingIndex)?;
        let result =
            spawn_blocking(move || index.retrieve_documents(offset, limit, attributes_to_retrieve))
                .await??;

        Ok(result)
    }

    async fn handle_fetch_document(
        &self,
        uuid: Uuid,
        doc_id: String,
        attributes_to_retrieve: Option<Vec<String>>,
    ) -> Result<Document> {
        let index = self
            .store
            .get(uuid)
            .await?
            .ok_or(IndexActorError::UnexistingIndex)?;

        let result =
            spawn_blocking(move || index.retrieve_document(doc_id, attributes_to_retrieve))
                .await??;

        Ok(result)
    }

    async fn handle_delete(&self, uuid: Uuid) -> Result<()> {
        let index = self.store.delete(uuid).await?;

        if let Some(index) = index {
            tokio::task::spawn(async move {
                let index = index.inner;
                let store = get_arc_ownership_blocking(index).await;
                spawn_blocking(move || {
                    store.prepare_for_closing().wait();
                    debug!("Index closed");
                });
            });
        }

        Ok(())
    }

    async fn handle_get_meta(&self, uuid: Uuid) -> Result<IndexMeta> {
        match self.store.get(uuid).await? {
            Some(index) => {
                let meta = spawn_blocking(move || IndexMeta::new(&index)).await??;
                Ok(meta)
            }
            None => Err(IndexActorError::UnexistingIndex),
        }
    }

    async fn handle_update_index(
        &self,
        uuid: Uuid,
        index_settings: IndexSettings,
    ) -> Result<IndexMeta> {
        let index = self
            .store
            .get(uuid)
            .await?
            .ok_or(IndexActorError::UnexistingIndex)?;

        let result = spawn_blocking(move || match index_settings.primary_key {
            Some(primary_key) => {
                let mut txn = index.write_txn()?;
                if index.primary_key(&txn)?.is_some() {
                    return Err(IndexActorError::ExistingPrimaryKey);
                }
                let mut builder = UpdateBuilder::new(0).settings(&mut txn, &index);
                builder.set_primary_key(primary_key);
                builder.execute(|_, _| ())?;
                let meta = IndexMeta::new_txn(&index, &txn)?;
                txn.commit()?;
                Ok(meta)
            }
            None => {
                let meta = IndexMeta::new(&index)?;
                Ok(meta)
            }
        })
        .await??;

        Ok(result)
    }

    async fn handle_snapshot(&self, uuid: Uuid, mut path: PathBuf) -> Result<()> {
        use tokio::fs::create_dir_all;

        path.push("indexes");
        create_dir_all(&path).await?;

        if let Some(index) = self.store.get(uuid).await? {
            let mut index_path = path.join(format!("index-{}", uuid));

            create_dir_all(&index_path).await?;

            index_path.push("data.mdb");
            spawn_blocking(move || -> Result<()> {
                // Get a write txn to wait for any ongoing write transaction before the snapshot.
                let _txn = index.write_txn()?;
                index
                    .env
                    .copy_to_path(index_path, CompactionOption::Enabled)?;
                Ok(())
            })
            .await??;
        }

        Ok(())
    }

    /// Create a `documents.jsonl` and a `settings.json` in `path/uid/` with a dump of all the
    /// documents and all the settings.
    async fn handle_dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
        let index = self
            .store
            .get(uuid)
            .await?
            .ok_or(IndexActorError::UnexistingIndex)?;

        let path = path.join(format!("indexes/index-{}/", uuid));
        fs::create_dir_all(&path).await?;

        tokio::task::spawn_blocking(move || index.dump(path)).await??;

        Ok(())
    }

    async fn handle_get_stats(&self, uuid: Uuid) -> Result<IndexStats> {
        let index = self
            .store
            .get(uuid)
            .await?
            .ok_or(IndexActorError::UnexistingIndex)?;

        spawn_blocking(move || {
            let rtxn = index.read_txn()?;

            Ok(IndexStats {
                size: index.size(),
                number_of_documents: index.number_of_documents(&rtxn)?,
                is_indexing: None,
                field_distribution: index.field_distribution(&rtxn)?,
            })
        })
        .await?
    }
}

48
meilisearch-lib/src/index_controller/index_actor/error.rs
Normal file
@@ -0,0 +1,48 @@
use meilisearch_error::{Code, ErrorCode};

use crate::{error::MilliError, index::error::IndexError};

pub type Result<T> = std::result::Result<T, IndexActorError>;

#[derive(thiserror::Error, Debug)]
pub enum IndexActorError {
    #[error("{0}")]
    IndexError(#[from] IndexError),
    #[error("Index already exists")]
    IndexAlreadyExists,
    #[error("Index not found")]
    UnexistingIndex,
    #[error("A primary key is already present. It's impossible to update it")]
    ExistingPrimaryKey,
    #[error("Internal Error: {0}")]
    Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
    #[error("{0}")]
    Milli(#[from] milli::Error),
}

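// Added note: generates `From<T>` impls that wrap the listed error types into
// `IndexActorError::Internal`.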
macro_rules! internal_error {
    ($($other:path), *) => {
        $(
            impl From<$other> for IndexActorError {
                fn from(other: $other) -> Self {
                    Self::Internal(Box::new(other))
                }
            }
        )*
    }
}

internal_error!(heed::Error, tokio::task::JoinError, std::io::Error);

impl ErrorCode for IndexActorError {
    fn error_code(&self) -> Code {
        match self {
            IndexActorError::IndexError(e) => e.error_code(),
            IndexActorError::IndexAlreadyExists => Code::IndexAlreadyExists,
            IndexActorError::UnexistingIndex => Code::IndexNotFound,
            IndexActorError::ExistingPrimaryKey => Code::PrimaryKeyAlreadyPresent,
            IndexActorError::Internal(_) => Code::Internal,
            IndexActorError::Milli(e) => MilliError(e).error_code(),
        }
    }
}

162
meilisearch-lib/src/index_controller/index_actor/handle_impl.rs
Normal file
@@ -0,0 +1,162 @@
use crate::options::IndexerOpts;
use std::path::{Path, PathBuf};

use tokio::sync::{mpsc, oneshot};
use uuid::Uuid;

use crate::{
    index::Checked,
    index_controller::{IndexSettings, IndexStats, Processing},
};
use crate::{
    index::{Document, SearchQuery, SearchResult, Settings},
    index_controller::{Failed, Processed},
};

use super::error::Result;
use super::{IndexActor, IndexActorHandle, IndexMeta, IndexMsg, MapIndexStore};

#[derive(Clone)]
pub struct IndexActorHandleImpl {
    sender: mpsc::Sender<IndexMsg>,
}

#[async_trait::async_trait]
impl IndexActorHandle for IndexActorHandleImpl {
    async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta> {
        let (ret, receiver) = oneshot::channel();
        let msg = IndexMsg::CreateIndex {
            ret,
            uuid,
            primary_key,
        };
        let _ = self.sender.send(msg).await;
        receiver.await.expect("IndexActor has been killed")
    }

    async fn update(
        &self,
        uuid: Uuid,
        meta: Processing,
    ) -> Result<std::result::Result<Processed, Failed>> {
        let (ret, receiver) = oneshot::channel();
        let msg = IndexMsg::Update {
            ret,
            meta,
            uuid,
        };
        let _ = self.sender.send(msg).await;
        Ok(receiver.await.expect("IndexActor has been killed")?)
    }

    async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> {
        let (ret, receiver) = oneshot::channel();
        let msg = IndexMsg::Search { uuid, query, ret };
        let _ = self.sender.send(msg).await;
        Ok(receiver.await.expect("IndexActor has been killed")?)
    }

    async fn settings(&self, uuid: Uuid) -> Result<Settings<Checked>> {
        let (ret, receiver) = oneshot::channel();
        let msg = IndexMsg::Settings { uuid, ret };
        let _ = self.sender.send(msg).await;
        Ok(receiver.await.expect("IndexActor has been killed")?)
    }

    async fn documents(
        &self,
        uuid: Uuid,
        offset: usize,
        limit: usize,
        attributes_to_retrieve: Option<Vec<String>>,
    ) -> Result<Vec<Document>> {
        let (ret, receiver) = oneshot::channel();
        let msg = IndexMsg::Documents {
            uuid,
            ret,
            offset,
            attributes_to_retrieve,
            limit,
        };
        let _ = self.sender.send(msg).await;
        Ok(receiver.await.expect("IndexActor has been killed")?)
    }

    async fn document(
        &self,
        uuid: Uuid,
        doc_id: String,
        attributes_to_retrieve: Option<Vec<String>>,
    ) -> Result<Document> {
        let (ret, receiver) = oneshot::channel();
        let msg = IndexMsg::Document {
            uuid,
            ret,
            doc_id,
            attributes_to_retrieve,
        };
        let _ = self.sender.send(msg).await;
        Ok(receiver.await.expect("IndexActor has been killed")?)
    }

    async fn delete(&self, uuid: Uuid) -> Result<()> {
        let (ret, receiver) = oneshot::channel();
        let msg = IndexMsg::Delete { uuid, ret };
        let _ = self.sender.send(msg).await;
        Ok(receiver.await.expect("IndexActor has been killed")?)
    }

    async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta> {
        let (ret, receiver) = oneshot::channel();
        let msg = IndexMsg::GetMeta { uuid, ret };
        let _ = self.sender.send(msg).await;
        Ok(receiver.await.expect("IndexActor has been killed")?)
    }

    async fn update_index(&self, uuid: Uuid, index_settings: IndexSettings) -> Result<IndexMeta> {
        let (ret, receiver) = oneshot::channel();
        let msg = IndexMsg::UpdateIndex {
            uuid,
            index_settings,
            ret,
        };
        let _ = self.sender.send(msg).await;
        Ok(receiver.await.expect("IndexActor has been killed")?)
    }

    async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
        let (ret, receiver) = oneshot::channel();
        let msg = IndexMsg::Snapshot { uuid, path, ret };
        let _ = self.sender.send(msg).await;
        Ok(receiver.await.expect("IndexActor has been killed")?)
    }

    async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
        let (ret, receiver) = oneshot::channel();
        let msg = IndexMsg::Dump { uuid, path, ret };
        let _ = self.sender.send(msg).await;
        Ok(receiver.await.expect("IndexActor has been killed")?)
    }

    async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats> {
        let (ret, receiver) = oneshot::channel();
        let msg = IndexMsg::GetStats { uuid, ret };
        let _ = self.sender.send(msg).await;
        Ok(receiver.await.expect("IndexActor has been killed")?)
    }
}

impl IndexActorHandleImpl {
    pub fn new(
        path: impl AsRef<Path>,
        index_size: usize,
        options: &IndexerOpts,
    ) -> anyhow::Result<Self> {
        let (sender, receiver) = mpsc::channel(100);

        let store = MapIndexStore::new(&path, index_size);
        let actor = IndexActor::new(receiver, store, options)?;
        tokio::task::spawn(actor.run());
        Ok(Self { sender })
    }
}

73
meilisearch-lib/src/index_controller/index_actor/message.rs
Normal file
@@ -0,0 +1,73 @@
use std::path::PathBuf;

use tokio::sync::oneshot;
use uuid::Uuid;

use super::error::Result as IndexResult;
use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings};
use crate::index_controller::{Failed, IndexStats, Processed, Processing};

use super::{IndexMeta, IndexSettings};

#[allow(clippy::large_enum_variant)]
pub enum IndexMsg {
    CreateIndex {
        uuid: Uuid,
        primary_key: Option<String>,
        ret: oneshot::Sender<IndexResult<IndexMeta>>,
    },
    Update {
        uuid: Uuid,
        meta: Processing,
        ret: oneshot::Sender<IndexResult<Result<Processed, Failed>>>,
    },
    Search {
        uuid: Uuid,
        query: SearchQuery,
        ret: oneshot::Sender<IndexResult<SearchResult>>,
    },
    Settings {
        uuid: Uuid,
        ret: oneshot::Sender<IndexResult<Settings<Checked>>>,
    },
    Documents {
        uuid: Uuid,
        attributes_to_retrieve: Option<Vec<String>>,
        offset: usize,
        limit: usize,
        ret: oneshot::Sender<IndexResult<Vec<Document>>>,
    },
    Document {
        uuid: Uuid,
        attributes_to_retrieve: Option<Vec<String>>,
        doc_id: String,
        ret: oneshot::Sender<IndexResult<Document>>,
    },
    Delete {
        uuid: Uuid,
        ret: oneshot::Sender<IndexResult<()>>,
    },
    GetMeta {
        uuid: Uuid,
        ret: oneshot::Sender<IndexResult<IndexMeta>>,
    },
    UpdateIndex {
        uuid: Uuid,
        index_settings: IndexSettings,
        ret: oneshot::Sender<IndexResult<IndexMeta>>,
    },
    Snapshot {
        uuid: Uuid,
        path: PathBuf,
        ret: oneshot::Sender<IndexResult<()>>,
    },
    Dump {
        uuid: Uuid,
        path: PathBuf,
        ret: oneshot::Sender<IndexResult<()>>,
    },
    GetStats {
        uuid: Uuid,
        ret: oneshot::Sender<IndexResult<IndexStats>>,
    },
}

167
meilisearch-lib/src/index_controller/index_actor/mod.rs
Normal file
@@ -0,0 +1,167 @@
use std::path::PathBuf;

use chrono::{DateTime, Utc};
#[cfg(test)]
use mockall::automock;
use serde::{Deserialize, Serialize};
use uuid::Uuid;

use actor::IndexActor;
pub use actor::CONCURRENT_INDEX_MSG;
pub use handle_impl::IndexActorHandleImpl;
use message::IndexMsg;
use store::{IndexStore, MapIndexStore};

use crate::index::{Checked, Document, Index, SearchQuery, SearchResult, Settings};
use crate::index_controller::{Failed, IndexStats, Processed, Processing};
use error::Result;

use super::IndexSettings;

mod actor;
pub mod error;
mod handle_impl;
mod message;
mod store;

#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexMeta {
    created_at: DateTime<Utc>,
    pub updated_at: DateTime<Utc>,
    pub primary_key: Option<String>,
}

impl IndexMeta {
    fn new(index: &Index) -> Result<Self> {
        let txn = index.read_txn()?;
        Self::new_txn(index, &txn)
    }

    fn new_txn(index: &Index, txn: &heed::RoTxn) -> Result<Self> {
        let created_at = index.created_at(txn)?;
        let updated_at = index.updated_at(txn)?;
        let primary_key = index.primary_key(txn)?.map(String::from);
        Ok(Self {
            created_at,
            updated_at,
            primary_key,
        })
    }
}

#[async_trait::async_trait]
#[cfg_attr(test, automock)]
pub trait IndexActorHandle {
    async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta>;
    async fn update(
        &self,
        uuid: Uuid,
        meta: Processing,
    ) -> Result<std::result::Result<Processed, Failed>>;
    async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult>;
    async fn settings(&self, uuid: Uuid) -> Result<Settings<Checked>>;

    async fn documents(
        &self,
        uuid: Uuid,
        offset: usize,
        limit: usize,
        attributes_to_retrieve: Option<Vec<String>>,
    ) -> Result<Vec<Document>>;
    async fn document(
        &self,
        uuid: Uuid,
        doc_id: String,
        attributes_to_retrieve: Option<Vec<String>>,
    ) -> Result<Document>;
    async fn delete(&self, uuid: Uuid) -> Result<()>;
    async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta>;
    async fn update_index(&self, uuid: Uuid, index_settings: IndexSettings) -> Result<IndexMeta>;
    async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()>;
    async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()>;
    async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats>;
}

#[cfg(test)]
mod test {
    use std::sync::Arc;

    use super::*;

    #[async_trait::async_trait]
    /// Useful for passing around an `Arc<MockIndexActorHandle>` in tests.
    impl IndexActorHandle for Arc<MockIndexActorHandle> {
        async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta> {
            self.as_ref().create_index(uuid, primary_key).await
        }

        // Fixed to match the trait signature above: the stray `data: Option<std::fs::File>`
        // parameter did not exist on `IndexActorHandle::update` and would not compile.
        async fn update(
            &self,
            uuid: Uuid,
            meta: Processing,
        ) -> Result<std::result::Result<Processed, Failed>> {
            self.as_ref().update(uuid, meta).await
        }

        async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> {
            self.as_ref().search(uuid, query).await
        }

        async fn settings(&self, uuid: Uuid) -> Result<Settings<Checked>> {
            self.as_ref().settings(uuid).await
        }

        async fn documents(
            &self,
            uuid: Uuid,
            offset: usize,
            limit: usize,
            attributes_to_retrieve: Option<Vec<String>>,
        ) -> Result<Vec<Document>> {
            self.as_ref()
                .documents(uuid, offset, limit, attributes_to_retrieve)
                .await
        }

        async fn document(
            &self,
            uuid: Uuid,
            doc_id: String,
            attributes_to_retrieve: Option<Vec<String>>,
        ) -> Result<Document> {
            self.as_ref()
                .document(uuid, doc_id, attributes_to_retrieve)
                .await
        }

        async fn delete(&self, uuid: Uuid) -> Result<()> {
            self.as_ref().delete(uuid).await
        }

        async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta> {
            self.as_ref().get_index_meta(uuid).await
        }

        async fn update_index(
            &self,
            uuid: Uuid,
            index_settings: IndexSettings,
        ) -> Result<IndexMeta> {
            self.as_ref().update_index(uuid, index_settings).await
        }

        async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
            self.as_ref().snapshot(uuid, path).await
        }

        async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
            self.as_ref().dump(uuid, path).await
        }

        async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats> {
            self.as_ref().get_index_stats(uuid).await
        }
    }
}

109
meilisearch-lib/src/index_controller/index_actor/store.rs
Normal file
@@ -0,0 +1,109 @@
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;

use milli::update::UpdateBuilder;
use tokio::fs;
use tokio::sync::RwLock;
use tokio::task::spawn_blocking;
use uuid::Uuid;

use super::error::{IndexActorError, Result};
use crate::index::Index;
use crate::index_controller::update_file_store::UpdateFileStore;

type AsyncMap<K, V> = Arc<RwLock<HashMap<K, V>>>;

#[async_trait::async_trait]
pub trait IndexStore {
    async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> Result<Index>;
    async fn get(&self, uuid: Uuid) -> Result<Option<Index>>;
    async fn delete(&self, uuid: Uuid) -> Result<Option<Index>>;
}

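/// Added note: an `IndexStore` that caches opened indexes in an in-memory map and lazily opens
/// them from disk on first access.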
pub struct MapIndexStore {
    index_store: AsyncMap<Uuid, Index>,
    path: PathBuf,
    index_size: usize,
    update_file_store: Arc<UpdateFileStore>,
}

impl MapIndexStore {
    pub fn new(path: impl AsRef<Path>, index_size: usize) -> Self {
        let update_file_store = Arc::new(UpdateFileStore::new(path.as_ref()).unwrap());
        let path = path.as_ref().join("indexes/");
        let index_store = Arc::new(RwLock::new(HashMap::new()));
        Self {
            index_store,
            path,
            index_size,
            update_file_store,
        }
    }
}

#[async_trait::async_trait]
impl IndexStore for MapIndexStore {
    async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> Result<Index> {
        // We need to keep the lock until we are sure the db file has been opened correctly, to
        // ensure that another db is not created at the same time.
        let mut lock = self.index_store.write().await;

        if let Some(index) = lock.get(&uuid) {
            return Ok(index.clone());
        }
        let path = self.path.join(format!("index-{}", uuid));
        if path.exists() {
            return Err(IndexActorError::IndexAlreadyExists);
        }

        let index_size = self.index_size;
        let file_store = self.update_file_store.clone();
        let index = spawn_blocking(move || -> Result<Index> {
            let index = Index::open(path, index_size, file_store)?;
            if let Some(primary_key) = primary_key {
                let mut txn = index.write_txn()?;

                let mut builder = UpdateBuilder::new(0).settings(&mut txn, &index);
                builder.set_primary_key(primary_key);
                builder.execute(|_, _| ())?;

                txn.commit()?;
            }
            Ok(index)
        })
        .await??;

        lock.insert(uuid, index.clone());

        Ok(index)
    }

    async fn get(&self, uuid: Uuid) -> Result<Option<Index>> {
        let guard = self.index_store.read().await;
        match guard.get(&uuid) {
            Some(index) => Ok(Some(index.clone())),
            None => {
                // Drop the guard here so we can perform the write below without deadlocking.
                drop(guard);
                let path = self.path.join(format!("index-{}", uuid));
                if !path.exists() {
                    return Ok(None);
                }

                let index_size = self.index_size;
                let file_store = self.update_file_store.clone();
                let index =
                    spawn_blocking(move || Index::open(path, index_size, file_store)).await??;
                self.index_store.write().await.insert(uuid, index.clone());
                Ok(Some(index))
            }
        }
    }

    async fn delete(&self, uuid: Uuid) -> Result<Option<Index>> {
        let db_path = self.path.join(format!("index-{}", uuid));
        fs::remove_dir_all(db_path).await?;
        let index = self.index_store.write().await.remove(&uuid);
        Ok(index)
    }
}

557
meilisearch-lib/src/index_controller/mod.rs
Normal file
@@ -0,0 +1,557 @@
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;

use actix_web::error::PayloadError;
use bytes::Bytes;
use chrono::{DateTime, Utc};
use futures::Stream;
use log::info;
use milli::FieldDistribution;
use milli::update::IndexDocumentsMethod;
use serde::{Deserialize, Serialize};
use tokio::time::sleep;
use uuid::Uuid;

use dump_actor::DumpActorHandle;
pub use dump_actor::{DumpInfo, DumpStatus};
use index_actor::IndexActorHandle;
use snapshot::load_snapshot;
use update_actor::UpdateActorHandle;
pub use updates::*;
use uuid_resolver::{error::UuidResolverError, UuidResolverHandle};

use crate::options::IndexerOpts;
use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings};
use error::Result;

use self::dump_actor::load_dump;

mod dump_actor;
pub mod error;
pub mod index_actor;
mod snapshot;
pub mod update_actor;
mod updates;
mod uuid_resolver;
pub mod update_file_store;

pub type Payload = Box<
    dyn Stream<Item = std::result::Result<Bytes, PayloadError>> + Send + Sync + 'static + Unpin,
>;

#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexMetadata {
    #[serde(skip)]
    pub uuid: Uuid,
    pub uid: String,
    name: String,
    #[serde(flatten)]
    pub meta: index_actor::IndexMeta,
}

#[derive(Clone, Debug)]
pub struct IndexSettings {
    pub uid: Option<String>,
    pub primary_key: Option<String>,
}

#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct IndexStats {
    #[serde(skip)]
    pub size: u64,
    pub number_of_documents: u64,
    /// Whether the current index is performing an update. It is initially `None` when the
    /// index returns it, since it is the `UpdateStore` that knows which index is currently
    /// indexing. It is later set to either true or false when we retrieve the information
    /// from the `UpdateStore`.
    pub is_indexing: Option<bool>,
    pub field_distribution: FieldDistribution,
}

#[derive(Clone)]
pub struct IndexController {
    uuid_resolver: uuid_resolver::UuidResolverHandleImpl,
    index_handle: index_actor::IndexActorHandleImpl,
    update_handle: update_actor::UpdateActorHandleImpl,
    dump_handle: dump_actor::DumpActorHandleImpl,
}

pub enum DocumentAdditionFormat {
    Json,
}

#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct Stats {
    pub database_size: u64,
    pub last_update: Option<DateTime<Utc>>,
    pub indexes: BTreeMap<String, IndexStats>,
}

pub enum Update {
    DocumentAddition {
        payload: Payload,
        primary_key: Option<String>,
        method: IndexDocumentsMethod,
        format: DocumentAdditionFormat,
    },
}

#[derive(Default, Debug)]
pub struct IndexControllerBuilder {
    max_index_size: Option<usize>,
    max_update_store_size: Option<usize>,
    snapshot_dir: Option<PathBuf>,
    import_snapshot: Option<PathBuf>,
    ignore_snapshot_if_db_exists: bool,
    ignore_missing_snapshot: bool,
    dump_src: Option<PathBuf>,
    dump_dst: Option<PathBuf>,
}

impl IndexControllerBuilder {
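    /// Added note: builds the `IndexController`, optionally restoring state from an imported
    /// snapshot or a dump before spawning the actor handles.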
    pub fn build(
        self,
        db_path: impl AsRef<Path>,
        indexer_options: IndexerOpts,
    ) -> anyhow::Result<IndexController> {
        let index_size = self
            .max_index_size
            .ok_or_else(|| anyhow::anyhow!("Missing index size"))?;
        // Fixed: this previously read `max_index_size` again, despite the error message below.
        let update_store_size = self
            .max_update_store_size
            .ok_or_else(|| anyhow::anyhow!("Missing update database size"))?;

        if let Some(ref path) = self.import_snapshot {
            info!("Loading from snapshot {:?}", path);
            load_snapshot(
                db_path.as_ref(),
                path,
                self.ignore_snapshot_if_db_exists,
                self.ignore_missing_snapshot,
            )?;
        } else if let Some(ref src_path) = self.dump_src {
            load_dump(
                db_path.as_ref(),
                src_path,
                index_size,
                update_store_size,
                &indexer_options,
            )?;
        }

        std::fs::create_dir_all(db_path.as_ref())?;

        let uuid_resolver = uuid_resolver::UuidResolverHandleImpl::new(&db_path)?;
        let index_handle =
            index_actor::IndexActorHandleImpl::new(&db_path, index_size, &indexer_options)?;
        let update_handle = update_actor::UpdateActorHandleImpl::new(
            index_handle.clone(),
            &db_path,
            update_store_size,
        )?;

        let dump_handle = dump_actor::DumpActorHandleImpl::new(
            &self
                .dump_dst
                .ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?,
            uuid_resolver.clone(),
            update_handle.clone(),
            index_size,
            update_store_size,
        )?;

        //if options.schedule_snapshot {
        //let snapshot_service = SnapshotService::new(
        //uuid_resolver.clone(),
        //update_handle.clone(),
        //Duration::from_secs(options.snapshot_interval_sec),
        //options.snapshot_dir.clone(),
        //options
        //.db_path
        //.file_name()
        //.map(|n| n.to_owned().into_string().expect("invalid path"))
        //.unwrap_or_else(|| String::from("data.ms")),
        //);

        //tokio::task::spawn(snapshot_service.run());
        //}

        Ok(IndexController {
            uuid_resolver,
            index_handle,
            update_handle,
            dump_handle,
        })
    }

    /// Set the index controller builder's max update store size.
    pub fn set_max_update_store_size(&mut self, max_update_store_size: usize) -> &mut Self {
        self.max_update_store_size.replace(max_update_store_size);
        self
    }

    pub fn set_max_index_size(&mut self, size: usize) -> &mut Self {
        self.max_index_size.replace(size);
        self
    }

    /// Set the index controller builder's snapshot path.
    pub fn set_snapshot_dir(&mut self, snapshot_dir: PathBuf) -> &mut Self {
        self.snapshot_dir.replace(snapshot_dir);
        self
    }

    /// Set the index controller builder's ignore snapshot if db exists.
    pub fn set_ignore_snapshot_if_db_exists(
        &mut self,
        ignore_snapshot_if_db_exists: bool,
    ) -> &mut Self {
        self.ignore_snapshot_if_db_exists = ignore_snapshot_if_db_exists;
        self
    }

    /// Set the index controller builder's ignore missing snapshot.
    pub fn set_ignore_missing_snapshot(&mut self, ignore_missing_snapshot: bool) -> &mut Self {
        self.ignore_missing_snapshot = ignore_missing_snapshot;
        self
    }

    /// Set the index controller builder's dump src.
    pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self {
        self.dump_src.replace(dump_src);
        self
    }

    /// Set the index controller builder's dump dst.
    pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self {
        self.dump_dst.replace(dump_dst);
        self
    }

    /// Set the index controller builder's import snapshot.
    pub fn set_import_snapshot(&mut self, import_snapshot: PathBuf) -> &mut Self {
        self.import_snapshot.replace(import_snapshot);
        self
    }
}

impl IndexController {
    pub fn builder() -> IndexControllerBuilder {
        IndexControllerBuilder::default()
    }

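    /// Added note: registers an update for the index identified by `uid`, implicitly creating
    /// the index when it does not exist yet.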
    pub async fn register_update(&self, uid: &str, update: Update) -> Result<UpdateStatus> {
        match self.uuid_resolver.get(uid.to_string()).await {
            Ok(uuid) => {
                let update_result = self.update_handle.update(uuid, update).await?;
                Ok(update_result)
            }
            Err(UuidResolverError::UnexistingIndex(name)) => {
                let uuid = Uuid::new_v4();
                let update_result = self.update_handle.update(uuid, update).await?;
                // Ignore if index creation fails now, since it may already have been created.
                let _ = self.index_handle.create_index(uuid, None).await;
                self.uuid_resolver.insert(name, uuid).await?;
                Ok(update_result)
            }
            Err(e) => Err(e.into()),
        }
    }

    //pub async fn add_documents(
    //&self,
    //uid: String,
    //method: milli::update::IndexDocumentsMethod,
    //payload: Payload,
    //primary_key: Option<String>,
    //) -> Result<UpdateStatus> {
    //let perform_update = |uuid| async move {
    //let meta = UpdateMeta::DocumentsAddition {
    //method,
    //primary_key,
    //};
    //let (sender, receiver) = mpsc::channel(10);

    //// It is necessary to spawn a local task to send the payload to the update handle to
    //// prevent dead_locking between the update_handle::update that waits for the update to be
    //// registered and the update_actor that waits for the payload to be sent to it.
    //tokio::task::spawn_local(async move {
    //payload
    //.for_each(|r| async {
    //let _ = sender.send(r).await;
    //})
    //.await
    //});

    //// This must be done *AFTER* spawning the task.
    //self.update_handle.update(meta, receiver, uuid).await
    //};

    //match self.uuid_resolver.get(uid).await {
    //Ok(uuid) => Ok(perform_update(uuid).await?),
    //Err(UuidResolverError::UnexistingIndex(name)) => {
    //let uuid = Uuid::new_v4();
    //let status = perform_update(uuid).await?;
    //// ignore if index creation fails now, since it may already have been created
    //let _ = self.index_handle.create_index(uuid, None).await;
    //self.uuid_resolver.insert(name, uuid).await?;
    //Ok(status)
    //}
    //Err(e) => Err(e.into()),
    //}
    //}

    //pub async fn clear_documents(&self, uid: String) -> Result<UpdateStatus> {
    //let uuid = self.uuid_resolver.get(uid).await?;
    //let meta = UpdateMeta::ClearDocuments;
    //let (_, receiver) = mpsc::channel(1);
    //let status = self.update_handle.update(meta, receiver, uuid).await?;
    //Ok(status)
    //}

    //pub async fn delete_documents(
    //&self,
    //uid: String,
    //documents: Vec<String>,
    //) -> Result<UpdateStatus> {
    //let uuid = self.uuid_resolver.get(uid).await?;
    //let meta = UpdateMeta::DeleteDocuments { ids: documents };
    //let (_, receiver) = mpsc::channel(1);
    //let status = self.update_handle.update(meta, receiver, uuid).await?;
    //Ok(status)
    //}

    //pub async fn update_settings(
    //&self,
    //uid: String,
    //settings: Settings<Checked>,
    //create: bool,
    //) -> Result<UpdateStatus> {
    //let perform_update = |uuid| async move {
    //let meta = UpdateMeta::Settings(settings.into_unchecked());
    //// Nothing to send, drop the sender right away, so as not to block the update actor.
    //let (_, receiver) = mpsc::channel(1);
    //self.update_handle.update(meta, receiver, uuid).await
    //};

    //match self.uuid_resolver.get(uid).await {
    //Ok(uuid) => Ok(perform_update(uuid).await?),
    //Err(UuidResolverError::UnexistingIndex(name)) if create => {
    //let uuid = Uuid::new_v4();
    //let status = perform_update(uuid).await?;
    //// ignore if index creation fails now, since it may already have been created
    //let _ = self.index_handle.create_index(uuid, None).await;
    //self.uuid_resolver.insert(name, uuid).await?;
    //Ok(status)
    //}
    //Err(e) => Err(e.into()),
    //}
    //}

    //pub async fn create_index(&self, index_settings: IndexSettings) -> Result<IndexMetadata> {
    //let IndexSettings { uid, primary_key } = index_settings;
    //let uid = uid.ok_or(IndexControllerError::MissingUid)?;
    //let uuid = Uuid::new_v4();
    //let meta = self.index_handle.create_index(uuid, primary_key).await?;
    //self.uuid_resolver.insert(uid.clone(), uuid).await?;
    //let meta = IndexMetadata {
    //uuid,
    //name: uid.clone(),
    //uid,
    //meta,
    //};

    //Ok(meta)
    //}

    //pub async fn delete_index(&self, uid: String) -> Result<()> {
    //let uuid = self.uuid_resolver.delete(uid).await?;

    //// We remove the index from the resolver synchronously, and effectively perform the index
    //// deletion as a background task.
    //let update_handle = self.update_handle.clone();
    //let index_handle = self.index_handle.clone();
    //tokio::spawn(async move {
    //if let Err(e) = update_handle.delete(uuid).await {
    //error!("Error while deleting index: {}", e);
    //}
    //if let Err(e) = index_handle.delete(uuid).await {
    //error!("Error while deleting index: {}", e);
    //}
    //});

    //Ok(())
    //}

    pub async fn update_status(&self, uid: String, id: u64) -> Result<UpdateStatus> {
        let uuid = self.uuid_resolver.get(uid).await?;
        let result = self.update_handle.update_status(uuid, id).await?;
        Ok(result)
    }

    pub async fn all_update_status(&self, uid: String) -> Result<Vec<UpdateStatus>> {
        let uuid = self.uuid_resolver.get(uid).await?;
        let result = self.update_handle.get_all_updates_status(uuid).await?;
        Ok(result)
    }

    pub async fn list_indexes(&self) -> Result<Vec<IndexMetadata>> {
        let uuids = self.uuid_resolver.list().await?;

        let mut ret = Vec::new();

        for (uid, uuid) in uuids {
            let meta = self.index_handle.get_index_meta(uuid).await?;
            let meta = IndexMetadata {
                uuid,
                name: uid.clone(),
                uid,
                meta,
            };
            ret.push(meta);
        }

        Ok(ret)
    }

    pub async fn settings(&self, uid: String) -> Result<Settings<Checked>> {
        let uuid = self.uuid_resolver.get(uid.clone()).await?;
        let settings = self.index_handle.settings(uuid).await?;
        Ok(settings)
    }

    pub async fn documents(
        &self,
        uid: String,
        offset: usize,
        limit: usize,
        attributes_to_retrieve: Option<Vec<String>>,
    ) -> Result<Vec<Document>> {
        let uuid = self.uuid_resolver.get(uid.clone()).await?;
        let documents = self
            .index_handle
            .documents(uuid, offset, limit, attributes_to_retrieve)
            .await?;
        Ok(documents)
    }

    pub async fn document(
        &self,
        uid: String,
        doc_id: String,
        attributes_to_retrieve: Option<Vec<String>>,
    ) -> Result<Document> {
        let uuid = self.uuid_resolver.get(uid.clone()).await?;
        let document = self
            .index_handle
            .document(uuid, doc_id, attributes_to_retrieve)
            .await?;
        Ok(document)
    }

    pub async fn update_index(
        &self,
        uid: String,
        mut index_settings: IndexSettings,
    ) -> Result<IndexMetadata> {
        if index_settings.uid.is_some() {
            index_settings.uid.take();
        }

        let uuid = self.uuid_resolver.get(uid.clone()).await?;
        let meta = self.index_handle.update_index(uuid, index_settings).await?;
        let meta = IndexMetadata {
            uuid,
            name: uid.clone(),
            uid,
            meta,
        };
        Ok(meta)
    }

    pub async fn search(&self, uid: String, query: SearchQuery) -> Result<SearchResult> {
        let uuid = self.uuid_resolver.get(uid).await?;
        let result = self.index_handle.search(uuid, query).await?;
        Ok(result)
    }

    pub async fn get_index(&self, uid: String) -> Result<IndexMetadata> {
        let uuid = self.uuid_resolver.get(uid.clone()).await?;
        let meta = self.index_handle.get_index_meta(uuid).await?;
        let meta = IndexMetadata {
            uuid,
            name: uid.clone(),
            uid,
            meta,
        };
        Ok(meta)
    }

    pub async fn get_uuids_size(&self) -> Result<u64> {
        Ok(self.uuid_resolver.get_size().await?)
    }

    pub async fn get_index_stats(&self, uid: String) -> Result<IndexStats> {
        let uuid = self.uuid_resolver.get(uid).await?;
        let update_infos = self.update_handle.get_info().await?;
        let mut stats = self.index_handle.get_index_stats(uuid).await?;
        // Check if the currently indexing update is from our index.
        stats.is_indexing = Some(Some(uuid) == update_infos.processing);
        Ok(stats)
    }

    pub async fn get_all_stats(&self) -> Result<Stats> {
        let update_infos = self.update_handle.get_info().await?;
        let mut database_size = self.get_uuids_size().await? + update_infos.size;
        let mut last_update: Option<DateTime<_>> = None;
        let mut indexes = BTreeMap::new();

        for index in self.list_indexes().await? {
            let mut index_stats = self.index_handle.get_index_stats(index.uuid).await?;
            database_size += index_stats.size;

            last_update = last_update.map_or(Some(index.meta.updated_at), |last| {
                Some(last.max(index.meta.updated_at))
            });

            index_stats.is_indexing = Some(Some(index.uuid) == update_infos.processing);

            indexes.insert(index.uid, index_stats);
        }

        Ok(Stats {
            database_size,
            last_update,
            indexes,
        })
    }

    pub async fn create_dump(&self) -> Result<DumpInfo> {
        Ok(self.dump_handle.create_dump().await?)
    }

    pub async fn dump_info(&self, uid: String) -> Result<DumpInfo> {
        Ok(self.dump_handle.dump_info(uid).await?)
    }
}

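/// Added note: repeatedly tries to take ownership of the inner value of an `Arc`, sleeping
/// 100ms between attempts until all other clones have been dropped.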
pub async fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T {
    loop {
        match Arc::try_unwrap(item) {
            Ok(item) => return item,
            Err(item_arc) => {
                item = item_arc;
                sleep(Duration::from_millis(100)).await;
                continue;
            }
        }
    }
}

/// Parses the v1 version of the Asc ranking rule (`asc(price)`) and returns the field name.
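/// Added illustrative example (not compiled here):
/// ```ignore
/// assert_eq!(asc_ranking_rule("asc(price)"), Some("price"));
/// assert_eq!(asc_ranking_rule("price"), None);
/// ```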
pub fn asc_ranking_rule(text: &str) -> Option<&str> {
    text.split_once("asc(")
        .and_then(|(_, tail)| tail.rsplit_once(")"))
        .map(|(field, _)| field)
}

/// Parses the v1 version of the Desc ranking rule (`desc(price)`) and returns the field name.
pub fn desc_ranking_rule(text: &str) -> Option<&str> {
    text.split_once("desc(")
        .and_then(|(_, tail)| tail.rsplit_once(")"))
        .map(|(field, _)| field)
}

fn update_files_path(path: impl AsRef<Path>) -> PathBuf {
    path.as_ref().join("updates/updates_files")
}

259
meilisearch-lib/src/index_controller/snapshot.rs
Normal file
@@ -0,0 +1,259 @@
use std::path::Path;

use anyhow::bail;

//pub struct SnapshotService<U, R> {
//uuid_resolver_handle: R,
//update_handle: U,
//snapshot_period: Duration,
//snapshot_path: PathBuf,
//db_name: String,
//}

//impl<U, R> SnapshotService<U, R>
//where
//U: UpdateActorHandle,
//R: UuidResolverHandle,
//{
//pub fn new(
//uuid_resolver_handle: R,
//update_handle: U,
//snapshot_period: Duration,
//snapshot_path: PathBuf,
//db_name: String,
//) -> Self {
//Self {
//uuid_resolver_handle,
//update_handle,
//snapshot_period,
//snapshot_path,
//db_name,
//}
//}

//pub async fn run(self) {
//info!(
//"Snapshot scheduled every {}s.",
//self.snapshot_period.as_secs()
//);
//loop {
//if let Err(e) = self.perform_snapshot().await {
//error!("Error while performing snapshot: {}", e);
//}
//sleep(self.snapshot_period).await;
//}
//}

//async fn perform_snapshot(&self) -> anyhow::Result<()> {
//trace!("Performing snapshot.");

//let snapshot_dir = self.snapshot_path.clone();
//fs::create_dir_all(&snapshot_dir).await?;
//let temp_snapshot_dir =
//spawn_blocking(move || tempfile::tempdir_in(snapshot_dir)).await??;
//let temp_snapshot_path = temp_snapshot_dir.path().to_owned();

//let uuids = self
//.uuid_resolver_handle
//.snapshot(temp_snapshot_path.clone())
//.await?;

//if uuids.is_empty() {
//return Ok(());
//}

//self.update_handle
//.snapshot(uuids, temp_snapshot_path.clone())
//.await?;
//let snapshot_dir = self.snapshot_path.clone();
//let snapshot_path = self
//.snapshot_path
//.join(format!("{}.snapshot", self.db_name));
//let snapshot_path = spawn_blocking(move || -> anyhow::Result<PathBuf> {
//let temp_snapshot_file = tempfile::NamedTempFile::new_in(snapshot_dir)?;
//let temp_snapshot_file_path = temp_snapshot_file.path().to_owned();
//compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?;
//temp_snapshot_file.persist(&snapshot_path)?;
//Ok(snapshot_path)
//})
//.await??;

//trace!("Created snapshot in {:?}.", snapshot_path);

//Ok(())
//}
//}

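/// Added note: restores a database from `snapshot_path` into `db_path`, refusing to overwrite
/// an existing database unless `ignore_snapshot_if_db_exists` is set.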
pub fn load_snapshot(
    db_path: impl AsRef<Path>,
    snapshot_path: impl AsRef<Path>,
    ignore_snapshot_if_db_exists: bool,
    ignore_missing_snapshot: bool,
) -> anyhow::Result<()> {
    if !db_path.as_ref().exists() && snapshot_path.as_ref().exists() {
        match crate::from_tar_gz(snapshot_path, &db_path) {
            Ok(()) => Ok(()),
            Err(e) => {
                // Clean up the partially created db folder.
                std::fs::remove_dir_all(&db_path)?;
                Err(e)
            }
        }
    } else if db_path.as_ref().exists() && !ignore_snapshot_if_db_exists {
        bail!(
            "database already exists at {:?}, try to delete it or rename it",
            db_path
                .as_ref()
                .canonicalize()
                .unwrap_or_else(|_| db_path.as_ref().to_owned())
        )
    } else if !snapshot_path.as_ref().exists() && !ignore_missing_snapshot {
        bail!(
            "snapshot doesn't exist at {:?}",
            snapshot_path
                .as_ref()
                .canonicalize()
                .unwrap_or_else(|_| snapshot_path.as_ref().to_owned())
        )
    } else {
        Ok(())
    }
}

#[cfg(test)]
mod test {
    use std::iter::FromIterator;
    use std::{collections::HashSet, sync::Arc};

    use futures::future::{err, ok};
    use rand::Rng;
    use tokio::time::timeout;
    use uuid::Uuid;

    use super::*;
    use crate::index_controller::index_actor::MockIndexActorHandle;
    use crate::index_controller::update_actor::{
        error::UpdateActorError, MockUpdateActorHandle, UpdateActorHandleImpl,
    };
    use crate::index_controller::uuid_resolver::{
        error::UuidResolverError, MockUuidResolverHandle,
    };

    #[actix_rt::test]
    async fn test_normal() {
        let mut rng = rand::thread_rng();
        let uuids_num: usize = rng.gen_range(5..10);
        let uuids = (0..uuids_num)
            .map(|_| Uuid::new_v4())
            .collect::<HashSet<_>>();

        let mut uuid_resolver = MockUuidResolverHandle::new();
        let uuids_clone = uuids.clone();
        uuid_resolver
            .expect_snapshot()
            .times(1)
            .returning(move |_| Box::pin(ok(uuids_clone.clone())));

        let uuids_clone = uuids.clone();
        let mut index_handle = MockIndexActorHandle::new();
        index_handle
            .expect_snapshot()
            .withf(move |uuid, _path| uuids_clone.contains(uuid))
            .times(uuids_num)
            .returning(move |_, _| Box::pin(ok(())));

        let dir = tempfile::tempdir_in(".").unwrap();
        let handle = Arc::new(index_handle);
        let update_handle =
            UpdateActorHandleImpl::<Vec<u8>>::new(handle.clone(), dir.path(), 4096 * 100).unwrap();

        let snapshot_path = tempfile::tempdir_in(".").unwrap();
        let snapshot_service = SnapshotService::new(
            uuid_resolver,
            update_handle,
            Duration::from_millis(100),
            snapshot_path.path().to_owned(),
            "data.ms".to_string(),
        );

        snapshot_service.perform_snapshot().await.unwrap();
    }

    #[actix_rt::test]
    async fn error_performing_uuid_snapshot() {
        let mut uuid_resolver = MockUuidResolverHandle::new();
        uuid_resolver
            .expect_snapshot()
            .times(1)
            // arbitrary error
            .returning(|_| Box::pin(err(UuidResolverError::NameAlreadyExist)));

        let update_handle = MockUpdateActorHandle::new();

        let snapshot_path = tempfile::tempdir_in(".").unwrap();
        let snapshot_service = SnapshotService::new(
            uuid_resolver,
            update_handle,
            Duration::from_millis(100),
            snapshot_path.path().to_owned(),
            "data.ms".to_string(),
        );

        assert!(snapshot_service.perform_snapshot().await.is_err());
        // Nothing was written to the file.
        assert!(!snapshot_path.path().join("data.ms.snapshot").exists());
    }

    #[actix_rt::test]
    async fn error_performing_index_snapshot() {
        let uuid = Uuid::new_v4();
        let mut uuid_resolver = MockUuidResolverHandle::new();
        uuid_resolver
            .expect_snapshot()
            .times(1)
            .returning(move |_| Box::pin(ok(HashSet::from_iter(Some(uuid)))));

        let mut update_handle = MockUpdateActorHandle::new();
        update_handle
            .expect_snapshot()
            // arbitrary error
            .returning(|_, _| Box::pin(err(UpdateActorError::UnexistingUpdate(0))));

        let snapshot_path = tempfile::tempdir_in(".").unwrap();
        let snapshot_service = SnapshotService::new(
            uuid_resolver,
            update_handle,
            Duration::from_millis(100),
            snapshot_path.path().to_owned(),
            "data.ms".to_string(),
        );

        assert!(snapshot_service.perform_snapshot().await.is_err());
        // Nothing was written to the file.
        assert!(!snapshot_path.path().join("data.ms.snapshot").exists());
    }

    #[actix_rt::test]
    async fn test_loop() {
        let mut uuid_resolver = MockUuidResolverHandle::new();
        uuid_resolver
            .expect_snapshot()
            // We expect the function to be called between 2 and 3 times in the given interval.
            .times(2..4)
            // arbitrary error, to short-circuit the function
            .returning(move |_| Box::pin(err(UuidResolverError::NameAlreadyExist)));

        let update_handle = MockUpdateActorHandle::new();

        let snapshot_path = tempfile::tempdir_in(".").unwrap();
        let snapshot_service = SnapshotService::new(
            uuid_resolver,
            update_handle,
            Duration::from_millis(100),
            snapshot_path.path().to_owned(),
            "data.ms".to_string(),
        );

        let _ = timeout(Duration::from_millis(300), snapshot_service.run()).await;
    }
}

260
meilisearch-lib/src/index_controller/update_actor/actor.rs
Normal file

@ -0,0 +1,260 @@

use std::collections::HashSet;
use std::io;
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool;
use std::sync::Arc;

use actix_web::error::PayloadError;
use async_stream::stream;
use bytes::Bytes;
use futures::{Stream, StreamExt};
use log::trace;
use milli::documents::DocumentBatchBuilder;
use serde_json::{Map, Value};
use tokio::sync::mpsc;
use uuid::Uuid;

use super::error::{Result, UpdateActorError};
use super::RegisterUpdate;
use super::{Update, UpdateMsg, UpdateStore, UpdateStoreInfo};
use crate::index_controller::index_actor::IndexActorHandle;
use crate::index_controller::update_file_store::UpdateFileStore;
use crate::index_controller::{DocumentAdditionFormat, Payload, UpdateStatus};

pub struct UpdateActor<I> {
    store: Arc<UpdateStore>,
    inbox: Option<mpsc::Receiver<UpdateMsg>>,
    update_file_store: UpdateFileStore,
    index_handle: I,
    must_exit: Arc<AtomicBool>,
}

/// Adapts an asynchronous stream of `Bytes` chunks into a synchronous
/// `io::Read`, buffering the chunk currently being consumed.
struct StreamReader<S> {
    stream: S,
    current: Option<Bytes>,
}

impl<S> StreamReader<S> {
    fn new(stream: S) -> Self {
        Self {
            stream,
            current: None,
        }
    }
}

impl<S: Stream<Item = std::result::Result<Bytes, PayloadError>> + Unpin> io::Read
    for StreamReader<S>
{
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        match self.current.take() {
            Some(mut bytes) => {
                // Only split off as much as both the chunk and the destination
                // buffer can hold; `split_to(buf.len())` would panic on chunks
                // shorter than the buffer.
                let split_at = buf.len().min(bytes.len());
                let copied = bytes.split_to(split_at);
                buf[..copied.len()].copy_from_slice(&copied);
                if !bytes.is_empty() {
                    self.current.replace(bytes);
                }
                Ok(copied.len())
            }
            None => match tokio::runtime::Handle::current().block_on(self.stream.next()) {
                Some(Ok(bytes)) => {
                    self.current.replace(bytes);
                    self.read(buf)
                }
                Some(Err(e)) => Err(io::Error::new(io::ErrorKind::BrokenPipe, e)),
                None => Ok(0),
            },
        }
    }
}

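`StreamReader` bridges the async payload stream into the blocking `serde_json::from_reader` call further down in this file. A minimal driver sketch, assuming `StreamReader` were visible here; the in-memory chunk source is made up for illustration, and the inner `block_on` requires running the read on a blocking thread of a multi-threaded runtime:

use std::io::Read;

use bytes::Bytes;
use futures::stream;

#[tokio::main]
async fn main() {
    // Two chunks that together form one JSON document.
    let chunks = vec![
        Ok::<_, actix_web::error::PayloadError>(Bytes::from_static(b"{\"id\":")),
        Ok(Bytes::from_static(b"1}")),
    ];
    let payload = stream::iter(chunks);

    // `read` calls `Handle::block_on`, so it must run from a blocking
    // context (e.g. `spawn_blocking`), never from an async task directly.
    let body = tokio::task::spawn_blocking(move || {
        let mut reader = StreamReader::new(payload);
        let mut body = String::new();
        reader.read_to_string(&mut body).unwrap();
        body
    })
    .await
    .unwrap();

    assert_eq!(body, "{\"id\":1}");
}
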
impl<I> UpdateActor<I>
where
    I: IndexActorHandle + Clone + Sync + Send + 'static,
{
    pub fn new(
        update_db_size: usize,
        inbox: mpsc::Receiver<UpdateMsg>,
        path: impl AsRef<Path>,
        index_handle: I,
    ) -> anyhow::Result<Self> {
        let path = path.as_ref().to_owned();
        std::fs::create_dir_all(&path)?;

        let mut options = heed::EnvOpenOptions::new();
        options.map_size(update_db_size);

        let must_exit = Arc::new(AtomicBool::new(false));

        let store = UpdateStore::open(options, &path, index_handle.clone(), must_exit.clone())?;

        let inbox = Some(inbox);

        let update_file_store = UpdateFileStore::new(&path).unwrap();

        Ok(Self {
            store,
            inbox,
            index_handle,
            must_exit,
            update_file_store,
        })
    }

    pub async fn run(mut self) {
        use UpdateMsg::*;

        trace!("Started update actor.");

        let mut inbox = self
            .inbox
            .take()
            .expect("A receiver should be present by now.");

        let must_exit = self.must_exit.clone();
        let stream = stream! {
            loop {
                let msg = inbox.recv().await;

                if must_exit.load(std::sync::atomic::Ordering::Relaxed) {
                    break;
                }

                match msg {
                    Some(msg) => yield msg,
                    None => break,
                }
            }
        };

        stream
            .for_each_concurrent(Some(10), |msg| async {
                match msg {
                    Update { uuid, update, ret } => {
                        let _ = ret.send(self.handle_update(uuid, update).await);
                    }
                    ListUpdates { uuid, ret } => {
                        let _ = ret.send(self.handle_list_updates(uuid).await);
                    }
                    GetUpdate { uuid, ret, id } => {
                        let _ = ret.send(self.handle_get_update(uuid, id).await);
                    }
                    Delete { uuid, ret } => {
                        let _ = ret.send(self.handle_delete(uuid).await);
                    }
                    Snapshot { uuids, path, ret } => {
                        let _ = ret.send(self.handle_snapshot(uuids, path).await);
                    }
                    GetInfo { ret } => {
                        let _ = ret.send(self.handle_get_info().await);
                    }
                    Dump { uuids, path, ret } => {
                        let _ = ret.send(self.handle_dump(uuids, path).await);
                    }
                }
            })
            .await;
    }

    async fn handle_update(&self, index_uuid: Uuid, update: Update) -> Result<UpdateStatus> {
        let registration = match update {
            Update::DocumentAddition {
                payload,
                primary_key,
                method,
                format,
            } => {
                let content_uuid = match format {
                    DocumentAdditionFormat::Json => self.documents_from_json(payload).await?,
                };

                RegisterUpdate::DocumentAddition {
                    primary_key,
                    method,
                    content_uuid,
                }
            }
        };

        let store = self.store.clone();
        let status =
            tokio::task::spawn_blocking(move || store.register_update(index_uuid, registration))
                .await??;

        Ok(status.into())
    }

    async fn documents_from_json(&self, payload: Payload) -> Result<Uuid> {
        let file_store = self.update_file_store.clone();
        tokio::task::spawn_blocking(move || {
            let (uuid, mut file) = file_store.new_update().unwrap();
            let mut builder = DocumentBatchBuilder::new(&mut *file).unwrap();

            let documents: Vec<Map<String, Value>> =
                serde_json::from_reader(StreamReader::new(payload))?;
            builder.add_documents(documents).unwrap();
            builder.finish().unwrap();

            file.persist();

            Ok(uuid)
        })
        .await?
    }

    async fn handle_list_updates(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>> {
        let update_store = self.store.clone();
        tokio::task::spawn_blocking(move || {
            let result = update_store.list(uuid)?;
            Ok(result)
        })
        .await?
    }

    async fn handle_get_update(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus> {
        let store = self.store.clone();
        tokio::task::spawn_blocking(move || {
            let result = store
                .meta(uuid, id)?
                .ok_or(UpdateActorError::UnexistingUpdate(id))?;
            Ok(result)
        })
        .await?
    }

    async fn handle_delete(&self, uuid: Uuid) -> Result<()> {
        let store = self.store.clone();

        tokio::task::spawn_blocking(move || store.delete_all(uuid)).await??;

        Ok(())
    }

    async fn handle_snapshot(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
        let index_handle = self.index_handle.clone();
        let update_store = self.store.clone();

        tokio::task::spawn_blocking(move || update_store.snapshot(&uuids, &path, index_handle))
            .await??;

        Ok(())
    }

    async fn handle_dump(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
        let index_handle = self.index_handle.clone();
        let update_store = self.store.clone();

        tokio::task::spawn_blocking(move || -> Result<()> {
            update_store.dump(&uuids, path.to_path_buf(), index_handle)?;
            Ok(())
        })
        .await??;

        Ok(())
    }

    async fn handle_get_info(&self) -> Result<UpdateStoreInfo> {
        let update_store = self.store.clone();
        let info = tokio::task::spawn_blocking(move || -> Result<UpdateStoreInfo> {
            let info = update_store.get_info()?;
            Ok(info)
        })
        .await??;

        Ok(info)
    }
}

61
meilisearch-lib/src/index_controller/update_actor/error.rs
Normal file

@ -0,0 +1,61 @@

use std::error::Error;

use meilisearch_error::{Code, ErrorCode};

use crate::index_controller::index_actor::error::IndexActorError;

pub type Result<T> = std::result::Result<T, UpdateActorError>;

#[derive(Debug, thiserror::Error)]
#[allow(clippy::large_enum_variant)]
pub enum UpdateActorError {
    #[error("Update {0} not found.")]
    UnexistingUpdate(u64),
    #[error("Internal error: {0}")]
    Internal(Box<dyn Error + Send + Sync + 'static>),
    #[error("{0}")]
    IndexActor(#[from] IndexActorError),
    #[error(
        "update store was shut down due to a fatal error, please check your logs for more info."
    )]
    FatalUpdateStoreError,
    #[error("{0}")]
    InvalidPayload(Box<dyn Error + Send + Sync + 'static>),
    #[error("{0}")]
    PayloadError(#[from] actix_web::error::PayloadError),
}

impl<T> From<tokio::sync::mpsc::error::SendError<T>> for UpdateActorError {
    fn from(_: tokio::sync::mpsc::error::SendError<T>) -> Self {
        Self::FatalUpdateStoreError
    }
}

impl From<tokio::sync::oneshot::error::RecvError> for UpdateActorError {
    fn from(_: tokio::sync::oneshot::error::RecvError) -> Self {
        Self::FatalUpdateStoreError
    }
}

internal_error!(
    UpdateActorError: heed::Error,
    std::io::Error,
    serde_json::Error,
    tokio::task::JoinError
);

impl ErrorCode for UpdateActorError {
    fn error_code(&self) -> Code {
        match self {
            UpdateActorError::UnexistingUpdate(_) => Code::NotFound,
            UpdateActorError::Internal(_) => Code::Internal,
            UpdateActorError::IndexActor(e) => e.error_code(),
            UpdateActorError::FatalUpdateStoreError => Code::Internal,
            UpdateActorError::InvalidPayload(_) => Code::BadRequest,
            UpdateActorError::PayloadError(error) => match error {
                actix_web::error::PayloadError::Overflow => Code::PayloadTooLarge,
                _ => Code::Internal,
            },
        }
    }
}

94
meilisearch-lib/src/index_controller/update_actor/handle_impl.rs
Normal file

@ -0,0 +1,94 @@

use std::collections::HashSet;
use std::path::{Path, PathBuf};

use tokio::sync::{mpsc, oneshot};
use uuid::Uuid;

use crate::index_controller::{IndexActorHandle, Update, UpdateStatus};

use super::error::Result;
use super::{UpdateActor, UpdateActorHandle, UpdateMsg, UpdateStoreInfo};

#[derive(Clone)]
pub struct UpdateActorHandleImpl {
    sender: mpsc::Sender<UpdateMsg>,
}

impl UpdateActorHandleImpl {
    pub fn new<I>(
        index_handle: I,
        path: impl AsRef<Path>,
        update_store_size: usize,
    ) -> anyhow::Result<Self>
    where
        I: IndexActorHandle + Clone + Sync + Send + 'static,
    {
        let path = path.as_ref().to_owned();
        let (sender, receiver) = mpsc::channel(100);
        let actor = UpdateActor::new(update_store_size, receiver, path, index_handle)?;

        tokio::task::spawn_local(actor.run());

        Ok(Self { sender })
    }
}

#[async_trait::async_trait]
impl UpdateActorHandle for UpdateActorHandleImpl {
    async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>> {
        let (ret, receiver) = oneshot::channel();
        let msg = UpdateMsg::ListUpdates { uuid, ret };
        self.sender.send(msg).await?;
        receiver.await?
    }

    async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus> {
        let (ret, receiver) = oneshot::channel();
        let msg = UpdateMsg::GetUpdate { uuid, id, ret };
        self.sender.send(msg).await?;
        receiver.await?
    }

    async fn delete(&self, uuid: Uuid) -> Result<()> {
        let (ret, receiver) = oneshot::channel();
        let msg = UpdateMsg::Delete { uuid, ret };
        self.sender.send(msg).await?;
        receiver.await?
    }

    async fn snapshot(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
        let (ret, receiver) = oneshot::channel();
        let msg = UpdateMsg::Snapshot { uuids, path, ret };
        self.sender.send(msg).await?;
        receiver.await?
    }

    async fn dump(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
        let (ret, receiver) = oneshot::channel();
        let msg = UpdateMsg::Dump { uuids, path, ret };
        self.sender.send(msg).await?;
        receiver.await?
    }

    async fn get_info(&self) -> Result<UpdateStoreInfo> {
        let (ret, receiver) = oneshot::channel();
        let msg = UpdateMsg::GetInfo { ret };
        self.sender.send(msg).await?;
        receiver.await?
    }

    async fn update(&self, uuid: Uuid, update: Update) -> Result<UpdateStatus> {
        let (ret, receiver) = oneshot::channel();
        let msg = UpdateMsg::Update { uuid, update, ret };
        self.sender.send(msg).await?;
        receiver.await?
    }
}

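Every handle method above follows the same ask pattern: create a oneshot channel, ship the sender inside the message, then await the receiver for the reply. A minimal standalone sketch of that round trip; the `Ping` message type and `run_actor` function are made up for illustration:

use tokio::sync::{mpsc, oneshot};

// Hypothetical message mirroring the shape of the `UpdateMsg` variants:
// the request payload travels together with a oneshot sender for the reply.
enum Ping {
    Ping { ret: oneshot::Sender<&'static str> },
}

async fn run_actor(mut inbox: mpsc::Receiver<Ping>) {
    while let Some(Ping::Ping { ret }) = inbox.recv().await {
        // Ignore the error: the caller may have given up waiting.
        let _ = ret.send("pong");
    }
}

#[tokio::main]
async fn main() {
    let (sender, receiver) = mpsc::channel(100);
    tokio::spawn(run_actor(receiver));

    // The handle side: send the request, then await the oneshot reply.
    let (ret, reply) = oneshot::channel();
    sender.send(Ping::Ping { ret }).await.unwrap();
    assert_eq!(reply.await.unwrap(), "pong");
}
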
42
meilisearch-lib/src/index_controller/update_actor/message.rs
Normal file

@ -0,0 +1,42 @@

use std::collections::HashSet;
use std::path::PathBuf;

use tokio::sync::oneshot;
use uuid::Uuid;

use super::error::Result;
use super::{Update, UpdateStatus, UpdateStoreInfo};

pub enum UpdateMsg {
    Update {
        uuid: Uuid,
        update: Update,
        ret: oneshot::Sender<Result<UpdateStatus>>,
    },
    ListUpdates {
        uuid: Uuid,
        ret: oneshot::Sender<Result<Vec<UpdateStatus>>>,
    },
    GetUpdate {
        uuid: Uuid,
        ret: oneshot::Sender<Result<UpdateStatus>>,
        id: u64,
    },
    Delete {
        uuid: Uuid,
        ret: oneshot::Sender<Result<()>>,
    },
    Snapshot {
        uuids: HashSet<Uuid>,
        path: PathBuf,
        ret: oneshot::Sender<Result<()>>,
    },
    Dump {
        uuids: HashSet<Uuid>,
        path: PathBuf,
        ret: oneshot::Sender<Result<()>>,
    },
    GetInfo {
        ret: oneshot::Sender<Result<UpdateStoreInfo>>,
    },
}

49
meilisearch-lib/src/index_controller/update_actor/mod.rs
Normal file

@ -0,0 +1,49 @@

use std::{collections::HashSet, path::PathBuf};

use milli::update::IndexDocumentsMethod;
use serde::{Deserialize, Serialize};
use uuid::Uuid;

use super::Update;
use crate::index_controller::UpdateStatus;

use actor::UpdateActor;
use error::Result;
use message::UpdateMsg;

pub use handle_impl::UpdateActorHandleImpl;
pub use store::{UpdateStore, UpdateStoreInfo};

mod actor;
pub mod error;
mod handle_impl;
mod message;
pub mod store;

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RegisterUpdate {
    DocumentAddition {
        primary_key: Option<String>,
        method: IndexDocumentsMethod,
        content_uuid: Uuid,
    },
}

#[cfg(test)]
use mockall::automock;

#[async_trait::async_trait]
// Generates `MockUpdateActorHandle`, which the snapshot tests rely on.
#[cfg_attr(test, automock)]
pub trait UpdateActorHandle {
    async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>>;
    async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus>;
    async fn delete(&self, uuid: Uuid) -> Result<()>;
    async fn snapshot(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()>;
    async fn dump(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()>;
    async fn get_info(&self) -> Result<UpdateStoreInfo>;
    async fn update(&self, uuid: Uuid, update: Update) -> Result<UpdateStatus>;
}

86
meilisearch-lib/src/index_controller/update_actor/store/codec.rs
Normal file

@ -0,0 +1,86 @@

use std::{borrow::Cow, convert::TryInto, mem::size_of};

use heed::{BytesDecode, BytesEncode};
use uuid::Uuid;

pub struct NextIdCodec;

pub enum NextIdKey {
    Global,
    Index(Uuid),
}

impl<'a> BytesEncode<'a> for NextIdCodec {
    type EItem = NextIdKey;

    fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
        match item {
            NextIdKey::Global => Some(Cow::Borrowed(b"__global__")),
            NextIdKey::Index(ref uuid) => Some(Cow::Borrowed(uuid.as_bytes())),
        }
    }
}

pub struct PendingKeyCodec;

impl<'a> BytesEncode<'a> for PendingKeyCodec {
    type EItem = (u64, Uuid, u64);

    fn bytes_encode((global_id, uuid, update_id): &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
        let mut bytes = Vec::with_capacity(size_of::<Self::EItem>());
        bytes.extend_from_slice(&global_id.to_be_bytes());
        bytes.extend_from_slice(uuid.as_bytes());
        bytes.extend_from_slice(&update_id.to_be_bytes());
        Some(Cow::Owned(bytes))
    }
}

impl<'a> BytesDecode<'a> for PendingKeyCodec {
    type DItem = (u64, Uuid, u64);

    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        let global_id_bytes = bytes.get(0..size_of::<u64>())?.try_into().ok()?;
        let global_id = u64::from_be_bytes(global_id_bytes);

        let uuid_bytes = bytes
            .get(size_of::<u64>()..(size_of::<u64>() + size_of::<Uuid>()))?
            .try_into()
            .ok()?;
        let uuid = Uuid::from_bytes(uuid_bytes);

        let update_id_bytes = bytes
            .get((size_of::<u64>() + size_of::<Uuid>())..)?
            .try_into()
            .ok()?;
        let update_id = u64::from_be_bytes(update_id_bytes);

        Some((global_id, uuid, update_id))
    }
}

pub struct UpdateKeyCodec;

impl<'a> BytesEncode<'a> for UpdateKeyCodec {
    type EItem = (Uuid, u64);

    fn bytes_encode((uuid, update_id): &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
        let mut bytes = Vec::with_capacity(size_of::<Self::EItem>());
        bytes.extend_from_slice(uuid.as_bytes());
        bytes.extend_from_slice(&update_id.to_be_bytes());
        Some(Cow::Owned(bytes))
    }
}

impl<'a> BytesDecode<'a> for UpdateKeyCodec {
    type DItem = (Uuid, u64);

    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        let uuid_bytes = bytes.get(0..size_of::<Uuid>())?.try_into().ok()?;
        let uuid = Uuid::from_bytes(uuid_bytes);

        let update_id_bytes = bytes.get(size_of::<Uuid>()..)?.try_into().ok()?;
        let update_id = u64::from_be_bytes(update_id_bytes);

        Some((uuid, update_id))
    }
}

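Because the pending-queue key starts with the big-endian global id, LMDB's lexicographic key order is also arrival order. A small round-trip check of `PendingKeyCodec` illustrating this; a test sketch, not part of the commit:

#[cfg(test)]
mod codec_test {
    use heed::{BytesDecode, BytesEncode};
    use uuid::Uuid;

    use super::PendingKeyCodec;

    #[test]
    fn pending_key_roundtrip_preserves_order() {
        let uuid = Uuid::new_v4();
        let early = PendingKeyCodec::bytes_encode(&(1, uuid, 7)).unwrap();
        let late = PendingKeyCodec::bytes_encode(&(2, uuid, 0)).unwrap();

        // Big-endian global ids make byte order match numeric order.
        assert!(early.as_ref() < late.as_ref());

        // Encoding then decoding yields the original triple back.
        let decoded = PendingKeyCodec::bytes_decode(&early).unwrap();
        assert_eq!(decoded, (1, uuid, 7));
    }
}
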
186
meilisearch-lib/src/index_controller/update_actor/store/dump.rs
Normal file

@ -0,0 +1,186 @@

use std::{
    collections::HashSet,
    fs::{create_dir_all, File},
    io::Write,
    path::{Path, PathBuf},
};

use heed::RoTxn;
use serde::{Deserialize, Serialize};
use uuid::Uuid;

use super::{Result, State, UpdateStore};
use crate::index_controller::{index_actor::IndexActorHandle, UpdateStatus};

#[derive(Serialize, Deserialize)]
struct UpdateEntry {
    uuid: Uuid,
    update: UpdateStatus,
}

impl UpdateStore {
    pub fn dump(
        &self,
        uuids: &HashSet<Uuid>,
        path: PathBuf,
        handle: impl IndexActorHandle,
    ) -> Result<()> {
        let state_lock = self.state.write();
        state_lock.swap(State::Dumping);

        // The txn must *always* be acquired after the state lock, or it will deadlock.
        let txn = self.env.write_txn()?;

        let dump_path = path.join("updates");
        create_dir_all(&dump_path)?;

        self.dump_updates(&txn, uuids, &dump_path)?;

        let fut = dump_indexes(uuids, handle, &path);
        tokio::runtime::Handle::current().block_on(fut)?;

        state_lock.swap(State::Idle);

        Ok(())
    }

    fn dump_updates(
        &self,
        txn: &RoTxn,
        uuids: &HashSet<Uuid>,
        path: impl AsRef<Path>,
    ) -> Result<()> {
        let dump_data_path = path.as_ref().join("data.jsonl");
        let mut dump_data_file = File::create(dump_data_path)?;

        let update_files_path = path.as_ref().join(super::UPDATE_DIR);
        create_dir_all(&update_files_path)?;

        self.dump_pending(txn, uuids, &mut dump_data_file, &path)?;
        self.dump_completed(txn, uuids, &mut dump_data_file)?;

        Ok(())
    }

    fn dump_pending(
        &self,
        _txn: &RoTxn,
        _uuids: &HashSet<Uuid>,
        _file: &mut File,
        _dst_path: impl AsRef<Path>,
    ) -> Result<()> {
        todo!()
        //let pendings = self.pending_queue.iter(txn)?.lazily_decode_data();

        //for pending in pendings {
        //let ((_, uuid, _), data) = pending?;
        //if uuids.contains(&uuid) {
        //let update = data.decode()?;

        //if let Some(ref update_uuid) = update.content {
        //let src = super::update_uuid_to_file_path(&self.path, *update_uuid);
        //let dst = super::update_uuid_to_file_path(&dst_path, *update_uuid);
        //std::fs::copy(src, dst)?;
        //}

        //let update_json = UpdateEntry {
        //uuid,
        //update: update.into(),
        //};

        //serde_json::to_writer(&mut file, &update_json)?;
        //file.write_all(b"\n")?;
        //}
        //}

        //Ok(())
    }

    fn dump_completed(
        &self,
        txn: &RoTxn,
        uuids: &HashSet<Uuid>,
        mut file: &mut File,
    ) -> Result<()> {
        let updates = self.updates.iter(txn)?.lazily_decode_data();

        for update in updates {
            let ((uuid, _), data) = update?;
            if uuids.contains(&uuid) {
                let update = data.decode()?;

                let update_json = UpdateEntry { uuid, update };

                serde_json::to_writer(&mut file, &update_json)?;
                file.write_all(b"\n")?;
            }
        }

        Ok(())
    }

    pub fn load_dump(
        _src: impl AsRef<Path>,
        _dst: impl AsRef<Path>,
        _db_size: usize,
    ) -> anyhow::Result<()> {
        todo!()
        //let dst_update_path = dst.as_ref().join("updates/");
        //create_dir_all(&dst_update_path)?;

        //let mut options = EnvOpenOptions::new();
        //options.map_size(db_size as usize);
        //let (store, _) = UpdateStore::new(options, &dst_update_path)?;

        //let src_update_path = src.as_ref().join("updates");
        //let update_data = File::open(&src_update_path.join("data.jsonl"))?;
        //let mut update_data = BufReader::new(update_data);

        //std::fs::create_dir_all(dst_update_path.join("update_files/"))?;

        //let mut wtxn = store.env.write_txn()?;
        //let mut line = String::new();
        //loop {
        //match update_data.read_line(&mut line) {
        //Ok(0) => break,
        //Ok(_) => {
        //let UpdateEntry { uuid, update } = serde_json::from_str(&line)?;
        //store.register_raw_updates(&mut wtxn, &update, uuid)?;

        //// Copy the associated update file if it exists.
        //if let UpdateStatus::Enqueued(Enqueued {
        //content: Some(uuid),
        //..
        //}) = update
        //{
        //let src = update_uuid_to_file_path(&src_update_path, uuid);
        //let dst = update_uuid_to_file_path(&dst_update_path, uuid);
        //std::fs::copy(src, dst)?;
        //}
        //}
        //_ => break,
        //}

        //line.clear();
        //}

        //wtxn.commit()?;

        //Ok(())
    }
}

async fn dump_indexes(
    uuids: &HashSet<Uuid>,
    handle: impl IndexActorHandle,
    path: impl AsRef<Path>,
) -> Result<()> {
    for uuid in uuids {
        handle.dump(*uuid, path.as_ref().to_owned()).await?;
    }

    Ok(())
}

708
meilisearch-lib/src/index_controller/update_actor/store/mod.rs
Normal file

@ -0,0 +1,708 @@

mod codec;
pub mod dump;

use std::fs::{create_dir_all, remove_file};
use std::path::Path;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::{
    collections::{BTreeMap, HashSet},
    path::PathBuf,
    time::Duration,
};

use arc_swap::ArcSwap;
use futures::StreamExt;
use heed::types::{ByteSlice, OwnedType, SerdeJson};
use heed::zerocopy::U64;
use heed::{CompactionOption, Database, Env, EnvOpenOptions};
use log::error;
use parking_lot::{Mutex, MutexGuard};
use tokio::runtime::Handle;
use tokio::sync::mpsc;
use tokio::sync::mpsc::error::TrySendError;
use tokio::time::timeout;
use uuid::Uuid;

use codec::*;

use super::error::Result;
use super::RegisterUpdate;
use crate::index_controller::update_files_path;
use crate::index_controller::{index_actor::CONCURRENT_INDEX_MSG, updates::*, IndexActorHandle};
use crate::EnvSizer;

#[allow(clippy::upper_case_acronyms)]
type BEU64 = U64<heed::byteorder::BE>;

const UPDATE_DIR: &str = "update_files";

pub struct UpdateStoreInfo {
    /// Size of the update store in bytes.
    pub size: u64,
    /// Uuid of the currently processing update if it exists.
    pub processing: Option<Uuid>,
}

/// A data structure that allows concurrent reads AND exactly one writer.
pub struct StateLock {
    lock: Mutex<()>,
    data: ArcSwap<State>,
}

pub struct StateLockGuard<'a> {
    _lock: MutexGuard<'a, ()>,
    state: &'a StateLock,
}

impl StateLockGuard<'_> {
    pub fn swap(&self, state: State) -> Arc<State> {
        self.state.data.swap(Arc::new(state))
    }
}

impl StateLock {
    fn from_state(state: State) -> Self {
        let lock = Mutex::new(());
        let data = ArcSwap::from(Arc::new(state));
        Self { lock, data }
    }

    pub fn read(&self) -> Arc<State> {
        self.data.load().clone()
    }

    pub fn write(&self) -> StateLockGuard {
        let _lock = self.lock.lock();
        let state = &self;
        StateLockGuard { _lock, state }
    }
}

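`StateLock` pairs a `Mutex` (writer exclusion) with an `ArcSwap` (lock-free reads): readers never block while at most one writer swaps the published state. A minimal sketch of the same access pattern under an assumed two-variant toy state:

use std::sync::Arc;

use arc_swap::ArcSwap;
use parking_lot::{Mutex, MutexGuard};

// Assumed toy state; the real store uses `State::{Idle, Processing, ...}`.
enum Phase {
    Idle,
    Busy,
}

struct PhaseLock {
    lock: Mutex<()>,
    data: ArcSwap<Phase>,
}

impl PhaseLock {
    fn new() -> Self {
        Self {
            lock: Mutex::new(()),
            data: ArcSwap::from(Arc::new(Phase::Idle)),
        }
    }

    // Readers: wait-free snapshot of the current phase.
    fn read(&self) -> Arc<Phase> {
        self.data.load().clone()
    }

    // Writers: hold the mutex for the whole critical section,
    // swapping the published phase in and out around the work.
    fn with_exclusive(&self, work: impl FnOnce()) {
        let _guard: MutexGuard<()> = self.lock.lock();
        self.data.swap(Arc::new(Phase::Busy));
        work();
        self.data.swap(Arc::new(Phase::Idle));
    }
}

fn main() {
    let lock = PhaseLock::new();
    lock.with_exclusive(|| {
        // While this runs, `lock.read()` still succeeds without blocking.
        assert!(matches!(*lock.read(), Phase::Busy));
    });
    assert!(matches!(*lock.read(), Phase::Idle));
}
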
#[allow(clippy::large_enum_variant)]
pub enum State {
    Idle,
    Processing(Uuid, Processing),
    Snapshoting,
    Dumping,
}

#[derive(Clone)]
pub struct UpdateStore {
    pub env: Env,
    /// A queue containing the updates to process, ordered by arrival.
    /// The keys are built as follows:
    /// | global_update_id | index_uuid | update_id |
    /// | 8 bytes          | 16 bytes   | 8 bytes   |
    pending_queue: Database<PendingKeyCodec, SerdeJson<Enqueued>>,
    /// Maps each index to its next available update id. If `NextIdKey::Global` is queried,
    /// the next global update id is returned.
    next_update_id: Database<NextIdCodec, OwnedType<BEU64>>,
    /// Contains the metadata of all performed updates, be they failed, aborted, or processed.
    /// The keys are built as follows:
    /// | Uuid     | id      |
    /// | 16 bytes | 8 bytes |
    updates: Database<UpdateKeyCodec, SerdeJson<UpdateStatus>>,
    /// Indicates the current state of the update store.
    state: Arc<StateLock>,
    /// Wakes up the loop when a new event occurs.
    notification_sender: mpsc::Sender<()>,
    path: PathBuf,
}

impl UpdateStore {
    fn new(
        mut options: EnvOpenOptions,
        path: impl AsRef<Path>,
    ) -> anyhow::Result<(Self, mpsc::Receiver<()>)> {
        options.max_dbs(5);

        let update_path = path.as_ref().join("updates");
        std::fs::create_dir_all(&update_path)?;
        let env = options.open(update_path)?;
        let pending_queue = env.create_database(Some("pending-queue"))?;
        let next_update_id = env.create_database(Some("next-update-id"))?;
        let updates = env.create_database(Some("updates"))?;

        let state = Arc::new(StateLock::from_state(State::Idle));

        let (notification_sender, notification_receiver) = mpsc::channel(1);

        Ok((
            Self {
                env,
                pending_queue,
                next_update_id,
                updates,
                state,
                notification_sender,
                path: path.as_ref().to_owned(),
            },
            notification_receiver,
        ))
    }

    pub fn open(
        options: EnvOpenOptions,
        path: impl AsRef<Path>,
        index_handle: impl IndexActorHandle + Clone + Sync + Send + 'static,
        must_exit: Arc<AtomicBool>,
    ) -> anyhow::Result<Arc<Self>> {
        let (update_store, mut notification_receiver) = Self::new(options, path)?;
        let update_store = Arc::new(update_store);

        // Send a first notification to trigger the process.
        if let Err(TrySendError::Closed(())) = update_store.notification_sender.try_send(()) {
            panic!("Failed to init update store");
        }

        // We need a weak reference so we can take ownership of the arc later when we
        // want to close the index.
        let duration = Duration::from_secs(10 * 60); // 10 minutes
        let update_store_weak = Arc::downgrade(&update_store);
        tokio::task::spawn_local(async move {
            // Block and wait for something to process, with a timeout. The timeout
            // function returns a Result and we must just unlock the loop on Result.
            'outer: while timeout(duration, notification_receiver.recv())
                .await
                .map_or(true, |o| o.is_some())
            {
                loop {
                    match update_store_weak.upgrade() {
                        Some(update_store) => {
                            let handler = index_handle.clone();
                            let res = tokio::task::spawn_blocking(move || {
                                update_store.process_pending_update(handler)
                            })
                            .await
                            .expect("Fatal error processing update.");
                            match res {
                                Ok(Some(_)) => (),
                                Ok(None) => break,
                                Err(e) => {
                                    error!("Fatal error while processing an update that requires the update store to shutdown: {}", e);
                                    must_exit.store(true, Ordering::SeqCst);
                                    break 'outer;
                                }
                            }
                        }
                        // Ownership of the arc has been taken elsewhere, we need to exit.
                        None => break 'outer,
                    }
                }
            }

            error!("Update store loop exited.");
        });

        Ok(update_store)
    }

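The background loop above holds only a `Weak` reference to the store, so dropping the last external `Arc` is enough to stop it without any explicit shutdown signal. A stripped-down sketch of that shutdown mechanism; the names are illustrative:

use std::sync::{Arc, Weak};

// A worker that only holds a weak handle to its shared state: once every
// strong `Arc` is dropped by the owners, `upgrade()` fails and the loop ends.
fn drive(worker: Weak<String>) -> usize {
    let mut iterations = 0;
    while let Some(state) = worker.upgrade() {
        // The strong count is bumped only for the duration of this iteration.
        let _ = state.len();
        iterations += 1;
        drop(state);
        if iterations == 3 {
            break; // keep the demo finite
        }
    }
    iterations
}

fn main() {
    let store = Arc::new(String::from("update-store"));
    let weak = Arc::downgrade(&store);
    assert_eq!(drive(weak.clone()), 3);

    drop(store);
    // With no strong references left, the loop exits immediately.
    assert_eq!(drive(weak), 0);
}
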
    /// Returns the next global update id and the next update id for a given `index_uuid`.
    fn next_update_id(&self, txn: &mut heed::RwTxn, index_uuid: Uuid) -> heed::Result<(u64, u64)> {
        let global_id = self
            .next_update_id
            .get(txn, &NextIdKey::Global)?
            .map(U64::get)
            .unwrap_or_default();

        self.next_update_id
            .put(txn, &NextIdKey::Global, &BEU64::new(global_id + 1))?;

        let update_id = self.next_update_id_raw(txn, index_uuid)?;

        Ok((global_id, update_id))
    }

    /// Returns the next update id for a given `index_uuid` without
    /// incrementing the global update id. This is useful for the dumps.
    fn next_update_id_raw(&self, txn: &mut heed::RwTxn, index_uuid: Uuid) -> heed::Result<u64> {
        let update_id = self
            .next_update_id
            .get(txn, &NextIdKey::Index(index_uuid))?
            .map(U64::get)
            .unwrap_or_default();

        self.next_update_id.put(
            txn,
            &NextIdKey::Index(index_uuid),
            &BEU64::new(update_id + 1),
        )?;

        Ok(update_id)
    }

    /// Registers the update content in the pending store and the meta
    /// into the pending-meta store. Returns the new unique update id.
    pub fn register_update(
        &self,
        index_uuid: Uuid,
        update: RegisterUpdate,
    ) -> heed::Result<Enqueued> {
        let mut txn = self.env.write_txn()?;
        let (global_id, update_id) = self.next_update_id(&mut txn, index_uuid)?;
        let meta = Enqueued::new(update, update_id);

        self.pending_queue
            .put(&mut txn, &(global_id, index_uuid, update_id), &meta)?;

        txn.commit()?;

        if let Err(TrySendError::Closed(())) = self.notification_sender.try_send(()) {
            panic!("Update store loop exited");
        }

        Ok(meta)
    }

    // /// Pushes an already processed update in the UpdateStore without triggering the
    // /// notification process. This is useful for the dumps.
    //pub fn register_raw_updates(
    //&self,
    //wtxn: &mut heed::RwTxn,
    //update: &UpdateStatus,
    //index_uuid: Uuid,
    //) -> heed::Result<()> {
    //match update {
    //UpdateStatus::Enqueued(enqueued) => {
    //let (global_id, _update_id) = self.next_update_id(wtxn, index_uuid)?;
    //self.pending_queue.remap_key_type::<PendingKeyCodec>().put(
    //wtxn,
    //&(global_id, index_uuid, enqueued.id()),
    //enqueued,
    //)?;
    //}
    //_ => {
    //let _update_id = self.next_update_id_raw(wtxn, index_uuid)?;
    //self.updates.put(wtxn, &(index_uuid, update.id()), update)?;
    //}
    //}
    //Ok(())
    //}

    /// Executes the user provided function on the next pending update (the one with the lowest id).
    /// This is asynchronous as it lets the user process the update with a read-only txn and
    /// only writes the result meta to the processed-meta store *after* it has been processed.
    fn process_pending_update(&self, index_handle: impl IndexActorHandle) -> Result<Option<()>> {
        // Create a read transaction to be able to retrieve the pending updates in order.
        let rtxn = self.env.read_txn()?;
        let first_meta = self.pending_queue.first(&rtxn)?;
        drop(rtxn);

        // If there is a pending update, process it while holding only
        // a reader, not a writer.
        match first_meta {
            Some(((global_id, index_uuid, _), pending)) => {
                let processing = pending.processing();
                // Acquire the state lock and set the current state to processing.
                // The txn must *always* be acquired after the state lock, or it will deadlock.
                let state = self.state.write();
                state.swap(State::Processing(index_uuid, processing.clone()));

                let result = self.perform_update(processing, index_handle, index_uuid, global_id);

                state.swap(State::Idle);

                result
            }
            None => Ok(None),
        }
    }

    fn perform_update(
        &self,
        processing: Processing,
        index_handle: impl IndexActorHandle,
        index_uuid: Uuid,
        global_id: u64,
    ) -> Result<Option<()>> {
        // Process the pending update using the provided user function.
        let handle = Handle::current();
        let update_id = processing.id();
        let result = match handle.block_on(index_handle.update(index_uuid, processing.clone())) {
            Ok(result) => result,
            Err(e) => Err(processing.fail(e)),
        };

        // Once the pending update has been processed,
        // we must remove its content from the pending and processing stores and
        // write the *new* meta to the processed-meta store and commit.
        let mut wtxn = self.env.write_txn()?;
        self.pending_queue
            .delete(&mut wtxn, &(global_id, index_uuid, update_id))?;

        let result = match result {
            Ok(res) => res.into(),
            Err(res) => res.into(),
        };

        self.updates
            .put(&mut wtxn, &(index_uuid, update_id), &result)?;

        wtxn.commit()?;

        Ok(Some(()))
    }

    /// Lists the updates for `index_uuid`.
    pub fn list(&self, index_uuid: Uuid) -> Result<Vec<UpdateStatus>> {
        let mut update_list = BTreeMap::<u64, UpdateStatus>::new();

        let txn = self.env.read_txn()?;

        let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data();
        for entry in pendings {
            let ((_, uuid, id), pending) = entry?;
            if uuid == index_uuid {
                update_list.insert(id, pending.decode()?.into());
            }
        }

        let updates = self
            .updates
            .remap_key_type::<ByteSlice>()
            .prefix_iter(&txn, index_uuid.as_bytes())?;

        for entry in updates {
            let (_, update) = entry?;
            update_list.insert(update.id(), update);
        }

        // If the currently processing update is from this index, replace the corresponding pending update with it.
        match *self.state.read() {
            State::Processing(uuid, ref processing) if uuid == index_uuid => {
                update_list.insert(processing.id(), processing.clone().into());
            }
            _ => (),
        }

        Ok(update_list.into_iter().map(|(_, v)| v).collect())
    }

    /// Returns the meta associated with the update, or `None` if the update doesn't exist.
    pub fn meta(&self, index_uuid: Uuid, update_id: u64) -> heed::Result<Option<UpdateStatus>> {
        // Check if the update is the one currently processing.
        match *self.state.read() {
            State::Processing(uuid, ref processing)
                if uuid == index_uuid && processing.id() == update_id =>
            {
                return Ok(Some(processing.clone().into()));
            }
            _ => (),
        }

        let txn = self.env.read_txn()?;
        // Otherwise, check if it is in the updates database:
        let update = self.updates.get(&txn, &(index_uuid, update_id))?;

        if let Some(update) = update {
            return Ok(Some(update));
        }

        // If nothing was found yet, we resort to iterating over the pending queue.
        let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data();

        for entry in pendings {
            let ((_, uuid, id), pending) = entry?;
            if uuid == index_uuid && id == update_id {
                return Ok(Some(pending.decode()?.into()));
            }
        }

        // No update was found.
        Ok(None)
    }

    /// Deletes all updates for an index from the update store. If the currently processing update
    /// is for `index_uuid`, the call will block until the update is terminated.
    pub fn delete_all(&self, index_uuid: Uuid) -> Result<()> {
        let mut txn = self.env.write_txn()?;
        // Contains all the content file paths that need to be removed if the deletion succeeds.
        let uuids_to_remove: Vec<Uuid> = Vec::new();

        let mut pendings = self.pending_queue.iter_mut(&mut txn)?.lazily_decode_data();

        while let Some(Ok(((_, uuid, _), pending))) = pendings.next() {
            if uuid == index_uuid {
                let mut _pending = pending.decode()?;
                //if let Some(update_uuid) = pending.content.take() {
                //uuids_to_remove.push(update_uuid);
                //}

                // Invariant check: we can only delete the current entry when we don't hold
                // references to it anymore. This must be done after we have retrieved its content.
                unsafe {
                    pendings.del_current()?;
                }
            }
        }

        drop(pendings);

        let mut updates = self
            .updates
            .remap_key_type::<ByteSlice>()
            .prefix_iter_mut(&mut txn, index_uuid.as_bytes())?
            .lazily_decode_data();

        while updates.next().is_some() {
            unsafe {
                updates.del_current()?;
            }
        }

        drop(updates);

        txn.commit()?;

        // If the currently processing update is from our index, we wait until it is
        // finished before returning. This ensures that no write to the index occurs after we delete it.
        if let State::Processing(uuid, _) = *self.state.read() {
            if uuid == index_uuid {
                // Wait for a write lock, do nothing with it.
                self.state.write();
            }
        }

        // Finally, remove any outstanding update files. This must be done after waiting for the
        // last update to ensure that the update files are not deleted before the update needs
        // them.
        uuids_to_remove
            .iter()
            .map(|uuid: &Uuid| update_files_path(&self.path).join(uuid.to_string()))
            .for_each(|path| {
                let _ = remove_file(path);
            });

        Ok(())
    }

    pub fn snapshot(
        &self,
        uuids: &HashSet<Uuid>,
        path: impl AsRef<Path>,
        handle: impl IndexActorHandle + Clone,
    ) -> Result<()> {
        // Acquire the write lock on the state to prevent further writes during the snapshot.
        let state_lock = self.state.write();
        state_lock.swap(State::Snapshoting);

        let txn = self.env.write_txn()?;

        let update_path = path.as_ref().join("updates");
        create_dir_all(&update_path)?;

        let db_path = update_path.join("data.mdb");

        // Create the database snapshot.
        self.env.copy_to_path(&db_path, CompactionOption::Enabled)?;

        let update_files_path = update_path.join(UPDATE_DIR);
        create_dir_all(&update_files_path)?;

        let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data();

        for entry in pendings {
            let ((_, _uuid, _), _pending) = entry?;
            //if uuids.contains(&uuid) {
            //if let Enqueued {
            //content: Some(uuid),
            //..
            //} = pending.decode()?
            //{
            //let path = update_uuid_to_file_path(&self.path, uuid);
            //copy(path, &update_files_path)?;
            //}
            //}
        }

        let path = &path.as_ref().to_path_buf();
        let handle = &handle;
        // Perform the snapshot of each index concurrently. Only a third of the capacity of
        // the index actor is used at a time, so as not to put too much pressure on it.
        let mut stream = futures::stream::iter(uuids.iter())
            .map(move |uuid| handle.snapshot(*uuid, path.clone()))
            .buffer_unordered(CONCURRENT_INDEX_MSG / 3);

        Handle::current().block_on(async {
            while let Some(res) = stream.next().await {
                res?;
            }
            Ok(()) as Result<()>
        })?;

        Ok(())
    }

    pub fn get_info(&self) -> Result<UpdateStoreInfo> {
        let size = self.env.size();
        let txn = self.env.read_txn()?;
        for entry in self.pending_queue.iter(&txn)? {
            let (_, _pending) = entry?;
            //if let Enqueued {
            //content: Some(uuid),
            //..
            //} = pending
            //{
            //let path = update_uuid_to_file_path(&self.path, uuid);
            //size += File::open(path)?.metadata()?.len();
            //}
        }
        let processing = match *self.state.read() {
            State::Processing(uuid, _) => Some(uuid),
            _ => None,
        };

        Ok(UpdateStoreInfo { size, processing })
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::index_controller::{
        index_actor::{error::IndexActorError, MockIndexActorHandle},
        UpdateResult,
    };

    use futures::future::ok;
    use milli::update::IndexDocumentsMethod;

    /// Helper building a minimal `RegisterUpdate` for the tests below (the original
    /// tests targeted the pre-refactor `UpdateMeta` API and no longer compiled).
    /// The content uuid points to no real update file, which is fine for tests
    /// that never read it back.
    fn sample_register_update() -> RegisterUpdate {
        RegisterUpdate::DocumentAddition {
            primary_key: None,
            method: IndexDocumentsMethod::ReplaceDocuments,
            content_uuid: Uuid::new_v4(),
        }
    }

    #[actix_rt::test]
    async fn test_next_id() {
        let dir = tempfile::tempdir_in(".").unwrap();
        let mut options = EnvOpenOptions::new();
        let handle = Arc::new(MockIndexActorHandle::new());
        options.map_size(4096 * 100);
        let update_store = UpdateStore::open(
            options,
            dir.path(),
            handle,
            Arc::new(AtomicBool::new(false)),
        )
        .unwrap();

        let index1_uuid = Uuid::new_v4();
        let index2_uuid = Uuid::new_v4();

        let mut txn = update_store.env.write_txn().unwrap();
        let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap();
        txn.commit().unwrap();
        assert_eq!((0, 0), ids);

        let mut txn = update_store.env.write_txn().unwrap();
        let ids = update_store.next_update_id(&mut txn, index2_uuid).unwrap();
        txn.commit().unwrap();
        assert_eq!((1, 0), ids);

        let mut txn = update_store.env.write_txn().unwrap();
        let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap();
        txn.commit().unwrap();
        assert_eq!((2, 1), ids);
    }

    #[actix_rt::test]
    async fn test_register_update() {
        let dir = tempfile::tempdir_in(".").unwrap();
        let mut options = EnvOpenOptions::new();
        let handle = Arc::new(MockIndexActorHandle::new());
        options.map_size(4096 * 100);
        let update_store = UpdateStore::open(
            options,
            dir.path(),
            handle,
            Arc::new(AtomicBool::new(false)),
        )
        .unwrap();
        let meta = sample_register_update();
        let uuid = Uuid::new_v4();
        let store_clone = update_store.clone();
        tokio::task::spawn_blocking(move || {
            store_clone.register_update(uuid, meta).unwrap();
        })
        .await
        .unwrap();

        let txn = update_store.env.read_txn().unwrap();
        assert!(update_store
            .pending_queue
            .get(&txn, &(0, uuid, 0))
            .unwrap()
            .is_some());
    }

    #[actix_rt::test]
    async fn test_process_update() {
        let dir = tempfile::tempdir_in(".").unwrap();
        let mut handle = MockIndexActorHandle::new();

        // The new `update` handler receives the index uuid and the processing meta.
        handle
            .expect_update()
            .times(2)
            .returning(|_index_uuid, processing| {
                if processing.id() == 0 {
                    Box::pin(ok(Ok(processing.process(UpdateResult::Other))))
                } else {
                    Box::pin(ok(Err(
                        processing.fail(IndexActorError::ExistingPrimaryKey.into())
                    )))
                }
            });

        let handle = Arc::new(handle);

        let mut options = EnvOpenOptions::new();
        options.map_size(4096 * 100);
        let store = UpdateStore::open(
            options,
            dir.path(),
            handle.clone(),
            Arc::new(AtomicBool::new(false)),
        )
        .unwrap();

        // Wait a bit for the initial notification to be drained by the event loop.
        tokio::time::sleep(std::time::Duration::from_millis(50)).await;

        let mut txn = store.env.write_txn().unwrap();

        let update = Enqueued::new(sample_register_update(), 0);
        let uuid = Uuid::new_v4();

        store
            .pending_queue
            .put(&mut txn, &(0, uuid, 0), &update)
            .unwrap();

        let update = Enqueued::new(sample_register_update(), 1);

        store
            .pending_queue
            .put(&mut txn, &(1, uuid, 1), &update)
            .unwrap();

        txn.commit().unwrap();

        // Process the two pending updates, then check that they have been moved to the
        // updates database and removed from the pending database.
        let store_clone = store.clone();
        tokio::task::spawn_blocking(move || {
            store_clone.process_pending_update(handle.clone()).unwrap();
            store_clone.process_pending_update(handle).unwrap();
        })
        .await
        .unwrap();

        let txn = store.env.read_txn().unwrap();

        assert!(store.pending_queue.first(&txn).unwrap().is_none());
        let update = store.updates.get(&txn, &(uuid, 0)).unwrap().unwrap();

        assert!(matches!(update, UpdateStatus::Processed(_)));
        let update = store.updates.get(&txn, &(uuid, 1)).unwrap().unwrap();

        assert!(matches!(update, UpdateStatus::Failed(_)));
    }
}

63
meilisearch-lib/src/index_controller/update_file_store.rs
Normal file

@ -0,0 +1,63 @@

use std::fs::File;
use std::ops::{Deref, DerefMut};
use std::path::{Path, PathBuf};

use tempfile::NamedTempFile;
use uuid::Uuid;

use super::error::Result;

pub struct UpdateFile {
    path: PathBuf,
    file: NamedTempFile,
}

impl UpdateFile {
    pub fn persist(self) {
        println!("persisting in {}", self.path.display());
        self.file.persist(&self.path).unwrap();
    }
}

impl Deref for UpdateFile {
    type Target = NamedTempFile;

    fn deref(&self) -> &Self::Target {
        &self.file
    }
}

impl DerefMut for UpdateFile {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.file
    }
}

#[derive(Clone, Debug)]
pub struct UpdateFileStore {
    path: PathBuf,
}

impl UpdateFileStore {
    pub fn new(path: impl AsRef<Path>) -> Result<Self> {
        let path = path.as_ref().join("updates/updates_files");
        std::fs::create_dir_all(&path).unwrap();
        Ok(Self { path })
    }

    pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> {
        let file = NamedTempFile::new().unwrap();
        let uuid = Uuid::new_v4();
        let path = self.path.join(uuid.to_string());
        let update_file = UpdateFile { file, path };

        Ok((uuid, update_file))
    }

    pub fn get_update(&self, uuid: Uuid) -> Result<File> {
        let path = self.path.join(uuid.to_string());
        println!("reading in {}", path.display());
        let file = File::open(path).unwrap();
        Ok(file)
    }
}

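The store hands out a `(Uuid, UpdateFile)` pair: the caller writes through the `Deref` impls into the anonymous temp file, then `persist` moves it to its uuid-named final location. A usage sketch assumed to run inside this crate; the directory and payload are illustrative:

use std::io::{Read, Write};

fn roundtrip() -> anyhow::Result<()> {
    let store = UpdateFileStore::new("/tmp/update-file-store-demo")?;

    // Write the update payload to an anonymous temp file first...
    let (uuid, mut file) = store.new_update()?;
    file.write_all(b"{\"id\":1}")?;

    // ...then publish it under its uuid once the write is complete.
    file.persist();

    // Later readers fetch it back by the same uuid.
    let mut contents = String::new();
    store.get_update(uuid)?.read_to_string(&mut contents)?;
    assert_eq!(contents, "{\"id\":1}");
    Ok(())
}
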
250
meilisearch-lib/src/index_controller/updates.rs
Normal file

@ -0,0 +1,250 @@

use std::{error::Error, fmt::Display};
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
|
||||
use meilisearch_error::{Code, ErrorCode};
|
||||
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::index::{Settings, Unchecked};
|
||||
|
||||
use super::update_actor::RegisterUpdate;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum UpdateResult {
|
||||
DocumentsAddition(DocumentAdditionResult),
|
||||
DocumentDeletion { deleted: u64 },
|
||||
Other,
|
||||
}
|
||||
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum UpdateMeta {
|
||||
DocumentsAddition {
|
||||
method: IndexDocumentsMethod,
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
ClearDocuments,
|
||||
DeleteDocuments {
|
||||
ids: Vec<String>,
|
||||
},
|
||||
Settings(Settings<Unchecked>),
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Enqueued {
|
||||
pub update_id: u64,
|
||||
    pub meta: RegisterUpdate,
    pub enqueued_at: DateTime<Utc>,
}

impl Enqueued {
    pub fn new(meta: RegisterUpdate, update_id: u64) -> Self {
        Self {
            enqueued_at: Utc::now(),
            meta,
            update_id,
        }
    }

    pub fn processing(self) -> Processing {
        Processing {
            from: self,
            started_processing_at: Utc::now(),
        }
    }

    pub fn abort(self) -> Aborted {
        Aborted {
            from: self,
            aborted_at: Utc::now(),
        }
    }

    pub fn meta(&self) -> &RegisterUpdate {
        &self.meta
    }

    pub fn id(&self) -> u64 {
        self.update_id
    }
}

#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Processed {
    pub success: UpdateResult,
    pub processed_at: DateTime<Utc>,
    #[serde(flatten)]
    pub from: Processing,
}

impl Processed {
    pub fn id(&self) -> u64 {
        self.from.id()
    }

    pub fn meta(&self) -> &RegisterUpdate {
        self.from.meta()
    }
}

#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Processing {
    #[serde(flatten)]
    pub from: Enqueued,
    pub started_processing_at: DateTime<Utc>,
}

impl Processing {
    pub fn id(&self) -> u64 {
        self.from.id()
    }

    pub fn meta(&self) -> &RegisterUpdate {
        self.from.meta()
    }

    pub fn process(self, success: UpdateResult) -> Processed {
        Processed {
            success,
            from: self,
            processed_at: Utc::now(),
        }
    }

    pub fn fail(self, error: impl ErrorCode) -> Failed {
        let msg = error.to_string();
        let code = error.error_code();
        Failed {
            from: self,
            msg,
            code,
            failed_at: Utc::now(),
        }
    }
}

#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Aborted {
    #[serde(flatten)]
    from: Enqueued,
    aborted_at: DateTime<Utc>,
}

impl Aborted {
    pub fn id(&self) -> u64 {
        self.from.id()
    }

    pub fn meta(&self) -> &RegisterUpdate {
        self.from.meta()
    }
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Failed {
    #[serde(flatten)]
    pub from: Processing,
    pub msg: String,
    pub code: Code,
    pub failed_at: DateTime<Utc>,
}

impl Display for Failed {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.msg.fmt(f)
    }
}

impl Error for Failed {}

impl ErrorCode for Failed {
    fn error_code(&self) -> Code {
        self.code
    }
}

impl Failed {
    pub fn id(&self) -> u64 {
        self.from.id()
    }

    pub fn meta(&self) -> &RegisterUpdate {
        self.from.meta()
    }
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "status", rename_all = "camelCase")]
pub enum UpdateStatus {
    Processing(Processing),
    Enqueued(Enqueued),
    Processed(Processed),
    Aborted(Aborted),
    Failed(Failed),
}

impl UpdateStatus {
    pub fn id(&self) -> u64 {
        match self {
            UpdateStatus::Processing(u) => u.id(),
            UpdateStatus::Enqueued(u) => u.id(),
            UpdateStatus::Processed(u) => u.id(),
            UpdateStatus::Aborted(u) => u.id(),
            UpdateStatus::Failed(u) => u.id(),
        }
    }

    pub fn meta(&self) -> &RegisterUpdate {
        match self {
            UpdateStatus::Processing(u) => u.meta(),
            UpdateStatus::Enqueued(u) => u.meta(),
            UpdateStatus::Processed(u) => u.meta(),
            UpdateStatus::Aborted(u) => u.meta(),
            UpdateStatus::Failed(u) => u.meta(),
        }
    }

    pub fn processed(&self) -> Option<&Processed> {
        match self {
            UpdateStatus::Processed(p) => Some(p),
            _ => None,
        }
    }
}

impl From<Enqueued> for UpdateStatus {
    fn from(other: Enqueued) -> Self {
        Self::Enqueued(other)
    }
}

impl From<Aborted> for UpdateStatus {
    fn from(other: Aborted) -> Self {
        Self::Aborted(other)
    }
}

impl From<Processed> for UpdateStatus {
    fn from(other: Processed) -> Self {
        Self::Processed(other)
    }
}

impl From<Processing> for UpdateStatus {
    fn from(other: Processing) -> Self {
        Self::Processing(other)
    }
}

impl From<Failed> for UpdateStatus {
    fn from(other: Failed) -> Self {
        Self::Failed(other)
    }
}
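The statuses above encode a one-way lifecycle: every transition consumes `self`, so an update can never move backwards. A minimal sketch of the happy path, assuming a `RegisterUpdate` and an `UpdateResult` obtained from the update actor (the `0` update id is illustrative):

// Sketch only: Enqueued -> Processing -> Processed, tagged "status" when serialized.
fn happy_path(meta: RegisterUpdate, result: UpdateResult) -> UpdateStatus {
    let enqueued = Enqueued::new(meta, 0);        // status: enqueued
    let processing = enqueued.processing();       // status: processing
    let processed = processing.process(result);   // status: processed
    UpdateStatus::from(processed)
}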
98
meilisearch-lib/src/index_controller/uuid_resolver/actor.rs
Normal file
@@ -0,0 +1,98 @@
use std::{collections::HashSet, path::PathBuf};

use log::{trace, warn};
use tokio::sync::mpsc;
use uuid::Uuid;

use super::{error::UuidResolverError, Result, UuidResolveMsg, UuidStore};

pub struct UuidResolverActor<S> {
    inbox: mpsc::Receiver<UuidResolveMsg>,
    store: S,
}

impl<S: UuidStore> UuidResolverActor<S> {
    pub fn new(inbox: mpsc::Receiver<UuidResolveMsg>, store: S) -> Self {
        Self { inbox, store }
    }

    pub async fn run(mut self) {
        use UuidResolveMsg::*;

        trace!("uuid resolver started");

        loop {
            match self.inbox.recv().await {
                Some(Get { uid: name, ret }) => {
                    let _ = ret.send(self.handle_get(name).await);
                }
                Some(Delete { uid: name, ret }) => {
                    let _ = ret.send(self.handle_delete(name).await);
                }
                Some(List { ret }) => {
                    let _ = ret.send(self.handle_list().await);
                }
                Some(Insert { ret, uuid, name }) => {
                    let _ = ret.send(self.handle_insert(name, uuid).await);
                }
                Some(SnapshotRequest { path, ret }) => {
                    let _ = ret.send(self.handle_snapshot(path).await);
                }
                Some(GetSize { ret }) => {
                    let _ = ret.send(self.handle_get_size().await);
                }
                Some(DumpRequest { path, ret }) => {
                    let _ = ret.send(self.handle_dump(path).await);
                }
                // all senders have been dropped, need to quit.
                None => break,
            }
        }

        warn!("exiting uuid resolver loop");
    }

    async fn handle_get(&self, uid: String) -> Result<Uuid> {
        self.store
            .get_uuid(uid.clone())
            .await?
            .ok_or(UuidResolverError::UnexistingIndex(uid))
    }

    async fn handle_delete(&self, uid: String) -> Result<Uuid> {
        self.store
            .delete(uid.clone())
            .await?
            .ok_or(UuidResolverError::UnexistingIndex(uid))
    }

    async fn handle_list(&self) -> Result<Vec<(String, Uuid)>> {
        let result = self.store.list().await?;
        Ok(result)
    }

    async fn handle_snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
        self.store.snapshot(path).await
    }

    async fn handle_dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
        self.store.dump(path).await
    }

    async fn handle_insert(&self, uid: String, uuid: Uuid) -> Result<()> {
        if !is_index_uid_valid(&uid) {
            return Err(UuidResolverError::BadlyFormatted(uid));
        }
        self.store.insert(uid, uuid).await?;
        Ok(())
    }

    async fn handle_get_size(&self) -> Result<u64> {
        self.store.get_size().await
    }
}

fn is_index_uid_valid(uid: &str) -> bool {
    uid.chars()
        .all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_')
}
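For reference, the validator above accepts only ASCII alphanumerics, hyphens and underscores; a sketch with illustrative uids:

// Illustrative checks against is_index_uid_valid above.
fn uid_examples() {
    assert!(is_index_uid_valid("movies-2021_v2"));
    assert!(!is_index_uid_valid("movies/2021")); // '/' is rejected
}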
34
meilisearch-lib/src/index_controller/uuid_resolver/error.rs
Normal file
@@ -0,0 +1,34 @@
use meilisearch_error::{Code, ErrorCode};

pub type Result<T> = std::result::Result<T, UuidResolverError>;

#[derive(Debug, thiserror::Error)]
pub enum UuidResolverError {
    #[error("Index already exists.")]
    NameAlreadyExist,
    #[error("Index \"{0}\" not found.")]
    UnexistingIndex(String),
    #[error("Index must have a valid uid; Index uid can be of type integer or string only composed of alphanumeric characters, hyphens (-) and underscores (_).")]
    BadlyFormatted(String),
    #[error("Internal error: {0}")]
    Internal(Box<dyn std::error::Error + Sync + Send + 'static>),
}

internal_error!(
    UuidResolverError: heed::Error,
    uuid::Error,
    std::io::Error,
    tokio::task::JoinError,
    serde_json::Error
);

impl ErrorCode for UuidResolverError {
    fn error_code(&self) -> Code {
        match self {
            UuidResolverError::NameAlreadyExist => Code::IndexAlreadyExists,
            UuidResolverError::UnexistingIndex(_) => Code::IndexNotFound,
            UuidResolverError::BadlyFormatted(_) => Code::InvalidIndexUid,
            UuidResolverError::Internal(_) => Code::Internal,
        }
    }
}
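The `internal_error!` invocation above derives a `From` impl for each listed type, so `?` converts their errors straight into `UuidResolverError::Internal`; a sketch:

// Any of the listed error types converts implicitly; io::Error shown here.
fn open(path: &std::path::Path) -> Result<std::fs::File> {
    Ok(std::fs::File::open(path)?) // io::Error -> UuidResolverError via From
}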
87
meilisearch-lib/src/index_controller/uuid_resolver/handle_impl.rs
Normal file
@@ -0,0 +1,87 @@
use std::collections::HashSet;
use std::path::{Path, PathBuf};

use tokio::sync::{mpsc, oneshot};
use uuid::Uuid;

use super::{HeedUuidStore, Result, UuidResolveMsg, UuidResolverActor, UuidResolverHandle};

#[derive(Clone)]
pub struct UuidResolverHandleImpl {
    sender: mpsc::Sender<UuidResolveMsg>,
}

impl UuidResolverHandleImpl {
    pub fn new(path: impl AsRef<Path>) -> Result<Self> {
        let (sender, receiver) = mpsc::channel(100);
        let store = HeedUuidStore::new(path)?;
        let actor = UuidResolverActor::new(receiver, store);
        tokio::spawn(actor.run());
        Ok(Self { sender })
    }
}

#[async_trait::async_trait]
impl UuidResolverHandle for UuidResolverHandleImpl {
    async fn get(&self, name: String) -> Result<Uuid> {
        let (ret, receiver) = oneshot::channel();
        let msg = UuidResolveMsg::Get { uid: name, ret };
        let _ = self.sender.send(msg).await;
        Ok(receiver
            .await
            .expect("Uuid resolver actor has been killed")?)
    }

    async fn delete(&self, name: String) -> Result<Uuid> {
        let (ret, receiver) = oneshot::channel();
        let msg = UuidResolveMsg::Delete { uid: name, ret };
        let _ = self.sender.send(msg).await;
        Ok(receiver
            .await
            .expect("Uuid resolver actor has been killed")?)
    }

    async fn list(&self) -> Result<Vec<(String, Uuid)>> {
        let (ret, receiver) = oneshot::channel();
        let msg = UuidResolveMsg::List { ret };
        let _ = self.sender.send(msg).await;
        Ok(receiver
            .await
            .expect("Uuid resolver actor has been killed")?)
    }

    async fn insert(&self, name: String, uuid: Uuid) -> Result<()> {
        let (ret, receiver) = oneshot::channel();
        let msg = UuidResolveMsg::Insert { ret, name, uuid };
        let _ = self.sender.send(msg).await;
        Ok(receiver
            .await
            .expect("Uuid resolver actor has been killed")?)
    }

    async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
        let (ret, receiver) = oneshot::channel();
        let msg = UuidResolveMsg::SnapshotRequest { path, ret };
        let _ = self.sender.send(msg).await;
        Ok(receiver
            .await
            .expect("Uuid resolver actor has been killed")?)
    }

    async fn get_size(&self) -> Result<u64> {
        let (ret, receiver) = oneshot::channel();
        let msg = UuidResolveMsg::GetSize { ret };
        let _ = self.sender.send(msg).await;
        Ok(receiver
            .await
            .expect("Uuid resolver actor has been killed")?)
    }

    async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
        let (ret, receiver) = oneshot::channel();
        let msg = UuidResolveMsg::DumpRequest { ret, path };
        let _ = self.sender.send(msg).await;
        Ok(receiver
            .await
            .expect("Uuid resolver actor has been killed")?)
    }
}
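Every method above follows the same oneshot round trip: create a response channel, ship its sender with the request, then await the reply. A self-contained sketch of that pattern with illustrative message and result types:

use tokio::sync::{mpsc, oneshot};

// Minimal actor/handle round trip: the actor answers each request on the
// oneshot sender that travelled with the message.
async fn round_trip_demo() {
    let (tx, mut rx) = mpsc::channel::<(String, oneshot::Sender<usize>)>(8);
    tokio::spawn(async move {
        while let Some((msg, ret)) = rx.recv().await {
            let _ = ret.send(msg.len());
        }
    });
    let (ret, receiver) = oneshot::channel();
    let _ = tx.send(("movies".to_string(), ret)).await;
    assert_eq!(receiver.await.expect("actor has been killed"), 6);
}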
37
meilisearch-lib/src/index_controller/uuid_resolver/message.rs
Normal file
@@ -0,0 +1,37 @@
use std::collections::HashSet;
use std::path::PathBuf;

use tokio::sync::oneshot;
use uuid::Uuid;

use super::Result;

pub enum UuidResolveMsg {
    Get {
        uid: String,
        ret: oneshot::Sender<Result<Uuid>>,
    },
    Delete {
        uid: String,
        ret: oneshot::Sender<Result<Uuid>>,
    },
    List {
        ret: oneshot::Sender<Result<Vec<(String, Uuid)>>>,
    },
    Insert {
        uuid: Uuid,
        name: String,
        ret: oneshot::Sender<Result<()>>,
    },
    SnapshotRequest {
        path: PathBuf,
        ret: oneshot::Sender<Result<HashSet<Uuid>>>,
    },
    GetSize {
        ret: oneshot::Sender<Result<u64>>,
    },
    DumpRequest {
        path: PathBuf,
        ret: oneshot::Sender<Result<HashSet<Uuid>>>,
    },
}
35
meilisearch-lib/src/index_controller/uuid_resolver/mod.rs
Normal file
@@ -0,0 +1,35 @@
mod actor;
pub mod error;
mod handle_impl;
mod message;
pub mod store;

use std::collections::HashSet;
use std::path::PathBuf;

use uuid::Uuid;

use actor::UuidResolverActor;
use error::Result;
use message::UuidResolveMsg;
use store::UuidStore;

#[cfg(test)]
use mockall::automock;

pub use handle_impl::UuidResolverHandleImpl;
pub use store::HeedUuidStore;

const UUID_STORE_SIZE: usize = 1_073_741_824; // 1 GiB

#[async_trait::async_trait]
#[cfg_attr(test, automock)]
pub trait UuidResolverHandle {
    async fn get(&self, name: String) -> Result<Uuid>;
    async fn insert(&self, name: String, uuid: Uuid) -> Result<()>;
    async fn delete(&self, name: String) -> Result<Uuid>;
    async fn list(&self) -> Result<Vec<(String, Uuid)>>;
    async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
    async fn get_size(&self) -> Result<u64>;
    async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
}
224
meilisearch-lib/src/index_controller/uuid_resolver/store.rs
Normal file
@@ -0,0 +1,224 @@
use std::collections::HashSet;
use std::fs::{create_dir_all, File};
use std::io::{BufRead, BufReader, Write};
use std::path::{Path, PathBuf};

use heed::types::{ByteSlice, Str};
use heed::{CompactionOption, Database, Env, EnvOpenOptions};
use serde::{Deserialize, Serialize};
use uuid::Uuid;

use super::{error::UuidResolverError, Result, UUID_STORE_SIZE};
use crate::EnvSizer;

#[derive(Serialize, Deserialize)]
struct DumpEntry {
    uuid: Uuid,
    uid: String,
}

const UUIDS_DB_PATH: &str = "index_uuids";

#[async_trait::async_trait]
pub trait UuidStore: Sized {
    // Return the uuid associated with `uid`, if any.
    async fn get_uuid(&self, uid: String) -> Result<Option<Uuid>>;
    async fn delete(&self, uid: String) -> Result<Option<Uuid>>;
    async fn list(&self) -> Result<Vec<(String, Uuid)>>;
    // Create a new entry for `name`; return an error if the entry already exists.
    async fn insert(&self, name: String, uuid: Uuid) -> Result<()>;
    async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
    async fn get_size(&self) -> Result<u64>;
    async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
}

#[derive(Clone)]
pub struct HeedUuidStore {
    env: Env,
    db: Database<Str, ByteSlice>,
}

impl HeedUuidStore {
    pub fn new(path: impl AsRef<Path>) -> Result<Self> {
        let path = path.as_ref().join(UUIDS_DB_PATH);
        create_dir_all(&path)?;
        let mut options = EnvOpenOptions::new();
        options.map_size(UUID_STORE_SIZE); // 1 GiB
        let env = options.open(path)?;
        let db = env.create_database(None)?;
        Ok(Self { env, db })
    }

    pub fn get_uuid(&self, name: String) -> Result<Option<Uuid>> {
        let env = self.env.clone();
        let db = self.db;
        let txn = env.read_txn()?;
        match db.get(&txn, &name)? {
            Some(uuid) => {
                let uuid = Uuid::from_slice(uuid)?;
                Ok(Some(uuid))
            }
            None => Ok(None),
        }
    }

    pub fn delete(&self, uid: String) -> Result<Option<Uuid>> {
        let env = self.env.clone();
        let db = self.db;
        let mut txn = env.write_txn()?;
        match db.get(&txn, &uid)? {
            Some(uuid) => {
                let uuid = Uuid::from_slice(uuid)?;
                db.delete(&mut txn, &uid)?;
                txn.commit()?;
                Ok(Some(uuid))
            }
            None => Ok(None),
        }
    }

    pub fn list(&self) -> Result<Vec<(String, Uuid)>> {
        let env = self.env.clone();
        let db = self.db;
        let txn = env.read_txn()?;
        let mut entries = Vec::new();
        for entry in db.iter(&txn)? {
            let (name, uuid) = entry?;
            let uuid = Uuid::from_slice(uuid)?;
            entries.push((name.to_owned(), uuid))
        }
        Ok(entries)
    }

    pub fn insert(&self, name: String, uuid: Uuid) -> Result<()> {
        let env = self.env.clone();
        let db = self.db;
        let mut txn = env.write_txn()?;

        if db.get(&txn, &name)?.is_some() {
            return Err(UuidResolverError::NameAlreadyExist);
        }

        db.put(&mut txn, &name, uuid.as_bytes())?;
        txn.commit()?;
        Ok(())
    }

    pub fn snapshot(&self, mut path: PathBuf) -> Result<HashSet<Uuid>> {
        let env = self.env.clone();
        let db = self.db;
        // Write transaction to acquire a lock on the database.
        let txn = env.write_txn()?;
        let mut entries = HashSet::new();
        for entry in db.iter(&txn)? {
            let (_, uuid) = entry?;
            let uuid = Uuid::from_slice(uuid)?;
            entries.insert(uuid);
        }

        // only perform snapshot if there are indexes
        if !entries.is_empty() {
            path.push(UUIDS_DB_PATH);
            create_dir_all(&path)?;
            path.push("data.mdb");
            env.copy_to_path(path, CompactionOption::Enabled)?;
        }
        Ok(entries)
    }

    pub fn get_size(&self) -> Result<u64> {
        Ok(self.env.size())
    }

    pub fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
        let dump_path = path.join(UUIDS_DB_PATH);
        create_dir_all(&dump_path)?;
        let dump_file_path = dump_path.join("data.jsonl");
        let mut dump_file = File::create(&dump_file_path)?;
        let mut uuids = HashSet::new();

        let txn = self.env.read_txn()?;
        for entry in self.db.iter(&txn)? {
            let (uid, uuid) = entry?;
            let uid = uid.to_string();
            let uuid = Uuid::from_slice(uuid)?;

            let entry = DumpEntry { uuid, uid };
            serde_json::to_writer(&mut dump_file, &entry)?;
            dump_file.write_all(b"\n")?;

            uuids.insert(uuid);
        }

        Ok(uuids)
    }

    pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> Result<()> {
        let uuid_resolver_path = dst.as_ref().join(UUIDS_DB_PATH);
        std::fs::create_dir_all(&uuid_resolver_path)?;

        let src_indexes = src.as_ref().join(UUIDS_DB_PATH).join("data.jsonl");
        let indexes = File::open(&src_indexes)?;
        let mut indexes = BufReader::new(indexes);
        let mut line = String::new();

        let db = Self::new(dst)?;
        let mut txn = db.env.write_txn()?;

        loop {
            match indexes.read_line(&mut line) {
                Ok(0) => break,
                Ok(_) => {
                    let DumpEntry { uuid, uid } = serde_json::from_str(&line)?;
                    println!("importing {} {}", uid, uuid);
                    db.db.put(&mut txn, &uid, uuid.as_bytes())?;
                }
                Err(e) => return Err(e.into()),
            }

            line.clear();
        }
        txn.commit()?;

        db.env.prepare_for_closing().wait();

        Ok(())
    }
}

#[async_trait::async_trait]
impl UuidStore for HeedUuidStore {
    async fn get_uuid(&self, name: String) -> Result<Option<Uuid>> {
        let this = self.clone();
        tokio::task::spawn_blocking(move || this.get_uuid(name)).await?
    }

    async fn delete(&self, uid: String) -> Result<Option<Uuid>> {
        let this = self.clone();
        tokio::task::spawn_blocking(move || this.delete(uid)).await?
    }

    async fn list(&self) -> Result<Vec<(String, Uuid)>> {
        let this = self.clone();
        tokio::task::spawn_blocking(move || this.list()).await?
    }

    async fn insert(&self, name: String, uuid: Uuid) -> Result<()> {
        let this = self.clone();
        tokio::task::spawn_blocking(move || this.insert(name, uuid)).await?
    }

    async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
        let this = self.clone();
        tokio::task::spawn_blocking(move || this.snapshot(path)).await?
    }

    async fn get_size(&self) -> Result<u64> {
        self.get_size()
    }

    async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
        let this = self.clone();
        tokio::task::spawn_blocking(move || this.dump(path)).await?
    }
}
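`dump` writes one JSON object per line of `data.jsonl`, e.g. `{"uuid":"...","uid":"movies"}` (values illustrative), and `load_dump` re-imports that file into a fresh heed environment; a sketch with illustrative paths:

// Restore a previously written dump into a new store directory.
fn restore() -> Result<()> {
    HeedUuidStore::load_dump("/path/to/dump", "/path/to/db")
}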
53
meilisearch-lib/src/lib.rs
Normal file
@@ -0,0 +1,53 @@
#[macro_use]
pub mod error;
pub mod options;

pub mod index;
pub mod index_controller;

pub use index_controller::{
    update_actor::RegisterUpdate, IndexController as MeiliSearch, UpdateResult, UpdateStatus,
};

use walkdir::WalkDir;

pub trait EnvSizer {
    fn size(&self) -> u64;
}

impl EnvSizer for heed::Env {
    fn size(&self) -> u64 {
        WalkDir::new(self.path())
            .into_iter()
            .filter_map(|entry| entry.ok())
            .filter_map(|entry| entry.metadata().ok())
            .filter(|metadata| metadata.is_file())
            .fold(0, |acc, m| acc + m.len())
    }
}

use std::fs::{create_dir_all, File};
use std::io::Write;
use std::path::Path;

use flate2::{read::GzDecoder, write::GzEncoder, Compression};
use tar::{Archive, Builder};

pub fn to_tar_gz(src: impl AsRef<Path>, dest: impl AsRef<Path>) -> anyhow::Result<()> {
    let mut f = File::create(dest)?;
    let gz_encoder = GzEncoder::new(&mut f, Compression::default());
    let mut tar_encoder = Builder::new(gz_encoder);
    tar_encoder.append_dir_all(".", src)?;
    let gz_encoder = tar_encoder.into_inner()?;
    gz_encoder.finish()?;
    f.flush()?;
    Ok(())
}

pub fn from_tar_gz(src: impl AsRef<Path>, dest: impl AsRef<Path>) -> anyhow::Result<()> {
    let f = File::open(&src)?;
    let gz = GzDecoder::new(f);
    let mut ar = Archive::new(gz);
    create_dir_all(&dest)?;
    ar.unpack(&dest)?;
    Ok(())
}
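A round trip with the two archive helpers above (paths illustrative):

// Sketch: pack a data directory into a gzipped tarball, then unpack it elsewhere.
fn snapshot_round_trip() -> anyhow::Result<()> {
    to_tar_gz("data.ms", "snapshot.tar.gz")?;
    from_tar_gz("snapshot.tar.gz", "restored.ms")?;
    Ok(())
}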
115
meilisearch-lib/src/options.rs
Normal file
@@ -0,0 +1,115 @@
use core::fmt;
use std::{ops::Deref, str::FromStr};

use byte_unit::{Byte, ByteError};
use milli::CompressionType;
use structopt::StructOpt;
use sysinfo::{RefreshKind, System, SystemExt};

#[derive(Debug, Clone, StructOpt)]
pub struct IndexerOpts {
    /// The number of documents to process before printing
    /// a log regarding the indexing progress.
    #[structopt(long, default_value = "100000")] // 100k
    pub log_every_n: usize,

    /// Grenad max number of chunks in bytes.
    #[structopt(long)]
    pub max_nb_chunks: Option<usize>,

    /// The maximum amount of memory the indexer will use. It defaults to 2/3
    /// of the available memory. It is recommended to use something like 80%-90%
    /// of the available memory, no more.
    ///
    /// If the engine is unable to retrieve the available memory, it will try
    /// to use whatever memory it needs, without any real limit. This can lead
    /// to out-of-memory issues, so it is recommended to specify the amount of
    /// memory to use.
    #[structopt(long, default_value)]
    pub max_memory: MaxMemory,

    /// The name of the compression algorithm to use when compressing intermediate
    /// Grenad chunks while indexing documents.
    ///
    /// Choosing a fast algorithm will make the indexing faster but may consume more memory.
    #[structopt(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])]
    pub chunk_compression_type: CompressionType,

    /// The level of compression of the chosen algorithm.
    #[structopt(long, requires = "chunk-compression-type")]
    pub chunk_compression_level: Option<u32>,

    /// Number of parallel jobs for indexing; defaults to the number of CPUs.
    #[structopt(long)]
    pub indexing_jobs: Option<usize>,
}

impl Default for IndexerOpts {
    fn default() -> Self {
        Self {
            log_every_n: 100_000,
            max_nb_chunks: None,
            max_memory: MaxMemory::default(),
            chunk_compression_type: CompressionType::None,
            chunk_compression_level: None,
            indexing_jobs: None,
        }
    }
}

/// A type used to detect the maximum memory available and default to 2/3 of it.
#[derive(Debug, Clone, Copy)]
pub struct MaxMemory(Option<Byte>);

impl FromStr for MaxMemory {
    type Err = ByteError;

    fn from_str(s: &str) -> Result<MaxMemory, ByteError> {
        Byte::from_str(s).map(Some).map(MaxMemory)
    }
}

impl Default for MaxMemory {
    fn default() -> MaxMemory {
        MaxMemory(
            total_memory_bytes()
                .map(|bytes| bytes * 2 / 3)
                .map(Byte::from_bytes),
        )
    }
}

impl fmt::Display for MaxMemory {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self.0 {
            Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)),
            None => f.write_str("unknown"),
        }
    }
}

impl Deref for MaxMemory {
    type Target = Option<Byte>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl MaxMemory {
    pub fn unlimited() -> Self {
        Self(None)
    }
}

/// Returns the total amount of bytes available or `None` if this system isn't supported.
fn total_memory_bytes() -> Option<u64> {
    if System::IS_SUPPORTED {
        let memory_kind = RefreshKind::new().with_memory();
        let mut system = System::new_with_specifics(memory_kind);
        system.refresh_memory();
        Some(system.total_memory() * 1024) // sysinfo reports KiB; convert to bytes
    } else {
        None
    }
}
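`MaxMemory` accepts human-readable sizes through `byte_unit`; a quick sketch (value illustrative):

// "4 GiB" parses into Some(Byte); Deref exposes the inner Option<Byte>.
fn max_memory_example() {
    let limit: MaxMemory = "4 GiB".parse().expect("a valid byte string");
    assert!(limit.is_some());
}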