split meilisearch-http and meilisearch-lib

This commit is contained in:
mpostma 2021-09-21 13:23:22 +02:00
parent 09d4e37044
commit 60518449fc
63 changed files with 608 additions and 324 deletions

View file

@ -4,8 +4,8 @@ use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use log::debug;
use serde::Serialize;
use siphasher::sip::SipHasher;
use meilisearch_lib::MeiliSearch;
use crate::Data;
use crate::Opt;
const AMPLITUDE_API_KEY: &str = "f7fba398780e06d8fe6666a9be7e3d47";
@ -18,8 +18,8 @@ struct EventProperties {
}
impl EventProperties {
async fn from(data: Data) -> anyhow::Result<EventProperties> {
let stats = data.index_controller.get_all_stats().await?;
async fn from(data: MeiliSearch) -> anyhow::Result<EventProperties> {
let stats = data.get_all_stats().await?;
let database_size = stats.database_size;
let last_update_timestamp = stats.last_update.map(|u| u.timestamp());
@ -62,7 +62,7 @@ struct AmplitudeRequest<'a> {
events: Vec<Event<'a>>,
}
pub async fn analytics_sender(data: Data, opt: Opt) {
pub async fn analytics_sender(data: MeiliSearch, opt: Opt) {
let username = whoami::username();
let hostname = whoami::hostname();
let platform = whoami::platform();

View file

@ -1,86 +0,0 @@
use std::ops::Deref;
use std::sync::Arc;
use crate::index::{Checked, Settings};
use crate::index_controller::{
error::Result, DumpInfo, IndexController, IndexMetadata, IndexStats, Stats,
};
use crate::option::Opt;
pub mod search;
mod updates;
#[derive(Clone)]
pub struct Data {
inner: Arc<DataInner>,
}
impl Deref for Data {
type Target = DataInner;
fn deref(&self) -> &Self::Target {
&self.inner
}
}
pub struct DataInner {
pub index_controller: IndexController,
//pub api_keys: ApiKeys,
}
impl Data {
pub fn new(options: Opt) -> anyhow::Result<Data> {
let path = options.db_path.clone();
let index_controller = IndexController::new(&path, &options)?;
let inner = DataInner {
index_controller,
};
let inner = Arc::new(inner);
Ok(Data { inner })
}
pub async fn settings(&self, uid: String) -> Result<Settings<Checked>> {
self.index_controller.settings(uid).await
}
pub async fn list_indexes(&self) -> Result<Vec<IndexMetadata>> {
self.index_controller.list_indexes().await
}
pub async fn index(&self, uid: String) -> Result<IndexMetadata> {
self.index_controller.get_index(uid).await
}
//pub async fn create_index(
//&self,
//uid: String,
//primary_key: Option<String>,
//) -> Result<IndexMetadata> {
//let settings = IndexSettings {
//uid: Some(uid),
//primary_key,
//};
//let meta = self.index_controller.create_index(settings).await?;
//Ok(meta)
//}
pub async fn get_index_stats(&self, uid: String) -> Result<IndexStats> {
Ok(self.index_controller.get_index_stats(uid).await?)
}
pub async fn get_all_stats(&self) -> Result<Stats> {
Ok(self.index_controller.get_all_stats().await?)
}
pub async fn create_dump(&self) -> Result<DumpInfo> {
Ok(self.index_controller.create_dump().await?)
}
pub async fn dump_status(&self, uid: String) -> Result<DumpInfo> {
Ok(self.index_controller.dump_info(uid).await?)
}
}

View file

@ -1,34 +0,0 @@
use serde_json::{Map, Value};
use super::Data;
use crate::index::{SearchQuery, SearchResult};
use crate::index_controller::error::Result;
impl Data {
pub async fn search(&self, index: String, search_query: SearchQuery) -> Result<SearchResult> {
self.index_controller.search(index, search_query).await
}
pub async fn retrieve_documents(
&self,
index: String,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Map<String, Value>>> {
self.index_controller
.documents(index, offset, limit, attributes_to_retrieve)
.await
}
pub async fn retrieve_document(
&self,
index: String,
document_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Map<String, Value>> {
self.index_controller
.document(index, document_id, attributes_to_retrieve)
.await
}
}

View file

@ -1,32 +0,0 @@
use crate::index_controller::Update;
use crate::index_controller::{error::Result, IndexMetadata, IndexSettings, UpdateStatus};
use crate::Data;
impl Data {
pub async fn register_update(&self, index_uid: &str, update: Update) -> Result<UpdateStatus> {
let status = self.index_controller.register_update(index_uid, update).await?;
Ok(status)
}
pub async fn get_update_status(&self, index: String, uid: u64) -> Result<UpdateStatus> {
self.index_controller.update_status(index, uid).await
}
pub async fn get_updates_status(&self, index: String) -> Result<Vec<UpdateStatus>> {
self.index_controller.all_update_status(index).await
}
pub async fn update_index(
&self,
uid: String,
primary_key: Option<String>,
new_uid: Option<String>,
) -> Result<IndexMetadata> {
let settings = IndexSettings {
uid: new_uid,
primary_key,
};
self.index_controller.update_index(uid, settings).await
}
}

View file

@ -55,18 +55,6 @@ impl aweb::error::ResponseError for ResponseError {
}
}
macro_rules! internal_error {
($target:ty : $($other:path), *) => {
$(
impl From<$other> for $target {
fn from(other: $other) -> Self {
Self::Internal(Box::new(other))
}
}
)*
}
}
#[derive(Debug)]
pub struct MilliError<'a>(pub &'a milli::Error);

View file

@ -1,166 +0,0 @@
use std::fs::File;
use std::io::Write;
use std::path::Path;
use heed::RoTxn;
use indexmap::IndexMap;
use serde::{Deserialize, Serialize};
use crate::option::IndexerOpts;
use super::error::Result;
use super::{Index, Settings, Unchecked};
#[derive(Serialize, Deserialize)]
struct DumpMeta {
settings: Settings<Unchecked>,
primary_key: Option<String>,
}
const META_FILE_NAME: &str = "meta.json";
const DATA_FILE_NAME: &str = "documents.jsonl";
impl Index {
pub fn dump(&self, path: impl AsRef<Path>) -> Result<()> {
// acquire write txn make sure any ongoing write is finished before we start.
let txn = self.env.write_txn()?;
self.dump_documents(&txn, &path)?;
self.dump_meta(&txn, &path)?;
Ok(())
}
fn dump_documents(&self, txn: &RoTxn, path: impl AsRef<Path>) -> Result<()> {
let document_file_path = path.as_ref().join(DATA_FILE_NAME);
let mut document_file = File::create(&document_file_path)?;
let documents = self.all_documents(txn)?;
let fields_ids_map = self.fields_ids_map(txn)?;
// dump documents
let mut json_map = IndexMap::new();
for document in documents {
let (_, reader) = document?;
for (fid, bytes) in reader.iter() {
if let Some(name) = fields_ids_map.name(fid) {
json_map.insert(name, serde_json::from_slice::<serde_json::Value>(bytes)?);
}
}
serde_json::to_writer(&mut document_file, &json_map)?;
document_file.write_all(b"\n")?;
json_map.clear();
}
Ok(())
}
fn dump_meta(&self, txn: &RoTxn, path: impl AsRef<Path>) -> Result<()> {
let meta_file_path = path.as_ref().join(META_FILE_NAME);
let mut meta_file = File::create(&meta_file_path)?;
let settings = self.settings_txn(txn)?.into_unchecked();
let primary_key = self.primary_key(txn)?.map(String::from);
let meta = DumpMeta {
settings,
primary_key,
};
serde_json::to_writer(&mut meta_file, &meta)?;
Ok(())
}
pub fn load_dump(
_src: impl AsRef<Path>,
_dst: impl AsRef<Path>,
_size: usize,
_indexing_options: &IndexerOpts,
) -> anyhow::Result<()> {
//let dir_name = src
//.as_ref()
//.file_name()
//.with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?;
//let dst_dir_path = dst.as_ref().join("indexes").join(dir_name);
//create_dir_all(&dst_dir_path)?;
//let meta_path = src.as_ref().join(META_FILE_NAME);
//let mut meta_file = File::open(meta_path)?;
//// We first deserialize the dump meta into a serde_json::Value and change
//// the custom ranking rules settings from the old format to the new format.
//let mut meta: Value = serde_json::from_reader(&mut meta_file)?;
//if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") {
//convert_custom_ranking_rules(ranking_rules);
//}
//// Then we serialize it back into a vec to deserialize it
//// into a `DumpMeta` struct with the newly patched `rankingRules` format.
//let patched_meta = serde_json::to_vec(&meta)?;
//let DumpMeta {
//settings,
//primary_key,
//} = serde_json::from_slice(&patched_meta)?;
//let settings = settings.check();
//let index = Self::open(&dst_dir_path, size)?;
//let mut txn = index.write_txn()?;
//let handler = UpdateHandler::new(indexing_options)?;
//index.update_settings_txn(&mut txn, &settings, handler.update_builder(0))?;
//let document_file_path = src.as_ref().join(DATA_FILE_NAME);
//let reader = File::open(&document_file_path)?;
//let mut reader = BufReader::new(reader);
//reader.fill_buf()?;
// If the document file is empty, we don't perform the document addition, to prevent
// a primary key error to be thrown.
todo!("fix obk document dumps")
//if !reader.buffer().is_empty() {
//index.update_documents_txn(
//&mut txn,
//IndexDocumentsMethod::UpdateDocuments,
//Some(reader),
//handler.update_builder(0),
//primary_key.as_deref(),
//)?;
//}
//txn.commit()?;
//match Arc::try_unwrap(index.0) {
//Ok(inner) => inner.prepare_for_closing().wait(),
//Err(_) => bail!("Could not close index properly."),
//}
//Ok(())
}
}
// /// Converts the ranking rules from the format `asc(_)`, `desc(_)` to the format `_:asc`, `_:desc`.
// ///
// /// This is done for compatibility reasons, and to avoid a new dump version,
// /// since the new syntax was introduced soon after the new dump version.
//fn convert_custom_ranking_rules(ranking_rules: &mut Value) {
//*ranking_rules = match ranking_rules.take() {
//Value::Array(values) => values
//.into_iter()
//.filter_map(|value| match value {
//Value::String(s) if s.starts_with("asc") => asc_ranking_rule(&s)
//.map(|f| format!("{}:asc", f))
//.map(Value::String),
//Value::String(s) if s.starts_with("desc") => desc_ranking_rule(&s)
//.map(|f| format!("{}:desc", f))
//.map(Value::String),
//otherwise => Some(otherwise),
//})
//.collect(),
//otherwise => otherwise,
//}
//}

View file

@ -1,52 +0,0 @@
use std::error::Error;
use meilisearch_error::{Code, ErrorCode};
use serde_json::Value;
use crate::error::MilliError;
pub type Result<T> = std::result::Result<T, IndexError>;
#[derive(Debug, thiserror::Error)]
pub enum IndexError {
#[error("Internal error: {0}")]
Internal(Box<dyn Error + Send + Sync + 'static>),
#[error("Document with id {0} not found.")]
DocumentNotFound(String),
#[error("{0}")]
Facet(#[from] FacetError),
#[error("{0}")]
Milli(#[from] milli::Error),
}
internal_error!(
IndexError: std::io::Error,
heed::Error,
fst::Error,
serde_json::Error
);
impl ErrorCode for IndexError {
fn error_code(&self) -> Code {
match self {
IndexError::Internal(_) => Code::Internal,
IndexError::DocumentNotFound(_) => Code::DocumentNotFound,
IndexError::Facet(e) => e.error_code(),
IndexError::Milli(e) => MilliError(e).error_code(),
}
}
}
#[derive(Debug, thiserror::Error)]
pub enum FacetError {
#[error("Invalid facet expression, expected {}, found: {1}", .0.join(", "))]
InvalidExpression(&'static [&'static str], Value),
}
impl ErrorCode for FacetError {
fn error_code(&self) -> Code {
match self {
FacetError::InvalidExpression(_, _) => Code::Facet,
}
}
}

View file

@ -1,202 +0,0 @@
use std::collections::{BTreeSet, HashSet};
use std::fs::create_dir_all;
use std::marker::PhantomData;
use std::ops::Deref;
use std::path::Path;
use std::sync::Arc;
use heed::{EnvOpenOptions, RoTxn};
use milli::update::Setting;
use milli::{obkv_to_json, FieldId};
use serde_json::{Map, Value};
use error::Result;
pub use search::{default_crop_length, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
pub use updates::{Checked, Facets, Settings, Unchecked};
use crate::helpers::EnvSizer;
use crate::index_controller::update_file_store::UpdateFileStore;
use self::error::IndexError;
pub mod error;
pub mod update_handler;
mod dump;
mod search;
mod updates;
pub type Document = Map<String, Value>;
#[derive(Clone)]
pub struct Index {
pub inner: Arc<milli::Index>,
update_file_store: Arc<UpdateFileStore>,
}
impl Deref for Index {
type Target = milli::Index;
fn deref(&self) -> &Self::Target {
self.inner.as_ref()
}
}
impl Index {
pub fn open(path: impl AsRef<Path>, size: usize, update_file_store: Arc<UpdateFileStore>) -> Result<Self> {
create_dir_all(&path)?;
let mut options = EnvOpenOptions::new();
options.map_size(size);
let inner = Arc::new(milli::Index::new(options, &path)?);
Ok(Index { inner, update_file_store })
}
pub fn settings(&self) -> Result<Settings<Checked>> {
let txn = self.read_txn()?;
self.settings_txn(&txn)
}
pub fn settings_txn(&self, txn: &RoTxn) -> Result<Settings<Checked>> {
let displayed_attributes = self
.displayed_fields(txn)?
.map(|fields| fields.into_iter().map(String::from).collect());
let searchable_attributes = self
.searchable_fields(txn)?
.map(|fields| fields.into_iter().map(String::from).collect());
let filterable_attributes = self.filterable_fields(txn)?.into_iter().collect();
let sortable_attributes = self.sortable_fields(txn)?.into_iter().collect();
let criteria = self
.criteria(txn)?
.into_iter()
.map(|c| c.to_string())
.collect();
let stop_words = self
.stop_words(txn)?
.map(|stop_words| -> Result<BTreeSet<_>> {
Ok(stop_words.stream().into_strs()?.into_iter().collect())
})
.transpose()?
.unwrap_or_else(BTreeSet::new);
let distinct_field = self.distinct_field(txn)?.map(String::from);
// in milli each word in the synonyms map were split on their separator. Since we lost
// this information we are going to put space between words.
let synonyms = self
.synonyms(txn)?
.iter()
.map(|(key, values)| {
(
key.join(" "),
values.iter().map(|value| value.join(" ")).collect(),
)
})
.collect();
Ok(Settings {
displayed_attributes: match displayed_attributes {
Some(attrs) => Setting::Set(attrs),
None => Setting::Reset,
},
searchable_attributes: match searchable_attributes {
Some(attrs) => Setting::Set(attrs),
None => Setting::Reset,
},
filterable_attributes: Setting::Set(filterable_attributes),
sortable_attributes: Setting::Set(sortable_attributes),
ranking_rules: Setting::Set(criteria),
stop_words: Setting::Set(stop_words),
distinct_attribute: match distinct_field {
Some(field) => Setting::Set(field),
None => Setting::Reset,
},
synonyms: Setting::Set(synonyms),
_kind: PhantomData,
})
}
pub fn retrieve_documents<S: AsRef<str>>(
&self,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<S>>,
) -> Result<Vec<Map<String, Value>>> {
let txn = self.read_txn()?;
let fields_ids_map = self.fields_ids_map(&txn)?;
let fields_to_display =
self.fields_to_display(&txn, &attributes_to_retrieve, &fields_ids_map)?;
let iter = self.documents.range(&txn, &(..))?.skip(offset).take(limit);
let mut documents = Vec::new();
for entry in iter {
let (_id, obkv) = entry?;
let object = obkv_to_json(&fields_to_display, &fields_ids_map, obkv)?;
documents.push(object);
}
Ok(documents)
}
pub fn retrieve_document<S: AsRef<str>>(
&self,
doc_id: String,
attributes_to_retrieve: Option<Vec<S>>,
) -> Result<Map<String, Value>> {
let txn = self.read_txn()?;
let fields_ids_map = self.fields_ids_map(&txn)?;
let fields_to_display =
self.fields_to_display(&txn, &attributes_to_retrieve, &fields_ids_map)?;
let internal_id = self
.external_documents_ids(&txn)?
.get(doc_id.as_bytes())
.ok_or_else(|| IndexError::DocumentNotFound(doc_id.clone()))?;
let document = self
.documents(&txn, std::iter::once(internal_id))?
.into_iter()
.next()
.map(|(_, d)| d)
.ok_or(IndexError::DocumentNotFound(doc_id))?;
let document = obkv_to_json(&fields_to_display, &fields_ids_map, document)?;
Ok(document)
}
pub fn size(&self) -> u64 {
self.env.size()
}
fn fields_to_display<S: AsRef<str>>(
&self,
txn: &heed::RoTxn,
attributes_to_retrieve: &Option<Vec<S>>,
fields_ids_map: &milli::FieldsIdsMap,
) -> Result<Vec<FieldId>> {
let mut displayed_fields_ids = match self.displayed_fields_ids(txn)? {
Some(ids) => ids.into_iter().collect::<Vec<_>>(),
None => fields_ids_map.iter().map(|(id, _)| id).collect(),
};
let attributes_to_retrieve_ids = match attributes_to_retrieve {
Some(attrs) => attrs
.iter()
.filter_map(|f| fields_ids_map.id(f.as_ref()))
.collect::<HashSet<_>>(),
None => fields_ids_map.iter().map(|(id, _)| id).collect(),
};
displayed_fields_ids.retain(|fid| attributes_to_retrieve_ids.contains(fid));
Ok(displayed_fields_ids)
}
}

File diff suppressed because it is too large Load diff

View file

@ -1,72 +0,0 @@
use crate::index::Index;
use milli::update::UpdateBuilder;
use milli::CompressionType;
use rayon::ThreadPool;
use crate::index_controller::update_actor::RegisterUpdate;
use crate::index_controller::{Failed, Processed, Processing};
use crate::option::IndexerOpts;
pub struct UpdateHandler {
max_nb_chunks: Option<usize>,
chunk_compression_level: Option<u32>,
thread_pool: ThreadPool,
log_frequency: usize,
max_memory: Option<usize>,
chunk_compression_type: CompressionType,
}
impl UpdateHandler {
pub fn new(opt: &IndexerOpts) -> anyhow::Result<Self> {
let thread_pool = rayon::ThreadPoolBuilder::new()
.num_threads(opt.indexing_jobs.unwrap_or(num_cpus::get() / 2))
.build()?;
Ok(Self {
max_nb_chunks: opt.max_nb_chunks,
chunk_compression_level: opt.chunk_compression_level,
thread_pool,
log_frequency: opt.log_every_n,
max_memory: opt.max_memory.map(|m| m.get_bytes() as usize),
chunk_compression_type: opt.chunk_compression_type,
})
}
pub fn update_builder(&self, update_id: u64) -> UpdateBuilder {
// We prepare the update by using the update builder.
let mut update_builder = UpdateBuilder::new(update_id);
if let Some(max_nb_chunks) = self.max_nb_chunks {
update_builder.max_nb_chunks(max_nb_chunks);
}
if let Some(chunk_compression_level) = self.chunk_compression_level {
update_builder.chunk_compression_level(chunk_compression_level);
}
update_builder.thread_pool(&self.thread_pool);
update_builder.log_every_n(self.log_frequency);
if let Some(max_memory) = self.max_memory {
update_builder.max_memory(max_memory);
}
update_builder.chunk_compression_type(self.chunk_compression_type);
update_builder
}
pub fn handle_update(
&self,
index: Index,
meta: Processing,
) -> Result<Processed, Failed> {
let update_id = meta.id();
let update_builder = self.update_builder(update_id);
let result = match meta.meta() {
RegisterUpdate::DocumentAddition { primary_key, content_uuid, method } => {
index.update_documents(*method, *content_uuid, update_builder, primary_key.as_deref())
}
};
match result {
Ok(result) => Ok(meta.process(result)),
Err(e) => Err(meta.fail(e.into())),
}
}
}

View file

@ -1,366 +0,0 @@
use std::collections::{BTreeMap, BTreeSet};
use std::marker::PhantomData;
use std::num::NonZeroUsize;
use log::{debug, info, trace};
use milli::documents::DocumentBatchReader;
use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder};
use serde::{Deserialize, Serialize, Serializer};
use uuid::Uuid;
use crate::index_controller::UpdateResult;
use super::Index;
use super::error::Result;
fn serialize_with_wildcard<S>(
field: &Setting<Vec<String>>,
s: S,
) -> std::result::Result<S::Ok, S::Error>
where
S: Serializer,
{
let wildcard = vec!["*".to_string()];
match field {
Setting::Set(value) => Some(value),
Setting::Reset => Some(&wildcard),
Setting::NotSet => None,
}
.serialize(s)
}
#[derive(Clone, Default, Debug, Serialize)]
pub struct Checked;
#[derive(Clone, Default, Debug, Serialize, Deserialize)]
pub struct Unchecked;
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
#[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))]
pub struct Settings<T> {
#[serde(
default,
serialize_with = "serialize_with_wildcard",
skip_serializing_if = "Setting::is_not_set"
)]
pub displayed_attributes: Setting<Vec<String>>,
#[serde(
default,
serialize_with = "serialize_with_wildcard",
skip_serializing_if = "Setting::is_not_set"
)]
pub searchable_attributes: Setting<Vec<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub filterable_attributes: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub sortable_attributes: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub ranking_rules: Setting<Vec<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub stop_words: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub distinct_attribute: Setting<String>,
#[serde(skip)]
pub _kind: PhantomData<T>,
}
impl Settings<Checked> {
pub fn cleared() -> Settings<Checked> {
Settings {
displayed_attributes: Setting::Reset,
searchable_attributes: Setting::Reset,
filterable_attributes: Setting::Reset,
sortable_attributes: Setting::Reset,
ranking_rules: Setting::Reset,
stop_words: Setting::Reset,
synonyms: Setting::Reset,
distinct_attribute: Setting::Reset,
_kind: PhantomData,
}
}
pub fn into_unchecked(self) -> Settings<Unchecked> {
let Self {
displayed_attributes,
searchable_attributes,
filterable_attributes,
sortable_attributes,
ranking_rules,
stop_words,
synonyms,
distinct_attribute,
..
} = self;
Settings {
displayed_attributes,
searchable_attributes,
filterable_attributes,
sortable_attributes,
ranking_rules,
stop_words,
synonyms,
distinct_attribute,
_kind: PhantomData,
}
}
}
impl Settings<Unchecked> {
pub fn check(self) -> Settings<Checked> {
let displayed_attributes = match self.displayed_attributes {
Setting::Set(fields) => {
if fields.iter().any(|f| f == "*") {
Setting::Reset
} else {
Setting::Set(fields)
}
}
otherwise => otherwise,
};
let searchable_attributes = match self.searchable_attributes {
Setting::Set(fields) => {
if fields.iter().any(|f| f == "*") {
Setting::Reset
} else {
Setting::Set(fields)
}
}
otherwise => otherwise,
};
Settings {
displayed_attributes,
searchable_attributes,
filterable_attributes: self.filterable_attributes,
sortable_attributes: self.sortable_attributes,
ranking_rules: self.ranking_rules,
stop_words: self.stop_words,
synonyms: self.synonyms,
distinct_attribute: self.distinct_attribute,
_kind: PhantomData,
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct Facets {
pub level_group_size: Option<NonZeroUsize>,
pub min_level_size: Option<NonZeroUsize>,
}
impl Index {
pub fn update_documents(
&self,
method: IndexDocumentsMethod,
content_uuid: Uuid,
update_builder: UpdateBuilder,
primary_key: Option<&str>,
) -> Result<UpdateResult> {
let mut txn = self.write_txn()?;
let result = self.update_documents_txn(&mut txn, method, content_uuid, update_builder, primary_key)?;
txn.commit()?;
Ok(result)
}
pub fn update_documents_txn<'a, 'b>(
&'a self,
txn: &mut heed::RwTxn<'a, 'b>,
method: IndexDocumentsMethod,
content_uuid: Uuid,
update_builder: UpdateBuilder,
primary_key: Option<&str>,
) -> Result<UpdateResult> {
trace!("performing document addition");
// Set the primary key if not set already, ignore if already set.
if let (None, Some(primary_key)) = (self.primary_key(txn)?, primary_key) {
let mut builder = UpdateBuilder::new(0).settings(txn, self);
builder.set_primary_key(primary_key.to_string());
builder.execute(|_, _| ())?;
}
let indexing_callback =
|indexing_step, update_id| debug!("update {}: {:?}", update_id, indexing_step);
let content_file = self.update_file_store.get_update(content_uuid).unwrap();
let reader = DocumentBatchReader::from_reader(content_file).unwrap();
let mut builder = update_builder.index_documents(txn, self);
builder.index_documents_method(method);
let addition = builder.execute(reader, indexing_callback)?;
info!("document addition done: {:?}", addition);
Ok(UpdateResult::DocumentsAddition(addition))
}
//pub fn clear_documents(&self, update_builder: UpdateBuilder) -> Result<UpdateResult> {
//// We must use the write transaction of the update here.
//let mut wtxn = self.write_txn()?;
//let builder = update_builder.clear_documents(&mut wtxn, self);
//let _count = builder.execute()?;
//wtxn.commit()
//.and(Ok(UpdateResult::Other))
//.map_err(Into::into)
//}
//pub fn update_settings_txn<'a, 'b>(
//&'a self,
//txn: &mut heed::RwTxn<'a, 'b>,
//settings: &Settings<Checked>,
//update_builder: UpdateBuilder,
//) -> Result<UpdateResult> {
//// We must use the write transaction of the update here.
//let mut builder = update_builder.settings(txn, self);
//match settings.searchable_attributes {
//Setting::Set(ref names) => builder.set_searchable_fields(names.clone()),
//Setting::Reset => builder.reset_searchable_fields(),
//Setting::NotSet => (),
//}
//match settings.displayed_attributes {
//Setting::Set(ref names) => builder.set_displayed_fields(names.clone()),
//Setting::Reset => builder.reset_displayed_fields(),
//Setting::NotSet => (),
//}
//match settings.filterable_attributes {
//Setting::Set(ref facets) => {
//builder.set_filterable_fields(facets.clone().into_iter().collect())
//}
//Setting::Reset => builder.reset_filterable_fields(),
//Setting::NotSet => (),
//}
//match settings.sortable_attributes {
//Setting::Set(ref fields) => {
//builder.set_sortable_fields(fields.iter().cloned().collect())
//}
//Setting::Reset => builder.reset_sortable_fields(),
//Setting::NotSet => (),
//}
//match settings.ranking_rules {
//Setting::Set(ref criteria) => builder.set_criteria(criteria.clone()),
//Setting::Reset => builder.reset_criteria(),
//Setting::NotSet => (),
//}
//match settings.stop_words {
//Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()),
//Setting::Reset => builder.reset_stop_words(),
//Setting::NotSet => (),
//}
//match settings.synonyms {
//Setting::Set(ref synonyms) => {
//builder.set_synonyms(synonyms.clone().into_iter().collect())
//}
//Setting::Reset => builder.reset_synonyms(),
//Setting::NotSet => (),
//}
//match settings.distinct_attribute {
//Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()),
//Setting::Reset => builder.reset_distinct_field(),
//Setting::NotSet => (),
//}
//builder.execute(|indexing_step, update_id| {
//debug!("update {}: {:?}", update_id, indexing_step)
//})?;
//Ok(UpdateResult::Other)
//}
//pub fn update_settings(
//&self,
//settings: &Settings<Checked>,
//update_builder: UpdateBuilder,
//) -> Result<UpdateResult> {
//let mut txn = self.write_txn()?;
//let result = self.update_settings_txn(&mut txn, settings, update_builder)?;
//txn.commit()?;
//Ok(result)
//}
//pub fn delete_documents(
//&self,
//document_ids: &[String],
//update_builder: UpdateBuilder,
//) -> Result<UpdateResult> {
//let mut txn = self.write_txn()?;
//let mut builder = update_builder.delete_documents(&mut txn, self)?;
//// We ignore unexisting document ids
//document_ids.iter().for_each(|id| {
//builder.delete_external_id(id);
//});
//let deleted = builder.execute()?;
//txn.commit()
//.and(Ok(UpdateResult::DocumentDeletion { deleted }))
//.map_err(Into::into)
//}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_setting_check() {
// test no changes
let settings = Settings {
displayed_attributes: Setting::Set(vec![String::from("hello")]),
searchable_attributes: Setting::Set(vec![String::from("hello")]),
filterable_attributes: Setting::NotSet,
sortable_attributes: Setting::NotSet,
ranking_rules: Setting::NotSet,
stop_words: Setting::NotSet,
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
_kind: PhantomData::<Unchecked>,
};
let checked = settings.clone().check();
assert_eq!(settings.displayed_attributes, checked.displayed_attributes);
assert_eq!(
settings.searchable_attributes,
checked.searchable_attributes
);
// test wildcard
// test no changes
let settings = Settings {
displayed_attributes: Setting::Set(vec![String::from("*")]),
searchable_attributes: Setting::Set(vec![String::from("hello"), String::from("*")]),
filterable_attributes: Setting::NotSet,
sortable_attributes: Setting::NotSet,
ranking_rules: Setting::NotSet,
stop_words: Setting::NotSet,
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
_kind: PhantomData::<Unchecked>,
};
let checked = settings.check();
assert_eq!(checked.displayed_attributes, Setting::Reset);
assert_eq!(checked.searchable_attributes, Setting::Reset);
}
}

View file

@ -1,157 +0,0 @@
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use async_stream::stream;
use chrono::Utc;
use futures::{lock::Mutex, stream::StreamExt};
use log::{error, trace};
use tokio::sync::{mpsc, oneshot, RwLock};
use update_actor::UpdateActorHandle;
use uuid_resolver::UuidResolverHandle;
use super::error::{DumpActorError, Result};
use super::{DumpInfo, DumpMsg, DumpStatus, DumpTask};
use crate::index_controller::{update_actor, uuid_resolver};
pub const CONCURRENT_DUMP_MSG: usize = 10;
pub struct DumpActor<UuidResolver, Update> {
inbox: Option<mpsc::Receiver<DumpMsg>>,
uuid_resolver: UuidResolver,
update: Update,
dump_path: PathBuf,
lock: Arc<Mutex<()>>,
dump_infos: Arc<RwLock<HashMap<String, DumpInfo>>>,
update_db_size: usize,
index_db_size: usize,
}
/// Generate uid from creation date
fn generate_uid() -> String {
Utc::now().format("%Y%m%d-%H%M%S%3f").to_string()
}
impl<UuidResolver, Update> DumpActor<UuidResolver, Update>
where
UuidResolver: UuidResolverHandle + Send + Sync + Clone + 'static,
Update: UpdateActorHandle + Send + Sync + Clone + 'static,
{
pub fn new(
inbox: mpsc::Receiver<DumpMsg>,
uuid_resolver: UuidResolver,
update: Update,
dump_path: impl AsRef<Path>,
index_db_size: usize,
update_db_size: usize,
) -> Self {
let dump_infos = Arc::new(RwLock::new(HashMap::new()));
let lock = Arc::new(Mutex::new(()));
Self {
inbox: Some(inbox),
uuid_resolver,
update,
dump_path: dump_path.as_ref().into(),
dump_infos,
lock,
index_db_size,
update_db_size,
}
}
pub async fn run(mut self) {
trace!("Started dump actor.");
let mut inbox = self
.inbox
.take()
.expect("Dump Actor must have a inbox at this point.");
let stream = stream! {
loop {
match inbox.recv().await {
Some(msg) => yield msg,
None => break,
}
}
};
stream
.for_each_concurrent(Some(CONCURRENT_DUMP_MSG), |msg| self.handle_message(msg))
.await;
error!("Dump actor stopped.");
}
async fn handle_message(&self, msg: DumpMsg) {
use DumpMsg::*;
match msg {
CreateDump { ret } => {
let _ = self.handle_create_dump(ret).await;
}
DumpInfo { ret, uid } => {
let _ = ret.send(self.handle_dump_info(uid).await);
}
}
}
async fn handle_create_dump(&self, ret: oneshot::Sender<Result<DumpInfo>>) {
let uid = generate_uid();
let info = DumpInfo::new(uid.clone(), DumpStatus::InProgress);
let _lock = match self.lock.try_lock() {
Some(lock) => lock,
None => {
ret.send(Err(DumpActorError::DumpAlreadyRunning))
.expect("Dump actor is dead");
return;
}
};
self.dump_infos
.write()
.await
.insert(uid.clone(), info.clone());
ret.send(Ok(info)).expect("Dump actor is dead");
let task = DumpTask {
path: self.dump_path.clone(),
uuid_resolver: self.uuid_resolver.clone(),
update_handle: self.update.clone(),
uid: uid.clone(),
update_db_size: self.update_db_size,
index_db_size: self.index_db_size,
};
let task_result = tokio::task::spawn(task.run()).await;
let mut dump_infos = self.dump_infos.write().await;
let dump_infos = dump_infos
.get_mut(&uid)
.expect("dump entry deleted while lock was acquired");
match task_result {
Ok(Ok(())) => {
dump_infos.done();
trace!("Dump succeed");
}
Ok(Err(e)) => {
dump_infos.with_error(e.to_string());
error!("Dump failed: {}", e);
}
Err(_) => {
dump_infos.with_error("Unexpected error while performing dump.".to_string());
error!("Dump panicked. Dump status set to failed");
}
};
}
async fn handle_dump_info(&self, uid: String) -> Result<DumpInfo> {
match self.dump_infos.read().await.get(&uid) {
Some(info) => Ok(info.clone()),
_ => Err(DumpActorError::DumpDoesNotExist(uid)),
}
}
}

View file

@ -1,52 +0,0 @@
use meilisearch_error::{Code, ErrorCode};
use crate::index_controller::update_actor::error::UpdateActorError;
use crate::index_controller::uuid_resolver::error::UuidResolverError;
pub type Result<T> = std::result::Result<T, DumpActorError>;
#[derive(thiserror::Error, Debug)]
pub enum DumpActorError {
#[error("Another dump is already in progress")]
DumpAlreadyRunning,
#[error("Dump `{0}` not found")]
DumpDoesNotExist(String),
#[error("Internal error: {0}")]
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
#[error("{0}")]
UuidResolver(#[from] UuidResolverError),
#[error("{0}")]
UpdateActor(#[from] UpdateActorError),
}
macro_rules! internal_error {
($($other:path), *) => {
$(
impl From<$other> for DumpActorError {
fn from(other: $other) -> Self {
Self::Internal(Box::new(other))
}
}
)*
}
}
internal_error!(
heed::Error,
std::io::Error,
tokio::task::JoinError,
serde_json::error::Error,
tempfile::PersistError
);
impl ErrorCode for DumpActorError {
fn error_code(&self) -> Code {
match self {
DumpActorError::DumpAlreadyRunning => Code::DumpAlreadyInProgress,
DumpActorError::DumpDoesNotExist(_) => Code::NotFound,
DumpActorError::Internal(_) => Code::Internal,
DumpActorError::UuidResolver(e) => e.error_code(),
DumpActorError::UpdateActor(e) => e.error_code(),
}
}
}

View file

@ -1,52 +0,0 @@
use std::path::Path;
use tokio::sync::{mpsc, oneshot};
use super::error::Result;
use super::{DumpActor, DumpActorHandle, DumpInfo, DumpMsg};
#[derive(Clone)]
pub struct DumpActorHandleImpl {
sender: mpsc::Sender<DumpMsg>,
}
#[async_trait::async_trait]
impl DumpActorHandle for DumpActorHandleImpl {
async fn create_dump(&self) -> Result<DumpInfo> {
let (ret, receiver) = oneshot::channel();
let msg = DumpMsg::CreateDump { ret };
let _ = self.sender.send(msg).await;
receiver.await.expect("IndexActor has been killed")
}
async fn dump_info(&self, uid: String) -> Result<DumpInfo> {
let (ret, receiver) = oneshot::channel();
let msg = DumpMsg::DumpInfo { ret, uid };
let _ = self.sender.send(msg).await;
receiver.await.expect("IndexActor has been killed")
}
}
impl DumpActorHandleImpl {
pub fn new(
path: impl AsRef<Path>,
uuid_resolver: crate::index_controller::uuid_resolver::UuidResolverHandleImpl,
update: crate::index_controller::update_actor::UpdateActorHandleImpl,
index_db_size: usize,
update_db_size: usize,
) -> anyhow::Result<Self> {
let (sender, receiver) = mpsc::channel(10);
let actor = DumpActor::new(
receiver,
uuid_resolver,
update,
path,
index_db_size,
update_db_size,
);
tokio::task::spawn(actor.run());
Ok(Self { sender })
}
}

View file

@ -1,2 +0,0 @@
pub mod v1;
pub mod v2;

View file

@ -1,224 +0,0 @@
use std::collections::{BTreeMap, BTreeSet};
use std::marker::PhantomData;
use std::path::Path;
use log::{error, info, warn};
use milli::update::Setting;
use serde::{Deserialize, Deserializer, Serialize};
use uuid::Uuid;
use crate::index_controller::{self, uuid_resolver::HeedUuidStore, IndexMetadata};
use crate::index_controller::{asc_ranking_rule, desc_ranking_rule};
use crate::{
index::Unchecked,
option::IndexerOpts,
};
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct MetadataV1 {
db_version: String,
indexes: Vec<IndexMetadata>,
}
impl MetadataV1 {
pub fn load_dump(
self,
src: impl AsRef<Path>,
dst: impl AsRef<Path>,
size: usize,
indexer_options: &IndexerOpts,
) -> anyhow::Result<()> {
info!(
"Loading dump, dump database version: {}, dump version: V1",
self.db_version
);
let uuid_store = HeedUuidStore::new(&dst)?;
for index in self.indexes {
let uuid = Uuid::new_v4();
uuid_store.insert(index.uid.clone(), uuid)?;
let src = src.as_ref().join(index.uid);
load_index(
&src,
&dst,
uuid,
index.meta.primary_key.as_deref(),
size,
indexer_options,
)?;
}
Ok(())
}
}
pub fn deserialize_some<'de, T, D>(deserializer: D) -> std::result::Result<Option<T>, D::Error>
where
T: Deserialize<'de>,
D: Deserializer<'de>,
{
Deserialize::deserialize(deserializer).map(Some)
}
// These are the settings used in legacy meilisearch (<v0.21.0).
#[derive(Default, Clone, Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
struct Settings {
#[serde(default, deserialize_with = "deserialize_some")]
pub ranking_rules: Option<Option<Vec<String>>>,
#[serde(default, deserialize_with = "deserialize_some")]
pub distinct_attribute: Option<Option<String>>,
#[serde(default, deserialize_with = "deserialize_some")]
pub searchable_attributes: Option<Option<Vec<String>>>,
#[serde(default, deserialize_with = "deserialize_some")]
pub displayed_attributes: Option<Option<BTreeSet<String>>>,
#[serde(default, deserialize_with = "deserialize_some")]
pub stop_words: Option<Option<BTreeSet<String>>>,
#[serde(default, deserialize_with = "deserialize_some")]
pub synonyms: Option<Option<BTreeMap<String, Vec<String>>>>,
#[serde(default, deserialize_with = "deserialize_some")]
pub attributes_for_faceting: Option<Option<Vec<String>>>,
}
fn load_index(
_src: impl AsRef<Path>,
_dst: impl AsRef<Path>,
_uuid: Uuid,
_primary_key: Option<&str>,
_size: usize,
_indexer_options: &IndexerOpts,
) -> anyhow::Result<()> {
todo!("fix dump obkv documents")
//let index_path = dst.as_ref().join(&format!("indexes/index-{}", uuid));
//create_dir_all(&index_path)?;
//let mut options = EnvOpenOptions::new();
//options.map_size(size);
//let index = milli::Index::new(options, index_path)?;
//let index = Index(Arc::new(index));
//// extract `settings.json` file and import content
//let settings = import_settings(&src)?;
//let settings: index_controller::Settings<Unchecked> = settings.into();
//let mut txn = index.write_txn()?;
//let handler = UpdateHandler::new(indexer_options)?;
//index.update_settings_txn(&mut txn, &settings.check(), handler.update_builder(0))?;
//let file = File::open(&src.as_ref().join("documents.jsonl"))?;
//let mut reader = std::io::BufReader::new(file);
//reader.fill_buf()?;
//if !reader.buffer().is_empty() {
//index.update_documents_txn(
//&mut txn,
//IndexDocumentsMethod::ReplaceDocuments,
//Some(reader),
//handler.update_builder(0),
//primary_key,
//)?;
//}
//txn.commit()?;
//// Finaly, we extract the original milli::Index and close it
//Arc::try_unwrap(index.0)
//.map_err(|_e| "Couldn't close the index properly")
//.unwrap()
//.prepare_for_closing()
//.wait();
//// Updates are ignored in dumps V1.
//Ok(())
}
/// we need to **always** be able to convert the old settings to the settings currently being used
impl From<Settings> for index_controller::Settings<Unchecked> {
fn from(settings: Settings) -> Self {
Self {
distinct_attribute: match settings.distinct_attribute {
Some(Some(attr)) => Setting::Set(attr),
Some(None) => Setting::Reset,
None => Setting::NotSet
},
// we need to convert the old `Vec<String>` into a `BTreeSet<String>`
displayed_attributes: match settings.displayed_attributes {
Some(Some(attrs)) => Setting::Set(attrs.into_iter().collect()),
Some(None) => Setting::Reset,
None => Setting::NotSet
},
searchable_attributes: match settings.searchable_attributes {
Some(Some(attrs)) => Setting::Set(attrs),
Some(None) => Setting::Reset,
None => Setting::NotSet
},
filterable_attributes: match settings.attributes_for_faceting {
Some(Some(attrs)) => Setting::Set(attrs.into_iter().collect()),
Some(None) => Setting::Reset,
None => Setting::NotSet
},
sortable_attributes: Setting::NotSet,
ranking_rules: match settings.ranking_rules {
Some(Some(ranking_rules)) => Setting::Set(ranking_rules.into_iter().filter_map(|criterion| {
match criterion.as_str() {
"words" | "typo" | "proximity" | "attribute" | "exactness" => Some(criterion),
s if s.starts_with("asc") => asc_ranking_rule(s).map(|f| format!("{}:asc", f)),
s if s.starts_with("desc") => desc_ranking_rule(s).map(|f| format!("{}:desc", f)),
"wordsPosition" => {
warn!("The criteria `attribute` and `wordsPosition` have been merged \
into a single criterion `attribute` so `wordsPositon` will be \
ignored");
None
}
s => {
error!("Unknown criterion found in the dump: `{}`, it will be ignored", s);
None
}
}
}).collect()),
Some(None) => Setting::Reset,
None => Setting::NotSet
},
// we need to convert the old `Vec<String>` into a `BTreeSet<String>`
stop_words: match settings.stop_words {
Some(Some(stop_words)) => Setting::Set(stop_words.into_iter().collect()),
Some(None) => Setting::Reset,
None => Setting::NotSet
},
// we need to convert the old `Vec<String>` into a `BTreeMap<String>`
synonyms: match settings.synonyms {
Some(Some(synonyms)) => Setting::Set(synonyms.into_iter().collect()),
Some(None) => Setting::Reset,
None => Setting::NotSet
},
_kind: PhantomData,
}
}
}
// /// Extract Settings from `settings.json` file present at provided `dir_path`
//fn import_settings(dir_path: impl AsRef<Path>) -> anyhow::Result<Settings> {
//let path = dir_path.as_ref().join("settings.json");
//let file = File::open(path)?;
//let reader = std::io::BufReader::new(file);
//let metadata = serde_json::from_reader(reader)?;
//Ok(metadata)
//}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn settings_format_regression() {
let settings = Settings::default();
assert_eq!(
r##"{"rankingRules":null,"distinctAttribute":null,"searchableAttributes":null,"displayedAttributes":null,"stopWords":null,"synonyms":null,"attributesForFaceting":null}"##,
serde_json::to_string(&settings).unwrap()
);
}
}

View file

@ -1,59 +0,0 @@
use std::path::Path;
use chrono::{DateTime, Utc};
use log::info;
use serde::{Deserialize, Serialize};
use crate::index::Index;
use crate::index_controller::{update_actor::UpdateStore, uuid_resolver::HeedUuidStore};
use crate::option::IndexerOpts;
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct MetadataV2 {
db_version: String,
index_db_size: usize,
update_db_size: usize,
dump_date: DateTime<Utc>,
}
impl MetadataV2 {
pub fn new(index_db_size: usize, update_db_size: usize) -> Self {
Self {
db_version: env!("CARGO_PKG_VERSION").to_string(),
index_db_size,
update_db_size,
dump_date: Utc::now(),
}
}
pub fn load_dump(
self,
src: impl AsRef<Path>,
dst: impl AsRef<Path>,
index_db_size: usize,
update_db_size: usize,
indexing_options: &IndexerOpts,
) -> anyhow::Result<()> {
info!(
"Loading dump from {}, dump database version: {}, dump version: V2",
self.dump_date, self.db_version
);
info!("Loading index database.");
HeedUuidStore::load_dump(src.as_ref(), &dst)?;
info!("Loading updates.");
UpdateStore::load_dump(&src, &dst, update_db_size)?;
info!("Loading indexes.");
let indexes_path = src.as_ref().join("indexes");
let indexes = indexes_path.read_dir()?;
for index in indexes {
let index = index?;
Index::load_dump(&index.path(), &dst, index_db_size, indexing_options)?;
}
Ok(())
}
}

View file

@ -1,14 +0,0 @@
use tokio::sync::oneshot;
use super::error::Result;
use super::DumpInfo;
pub enum DumpMsg {
CreateDump {
ret: oneshot::Sender<Result<DumpInfo>>,
},
DumpInfo {
uid: String,
ret: oneshot::Sender<Result<DumpInfo>>,
},
}

View file

@ -1,203 +0,0 @@
use std::fs::File;
use std::path::{Path, PathBuf};
use anyhow::Context;
use chrono::{DateTime, Utc};
use log::{info, trace, warn};
#[cfg(test)]
use mockall::automock;
use serde::{Deserialize, Serialize};
use tokio::fs::create_dir_all;
use loaders::v1::MetadataV1;
use loaders::v2::MetadataV2;
pub use actor::DumpActor;
pub use handle_impl::*;
pub use message::DumpMsg;
use super::{update_actor::UpdateActorHandle, uuid_resolver::UuidResolverHandle};
use crate::index_controller::dump_actor::error::DumpActorError;
use crate::{helpers::compression, option::IndexerOpts};
use error::Result;
mod actor;
pub mod error;
mod handle_impl;
mod loaders;
mod message;
const META_FILE_NAME: &str = "metadata.json";
#[async_trait::async_trait]
#[cfg_attr(test, automock)]
pub trait DumpActorHandle {
/// Start the creation of a dump
/// Implementation: [handle_impl::DumpActorHandleImpl::create_dump]
async fn create_dump(&self) -> Result<DumpInfo>;
/// Return the status of an already created dump
/// Implementation: [handle_impl::DumpActorHandleImpl::dump_info]
async fn dump_info(&self, uid: String) -> Result<DumpInfo>;
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "dumpVersion")]
pub enum Metadata {
V1(MetadataV1),
V2(MetadataV2),
}
impl Metadata {
pub fn new_v2(index_db_size: usize, update_db_size: usize) -> Self {
let meta = MetadataV2::new(index_db_size, update_db_size);
Self::V2(meta)
}
}
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(rename_all = "snake_case")]
pub enum DumpStatus {
Done,
InProgress,
Failed,
}
#[derive(Debug, Serialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct DumpInfo {
pub uid: String,
pub status: DumpStatus,
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
started_at: DateTime<Utc>,
#[serde(skip_serializing_if = "Option::is_none")]
finished_at: Option<DateTime<Utc>>,
}
impl DumpInfo {
pub fn new(uid: String, status: DumpStatus) -> Self {
Self {
uid,
status,
error: None,
started_at: Utc::now(),
finished_at: None,
}
}
pub fn with_error(&mut self, error: String) {
self.status = DumpStatus::Failed;
self.finished_at = Some(Utc::now());
self.error = Some(error);
}
pub fn done(&mut self) {
self.finished_at = Some(Utc::now());
self.status = DumpStatus::Done;
}
pub fn dump_already_in_progress(&self) -> bool {
self.status == DumpStatus::InProgress
}
}
pub fn load_dump(
dst_path: impl AsRef<Path>,
src_path: impl AsRef<Path>,
index_db_size: usize,
update_db_size: usize,
indexer_opts: &IndexerOpts,
) -> anyhow::Result<()> {
let tmp_src = tempfile::tempdir_in(".")?;
let tmp_src_path = tmp_src.path();
compression::from_tar_gz(&src_path, tmp_src_path)?;
let meta_path = tmp_src_path.join(META_FILE_NAME);
let mut meta_file = File::open(&meta_path)?;
let meta: Metadata = serde_json::from_reader(&mut meta_file)?;
let dst_dir = dst_path
.as_ref()
.parent()
.with_context(|| format!("Invalid db path: {}", dst_path.as_ref().display()))?;
let tmp_dst = tempfile::tempdir_in(dst_dir)?;
match meta {
Metadata::V1(meta) => {
meta.load_dump(&tmp_src_path, tmp_dst.path(), index_db_size, indexer_opts)?
}
Metadata::V2(meta) => meta.load_dump(
&tmp_src_path,
tmp_dst.path(),
index_db_size,
update_db_size,
indexer_opts,
)?,
}
// Persist and atomically rename the db
let persisted_dump = tmp_dst.into_path();
if dst_path.as_ref().exists() {
warn!("Overwriting database at {}", dst_path.as_ref().display());
std::fs::remove_dir_all(&dst_path)?;
}
std::fs::rename(&persisted_dump, &dst_path)?;
Ok(())
}
struct DumpTask<U, P> {
path: PathBuf,
uuid_resolver: U,
update_handle: P,
uid: String,
update_db_size: usize,
index_db_size: usize,
}
impl<U, P> DumpTask<U, P>
where
U: UuidResolverHandle + Send + Sync + Clone + 'static,
P: UpdateActorHandle + Send + Sync + Clone + 'static,
{
async fn run(self) -> Result<()> {
trace!("Performing dump.");
create_dir_all(&self.path).await?;
let path_clone = self.path.clone();
let temp_dump_dir =
tokio::task::spawn_blocking(|| tempfile::TempDir::new_in(path_clone)).await??;
let temp_dump_path = temp_dump_dir.path().to_owned();
let meta = Metadata::new_v2(self.index_db_size, self.update_db_size);
let meta_path = temp_dump_path.join(META_FILE_NAME);
let mut meta_file = File::create(&meta_path)?;
serde_json::to_writer(&mut meta_file, &meta)?;
let uuids = self.uuid_resolver.dump(temp_dump_path.clone()).await?;
self.update_handle
.dump(uuids, temp_dump_path.clone())
.await?;
let dump_path = tokio::task::spawn_blocking(move || -> Result<PathBuf> {
let temp_dump_file = tempfile::NamedTempFile::new_in(&self.path)?;
compression::to_tar_gz(temp_dump_path, temp_dump_file.path())
.map_err(|e| DumpActorError::Internal(e.into()))?;
let dump_path = self.path.join(self.uid).with_extension("dump");
temp_dump_file.persist(&dump_path)?;
Ok(dump_path)
})
.await??;
info!("Created dump in {:?}.", dump_path);
Ok(())
}
}

View file

@ -1,40 +0,0 @@
use meilisearch_error::Code;
use meilisearch_error::ErrorCode;
use crate::index::error::IndexError;
use super::dump_actor::error::DumpActorError;
use super::index_actor::error::IndexActorError;
use super::update_actor::error::UpdateActorError;
use super::uuid_resolver::error::UuidResolverError;
pub type Result<T> = std::result::Result<T, IndexControllerError>;
#[derive(Debug, thiserror::Error)]
pub enum IndexControllerError {
#[error("Index creation must have an uid")]
MissingUid,
#[error("{0}")]
Uuid(#[from] UuidResolverError),
#[error("{0}")]
IndexActor(#[from] IndexActorError),
#[error("{0}")]
UpdateActor(#[from] UpdateActorError),
#[error("{0}")]
DumpActor(#[from] DumpActorError),
#[error("{0}")]
IndexError(#[from] IndexError),
}
impl ErrorCode for IndexControllerError {
fn error_code(&self) -> Code {
match self {
IndexControllerError::MissingUid => Code::BadRequest,
IndexControllerError::Uuid(e) => e.error_code(),
IndexControllerError::IndexActor(e) => e.error_code(),
IndexControllerError::UpdateActor(e) => e.error_code(),
IndexControllerError::DumpActor(e) => e.error_code(),
IndexControllerError::IndexError(e) => e.error_code(),
}
}
}

View file

@ -1,349 +0,0 @@
use std::path::PathBuf;
use std::sync::Arc;
use async_stream::stream;
use futures::stream::StreamExt;
use heed::CompactionOption;
use log::debug;
use milli::update::UpdateBuilder;
use tokio::task::spawn_blocking;
use tokio::{fs, sync::mpsc};
use uuid::Uuid;
use crate::index::{
update_handler::UpdateHandler, Checked, Document, SearchQuery, SearchResult, Settings,
};
use crate::index_controller::{
get_arc_ownership_blocking, Failed, IndexStats, Processed, Processing,
};
use crate::option::IndexerOpts;
use super::error::{IndexActorError, Result};
use super::{IndexMeta, IndexMsg, IndexSettings, IndexStore};
pub const CONCURRENT_INDEX_MSG: usize = 10;
pub struct IndexActor<S> {
receiver: Option<mpsc::Receiver<IndexMsg>>,
update_handler: Arc<UpdateHandler>,
store: S,
}
impl<S: IndexStore + Sync + Send> IndexActor<S> {
pub fn new(
receiver: mpsc::Receiver<IndexMsg>,
store: S,
options: &IndexerOpts,
) -> anyhow::Result<Self> {
let update_handler = UpdateHandler::new(options)?;
let update_handler = Arc::new(update_handler);
let receiver = Some(receiver);
Ok(Self {
receiver,
update_handler,
store,
})
}
/// `run` poll the write_receiver and read_receiver concurrently, but while messages send
/// through the read channel are processed concurrently, the messages sent through the write
/// channel are processed one at a time.
pub async fn run(mut self) {
let mut receiver = self
.receiver
.take()
.expect("Index Actor must have a inbox at this point.");
let stream = stream! {
loop {
match receiver.recv().await {
Some(msg) => yield msg,
None => break,
}
}
};
stream
.for_each_concurrent(Some(CONCURRENT_INDEX_MSG), |msg| self.handle_message(msg))
.await;
}
async fn handle_message(&self, msg: IndexMsg) {
use IndexMsg::*;
match msg {
CreateIndex {
uuid,
primary_key,
ret,
} => {
let _ = ret.send(self.handle_create_index(uuid, primary_key).await);
}
Update {
ret,
meta,
uuid,
} => {
let _ = ret.send(self.handle_update(uuid, meta).await);
}
Search { ret, query, uuid } => {
let _ = ret.send(self.handle_search(uuid, query).await);
}
Settings { ret, uuid } => {
let _ = ret.send(self.handle_settings(uuid).await);
}
Documents {
ret,
uuid,
attributes_to_retrieve,
offset,
limit,
} => {
let _ = ret.send(
self.handle_fetch_documents(uuid, offset, limit, attributes_to_retrieve)
.await,
);
}
Document {
uuid,
attributes_to_retrieve,
doc_id,
ret,
} => {
let _ = ret.send(
self.handle_fetch_document(uuid, doc_id, attributes_to_retrieve)
.await,
);
}
Delete { uuid, ret } => {
let _ = ret.send(self.handle_delete(uuid).await);
}
GetMeta { uuid, ret } => {
let _ = ret.send(self.handle_get_meta(uuid).await);
}
UpdateIndex {
uuid,
index_settings,
ret,
} => {
let _ = ret.send(self.handle_update_index(uuid, index_settings).await);
}
Snapshot { uuid, path, ret } => {
let _ = ret.send(self.handle_snapshot(uuid, path).await);
}
Dump { uuid, path, ret } => {
let _ = ret.send(self.handle_dump(uuid, path).await);
}
GetStats { uuid, ret } => {
let _ = ret.send(self.handle_get_stats(uuid).await);
}
}
}
async fn handle_search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> {
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexActorError::UnexistingIndex)?;
let result = spawn_blocking(move || index.perform_search(query)).await??;
Ok(result)
}
async fn handle_create_index(
&self,
uuid: Uuid,
primary_key: Option<String>,
) -> Result<IndexMeta> {
let index = self.store.create(uuid, primary_key).await?;
let meta = spawn_blocking(move || IndexMeta::new(&index)).await??;
Ok(meta)
}
async fn handle_update(
&self,
uuid: Uuid,
meta: Processing,
) -> Result<std::result::Result<Processed, Failed>> {
debug!("Processing update {}", meta.id());
let update_handler = self.update_handler.clone();
let index = match self.store.get(uuid).await? {
Some(index) => index,
None => self.store.create(uuid, None).await?,
};
Ok(spawn_blocking(move || update_handler.handle_update(index, meta)).await?)
}
async fn handle_settings(&self, uuid: Uuid) -> Result<Settings<Checked>> {
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexActorError::UnexistingIndex)?;
let result = spawn_blocking(move || index.settings()).await??;
Ok(result)
}
async fn handle_fetch_documents(
&self,
uuid: Uuid,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>> {
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexActorError::UnexistingIndex)?;
let result =
spawn_blocking(move || index.retrieve_documents(offset, limit, attributes_to_retrieve))
.await??;
Ok(result)
}
async fn handle_fetch_document(
&self,
uuid: Uuid,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document> {
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexActorError::UnexistingIndex)?;
let result =
spawn_blocking(move || index.retrieve_document(doc_id, attributes_to_retrieve))
.await??;
Ok(result)
}
async fn handle_delete(&self, uuid: Uuid) -> Result<()> {
let index = self.store.delete(uuid).await?;
if let Some(index) = index {
tokio::task::spawn(async move {
let index = index.inner;
let store = get_arc_ownership_blocking(index).await;
spawn_blocking(move || {
store.prepare_for_closing().wait();
debug!("Index closed");
});
});
}
Ok(())
}
async fn handle_get_meta(&self, uuid: Uuid) -> Result<IndexMeta> {
match self.store.get(uuid).await? {
Some(index) => {
let meta = spawn_blocking(move || IndexMeta::new(&index)).await??;
Ok(meta)
}
None => Err(IndexActorError::UnexistingIndex),
}
}
async fn handle_update_index(
&self,
uuid: Uuid,
index_settings: IndexSettings,
) -> Result<IndexMeta> {
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexActorError::UnexistingIndex)?;
let result = spawn_blocking(move || match index_settings.primary_key {
Some(primary_key) => {
let mut txn = index.write_txn()?;
if index.primary_key(&txn)?.is_some() {
return Err(IndexActorError::ExistingPrimaryKey);
}
let mut builder = UpdateBuilder::new(0).settings(&mut txn, &index);
builder.set_primary_key(primary_key);
builder.execute(|_, _| ())?;
let meta = IndexMeta::new_txn(&index, &txn)?;
txn.commit()?;
Ok(meta)
}
None => {
let meta = IndexMeta::new(&index)?;
Ok(meta)
}
})
.await??;
Ok(result)
}
async fn handle_snapshot(&self, uuid: Uuid, mut path: PathBuf) -> Result<()> {
use tokio::fs::create_dir_all;
path.push("indexes");
create_dir_all(&path).await?;
if let Some(index) = self.store.get(uuid).await? {
let mut index_path = path.join(format!("index-{}", uuid));
create_dir_all(&index_path).await?;
index_path.push("data.mdb");
spawn_blocking(move || -> Result<()> {
// Get write txn to wait for ongoing write transaction before snapshot.
let _txn = index.write_txn()?;
index
.env
.copy_to_path(index_path, CompactionOption::Enabled)?;
Ok(())
})
.await??;
}
Ok(())
}
/// Create a `documents.jsonl` and a `settings.json` in `path/uid/` with a dump of all the
/// documents and all the settings.
async fn handle_dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexActorError::UnexistingIndex)?;
let path = path.join(format!("indexes/index-{}/", uuid));
fs::create_dir_all(&path).await?;
tokio::task::spawn_blocking(move || index.dump(path)).await??;
Ok(())
}
async fn handle_get_stats(&self, uuid: Uuid) -> Result<IndexStats> {
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexActorError::UnexistingIndex)?;
spawn_blocking(move || {
let rtxn = index.read_txn()?;
Ok(IndexStats {
size: index.size(),
number_of_documents: index.number_of_documents(&rtxn)?,
is_indexing: None,
field_distribution: index.field_distribution(&rtxn)?,
})
})
.await?
}
}

View file

@ -1,48 +0,0 @@
use meilisearch_error::{Code, ErrorCode};
use crate::{error::MilliError, index::error::IndexError};
pub type Result<T> = std::result::Result<T, IndexActorError>;
#[derive(thiserror::Error, Debug)]
pub enum IndexActorError {
#[error("{0}")]
IndexError(#[from] IndexError),
#[error("Index already exists")]
IndexAlreadyExists,
#[error("Index not found")]
UnexistingIndex,
#[error("A primary key is already present. It's impossible to update it")]
ExistingPrimaryKey,
#[error("Internal Error: {0}")]
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
#[error("{0}")]
Milli(#[from] milli::Error),
}
macro_rules! internal_error {
($($other:path), *) => {
$(
impl From<$other> for IndexActorError {
fn from(other: $other) -> Self {
Self::Internal(Box::new(other))
}
}
)*
}
}
internal_error!(heed::Error, tokio::task::JoinError, std::io::Error);
impl ErrorCode for IndexActorError {
fn error_code(&self) -> Code {
match self {
IndexActorError::IndexError(e) => e.error_code(),
IndexActorError::IndexAlreadyExists => Code::IndexAlreadyExists,
IndexActorError::UnexistingIndex => Code::IndexNotFound,
IndexActorError::ExistingPrimaryKey => Code::PrimaryKeyAlreadyPresent,
IndexActorError::Internal(_) => Code::Internal,
IndexActorError::Milli(e) => MilliError(e).error_code(),
}
}
}

View file

@ -1,162 +0,0 @@
use crate::option::IndexerOpts;
use std::path::{Path, PathBuf};
use tokio::sync::{mpsc, oneshot};
use uuid::Uuid;
use crate::{
index::Checked,
index_controller::{IndexSettings, IndexStats, Processing},
};
use crate::{
index::{Document, SearchQuery, SearchResult, Settings},
index_controller::{Failed, Processed},
};
use super::error::Result;
use super::{IndexActor, IndexActorHandle, IndexMeta, IndexMsg, MapIndexStore};
#[derive(Clone)]
pub struct IndexActorHandleImpl {
sender: mpsc::Sender<IndexMsg>,
}
#[async_trait::async_trait]
impl IndexActorHandle for IndexActorHandleImpl {
async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::CreateIndex {
ret,
uuid,
primary_key,
};
let _ = self.sender.send(msg).await;
receiver.await.expect("IndexActor has been killed")
}
async fn update(
&self,
uuid: Uuid,
meta: Processing,
) -> Result<std::result::Result<Processed, Failed>> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Update {
ret,
meta,
uuid,
};
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Search { uuid, query, ret };
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn settings(&self, uuid: Uuid) -> Result<Settings<Checked>> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Settings { uuid, ret };
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn documents(
&self,
uuid: Uuid,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Documents {
uuid,
ret,
offset,
attributes_to_retrieve,
limit,
};
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn document(
&self,
uuid: Uuid,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Document {
uuid,
ret,
doc_id,
attributes_to_retrieve,
};
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn delete(&self, uuid: Uuid) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Delete { uuid, ret };
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::GetMeta { uuid, ret };
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn update_index(&self, uuid: Uuid, index_settings: IndexSettings) -> Result<IndexMeta> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::UpdateIndex {
uuid,
index_settings,
ret,
};
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Snapshot { uuid, path, ret };
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Dump { uuid, path, ret };
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::GetStats { uuid, ret };
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
}
impl IndexActorHandleImpl {
pub fn new(
path: impl AsRef<Path>,
index_size: usize,
options: &IndexerOpts,
) -> anyhow::Result<Self> {
let (sender, receiver) = mpsc::channel(100);
let store = MapIndexStore::new(&path, index_size);
let actor = IndexActor::new(receiver, store, options)?;
tokio::task::spawn(actor.run());
Ok(Self { sender })
}
}

View file

@ -1,73 +0,0 @@
use std::path::PathBuf;
use tokio::sync::oneshot;
use uuid::Uuid;
use super::error::Result as IndexResult;
use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings};
use crate::index_controller::{Failed, IndexStats, Processed, Processing};
use super::{IndexMeta, IndexSettings};
#[allow(clippy::large_enum_variant)]
pub enum IndexMsg {
CreateIndex {
uuid: Uuid,
primary_key: Option<String>,
ret: oneshot::Sender<IndexResult<IndexMeta>>,
},
Update {
uuid: Uuid,
meta: Processing,
ret: oneshot::Sender<IndexResult<Result<Processed, Failed>>>,
},
Search {
uuid: Uuid,
query: SearchQuery,
ret: oneshot::Sender<IndexResult<SearchResult>>,
},
Settings {
uuid: Uuid,
ret: oneshot::Sender<IndexResult<Settings<Checked>>>,
},
Documents {
uuid: Uuid,
attributes_to_retrieve: Option<Vec<String>>,
offset: usize,
limit: usize,
ret: oneshot::Sender<IndexResult<Vec<Document>>>,
},
Document {
uuid: Uuid,
attributes_to_retrieve: Option<Vec<String>>,
doc_id: String,
ret: oneshot::Sender<IndexResult<Document>>,
},
Delete {
uuid: Uuid,
ret: oneshot::Sender<IndexResult<()>>,
},
GetMeta {
uuid: Uuid,
ret: oneshot::Sender<IndexResult<IndexMeta>>,
},
UpdateIndex {
uuid: Uuid,
index_settings: IndexSettings,
ret: oneshot::Sender<IndexResult<IndexMeta>>,
},
Snapshot {
uuid: Uuid,
path: PathBuf,
ret: oneshot::Sender<IndexResult<()>>,
},
Dump {
uuid: Uuid,
path: PathBuf,
ret: oneshot::Sender<IndexResult<()>>,
},
GetStats {
uuid: Uuid,
ret: oneshot::Sender<IndexResult<IndexStats>>,
},
}

View file

@ -1,167 +0,0 @@
use std::path::PathBuf;
use chrono::{DateTime, Utc};
#[cfg(test)]
use mockall::automock;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use actor::IndexActor;
pub use actor::CONCURRENT_INDEX_MSG;
pub use handle_impl::IndexActorHandleImpl;
use message::IndexMsg;
use store::{IndexStore, MapIndexStore};
use crate::index::{Checked, Document, Index, SearchQuery, SearchResult, Settings};
use crate::index_controller::{Failed, IndexStats, Processed, Processing};
use error::Result;
use super::IndexSettings;
mod actor;
pub mod error;
mod handle_impl;
mod message;
mod store;
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexMeta {
created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
pub primary_key: Option<String>,
}
impl IndexMeta {
fn new(index: &Index) -> Result<Self> {
let txn = index.read_txn()?;
Self::new_txn(index, &txn)
}
fn new_txn(index: &Index, txn: &heed::RoTxn) -> Result<Self> {
let created_at = index.created_at(txn)?;
let updated_at = index.updated_at(txn)?;
let primary_key = index.primary_key(txn)?.map(String::from);
Ok(Self {
created_at,
updated_at,
primary_key,
})
}
}
#[async_trait::async_trait]
#[cfg_attr(test, automock)]
pub trait IndexActorHandle {
async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta>;
async fn update(
&self,
uuid: Uuid,
meta: Processing,
) -> Result<std::result::Result<Processed, Failed>>;
async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult>;
async fn settings(&self, uuid: Uuid) -> Result<Settings<Checked>>;
async fn documents(
&self,
uuid: Uuid,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>>;
async fn document(
&self,
uuid: Uuid,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document>;
async fn delete(&self, uuid: Uuid) -> Result<()>;
async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta>;
async fn update_index(&self, uuid: Uuid, index_settings: IndexSettings) -> Result<IndexMeta>;
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()>;
async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()>;
async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats>;
}
#[cfg(test)]
mod test {
use std::sync::Arc;
use super::*;
#[async_trait::async_trait]
/// Useful for passing around an `Arc<MockIndexActorHandle>` in tests.
impl IndexActorHandle for Arc<MockIndexActorHandle> {
async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta> {
self.as_ref().create_index(uuid, primary_key).await
}
async fn update(
&self,
uuid: Uuid,
meta: Processing,
data: Option<std::fs::File>,
) -> Result<std::result::Result<Processed, Failed>> {
self.as_ref().update(uuid, meta, data).await
}
async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> {
self.as_ref().search(uuid, query).await
}
async fn settings(&self, uuid: Uuid) -> Result<Settings<Checked>> {
self.as_ref().settings(uuid).await
}
async fn documents(
&self,
uuid: Uuid,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>> {
self.as_ref()
.documents(uuid, offset, limit, attributes_to_retrieve)
.await
}
async fn document(
&self,
uuid: Uuid,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document> {
self.as_ref()
.document(uuid, doc_id, attributes_to_retrieve)
.await
}
async fn delete(&self, uuid: Uuid) -> Result<()> {
self.as_ref().delete(uuid).await
}
async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta> {
self.as_ref().get_index_meta(uuid).await
}
async fn update_index(
&self,
uuid: Uuid,
index_settings: IndexSettings,
) -> Result<IndexMeta> {
self.as_ref().update_index(uuid, index_settings).await
}
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
self.as_ref().snapshot(uuid, path).await
}
async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
self.as_ref().dump(uuid, path).await
}
async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats> {
self.as_ref().get_index_stats(uuid).await
}
}
}

View file

@ -1,109 +0,0 @@
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use milli::update::UpdateBuilder;
use tokio::fs;
use tokio::sync::RwLock;
use tokio::task::spawn_blocking;
use uuid::Uuid;
use super::error::{IndexActorError, Result};
use crate::index::Index;
use crate::index_controller::update_file_store::UpdateFileStore;
type AsyncMap<K, V> = Arc<RwLock<HashMap<K, V>>>;
#[async_trait::async_trait]
pub trait IndexStore {
async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> Result<Index>;
async fn get(&self, uuid: Uuid) -> Result<Option<Index>>;
async fn delete(&self, uuid: Uuid) -> Result<Option<Index>>;
}
pub struct MapIndexStore {
index_store: AsyncMap<Uuid, Index>,
path: PathBuf,
index_size: usize,
update_file_store: Arc<UpdateFileStore>,
}
impl MapIndexStore {
pub fn new(path: impl AsRef<Path>, index_size: usize) -> Self {
let update_file_store = Arc::new(UpdateFileStore::new(path.as_ref()).unwrap());
let path = path.as_ref().join("indexes/");
let index_store = Arc::new(RwLock::new(HashMap::new()));
Self {
index_store,
path,
index_size,
update_file_store,
}
}
}
#[async_trait::async_trait]
impl IndexStore for MapIndexStore {
async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> Result<Index> {
// We need to keep the lock until we are sure the db file has been opened correclty, to
// ensure that another db is not created at the same time.
let mut lock = self.index_store.write().await;
if let Some(index) = lock.get(&uuid) {
return Ok(index.clone());
}
let path = self.path.join(format!("index-{}", uuid));
if path.exists() {
return Err(IndexActorError::IndexAlreadyExists);
}
let index_size = self.index_size;
let file_store = self.update_file_store.clone();
let index = spawn_blocking(move || -> Result<Index> {
let index = Index::open(path, index_size, file_store)?;
if let Some(primary_key) = primary_key {
let mut txn = index.write_txn()?;
let mut builder = UpdateBuilder::new(0).settings(&mut txn, &index);
builder.set_primary_key(primary_key);
builder.execute(|_, _| ())?;
txn.commit()?;
}
Ok(index)
})
.await??;
lock.insert(uuid, index.clone());
Ok(index)
}
async fn get(&self, uuid: Uuid) -> Result<Option<Index>> {
let guard = self.index_store.read().await;
match guard.get(&uuid) {
Some(index) => Ok(Some(index.clone())),
None => {
// drop the guard here so we can perform the write after without deadlocking;
drop(guard);
let path = self.path.join(format!("index-{}", uuid));
if !path.exists() {
return Ok(None);
}
let index_size = self.index_size;
let file_store = self.update_file_store.clone();
let index = spawn_blocking(move || Index::open(path, index_size, file_store)).await??;
self.index_store.write().await.insert(uuid, index.clone());
Ok(Some(index))
}
}
}
async fn delete(&self, uuid: Uuid) -> Result<Option<Index>> {
let db_path = self.path.join(format!("index-{}", uuid));
fs::remove_dir_all(db_path).await?;
let index = self.index_store.write().await.remove(&uuid);
Ok(index)
}
}

View file

@ -1,491 +0,0 @@
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
use actix_web::error::PayloadError;
use bytes::Bytes;
use chrono::{DateTime, Utc};
use futures::Stream;
use log::info;
use milli::FieldDistribution;
use milli::update::IndexDocumentsMethod;
use serde::{Deserialize, Serialize};
use tokio::time::sleep;
use uuid::Uuid;
use dump_actor::DumpActorHandle;
pub use dump_actor::{DumpInfo, DumpStatus};
use index_actor::IndexActorHandle;
use snapshot::load_snapshot;
use update_actor::UpdateActorHandle;
pub use updates::*;
use uuid_resolver::{error::UuidResolverError, UuidResolverHandle};
use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings};
use crate::option::Opt;
use error::Result;
use self::dump_actor::load_dump;
mod dump_actor;
pub mod error;
pub mod index_actor;
mod snapshot;
pub mod update_actor;
mod updates;
mod uuid_resolver;
pub mod update_file_store;
pub type Payload = Box<dyn Stream<Item = std::result::Result<Bytes, PayloadError>> + Send + Sync + 'static + Unpin>;
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexMetadata {
#[serde(skip)]
pub uuid: Uuid,
pub uid: String,
name: String,
#[serde(flatten)]
pub meta: index_actor::IndexMeta,
}
#[derive(Clone, Debug)]
pub struct IndexSettings {
pub uid: Option<String>,
pub primary_key: Option<String>,
}
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct IndexStats {
#[serde(skip)]
pub size: u64,
pub number_of_documents: u64,
/// Whether the current index is performing an update. It is initially `None` when the
/// index returns it, since it is the `UpdateStore` that knows what index is currently indexing. It is
/// later set to either true or false, we we retrieve the information from the `UpdateStore`
pub is_indexing: Option<bool>,
pub field_distribution: FieldDistribution,
}
#[derive(Clone)]
pub struct IndexController {
uuid_resolver: uuid_resolver::UuidResolverHandleImpl,
index_handle: index_actor::IndexActorHandleImpl,
update_handle: update_actor::UpdateActorHandleImpl,
dump_handle: dump_actor::DumpActorHandleImpl,
}
pub enum DocumentAdditionFormat {
Json,
}
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct Stats {
pub database_size: u64,
pub last_update: Option<DateTime<Utc>>,
pub indexes: BTreeMap<String, IndexStats>,
}
pub enum Update {
DocumentAddition {
payload: Payload,
primary_key: Option<String>,
method: IndexDocumentsMethod,
format: DocumentAdditionFormat,
}
}
impl IndexController {
pub fn new(path: impl AsRef<Path>, options: &Opt) -> anyhow::Result<Self> {
let index_size = options.max_index_size.get_bytes() as usize;
let update_store_size = options.max_index_size.get_bytes() as usize;
if let Some(ref path) = options.import_snapshot {
info!("Loading from snapshot {:?}", path);
load_snapshot(
&options.db_path,
path,
options.ignore_snapshot_if_db_exists,
options.ignore_missing_snapshot,
)?;
} else if let Some(ref src_path) = options.import_dump {
load_dump(
&options.db_path,
src_path,
options.max_index_size.get_bytes() as usize,
options.max_udb_size.get_bytes() as usize,
&options.indexer_options,
)?;
}
std::fs::create_dir_all(&path)?;
let uuid_resolver = uuid_resolver::UuidResolverHandleImpl::new(&path)?;
let index_handle =
index_actor::IndexActorHandleImpl::new(&path, index_size, &options.indexer_options)?;
let update_handle = update_actor::UpdateActorHandleImpl::new(
index_handle.clone(),
&path,
update_store_size,
)?;
let dump_handle = dump_actor::DumpActorHandleImpl::new(
&options.dumps_dir,
uuid_resolver.clone(),
update_handle.clone(),
options.max_index_size.get_bytes() as usize,
options.max_udb_size.get_bytes() as usize,
)?;
//if options.schedule_snapshot {
//let snapshot_service = SnapshotService::new(
//uuid_resolver.clone(),
//update_handle.clone(),
//Duration::from_secs(options.snapshot_interval_sec),
//options.snapshot_dir.clone(),
//options
//.db_path
//.file_name()
//.map(|n| n.to_owned().into_string().expect("invalid path"))
//.unwrap_or_else(|| String::from("data.ms")),
//);
//tokio::task::spawn(snapshot_service.run());
//}
Ok(Self {
uuid_resolver,
index_handle,
update_handle,
dump_handle,
})
}
pub async fn register_update(&self, uid: &str, update: Update) -> Result<UpdateStatus> {
match self.uuid_resolver.get(uid.to_string()).await {
Ok(uuid) => {
let update_result = self.update_handle.update(uuid, update).await?;
Ok(update_result)
},
Err(UuidResolverError::UnexistingIndex(name)) => {
let uuid = Uuid::new_v4();
let update_result = self.update_handle.update(uuid, update).await?;
// ignore if index creation fails now, since it may already have been created
let _ = self.index_handle.create_index(uuid, None).await;
self.uuid_resolver.insert(name, uuid).await?;
Ok(update_result)
}
Err(e) => Err(e.into()),
}
}
//pub async fn add_documents(
//&self,
//uid: String,
//method: milli::update::IndexDocumentsMethod,
//payload: Payload,
//primary_key: Option<String>,
//) -> Result<UpdateStatus> {
//let perform_update = |uuid| async move {
//let meta = UpdateMeta::DocumentsAddition {
//method,
//primary_key,
//};
//let (sender, receiver) = mpsc::channel(10);
//// It is necessary to spawn a local task to send the payload to the update handle to
//// prevent dead_locking between the update_handle::update that waits for the update to be
//// registered and the update_actor that waits for the the payload to be sent to it.
//tokio::task::spawn_local(async move {
//payload
//.for_each(|r| async {
//let _ = sender.send(r).await;
//})
//.await
//});
//// This must be done *AFTER* spawning the task.
//self.update_handle.update(meta, receiver, uuid).await
//};
//match self.uuid_resolver.get(uid).await {
//Ok(uuid) => Ok(perform_update(uuid).await?),
//Err(UuidResolverError::UnexistingIndex(name)) => {
//let uuid = Uuid::new_v4();
//let status = perform_update(uuid).await?;
//// ignore if index creation fails now, since it may already have been created
//let _ = self.index_handle.create_index(uuid, None).await;
//self.uuid_resolver.insert(name, uuid).await?;
//Ok(status)
//}
//Err(e) => Err(e.into()),
//}
//}
//pub async fn clear_documents(&self, uid: String) -> Result<UpdateStatus> {
//let uuid = self.uuid_resolver.get(uid).await?;
//let meta = UpdateMeta::ClearDocuments;
//let (_, receiver) = mpsc::channel(1);
//let status = self.update_handle.update(meta, receiver, uuid).await?;
//Ok(status)
//}
//pub async fn delete_documents(
//&self,
//uid: String,
//documents: Vec<String>,
//) -> Result<UpdateStatus> {
//let uuid = self.uuid_resolver.get(uid).await?;
//let meta = UpdateMeta::DeleteDocuments { ids: documents };
//let (_, receiver) = mpsc::channel(1);
//let status = self.update_handle.update(meta, receiver, uuid).await?;
//Ok(status)
//}
//pub async fn update_settings(
//&self,
//uid: String,
//settings: Settings<Checked>,
//create: bool,
//) -> Result<UpdateStatus> {
//let perform_udpate = |uuid| async move {
//let meta = UpdateMeta::Settings(settings.into_unchecked());
//// Nothing so send, drop the sender right away, as not to block the update actor.
//let (_, receiver) = mpsc::channel(1);
//self.update_handle.update(meta, receiver, uuid).await
//};
//match self.uuid_resolver.get(uid).await {
//Ok(uuid) => Ok(perform_udpate(uuid).await?),
//Err(UuidResolverError::UnexistingIndex(name)) if create => {
//let uuid = Uuid::new_v4();
//let status = perform_udpate(uuid).await?;
//// ignore if index creation fails now, since it may already have been created
//let _ = self.index_handle.create_index(uuid, None).await;
//self.uuid_resolver.insert(name, uuid).await?;
//Ok(status)
//}
//Err(e) => Err(e.into()),
//}
//}
//pub async fn create_index(&self, index_settings: IndexSettings) -> Result<IndexMetadata> {
//let IndexSettings { uid, primary_key } = index_settings;
//let uid = uid.ok_or(IndexControllerError::MissingUid)?;
//let uuid = Uuid::new_v4();
//let meta = self.index_handle.create_index(uuid, primary_key).await?;
//self.uuid_resolver.insert(uid.clone(), uuid).await?;
//let meta = IndexMetadata {
//uuid,
//name: uid.clone(),
//uid,
//meta,
//};
//Ok(meta)
//}
//pub async fn delete_index(&self, uid: String) -> Result<()> {
//let uuid = self.uuid_resolver.delete(uid).await?;
//// We remove the index from the resolver synchronously, and effectively perform the index
//// deletion as a background task.
//let update_handle = self.update_handle.clone();
//let index_handle = self.index_handle.clone();
//tokio::spawn(async move {
//if let Err(e) = update_handle.delete(uuid).await {
//error!("Error while deleting index: {}", e);
//}
//if let Err(e) = index_handle.delete(uuid).await {
//error!("Error while deleting index: {}", e);
//}
//});
//Ok(())
//}
pub async fn update_status(&self, uid: String, id: u64) -> Result<UpdateStatus> {
let uuid = self.uuid_resolver.get(uid).await?;
let result = self.update_handle.update_status(uuid, id).await?;
Ok(result)
}
pub async fn all_update_status(&self, uid: String) -> Result<Vec<UpdateStatus>> {
let uuid = self.uuid_resolver.get(uid).await?;
let result = self.update_handle.get_all_updates_status(uuid).await?;
Ok(result)
}
pub async fn list_indexes(&self) -> Result<Vec<IndexMetadata>> {
let uuids = self.uuid_resolver.list().await?;
let mut ret = Vec::new();
for (uid, uuid) in uuids {
let meta = self.index_handle.get_index_meta(uuid).await?;
let meta = IndexMetadata {
uuid,
name: uid.clone(),
uid,
meta,
};
ret.push(meta);
}
Ok(ret)
}
pub async fn settings(&self, uid: String) -> Result<Settings<Checked>> {
let uuid = self.uuid_resolver.get(uid.clone()).await?;
let settings = self.index_handle.settings(uuid).await?;
Ok(settings)
}
pub async fn documents(
&self,
uid: String,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>> {
let uuid = self.uuid_resolver.get(uid.clone()).await?;
let documents = self
.index_handle
.documents(uuid, offset, limit, attributes_to_retrieve)
.await?;
Ok(documents)
}
pub async fn document(
&self,
uid: String,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document> {
let uuid = self.uuid_resolver.get(uid.clone()).await?;
let document = self
.index_handle
.document(uuid, doc_id, attributes_to_retrieve)
.await?;
Ok(document)
}
pub async fn update_index(
&self,
uid: String,
mut index_settings: IndexSettings,
) -> Result<IndexMetadata> {
if index_settings.uid.is_some() {
index_settings.uid.take();
}
let uuid = self.uuid_resolver.get(uid.clone()).await?;
let meta = self.index_handle.update_index(uuid, index_settings).await?;
let meta = IndexMetadata {
uuid,
name: uid.clone(),
uid,
meta,
};
Ok(meta)
}
pub async fn search(&self, uid: String, query: SearchQuery) -> Result<SearchResult> {
let uuid = self.uuid_resolver.get(uid).await?;
let result = self.index_handle.search(uuid, query).await?;
Ok(result)
}
pub async fn get_index(&self, uid: String) -> Result<IndexMetadata> {
let uuid = self.uuid_resolver.get(uid.clone()).await?;
let meta = self.index_handle.get_index_meta(uuid).await?;
let meta = IndexMetadata {
uuid,
name: uid.clone(),
uid,
meta,
};
Ok(meta)
}
pub async fn get_uuids_size(&self) -> Result<u64> {
Ok(self.uuid_resolver.get_size().await?)
}
pub async fn get_index_stats(&self, uid: String) -> Result<IndexStats> {
let uuid = self.uuid_resolver.get(uid).await?;
let update_infos = self.update_handle.get_info().await?;
let mut stats = self.index_handle.get_index_stats(uuid).await?;
// Check if the currently indexing update is from out index.
stats.is_indexing = Some(Some(uuid) == update_infos.processing);
Ok(stats)
}
pub async fn get_all_stats(&self) -> Result<Stats> {
let update_infos = self.update_handle.get_info().await?;
let mut database_size = self.get_uuids_size().await? + update_infos.size;
let mut last_update: Option<DateTime<_>> = None;
let mut indexes = BTreeMap::new();
for index in self.list_indexes().await? {
let mut index_stats = self.index_handle.get_index_stats(index.uuid).await?;
database_size += index_stats.size;
last_update = last_update.map_or(Some(index.meta.updated_at), |last| {
Some(last.max(index.meta.updated_at))
});
index_stats.is_indexing = Some(Some(index.uuid) == update_infos.processing);
indexes.insert(index.uid, index_stats);
}
Ok(Stats {
database_size,
last_update,
indexes,
})
}
pub async fn create_dump(&self) -> Result<DumpInfo> {
Ok(self.dump_handle.create_dump().await?)
}
pub async fn dump_info(&self, uid: String) -> Result<DumpInfo> {
Ok(self.dump_handle.dump_info(uid).await?)
}
}
pub async fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T {
loop {
match Arc::try_unwrap(item) {
Ok(item) => return item,
Err(item_arc) => {
item = item_arc;
sleep(Duration::from_millis(100)).await;
continue;
}
}
}
}
/// Parses the v1 version of the Asc ranking rules `asc(price)`and returns the field name.
pub fn asc_ranking_rule(text: &str) -> Option<&str> {
text.split_once("asc(")
.and_then(|(_, tail)| tail.rsplit_once(")"))
.map(|(field, _)| field)
}
/// Parses the v1 version of the Desc ranking rules `asc(price)`and returns the field name.
pub fn desc_ranking_rule(text: &str) -> Option<&str> {
text.split_once("desc(")
.and_then(|(_, tail)| tail.rsplit_once(")"))
.map(|(field, _)| field)
}
fn update_files_path(path: impl AsRef<Path>) -> PathBuf {
path.as_ref().join("updates/updates_files")
}

View file

@ -1,261 +0,0 @@
use std::path::Path;
use anyhow::bail;
use crate::helpers::compression;
//pub struct SnapshotService<U, R> {
//uuid_resolver_handle: R,
//update_handle: U,
//snapshot_period: Duration,
//snapshot_path: PathBuf,
//db_name: String,
//}
//impl<U, R> SnapshotService<U, R>
//where
//U: UpdateActorHandle,
//R: UuidResolverHandle,
//{
//pub fn new(
//uuid_resolver_handle: R,
//update_handle: U,
//snapshot_period: Duration,
//snapshot_path: PathBuf,
//db_name: String,
//) -> Self {
//Self {
//uuid_resolver_handle,
//update_handle,
//snapshot_period,
//snapshot_path,
//db_name,
//}
//}
//pub async fn run(self) {
//info!(
//"Snapshot scheduled every {}s.",
//self.snapshot_period.as_secs()
//);
//loop {
//if let Err(e) = self.perform_snapshot().await {
//error!("Error while performing snapshot: {}", e);
//}
//sleep(self.snapshot_period).await;
//}
//}
//async fn perform_snapshot(&self) -> anyhow::Result<()> {
//trace!("Performing snapshot.");
//let snapshot_dir = self.snapshot_path.clone();
//fs::create_dir_all(&snapshot_dir).await?;
//let temp_snapshot_dir =
//spawn_blocking(move || tempfile::tempdir_in(snapshot_dir)).await??;
//let temp_snapshot_path = temp_snapshot_dir.path().to_owned();
//let uuids = self
//.uuid_resolver_handle
//.snapshot(temp_snapshot_path.clone())
//.await?;
//if uuids.is_empty() {
//return Ok(());
//}
//self.update_handle
//.snapshot(uuids, temp_snapshot_path.clone())
//.await?;
//let snapshot_dir = self.snapshot_path.clone();
//let snapshot_path = self
//.snapshot_path
//.join(format!("{}.snapshot", self.db_name));
//let snapshot_path = spawn_blocking(move || -> anyhow::Result<PathBuf> {
//let temp_snapshot_file = tempfile::NamedTempFile::new_in(snapshot_dir)?;
//let temp_snapshot_file_path = temp_snapshot_file.path().to_owned();
//compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?;
//temp_snapshot_file.persist(&snapshot_path)?;
//Ok(snapshot_path)
//})
//.await??;
//trace!("Created snapshot in {:?}.", snapshot_path);
//Ok(())
//}
//}
pub fn load_snapshot(
db_path: impl AsRef<Path>,
snapshot_path: impl AsRef<Path>,
ignore_snapshot_if_db_exists: bool,
ignore_missing_snapshot: bool,
) -> anyhow::Result<()> {
if !db_path.as_ref().exists() && snapshot_path.as_ref().exists() {
match compression::from_tar_gz(snapshot_path, &db_path) {
Ok(()) => Ok(()),
Err(e) => {
// clean created db folder
std::fs::remove_dir_all(&db_path)?;
Err(e)
}
}
} else if db_path.as_ref().exists() && !ignore_snapshot_if_db_exists {
bail!(
"database already exists at {:?}, try to delete it or rename it",
db_path
.as_ref()
.canonicalize()
.unwrap_or_else(|_| db_path.as_ref().to_owned())
)
} else if !snapshot_path.as_ref().exists() && !ignore_missing_snapshot {
bail!(
"snapshot doesn't exist at {:?}",
snapshot_path
.as_ref()
.canonicalize()
.unwrap_or_else(|_| snapshot_path.as_ref().to_owned())
)
} else {
Ok(())
}
}
#[cfg(test)]
mod test {
use std::iter::FromIterator;
use std::{collections::HashSet, sync::Arc};
use futures::future::{err, ok};
use rand::Rng;
use tokio::time::timeout;
use uuid::Uuid;
use super::*;
use crate::index_controller::index_actor::MockIndexActorHandle;
use crate::index_controller::update_actor::{
error::UpdateActorError, MockUpdateActorHandle, UpdateActorHandleImpl,
};
use crate::index_controller::uuid_resolver::{
error::UuidResolverError, MockUuidResolverHandle,
};
#[actix_rt::test]
async fn test_normal() {
let mut rng = rand::thread_rng();
let uuids_num: usize = rng.gen_range(5..10);
let uuids = (0..uuids_num)
.map(|_| Uuid::new_v4())
.collect::<HashSet<_>>();
let mut uuid_resolver = MockUuidResolverHandle::new();
let uuids_clone = uuids.clone();
uuid_resolver
.expect_snapshot()
.times(1)
.returning(move |_| Box::pin(ok(uuids_clone.clone())));
let uuids_clone = uuids.clone();
let mut index_handle = MockIndexActorHandle::new();
index_handle
.expect_snapshot()
.withf(move |uuid, _path| uuids_clone.contains(uuid))
.times(uuids_num)
.returning(move |_, _| Box::pin(ok(())));
let dir = tempfile::tempdir_in(".").unwrap();
let handle = Arc::new(index_handle);
let update_handle =
UpdateActorHandleImpl::<Vec<u8>>::new(handle.clone(), dir.path(), 4096 * 100).unwrap();
let snapshot_path = tempfile::tempdir_in(".").unwrap();
let snapshot_service = SnapshotService::new(
uuid_resolver,
update_handle,
Duration::from_millis(100),
snapshot_path.path().to_owned(),
"data.ms".to_string(),
);
snapshot_service.perform_snapshot().await.unwrap();
}
#[actix_rt::test]
async fn error_performing_uuid_snapshot() {
let mut uuid_resolver = MockUuidResolverHandle::new();
uuid_resolver
.expect_snapshot()
.times(1)
// abitrary error
.returning(|_| Box::pin(err(UuidResolverError::NameAlreadyExist)));
let update_handle = MockUpdateActorHandle::new();
let snapshot_path = tempfile::tempdir_in(".").unwrap();
let snapshot_service = SnapshotService::new(
uuid_resolver,
update_handle,
Duration::from_millis(100),
snapshot_path.path().to_owned(),
"data.ms".to_string(),
);
assert!(snapshot_service.perform_snapshot().await.is_err());
// Nothing was written to the file
assert!(!snapshot_path.path().join("data.ms.snapshot").exists());
}
#[actix_rt::test]
async fn error_performing_index_snapshot() {
let uuid = Uuid::new_v4();
let mut uuid_resolver = MockUuidResolverHandle::new();
uuid_resolver
.expect_snapshot()
.times(1)
.returning(move |_| Box::pin(ok(HashSet::from_iter(Some(uuid)))));
let mut update_handle = MockUpdateActorHandle::new();
update_handle
.expect_snapshot()
// abitrary error
.returning(|_, _| Box::pin(err(UpdateActorError::UnexistingUpdate(0))));
let snapshot_path = tempfile::tempdir_in(".").unwrap();
let snapshot_service = SnapshotService::new(
uuid_resolver,
update_handle,
Duration::from_millis(100),
snapshot_path.path().to_owned(),
"data.ms".to_string(),
);
assert!(snapshot_service.perform_snapshot().await.is_err());
// Nothing was written to the file
assert!(!snapshot_path.path().join("data.ms.snapshot").exists());
}
#[actix_rt::test]
async fn test_loop() {
let mut uuid_resolver = MockUuidResolverHandle::new();
uuid_resolver
.expect_snapshot()
// we expect the funtion to be called between 2 and 3 time in the given interval.
.times(2..4)
// abitrary error, to short-circuit the function
.returning(move |_| Box::pin(err(UuidResolverError::NameAlreadyExist)));
let update_handle = MockUpdateActorHandle::new();
let snapshot_path = tempfile::tempdir_in(".").unwrap();
let snapshot_service = SnapshotService::new(
uuid_resolver,
update_handle,
Duration::from_millis(100),
snapshot_path.path().to_owned(),
"data.ms".to_string(),
);
let _ = timeout(Duration::from_millis(300), snapshot_service.run()).await;
}
}

View file

@ -1,260 +0,0 @@
use std::collections::HashSet;
use std::io;
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
use actix_web::error::PayloadError;
use async_stream::stream;
use bytes::Bytes;
use futures::{Stream, StreamExt};
use log::trace;
use milli::documents::DocumentBatchBuilder;
use serde_json::{Map, Value};
use tokio::sync::mpsc;
use uuid::Uuid;
use super::error::{Result, UpdateActorError};
use super::RegisterUpdate;
use super::{UpdateMsg, UpdateStore, UpdateStoreInfo, Update};
use crate::index_controller::index_actor::IndexActorHandle;
use crate::index_controller::update_file_store::UpdateFileStore;
use crate::index_controller::{DocumentAdditionFormat, Payload, UpdateStatus};
pub struct UpdateActor<I> {
store: Arc<UpdateStore>,
inbox: Option<mpsc::Receiver<UpdateMsg>>,
update_file_store: UpdateFileStore,
index_handle: I,
must_exit: Arc<AtomicBool>,
}
struct StreamReader<S> {
stream: S,
current: Option<Bytes>,
}
impl<S> StreamReader<S> {
fn new(stream: S) -> Self {
Self { stream, current: None }
}
}
impl<S: Stream<Item = std::result::Result<Bytes, PayloadError>> + Unpin> io::Read for StreamReader<S> {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
match self.current.take() {
Some(mut bytes) => {
let copied = bytes.split_to(buf.len());
buf.copy_from_slice(&copied);
if !bytes.is_empty() {
self.current.replace(bytes);
}
Ok(copied.len())
}
None => {
match tokio::runtime::Handle::current().block_on(self.stream.next()) {
Some(Ok(bytes)) => {
self.current.replace(bytes);
self.read(buf)
},
Some(Err(e)) => Err(io::Error::new(io::ErrorKind::BrokenPipe, e)),
None => return Ok(0),
}
}
}
}
}
impl<I> UpdateActor<I>
where
I: IndexActorHandle + Clone + Sync + Send + 'static,
{
pub fn new(
update_db_size: usize,
inbox: mpsc::Receiver<UpdateMsg>,
path: impl AsRef<Path>,
index_handle: I,
) -> anyhow::Result<Self> {
let path = path.as_ref().to_owned();
std::fs::create_dir_all(&path)?;
let mut options = heed::EnvOpenOptions::new();
options.map_size(update_db_size);
let must_exit = Arc::new(AtomicBool::new(false));
let store = UpdateStore::open(options, &path, index_handle.clone(), must_exit.clone())?;
let inbox = Some(inbox);
let update_file_store = UpdateFileStore::new(&path).unwrap();
Ok(Self {
store,
inbox,
index_handle,
must_exit,
update_file_store
})
}
pub async fn run(mut self) {
use UpdateMsg::*;
trace!("Started update actor.");
let mut inbox = self
.inbox
.take()
.expect("A receiver should be present by now.");
let must_exit = self.must_exit.clone();
let stream = stream! {
loop {
let msg = inbox.recv().await;
if must_exit.load(std::sync::atomic::Ordering::Relaxed) {
break;
}
match msg {
Some(msg) => yield msg,
None => break,
}
}
};
stream
.for_each_concurrent(Some(10), |msg| async {
match msg {
Update {
uuid,
update,
ret,
} => {
let _ = ret.send(self.handle_update(uuid, update).await);
}
ListUpdates { uuid, ret } => {
let _ = ret.send(self.handle_list_updates(uuid).await);
}
GetUpdate { uuid, ret, id } => {
let _ = ret.send(self.handle_get_update(uuid, id).await);
}
Delete { uuid, ret } => {
let _ = ret.send(self.handle_delete(uuid).await);
}
Snapshot { uuids, path, ret } => {
let _ = ret.send(self.handle_snapshot(uuids, path).await);
}
GetInfo { ret } => {
let _ = ret.send(self.handle_get_info().await);
}
Dump { uuids, path, ret } => {
let _ = ret.send(self.handle_dump(uuids, path).await);
}
}
})
.await;
}
async fn handle_update(
&self,
index_uuid: Uuid,
update: Update,
) -> Result<UpdateStatus> {
let registration = match update {
Update::DocumentAddition { payload, primary_key, method, format } => {
let content_uuid = match format {
DocumentAdditionFormat::Json => self.documents_from_json(payload).await?,
};
RegisterUpdate::DocumentAddition { primary_key, method, content_uuid }
}
};
let store = self.store.clone();
let status = tokio::task::spawn_blocking(move || store.register_update(index_uuid, registration)).await??;
Ok(status.into())
}
async fn documents_from_json(&self, payload: Payload) -> Result<Uuid> {
let file_store = self.update_file_store.clone();
tokio::task::spawn_blocking(move || {
let (uuid, mut file) = file_store.new_update().unwrap();
let mut builder = DocumentBatchBuilder::new(&mut *file).unwrap();
let documents: Vec<Map<String, Value>> = serde_json::from_reader(StreamReader::new(payload))?;
builder.add_documents(documents).unwrap();
builder.finish().unwrap();
file.persist();
Ok(uuid)
}).await?
}
async fn handle_list_updates(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>> {
let update_store = self.store.clone();
tokio::task::spawn_blocking(move || {
let result = update_store.list(uuid)?;
Ok(result)
})
.await?
}
async fn handle_get_update(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus> {
let store = self.store.clone();
tokio::task::spawn_blocking(move || {
let result = store
.meta(uuid, id)?
.ok_or(UpdateActorError::UnexistingUpdate(id))?;
Ok(result)
})
.await?
}
async fn handle_delete(&self, uuid: Uuid) -> Result<()> {
let store = self.store.clone();
tokio::task::spawn_blocking(move || store.delete_all(uuid)).await??;
Ok(())
}
async fn handle_snapshot(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
let index_handle = self.index_handle.clone();
let update_store = self.store.clone();
tokio::task::spawn_blocking(move || update_store.snapshot(&uuids, &path, index_handle))
.await??;
Ok(())
}
async fn handle_dump(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
let index_handle = self.index_handle.clone();
let update_store = self.store.clone();
tokio::task::spawn_blocking(move || -> Result<()> {
update_store.dump(&uuids, path.to_path_buf(), index_handle)?;
Ok(())
})
.await??;
Ok(())
}
async fn handle_get_info(&self) -> Result<UpdateStoreInfo> {
let update_store = self.store.clone();
let info = tokio::task::spawn_blocking(move || -> Result<UpdateStoreInfo> {
let info = update_store.get_info()?;
Ok(info)
})
.await??;
Ok(info)
}
}

View file

@ -1,61 +0,0 @@
use std::error::Error;
use meilisearch_error::{Code, ErrorCode};
use crate::index_controller::index_actor::error::IndexActorError;
pub type Result<T> = std::result::Result<T, UpdateActorError>;
#[derive(Debug, thiserror::Error)]
#[allow(clippy::large_enum_variant)]
pub enum UpdateActorError {
#[error("Update {0} not found.")]
UnexistingUpdate(u64),
#[error("Internal error: {0}")]
Internal(Box<dyn Error + Send + Sync + 'static>),
#[error("{0}")]
IndexActor(#[from] IndexActorError),
#[error(
"update store was shut down due to a fatal error, please check your logs for more info."
)]
FatalUpdateStoreError,
#[error("{0}")]
InvalidPayload(Box<dyn Error + Send + Sync + 'static>),
#[error("{0}")]
PayloadError(#[from] actix_web::error::PayloadError),
}
impl<T> From<tokio::sync::mpsc::error::SendError<T>> for UpdateActorError {
fn from(_: tokio::sync::mpsc::error::SendError<T>) -> Self {
Self::FatalUpdateStoreError
}
}
impl From<tokio::sync::oneshot::error::RecvError> for UpdateActorError {
fn from(_: tokio::sync::oneshot::error::RecvError) -> Self {
Self::FatalUpdateStoreError
}
}
internal_error!(
UpdateActorError: heed::Error,
std::io::Error,
serde_json::Error,
tokio::task::JoinError
);
impl ErrorCode for UpdateActorError {
fn error_code(&self) -> Code {
match self {
UpdateActorError::UnexistingUpdate(_) => Code::NotFound,
UpdateActorError::Internal(_) => Code::Internal,
UpdateActorError::IndexActor(e) => e.error_code(),
UpdateActorError::FatalUpdateStoreError => Code::Internal,
UpdateActorError::InvalidPayload(_) => Code::BadRequest,
UpdateActorError::PayloadError(error) => match error {
actix_web::error::PayloadError::Overflow => Code::PayloadTooLarge,
_ => Code::Internal,
},
}
}
}

View file

@ -1,93 +0,0 @@
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use tokio::sync::{mpsc, oneshot};
use uuid::Uuid;
use crate::index_controller::{IndexActorHandle, Update, UpdateStatus};
use super::error::Result;
use super::{UpdateActor, UpdateActorHandle, UpdateMsg, UpdateStoreInfo};
#[derive(Clone)]
pub struct UpdateActorHandleImpl {
sender: mpsc::Sender<UpdateMsg>,
}
impl UpdateActorHandleImpl {
pub fn new<I>(
index_handle: I,
path: impl AsRef<Path>,
update_store_size: usize,
) -> anyhow::Result<Self>
where
I: IndexActorHandle + Clone + Sync + Send +'static,
{
let path = path.as_ref().to_owned();
let (sender, receiver) = mpsc::channel(100);
let actor = UpdateActor::new(update_store_size, receiver, path, index_handle)?;
tokio::task::spawn_local(actor.run());
Ok(Self { sender })
}
}
#[async_trait::async_trait]
impl UpdateActorHandle for UpdateActorHandleImpl {
async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::ListUpdates { uuid, ret };
self.sender.send(msg).await?;
receiver.await?
}
async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::GetUpdate { uuid, id, ret };
self.sender.send(msg).await?;
receiver.await?
}
async fn delete(&self, uuid: Uuid) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::Delete { uuid, ret };
self.sender.send(msg).await?;
receiver.await?
}
async fn snapshot(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::Snapshot { uuids, path, ret };
self.sender.send(msg).await?;
receiver.await?
}
async fn dump(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::Dump { uuids, path, ret };
self.sender.send(msg).await?;
receiver.await?
}
async fn get_info(&self) -> Result<UpdateStoreInfo> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::GetInfo { ret };
self.sender.send(msg).await?;
receiver.await?
}
async fn update(
&self,
uuid: Uuid,
update: Update,
) -> Result<UpdateStatus> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::Update {
uuid,
update,
ret,
};
self.sender.send(msg).await?;
receiver.await?
}
}

View file

@ -1,42 +0,0 @@
use std::collections::HashSet;
use std::path::PathBuf;
use tokio::sync::oneshot;
use uuid::Uuid;
use super::error::Result;
use super::{UpdateStatus, UpdateStoreInfo, Update};
pub enum UpdateMsg {
Update {
uuid: Uuid,
update: Update,
ret: oneshot::Sender<Result<UpdateStatus>>,
},
ListUpdates {
uuid: Uuid,
ret: oneshot::Sender<Result<Vec<UpdateStatus>>>,
},
GetUpdate {
uuid: Uuid,
ret: oneshot::Sender<Result<UpdateStatus>>,
id: u64,
},
Delete {
uuid: Uuid,
ret: oneshot::Sender<Result<()>>,
},
Snapshot {
uuids: HashSet<Uuid>,
path: PathBuf,
ret: oneshot::Sender<Result<()>>,
},
Dump {
uuids: HashSet<Uuid>,
path: PathBuf,
ret: oneshot::Sender<Result<()>>,
},
GetInfo {
ret: oneshot::Sender<Result<UpdateStoreInfo>>,
},
}

View file

@ -1,49 +0,0 @@
use std::{collections::HashSet, path::PathBuf};
use milli::update::IndexDocumentsMethod;
use uuid::Uuid;
use serde::{Serialize, Deserialize};
use crate::index_controller::UpdateStatus;
use super::Update;
use actor::UpdateActor;
use error::Result;
use message::UpdateMsg;
pub use handle_impl::UpdateActorHandleImpl;
pub use store::{UpdateStore, UpdateStoreInfo};
mod actor;
pub mod error;
mod handle_impl;
mod message;
pub mod store;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RegisterUpdate {
DocumentAddition {
primary_key: Option<String>,
method: IndexDocumentsMethod,
content_uuid: Uuid,
}
}
#[cfg(test)]
use mockall::automock;
#[async_trait::async_trait]
pub trait UpdateActorHandle {
async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>>;
async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus>;
async fn delete(&self, uuid: Uuid) -> Result<()>;
async fn snapshot(&self, uuid: HashSet<Uuid>, path: PathBuf) -> Result<()>;
async fn dump(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()>;
async fn get_info(&self) -> Result<UpdateStoreInfo>;
async fn update(
&self,
uuid: Uuid,
update: Update,
) -> Result<UpdateStatus>;
}

View file

@ -1,86 +0,0 @@
use std::{borrow::Cow, convert::TryInto, mem::size_of};
use heed::{BytesDecode, BytesEncode};
use uuid::Uuid;
pub struct NextIdCodec;
pub enum NextIdKey {
Global,
Index(Uuid),
}
impl<'a> BytesEncode<'a> for NextIdCodec {
type EItem = NextIdKey;
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
match item {
NextIdKey::Global => Some(Cow::Borrowed(b"__global__")),
NextIdKey::Index(ref uuid) => Some(Cow::Borrowed(uuid.as_bytes())),
}
}
}
pub struct PendingKeyCodec;
impl<'a> BytesEncode<'a> for PendingKeyCodec {
type EItem = (u64, Uuid, u64);
fn bytes_encode((global_id, uuid, update_id): &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
let mut bytes = Vec::with_capacity(size_of::<Self::EItem>());
bytes.extend_from_slice(&global_id.to_be_bytes());
bytes.extend_from_slice(uuid.as_bytes());
bytes.extend_from_slice(&update_id.to_be_bytes());
Some(Cow::Owned(bytes))
}
}
impl<'a> BytesDecode<'a> for PendingKeyCodec {
type DItem = (u64, Uuid, u64);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let global_id_bytes = bytes.get(0..size_of::<u64>())?.try_into().ok()?;
let global_id = u64::from_be_bytes(global_id_bytes);
let uuid_bytes = bytes
.get(size_of::<u64>()..(size_of::<u64>() + size_of::<Uuid>()))?
.try_into()
.ok()?;
let uuid = Uuid::from_bytes(uuid_bytes);
let update_id_bytes = bytes
.get((size_of::<u64>() + size_of::<Uuid>())..)?
.try_into()
.ok()?;
let update_id = u64::from_be_bytes(update_id_bytes);
Some((global_id, uuid, update_id))
}
}
pub struct UpdateKeyCodec;
impl<'a> BytesEncode<'a> for UpdateKeyCodec {
type EItem = (Uuid, u64);
fn bytes_encode((uuid, update_id): &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
let mut bytes = Vec::with_capacity(size_of::<Self::EItem>());
bytes.extend_from_slice(uuid.as_bytes());
bytes.extend_from_slice(&update_id.to_be_bytes());
Some(Cow::Owned(bytes))
}
}
impl<'a> BytesDecode<'a> for UpdateKeyCodec {
type DItem = (Uuid, u64);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let uuid_bytes = bytes.get(0..size_of::<Uuid>())?.try_into().ok()?;
let uuid = Uuid::from_bytes(uuid_bytes);
let update_id_bytes = bytes.get(size_of::<Uuid>()..)?.try_into().ok()?;
let update_id = u64::from_be_bytes(update_id_bytes);
Some((uuid, update_id))
}
}

View file

@ -1,186 +0,0 @@
use std::{
collections::HashSet,
fs::{create_dir_all, File},
io::Write,
path::{Path, PathBuf},
};
use heed::RoTxn;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use super::{Result, State, UpdateStore};
use crate::index_controller::{
index_actor::IndexActorHandle,
UpdateStatus,
};
#[derive(Serialize, Deserialize)]
struct UpdateEntry {
uuid: Uuid,
update: UpdateStatus,
}
impl UpdateStore {
pub fn dump(
&self,
uuids: &HashSet<Uuid>,
path: PathBuf,
handle: impl IndexActorHandle,
) -> Result<()> {
let state_lock = self.state.write();
state_lock.swap(State::Dumping);
// txn must *always* be acquired after state lock, or it will dead lock.
let txn = self.env.write_txn()?;
let dump_path = path.join("updates");
create_dir_all(&dump_path)?;
self.dump_updates(&txn, uuids, &dump_path)?;
let fut = dump_indexes(uuids, handle, &path);
tokio::runtime::Handle::current().block_on(fut)?;
state_lock.swap(State::Idle);
Ok(())
}
fn dump_updates(
&self,
txn: &RoTxn,
uuids: &HashSet<Uuid>,
path: impl AsRef<Path>,
) -> Result<()> {
let dump_data_path = path.as_ref().join("data.jsonl");
let mut dump_data_file = File::create(dump_data_path)?;
let update_files_path = path.as_ref().join(super::UPDATE_DIR);
create_dir_all(&update_files_path)?;
self.dump_pending(txn, uuids, &mut dump_data_file, &path)?;
self.dump_completed(txn, uuids, &mut dump_data_file)?;
Ok(())
}
fn dump_pending(
&self,
_txn: &RoTxn,
_uuids: &HashSet<Uuid>,
_file: &mut File,
_dst_path: impl AsRef<Path>,
) -> Result<()> {
todo!()
//let pendings = self.pending_queue.iter(txn)?.lazily_decode_data();
//for pending in pendings {
//let ((_, uuid, _), data) = pending?;
//if uuids.contains(&uuid) {
//let update = data.decode()?;
//if let Some(ref update_uuid) = update.content {
//let src = super::update_uuid_to_file_path(&self.path, *update_uuid);
//let dst = super::update_uuid_to_file_path(&dst_path, *update_uuid);
//std::fs::copy(src, dst)?;
//}
//let update_json = UpdateEntry {
//uuid,
//update: update.into(),
//};
//serde_json::to_writer(&mut file, &update_json)?;
//file.write_all(b"\n")?;
//}
//}
//Ok(())
}
fn dump_completed(
&self,
txn: &RoTxn,
uuids: &HashSet<Uuid>,
mut file: &mut File,
) -> Result<()> {
let updates = self.updates.iter(txn)?.lazily_decode_data();
for update in updates {
let ((uuid, _), data) = update?;
if uuids.contains(&uuid) {
let update = data.decode()?;
let update_json = UpdateEntry { uuid, update };
serde_json::to_writer(&mut file, &update_json)?;
file.write_all(b"\n")?;
}
}
Ok(())
}
pub fn load_dump(
_src: impl AsRef<Path>,
_dst: impl AsRef<Path>,
_db_size: usize,
) -> anyhow::Result<()> {
todo!()
//let dst_update_path = dst.as_ref().join("updates/");
//create_dir_all(&dst_update_path)?;
//let mut options = EnvOpenOptions::new();
//options.map_size(db_size as usize);
//let (store, _) = UpdateStore::new(options, &dst_update_path)?;
//let src_update_path = src.as_ref().join("updates");
//let update_data = File::open(&src_update_path.join("data.jsonl"))?;
//let mut update_data = BufReader::new(update_data);
//std::fs::create_dir_all(dst_update_path.join("update_files/"))?;
//let mut wtxn = store.env.write_txn()?;
//let mut line = String::new();
//loop {
//match update_data.read_line(&mut line) {
//Ok(0) => break,
//Ok(_) => {
//let UpdateEntry { uuid, update } = serde_json::from_str(&line)?;
//store.register_raw_updates(&mut wtxn, &update, uuid)?;
//// Copy ascociated update path if it exists
//if let UpdateStatus::Enqueued(Enqueued {
//content: Some(uuid),
//..
//}) = update
//{
//let src = update_uuid_to_file_path(&src_update_path, uuid);
//let dst = update_uuid_to_file_path(&dst_update_path, uuid);
//std::fs::copy(src, dst)?;
//}
//}
//_ => break,
//}
//line.clear();
//}
//wtxn.commit()?;
//Ok(())
}
}
async fn dump_indexes(
uuids: &HashSet<Uuid>,
handle: impl IndexActorHandle,
path: impl AsRef<Path>,
) -> Result<()> {
for uuid in uuids {
handle.dump(*uuid, path.as_ref().to_owned()).await?;
}
Ok(())
}

View file

@ -1,708 +0,0 @@
mod codec;
pub mod dump;
use std::fs::{create_dir_all, remove_file};
use std::path::Path;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::{
collections::{BTreeMap, HashSet},
path::PathBuf,
time::Duration,
};
use arc_swap::ArcSwap;
use futures::StreamExt;
use heed::types::{ByteSlice, OwnedType, SerdeJson};
use heed::zerocopy::U64;
use heed::{CompactionOption, Database, Env, EnvOpenOptions};
use log::error;
use parking_lot::{Mutex, MutexGuard};
use tokio::runtime::Handle;
use tokio::sync::mpsc;
use tokio::sync::mpsc::error::TrySendError;
use tokio::time::timeout;
use uuid::Uuid;
use codec::*;
use super::RegisterUpdate;
use super::error::Result;
use crate::helpers::EnvSizer;
use crate::index_controller::update_files_path;
use crate::index_controller::{index_actor::CONCURRENT_INDEX_MSG, updates::*, IndexActorHandle};
#[allow(clippy::upper_case_acronyms)]
type BEU64 = U64<heed::byteorder::BE>;
const UPDATE_DIR: &str = "update_files";
pub struct UpdateStoreInfo {
/// Size of the update store in bytes.
pub size: u64,
/// Uuid of the currently processing update if it exists
pub processing: Option<Uuid>,
}
/// A data structure that allows concurrent reads AND exactly one writer.
pub struct StateLock {
lock: Mutex<()>,
data: ArcSwap<State>,
}
pub struct StateLockGuard<'a> {
_lock: MutexGuard<'a, ()>,
state: &'a StateLock,
}
impl StateLockGuard<'_> {
pub fn swap(&self, state: State) -> Arc<State> {
self.state.data.swap(Arc::new(state))
}
}
impl StateLock {
fn from_state(state: State) -> Self {
let lock = Mutex::new(());
let data = ArcSwap::from(Arc::new(state));
Self { lock, data }
}
pub fn read(&self) -> Arc<State> {
self.data.load().clone()
}
pub fn write(&self) -> StateLockGuard {
let _lock = self.lock.lock();
let state = &self;
StateLockGuard { _lock, state }
}
}
#[allow(clippy::large_enum_variant)]
pub enum State {
Idle,
Processing(Uuid, Processing),
Snapshoting,
Dumping,
}
#[derive(Clone)]
pub struct UpdateStore {
pub env: Env,
/// A queue containing the updates to process, ordered by arrival.
/// The key are built as follow:
/// | global_update_id | index_uuid | update_id |
/// | 8-bytes | 16-bytes | 8-bytes |
pending_queue: Database<PendingKeyCodec, SerdeJson<Enqueued>>,
/// Map indexes to the next available update id. If NextIdKey::Global is queried, then the next
/// global update id is returned
next_update_id: Database<NextIdCodec, OwnedType<BEU64>>,
/// Contains all the performed updates meta, be they failed, aborted, or processed.
/// The keys are built as follow:
/// | Uuid | id |
/// | 16-bytes | 8-bytes |
updates: Database<UpdateKeyCodec, SerdeJson<UpdateStatus>>,
/// Indicates the current state of the update store,
state: Arc<StateLock>,
/// Wake up the loop when a new event occurs.
notification_sender: mpsc::Sender<()>,
path: PathBuf,
}
impl UpdateStore {
fn new(
mut options: EnvOpenOptions,
path: impl AsRef<Path>,
) -> anyhow::Result<(Self, mpsc::Receiver<()>)> {
options.max_dbs(5);
let update_path = path.as_ref().join("updates");
std::fs::create_dir_all(&update_path)?;
let env = options.open(update_path)?;
let pending_queue = env.create_database(Some("pending-queue"))?;
let next_update_id = env.create_database(Some("next-update-id"))?;
let updates = env.create_database(Some("updates"))?;
let state = Arc::new(StateLock::from_state(State::Idle));
let (notification_sender, notification_receiver) = mpsc::channel(1);
Ok((
Self {
env,
pending_queue,
next_update_id,
updates,
state,
notification_sender,
path: path.as_ref().to_owned(),
},
notification_receiver,
))
}
pub fn open(
options: EnvOpenOptions,
path: impl AsRef<Path>,
index_handle: impl IndexActorHandle + Clone + Sync + Send + 'static,
must_exit: Arc<AtomicBool>,
) -> anyhow::Result<Arc<Self>> {
let (update_store, mut notification_receiver) = Self::new(options, path)?;
let update_store = Arc::new(update_store);
// Send a first notification to trigger the process.
if let Err(TrySendError::Closed(())) = update_store.notification_sender.try_send(()) {
panic!("Failed to init update store");
}
// We need a weak reference so we can take ownership on the arc later when we
// want to close the index.
let duration = Duration::from_secs(10 * 60); // 10 minutes
let update_store_weak = Arc::downgrade(&update_store);
tokio::task::spawn_local(async move {
// Block and wait for something to process with a timeout. The timeout
// function returns a Result and we must just unlock the loop on Result.
'outer: while timeout(duration, notification_receiver.recv())
.await
.map_or(true, |o| o.is_some())
{
loop {
match update_store_weak.upgrade() {
Some(update_store) => {
let handler = index_handle.clone();
let res = tokio::task::spawn_blocking(move || {
update_store.process_pending_update(handler)
})
.await
.expect("Fatal error processing update.");
match res {
Ok(Some(_)) => (),
Ok(None) => break,
Err(e) => {
error!("Fatal error while processing an update that requires the update store to shutdown: {}", e);
must_exit.store(true, Ordering::SeqCst);
break 'outer;
}
}
}
// the ownership on the arc has been taken, we need to exit.
None => break 'outer,
}
}
}
error!("Update store loop exited.");
});
Ok(update_store)
}
/// Returns the next global update id and the next update id for a given `index_uuid`.
fn next_update_id(&self, txn: &mut heed::RwTxn, index_uuid: Uuid) -> heed::Result<(u64, u64)> {
let global_id = self
.next_update_id
.get(txn, &NextIdKey::Global)?
.map(U64::get)
.unwrap_or_default();
self.next_update_id
.put(txn, &NextIdKey::Global, &BEU64::new(global_id + 1))?;
let update_id = self.next_update_id_raw(txn, index_uuid)?;
Ok((global_id, update_id))
}
/// Returns the next next update id for a given `index_uuid` without
/// incrementing the global update id. This is useful for the dumps.
fn next_update_id_raw(&self, txn: &mut heed::RwTxn, index_uuid: Uuid) -> heed::Result<u64> {
let update_id = self
.next_update_id
.get(txn, &NextIdKey::Index(index_uuid))?
.map(U64::get)
.unwrap_or_default();
self.next_update_id.put(
txn,
&NextIdKey::Index(index_uuid),
&BEU64::new(update_id + 1),
)?;
Ok(update_id)
}
/// Registers the update content in the pending store and the meta
/// into the pending-meta store. Returns the new unique update id.
pub fn register_update(
&self,
index_uuid: Uuid,
update: RegisterUpdate,
) -> heed::Result<Enqueued> {
let mut txn = self.env.write_txn()?;
let (global_id, update_id) = self.next_update_id(&mut txn, index_uuid)?;
let meta = Enqueued::new(update, update_id);
self.pending_queue
.put(&mut txn, &(global_id, index_uuid, update_id), &meta)?;
txn.commit()?;
if let Err(TrySendError::Closed(())) = self.notification_sender.try_send(()) {
panic!("Update store loop exited");
}
Ok(meta)
}
// /// Push already processed update in the UpdateStore without triggering the notification
// /// process. This is useful for the dumps.
//pub fn register_raw_updates(
//&self,
//wtxn: &mut heed::RwTxn,
//update: &UpdateStatus,
//index_uuid: Uuid,
//) -> heed::Result<()> {
//match update {
//UpdateStatus::Enqueued(enqueued) => {
//let (global_id, _update_id) = self.next_update_id(wtxn, index_uuid)?;
//self.pending_queue.remap_key_type::<PendingKeyCodec>().put(
//wtxn,
//&(global_id, index_uuid, enqueued.id()),
//enqueued,
//)?;
//}
//_ => {
//let _update_id = self.next_update_id_raw(wtxn, index_uuid)?;
//self.updates.put(wtxn, &(index_uuid, update.id()), update)?;
//}
//}
//Ok(())
//}
/// Executes the user provided function on the next pending update (the one with the lowest id).
/// This is asynchronous as it let the user process the update with a read-only txn and
/// only writing the result meta to the processed-meta store *after* it has been processed.
fn process_pending_update(&self, index_handle: impl IndexActorHandle) -> Result<Option<()>> {
// Create a read transaction to be able to retrieve the pending update in order.
let rtxn = self.env.read_txn()?;
let first_meta = self.pending_queue.first(&rtxn)?;
drop(rtxn);
// If there is a pending update we process and only keep
// a reader while processing it, not a writer.
match first_meta {
Some(((global_id, index_uuid, _), pending)) => {
let processing = pending.processing();
// Acquire the state lock and set the current state to processing.
// txn must *always* be acquired after state lock, or it will dead lock.
let state = self.state.write();
state.swap(State::Processing(index_uuid, processing.clone()));
let result =
self.perform_update(processing, index_handle, index_uuid, global_id);
state.swap(State::Idle);
result
}
None => Ok(None),
}
}
fn perform_update(
&self,
processing: Processing,
index_handle: impl IndexActorHandle,
index_uuid: Uuid,
global_id: u64,
) -> Result<Option<()>> {
// Process the pending update using the provided user function.
let handle = Handle::current();
let update_id = processing.id();
let result =
match handle.block_on(index_handle.update(index_uuid, processing.clone())) {
Ok(result) => result,
Err(e) => Err(processing.fail(e.into())),
};
// Once the pending update have been successfully processed
// we must remove the content from the pending and processing stores and
// write the *new* meta to the processed-meta store and commit.
let mut wtxn = self.env.write_txn()?;
self.pending_queue
.delete(&mut wtxn, &(global_id, index_uuid, update_id))?;
let result = match result {
Ok(res) => res.into(),
Err(res) => res.into(),
};
self.updates
.put(&mut wtxn, &(index_uuid, update_id), &result)?;
wtxn.commit()?;
Ok(Some(()))
}
/// List the updates for `index_uuid`.
pub fn list(&self, index_uuid: Uuid) -> Result<Vec<UpdateStatus>> {
let mut update_list = BTreeMap::<u64, UpdateStatus>::new();
let txn = self.env.read_txn()?;
let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data();
for entry in pendings {
let ((_, uuid, id), pending) = entry?;
if uuid == index_uuid {
update_list.insert(id, pending.decode()?.into());
}
}
let updates = self
.updates
.remap_key_type::<ByteSlice>()
.prefix_iter(&txn, index_uuid.as_bytes())?;
for entry in updates {
let (_, update) = entry?;
update_list.insert(update.id(), update);
}
// If the currently processing update is from this index, replace the corresponding pending update with this one.
match *self.state.read() {
State::Processing(uuid, ref processing) if uuid == index_uuid => {
update_list.insert(processing.id(), processing.clone().into());
}
_ => (),
}
Ok(update_list.into_iter().map(|(_, v)| v).collect())
}
/// Returns the update associated meta or `None` if the update doesn't exist.
pub fn meta(&self, index_uuid: Uuid, update_id: u64) -> heed::Result<Option<UpdateStatus>> {
// Check if the update is the one currently processing
match *self.state.read() {
State::Processing(uuid, ref processing)
if uuid == index_uuid && processing.id() == update_id =>
{
return Ok(Some(processing.clone().into()));
}
_ => (),
}
let txn = self.env.read_txn()?;
// Else, check if it is in the updates database:
let update = self.updates.get(&txn, &(index_uuid, update_id))?;
if let Some(update) = update {
return Ok(Some(update));
}
// If nothing was found yet, we resolve to iterate over the pending queue.
let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data();
for entry in pendings {
let ((_, uuid, id), pending) = entry?;
if uuid == index_uuid && id == update_id {
return Ok(Some(pending.decode()?.into()));
}
}
// No update was found.
Ok(None)
}
/// Delete all updates for an index from the update store. If the currently processing update
/// is for `index_uuid`, the call will block until the update is terminated.
pub fn delete_all(&self, index_uuid: Uuid) -> Result<()> {
let mut txn = self.env.write_txn()?;
// Contains all the content file paths that we need to be removed if the deletion was successful.
let uuids_to_remove = Vec::new();
let mut pendings = self.pending_queue.iter_mut(&mut txn)?.lazily_decode_data();
while let Some(Ok(((_, uuid, _), pending))) = pendings.next() {
if uuid == index_uuid {
let mut _pending = pending.decode()?;
//if let Some(update_uuid) = pending.content.take() {
//uuids_to_remove.push(update_uuid);
//}
// Invariant check: we can only delete the current entry when we don't hold
// references to it anymore. This must be done after we have retrieved its content.
unsafe {
pendings.del_current()?;
}
}
}
drop(pendings);
let mut updates = self
.updates
.remap_key_type::<ByteSlice>()
.prefix_iter_mut(&mut txn, index_uuid.as_bytes())?
.lazily_decode_data();
while let Some(_) = updates.next() {
unsafe {
updates.del_current()?;
}
}
drop(updates);
txn.commit()?;
// If the currently processing update is from our index, we wait until it is
// finished before returning. This ensure that no write to the index occurs after we delete it.
if let State::Processing(uuid, _) = *self.state.read() {
if uuid == index_uuid {
// wait for a write lock, do nothing with it.
self.state.write();
}
}
// Finally, remove any outstanding update files. This must be done after waiting for the
// last update to ensure that the update files are not deleted before the update needs
// them.
uuids_to_remove
.iter()
.map(|uuid: &Uuid| update_files_path(&self.path).join(uuid.to_string()))
.for_each(|path| {
let _ = remove_file(path);
});
Ok(())
}
pub fn snapshot(
&self,
uuids: &HashSet<Uuid>,
path: impl AsRef<Path>,
handle: impl IndexActorHandle + Clone,
) -> Result<()> {
let state_lock = self.state.write();
state_lock.swap(State::Snapshoting);
let txn = self.env.write_txn()?;
let update_path = path.as_ref().join("updates");
create_dir_all(&update_path)?;
// acquire write lock to prevent further writes during snapshot
create_dir_all(&update_path)?;
let db_path = update_path.join("data.mdb");
// create db snapshot
self.env.copy_to_path(&db_path, CompactionOption::Enabled)?;
let update_files_path = update_path.join(UPDATE_DIR);
create_dir_all(&update_files_path)?;
let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data();
for entry in pendings {
let ((_, _uuid, _), _pending) = entry?;
//if uuids.contains(&uuid) {
//if let Enqueued {
//content: Some(uuid),
//..
//} = pending.decode()?
//{
//let path = update_uuid_to_file_path(&self.path, uuid);
//copy(path, &update_files_path)?;
//}
//}
}
let path = &path.as_ref().to_path_buf();
let handle = &handle;
// Perform the snapshot of each index concurently. Only a third of the capabilities of
// the index actor at a time not to put too much pressure on the index actor
let mut stream = futures::stream::iter(uuids.iter())
.map(move |uuid| handle.snapshot(*uuid, path.clone()))
.buffer_unordered(CONCURRENT_INDEX_MSG / 3);
Handle::current().block_on(async {
while let Some(res) = stream.next().await {
res?;
}
Ok(()) as Result<()>
})?;
Ok(())
}
pub fn get_info(&self) -> Result<UpdateStoreInfo> {
let size = self.env.size();
let txn = self.env.read_txn()?;
for entry in self.pending_queue.iter(&txn)? {
let (_, _pending) = entry?;
//if let Enqueued {
//content: Some(uuid),
//..
//} = pending
//{
//let path = update_uuid_to_file_path(&self.path, uuid);
//size += File::open(path)?.metadata()?.len();
//}
}
let processing = match *self.state.read() {
State::Processing(uuid, _) => Some(uuid),
_ => None,
};
Ok(UpdateStoreInfo { size, processing })
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::index_controller::{
index_actor::{error::IndexActorError, MockIndexActorHandle},
UpdateResult,
};
use futures::future::ok;
#[actix_rt::test]
async fn test_next_id() {
let dir = tempfile::tempdir_in(".").unwrap();
let mut options = EnvOpenOptions::new();
let handle = Arc::new(MockIndexActorHandle::new());
options.map_size(4096 * 100);
let update_store = UpdateStore::open(
options,
dir.path(),
handle,
Arc::new(AtomicBool::new(false)),
)
.unwrap();
let index1_uuid = Uuid::new_v4();
let index2_uuid = Uuid::new_v4();
let mut txn = update_store.env.write_txn().unwrap();
let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap();
txn.commit().unwrap();
assert_eq!((0, 0), ids);
let mut txn = update_store.env.write_txn().unwrap();
let ids = update_store.next_update_id(&mut txn, index2_uuid).unwrap();
txn.commit().unwrap();
assert_eq!((1, 0), ids);
let mut txn = update_store.env.write_txn().unwrap();
let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap();
txn.commit().unwrap();
assert_eq!((2, 1), ids);
}
#[actix_rt::test]
async fn test_register_update() {
let dir = tempfile::tempdir_in(".").unwrap();
let mut options = EnvOpenOptions::new();
let handle = Arc::new(MockIndexActorHandle::new());
options.map_size(4096 * 100);
let update_store = UpdateStore::open(
options,
dir.path(),
handle,
Arc::new(AtomicBool::new(false)),
)
.unwrap();
let meta = UpdateMeta::ClearDocuments;
let uuid = Uuid::new_v4();
let store_clone = update_store.clone();
tokio::task::spawn_blocking(move || {
store_clone.register_update(meta, None, uuid).unwrap();
})
.await
.unwrap();
let txn = update_store.env.read_txn().unwrap();
assert!(update_store
.pending_queue
.get(&txn, &(0, uuid, 0))
.unwrap()
.is_some());
}
#[actix_rt::test]
async fn test_process_update() {
let dir = tempfile::tempdir_in(".").unwrap();
let mut handle = MockIndexActorHandle::new();
handle
.expect_update()
.times(2)
.returning(|_index_uuid, processing, _file| {
if processing.id() == 0 {
Box::pin(ok(Ok(processing.process(UpdateResult::Other))))
} else {
Box::pin(ok(Err(
processing.fail(IndexActorError::ExistingPrimaryKey.into())
)))
}
});
let handle = Arc::new(handle);
let mut options = EnvOpenOptions::new();
options.map_size(4096 * 100);
let store = UpdateStore::open(
options,
dir.path(),
handle.clone(),
Arc::new(AtomicBool::new(false)),
)
.unwrap();
// wait a bit for the event loop exit.
tokio::time::sleep(std::time::Duration::from_millis(50)).await;
let mut txn = store.env.write_txn().unwrap();
let update = Enqueued::new(UpdateMeta::ClearDocuments, 0, None);
let uuid = Uuid::new_v4();
store
.pending_queue
.put(&mut txn, &(0, uuid, 0), &update)
.unwrap();
let update = Enqueued::new(UpdateMeta::ClearDocuments, 1, None);
store
.pending_queue
.put(&mut txn, &(1, uuid, 1), &update)
.unwrap();
txn.commit().unwrap();
// Process the pending, and check that it has been moved to the update databases, and
// removed from the pending database.
let store_clone = store.clone();
tokio::task::spawn_blocking(move || {
store_clone.process_pending_update(handle.clone()).unwrap();
store_clone.process_pending_update(handle).unwrap();
})
.await
.unwrap();
let txn = store.env.read_txn().unwrap();
assert!(store.pending_queue.first(&txn).unwrap().is_none());
let update = store.updates.get(&txn, &(uuid, 0)).unwrap().unwrap();
assert!(matches!(update, UpdateStatus::Processed(_)));
let update = store.updates.get(&txn, &(uuid, 1)).unwrap().unwrap();
assert!(matches!(update, UpdateStatus::Failed(_)));
}
}

View file

@ -1,63 +0,0 @@
use std::fs::File;
use std::path::{Path, PathBuf};
use std::ops::{Deref, DerefMut};
use tempfile::NamedTempFile;
use uuid::Uuid;
use super::error::Result;
pub struct UpdateFile {
path: PathBuf,
file: NamedTempFile,
}
impl UpdateFile {
pub fn persist(self) {
println!("persisting in {}", self.path.display());
self.file.persist(&self.path).unwrap();
}
}
impl Deref for UpdateFile {
type Target = NamedTempFile;
fn deref(&self) -> &Self::Target {
&self.file
}
}
impl DerefMut for UpdateFile {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.file
}
}
#[derive(Clone, Debug)]
pub struct UpdateFileStore {
path: PathBuf,
}
impl UpdateFileStore {
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
let path = path.as_ref().join("updates/updates_files");
std::fs::create_dir_all(&path).unwrap();
Ok(Self { path })
}
pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> {
let file = NamedTempFile::new().unwrap();
let uuid = Uuid::new_v4();
let path = self.path.join(uuid.to_string());
let update_file = UpdateFile { file, path };
Ok((uuid, update_file))
}
pub fn get_update(&self, uuid: Uuid) -> Result<File> {
let path = self.path.join(uuid.to_string());
println!("reading in {}", path.display());
let file = File::open(path).unwrap();
Ok(file)
}
}

View file

@ -1,231 +0,0 @@
use chrono::{DateTime, Utc};
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod};
use serde::{Deserialize, Serialize};
use crate::{
error::ResponseError,
index::{Settings, Unchecked},
};
use super::update_actor::RegisterUpdate;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateResult {
DocumentsAddition(DocumentAdditionResult),
DocumentDeletion { deleted: u64 },
Other,
}
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum UpdateMeta {
DocumentsAddition {
method: IndexDocumentsMethod,
primary_key: Option<String>,
},
ClearDocuments,
DeleteDocuments {
ids: Vec<String>,
},
Settings(Settings<Unchecked>),
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Enqueued {
pub update_id: u64,
pub meta: RegisterUpdate,
pub enqueued_at: DateTime<Utc>,
}
impl Enqueued {
pub fn new(meta: RegisterUpdate, update_id: u64) -> Self {
Self {
enqueued_at: Utc::now(),
meta,
update_id,
}
}
pub fn processing(self) -> Processing {
Processing {
from: self,
started_processing_at: Utc::now(),
}
}
pub fn abort(self) -> Aborted {
Aborted {
from: self,
aborted_at: Utc::now(),
}
}
pub fn meta(&self) -> &RegisterUpdate {
&self.meta
}
pub fn id(&self) -> u64 {
self.update_id
}
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Processed {
pub success: UpdateResult,
pub processed_at: DateTime<Utc>,
#[serde(flatten)]
pub from: Processing,
}
impl Processed {
pub fn id(&self) -> u64 {
self.from.id()
}
pub fn meta(&self) -> &RegisterUpdate {
self.from.meta()
}
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Processing {
#[serde(flatten)]
pub from: Enqueued,
pub started_processing_at: DateTime<Utc>,
}
impl Processing {
pub fn id(&self) -> u64 {
self.from.id()
}
pub fn meta(&self) -> &RegisterUpdate {
self.from.meta()
}
pub fn process(self, success: UpdateResult) -> Processed {
Processed {
success,
from: self,
processed_at: Utc::now(),
}
}
pub fn fail(self, error: ResponseError) -> Failed {
Failed {
from: self,
error,
failed_at: Utc::now(),
}
}
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Aborted {
#[serde(flatten)]
from: Enqueued,
aborted_at: DateTime<Utc>,
}
impl Aborted {
pub fn id(&self) -> u64 {
self.from.id()
}
pub fn meta(&self) -> &RegisterUpdate {
self.from.meta()
}
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Failed {
#[serde(flatten)]
pub from: Processing,
pub error: ResponseError,
pub failed_at: DateTime<Utc>,
}
impl Failed {
pub fn id(&self) -> u64 {
self.from.id()
}
pub fn meta(&self) -> &RegisterUpdate {
self.from.meta()
}
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "status", rename_all = "camelCase")]
pub enum UpdateStatus {
Processing(Processing),
Enqueued(Enqueued),
Processed(Processed),
Aborted(Aborted),
Failed(Failed),
}
impl UpdateStatus {
pub fn id(&self) -> u64 {
match self {
UpdateStatus::Processing(u) => u.id(),
UpdateStatus::Enqueued(u) => u.id(),
UpdateStatus::Processed(u) => u.id(),
UpdateStatus::Aborted(u) => u.id(),
UpdateStatus::Failed(u) => u.id(),
}
}
pub fn meta(&self) -> &RegisterUpdate {
match self {
UpdateStatus::Processing(u) => u.meta(),
UpdateStatus::Enqueued(u) => u.meta(),
UpdateStatus::Processed(u) => u.meta(),
UpdateStatus::Aborted(u) => u.meta(),
UpdateStatus::Failed(u) => u.meta(),
}
}
pub fn processed(&self) -> Option<&Processed> {
match self {
UpdateStatus::Processed(p) => Some(p),
_ => None,
}
}
}
impl From<Enqueued> for UpdateStatus {
fn from(other: Enqueued) -> Self {
Self::Enqueued(other)
}
}
impl From<Aborted> for UpdateStatus {
fn from(other: Aborted) -> Self {
Self::Aborted(other)
}
}
impl From<Processed> for UpdateStatus {
fn from(other: Processed) -> Self {
Self::Processed(other)
}
}
impl From<Processing> for UpdateStatus {
fn from(other: Processing) -> Self {
Self::Processing(other)
}
}
impl From<Failed> for UpdateStatus {
fn from(other: Failed) -> Self {
Self::Failed(other)
}
}

View file

@ -1,98 +0,0 @@
use std::{collections::HashSet, path::PathBuf};
use log::{trace, warn};
use tokio::sync::mpsc;
use uuid::Uuid;
use super::{error::UuidResolverError, Result, UuidResolveMsg, UuidStore};
pub struct UuidResolverActor<S> {
inbox: mpsc::Receiver<UuidResolveMsg>,
store: S,
}
impl<S: UuidStore> UuidResolverActor<S> {
pub fn new(inbox: mpsc::Receiver<UuidResolveMsg>, store: S) -> Self {
Self { inbox, store }
}
pub async fn run(mut self) {
use UuidResolveMsg::*;
trace!("uuid resolver started");
loop {
match self.inbox.recv().await {
Some(Get { uid: name, ret }) => {
let _ = ret.send(self.handle_get(name).await);
}
Some(Delete { uid: name, ret }) => {
let _ = ret.send(self.handle_delete(name).await);
}
Some(List { ret }) => {
let _ = ret.send(self.handle_list().await);
}
Some(Insert { ret, uuid, name }) => {
let _ = ret.send(self.handle_insert(name, uuid).await);
}
Some(SnapshotRequest { path, ret }) => {
let _ = ret.send(self.handle_snapshot(path).await);
}
Some(GetSize { ret }) => {
let _ = ret.send(self.handle_get_size().await);
}
Some(DumpRequest { path, ret }) => {
let _ = ret.send(self.handle_dump(path).await);
}
// all senders have been dropped, need to quit.
None => break,
}
}
warn!("exiting uuid resolver loop");
}
async fn handle_get(&self, uid: String) -> Result<Uuid> {
self.store
.get_uuid(uid.clone())
.await?
.ok_or(UuidResolverError::UnexistingIndex(uid))
}
async fn handle_delete(&self, uid: String) -> Result<Uuid> {
self.store
.delete(uid.clone())
.await?
.ok_or(UuidResolverError::UnexistingIndex(uid))
}
async fn handle_list(&self) -> Result<Vec<(String, Uuid)>> {
let result = self.store.list().await?;
Ok(result)
}
async fn handle_snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
self.store.snapshot(path).await
}
async fn handle_dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
self.store.dump(path).await
}
async fn handle_insert(&self, uid: String, uuid: Uuid) -> Result<()> {
if !is_index_uid_valid(&uid) {
return Err(UuidResolverError::BadlyFormatted(uid));
}
self.store.insert(uid, uuid).await?;
Ok(())
}
async fn handle_get_size(&self) -> Result<u64> {
self.store.get_size().await
}
}
fn is_index_uid_valid(uid: &str) -> bool {
uid.chars()
.all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_')
}

View file

@ -1,34 +0,0 @@
use meilisearch_error::{Code, ErrorCode};
pub type Result<T> = std::result::Result<T, UuidResolverError>;
#[derive(Debug, thiserror::Error)]
pub enum UuidResolverError {
#[error("Index already exists.")]
NameAlreadyExist,
#[error("Index \"{0}\" not found.")]
UnexistingIndex(String),
#[error("Index must have a valid uid; Index uid can be of type integer or string only composed of alphanumeric characters, hyphens (-) and underscores (_).")]
BadlyFormatted(String),
#[error("Internal error: {0}")]
Internal(Box<dyn std::error::Error + Sync + Send + 'static>),
}
internal_error!(
UuidResolverError: heed::Error,
uuid::Error,
std::io::Error,
tokio::task::JoinError,
serde_json::Error
);
impl ErrorCode for UuidResolverError {
fn error_code(&self) -> Code {
match self {
UuidResolverError::NameAlreadyExist => Code::IndexAlreadyExists,
UuidResolverError::UnexistingIndex(_) => Code::IndexNotFound,
UuidResolverError::BadlyFormatted(_) => Code::InvalidIndexUid,
UuidResolverError::Internal(_) => Code::Internal,
}
}
}

View file

@ -1,87 +0,0 @@
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use tokio::sync::{mpsc, oneshot};
use uuid::Uuid;
use super::{HeedUuidStore, Result, UuidResolveMsg, UuidResolverActor, UuidResolverHandle};
#[derive(Clone)]
pub struct UuidResolverHandleImpl {
sender: mpsc::Sender<UuidResolveMsg>,
}
impl UuidResolverHandleImpl {
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
let (sender, reveiver) = mpsc::channel(100);
let store = HeedUuidStore::new(path)?;
let actor = UuidResolverActor::new(reveiver, store);
tokio::spawn(actor.run());
Ok(Self { sender })
}
}
#[async_trait::async_trait]
impl UuidResolverHandle for UuidResolverHandleImpl {
async fn get(&self, name: String) -> Result<Uuid> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::Get { uid: name, ret };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
async fn delete(&self, name: String) -> Result<Uuid> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::Delete { uid: name, ret };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
async fn list(&self) -> Result<Vec<(String, Uuid)>> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::List { ret };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
async fn insert(&self, name: String, uuid: Uuid) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::Insert { ret, name, uuid };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::SnapshotRequest { path, ret };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
async fn get_size(&self) -> Result<u64> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::GetSize { ret };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::DumpRequest { ret, path };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
}

View file

@ -1,37 +0,0 @@
use std::collections::HashSet;
use std::path::PathBuf;
use tokio::sync::oneshot;
use uuid::Uuid;
use super::Result;
pub enum UuidResolveMsg {
Get {
uid: String,
ret: oneshot::Sender<Result<Uuid>>,
},
Delete {
uid: String,
ret: oneshot::Sender<Result<Uuid>>,
},
List {
ret: oneshot::Sender<Result<Vec<(String, Uuid)>>>,
},
Insert {
uuid: Uuid,
name: String,
ret: oneshot::Sender<Result<()>>,
},
SnapshotRequest {
path: PathBuf,
ret: oneshot::Sender<Result<HashSet<Uuid>>>,
},
GetSize {
ret: oneshot::Sender<Result<u64>>,
},
DumpRequest {
path: PathBuf,
ret: oneshot::Sender<Result<HashSet<Uuid>>>,
},
}

View file

@ -1,35 +0,0 @@
mod actor;
pub mod error;
mod handle_impl;
mod message;
pub mod store;
use std::collections::HashSet;
use std::path::PathBuf;
use uuid::Uuid;
use actor::UuidResolverActor;
use error::Result;
use message::UuidResolveMsg;
use store::UuidStore;
#[cfg(test)]
use mockall::automock;
pub use handle_impl::UuidResolverHandleImpl;
pub use store::HeedUuidStore;
const UUID_STORE_SIZE: usize = 1_073_741_824; //1GiB
#[async_trait::async_trait]
#[cfg_attr(test, automock)]
pub trait UuidResolverHandle {
async fn get(&self, name: String) -> Result<Uuid>;
async fn insert(&self, name: String, uuid: Uuid) -> Result<()>;
async fn delete(&self, name: String) -> Result<Uuid>;
async fn list(&self) -> Result<Vec<(String, Uuid)>>;
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
async fn get_size(&self) -> Result<u64>;
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
}

View file

@ -1,224 +0,0 @@
use std::collections::HashSet;
use std::fs::{create_dir_all, File};
use std::io::{BufRead, BufReader, Write};
use std::path::{Path, PathBuf};
use heed::types::{ByteSlice, Str};
use heed::{CompactionOption, Database, Env, EnvOpenOptions};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use super::{error::UuidResolverError, Result, UUID_STORE_SIZE};
use crate::helpers::EnvSizer;
#[derive(Serialize, Deserialize)]
struct DumpEntry {
uuid: Uuid,
uid: String,
}
const UUIDS_DB_PATH: &str = "index_uuids";
#[async_trait::async_trait]
pub trait UuidStore: Sized {
// Create a new entry for `name`. Return an error if `err` and the entry already exists, return
// the uuid otherwise.
async fn get_uuid(&self, uid: String) -> Result<Option<Uuid>>;
async fn delete(&self, uid: String) -> Result<Option<Uuid>>;
async fn list(&self) -> Result<Vec<(String, Uuid)>>;
async fn insert(&self, name: String, uuid: Uuid) -> Result<()>;
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
async fn get_size(&self) -> Result<u64>;
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
}
#[derive(Clone)]
pub struct HeedUuidStore {
env: Env,
db: Database<Str, ByteSlice>,
}
impl HeedUuidStore {
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
let path = path.as_ref().join(UUIDS_DB_PATH);
create_dir_all(&path)?;
let mut options = EnvOpenOptions::new();
options.map_size(UUID_STORE_SIZE); // 1GB
let env = options.open(path)?;
let db = env.create_database(None)?;
Ok(Self { env, db })
}
pub fn get_uuid(&self, name: String) -> Result<Option<Uuid>> {
let env = self.env.clone();
let db = self.db;
let txn = env.read_txn()?;
match db.get(&txn, &name)? {
Some(uuid) => {
let uuid = Uuid::from_slice(uuid)?;
Ok(Some(uuid))
}
None => Ok(None),
}
}
pub fn delete(&self, uid: String) -> Result<Option<Uuid>> {
let env = self.env.clone();
let db = self.db;
let mut txn = env.write_txn()?;
match db.get(&txn, &uid)? {
Some(uuid) => {
let uuid = Uuid::from_slice(uuid)?;
db.delete(&mut txn, &uid)?;
txn.commit()?;
Ok(Some(uuid))
}
None => Ok(None),
}
}
pub fn list(&self) -> Result<Vec<(String, Uuid)>> {
let env = self.env.clone();
let db = self.db;
let txn = env.read_txn()?;
let mut entries = Vec::new();
for entry in db.iter(&txn)? {
let (name, uuid) = entry?;
let uuid = Uuid::from_slice(uuid)?;
entries.push((name.to_owned(), uuid))
}
Ok(entries)
}
pub fn insert(&self, name: String, uuid: Uuid) -> Result<()> {
let env = self.env.clone();
let db = self.db;
let mut txn = env.write_txn()?;
if db.get(&txn, &name)?.is_some() {
return Err(UuidResolverError::NameAlreadyExist);
}
db.put(&mut txn, &name, uuid.as_bytes())?;
txn.commit()?;
Ok(())
}
pub fn snapshot(&self, mut path: PathBuf) -> Result<HashSet<Uuid>> {
let env = self.env.clone();
let db = self.db;
// Write transaction to acquire a lock on the database.
let txn = env.write_txn()?;
let mut entries = HashSet::new();
for entry in db.iter(&txn)? {
let (_, uuid) = entry?;
let uuid = Uuid::from_slice(uuid)?;
entries.insert(uuid);
}
// only perform snapshot if there are indexes
if !entries.is_empty() {
path.push(UUIDS_DB_PATH);
create_dir_all(&path).unwrap();
path.push("data.mdb");
env.copy_to_path(path, CompactionOption::Enabled)?;
}
Ok(entries)
}
pub fn get_size(&self) -> Result<u64> {
Ok(self.env.size())
}
pub fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
let dump_path = path.join(UUIDS_DB_PATH);
create_dir_all(&dump_path)?;
let dump_file_path = dump_path.join("data.jsonl");
let mut dump_file = File::create(&dump_file_path)?;
let mut uuids = HashSet::new();
let txn = self.env.read_txn()?;
for entry in self.db.iter(&txn)? {
let (uid, uuid) = entry?;
let uid = uid.to_string();
let uuid = Uuid::from_slice(uuid)?;
let entry = DumpEntry { uuid, uid };
serde_json::to_writer(&mut dump_file, &entry)?;
dump_file.write_all(b"\n").unwrap();
uuids.insert(uuid);
}
Ok(uuids)
}
pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> Result<()> {
let uuid_resolver_path = dst.as_ref().join(UUIDS_DB_PATH);
std::fs::create_dir_all(&uuid_resolver_path)?;
let src_indexes = src.as_ref().join(UUIDS_DB_PATH).join("data.jsonl");
let indexes = File::open(&src_indexes)?;
let mut indexes = BufReader::new(indexes);
let mut line = String::new();
let db = Self::new(dst)?;
let mut txn = db.env.write_txn()?;
loop {
match indexes.read_line(&mut line) {
Ok(0) => break,
Ok(_) => {
let DumpEntry { uuid, uid } = serde_json::from_str(&line)?;
println!("importing {} {}", uid, uuid);
db.db.put(&mut txn, &uid, uuid.as_bytes())?;
}
Err(e) => return Err(e.into()),
}
line.clear();
}
txn.commit()?;
db.env.prepare_for_closing().wait();
Ok(())
}
}
#[async_trait::async_trait]
impl UuidStore for HeedUuidStore {
async fn get_uuid(&self, name: String) -> Result<Option<Uuid>> {
let this = self.clone();
tokio::task::spawn_blocking(move || this.get_uuid(name)).await?
}
async fn delete(&self, uid: String) -> Result<Option<Uuid>> {
let this = self.clone();
tokio::task::spawn_blocking(move || this.delete(uid)).await?
}
async fn list(&self) -> Result<Vec<(String, Uuid)>> {
let this = self.clone();
tokio::task::spawn_blocking(move || this.list()).await?
}
async fn insert(&self, name: String, uuid: Uuid) -> Result<()> {
let this = self.clone();
tokio::task::spawn_blocking(move || this.insert(name, uuid)).await?
}
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
let this = self.clone();
tokio::task::spawn_blocking(move || this.snapshot(path)).await?
}
async fn get_size(&self) -> Result<u64> {
self.get_size()
}
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
let this = self.clone();
tokio::task::spawn_blocking(move || this.dump(path)).await?
}
}

View file

@ -38,7 +38,6 @@
//! Most of the routes use [extractors] to handle the authentication.
#![allow(rustdoc::private_intra_doc_links)]
pub mod data;
#[macro_use]
pub mod error;
#[macro_use]
@ -46,11 +45,8 @@ pub mod extractors;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub mod analytics;
pub mod helpers;
mod index;
mod index_controller;
pub mod option;
pub mod routes;
pub use self::data::Data;
use crate::extractors::authentication::AuthConfig;
pub use option::Opt;
@ -58,6 +54,7 @@ use actix_web::web;
use extractors::authentication::policies::*;
use extractors::payload::PayloadConfig;
use meilisearch_lib::MeiliSearch;
use sha2::Digest;
#[derive(Clone)]
@ -86,14 +83,14 @@ impl ApiKeys {
pub fn configure_data(
config: &mut web::ServiceConfig,
data: Data,
data: MeiliSearch,
opt: &Opt,
) {
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
config
.app_data(web::Data::new(data.clone()))
// TODO!: Why are we passing the data with two different things?
.app_data(data)
// TODO!: Why are we passing the data with two different things?
//.app_data(data)
.app_data(
web::JsonConfig::default()
.limit(http_payload_size_limit)

View file

@ -1,7 +1,8 @@
use std::env;
use actix_web::HttpServer;
use meilisearch_http::{create_app, Data, Opt};
use meilisearch_http::{create_app, Opt};
use meilisearch_lib::MeiliSearch;
use structopt::StructOpt;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
@ -39,6 +40,26 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
Ok(())
}
fn setup_meilisearch(opt: &Opt) -> anyhow::Result<MeiliSearch> {
let mut meilisearch = MeiliSearch::builder();
meilisearch
.set_max_index_size(opt.max_index_size.get_bytes() as usize)
.set_max_update_store_size(opt.max_udb_size.get_bytes() as usize)
.set_ignore_missing_snapshot(opt.ignore_missing_snapshot)
.set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists)
.set_dump_dst(opt.dumps_dir.clone())
.set_snapshot_dir(opt.snapshot_dir.clone());
if let Some(ref path) = opt.import_snapshot {
meilisearch.set_import_snapshot(path.clone());
}
if let Some(ref path) = opt.import_dump {
meilisearch.set_dump_src(path.clone());
}
meilisearch.build(opt.db_path.clone(), opt.indexer_options.clone())
}
#[actix_web::main]
async fn main() -> anyhow::Result<()> {
let opt = Opt::from_args();
@ -55,23 +76,23 @@ async fn main() -> anyhow::Result<()> {
_ => unreachable!(),
}
let data = Data::new(opt.clone())?;
let meilisearch = setup_meilisearch(&opt)?;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
if !opt.no_analytics {
let analytics_data = data.clone();
let analytics_data = meilisearch.clone();
let analytics_opt = opt.clone();
tokio::task::spawn(analytics::analytics_sender(analytics_data, analytics_opt));
}
print_launch_resume(&opt);
run_http(data, opt).await?;
run_http(meilisearch, opt).await?;
Ok(())
}
async fn run_http(data: Data, opt: Opt) -> anyhow::Result<()> {
async fn run_http(data: MeiliSearch, opt: Opt) -> anyhow::Result<()> {
let _enable_dashboard = &opt.env == "development";
let opt_clone = opt.clone();
let http_server = HttpServer::new(move || create_app!(data, _enable_dashboard, opt_clone))

View file

@ -8,7 +8,6 @@ use std::sync::Arc;
use std::fs;
use byte_unit::Byte;
use milli::CompressionType;
use rustls::internal::pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
use rustls::{
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, NoClientAuth,
@ -16,56 +15,7 @@ use rustls::{
};
use structopt::StructOpt;
use sysinfo::{RefreshKind, System, SystemExt};
#[derive(Debug, Clone, StructOpt)]
pub struct IndexerOpts {
/// The amount of documents to skip before printing
/// a log regarding the indexing advancement.
#[structopt(long, default_value = "100000")] // 100k
pub log_every_n: usize,
/// Grenad max number of chunks in bytes.
#[structopt(long)]
pub max_nb_chunks: Option<usize>,
/// The maximum amount of memory the indexer will use. It defaults to 2/3
/// of the available memory. It is recommended to use something like 80%-90%
/// of the available memory, no more.
///
/// In case the engine is unable to retrieve the available memory the engine will
/// try to use the memory it needs but without real limit, this can lead to
/// Out-Of-Memory issues and it is recommended to specify the amount of memory to use.
#[structopt(long, default_value)]
pub max_memory: MaxMemory,
/// The name of the compression algorithm to use when compressing intermediate
/// Grenad chunks while indexing documents.
///
/// Choosing a fast algorithm will make the indexing faster but may consume more memory.
#[structopt(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])]
pub chunk_compression_type: CompressionType,
/// The level of compression of the chosen algorithm.
#[structopt(long, requires = "chunk-compression-type")]
pub chunk_compression_level: Option<u32>,
/// Number of parallel jobs for indexing, defaults to # of CPUs.
#[structopt(long)]
pub indexing_jobs: Option<usize>,
}
impl Default for IndexerOpts {
fn default() -> Self {
Self {
log_every_n: 100_000,
max_nb_chunks: None,
max_memory: MaxMemory::default(),
chunk_compression_type: CompressionType::None,
chunk_compression_level: None,
indexing_jobs: None,
}
}
}
use meilisearch_lib::options::IndexerOpts;
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];

View file

@ -1,17 +1,17 @@
use actix_web::{web, HttpResponse};
use log::debug;
use meilisearch_lib::MeiliSearch;
use serde::{Deserialize, Serialize};
use crate::error::ResponseError;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::Data;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(create_dump)))
.service(web::resource("/{dump_uid}/status").route(web::get().to(get_dump_status)));
}
pub async fn create_dump(data: GuardedData<Private, Data>) -> Result<HttpResponse, ResponseError> {
pub async fn create_dump(data: GuardedData<Private, MeiliSearch>) -> Result<HttpResponse, ResponseError> {
let res = data.create_dump().await?;
debug!("returns: {:?}", res);
@ -30,10 +30,10 @@ struct DumpParam {
}
async fn get_dump_status(
data: GuardedData<Private, Data>,
data: GuardedData<Private, MeiliSearch>,
path: web::Path<DumpParam>,
) -> Result<HttpResponse, ResponseError> {
let res = data.dump_status(path.dump_uid.clone()).await?;
let res = data.dump_info(path.dump_uid.clone()).await?;
debug!("returns: {:?}", res);
Ok(HttpResponse::Ok().json(res))

View file

@ -3,6 +3,8 @@ use actix_web::{web, HttpResponse};
use actix_web::web::Bytes;
use futures::{Stream, StreamExt};
use log::debug;
use meilisearch_lib::MeiliSearch;
use meilisearch_lib::index_controller::{DocumentAdditionFormat, Update};
use milli::update::IndexDocumentsMethod;
use serde::Deserialize;
//use serde_json::Value;
@ -11,9 +13,7 @@ use tokio::sync::mpsc;
use crate::error::ResponseError;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::extractors::payload::Payload;
use crate::index_controller::{DocumentAdditionFormat, Update};
use crate::routes::IndexParam;
use crate::Data;
const DEFAULT_RETRIEVE_DOCUMENTS_OFFSET: usize = 0;
const DEFAULT_RETRIEVE_DOCUMENTS_LIMIT: usize = 20;
@ -88,20 +88,20 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
}
pub async fn get_document(
data: GuardedData<Public, Data>,
data: GuardedData<Public, MeiliSearch>,
path: web::Path<DocumentParam>,
) -> Result<HttpResponse, ResponseError> {
let index = path.index_uid.clone();
let id = path.document_id.clone();
let document = data
.retrieve_document(index, id, None as Option<Vec<String>>)
.document(index, id, None as Option<Vec<String>>)
.await?;
debug!("returns: {:?}", document);
Ok(HttpResponse::Ok().json(document))
}
//pub async fn delete_document(
//data: GuardedData<Private, Data>,
//data: GuardedData<Private, MeiliSearch>,
//path: web::Path<DocumentParam>,
//) -> Result<HttpResponse, ResponseError> {
//let update_status = data
@ -120,7 +120,7 @@ pub struct BrowseQuery {
}
pub async fn get_all_documents(
data: GuardedData<Public, Data>,
data: GuardedData<Public, MeiliSearch>,
path: web::Path<IndexParam>,
params: web::Query<BrowseQuery>,
) -> Result<HttpResponse, ResponseError> {
@ -137,7 +137,7 @@ pub async fn get_all_documents(
});
let documents = data
.retrieve_documents(
.documents(
path.index_uid.clone(),
params.offset.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_OFFSET),
params.limit.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_LIMIT),
@ -157,7 +157,7 @@ pub struct UpdateDocumentsQuery {
/// Route used when the payload type is "application/json"
/// Used to add or replace documents
pub async fn add_documents(
data: GuardedData<Private, Data>,
data: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
params: web::Query<UpdateDocumentsQuery>,
body: Payload,
@ -180,7 +180,7 @@ pub async fn add_documents(
/// Route used when the payload type is "application/json"
/// Used to add or replace documents
pub async fn update_documents(
data: GuardedData<Private, Data>,
data: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
params: web::Query<UpdateDocumentsQuery>,
body: Payload,
@ -201,7 +201,7 @@ pub async fn update_documents(
}
//pub async fn delete_documents(
//data: GuardedData<Private, Data>,
//data: GuardedData<Private, MeiliSearch>,
//path: web::Path<IndexParam>,
//body: web::Json<Vec<Value>>,
//) -> Result<HttpResponse, ResponseError> {
@ -221,7 +221,7 @@ pub async fn update_documents(
//}
//pub async fn clear_all_documents(
//data: GuardedData<Private, Data>,
//data: GuardedData<Private, MeiliSearch>,
//path: web::Path<IndexParam>,
//) -> Result<HttpResponse, ResponseError> {
//let update_status = data.clear_documents(path.index_uid.clone()).await?;

View file

@ -1,12 +1,13 @@
use actix_web::{web, HttpResponse};
use chrono::{DateTime, Utc};
use log::debug;
use meilisearch_lib::MeiliSearch;
use meilisearch_lib::index_controller::IndexSettings;
use serde::{Deserialize, Serialize};
use crate::error::ResponseError;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::routes::IndexParam;
use crate::Data;
pub mod documents;
pub mod search;
@ -35,7 +36,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
);
}
pub async fn list_indexes(data: GuardedData<Private, Data>) -> Result<HttpResponse, ResponseError> {
pub async fn list_indexes(data: GuardedData<Private, MeiliSearch>) -> Result<HttpResponse, ResponseError> {
let indexes = data.list_indexes().await?;
debug!("returns: {:?}", indexes);
Ok(HttpResponse::Ok().json(indexes))
@ -49,7 +50,7 @@ pub struct IndexCreateRequest {
}
//pub async fn create_index(
//data: GuardedData<Private, Data>,
//data: GuardedData<Private, MeiliSearch>,
//body: web::Json<IndexCreateRequest>,
//) -> Result<HttpResponse, ResponseError> {
//let body = body.into_inner();
@ -75,30 +76,34 @@ pub struct UpdateIndexResponse {
}
pub async fn get_index(
data: GuardedData<Private, Data>,
data: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
let meta = data.index(path.index_uid.clone()).await?;
let meta = data.get_index(path.index_uid.clone()).await?;
debug!("returns: {:?}", meta);
Ok(HttpResponse::Ok().json(meta))
}
pub async fn update_index(
data: GuardedData<Private, Data>,
data: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
body: web::Json<UpdateIndexRequest>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", body);
let body = body.into_inner();
let settings = IndexSettings {
uid: body.uid,
primary_key: body.primary_key,
};
let meta = data
.update_index(path.into_inner().index_uid, body.primary_key, body.uid)
.update_index(path.into_inner().index_uid, settings)
.await?;
debug!("returns: {:?}", meta);
Ok(HttpResponse::Ok().json(meta))
}
//pub async fn delete_index(
//data: GuardedData<Private, Data>,
//data: GuardedData<Private, MeiliSearch>,
//path: web::Path<IndexParam>,
//) -> Result<HttpResponse, ResponseError> {
//data.delete_index(path.index_uid.clone()).await?;
@ -106,7 +111,7 @@ pub async fn update_index(
//}
pub async fn get_index_stats(
data: GuardedData<Private, Data>,
data: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
let response = data.get_index_stats(path.index_uid.clone()).await?;

View file

@ -1,13 +1,13 @@
use actix_web::{web, HttpResponse};
use log::debug;
use meilisearch_lib::MeiliSearch;
use meilisearch_lib::index::{default_crop_length, SearchQuery, DEFAULT_SEARCH_LIMIT};
use serde::Deserialize;
use serde_json::Value;
use crate::error::ResponseError;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::index::{default_crop_length, SearchQuery, DEFAULT_SEARCH_LIMIT};
use crate::routes::IndexParam;
use crate::Data;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
@ -82,7 +82,7 @@ impl From<SearchQueryGet> for SearchQuery {
}
pub async fn search_with_url_query(
data: GuardedData<Public, Data>,
data: GuardedData<Public, MeiliSearch>,
path: web::Path<IndexParam>,
params: web::Query<SearchQueryGet>,
) -> Result<HttpResponse, ResponseError> {
@ -99,7 +99,7 @@ pub async fn search_with_url_query(
}
pub async fn search_with_post(
data: GuardedData<Public, Data>,
data: GuardedData<Public, MeiliSearch>,
path: web::Path<IndexParam>,
params: web::Json<SearchQuery>,
) -> Result<HttpResponse, ResponseError> {

View file

@ -148,7 +148,7 @@
//);
//pub async fn update_all(
//data: GuardedData<Private, Data>,
//data: GuardedData<Private, MeiliSearch>,
//index_uid: web::Path<String>,
//body: web::Json<Settings<Unchecked>>,
//) -> Result<HttpResponse, ResponseError> {
@ -162,7 +162,7 @@
//}
//pub async fn get_all(
//data: GuardedData<Private, Data>,
//data: GuardedData<Private, MeiliSearch>,
//index_uid: web::Path<String>,
//) -> Result<HttpResponse, ResponseError> {
//let settings = data.settings(index_uid.into_inner()).await?;
@ -171,7 +171,7 @@
//}
//pub async fn delete_all(
//data: GuardedData<Private, Data>,
//data: GuardedData<Private, MeiliSearch>,
//index_uid: web::Path<String>,
//) -> Result<HttpResponse, ResponseError> {
//let settings = Settings::cleared();

View file

@ -1,12 +1,12 @@
use actix_web::{web, HttpResponse};
use chrono::{DateTime, Utc};
use log::debug;
use meilisearch_lib::MeiliSearch;
use serde::{Deserialize, Serialize};
use crate::error::ResponseError;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::routes::{IndexParam, UpdateStatusResponse};
use crate::Data;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::get().to(get_all_updates_status)))
@ -37,12 +37,12 @@ pub struct UpdateParam {
}
pub async fn get_update_status(
data: GuardedData<Private, Data>,
data: GuardedData<Private, MeiliSearch>,
path: web::Path<UpdateParam>,
) -> Result<HttpResponse, ResponseError> {
let params = path.into_inner();
let meta = data
.get_update_status(params.index_uid, params.update_id)
.update_status(params.index_uid, params.update_id)
.await?;
let meta = UpdateStatusResponse::from(meta);
debug!("returns: {:?}", meta);
@ -50,10 +50,10 @@ pub async fn get_update_status(
}
pub async fn get_all_updates_status(
data: GuardedData<Private, Data>,
data: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
let metas = data.get_updates_status(path.into_inner().index_uid).await?;
let metas = data.all_update_status(path.into_inner().index_uid).await?;
let metas = metas
.into_iter()
.map(UpdateStatusResponse::from)

View file

@ -5,12 +5,12 @@ use chrono::{DateTime, Utc};
use log::debug;
use serde::{Deserialize, Serialize};
use meilisearch_lib::{MeiliSearch, UpdateResult, UpdateStatus, RegisterUpdate};
use meilisearch_lib::index::{Settings, Unchecked};
use crate::error::ResponseError;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::index::{Settings, Unchecked};
use crate::index_controller::update_actor::RegisterUpdate;
use crate::index_controller::{UpdateResult, UpdateStatus};
use crate::{ApiKeys, Data};
use crate::ApiKeys;
mod dump;
mod indexes;
@ -187,15 +187,17 @@ impl From<UpdateStatus> for UpdateStatusResponse {
let duration = Duration::from_millis(duration as u64).as_secs_f64();
let update_id = failed.id();
let response = failed.error;
let processed_at = failed.failed_at;
let enqueued_at = failed.from.from.enqueued_at;
let response = failed.into();
let content = FailedUpdateResult {
update_id,
update_type,
response,
duration,
enqueued_at: failed.from.from.enqueued_at,
processed_at: failed.failed_at,
enqueued_at,
processed_at,
};
UpdateStatusResponse::Failed { content }
}
@ -230,7 +232,7 @@ pub async fn running() -> HttpResponse {
HttpResponse::Ok().json(serde_json::json!({ "status": "MeiliSearch is running" }))
}
async fn get_stats(data: GuardedData<Private, Data>) -> Result<HttpResponse, ResponseError> {
async fn get_stats(data: GuardedData<Private, MeiliSearch>) -> Result<HttpResponse, ResponseError> {
let response = data.get_all_stats().await?;
debug!("returns: {:?}", response);
@ -245,7 +247,7 @@ struct VersionResponse {
pkg_version: String,
}
async fn get_version(_data: GuardedData<Private, Data>) -> HttpResponse {
async fn get_version(_data: GuardedData<Private, MeiliSearch>) -> HttpResponse {
let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
@ -288,7 +290,7 @@ mod test {
macro_rules! impl_is_policy {
($($param:ident)*) => {
impl<Policy, Func, $($param,)* Res> Is<Policy, (($($param,)*), Res)> for Func
where Func: Fn(GuardedData<Policy, Data>, $($param,)*) -> Res {}
where Func: Fn(GuardedData<Policy, MeiliSearch>, $($param,)*) -> Res {}
};
}