1711: MeiliSearch refactor introducing OBKV format r=MarinPostma a=MarinPostma

This PR refactors multiple components of MeiliSearch and introduces the obkv document format (sketched briefly after the checklist):

- [x] Split meilisearch-http and meilisearch-lib
- [x] Replace `IndexActor` and `UuidResolver` with `IndexResolver`
- [x] Remove mentions of `Actor`
- [x] Remove Actor traits to simplify code
- [x] Integrate obkv document format
- [x] Remove `Data`
- [x] Restore all routes
- [x] Replace `Box<dyn error>` with `anyhow::Error`
- [x] Introduce update file store
- [x] Update file store error handling
- [x] Fix dumps
- [x] Fix snapshots
- [x] Fix tests
- [x] Update module documentation
- [x] Add CSV support (feat `@ManyTheFish`, #1729)
- [x] Add JSONL support
- [x] Integrate geosearch (feat `@irevoire`, #1725)
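
For context on the obkv checklist item: the format stores a document as a flat buffer of `(field id, raw value)` pairs, so a single field can be read back without deserializing the whole document. A minimal sketch of the idea, assuming the `obkv` 0.2 crate API (`KvWriter`/`KvReader` keyed by `u16` field ids; in milli the field names are mapped to ids separately):

```rust
use std::io;

fn main() -> io::Result<()> {
    // Hypothetical field ids, e.g. 0 => "id", 1 => "title".
    let mut buffer = Vec::new();
    let mut writer = obkv::KvWriter::new(&mut buffer);
    writer.insert(0u16, &b"42"[..])?; // values are raw bytes (here, JSON)
    writer.insert(1u16, &br#""Le Petit Prince""#[..])?;
    writer.finish()?;

    // Reading one field back does not touch the other fields.
    let document = obkv::KvReader::<u16>::new(&buffer);
    assert_eq!(document.get(1), Some(&br#""Le Petit Prince""#[..]));
    Ok(())
}
```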

Partially implements #1691 and #1690. The error handling is very basic for now; I will finish it in the next PR.

Some unit tests have been disabled; I will re-enable them ASAP, but they need a bit more work.

Closes #1531


P.S.: sorry for this monstrous PR :'(

Co-authored-by: mpostma <postma.marin@protonmail.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: many <maxime@meilisearch.com>
Commit 5fad37aebd, authored by bors[bot] on 2021-09-29 14:38:55 +00:00 and committed by GitHub.
79 changed files with 4060 additions and 4035 deletions

Cargo.lock (generated): diff suppressed because it is too large.

---

@@ -2,6 +2,7 @@
members = [
"meilisearch-http",
"meilisearch-error",
"meilisearch-lib",
]
resolver = "2"

---

@@ -6,3 +6,4 @@ edition = "2018"
[dependencies]
actix-http = "=3.0.0-beta.10"
serde = { version = "1.0.130", features = ["derive"] }

---

@@ -1,6 +1,7 @@
use std::fmt;
use actix_http::http::StatusCode;
use serde::{Deserialize, Serialize};
pub trait ErrorCode: std::error::Error {
fn error_code(&self) -> Code;
@@ -45,6 +46,7 @@ impl fmt::Display for ErrorType {
}
}
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
pub enum Code {
// index related error
CreateIndex,
@@ -69,6 +71,8 @@ pub enum Code {
BadRequest,
DocumentNotFound,
Internal,
InvalidGeoField,
InvalidRankingRule,
InvalidToken,
MissingAuthorizationHeader,
NotFound,
@@ -106,6 +110,8 @@ impl Code {
PrimaryKeyAlreadyPresent => {
ErrCode::invalid("primary_key_already_present", StatusCode::BAD_REQUEST)
}
// invalid ranking rule
InvalidRankingRule => ErrCode::invalid("invalid_request", StatusCode::BAD_REQUEST),
// invalid document
MaxFieldsLimitExceeded => {
@@ -124,6 +130,9 @@ impl Code {
BadRequest => ErrCode::invalid("bad_request", StatusCode::BAD_REQUEST),
DocumentNotFound => ErrCode::invalid("document_not_found", StatusCode::NOT_FOUND),
Internal => ErrCode::internal("internal", StatusCode::INTERNAL_SERVER_ERROR),
InvalidGeoField => {
ErrCode::invalid("invalid_geo_field", StatusCode::BAD_REQUEST)
}
InvalidToken => ErrCode::authentication("invalid_token", StatusCode::FORBIDDEN),
MissingAuthorizationHeader => {
ErrCode::authentication("missing_authorization_header", StatusCode::UNAUTHORIZED)

---

@@ -25,7 +25,7 @@ zip = { version = "0.5.13", optional = true }
actix-cors = { git = "https://github.com/MarinPostma/actix-extras.git", rev = "963ac94d" }
actix-web = { version = "4.0.0-beta.9", features = ["rustls"] }
actix-web-static-files = { git = "https://github.com/MarinPostma/actix-web-static-files.git", rev = "39d8006", optional = true }
anyhow = "1.0.43"
anyhow = { version = "1.0.43", features = ["backtrace"] }
async-stream = "0.3.2"
async-trait = "0.1.51"
arc-swap = "1.3.2"
@@ -44,11 +44,10 @@ http = "0.2.4"
indexmap = { version = "1.7.0", features = ["serde-1"] }
itertools = "0.10.1"
log = "0.4.14"
main_error = "0.1.1"
meilisearch-lib = { path = "../meilisearch-lib" }
meilisearch-error = { path = "../meilisearch-error" }
meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" }
memmap = "0.7.0"
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.13.1" }
mime = "0.3.16"
num_cpus = "1.13.0"
once_cell = "1.8.0"
@@ -73,16 +72,13 @@ obkv = "0.2.0"
pin-project = "1.0.8"
whoami = { version = "1.1.3", optional = true }
reqwest = { version = "0.11.4", features = ["json", "rustls-tls"], default-features = false, optional = true }
serdeval = "0.1.0"
sysinfo = "0.20.2"
tokio-stream = "0.1.7"
[dev-dependencies]
actix-rt = "2.2.0"
assert-json-diff = { branch = "master", git = "https://github.com/qdequele/assert-json-diff" }
mockall = "0.10.2"
paste = "1.0.5"
serde_url_params = "0.2.1"
tempdir = "0.3.7"
urlencoding = "2.1.0"
[features]

---

@@ -2,10 +2,10 @@ use std::hash::{Hash, Hasher};
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use log::debug;
use meilisearch_lib::MeiliSearch;
use serde::Serialize;
use siphasher::sip::SipHasher;
use crate::Data;
use crate::Opt;
const AMPLITUDE_API_KEY: &str = "f7fba398780e06d8fe6666a9be7e3d47";
@@ -18,8 +18,8 @@ struct EventProperties {
}
impl EventProperties {
async fn from(data: Data) -> anyhow::Result<EventProperties> {
let stats = data.index_controller.get_all_stats().await?;
async fn from(data: MeiliSearch) -> anyhow::Result<EventProperties> {
let stats = data.get_all_stats().await?;
let database_size = stats.database_size;
let last_update_timestamp = stats.last_update.map(|u| u.timestamp());
@@ -62,7 +62,7 @@ struct AmplitudeRequest<'a> {
events: Vec<Event<'a>>,
}
pub async fn analytics_sender(data: Data, opt: Opt) {
pub async fn analytics_sender(data: MeiliSearch, opt: Opt) {
let username = whoami::username();
let hostname = whoami::hostname();
let platform = whoami::platform();

---

@@ -1,133 +0,0 @@
use std::ops::Deref;
use std::sync::Arc;
use sha2::Digest;
use crate::index::{Checked, Settings};
use crate::index_controller::{
error::Result, DumpInfo, IndexController, IndexMetadata, IndexSettings, IndexStats, Stats,
};
use crate::option::Opt;
pub mod search;
mod updates;
#[derive(Clone)]
pub struct Data {
inner: Arc<DataInner>,
}
impl Deref for Data {
type Target = DataInner;
fn deref(&self) -> &Self::Target {
&self.inner
}
}
pub struct DataInner {
pub index_controller: IndexController,
pub api_keys: ApiKeys,
options: Opt,
}
#[derive(Clone)]
pub struct ApiKeys {
pub public: Option<String>,
pub private: Option<String>,
pub master: Option<String>,
}
impl ApiKeys {
pub fn generate_missing_api_keys(&mut self) {
if let Some(master_key) = &self.master {
if self.private.is_none() {
let key = format!("{}-private", master_key);
let sha = sha2::Sha256::digest(key.as_bytes());
self.private = Some(format!("{:x}", sha));
}
if self.public.is_none() {
let key = format!("{}-public", master_key);
let sha = sha2::Sha256::digest(key.as_bytes());
self.public = Some(format!("{:x}", sha));
}
}
}
}
impl Data {
pub fn new(options: Opt) -> anyhow::Result<Data> {
let path = options.db_path.clone();
let index_controller = IndexController::new(&path, &options)?;
let mut api_keys = ApiKeys {
master: options.clone().master_key,
private: None,
public: None,
};
api_keys.generate_missing_api_keys();
let inner = DataInner {
index_controller,
api_keys,
options,
};
let inner = Arc::new(inner);
Ok(Data { inner })
}
pub async fn settings(&self, uid: String) -> Result<Settings<Checked>> {
self.index_controller.settings(uid).await
}
pub async fn list_indexes(&self) -> Result<Vec<IndexMetadata>> {
self.index_controller.list_indexes().await
}
pub async fn index(&self, uid: String) -> Result<IndexMetadata> {
self.index_controller.get_index(uid).await
}
pub async fn create_index(
&self,
uid: String,
primary_key: Option<String>,
) -> Result<IndexMetadata> {
let settings = IndexSettings {
uid: Some(uid),
primary_key,
};
let meta = self.index_controller.create_index(settings).await?;
Ok(meta)
}
pub async fn get_index_stats(&self, uid: String) -> Result<IndexStats> {
Ok(self.index_controller.get_index_stats(uid).await?)
}
pub async fn get_all_stats(&self) -> Result<Stats> {
Ok(self.index_controller.get_all_stats().await?)
}
pub async fn create_dump(&self) -> Result<DumpInfo> {
Ok(self.index_controller.create_dump().await?)
}
pub async fn dump_status(&self, uid: String) -> Result<DumpInfo> {
Ok(self.index_controller.dump_info(uid).await?)
}
#[inline]
pub fn http_payload_size_limit(&self) -> usize {
self.options.http_payload_size_limit.get_bytes() as usize
}
#[inline]
pub fn api_keys(&self) -> &ApiKeys {
&self.api_keys
}
}
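
The key derivation above is compact enough to restate on its own: each missing key is the hex-encoded SHA-256 of the master key plus a fixed suffix. A condensed sketch mirroring `generate_missing_api_keys` (sha2 crate, as in the code above):

```rust
use sha2::{Digest, Sha256};

/// Derives an API key the way `generate_missing_api_keys` does:
/// SHA-256 over "{master}-{suffix}", rendered as lowercase hex.
fn derived_key(master: &str, suffix: &str) -> String {
    let sha = Sha256::digest(format!("{}-{}", master, suffix).as_bytes());
    format!("{:x}", sha)
}

fn main() {
    let master = "MASTER_KEY"; // example value
    println!("private: {}", derived_key(master, "private"));
    println!("public:  {}", derived_key(master, "public"));
}
```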

---

@@ -1,34 +0,0 @@
use serde_json::{Map, Value};
use super::Data;
use crate::index::{SearchQuery, SearchResult};
use crate::index_controller::error::Result;
impl Data {
pub async fn search(&self, index: String, search_query: SearchQuery) -> Result<SearchResult> {
self.index_controller.search(index, search_query).await
}
pub async fn retrieve_documents(
&self,
index: String,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Map<String, Value>>> {
self.index_controller
.documents(index, offset, limit, attributes_to_retrieve)
.await
}
pub async fn retrieve_document(
&self,
index: String,
document_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Map<String, Value>> {
self.index_controller
.document(index, document_id, attributes_to_retrieve)
.await
}
}

---

@@ -1,80 +0,0 @@
use milli::update::{IndexDocumentsMethod, UpdateFormat};
use crate::extractors::payload::Payload;
use crate::index::{Checked, Settings};
use crate::index_controller::{error::Result, IndexMetadata, IndexSettings, UpdateStatus};
use crate::Data;
impl Data {
pub async fn add_documents(
&self,
index: String,
method: IndexDocumentsMethod,
format: UpdateFormat,
stream: Payload,
primary_key: Option<String>,
) -> Result<UpdateStatus> {
let update_status = self
.index_controller
.add_documents(index, method, format, stream, primary_key)
.await?;
Ok(update_status)
}
pub async fn update_settings(
&self,
index: String,
settings: Settings<Checked>,
create: bool,
) -> Result<UpdateStatus> {
let update = self
.index_controller
.update_settings(index, settings, create)
.await?;
Ok(update)
}
pub async fn clear_documents(&self, index: String) -> Result<UpdateStatus> {
let update = self.index_controller.clear_documents(index).await?;
Ok(update)
}
pub async fn delete_documents(
&self,
index: String,
document_ids: Vec<String>,
) -> Result<UpdateStatus> {
let update = self
.index_controller
.delete_documents(index, document_ids)
.await?;
Ok(update)
}
pub async fn delete_index(&self, index: String) -> Result<()> {
self.index_controller.delete_index(index).await?;
Ok(())
}
pub async fn get_update_status(&self, index: String, uid: u64) -> Result<UpdateStatus> {
self.index_controller.update_status(index, uid).await
}
pub async fn get_updates_status(&self, index: String) -> Result<Vec<UpdateStatus>> {
self.index_controller.all_update_status(index).await
}
pub async fn update_index(
&self,
uid: String,
primary_key: Option<String>,
new_uid: Option<String>,
) -> Result<IndexMetadata> {
let settings = IndexSettings {
uid: new_uid,
primary_key,
};
self.index_controller.update_index(uid, settings).await
}
}

---

@@ -7,7 +7,6 @@ use actix_web::http::StatusCode;
use actix_web::HttpResponseBuilder;
use aweb::error::{JsonPayloadError, QueryPayloadError};
use meilisearch_error::{Code, ErrorCode};
use milli::UserError;
use serde::{Deserialize, Serialize};
#[derive(Debug, Serialize, Deserialize, Clone)]
@@ -55,64 +54,6 @@ impl aweb::error::ResponseError for ResponseError {
}
}
macro_rules! internal_error {
($target:ty : $($other:path), *) => {
$(
impl From<$other> for $target {
fn from(other: $other) -> Self {
Self::Internal(Box::new(other))
}
}
)*
}
}
#[derive(Debug)]
pub struct MilliError<'a>(pub &'a milli::Error);
impl Error for MilliError<'_> {}
impl fmt::Display for MilliError<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.0.fmt(f)
}
}
impl ErrorCode for MilliError<'_> {
fn error_code(&self) -> Code {
match self.0 {
milli::Error::InternalError(_) => Code::Internal,
milli::Error::IoError(_) => Code::Internal,
milli::Error::UserError(ref error) => {
match error {
// TODO: wait for spec for new error codes.
UserError::Csv(_)
| UserError::SerdeJson(_)
| UserError::MaxDatabaseSizeReached
| UserError::InvalidCriterionName { .. }
| UserError::InvalidDocumentId { .. }
| UserError::InvalidStoreFile
| UserError::NoSpaceLeftOnDevice
| UserError::InvalidAscDescSyntax { .. }
| UserError::DocumentLimitReached => Code::Internal,
UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded,
UserError::InvalidFilter(_) => Code::Filter,
UserError::InvalidFilterAttribute(_) => Code::Filter,
UserError::InvalidSortName { .. } => Code::Sort,
UserError::MissingDocumentId { .. } => Code::MissingDocumentId,
UserError::MissingPrimaryKey => Code::MissingPrimaryKey,
UserError::PrimaryKeyCannotBeChanged => Code::PrimaryKeyAlreadyPresent,
UserError::PrimaryKeyCannotBeReset => Code::PrimaryKeyAlreadyPresent,
UserError::SortRankingRuleMissing => Code::Sort,
UserError::UnknownInternalDocumentId { .. } => Code::DocumentNotFound,
UserError::InvalidFacetsDistribution { .. } => Code::BadRequest,
UserError::InvalidSortableAttribute { .. } => Code::Sort,
}
}
}
}
}
impl fmt::Display for PayloadError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {

---

@@ -1,4 +1,3 @@
pub mod compression;
mod env;
pub use env::EnvSizer;

---

@@ -1,351 +0,0 @@
use std::fs::File;
use std::path::PathBuf;
use std::sync::Arc;
use async_stream::stream;
use futures::stream::StreamExt;
use heed::CompactionOption;
use log::debug;
use milli::update::UpdateBuilder;
use tokio::task::spawn_blocking;
use tokio::{fs, sync::mpsc};
use uuid::Uuid;
use crate::index::{
update_handler::UpdateHandler, Checked, Document, SearchQuery, SearchResult, Settings,
};
use crate::index_controller::{
get_arc_ownership_blocking, Failed, IndexStats, Processed, Processing,
};
use crate::option::IndexerOpts;
use super::error::{IndexActorError, Result};
use super::{IndexMeta, IndexMsg, IndexSettings, IndexStore};
pub const CONCURRENT_INDEX_MSG: usize = 10;
pub struct IndexActor<S> {
receiver: Option<mpsc::Receiver<IndexMsg>>,
update_handler: Arc<UpdateHandler>,
store: S,
}
impl<S: IndexStore + Sync + Send> IndexActor<S> {
pub fn new(
receiver: mpsc::Receiver<IndexMsg>,
store: S,
options: &IndexerOpts,
) -> anyhow::Result<Self> {
let update_handler = UpdateHandler::new(options)?;
let update_handler = Arc::new(update_handler);
let receiver = Some(receiver);
Ok(Self {
receiver,
update_handler,
store,
})
}
/// `run` polls the actor's inbox and handles the messages it receives. Up to
/// `CONCURRENT_INDEX_MSG` messages are processed concurrently.
pub async fn run(mut self) {
let mut receiver = self
.receiver
.take()
.expect("Index Actor must have a inbox at this point.");
let stream = stream! {
loop {
match receiver.recv().await {
Some(msg) => yield msg,
None => break,
}
}
};
stream
.for_each_concurrent(Some(CONCURRENT_INDEX_MSG), |msg| self.handle_message(msg))
.await;
}
async fn handle_message(&self, msg: IndexMsg) {
use IndexMsg::*;
match msg {
CreateIndex {
uuid,
primary_key,
ret,
} => {
let _ = ret.send(self.handle_create_index(uuid, primary_key).await);
}
Update {
ret,
meta,
data,
uuid,
} => {
let _ = ret.send(self.handle_update(uuid, meta, data).await);
}
Search { ret, query, uuid } => {
let _ = ret.send(self.handle_search(uuid, query).await);
}
Settings { ret, uuid } => {
let _ = ret.send(self.handle_settings(uuid).await);
}
Documents {
ret,
uuid,
attributes_to_retrieve,
offset,
limit,
} => {
let _ = ret.send(
self.handle_fetch_documents(uuid, offset, limit, attributes_to_retrieve)
.await,
);
}
Document {
uuid,
attributes_to_retrieve,
doc_id,
ret,
} => {
let _ = ret.send(
self.handle_fetch_document(uuid, doc_id, attributes_to_retrieve)
.await,
);
}
Delete { uuid, ret } => {
let _ = ret.send(self.handle_delete(uuid).await);
}
GetMeta { uuid, ret } => {
let _ = ret.send(self.handle_get_meta(uuid).await);
}
UpdateIndex {
uuid,
index_settings,
ret,
} => {
let _ = ret.send(self.handle_update_index(uuid, index_settings).await);
}
Snapshot { uuid, path, ret } => {
let _ = ret.send(self.handle_snapshot(uuid, path).await);
}
Dump { uuid, path, ret } => {
let _ = ret.send(self.handle_dump(uuid, path).await);
}
GetStats { uuid, ret } => {
let _ = ret.send(self.handle_get_stats(uuid).await);
}
}
}
async fn handle_search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> {
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexActorError::UnexistingIndex)?;
let result = spawn_blocking(move || index.perform_search(query)).await??;
Ok(result)
}
async fn handle_create_index(
&self,
uuid: Uuid,
primary_key: Option<String>,
) -> Result<IndexMeta> {
let index = self.store.create(uuid, primary_key).await?;
let meta = spawn_blocking(move || IndexMeta::new(&index)).await??;
Ok(meta)
}
async fn handle_update(
&self,
uuid: Uuid,
meta: Processing,
data: Option<File>,
) -> Result<std::result::Result<Processed, Failed>> {
debug!("Processing update {}", meta.id());
let update_handler = self.update_handler.clone();
let index = match self.store.get(uuid).await? {
Some(index) => index,
None => self.store.create(uuid, None).await?,
};
Ok(spawn_blocking(move || update_handler.handle_update(meta, data, index)).await?)
}
async fn handle_settings(&self, uuid: Uuid) -> Result<Settings<Checked>> {
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexActorError::UnexistingIndex)?;
let result = spawn_blocking(move || index.settings()).await??;
Ok(result)
}
async fn handle_fetch_documents(
&self,
uuid: Uuid,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>> {
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexActorError::UnexistingIndex)?;
let result =
spawn_blocking(move || index.retrieve_documents(offset, limit, attributes_to_retrieve))
.await??;
Ok(result)
}
async fn handle_fetch_document(
&self,
uuid: Uuid,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document> {
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexActorError::UnexistingIndex)?;
let result =
spawn_blocking(move || index.retrieve_document(doc_id, attributes_to_retrieve))
.await??;
Ok(result)
}
async fn handle_delete(&self, uuid: Uuid) -> Result<()> {
let index = self.store.delete(uuid).await?;
if let Some(index) = index {
tokio::task::spawn(async move {
let index = index.0;
let store = get_arc_ownership_blocking(index).await;
spawn_blocking(move || {
store.prepare_for_closing().wait();
debug!("Index closed");
});
});
}
Ok(())
}
async fn handle_get_meta(&self, uuid: Uuid) -> Result<IndexMeta> {
match self.store.get(uuid).await? {
Some(index) => {
let meta = spawn_blocking(move || IndexMeta::new(&index)).await??;
Ok(meta)
}
None => Err(IndexActorError::UnexistingIndex),
}
}
async fn handle_update_index(
&self,
uuid: Uuid,
index_settings: IndexSettings,
) -> Result<IndexMeta> {
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexActorError::UnexistingIndex)?;
let result = spawn_blocking(move || match index_settings.primary_key {
Some(primary_key) => {
let mut txn = index.write_txn()?;
if index.primary_key(&txn)?.is_some() {
return Err(IndexActorError::ExistingPrimaryKey);
}
let mut builder = UpdateBuilder::new(0).settings(&mut txn, &index);
builder.set_primary_key(primary_key);
builder.execute(|_, _| ())?;
let meta = IndexMeta::new_txn(&index, &txn)?;
txn.commit()?;
Ok(meta)
}
None => {
let meta = IndexMeta::new(&index)?;
Ok(meta)
}
})
.await??;
Ok(result)
}
async fn handle_snapshot(&self, uuid: Uuid, mut path: PathBuf) -> Result<()> {
use tokio::fs::create_dir_all;
path.push("indexes");
create_dir_all(&path).await?;
if let Some(index) = self.store.get(uuid).await? {
let mut index_path = path.join(format!("index-{}", uuid));
create_dir_all(&index_path).await?;
index_path.push("data.mdb");
spawn_blocking(move || -> Result<()> {
// Get write txn to wait for ongoing write transaction before snapshot.
let _txn = index.write_txn()?;
index
.env
.copy_to_path(index_path, CompactionOption::Enabled)?;
Ok(())
})
.await??;
}
Ok(())
}
/// Create a `documents.jsonl` and a `settings.json` in `path/uid/` with a dump of all the
/// documents and all the settings.
async fn handle_dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexActorError::UnexistingIndex)?;
let path = path.join(format!("indexes/index-{}/", uuid));
fs::create_dir_all(&path).await?;
tokio::task::spawn_blocking(move || index.dump(path)).await??;
Ok(())
}
async fn handle_get_stats(&self, uuid: Uuid) -> Result<IndexStats> {
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexActorError::UnexistingIndex)?;
spawn_blocking(move || {
let rtxn = index.read_txn()?;
Ok(IndexStats {
size: index.size(),
number_of_documents: index.number_of_documents(&rtxn)?,
is_indexing: None,
field_distribution: index.field_distribution(&rtxn)?,
})
})
.await?
}
}
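
This file, together with the messages and handles below, follows one actor shape throughout: a spawned task owns the state and drains an mpsc inbox, and every message carries a oneshot `ret` channel for the reply. A stripped-down sketch of that shape (sequential here, while the real actor processes up to `CONCURRENT_INDEX_MSG` messages concurrently):

```rust
use tokio::sync::{mpsc, oneshot};

// Invented minimal actor, for illustration only.
enum Msg {
    Add { n: u64, ret: oneshot::Sender<()> },
    Get { ret: oneshot::Sender<u64> },
}

async fn run(mut inbox: mpsc::Receiver<Msg>) {
    // The actor owns its state, so no locking is needed.
    let mut state = 0u64;
    while let Some(msg) = inbox.recv().await {
        match msg {
            Msg::Add { n, ret } => {
                state += n;
                let _ = ret.send(());
            }
            Msg::Get { ret } => {
                let _ = ret.send(state);
            }
        }
    }
}

#[tokio::main]
async fn main() {
    let (sender, receiver) = mpsc::channel(100);
    tokio::spawn(run(receiver));

    let (ret, answer) = oneshot::channel();
    let _ = sender.send(Msg::Add { n: 2, ret }).await;
    answer.await.unwrap();

    let (ret, answer) = oneshot::channel();
    let _ = sender.send(Msg::Get { ret }).await;
    assert_eq!(answer.await.unwrap(), 2);
}
```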

---

@@ -1,48 +0,0 @@
use meilisearch_error::{Code, ErrorCode};
use crate::{error::MilliError, index::error::IndexError};
pub type Result<T> = std::result::Result<T, IndexActorError>;
#[derive(thiserror::Error, Debug)]
pub enum IndexActorError {
#[error("{0}")]
IndexError(#[from] IndexError),
#[error("Index already exists")]
IndexAlreadyExists,
#[error("Index not found")]
UnexistingIndex,
#[error("A primary key is already present. It's impossible to update it")]
ExistingPrimaryKey,
#[error("Internal Error: {0}")]
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
#[error("{0}")]
Milli(#[from] milli::Error),
}
macro_rules! internal_error {
($($other:path), *) => {
$(
impl From<$other> for IndexActorError {
fn from(other: $other) -> Self {
Self::Internal(Box::new(other))
}
}
)*
}
}
internal_error!(heed::Error, tokio::task::JoinError, std::io::Error);
impl ErrorCode for IndexActorError {
fn error_code(&self) -> Code {
match self {
IndexActorError::IndexError(e) => e.error_code(),
IndexActorError::IndexAlreadyExists => Code::IndexAlreadyExists,
IndexActorError::UnexistingIndex => Code::IndexNotFound,
IndexActorError::ExistingPrimaryKey => Code::PrimaryKeyAlreadyPresent,
IndexActorError::Internal(_) => Code::Internal,
IndexActorError::Milli(e) => MilliError(e).error_code(),
}
}
}
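
For reference, each type listed in the `internal_error!` invocation above expands to one `From` impl; for `heed::Error` that is:

```rust
impl From<heed::Error> for IndexActorError {
    fn from(other: heed::Error) -> Self {
        Self::Internal(Box::new(other))
    }
}
```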

---

@@ -1,164 +0,0 @@
use crate::option::IndexerOpts;
use std::path::{Path, PathBuf};
use tokio::sync::{mpsc, oneshot};
use uuid::Uuid;
use crate::{
index::Checked,
index_controller::{IndexSettings, IndexStats, Processing},
};
use crate::{
index::{Document, SearchQuery, SearchResult, Settings},
index_controller::{Failed, Processed},
};
use super::error::Result;
use super::{IndexActor, IndexActorHandle, IndexMeta, IndexMsg, MapIndexStore};
#[derive(Clone)]
pub struct IndexActorHandleImpl {
sender: mpsc::Sender<IndexMsg>,
}
#[async_trait::async_trait]
impl IndexActorHandle for IndexActorHandleImpl {
async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::CreateIndex {
ret,
uuid,
primary_key,
};
let _ = self.sender.send(msg).await;
receiver.await.expect("IndexActor has been killed")
}
async fn update(
&self,
uuid: Uuid,
meta: Processing,
data: Option<std::fs::File>,
) -> Result<std::result::Result<Processed, Failed>> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Update {
ret,
meta,
data,
uuid,
};
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Search { uuid, query, ret };
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn settings(&self, uuid: Uuid) -> Result<Settings<Checked>> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Settings { uuid, ret };
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn documents(
&self,
uuid: Uuid,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Documents {
uuid,
ret,
offset,
attributes_to_retrieve,
limit,
};
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn document(
&self,
uuid: Uuid,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Document {
uuid,
ret,
doc_id,
attributes_to_retrieve,
};
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn delete(&self, uuid: Uuid) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Delete { uuid, ret };
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::GetMeta { uuid, ret };
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn update_index(&self, uuid: Uuid, index_settings: IndexSettings) -> Result<IndexMeta> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::UpdateIndex {
uuid,
index_settings,
ret,
};
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Snapshot { uuid, path, ret };
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Dump { uuid, path, ret };
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::GetStats { uuid, ret };
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
}
impl IndexActorHandleImpl {
pub fn new(
path: impl AsRef<Path>,
index_size: usize,
options: &IndexerOpts,
) -> anyhow::Result<Self> {
let (sender, receiver) = mpsc::channel(100);
let store = MapIndexStore::new(path, index_size);
let actor = IndexActor::new(receiver, store, options)?;
tokio::task::spawn(actor.run());
Ok(Self { sender })
}
}

---

@@ -1,74 +0,0 @@
use std::path::PathBuf;
use tokio::sync::oneshot;
use uuid::Uuid;
use super::error::Result as IndexResult;
use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings};
use crate::index_controller::{Failed, IndexStats, Processed, Processing};
use super::{IndexMeta, IndexSettings};
#[allow(clippy::large_enum_variant)]
pub enum IndexMsg {
CreateIndex {
uuid: Uuid,
primary_key: Option<String>,
ret: oneshot::Sender<IndexResult<IndexMeta>>,
},
Update {
uuid: Uuid,
meta: Processing,
data: Option<std::fs::File>,
ret: oneshot::Sender<IndexResult<Result<Processed, Failed>>>,
},
Search {
uuid: Uuid,
query: SearchQuery,
ret: oneshot::Sender<IndexResult<SearchResult>>,
},
Settings {
uuid: Uuid,
ret: oneshot::Sender<IndexResult<Settings<Checked>>>,
},
Documents {
uuid: Uuid,
attributes_to_retrieve: Option<Vec<String>>,
offset: usize,
limit: usize,
ret: oneshot::Sender<IndexResult<Vec<Document>>>,
},
Document {
uuid: Uuid,
attributes_to_retrieve: Option<Vec<String>>,
doc_id: String,
ret: oneshot::Sender<IndexResult<Document>>,
},
Delete {
uuid: Uuid,
ret: oneshot::Sender<IndexResult<()>>,
},
GetMeta {
uuid: Uuid,
ret: oneshot::Sender<IndexResult<IndexMeta>>,
},
UpdateIndex {
uuid: Uuid,
index_settings: IndexSettings,
ret: oneshot::Sender<IndexResult<IndexMeta>>,
},
Snapshot {
uuid: Uuid,
path: PathBuf,
ret: oneshot::Sender<IndexResult<()>>,
},
Dump {
uuid: Uuid,
path: PathBuf,
ret: oneshot::Sender<IndexResult<()>>,
},
GetStats {
uuid: Uuid,
ret: oneshot::Sender<IndexResult<IndexStats>>,
},
}

---

@@ -1,169 +0,0 @@
use std::fs::File;
use std::path::PathBuf;
use chrono::{DateTime, Utc};
#[cfg(test)]
use mockall::automock;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use actor::IndexActor;
pub use actor::CONCURRENT_INDEX_MSG;
pub use handle_impl::IndexActorHandleImpl;
use message::IndexMsg;
use store::{IndexStore, MapIndexStore};
use crate::index::{Checked, Document, Index, SearchQuery, SearchResult, Settings};
use crate::index_controller::{Failed, IndexStats, Processed, Processing};
use error::Result;
use super::IndexSettings;
mod actor;
pub mod error;
mod handle_impl;
mod message;
mod store;
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexMeta {
created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
pub primary_key: Option<String>,
}
impl IndexMeta {
fn new(index: &Index) -> Result<Self> {
let txn = index.read_txn()?;
Self::new_txn(index, &txn)
}
fn new_txn(index: &Index, txn: &heed::RoTxn) -> Result<Self> {
let created_at = index.created_at(txn)?;
let updated_at = index.updated_at(txn)?;
let primary_key = index.primary_key(txn)?.map(String::from);
Ok(Self {
created_at,
updated_at,
primary_key,
})
}
}
#[async_trait::async_trait]
#[cfg_attr(test, automock)]
pub trait IndexActorHandle {
async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta>;
async fn update(
&self,
uuid: Uuid,
meta: Processing,
data: Option<File>,
) -> Result<std::result::Result<Processed, Failed>>;
async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult>;
async fn settings(&self, uuid: Uuid) -> Result<Settings<Checked>>;
async fn documents(
&self,
uuid: Uuid,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>>;
async fn document(
&self,
uuid: Uuid,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document>;
async fn delete(&self, uuid: Uuid) -> Result<()>;
async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta>;
async fn update_index(&self, uuid: Uuid, index_settings: IndexSettings) -> Result<IndexMeta>;
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()>;
async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()>;
async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats>;
}
#[cfg(test)]
mod test {
use std::sync::Arc;
use super::*;
#[async_trait::async_trait]
/// Useful for passing around an `Arc<MockIndexActorHandle>` in tests.
impl IndexActorHandle for Arc<MockIndexActorHandle> {
async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta> {
self.as_ref().create_index(uuid, primary_key).await
}
async fn update(
&self,
uuid: Uuid,
meta: Processing,
data: Option<std::fs::File>,
) -> Result<std::result::Result<Processed, Failed>> {
self.as_ref().update(uuid, meta, data).await
}
async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> {
self.as_ref().search(uuid, query).await
}
async fn settings(&self, uuid: Uuid) -> Result<Settings<Checked>> {
self.as_ref().settings(uuid).await
}
async fn documents(
&self,
uuid: Uuid,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>> {
self.as_ref()
.documents(uuid, offset, limit, attributes_to_retrieve)
.await
}
async fn document(
&self,
uuid: Uuid,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document> {
self.as_ref()
.document(uuid, doc_id, attributes_to_retrieve)
.await
}
async fn delete(&self, uuid: Uuid) -> Result<()> {
self.as_ref().delete(uuid).await
}
async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta> {
self.as_ref().get_index_meta(uuid).await
}
async fn update_index(
&self,
uuid: Uuid,
index_settings: IndexSettings,
) -> Result<IndexMeta> {
self.as_ref().update_index(uuid, index_settings).await
}
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
self.as_ref().snapshot(uuid, path).await
}
async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
self.as_ref().dump(uuid, path).await
}
async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats> {
self.as_ref().get_index_stats(uuid).await
}
}
}

---

@@ -1,456 +0,0 @@
use std::collections::BTreeMap;
use std::path::Path;
use std::sync::Arc;
use std::time::Duration;
use actix_web::web::Bytes;
use chrono::{DateTime, Utc};
use futures::stream::StreamExt;
use log::error;
use log::info;
use milli::FieldDistribution;
use serde::{Deserialize, Serialize};
use tokio::sync::mpsc;
use tokio::time::sleep;
use uuid::Uuid;
use dump_actor::DumpActorHandle;
pub use dump_actor::{DumpInfo, DumpStatus};
use index_actor::IndexActorHandle;
use snapshot::{load_snapshot, SnapshotService};
use update_actor::UpdateActorHandle;
pub use updates::*;
use uuid_resolver::{error::UuidResolverError, UuidResolverHandle};
use crate::extractors::payload::Payload;
use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings};
use crate::option::Opt;
use error::Result;
use self::dump_actor::load_dump;
use self::error::IndexControllerError;
mod dump_actor;
pub mod error;
pub mod index_actor;
mod snapshot;
mod update_actor;
mod updates;
mod uuid_resolver;
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexMetadata {
#[serde(skip)]
pub uuid: Uuid,
pub uid: String,
name: String,
#[serde(flatten)]
pub meta: index_actor::IndexMeta,
}
#[derive(Clone, Debug)]
pub struct IndexSettings {
pub uid: Option<String>,
pub primary_key: Option<String>,
}
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct IndexStats {
#[serde(skip)]
pub size: u64,
pub number_of_documents: u64,
/// Whether the current index is performing an update. It is initially `None` when the
/// index returns it, since it is the `UpdateStore` that knows which index is currently indexing. It is
/// later set to either true or false once we retrieve the information from the `UpdateStore`.
pub is_indexing: Option<bool>,
pub field_distribution: FieldDistribution,
}
#[derive(Clone)]
pub struct IndexController {
uuid_resolver: uuid_resolver::UuidResolverHandleImpl,
index_handle: index_actor::IndexActorHandleImpl,
update_handle: update_actor::UpdateActorHandleImpl<Bytes>,
dump_handle: dump_actor::DumpActorHandleImpl,
}
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct Stats {
pub database_size: u64,
pub last_update: Option<DateTime<Utc>>,
pub indexes: BTreeMap<String, IndexStats>,
}
impl IndexController {
pub fn new(path: impl AsRef<Path>, options: &Opt) -> anyhow::Result<Self> {
let index_size = options.max_index_size.get_bytes() as usize;
let update_store_size = options.max_index_size.get_bytes() as usize;
if let Some(ref path) = options.import_snapshot {
info!("Loading from snapshot {:?}", path);
load_snapshot(
&options.db_path,
path,
options.ignore_snapshot_if_db_exists,
options.ignore_missing_snapshot,
)?;
} else if let Some(ref src_path) = options.import_dump {
load_dump(
&options.db_path,
src_path,
options.max_index_size.get_bytes() as usize,
options.max_udb_size.get_bytes() as usize,
&options.indexer_options,
)?;
}
std::fs::create_dir_all(&path)?;
let uuid_resolver = uuid_resolver::UuidResolverHandleImpl::new(&path)?;
let index_handle =
index_actor::IndexActorHandleImpl::new(&path, index_size, &options.indexer_options)?;
let update_handle = update_actor::UpdateActorHandleImpl::new(
index_handle.clone(),
&path,
update_store_size,
)?;
let dump_handle = dump_actor::DumpActorHandleImpl::new(
&options.dumps_dir,
uuid_resolver.clone(),
update_handle.clone(),
options.max_index_size.get_bytes() as usize,
options.max_udb_size.get_bytes() as usize,
)?;
if options.schedule_snapshot {
let snapshot_service = SnapshotService::new(
uuid_resolver.clone(),
update_handle.clone(),
Duration::from_secs(options.snapshot_interval_sec),
options.snapshot_dir.clone(),
options
.db_path
.file_name()
.map(|n| n.to_owned().into_string().expect("invalid path"))
.unwrap_or_else(|| String::from("data.ms")),
);
tokio::task::spawn(snapshot_service.run());
}
Ok(Self {
uuid_resolver,
index_handle,
update_handle,
dump_handle,
})
}
pub async fn add_documents(
&self,
uid: String,
method: milli::update::IndexDocumentsMethod,
format: milli::update::UpdateFormat,
payload: Payload,
primary_key: Option<String>,
) -> Result<UpdateStatus> {
let perform_update = |uuid| async move {
let meta = UpdateMeta::DocumentsAddition {
method,
format,
primary_key,
};
let (sender, receiver) = mpsc::channel(10);
// It is necessary to spawn a local task to send the payload to the update handle to
// prevent deadlocking between the update_handle::update that waits for the update to be
// registered and the update_actor that waits for the payload to be sent to it.
tokio::task::spawn_local(async move {
payload
.for_each(|r| async {
let _ = sender.send(r).await;
})
.await
});
// This must be done *AFTER* spawning the task.
self.update_handle.update(meta, receiver, uuid).await
};
match self.uuid_resolver.get(uid).await {
Ok(uuid) => Ok(perform_update(uuid).await?),
Err(UuidResolverError::UnexistingIndex(name)) => {
let uuid = Uuid::new_v4();
let status = perform_update(uuid).await?;
// ignore if index creation fails now, since it may already have been created
let _ = self.index_handle.create_index(uuid, None).await;
self.uuid_resolver.insert(name, uuid).await?;
Ok(status)
}
Err(e) => Err(e.into()),
}
}
pub async fn clear_documents(&self, uid: String) -> Result<UpdateStatus> {
let uuid = self.uuid_resolver.get(uid).await?;
let meta = UpdateMeta::ClearDocuments;
let (_, receiver) = mpsc::channel(1);
let status = self.update_handle.update(meta, receiver, uuid).await?;
Ok(status)
}
pub async fn delete_documents(
&self,
uid: String,
documents: Vec<String>,
) -> Result<UpdateStatus> {
let uuid = self.uuid_resolver.get(uid).await?;
let meta = UpdateMeta::DeleteDocuments { ids: documents };
let (_, receiver) = mpsc::channel(1);
let status = self.update_handle.update(meta, receiver, uuid).await?;
Ok(status)
}
pub async fn update_settings(
&self,
uid: String,
settings: Settings<Checked>,
create: bool,
) -> Result<UpdateStatus> {
let perform_update = |uuid| async move {
let meta = UpdateMeta::Settings(settings.into_unchecked());
// Nothing to send; drop the sender right away so as not to block the update actor.
let (_, receiver) = mpsc::channel(1);
self.update_handle.update(meta, receiver, uuid).await
};
match self.uuid_resolver.get(uid).await {
Ok(uuid) => Ok(perform_update(uuid).await?),
Err(UuidResolverError::UnexistingIndex(name)) if create => {
let uuid = Uuid::new_v4();
let status = perform_update(uuid).await?;
// ignore if index creation fails now, since it may already have been created
let _ = self.index_handle.create_index(uuid, None).await;
self.uuid_resolver.insert(name, uuid).await?;
Ok(status)
}
Err(e) => Err(e.into()),
}
}
pub async fn create_index(&self, index_settings: IndexSettings) -> Result<IndexMetadata> {
let IndexSettings { uid, primary_key } = index_settings;
let uid = uid.ok_or(IndexControllerError::MissingUid)?;
let uuid = Uuid::new_v4();
let meta = self.index_handle.create_index(uuid, primary_key).await?;
self.uuid_resolver.insert(uid.clone(), uuid).await?;
let meta = IndexMetadata {
uuid,
name: uid.clone(),
uid,
meta,
};
Ok(meta)
}
pub async fn delete_index(&self, uid: String) -> Result<()> {
let uuid = self.uuid_resolver.delete(uid).await?;
// We remove the index from the resolver synchronously, and effectively perform the index
// deletion as a background task.
let update_handle = self.update_handle.clone();
let index_handle = self.index_handle.clone();
tokio::spawn(async move {
if let Err(e) = update_handle.delete(uuid).await {
error!("Error while deleting index: {}", e);
}
if let Err(e) = index_handle.delete(uuid).await {
error!("Error while deleting index: {}", e);
}
});
Ok(())
}
pub async fn update_status(&self, uid: String, id: u64) -> Result<UpdateStatus> {
let uuid = self.uuid_resolver.get(uid).await?;
let result = self.update_handle.update_status(uuid, id).await?;
Ok(result)
}
pub async fn all_update_status(&self, uid: String) -> Result<Vec<UpdateStatus>> {
let uuid = self.uuid_resolver.get(uid).await?;
let result = self.update_handle.get_all_updates_status(uuid).await?;
Ok(result)
}
pub async fn list_indexes(&self) -> Result<Vec<IndexMetadata>> {
let uuids = self.uuid_resolver.list().await?;
let mut ret = Vec::new();
for (uid, uuid) in uuids {
let meta = self.index_handle.get_index_meta(uuid).await?;
let meta = IndexMetadata {
uuid,
name: uid.clone(),
uid,
meta,
};
ret.push(meta);
}
Ok(ret)
}
pub async fn settings(&self, uid: String) -> Result<Settings<Checked>> {
let uuid = self.uuid_resolver.get(uid.clone()).await?;
let settings = self.index_handle.settings(uuid).await?;
Ok(settings)
}
pub async fn documents(
&self,
uid: String,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>> {
let uuid = self.uuid_resolver.get(uid.clone()).await?;
let documents = self
.index_handle
.documents(uuid, offset, limit, attributes_to_retrieve)
.await?;
Ok(documents)
}
pub async fn document(
&self,
uid: String,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document> {
let uuid = self.uuid_resolver.get(uid.clone()).await?;
let document = self
.index_handle
.document(uuid, doc_id, attributes_to_retrieve)
.await?;
Ok(document)
}
pub async fn update_index(
&self,
uid: String,
mut index_settings: IndexSettings,
) -> Result<IndexMetadata> {
if index_settings.uid.is_some() {
index_settings.uid.take();
}
let uuid = self.uuid_resolver.get(uid.clone()).await?;
let meta = self.index_handle.update_index(uuid, index_settings).await?;
let meta = IndexMetadata {
uuid,
name: uid.clone(),
uid,
meta,
};
Ok(meta)
}
pub async fn search(&self, uid: String, query: SearchQuery) -> Result<SearchResult> {
let uuid = self.uuid_resolver.get(uid).await?;
let result = self.index_handle.search(uuid, query).await?;
Ok(result)
}
pub async fn get_index(&self, uid: String) -> Result<IndexMetadata> {
let uuid = self.uuid_resolver.get(uid.clone()).await?;
let meta = self.index_handle.get_index_meta(uuid).await?;
let meta = IndexMetadata {
uuid,
name: uid.clone(),
uid,
meta,
};
Ok(meta)
}
pub async fn get_uuids_size(&self) -> Result<u64> {
Ok(self.uuid_resolver.get_size().await?)
}
pub async fn get_index_stats(&self, uid: String) -> Result<IndexStats> {
let uuid = self.uuid_resolver.get(uid).await?;
let update_infos = self.update_handle.get_info().await?;
let mut stats = self.index_handle.get_index_stats(uuid).await?;
// Check if the currently indexing update is from our index.
stats.is_indexing = Some(Some(uuid) == update_infos.processing);
Ok(stats)
}
pub async fn get_all_stats(&self) -> Result<Stats> {
let update_infos = self.update_handle.get_info().await?;
let mut database_size = self.get_uuids_size().await? + update_infos.size;
let mut last_update: Option<DateTime<_>> = None;
let mut indexes = BTreeMap::new();
for index in self.list_indexes().await? {
let mut index_stats = self.index_handle.get_index_stats(index.uuid).await?;
database_size += index_stats.size;
last_update = last_update.map_or(Some(index.meta.updated_at), |last| {
Some(last.max(index.meta.updated_at))
});
index_stats.is_indexing = Some(Some(index.uuid) == update_infos.processing);
indexes.insert(index.uid, index_stats);
}
Ok(Stats {
database_size,
last_update,
indexes,
})
}
pub async fn create_dump(&self) -> Result<DumpInfo> {
Ok(self.dump_handle.create_dump().await?)
}
pub async fn dump_info(&self, uid: String) -> Result<DumpInfo> {
Ok(self.dump_handle.dump_info(uid).await?)
}
}
pub async fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T {
loop {
match Arc::try_unwrap(item) {
Ok(item) => return item,
Err(item_arc) => {
item = item_arc;
sleep(Duration::from_millis(100)).await;
continue;
}
}
}
}
/// Parses the v1 version of the Asc ranking rule `asc(price)` and returns the field name.
pub fn asc_ranking_rule(text: &str) -> Option<&str> {
text.split_once("asc(")
.and_then(|(_, tail)| tail.rsplit_once(")"))
.map(|(field, _)| field)
}
/// Parses the v1 version of the Desc ranking rule `desc(price)` and returns the field name.
pub fn desc_ranking_rule(text: &str) -> Option<&str> {
text.split_once("desc(")
.and_then(|(_, tail)| tail.rsplit_once(")"))
.map(|(field, _)| field)
}
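
A quick usage check for the two v1 parsers above (assuming both functions are in scope); they only extract the field name between the parentheses:

```rust
fn main() {
    assert_eq!(asc_ranking_rule("asc(price)"), Some("price"));
    assert_eq!(desc_ranking_rule("desc(release_date)"), Some("release_date"));
    assert_eq!(asc_ranking_rule("words"), None); // not an asc(...) rule
}
```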

---

@@ -1,268 +0,0 @@
use std::path::{Path, PathBuf};
use std::time::Duration;
use anyhow::bail;
use log::{error, info, trace};
use tokio::fs;
use tokio::task::spawn_blocking;
use tokio::time::sleep;
use super::update_actor::UpdateActorHandle;
use super::uuid_resolver::UuidResolverHandle;
use crate::helpers::compression;
pub struct SnapshotService<U, R> {
uuid_resolver_handle: R,
update_handle: U,
snapshot_period: Duration,
snapshot_path: PathBuf,
db_name: String,
}
impl<U, R> SnapshotService<U, R>
where
U: UpdateActorHandle,
R: UuidResolverHandle,
{
pub fn new(
uuid_resolver_handle: R,
update_handle: U,
snapshot_period: Duration,
snapshot_path: PathBuf,
db_name: String,
) -> Self {
Self {
uuid_resolver_handle,
update_handle,
snapshot_period,
snapshot_path,
db_name,
}
}
pub async fn run(self) {
info!(
"Snapshot scheduled every {}s.",
self.snapshot_period.as_secs()
);
loop {
if let Err(e) = self.perform_snapshot().await {
error!("Error while performing snapshot: {}", e);
}
sleep(self.snapshot_period).await;
}
}
async fn perform_snapshot(&self) -> anyhow::Result<()> {
trace!("Performing snapshot.");
let snapshot_dir = self.snapshot_path.clone();
fs::create_dir_all(&snapshot_dir).await?;
let temp_snapshot_dir =
spawn_blocking(move || tempfile::tempdir_in(snapshot_dir)).await??;
let temp_snapshot_path = temp_snapshot_dir.path().to_owned();
let uuids = self
.uuid_resolver_handle
.snapshot(temp_snapshot_path.clone())
.await?;
if uuids.is_empty() {
return Ok(());
}
self.update_handle
.snapshot(uuids, temp_snapshot_path.clone())
.await?;
let snapshot_dir = self.snapshot_path.clone();
let snapshot_path = self
.snapshot_path
.join(format!("{}.snapshot", self.db_name));
let snapshot_path = spawn_blocking(move || -> anyhow::Result<PathBuf> {
let temp_snapshot_file = tempfile::NamedTempFile::new_in(snapshot_dir)?;
let temp_snapshot_file_path = temp_snapshot_file.path().to_owned();
compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?;
temp_snapshot_file.persist(&snapshot_path)?;
Ok(snapshot_path)
})
.await??;
trace!("Created snapshot in {:?}.", snapshot_path);
Ok(())
}
}
pub fn load_snapshot(
db_path: impl AsRef<Path>,
snapshot_path: impl AsRef<Path>,
ignore_snapshot_if_db_exists: bool,
ignore_missing_snapshot: bool,
) -> anyhow::Result<()> {
if !db_path.as_ref().exists() && snapshot_path.as_ref().exists() {
match compression::from_tar_gz(snapshot_path, &db_path) {
Ok(()) => Ok(()),
Err(e) => {
// clean created db folder
std::fs::remove_dir_all(&db_path)?;
Err(e)
}
}
} else if db_path.as_ref().exists() && !ignore_snapshot_if_db_exists {
bail!(
"database already exists at {:?}, try to delete it or rename it",
db_path
.as_ref()
.canonicalize()
.unwrap_or_else(|_| db_path.as_ref().to_owned())
)
} else if !snapshot_path.as_ref().exists() && !ignore_missing_snapshot {
bail!(
"snapshot doesn't exist at {:?}",
snapshot_path
.as_ref()
.canonicalize()
.unwrap_or_else(|_| snapshot_path.as_ref().to_owned())
)
} else {
Ok(())
}
}
#[cfg(test)]
mod test {
use std::iter::FromIterator;
use std::{collections::HashSet, sync::Arc};
use futures::future::{err, ok};
use rand::Rng;
use tokio::time::timeout;
use uuid::Uuid;
use super::*;
use crate::index_controller::index_actor::MockIndexActorHandle;
use crate::index_controller::update_actor::{
error::UpdateActorError, MockUpdateActorHandle, UpdateActorHandleImpl,
};
use crate::index_controller::uuid_resolver::{
error::UuidResolverError, MockUuidResolverHandle,
};
#[actix_rt::test]
async fn test_normal() {
let mut rng = rand::thread_rng();
let uuids_num: usize = rng.gen_range(5..10);
let uuids = (0..uuids_num)
.map(|_| Uuid::new_v4())
.collect::<HashSet<_>>();
let mut uuid_resolver = MockUuidResolverHandle::new();
let uuids_clone = uuids.clone();
uuid_resolver
.expect_snapshot()
.times(1)
.returning(move |_| Box::pin(ok(uuids_clone.clone())));
let uuids_clone = uuids.clone();
let mut index_handle = MockIndexActorHandle::new();
index_handle
.expect_snapshot()
.withf(move |uuid, _path| uuids_clone.contains(uuid))
.times(uuids_num)
.returning(move |_, _| Box::pin(ok(())));
let dir = tempfile::tempdir_in(".").unwrap();
let handle = Arc::new(index_handle);
let update_handle =
UpdateActorHandleImpl::<Vec<u8>>::new(handle.clone(), dir.path(), 4096 * 100).unwrap();
let snapshot_path = tempfile::tempdir_in(".").unwrap();
let snapshot_service = SnapshotService::new(
uuid_resolver,
update_handle,
Duration::from_millis(100),
snapshot_path.path().to_owned(),
"data.ms".to_string(),
);
snapshot_service.perform_snapshot().await.unwrap();
}
#[actix_rt::test]
async fn error_performing_uuid_snapshot() {
let mut uuid_resolver = MockUuidResolverHandle::new();
uuid_resolver
.expect_snapshot()
.times(1)
// arbitrary error
.returning(|_| Box::pin(err(UuidResolverError::NameAlreadyExist)));
let update_handle = MockUpdateActorHandle::new();
let snapshot_path = tempfile::tempdir_in(".").unwrap();
let snapshot_service = SnapshotService::new(
uuid_resolver,
update_handle,
Duration::from_millis(100),
snapshot_path.path().to_owned(),
"data.ms".to_string(),
);
assert!(snapshot_service.perform_snapshot().await.is_err());
// Nothing was written to the file
assert!(!snapshot_path.path().join("data.ms.snapshot").exists());
}
#[actix_rt::test]
async fn error_performing_index_snapshot() {
let uuid = Uuid::new_v4();
let mut uuid_resolver = MockUuidResolverHandle::new();
uuid_resolver
.expect_snapshot()
.times(1)
.returning(move |_| Box::pin(ok(HashSet::from_iter(Some(uuid)))));
let mut update_handle = MockUpdateActorHandle::new();
update_handle
.expect_snapshot()
// arbitrary error
.returning(|_, _| Box::pin(err(UpdateActorError::UnexistingUpdate(0))));
let snapshot_path = tempfile::tempdir_in(".").unwrap();
let snapshot_service = SnapshotService::new(
uuid_resolver,
update_handle,
Duration::from_millis(100),
snapshot_path.path().to_owned(),
"data.ms".to_string(),
);
assert!(snapshot_service.perform_snapshot().await.is_err());
// Nothing was written to the file
assert!(!snapshot_path.path().join("data.ms.snapshot").exists());
}
#[actix_rt::test]
async fn test_loop() {
let mut uuid_resolver = MockUuidResolverHandle::new();
uuid_resolver
.expect_snapshot()
// we expect the function to be called between 2 and 3 times in the given interval.
.times(2..4)
// arbitrary error, to short-circuit the function
.returning(move |_| Box::pin(err(UuidResolverError::NameAlreadyExist)));
let update_handle = MockUpdateActorHandle::new();
let snapshot_path = tempfile::tempdir_in(".").unwrap();
let snapshot_service = SnapshotService::new(
uuid_resolver,
update_handle,
Duration::from_millis(100),
snapshot_path.path().to_owned(),
"data.ms".to_string(),
);
let _ = timeout(Duration::from_millis(300), snapshot_service.run()).await;
}
}
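
One detail of `perform_snapshot` above worth calling out: the archive is written to a `NamedTempFile` created inside the destination directory and then `persist`ed, so readers can never observe a half-written `.snapshot` file. The pattern in isolation (tempfile crate as above, `anyhow` only for brevity):

```rust
use std::io::Write;
use std::path::Path;
use tempfile::NamedTempFile;

fn publish_atomically(dir: &Path, name: &str, bytes: &[u8]) -> anyhow::Result<()> {
    // Creating the temp file in `dir` keeps the final rename on one filesystem,
    // which is what makes `persist` atomic.
    let mut tmp = NamedTempFile::new_in(dir)?;
    tmp.write_all(bytes)?;
    tmp.persist(dir.join(name))?;
    Ok(())
}
```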

---

@@ -1,270 +0,0 @@
use std::collections::HashSet;
use std::io::SeekFrom;
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
use async_stream::stream;
use futures::StreamExt;
use log::trace;
use serdeval::*;
use tokio::fs;
use tokio::io::AsyncWriteExt;
use tokio::sync::mpsc;
use uuid::Uuid;
use super::error::{Result, UpdateActorError};
use super::{PayloadData, UpdateMsg, UpdateStore, UpdateStoreInfo};
use crate::index_controller::index_actor::IndexActorHandle;
use crate::index_controller::{UpdateMeta, UpdateStatus};
pub struct UpdateActor<D, I> {
path: PathBuf,
store: Arc<UpdateStore>,
inbox: Option<mpsc::Receiver<UpdateMsg<D>>>,
index_handle: I,
must_exit: Arc<AtomicBool>,
}
impl<D, I> UpdateActor<D, I>
where
D: AsRef<[u8]> + Sized + 'static,
I: IndexActorHandle + Clone + Send + Sync + 'static,
{
pub fn new(
update_db_size: usize,
inbox: mpsc::Receiver<UpdateMsg<D>>,
path: impl AsRef<Path>,
index_handle: I,
) -> anyhow::Result<Self> {
let path = path.as_ref().join("updates");
std::fs::create_dir_all(&path)?;
let mut options = heed::EnvOpenOptions::new();
options.map_size(update_db_size);
let must_exit = Arc::new(AtomicBool::new(false));
let store = UpdateStore::open(options, &path, index_handle.clone(), must_exit.clone())?;
std::fs::create_dir_all(path.join("update_files"))?;
let inbox = Some(inbox);
Ok(Self {
path,
store,
inbox,
index_handle,
must_exit,
})
}
pub async fn run(mut self) {
use UpdateMsg::*;
trace!("Started update actor.");
let mut inbox = self
.inbox
.take()
.expect("A receiver should be present by now.");
let must_exit = self.must_exit.clone();
let stream = stream! {
loop {
let msg = inbox.recv().await;
if must_exit.load(std::sync::atomic::Ordering::Relaxed) {
break;
}
match msg {
Some(msg) => yield msg,
None => break,
}
}
};
stream
.for_each_concurrent(Some(10), |msg| async {
match msg {
Update {
uuid,
meta,
data,
ret,
} => {
let _ = ret.send(self.handle_update(uuid, meta, data).await);
}
ListUpdates { uuid, ret } => {
let _ = ret.send(self.handle_list_updates(uuid).await);
}
GetUpdate { uuid, ret, id } => {
let _ = ret.send(self.handle_get_update(uuid, id).await);
}
Delete { uuid, ret } => {
let _ = ret.send(self.handle_delete(uuid).await);
}
Snapshot { uuids, path, ret } => {
let _ = ret.send(self.handle_snapshot(uuids, path).await);
}
GetInfo { ret } => {
let _ = ret.send(self.handle_get_info().await);
}
Dump { uuids, path, ret } => {
let _ = ret.send(self.handle_dump(uuids, path).await);
}
}
})
.await;
}
async fn handle_update(
&self,
uuid: Uuid,
meta: UpdateMeta,
payload: mpsc::Receiver<PayloadData<D>>,
) -> Result<UpdateStatus> {
let file_path = match meta {
UpdateMeta::DocumentsAddition { .. } => {
let update_file_id = uuid::Uuid::new_v4();
let path = self
.path
.join(format!("update_files/update_{}", update_file_id));
let mut file = fs::OpenOptions::new()
.read(true)
.write(true)
.create(true)
.open(&path)
.await?;
async fn write_to_file<D>(
file: &mut fs::File,
mut payload: mpsc::Receiver<PayloadData<D>>,
) -> Result<usize>
where
D: AsRef<[u8]> + Sized + 'static,
{
let mut file_len = 0;
while let Some(bytes) = payload.recv().await {
let bytes = bytes?;
file_len += bytes.as_ref().len();
file.write_all(bytes.as_ref()).await?;
}
file.flush().await?;
Ok(file_len)
}
let file_len = write_to_file(&mut file, payload).await;
match file_len {
Ok(len) if len > 0 => {
let file = file.into_std().await;
Some((file, update_file_id))
}
Err(e) => {
fs::remove_file(&path).await?;
return Err(e);
}
_ => {
fs::remove_file(&path).await?;
None
}
}
}
_ => None,
};
let update_store = self.store.clone();
tokio::task::spawn_blocking(move || {
use std::io::{BufReader, Seek};
// If there is no payload, skip the validation check.
let update_uuid = if let Some((mut file, uuid)) = file_path {
// set the file back to the beginning
file.seek(SeekFrom::Start(0))?;
// Check that the json payload is valid:
let reader = BufReader::new(&mut file);
// Validate that the payload is in the correct format.
let _: Seq<Map<Str, Any>> = serde_json::from_reader(reader)
.map_err(|e| UpdateActorError::InvalidPayload(Box::new(e)))?;
Some(uuid)
} else {
None
};
// The payload is valid, we can register it to the update store.
let status = update_store
.register_update(meta, update_uuid, uuid)
.map(UpdateStatus::Enqueued)?;
Ok(status)
})
.await?
}
async fn handle_list_updates(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>> {
let update_store = self.store.clone();
tokio::task::spawn_blocking(move || {
let result = update_store.list(uuid)?;
Ok(result)
})
.await?
}
async fn handle_get_update(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus> {
let store = self.store.clone();
tokio::task::spawn_blocking(move || {
let result = store
.meta(uuid, id)?
.ok_or(UpdateActorError::UnexistingUpdate(id))?;
Ok(result)
})
.await?
}
async fn handle_delete(&self, uuid: Uuid) -> Result<()> {
let store = self.store.clone();
tokio::task::spawn_blocking(move || store.delete_all(uuid)).await??;
Ok(())
}
async fn handle_snapshot(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
let index_handle = self.index_handle.clone();
let update_store = self.store.clone();
tokio::task::spawn_blocking(move || update_store.snapshot(&uuids, &path, index_handle))
.await??;
Ok(())
}
async fn handle_dump(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
let index_handle = self.index_handle.clone();
let update_store = self.store.clone();
tokio::task::spawn_blocking(move || -> Result<()> {
update_store.dump(&uuids, path.to_path_buf(), index_handle)?;
Ok(())
})
.await??;
Ok(())
}
async fn handle_get_info(&self) -> Result<UpdateStoreInfo> {
let update_store = self.store.clone();
let info = tokio::task::spawn_blocking(move || -> Result<UpdateStoreInfo> {
let info = update_store.get_info()?;
Ok(info)
})
.await??;
Ok(info)
}
}

View File

@ -1,61 +0,0 @@
use std::error::Error;
use meilisearch_error::{Code, ErrorCode};
use crate::index_controller::index_actor::error::IndexActorError;
pub type Result<T> = std::result::Result<T, UpdateActorError>;
#[derive(Debug, thiserror::Error)]
#[allow(clippy::large_enum_variant)]
pub enum UpdateActorError {
#[error("Update {0} not found.")]
UnexistingUpdate(u64),
#[error("Internal error: {0}")]
Internal(Box<dyn Error + Send + Sync + 'static>),
#[error("{0}")]
IndexActor(#[from] IndexActorError),
#[error(
"update store was shut down due to a fatal error, please check your logs for more info."
)]
FatalUpdateStoreError,
#[error("{0}")]
InvalidPayload(Box<dyn Error + Send + Sync + 'static>),
#[error("{0}")]
PayloadError(#[from] actix_web::error::PayloadError),
}
impl<T> From<tokio::sync::mpsc::error::SendError<T>> for UpdateActorError {
fn from(_: tokio::sync::mpsc::error::SendError<T>) -> Self {
Self::FatalUpdateStoreError
}
}
impl From<tokio::sync::oneshot::error::RecvError> for UpdateActorError {
fn from(_: tokio::sync::oneshot::error::RecvError) -> Self {
Self::FatalUpdateStoreError
}
}
internal_error!(
UpdateActorError: heed::Error,
std::io::Error,
serde_json::Error,
tokio::task::JoinError
);
impl ErrorCode for UpdateActorError {
fn error_code(&self) -> Code {
match self {
UpdateActorError::UnexistingUpdate(_) => Code::NotFound,
UpdateActorError::Internal(_) => Code::Internal,
UpdateActorError::IndexActor(e) => e.error_code(),
UpdateActorError::FatalUpdateStoreError => Code::Internal,
UpdateActorError::InvalidPayload(_) => Code::BadRequest,
UpdateActorError::PayloadError(error) => match error {
actix_web::error::PayloadError::Overflow => Code::PayloadTooLarge,
_ => Code::Internal,
},
}
}
}

View File

@ -1,103 +0,0 @@
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use tokio::sync::{mpsc, oneshot};
use uuid::Uuid;
use crate::index_controller::{IndexActorHandle, UpdateStatus};
use super::error::Result;
use super::{PayloadData, UpdateActor, UpdateActorHandle, UpdateMeta, UpdateMsg, UpdateStoreInfo};
#[derive(Clone)]
pub struct UpdateActorHandleImpl<D> {
sender: mpsc::Sender<UpdateMsg<D>>,
}
impl<D> UpdateActorHandleImpl<D>
where
D: AsRef<[u8]> + Sized + 'static + Sync + Send,
{
pub fn new<I>(
index_handle: I,
path: impl AsRef<Path>,
update_store_size: usize,
) -> anyhow::Result<Self>
where
I: IndexActorHandle + Clone + Send + Sync + 'static,
{
let path = path.as_ref().to_owned();
let (sender, receiver) = mpsc::channel(100);
let actor = UpdateActor::new(update_store_size, receiver, path, index_handle)?;
tokio::task::spawn(actor.run());
Ok(Self { sender })
}
}
#[async_trait::async_trait]
impl<D> UpdateActorHandle for UpdateActorHandleImpl<D>
where
D: AsRef<[u8]> + Sized + 'static + Sync + Send,
{
type Data = D;
async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::ListUpdates { uuid, ret };
self.sender.send(msg).await?;
receiver.await?
}
async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::GetUpdate { uuid, id, ret };
self.sender.send(msg).await?;
receiver.await?
}
async fn delete(&self, uuid: Uuid) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::Delete { uuid, ret };
self.sender.send(msg).await?;
receiver.await?
}
async fn snapshot(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::Snapshot { uuids, path, ret };
self.sender.send(msg).await?;
receiver.await?
}
async fn dump(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::Dump { uuids, path, ret };
self.sender.send(msg).await?;
receiver.await?
}
async fn get_info(&self) -> Result<UpdateStoreInfo> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::GetInfo { ret };
self.sender.send(msg).await?;
receiver.await?
}
async fn update(
&self,
meta: UpdateMeta,
data: mpsc::Receiver<PayloadData<Self::Data>>,
uuid: Uuid,
) -> Result<UpdateStatus> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::Update {
uuid,
data,
meta,
ret,
};
self.sender.send(msg).await?;
receiver.await?
}
}

View File

@ -1,43 +0,0 @@
use std::collections::HashSet;
use std::path::PathBuf;
use tokio::sync::{mpsc, oneshot};
use uuid::Uuid;
use super::error::Result;
use super::{PayloadData, UpdateMeta, UpdateStatus, UpdateStoreInfo};
pub enum UpdateMsg<D> {
Update {
uuid: Uuid,
meta: UpdateMeta,
data: mpsc::Receiver<PayloadData<D>>,
ret: oneshot::Sender<Result<UpdateStatus>>,
},
ListUpdates {
uuid: Uuid,
ret: oneshot::Sender<Result<Vec<UpdateStatus>>>,
},
GetUpdate {
uuid: Uuid,
ret: oneshot::Sender<Result<UpdateStatus>>,
id: u64,
},
Delete {
uuid: Uuid,
ret: oneshot::Sender<Result<()>>,
},
Snapshot {
uuids: HashSet<Uuid>,
path: PathBuf,
ret: oneshot::Sender<Result<()>>,
},
Dump {
uuids: HashSet<Uuid>,
path: PathBuf,
ret: oneshot::Sender<Result<()>>,
},
GetInfo {
ret: oneshot::Sender<Result<UpdateStoreInfo>>,
},
}

View File

@ -1,44 +0,0 @@
use std::{collections::HashSet, path::PathBuf};
use actix_web::error::PayloadError;
use tokio::sync::mpsc;
use uuid::Uuid;
use crate::index_controller::{UpdateMeta, UpdateStatus};
use actor::UpdateActor;
use error::Result;
use message::UpdateMsg;
pub use handle_impl::UpdateActorHandleImpl;
pub use store::{UpdateStore, UpdateStoreInfo};
mod actor;
pub mod error;
mod handle_impl;
mod message;
pub mod store;
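/// A result for a single payload chunk, as received from actix-web: either bytes or a payload error.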
type PayloadData<D> = std::result::Result<D, PayloadError>;
#[cfg(test)]
use mockall::automock;
#[async_trait::async_trait]
#[cfg_attr(test, automock(type Data=Vec<u8>;))]
pub trait UpdateActorHandle {
type Data: AsRef<[u8]> + Sized + 'static + Sync + Send;
async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>>;
async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus>;
async fn delete(&self, uuid: Uuid) -> Result<()>;
async fn snapshot(&self, uuid: HashSet<Uuid>, path: PathBuf) -> Result<()>;
async fn dump(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()>;
async fn get_info(&self) -> Result<UpdateStoreInfo>;
async fn update(
&self,
meta: UpdateMeta,
data: mpsc::Receiver<PayloadData<Self::Data>>,
uuid: Uuid,
) -> Result<UpdateStatus>;
}

View File

@ -1,184 +0,0 @@
use std::{
collections::HashSet,
fs::{create_dir_all, File},
io::{BufRead, BufReader, Write},
path::{Path, PathBuf},
};
use heed::{EnvOpenOptions, RoTxn};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use super::{Result, State, UpdateStore};
use crate::index_controller::{
index_actor::IndexActorHandle, update_actor::store::update_uuid_to_file_path, Enqueued,
UpdateStatus,
};
#[derive(Serialize, Deserialize)]
struct UpdateEntry {
uuid: Uuid,
update: UpdateStatus,
}
impl UpdateStore {
pub fn dump(
&self,
uuids: &HashSet<Uuid>,
path: PathBuf,
handle: impl IndexActorHandle,
) -> Result<()> {
let state_lock = self.state.write();
state_lock.swap(State::Dumping);
// txn must *always* be acquired after the state lock, or it will deadlock.
let txn = self.env.write_txn()?;
let dump_path = path.join("updates");
create_dir_all(&dump_path)?;
self.dump_updates(&txn, uuids, &dump_path)?;
let fut = dump_indexes(uuids, handle, &path);
tokio::runtime::Handle::current().block_on(fut)?;
state_lock.swap(State::Idle);
Ok(())
}
fn dump_updates(
&self,
txn: &RoTxn,
uuids: &HashSet<Uuid>,
path: impl AsRef<Path>,
) -> Result<()> {
let dump_data_path = path.as_ref().join("data.jsonl");
let mut dump_data_file = File::create(dump_data_path)?;
let update_files_path = path.as_ref().join(super::UPDATE_DIR);
create_dir_all(&update_files_path)?;
self.dump_pending(txn, uuids, &mut dump_data_file, &path)?;
self.dump_completed(txn, uuids, &mut dump_data_file)?;
Ok(())
}
fn dump_pending(
&self,
txn: &RoTxn,
uuids: &HashSet<Uuid>,
mut file: &mut File,
dst_path: impl AsRef<Path>,
) -> Result<()> {
let pendings = self.pending_queue.iter(txn)?.lazily_decode_data();
for pending in pendings {
let ((_, uuid, _), data) = pending?;
if uuids.contains(&uuid) {
let update = data.decode()?;
if let Some(ref update_uuid) = update.content {
let src = super::update_uuid_to_file_path(&self.path, *update_uuid);
let dst = super::update_uuid_to_file_path(&dst_path, *update_uuid);
std::fs::copy(src, dst)?;
}
let update_json = UpdateEntry {
uuid,
update: update.into(),
};
serde_json::to_writer(&mut file, &update_json)?;
file.write_all(b"\n")?;
}
}
Ok(())
}
fn dump_completed(
&self,
txn: &RoTxn,
uuids: &HashSet<Uuid>,
mut file: &mut File,
) -> Result<()> {
let updates = self.updates.iter(txn)?.lazily_decode_data();
for update in updates {
let ((uuid, _), data) = update?;
if uuids.contains(&uuid) {
let update = data.decode()?;
let update_json = UpdateEntry { uuid, update };
serde_json::to_writer(&mut file, &update_json)?;
file.write_all(b"\n")?;
}
}
Ok(())
}
pub fn load_dump(
src: impl AsRef<Path>,
dst: impl AsRef<Path>,
db_size: usize,
) -> anyhow::Result<()> {
let dst_update_path = dst.as_ref().join("updates/");
create_dir_all(&dst_update_path)?;
let mut options = EnvOpenOptions::new();
options.map_size(db_size as usize);
let (store, _) = UpdateStore::new(options, &dst_update_path)?;
let src_update_path = src.as_ref().join("updates");
let update_data = File::open(&src_update_path.join("data.jsonl"))?;
let mut update_data = BufReader::new(update_data);
std::fs::create_dir_all(dst_update_path.join("update_files/"))?;
let mut wtxn = store.env.write_txn()?;
let mut line = String::new();
loop {
match update_data.read_line(&mut line) {
Ok(0) => break,
Ok(_) => {
let UpdateEntry { uuid, update } = serde_json::from_str(&line)?;
store.register_raw_updates(&mut wtxn, &update, uuid)?;
// Copy the associated update file if it exists
if let UpdateStatus::Enqueued(Enqueued {
content: Some(uuid),
..
}) = update
{
let src = update_uuid_to_file_path(&src_update_path, uuid);
let dst = update_uuid_to_file_path(&dst_update_path, uuid);
std::fs::copy(src, dst)?;
}
}
_ => break,
}
line.clear();
}
wtxn.commit()?;
Ok(())
}
}
async fn dump_indexes(
uuids: &HashSet<Uuid>,
handle: impl IndexActorHandle,
path: impl AsRef<Path>,
) -> Result<()> {
for uuid in uuids {
handle.dump(*uuid, path.as_ref().to_owned()).await?;
}
Ok(())
}

View File

@ -1,98 +0,0 @@
use std::{collections::HashSet, path::PathBuf};
use log::{trace, warn};
use tokio::sync::mpsc;
use uuid::Uuid;
use super::{error::UuidResolverError, Result, UuidResolveMsg, UuidStore};
pub struct UuidResolverActor<S> {
inbox: mpsc::Receiver<UuidResolveMsg>,
store: S,
}
impl<S: UuidStore> UuidResolverActor<S> {
pub fn new(inbox: mpsc::Receiver<UuidResolveMsg>, store: S) -> Self {
Self { inbox, store }
}
pub async fn run(mut self) {
use UuidResolveMsg::*;
trace!("uuid resolver started");
loop {
match self.inbox.recv().await {
Some(Get { uid: name, ret }) => {
let _ = ret.send(self.handle_get(name).await);
}
Some(Delete { uid: name, ret }) => {
let _ = ret.send(self.handle_delete(name).await);
}
Some(List { ret }) => {
let _ = ret.send(self.handle_list().await);
}
Some(Insert { ret, uuid, name }) => {
let _ = ret.send(self.handle_insert(name, uuid).await);
}
Some(SnapshotRequest { path, ret }) => {
let _ = ret.send(self.handle_snapshot(path).await);
}
Some(GetSize { ret }) => {
let _ = ret.send(self.handle_get_size().await);
}
Some(DumpRequest { path, ret }) => {
let _ = ret.send(self.handle_dump(path).await);
}
// all senders have been dropped, need to quit.
None => break,
}
}
warn!("exiting uuid resolver loop");
}
async fn handle_get(&self, uid: String) -> Result<Uuid> {
self.store
.get_uuid(uid.clone())
.await?
.ok_or(UuidResolverError::UnexistingIndex(uid))
}
async fn handle_delete(&self, uid: String) -> Result<Uuid> {
self.store
.delete(uid.clone())
.await?
.ok_or(UuidResolverError::UnexistingIndex(uid))
}
async fn handle_list(&self) -> Result<Vec<(String, Uuid)>> {
let result = self.store.list().await?;
Ok(result)
}
async fn handle_snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
self.store.snapshot(path).await
}
async fn handle_dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
self.store.dump(path).await
}
async fn handle_insert(&self, uid: String, uuid: Uuid) -> Result<()> {
if !is_index_uid_valid(&uid) {
return Err(UuidResolverError::BadlyFormatted(uid));
}
self.store.insert(uid, uuid).await?;
Ok(())
}
async fn handle_get_size(&self) -> Result<u64> {
self.store.get_size().await
}
}
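/// An index uid may only contain ASCII alphanumeric characters, hyphens (`-`) and underscores (`_`).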
fn is_index_uid_valid(uid: &str) -> bool {
uid.chars()
.all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_')
}

View File

@ -1,34 +0,0 @@
use meilisearch_error::{Code, ErrorCode};
pub type Result<T> = std::result::Result<T, UuidResolverError>;
#[derive(Debug, thiserror::Error)]
pub enum UuidResolverError {
#[error("Index already exists.")]
NameAlreadyExist,
#[error("Index \"{0}\" not found.")]
UnexistingIndex(String),
#[error("Index must have a valid uid; Index uid can be of type integer or string only composed of alphanumeric characters, hyphens (-) and underscores (_).")]
BadlyFormatted(String),
#[error("Internal error: {0}")]
Internal(Box<dyn std::error::Error + Sync + Send + 'static>),
}
internal_error!(
UuidResolverError: heed::Error,
uuid::Error,
std::io::Error,
tokio::task::JoinError,
serde_json::Error
);
impl ErrorCode for UuidResolverError {
fn error_code(&self) -> Code {
match self {
UuidResolverError::NameAlreadyExist => Code::IndexAlreadyExists,
UuidResolverError::UnexistingIndex(_) => Code::IndexNotFound,
UuidResolverError::BadlyFormatted(_) => Code::InvalidIndexUid,
UuidResolverError::Internal(_) => Code::Internal,
}
}
}

View File

@ -1,87 +0,0 @@
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use tokio::sync::{mpsc, oneshot};
use uuid::Uuid;
use super::{HeedUuidStore, Result, UuidResolveMsg, UuidResolverActor, UuidResolverHandle};
#[derive(Clone)]
pub struct UuidResolverHandleImpl {
sender: mpsc::Sender<UuidResolveMsg>,
}
impl UuidResolverHandleImpl {
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
let (sender, receiver) = mpsc::channel(100);
let store = HeedUuidStore::new(path)?;
let actor = UuidResolverActor::new(receiver, store);
tokio::spawn(actor.run());
Ok(Self { sender })
}
}
#[async_trait::async_trait]
impl UuidResolverHandle for UuidResolverHandleImpl {
async fn get(&self, name: String) -> Result<Uuid> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::Get { uid: name, ret };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
async fn delete(&self, name: String) -> Result<Uuid> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::Delete { uid: name, ret };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
async fn list(&self) -> Result<Vec<(String, Uuid)>> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::List { ret };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
async fn insert(&self, name: String, uuid: Uuid) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::Insert { ret, name, uuid };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::SnapshotRequest { path, ret };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
async fn get_size(&self) -> Result<u64> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::GetSize { ret };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::DumpRequest { ret, path };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
}

View File

@ -1,35 +0,0 @@
mod actor;
pub mod error;
mod handle_impl;
mod message;
pub mod store;
use std::collections::HashSet;
use std::path::PathBuf;
use uuid::Uuid;
use actor::UuidResolverActor;
use error::Result;
use message::UuidResolveMsg;
use store::UuidStore;
#[cfg(test)]
use mockall::automock;
pub use handle_impl::UuidResolverHandleImpl;
pub use store::HeedUuidStore;
const UUID_STORE_SIZE: usize = 1_073_741_824; // 1 GiB
#[async_trait::async_trait]
#[cfg_attr(test, automock)]
pub trait UuidResolverHandle {
async fn get(&self, name: String) -> Result<Uuid>;
async fn insert(&self, name: String, uuid: Uuid) -> Result<()>;
async fn delete(&self, name: String) -> Result<Uuid>;
async fn list(&self) -> Result<Vec<(String, Uuid)>>;
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
async fn get_size(&self) -> Result<u64>;
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
}

View File

@ -1,44 +1,4 @@
//! # MeiliSearch
//! Hello there, future contributors. If you are here and see this code, it's probably because you want to add a super new fancy feature to MeiliSearch or fix a bug. First of all, thank you for that!
//!
//! To help you in this task, we'll try to do a little overview of the project.
//! ## Milli
//! [Milli](https://github.com/meilisearch/milli) is the core library of MeiliSearch. It's where we actually index documents and perform searches. Its purpose is to do these two tasks as fast as possible. You can give an update to milli, and it'll use as many cores as provided to perform it as fast as possible. Nothing more. You can perform searches at the same time (search only uses one core).
//! As you can see, we're missing quite a lot of features here; milli does not handle multiple indexes, it can't queue updates, it doesn't provide any web / API frontend, it doesn't implement dumps or snapshots, etc...
//!
//! ## `Index` module
//! The [index] module is what encapsulates one milli index. It abstracts over its transactions and isolates a task that can be run in a thread. This is the unit of interaction with milli.
//! If you add a feature to milli, you'll probably need to add it in this module too before exposing it to the rest of meilisearch.
//!
//! ## `IndexController` module
//! To handle multiple indexes, we created an [index_controller]. It's in charge of creating new indexes, keeping references to all its indexes, forwarding asynchronous updates to its indexes, and providing an API to search its indexes synchronously.
//! To achieve this goal, we use an [actor model](https://en.wikipedia.org/wiki/Actor_model).
//!
//! ### The actor model
//! Every actor is composed of at least four files (a minimal sketch of the pattern is shown right after this list):
//! - `mod.rs` declares and imports all the files used by the actor. It also describes the interface (= all the methods) used to interact with the actor. If you are not modifying anything inside an actor, this is usually all you need to see.
//! - `handle_impl.rs` implements the interface described in `mod.rs`; in reality, there is no code logic in this file. Every method only wraps its parameters in a structure that is sent to the actor. This is useful for testing and future-proofing.
//! - `message.rs` contains an enum that describes all the interactions you can have with the actor.
//! - `actor.rs` is used to create and execute the actor. It's where we write the loop that looks for new messages and actually performs the tasks.
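//!
//! Below is a minimal, hypothetical sketch of this pattern (the `Ping` actor does not
//! exist in the codebase; it is only here to illustrate the four-file layout):
//! ```no_run
//! use tokio::sync::{mpsc, oneshot};
//!
//! // message.rs: every interaction is an enum variant carrying a oneshot
//! // sender used to return the result to the caller.
//! enum PingMsg {
//!     Ping { ret: oneshot::Sender<String> },
//! }
//!
//! // actor.rs: the actor owns the receiving end of the channel and loops
//! // over incoming messages until all senders are dropped.
//! struct PingActor {
//!     inbox: mpsc::Receiver<PingMsg>,
//! }
//!
//! impl PingActor {
//!     async fn run(mut self) {
//!         while let Some(PingMsg::Ping { ret }) = self.inbox.recv().await {
//!             let _ = ret.send("pong".to_string());
//!         }
//!     }
//! }
//!
//! // handle_impl.rs: the handle has no logic; it only wraps its parameters
//! // in a message, sends it, and awaits the answer.
//! #[derive(Clone)]
//! struct PingHandle {
//!     sender: mpsc::Sender<PingMsg>,
//! }
//!
//! impl PingHandle {
//!     async fn ping(&self) -> String {
//!         let (ret, receiver) = oneshot::channel();
//!         let _ = self.sender.send(PingMsg::Ping { ret }).await;
//!         receiver.await.expect("Ping actor has been killed")
//!     }
//! }
//! ```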
//!
//! MeiliSearch currently uses four actors:
//! - [`uuid_resolver`](index_controller/uuid_resolver/index.html) holds the association between the user-provided index names and the internal [`uuid`](https://en.wikipedia.org/wiki/Universally_unique_identifier) representation we use.
//! - [`index_actor`](index_controller::index_actor) is our representation of multiple indexes. Any request made to MeiliSearch that needs to talk to milli will pass through this actor.
//! - [`update_actor`](index_controller/update_actor/index.html) is in charge of index updates. Since updates can take a long time to receive and process, we need to:
//! 1. Store them as fast as possible so we can continue to receive other updates even if nothing has been processed
//! 2. Feed the `index_actor` with a new update every time it finishes its current job.
//! - [`dump_actor`](index_controller/dump_actor/index.html) handles the [dumps](https://docs.meilisearch.com/reference/api/dump.html). It needs to contact all the other actors and create a dump of everything that is currently happening.
//!
//! ## Data module
//! The [data] module provides a unified interface to communicate with the index controller and other services (snapshot, dumps, ...), and initializes the MeiliSearch instance.
//!
//! ## HTTP server
//! To handle the web and API part, we are using [actix-web](https://docs.rs/actix-web/); you can find all routes in the [routes] module.
//! Currently, the configuration of actix-web is done in [lib.rs](crate).
//! Most of the routes use [extractors] to handle authentication.
#![allow(rustdoc::private_intra_doc_links)]
pub mod data;
#[macro_use]
pub mod error;
#[macro_use]
@ -46,11 +6,11 @@ pub mod extractors;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub mod analytics;
pub mod helpers;
mod index;
mod index_controller;
pub mod option;
pub mod routes;
pub use self::data::Data;
use std::path::Path;
use std::time::Duration;
use crate::extractors::authentication::AuthConfig;
pub use option::Opt;
@ -58,11 +18,83 @@ use actix_web::web;
use extractors::authentication::policies::*;
use extractors::payload::PayloadConfig;
use meilisearch_lib::MeiliSearch;
use sha2::Digest;
pub fn configure_data(config: &mut web::ServiceConfig, data: Data) {
let http_payload_size_limit = data.http_payload_size_limit();
#[derive(Clone)]
pub struct ApiKeys {
pub public: Option<String>,
pub private: Option<String>,
pub master: Option<String>,
}
impl ApiKeys {
pub fn generate_missing_api_keys(&mut self) {
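// Derive any missing key deterministically from the master key, e.g.
// (illustration only) private = hex(sha256("<master>-private")).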
if let Some(master_key) = &self.master {
if self.private.is_none() {
let key = format!("{}-private", master_key);
let sha = sha2::Sha256::digest(key.as_bytes());
self.private = Some(format!("{:x}", sha));
}
if self.public.is_none() {
let key = format!("{}-public", master_key);
let sha = sha2::Sha256::digest(key.as_bytes());
self.public = Some(format!("{:x}", sha));
}
}
}
}
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<MeiliSearch> {
let mut meilisearch = MeiliSearch::builder();
meilisearch
.set_max_index_size(opt.max_index_size.get_bytes() as usize)
.set_max_update_store_size(opt.max_udb_size.get_bytes() as usize)
.set_ignore_missing_snapshot(opt.ignore_missing_snapshot)
.set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists)
.set_dump_dst(opt.dumps_dir.clone())
.set_snapshot_interval(Duration::from_secs(opt.snapshot_interval_sec))
.set_snapshot_dir(opt.snapshot_dir.clone());
if let Some(ref path) = opt.import_snapshot {
meilisearch.set_import_snapshot(path.clone());
}
if let Some(ref path) = opt.import_dump {
meilisearch.set_dump_src(path.clone());
}
if opt.schedule_snapshot {
meilisearch.set_schedule_snapshot();
}
meilisearch.build(opt.db_path.clone(), opt.indexer_options.clone())
}
/// Cleans and sets up the temporary file folder in the database directory. This must be done after
/// the meilisearch instance has been created, to avoid interfering with snapshot and dump loading.
pub fn setup_temp_dir(db_path: impl AsRef<Path>) -> anyhow::Result<()> {
// Set the tempfile directory in the current db path, to avoid cross-device references. Also
// remove any previous outstanding files found there.
//
// TODO: if two processes open the same db, one might delete the other tmpdir. Need to make
// sure that no one is using it before deleting it.
let temp_path = db_path.as_ref().join("tmp");
// Ignore error if tempdir doesn't exist
let _ = std::fs::remove_dir_all(&temp_path);
std::fs::create_dir_all(&temp_path)?;
if cfg!(windows) {
std::env::set_var("TMP", temp_path);
} else {
std::env::set_var("TMPDIR", temp_path);
}
Ok(())
}
pub fn configure_data(config: &mut web::ServiceConfig, data: MeiliSearch, opt: &Opt) {
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
config
.app_data(web::Data::new(data.clone()))
.app_data(data)
.app_data(
web::JsonConfig::default()
@ -77,8 +109,15 @@ pub fn configure_data(config: &mut web::ServiceConfig, data: Data) {
);
}
pub fn configure_auth(config: &mut web::ServiceConfig, data: &Data) {
let keys = data.api_keys();
pub fn configure_auth(config: &mut web::ServiceConfig, opts: &Opt) {
let mut keys = ApiKeys {
master: opts.master_key.clone(),
private: None,
public: None,
};
keys.generate_missing_api_keys();
let auth_config = if let Some(ref master_key) = keys.master {
let private_key = keys.private.as_ref().unwrap();
let public_key = keys.public.as_ref().unwrap();
@ -94,7 +133,7 @@ pub fn configure_auth(config: &mut web::ServiceConfig, data: &Data) {
AuthConfig::NoAuth
};
config.app_data(auth_config);
config.app_data(auth_config).app_data(keys);
}
#[cfg(feature = "mini-dashboard")]
@ -138,7 +177,7 @@ pub fn dashboard(config: &mut web::ServiceConfig, _enable_frontend: bool) {
#[macro_export]
macro_rules! create_app {
($data:expr, $enable_frontend:expr) => {{
($data:expr, $enable_frontend:expr, $opt:expr) => {{
use actix_cors::Cors;
use actix_web::middleware::TrailingSlash;
use actix_web::App;
@ -147,8 +186,8 @@ macro_rules! create_app {
use meilisearch_http::{configure_auth, configure_data, dashboard};
App::new()
.configure(|s| configure_data(s, $data.clone()))
.configure(|s| configure_auth(s, &$data))
.configure(|s| configure_data(s, $data.clone(), &$opt))
.configure(|s| configure_auth(s, &$opt))
.configure(routes::configure)
.configure(|s| dashboard(s, $enable_frontend))
.wrap(

View File

@ -1,8 +1,8 @@
use std::env;
use actix_web::HttpServer;
use main_error::MainError;
use meilisearch_http::{create_app, Data, Opt};
use meilisearch_http::{create_app, setup_meilisearch, Opt};
use meilisearch_lib::MeiliSearch;
use structopt::StructOpt;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
@ -12,10 +12,8 @@ use meilisearch_http::analytics;
#[global_allocator]
static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
#[actix_web::main]
async fn main() -> Result<(), MainError> {
let opt = Opt::from_args();
/// Does all the setup before MeiliSearch is launched.
fn setup(opt: &Opt) -> anyhow::Result<()> {
let mut log_builder = env_logger::Builder::new();
log_builder.parse_filters(&opt.log_level);
if opt.log_level == "info" {
@ -25,38 +23,51 @@ async fn main() -> Result<(), MainError> {
log_builder.init();
Ok(())
}
#[actix_web::main]
async fn main() -> anyhow::Result<()> {
let opt = Opt::from_args();
setup(&opt)?;
match opt.env.as_ref() {
"production" => {
if opt.master_key.is_none() {
return Err(
anyhow::bail!(
"In production mode, the environment variable MEILI_MASTER_KEY is mandatory"
.into(),
);
)
}
}
"development" => (),
_ => unreachable!(),
}
let data = Data::new(opt.clone())?;
let meilisearch = setup_meilisearch(&opt)?;
// Setup the temp directory to be in the db folder. This is important, since temporary files
// can't be persisted across filesystem boundaries.
meilisearch_http::setup_temp_dir(&opt.db_path)?;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
if !opt.no_analytics {
let analytics_data = data.clone();
let analytics_data = meilisearch.clone();
let analytics_opt = opt.clone();
tokio::task::spawn(analytics::analytics_sender(analytics_data, analytics_opt));
}
print_launch_resume(&opt, &data);
print_launch_resume(&opt);
run_http(data, opt).await?;
run_http(meilisearch, opt).await?;
Ok(())
}
async fn run_http(data: Data, opt: Opt) -> Result<(), Box<dyn std::error::Error>> {
async fn run_http(data: MeiliSearch, opt: Opt) -> anyhow::Result<()> {
let _enable_dashboard = &opt.env == "development";
let http_server = HttpServer::new(move || create_app!(data, _enable_dashboard))
let opt_clone = opt.clone();
let http_server = HttpServer::new(move || create_app!(data, _enable_dashboard, opt_clone))
// Disabling signals allows the server to terminate immediately when a user enters CTRL-C
.disable_signals();
@ -66,12 +77,12 @@ async fn run_http(data: Data, opt: Opt) -> Result<(), Box<dyn std::error::Error>
.run()
.await?;
} else {
http_server.bind(opt.http_addr)?.run().await?;
http_server.bind(&opt.http_addr)?.run().await?;
}
Ok(())
}
pub fn print_launch_resume(opt: &Opt, data: &Data) {
pub fn print_launch_resume(opt: &Opt) {
let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
@ -116,7 +127,7 @@ Anonymous telemetry: \"Enabled\""
eprintln!();
if data.api_keys().master.is_some() {
if opt.master_key.is_some() {
eprintln!("A Master Key has been set. Requests to MeiliSearch won't be authorized unless you provide an authentication key.");
} else {
eprintln!("No master key found; The server will accept unidentified requests. \

View File

@ -1,71 +1,16 @@
use byte_unit::ByteError;
use std::fmt;
use std::fs;
use std::io::{BufReader, Read};
use std::ops::Deref;
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Arc;
use std::{error, fs};
use byte_unit::Byte;
use milli::CompressionType;
use meilisearch_lib::options::IndexerOpts;
use rustls::internal::pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
use rustls::{
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, NoClientAuth,
RootCertStore,
};
use structopt::StructOpt;
use sysinfo::{RefreshKind, System, SystemExt};
#[derive(Debug, Clone, StructOpt)]
pub struct IndexerOpts {
/// The number of documents to skip before printing
/// a log regarding the indexing advancement.
#[structopt(long, default_value = "100000")] // 100k
pub log_every_n: usize,
/// Grenad max number of chunks in bytes.
#[structopt(long)]
pub max_nb_chunks: Option<usize>,
/// The maximum amount of memory the indexer will use. It defaults to 2/3
/// of the available memory. It is recommended to use something like 80%-90%
/// of the available memory, no more.
///
/// In case the engine is unable to retrieve the available memory, it will
/// try to use the memory it needs without any real limit; this can lead to
/// Out-Of-Memory issues, so it is recommended to specify the amount of memory to use.
#[structopt(long, default_value)]
pub max_memory: MaxMemory,
/// The name of the compression algorithm to use when compressing intermediate
/// Grenad chunks while indexing documents.
///
/// Choosing a fast algorithm will make the indexing faster but may consume more memory.
#[structopt(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])]
pub chunk_compression_type: CompressionType,
/// The level of compression of the chosen algorithm.
#[structopt(long, requires = "chunk-compression-type")]
pub chunk_compression_level: Option<u32>,
/// Number of parallel jobs for indexing, defaults to # of CPUs.
#[structopt(long)]
pub indexing_jobs: Option<usize>,
}
impl Default for IndexerOpts {
fn default() -> Self {
Self {
log_every_n: 100_000,
max_nb_chunks: None,
max_memory: MaxMemory::default(),
chunk_compression_type: CompressionType::None,
chunk_compression_level: None,
indexing_jobs: None,
}
}
}
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
@ -184,7 +129,7 @@ pub struct Opt {
}
impl Opt {
pub fn get_ssl_config(&self) -> Result<Option<rustls::ServerConfig>, Box<dyn error::Error>> {
pub fn get_ssl_config(&self) -> anyhow::Result<Option<rustls::ServerConfig>> {
if let (Some(cert_path), Some(key_path)) = (&self.ssl_cert_path, &self.ssl_key_path) {
let client_auth = match &self.ssl_auth_path {
Some(auth_path) => {
@ -210,7 +155,7 @@ impl Opt {
let ocsp = load_ocsp(&self.ssl_ocsp_path)?;
config
.set_single_cert_with_ocsp_and_sct(certs, privkey, ocsp, vec![])
.map_err(|_| "bad certificates/private key")?;
.map_err(|_| anyhow::anyhow!("bad certificates/private key"))?;
if self.ssl_resumption {
config.set_persistence(rustls::ServerSessionMemoryCache::new(256));
@ -227,82 +172,31 @@ impl Opt {
}
}
/// A type used to detect the max memory available and use 2/3 of it.
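/// It parses any byte-unit string, so (for example) `--max-memory "4 GiB"` should be accepted.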
#[derive(Debug, Clone, Copy)]
pub struct MaxMemory(Option<Byte>);
impl FromStr for MaxMemory {
type Err = ByteError;
fn from_str(s: &str) -> Result<MaxMemory, ByteError> {
Byte::from_str(s).map(Some).map(MaxMemory)
}
}
impl Default for MaxMemory {
fn default() -> MaxMemory {
MaxMemory(
total_memory_bytes()
.map(|bytes| bytes * 2 / 3)
.map(Byte::from_bytes),
)
}
}
impl fmt::Display for MaxMemory {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.0 {
Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)),
None => f.write_str("unknown"),
}
}
}
impl Deref for MaxMemory {
type Target = Option<Byte>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl MaxMemory {
pub fn unlimited() -> Self {
Self(None)
}
}
/// Returns the total amount of bytes available or `None` if this system isn't supported.
fn total_memory_bytes() -> Option<u64> {
if System::IS_SUPPORTED {
let memory_kind = RefreshKind::new().with_memory();
let mut system = System::new_with_specifics(memory_kind);
system.refresh_memory();
Some(system.total_memory() * 1024) // KiB into bytes
} else {
None
}
}
fn load_certs(filename: PathBuf) -> Result<Vec<rustls::Certificate>, Box<dyn error::Error>> {
let certfile = fs::File::open(filename).map_err(|_| "cannot open certificate file")?;
fn load_certs(filename: PathBuf) -> anyhow::Result<Vec<rustls::Certificate>> {
let certfile =
fs::File::open(filename).map_err(|_| anyhow::anyhow!("cannot open certificate file"))?;
let mut reader = BufReader::new(certfile);
Ok(certs(&mut reader).map_err(|_| "cannot read certificate file")?)
certs(&mut reader).map_err(|_| anyhow::anyhow!("cannot read certificate file"))
}
fn load_private_key(filename: PathBuf) -> Result<rustls::PrivateKey, Box<dyn error::Error>> {
fn load_private_key(filename: PathBuf) -> anyhow::Result<rustls::PrivateKey> {
let rsa_keys = {
let keyfile =
fs::File::open(filename.clone()).map_err(|_| "cannot open private key file")?;
let keyfile = fs::File::open(filename.clone())
.map_err(|_| anyhow::anyhow!("cannot open private key file"))?;
let mut reader = BufReader::new(keyfile);
rsa_private_keys(&mut reader).map_err(|_| "file contains invalid rsa private key")?
rsa_private_keys(&mut reader)
.map_err(|_| anyhow::anyhow!("file contains invalid rsa private key"))?
};
let pkcs8_keys = {
let keyfile = fs::File::open(filename).map_err(|_| "cannot open private key file")?;
let keyfile = fs::File::open(filename)
.map_err(|_| anyhow::anyhow!("cannot open private key file"))?;
let mut reader = BufReader::new(keyfile);
pkcs8_private_keys(&mut reader)
.map_err(|_| "file contains invalid pkcs8 private key (encrypted keys not supported)")?
pkcs8_private_keys(&mut reader).map_err(|_| {
anyhow::anyhow!(
"file contains invalid pkcs8 private key (encrypted keys not supported)"
)
})?
};
// prefer to load pkcs8 keys
@ -314,14 +208,14 @@ fn load_private_key(filename: PathBuf) -> Result<rustls::PrivateKey, Box<dyn err
}
}
fn load_ocsp(filename: &Option<PathBuf>) -> Result<Vec<u8>, Box<dyn error::Error>> {
fn load_ocsp(filename: &Option<PathBuf>) -> anyhow::Result<Vec<u8>> {
let mut ret = Vec::new();
if let Some(ref name) = filename {
fs::File::open(name)
.map_err(|_| "cannot open ocsp file")?
.map_err(|_| anyhow::anyhow!("cannot open ocsp file"))?
.read_to_end(&mut ret)
.map_err(|_| "cannot read oscp file")?;
.map_err(|_| anyhow::anyhow!("cannot read oscp file"))?;
}
Ok(ret)

View File

@ -1,18 +1,20 @@
use actix_web::{web, HttpResponse};
use log::debug;
use meilisearch_lib::MeiliSearch;
use serde::{Deserialize, Serialize};
use crate::error::ResponseError;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::Data;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(create_dump)))
.service(web::resource("/{dump_uid}/status").route(web::get().to(get_dump_status)));
}
pub async fn create_dump(data: GuardedData<Private, Data>) -> Result<HttpResponse, ResponseError> {
let res = data.create_dump().await?;
pub async fn create_dump(
meilisearch: GuardedData<Private, MeiliSearch>,
) -> Result<HttpResponse, ResponseError> {
let res = meilisearch.create_dump().await?;
debug!("returns: {:?}", res);
Ok(HttpResponse::Accepted().json(res))
@ -30,10 +32,10 @@ struct DumpParam {
}
async fn get_dump_status(
data: GuardedData<Private, Data>,
meilisearch: GuardedData<Private, MeiliSearch>,
path: web::Path<DumpParam>,
) -> Result<HttpResponse, ResponseError> {
let res = data.dump_status(path.dump_uid.clone()).await?;
let res = meilisearch.dump_info(path.dump_uid.clone()).await?;
debug!("returns: {:?}", res);
Ok(HttpResponse::Ok().json(res))

View File

@ -1,19 +1,23 @@
use actix_web::error::PayloadError;
use actix_web::web::Bytes;
use actix_web::{web, HttpResponse};
use futures::{Stream, StreamExt};
use log::debug;
use milli::update::{IndexDocumentsMethod, UpdateFormat};
use meilisearch_lib::index_controller::{DocumentAdditionFormat, Update};
use meilisearch_lib::milli::update::IndexDocumentsMethod;
use meilisearch_lib::MeiliSearch;
use serde::Deserialize;
use serde_json::Value;
use tokio::sync::mpsc;
use crate::error::ResponseError;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::extractors::payload::Payload;
use crate::routes::IndexParam;
use crate::Data;
const DEFAULT_RETRIEVE_DOCUMENTS_OFFSET: usize = 0;
const DEFAULT_RETRIEVE_DOCUMENTS_LIMIT: usize = 20;
/*
macro_rules! guard_content_type {
($fn_name:ident, $guard_value:literal) => {
fn $fn_name(head: &actix_web::dev::RequestHead) -> bool {
@ -30,22 +34,22 @@ macro_rules! guard_content_type {
}
guard_content_type!(guard_json, "application/json");
*/
guard_content_type!(guard_csv, "application/csv");
guard_content_type!(guard_ndjson, "application/ndjson");
fn guard_json(head: &actix_web::dev::RequestHead) -> bool {
if let Some(_content_type) = head.headers.get("Content-Type") {
// CURRENTLY AND FOR THIS RELEASE ONLY WE DECIDED TO INTERPRET ALL CONTENT-TYPES AS JSON
true
/*
content_type
.to_str()
.map(|v| v.contains("application/json"))
.unwrap_or(false)
*/
} else {
// if no content-type is specified we still accept the data as json!
true
}
fn empty_application_type(head: &actix_web::dev::RequestHead) -> bool {
head.headers.get("Content-Type").is_none()
}
/// This is required because Payload is neither Sync nor Send
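/// (the `Payload` is therefore consumed on a `spawn_local` task on the current thread,
/// and its bytes are forwarded through an mpsc channel whose receiving half can safely
/// be sent across threads).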
fn payload_to_stream(mut payload: Payload) -> impl Stream<Item = Result<Bytes, PayloadError>> {
let (snd, recv) = mpsc::channel(1);
tokio::task::spawn_local(async move {
while let Some(data) = payload.next().await {
let _ = snd.send(data).await;
}
});
tokio_stream::wrappers::ReceiverStream::new(recv)
}
#[derive(Deserialize)]
@ -58,8 +62,26 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::get().to(get_all_documents))
.route(web::post().guard(guard_json).to(add_documents))
.route(web::put().guard(guard_json).to(update_documents))
// replace documents routes
.route(
web::post()
.guard(empty_application_type)
.to(HttpResponse::UnsupportedMediaType),
)
.route(web::post().guard(guard_json).to(add_documents_json))
.route(web::post().guard(guard_ndjson).to(add_documents_ndjson))
.route(web::post().guard(guard_csv).to(add_documents_csv))
.route(web::post().to(HttpResponse::UnsupportedMediaType))
// update documents routes
.route(
web::put()
.guard(empty_application_type)
.to(HttpResponse::UnsupportedMediaType),
)
.route(web::put().guard(guard_json).to(update_documents_json))
.route(web::put().guard(guard_ndjson).to(update_documents_ndjson))
.route(web::put().guard(guard_csv).to(update_documents_csv))
.route(web::put().to(HttpResponse::UnsupportedMediaType))
.route(web::delete().to(clear_all_documents)),
)
// this route needs to be before the /documents/{document_id} to match properly
@ -72,24 +94,29 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
}
pub async fn get_document(
data: GuardedData<Public, Data>,
meilisearch: GuardedData<Public, MeiliSearch>,
path: web::Path<DocumentParam>,
) -> Result<HttpResponse, ResponseError> {
let index = path.index_uid.clone();
let id = path.document_id.clone();
let document = data
.retrieve_document(index, id, None as Option<Vec<String>>)
let document = meilisearch
.document(index, id, None as Option<Vec<String>>)
.await?;
debug!("returns: {:?}", document);
Ok(HttpResponse::Ok().json(document))
}
pub async fn delete_document(
data: GuardedData<Private, Data>,
meilisearch: GuardedData<Private, MeiliSearch>,
path: web::Path<DocumentParam>,
) -> Result<HttpResponse, ResponseError> {
let update_status = data
.delete_documents(path.index_uid.clone(), vec![path.document_id.clone()])
let DocumentParam {
document_id,
index_uid,
} = path.into_inner();
let update = Update::DeleteDocuments(vec![document_id]);
let update_status = meilisearch
.register_update(index_uid, update, false)
.await?;
debug!("returns: {:?}", update_status);
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
@ -104,7 +131,7 @@ pub struct BrowseQuery {
}
pub async fn get_all_documents(
data: GuardedData<Public, Data>,
meilisearch: GuardedData<Public, MeiliSearch>,
path: web::Path<IndexParam>,
params: web::Query<BrowseQuery>,
) -> Result<HttpResponse, ResponseError> {
@ -120,8 +147,8 @@ pub async fn get_all_documents(
Some(names)
});
let documents = data
.retrieve_documents(
let documents = meilisearch
.documents(
path.index_uid.clone(),
params.offset.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_OFFSET),
params.limit.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_LIMIT),
@ -138,54 +165,134 @@ pub struct UpdateDocumentsQuery {
primary_key: Option<String>,
}
/// Route used when the payload type is "application/json"
/// Used to add or replace documents
pub async fn add_documents(
data: GuardedData<Private, Data>,
pub async fn add_documents_json(
meilisearch: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
params: web::Query<UpdateDocumentsQuery>,
body: Payload,
) -> Result<HttpResponse, ResponseError> {
document_addition(
meilisearch,
path,
params,
body,
DocumentAdditionFormat::Json,
IndexDocumentsMethod::ReplaceDocuments,
)
.await
}
pub async fn add_documents_ndjson(
meilisearch: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
params: web::Query<UpdateDocumentsQuery>,
body: Payload,
) -> Result<HttpResponse, ResponseError> {
document_addition(
meilisearch,
path,
params,
body,
DocumentAdditionFormat::Ndjson,
IndexDocumentsMethod::ReplaceDocuments,
)
.await
}
pub async fn add_documents_csv(
meilisearch: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
params: web::Query<UpdateDocumentsQuery>,
body: Payload,
) -> Result<HttpResponse, ResponseError> {
document_addition(
meilisearch,
path,
params,
body,
DocumentAdditionFormat::Csv,
IndexDocumentsMethod::ReplaceDocuments,
)
.await
}
pub async fn update_documents_json(
meilisearch: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
params: web::Query<UpdateDocumentsQuery>,
body: Payload,
) -> Result<HttpResponse, ResponseError> {
document_addition(
meilisearch,
path,
params,
body,
DocumentAdditionFormat::Json,
IndexDocumentsMethod::UpdateDocuments,
)
.await
}
pub async fn update_documents_ndjson(
meilisearch: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
params: web::Query<UpdateDocumentsQuery>,
body: Payload,
) -> Result<HttpResponse, ResponseError> {
document_addition(
meilisearch,
path,
params,
body,
DocumentAdditionFormat::Ndjson,
IndexDocumentsMethod::UpdateDocuments,
)
.await
}
pub async fn update_documents_csv(
meilisearch: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
params: web::Query<UpdateDocumentsQuery>,
body: Payload,
) -> Result<HttpResponse, ResponseError> {
document_addition(
meilisearch,
path,
params,
body,
DocumentAdditionFormat::Csv,
IndexDocumentsMethod::UpdateDocuments,
)
.await
}
/// Shared handler used by all the document addition and update routes,
/// regardless of the payload format.
async fn document_addition(
meilisearch: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
params: web::Query<UpdateDocumentsQuery>,
body: Payload,
format: DocumentAdditionFormat,
method: IndexDocumentsMethod,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", params);
let update_status = data
.add_documents(
path.into_inner().index_uid,
IndexDocumentsMethod::ReplaceDocuments,
UpdateFormat::Json,
body,
params.primary_key.clone(),
)
let update = Update::DocumentAddition {
payload: Box::new(payload_to_stream(body)),
primary_key: params.primary_key.clone(),
method,
format,
};
let update_status = meilisearch
.register_update(path.into_inner().index_uid, update, true)
.await?;
debug!("returns: {:?}", update_status);
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
}
/// Route used when the payload type is "application/json"
/// Used to add or update documents
pub async fn update_documents(
data: GuardedData<Private, Data>,
path: web::Path<IndexParam>,
params: web::Query<UpdateDocumentsQuery>,
body: Payload,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", params);
let update = data
.add_documents(
path.into_inner().index_uid,
IndexDocumentsMethod::UpdateDocuments,
UpdateFormat::Json,
body,
params.primary_key.clone(),
)
.await?;
debug!("returns: {:?}", update);
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update.id() })))
}
pub async fn delete_documents(
data: GuardedData<Private, Data>,
meilisearch: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
body: web::Json<Vec<Value>>,
) -> Result<HttpResponse, ResponseError> {
@ -199,16 +306,22 @@ pub async fn delete_documents(
})
.collect();
let update_status = data.delete_documents(path.index_uid.clone(), ids).await?;
let update = Update::DeleteDocuments(ids);
let update_status = meilisearch
.register_update(path.into_inner().index_uid, update, false)
.await?;
debug!("returns: {:?}", update_status);
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
}
pub async fn clear_all_documents(
data: GuardedData<Private, Data>,
meilisearch: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
let update_status = data.clear_documents(path.index_uid.clone()).await?;
let update = Update::ClearDocuments;
let update_status = meilisearch
.register_update(path.into_inner().index_uid, update, false)
.await?;
debug!("returns: {:?}", update_status);
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
}

View File

@ -1,12 +1,13 @@
use actix_web::{web, HttpResponse};
use chrono::{DateTime, Utc};
use log::debug;
use meilisearch_lib::index_controller::IndexSettings;
use meilisearch_lib::MeiliSearch;
use serde::{Deserialize, Serialize};
use crate::error::ResponseError;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::routes::IndexParam;
use crate::Data;
pub mod documents;
pub mod search;
@ -35,7 +36,9 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
);
}
pub async fn list_indexes(data: GuardedData<Private, Data>) -> Result<HttpResponse, ResponseError> {
pub async fn list_indexes(
data: GuardedData<Private, MeiliSearch>,
) -> Result<HttpResponse, ResponseError> {
let indexes = data.list_indexes().await?;
debug!("returns: {:?}", indexes);
Ok(HttpResponse::Ok().json(indexes))
@ -49,11 +52,11 @@ pub struct IndexCreateRequest {
}
pub async fn create_index(
data: GuardedData<Private, Data>,
meilisearch: GuardedData<Private, MeiliSearch>,
body: web::Json<IndexCreateRequest>,
) -> Result<HttpResponse, ResponseError> {
let body = body.into_inner();
let meta = data.create_index(body.uid, body.primary_key).await?;
let meta = meilisearch.create_index(body.uid, body.primary_key).await?;
Ok(HttpResponse::Created().json(meta))
}
@ -75,41 +78,45 @@ pub struct UpdateIndexResponse {
}
pub async fn get_index(
data: GuardedData<Private, Data>,
meilisearch: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
let meta = data.index(path.index_uid.clone()).await?;
let meta = meilisearch.get_index(path.index_uid.clone()).await?;
debug!("returns: {:?}", meta);
Ok(HttpResponse::Ok().json(meta))
}
pub async fn update_index(
data: GuardedData<Private, Data>,
meilisearch: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
body: web::Json<UpdateIndexRequest>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", body);
let body = body.into_inner();
let meta = data
.update_index(path.into_inner().index_uid, body.primary_key, body.uid)
let settings = IndexSettings {
uid: body.uid,
primary_key: body.primary_key,
};
let meta = meilisearch
.update_index(path.into_inner().index_uid, settings)
.await?;
debug!("returns: {:?}", meta);
Ok(HttpResponse::Ok().json(meta))
}
pub async fn delete_index(
data: GuardedData<Private, Data>,
meilisearch: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
data.delete_index(path.index_uid.clone()).await?;
meilisearch.delete_index(path.index_uid.clone()).await?;
Ok(HttpResponse::NoContent().finish())
}
pub async fn get_index_stats(
data: GuardedData<Private, Data>,
meilisearch: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
let response = data.get_index_stats(path.index_uid.clone()).await?;
let response = meilisearch.get_index_stats(path.index_uid.clone()).await?;
debug!("returns: {:?}", response);
Ok(HttpResponse::Ok().json(response))

View File

@ -1,13 +1,13 @@
use actix_web::{web, HttpResponse};
use log::debug;
use meilisearch_lib::index::{default_crop_length, SearchQuery, DEFAULT_SEARCH_LIMIT};
use meilisearch_lib::MeiliSearch;
use serde::Deserialize;
use serde_json::Value;
use crate::error::ResponseError;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::index::{default_crop_length, SearchQuery, DEFAULT_SEARCH_LIMIT};
use crate::routes::IndexParam;
use crate::Data;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
@ -61,9 +61,7 @@ impl From<SearchQueryGet> for SearchQuery {
None => None,
};
let sort = other
.sort
.map(|attrs| attrs.split(',').map(String::from).collect());
let sort = other.sort.map(|attr| fix_sort_query_parameters(&attr));
Self {
q: other.q,
@ -81,14 +79,42 @@ impl From<SearchQueryGet> for SearchQuery {
}
}
// TODO: TAMO: split on :asc, and :desc, instead of doing some weird things
/// Transforms the sort query parameter into the format expected by the POST route.
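/// For example, `"price:asc,_geoPoint(12, 13):desc"` must not be split on the comma
/// inside `_geoPoint(...)`, so pieces of a `_geoPoint` parameter are merged back together.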
fn fix_sort_query_parameters(sort_query: &str) -> Vec<String> {
let mut sort_parameters = Vec::new();
let mut merge = false;
for current_sort in sort_query.trim_matches('"').split(',').map(|s| s.trim()) {
if current_sort.starts_with("_geoPoint(") {
sort_parameters.push(current_sort.to_string());
merge = true;
} else if merge && !sort_parameters.is_empty() {
sort_parameters
.last_mut()
.unwrap()
.push_str(&format!(",{}", current_sort));
if current_sort.ends_with("):desc") || current_sort.ends_with("):asc") {
merge = false;
}
} else {
sort_parameters.push(current_sort.to_string());
merge = false;
}
}
sort_parameters
}
pub async fn search_with_url_query(
data: GuardedData<Public, Data>,
meilisearch: GuardedData<Public, MeiliSearch>,
path: web::Path<IndexParam>,
params: web::Query<SearchQueryGet>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", params);
let query = params.into_inner().into();
let search_result = data.search(path.into_inner().index_uid, query).await?;
let search_result = meilisearch
.search(path.into_inner().index_uid, query)
.await?;
// Tests that nb_hits is always set to false
#[cfg(test)]
@ -99,12 +125,12 @@ pub async fn search_with_url_query(
}
pub async fn search_with_post(
data: GuardedData<Public, Data>,
meilisearch: GuardedData<Public, MeiliSearch>,
path: web::Path<IndexParam>,
params: web::Json<SearchQuery>,
) -> Result<HttpResponse, ResponseError> {
debug!("search called with params: {:?}", params);
let search_result = data
let search_result = meilisearch
.search(path.into_inner().index_uid, params.into_inner())
.await?;
@ -115,3 +141,42 @@ pub async fn search_with_post(
debug!("returns: {:?}", search_result);
Ok(HttpResponse::Ok().json(search_result))
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_fix_sort_query_parameters() {
let sort = fix_sort_query_parameters("_geoPoint(12, 13):asc");
assert_eq!(sort, vec!["_geoPoint(12,13):asc".to_string()]);
let sort = fix_sort_query_parameters("doggo:asc,_geoPoint(12.45,13.56):desc");
assert_eq!(
sort,
vec![
"doggo:asc".to_string(),
"_geoPoint(12.45,13.56):desc".to_string(),
]
);
let sort = fix_sort_query_parameters(
"doggo:asc , _geoPoint(12.45, 13.56, 2590352):desc , catto:desc",
);
assert_eq!(
sort,
vec![
"doggo:asc".to_string(),
"_geoPoint(12.45,13.56,2590352):desc".to_string(),
"catto:desc".to_string(),
]
);
let sort = fix_sort_query_parameters("doggo:asc , _geoPoint(1, 2), catto:desc");
// This is ugly but eh, I don't want to write a full parser just for this unused route
assert_eq!(
sort,
vec![
"doggo:asc".to_string(),
"_geoPoint(1,2),catto:desc".to_string(),
]
);
}
}

View File

@ -1,10 +1,12 @@
use actix_web::{web, HttpResponse};
use log::debug;
use actix_web::{web, HttpResponse};
use meilisearch_lib::index::{Settings, Unchecked};
use meilisearch_lib::index_controller::Update;
use meilisearch_lib::MeiliSearch;
use crate::error::ResponseError;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::index::Settings;
use crate::Data;
use crate::{error::ResponseError, index::Unchecked};
#[macro_export]
macro_rules! make_setting_route {
@ -13,29 +15,28 @@ macro_rules! make_setting_route {
use log::debug;
use actix_web::{web, HttpResponse, Resource};
use milli::update::Setting;
use meilisearch_lib::milli::update::Setting;
use meilisearch_lib::{MeiliSearch, index::Settings, index_controller::Update};
use crate::data;
use crate::error::ResponseError;
use crate::index::Settings;
use crate::extractors::authentication::{GuardedData, policies::*};
pub async fn delete(
data: GuardedData<Private, data::Data>,
meilisearch: GuardedData<Private, MeiliSearch>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
use crate::index::Settings;
let settings = Settings {
$attr: Setting::Reset,
..Default::default()
};
let update_status = data.update_settings(index_uid.into_inner(), settings, false).await?;
let update = Update::Settings(settings);
let update_status = meilisearch.register_update(index_uid.into_inner(), update, false).await?;
debug!("returns: {:?}", update_status);
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
}
pub async fn update(
data: GuardedData<Private, data::Data>,
meilisearch: GuardedData<Private, MeiliSearch>,
index_uid: actix_web::web::Path<String>,
body: actix_web::web::Json<Option<$type>>,
) -> std::result::Result<HttpResponse, ResponseError> {
@ -47,16 +48,17 @@ macro_rules! make_setting_route {
..Default::default()
};
let update_status = data.update_settings(index_uid.into_inner(), settings, true).await?;
let update = Update::Settings(settings);
let update_status = meilisearch.register_update(index_uid.into_inner(), update, true).await?;
debug!("returns: {:?}", update_status);
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
}
pub async fn get(
data: GuardedData<Private, data::Data>,
meilisearch: GuardedData<Private, MeiliSearch>,
index_uid: actix_web::web::Path<String>,
) -> std::result::Result<HttpResponse, ResponseError> {
let settings = data.settings(index_uid.into_inner()).await?;
let settings = meilisearch.settings(index_uid.into_inner()).await?;
debug!("returns: {:?}", settings);
let mut json = serde_json::json!(&settings);
let val = json[$camelcase_attr].take();
@ -149,13 +151,15 @@ generate_configure!(
);
pub async fn update_all(
data: GuardedData<Private, Data>,
meilisearch: GuardedData<Private, MeiliSearch>,
index_uid: web::Path<String>,
body: web::Json<Settings<Unchecked>>,
) -> Result<HttpResponse, ResponseError> {
let settings = body.into_inner().check();
let update_result = data
.update_settings(index_uid.into_inner(), settings, true)
let settings = body.into_inner();
let update = Update::Settings(settings);
let update_result = meilisearch
.register_update(index_uid.into_inner(), update, true)
.await?;
let json = serde_json::json!({ "updateId": update_result.id() });
debug!("returns: {:?}", json);
@ -163,7 +167,7 @@ pub async fn update_all(
}
pub async fn get_all(
data: GuardedData<Private, Data>,
data: GuardedData<Private, MeiliSearch>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let settings = data.settings(index_uid.into_inner()).await?;
@ -172,12 +176,14 @@ pub async fn get_all(
}
pub async fn delete_all(
data: GuardedData<Private, Data>,
data: GuardedData<Private, MeiliSearch>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let settings = Settings::cleared();
let update = Update::Settings(settings.into_unchecked());
let update_result = data
.update_settings(index_uid.into_inner(), settings, false)
.register_update(index_uid.into_inner(), update, false)
.await?;
let json = serde_json::json!({ "updateId": update_result.id() });
debug!("returns: {:?}", json);

View File

@ -1,12 +1,12 @@
use actix_web::{web, HttpResponse};
use chrono::{DateTime, Utc};
use log::debug;
use meilisearch_lib::MeiliSearch;
use serde::{Deserialize, Serialize};
use crate::error::ResponseError;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::routes::{IndexParam, UpdateStatusResponse};
use crate::Data;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::get().to(get_all_updates_status)))
@ -37,12 +37,12 @@ pub struct UpdateParam {
}
pub async fn get_update_status(
data: GuardedData<Private, Data>,
meilisearch: GuardedData<Private, MeiliSearch>,
path: web::Path<UpdateParam>,
) -> Result<HttpResponse, ResponseError> {
let params = path.into_inner();
let meta = data
.get_update_status(params.index_uid, params.update_id)
let meta = meilisearch
.update_status(params.index_uid, params.update_id)
.await?;
let meta = UpdateStatusResponse::from(meta);
debug!("returns: {:?}", meta);
@ -50,10 +50,12 @@ pub async fn get_update_status(
}
pub async fn get_all_updates_status(
data: GuardedData<Private, Data>,
meilisearch: GuardedData<Private, MeiliSearch>,
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
let metas = data.get_updates_status(path.into_inner().index_uid).await?;
let metas = meilisearch
.all_update_status(path.into_inner().index_uid)
.await?;
let metas = metas
.into_iter()
.map(UpdateStatusResponse::from)

View File

@ -3,13 +3,15 @@ use std::time::Duration;
use actix_web::{web, HttpResponse};
use chrono::{DateTime, Utc};
use log::debug;
use meilisearch_lib::index_controller::updates::status::{UpdateResult, UpdateStatus};
use serde::{Deserialize, Serialize};
use meilisearch_lib::index::{Settings, Unchecked};
use meilisearch_lib::{MeiliSearch, Update};
use crate::error::ResponseError;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::index::{Settings, Unchecked};
use crate::index_controller::{UpdateMeta, UpdateResult, UpdateStatus};
use crate::Data;
use crate::ApiKeys;
mod dump;
mod indexes;
@ -48,9 +50,9 @@ pub enum UpdateType {
impl From<&UpdateStatus> for UpdateType {
fn from(other: &UpdateStatus) -> Self {
use milli::update::IndexDocumentsMethod::*;
use meilisearch_lib::milli::update::IndexDocumentsMethod::*;
match other.meta() {
UpdateMeta::DocumentsAddition { method, .. } => {
Update::DocumentAddition { method, .. } => {
let number = match other {
UpdateStatus::Processed(processed) => match processed.success {
UpdateResult::DocumentsAddition(ref addition) => {
@ -67,13 +69,13 @@ impl From<&UpdateStatus> for UpdateType {
_ => unreachable!(),
}
}
UpdateMeta::ClearDocuments => UpdateType::ClearAll,
UpdateMeta::DeleteDocuments { ids } => UpdateType::DocumentsDeletion {
number: Some(ids.len()),
},
UpdateMeta::Settings(settings) => UpdateType::Settings {
Update::Settings(settings) => UpdateType::Settings {
settings: settings.clone(),
},
Update::ClearDocuments => UpdateType::ClearAll,
Update::DeleteDocuments(ids) => UpdateType::DocumentsDeletion {
number: Some(ids.len()),
},
}
}
}
@ -186,15 +188,17 @@ impl From<UpdateStatus> for UpdateStatusResponse {
let duration = Duration::from_millis(duration as u64).as_secs_f64();
let update_id = failed.id();
let response = failed.error;
let processed_at = failed.failed_at;
let enqueued_at = failed.from.from.enqueued_at;
let response = failed.into();
let content = FailedUpdateResult {
update_id,
update_type,
response,
duration,
enqueued_at: failed.from.from.enqueued_at,
processed_at: failed.failed_at,
enqueued_at,
processed_at,
};
UpdateStatusResponse::Failed { content }
}
@ -229,8 +233,10 @@ pub async fn running() -> HttpResponse {
HttpResponse::Ok().json(serde_json::json!({ "status": "MeiliSearch is running" }))
}
async fn get_stats(data: GuardedData<Private, Data>) -> Result<HttpResponse, ResponseError> {
let response = data.get_all_stats().await?;
async fn get_stats(
meilisearch: GuardedData<Private, MeiliSearch>,
) -> Result<HttpResponse, ResponseError> {
let response = meilisearch.get_all_stats().await?;
debug!("returns: {:?}", response);
Ok(HttpResponse::Ok().json(response))
@ -244,7 +250,7 @@ struct VersionResponse {
pkg_version: String,
}
async fn get_version(_data: GuardedData<Private, Data>) -> HttpResponse {
async fn get_version(_meilisearch: GuardedData<Private, MeiliSearch>) -> HttpResponse {
let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
@ -261,8 +267,8 @@ struct KeysResponse {
public: Option<String>,
}
pub async fn list_keys(data: GuardedData<Admin, Data>) -> HttpResponse {
let api_keys = data.api_keys.clone();
pub async fn list_keys(meilisearch: GuardedData<Admin, ApiKeys>) -> HttpResponse {
let api_keys = (*meilisearch).clone();
HttpResponse::Ok().json(&KeysResponse {
private: api_keys.private,
public: api_keys.public,
@ -276,17 +282,16 @@ pub async fn get_health() -> Result<HttpResponse, ResponseError> {
#[cfg(test)]
mod test {
use super::*;
use crate::data::Data;
use crate::extractors::authentication::GuardedData;
/// A trait implemented for a route that uses an authentication policy `Policy`.
///
/// This trait is used for regression testing of route authentication policies.
trait Is<Policy, T> {}
trait Is<Policy, Data, T> {}
macro_rules! impl_is_policy {
($($param:ident)*) => {
impl<Policy, Func, $($param,)* Res> Is<Policy, (($($param,)*), Res)> for Func
impl<Policy, Func, Data, $($param,)* Res> Is<Policy, Data, (($($param,)*), Res)> for Func
where Func: Fn(GuardedData<Policy, Data>, $($param,)*) -> Res {}
};
@ -306,7 +311,7 @@ mod test {
($($policy:ident => { $($route:expr,)*})*) => {
#[test]
fn test_auth() {
$($(let _: &dyn Is<$policy, _> = &$route;)*)*
$($(let _: &dyn Is<$policy, _, _> = &$route;)*)*
}
};
}
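For a single extra parameter, `impl_is_policy!` above generates roughly the following, which is why `test_auth` only compiles if every listed route takes a `GuardedData<Policy, Data>` as its first argument:
impl<Policy, Func, Data, A, Res> Is<Policy, Data, ((A,), Res)> for Func
where Func: Fn(GuardedData<Policy, Data>, A) -> Res {}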
@ -356,8 +361,10 @@ mod test {
indexes::documents::clear_all_documents,
indexes::documents::delete_documents,
indexes::documents::update_documents,
indexes::documents::add_documents,
indexes::documents::update_documents_json,
indexes::documents::update_documents_csv,
indexes::documents::add_documents_json,
indexes::documents::add_documents_csv,
indexes::documents::delete_document,
indexes::updates::get_all_updates_status,

View File

@ -2,12 +2,14 @@ use std::path::Path;
use actix_web::http::StatusCode;
use byte_unit::{Byte, ByteUnit};
use meilisearch_http::setup_meilisearch;
use meilisearch_lib::options::{IndexerOpts, MaxMemory};
use once_cell::sync::Lazy;
use serde_json::Value;
use tempdir::TempDir;
use tempfile::TempDir;
use urlencoding::encode;
use meilisearch_http::data::Data;
use meilisearch_http::option::{IndexerOpts, MaxMemory, Opt};
use meilisearch_http::option::Opt;
use super::index::Index;
use super::service::Service;
@ -15,17 +17,28 @@ use super::service::Service;
pub struct Server {
pub service: Service,
// hold ownership to the tempdir while we use the server instance.
_dir: Option<tempdir::TempDir>,
_dir: Option<TempDir>,
}
static TEST_TEMP_DIR: Lazy<TempDir> = Lazy::new(|| TempDir::new().unwrap());
impl Server {
pub async fn new() -> Self {
let dir = TempDir::new("meilisearch").unwrap();
let dir = TempDir::new().unwrap();
let opt = default_settings(dir.path());
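// tempfile resolves the temp dir from TMPDIR (TMP on Windows), so pointing it
// at the shared test dir keeps every file the server creates under one root.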
if cfg!(windows) {
std::env::set_var("TMP", TEST_TEMP_DIR.path());
} else {
std::env::set_var("TMPDIR", TEST_TEMP_DIR.path());
}
let data = Data::new(opt).unwrap();
let service = Service(data);
let options = default_settings(dir.path());
let meilisearch = setup_meilisearch(&options).unwrap();
let service = Service {
meilisearch,
options,
};
Server {
service,
@ -33,9 +46,12 @@ impl Server {
}
}
pub async fn new_with_options(opt: Opt) -> Self {
let data = Data::new(opt).unwrap();
let service = Service(data);
pub async fn new_with_options(options: Opt) -> Self {
let meilisearch = setup_meilisearch(&options).unwrap();
let service = Service {
meilisearch,
options,
};
Server {
service,

View File

@ -1,14 +1,17 @@
use actix_web::{http::StatusCode, test};
use meilisearch_lib::MeiliSearch;
use serde_json::Value;
use meilisearch_http::create_app;
use meilisearch_http::data::Data;
use meilisearch_http::{create_app, Opt};
pub struct Service(pub Data);
pub struct Service {
pub meilisearch: MeiliSearch,
pub options: Opt,
}
impl Service {
pub async fn post(&self, url: impl AsRef<str>, body: Value) -> (Value, StatusCode) {
let app = test::init_service(create_app!(&self.0, true)).await;
let app = test::init_service(create_app!(&self.meilisearch, true, &self.options)).await;
let req = test::TestRequest::post()
.uri(url.as_ref())
@ -28,7 +31,7 @@ impl Service {
url: impl AsRef<str>,
body: impl AsRef<str>,
) -> (Value, StatusCode) {
let app = test::init_service(create_app!(&self.0, true)).await;
let app = test::init_service(create_app!(&self.meilisearch, true, &self.options)).await;
let req = test::TestRequest::post()
.uri(url.as_ref())
@ -44,7 +47,7 @@ impl Service {
}
pub async fn get(&self, url: impl AsRef<str>) -> (Value, StatusCode) {
let app = test::init_service(create_app!(&self.0, true)).await;
let app = test::init_service(create_app!(&self.meilisearch, true, &self.options)).await;
let req = test::TestRequest::get().uri(url.as_ref()).to_request();
let res = test::call_service(&app, req).await;
@ -56,7 +59,7 @@ impl Service {
}
pub async fn put(&self, url: impl AsRef<str>, body: Value) -> (Value, StatusCode) {
let app = test::init_service(create_app!(&self.0, true)).await;
let app = test::init_service(create_app!(&self.meilisearch, true, &self.options)).await;
let req = test::TestRequest::put()
.uri(url.as_ref())
@ -71,7 +74,7 @@ impl Service {
}
pub async fn delete(&self, url: impl AsRef<str>) -> (Value, StatusCode) {
let app = test::init_service(create_app!(&self.0, true)).await;
let app = test::init_service(create_app!(&self.meilisearch, true, &self.options)).await;
let req = test::TestRequest::delete().uri(url.as_ref()).to_request();
let res = test::call_service(&app, req).await;

View File

@ -16,7 +16,12 @@ async fn add_documents_test_json_content_types() {
// this is what is expected and should work
let server = Server::new().await;
let app = test::init_service(create_app!(&server.service.0, true)).await;
let app = test::init_service(create_app!(
&server.service.meilisearch,
true,
&server.service.options
))
.await;
let req = test::TestRequest::post()
.uri("/indexes/dog/documents")
.set_payload(document.to_string())
@ -41,7 +46,12 @@ async fn add_documents_test_no_content_types() {
]);
let server = Server::new().await;
let app = test::init_service(create_app!(&server.service.0, true)).await;
let app = test::init_service(create_app!(
&server.service.meilisearch,
true,
&server.service.options
))
.await;
let req = test::TestRequest::post()
.uri("/indexes/dog/documents")
.set_payload(document.to_string())
@ -67,7 +77,12 @@ async fn add_documents_test_bad_content_types() {
]);
let server = Server::new().await;
let app = test::init_service(create_app!(&server.service.0, true)).await;
let app = test::init_service(create_app!(
&server.service.meilisearch,
true,
&server.service.options
))
.await;
let req = test::TestRequest::post()
.uri("/indexes/dog/documents")
.set_payload(document.to_string())
@ -137,8 +152,8 @@ async fn document_add_create_index_bad_uid() {
async fn document_update_create_index_bad_uid() {
let server = Server::new().await;
let index = server.index("883 fj!");
let (_response, code) = index.update_documents(json!([]), None).await;
assert_eq!(code, 400);
let (response, code) = index.update_documents(json!([]), None).await;
assert_eq!(code, 400, "{}", response);
}
#[actix_rt::test]

View File

@ -30,8 +30,8 @@ static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(|
#[actix_rt::test]
async fn get_settings_unexisting_index() {
let server = Server::new().await;
let (_response, code) = server.index("test").settings().await;
assert_eq!(code, 404)
let (response, code) = server.index("test").settings().await;
assert_eq!(code, 404, "{}", response)
}
#[actix_rt::test]
@ -167,8 +167,8 @@ async fn update_setting_unexisting_index() {
async fn update_setting_unexisting_index_invalid_uid() {
let server = Server::new().await;
let index = server.index("test##! ");
let (_response, code) = index.update_settings(json!({})).await;
assert_eq!(code, 400);
let (response, code) = index.update_settings(json!({})).await;
assert_eq!(code, 400, "{}", response);
}
macro_rules! test_setting_routes {

View File

@ -9,8 +9,8 @@ use meilisearch_http::Opt;
#[actix_rt::test]
async fn perform_snapshot() {
let temp = tempfile::tempdir_in(".").unwrap();
let snapshot_dir = tempfile::tempdir_in(".").unwrap();
let temp = tempfile::tempdir().unwrap();
let snapshot_dir = tempfile::tempdir().unwrap();
let options = Opt {
snapshot_dir: snapshot_dir.path().to_owned(),
@ -29,7 +29,7 @@ async fn perform_snapshot() {
sleep(Duration::from_secs(2)).await;
let temp = tempfile::tempdir_in(".").unwrap();
let temp = tempfile::tempdir().unwrap();
let snapshot_path = snapshot_dir
.path()

View File

@ -0,0 +1,63 @@
[package]
name = "meilisearch-lib"
version = "0.1.0"
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
actix-web = { version = "4.0.0-beta.9", features = ["rustls"] }
actix-web-static-files = { git = "https://github.com/MarinPostma/actix-web-static-files.git", rev = "39d8006", optional = true }
anyhow = { version = "1.0.43", features = ["backtrace"] }
async-stream = "0.3.2"
async-trait = "0.1.51"
arc-swap = "1.3.2"
byte-unit = { version = "4.0.12", default-features = false, features = ["std"] }
bytes = "1.1.0"
chrono = { version = "0.4.19", features = ["serde"] }
csv = "1.1.6"
crossbeam-channel = "0.5.1"
either = "1.6.1"
flate2 = "1.0.21"
fst = "0.4.7"
futures = "0.3.17"
futures-util = "0.3.17"
heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" }
http = "0.2.4"
indexmap = { version = "1.7.0", features = ["serde-1"] }
itertools = "0.10.1"
lazy_static = "1.4.0"
log = "0.4.14"
meilisearch-error = { path = "../meilisearch-error" }
meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" }
memmap = "0.7.0"
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.17.0"}
mime = "0.3.16"
num_cpus = "1.13.0"
once_cell = "1.8.0"
parking_lot = "0.11.2"
rand = "0.8.4"
rayon = "1.5.1"
regex = "1.5.4"
rustls = "0.19.1"
serde = { version = "1.0.130", features = ["derive"] }
serde_json = { version = "1.0.67", features = ["preserve_order"] }
siphasher = "0.3.7"
slice-group-by = "0.2.6"
structopt = "0.3.23"
tar = "0.4.37"
tempfile = "3.2.0"
thiserror = "1.0.28"
tokio = { version = "1.11.0", features = ["full"] }
uuid = { version = "0.8.2", features = ["serde"] }
walkdir = "2.3.2"
obkv = "0.2.0"
pin-project = "1.0.8"
whoami = { version = "1.1.3", optional = true }
reqwest = { version = "0.11.4", features = ["json", "rustls-tls"], default-features = false, optional = true }
sysinfo = "0.20.2"
derivative = "2.2.0"
[dev-dependencies]
actix-rt = "2.2.0"
paste = "1.0.5"

View File

@ -0,0 +1,364 @@
use std::fmt;
use std::io::{self, Read, Result as IoResult, Seek, Write};
use csv::{Reader as CsvReader, StringRecordsIntoIter};
use milli::documents::DocumentBatchBuilder;
use serde_json::{Deserializer, Map, Value};
type Result<T> = std::result::Result<T, DocumentFormatError>;
#[derive(Debug)]
pub enum PayloadType {
Ndjson,
Json,
Csv,
}
impl fmt::Display for PayloadType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
PayloadType::Ndjson => write!(f, "ndjson"),
PayloadType::Json => write!(f, "json"),
PayloadType::Csv => write!(f, "csv"),
}
}
}
#[derive(thiserror::Error, Debug)]
pub enum DocumentFormatError {
#[error("Internal error: {0}")]
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
#[error("{0}. The {1} payload provided is malformed.")]
MalformedPayload(
Box<dyn std::error::Error + Send + Sync + 'static>,
PayloadType,
),
}
internal_error!(DocumentFormatError: milli::documents::Error, io::Error);
macro_rules! malformed {
($type:path, $e:expr) => {
$e.map_err(|e| DocumentFormatError::MalformedPayload(Box::new(e), $type))
};
}
pub fn read_csv(input: impl Read, writer: impl Write + Seek) -> Result<()> {
let mut builder = DocumentBatchBuilder::new(writer).unwrap();
let iter = CsvDocumentIter::from_reader(input)?;
for doc in iter {
let doc = doc?;
builder.add_documents(doc).unwrap();
}
builder.finish().unwrap();
Ok(())
}
/// read jsonl from input and write an obkv batch to writer.
pub fn read_ndjson(input: impl Read, writer: impl Write + Seek) -> Result<()> {
let mut builder = DocumentBatchBuilder::new(writer)?;
let stream = Deserializer::from_reader(input).into_iter::<Map<String, Value>>();
for value in stream {
let value = malformed!(PayloadType::Ndjson, value)?;
builder.add_documents(&value)?;
}
builder.finish()?;
Ok(())
}
/// read json from input and write an obkv batch to writer.
pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result<()> {
let mut builder = DocumentBatchBuilder::new(writer).unwrap();
let documents: Vec<Map<String, Value>> =
malformed!(PayloadType::Json, serde_json::from_reader(input))?;
builder.add_documents(documents).unwrap();
builder.finish().unwrap();
Ok(())
}
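As a minimal usage sketch (not part of this diff; it assumes the `anyhow` and `tempfile` crates already present in the workspace), a raw CSV payload can be turned into a batch that milli consumes directly:
use std::io::{Seek, SeekFrom};
use milli::documents::DocumentBatchReader;

// Hypothetical helper: write the payload as an obkv batch into a temp file,
// rewind it, then hand it back to milli as a DocumentBatchReader.
fn csv_payload_to_batch(payload: &[u8]) -> anyhow::Result<DocumentBatchReader<std::fs::File>> {
    let mut tmp = tempfile::tempfile()?;
    read_csv(payload, &mut tmp)?;
    tmp.seek(SeekFrom::Start(0))?;
    Ok(DocumentBatchReader::from_reader(tmp)?)
}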
enum AllowedType {
String,
Number,
}
fn parse_csv_header(header: &str) -> (String, AllowedType) {
// if there are several separators, we only split on the last one.
match header.rsplit_once(':') {
Some((field_name, field_type)) => match field_type {
"string" => (field_name.to_string(), AllowedType::String),
"number" => (field_name.to_string(), AllowedType::Number),
// if the pattern isn't recognized, we keep the whole field.
_otherwise => (header.to_string(), AllowedType::String),
},
None => (header.to_string(), AllowedType::String),
}
}
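A few examples of how headers are interpreted (the last case is confirmed by the `several_colon_in_header` test below):
// parse_csv_header("pop")              == ("pop", String)
// parse_csv_header("pop:number")       == ("pop", Number)
// parse_csv_header("city:love:string") == ("city:love", String)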
pub struct CsvDocumentIter<R>
where
R: Read,
{
documents: StringRecordsIntoIter<R>,
headers: Vec<(String, AllowedType)>,
}
impl<R: Read> CsvDocumentIter<R> {
pub fn from_reader(reader: R) -> IoResult<Self> {
let mut records = CsvReader::from_reader(reader);
let headers = records
.headers()?
.into_iter()
.map(parse_csv_header)
.collect();
Ok(Self {
documents: records.into_records(),
headers,
})
}
}
impl<R: Read> Iterator for CsvDocumentIter<R> {
type Item = Result<Map<String, Value>>;
fn next(&mut self) -> Option<Self::Item> {
let csv_document = self.documents.next()?;
match csv_document {
Ok(csv_document) => {
let mut document = Map::new();
for ((field_name, field_type), value) in
self.headers.iter().zip(csv_document.into_iter())
{
let parsed_value = match field_type {
AllowedType::Number => {
malformed!(PayloadType::Csv, value.parse::<f64>().map(Value::from))
}
AllowedType::String => Ok(Value::String(value.to_string())),
};
match parsed_value {
Ok(value) => drop(document.insert(field_name.to_string(), value)),
Err(e) => return Some(Err(e)),
}
}
Some(Ok(document))
}
Err(e) => Some(Err(DocumentFormatError::MalformedPayload(
Box::new(e),
PayloadType::Csv,
))),
}
}
}
#[cfg(test)]
mod test {
use serde_json::json;
use super::*;
#[test]
fn simple_csv_document() {
let documents = r#"city,country,pop
"Boston","United States","4628910""#;
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
assert_eq!(
Value::Object(csv_iter.next().unwrap().unwrap()),
json!({
"city": "Boston",
"country": "United States",
"pop": "4628910",
})
);
}
#[test]
fn comma_in_field() {
let documents = r#"city,country,pop
"Boston","United, States","4628910""#;
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
assert_eq!(
Value::Object(csv_iter.next().unwrap().unwrap()),
json!({
"city": "Boston",
"country": "United, States",
"pop": "4628910",
})
);
}
#[test]
fn quote_in_field() {
let documents = r#"city,country,pop
"Boston","United"" States","4628910""#;
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
assert_eq!(
Value::Object(csv_iter.next().unwrap().unwrap()),
json!({
"city": "Boston",
"country": "United\" States",
"pop": "4628910",
})
);
}
#[test]
fn integer_in_field() {
let documents = r#"city,country,pop:number
"Boston","United States","4628910""#;
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
assert_eq!(
Value::Object(csv_iter.next().unwrap().unwrap()),
json!({
"city": "Boston",
"country": "United States",
"pop": 4628910.0,
})
);
}
#[test]
fn float_in_field() {
let documents = r#"city,country,pop:number
"Boston","United States","4628910.01""#;
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
assert_eq!(
Value::Object(csv_iter.next().unwrap().unwrap()),
json!({
"city": "Boston",
"country": "United States",
"pop": 4628910.01,
})
);
}
#[test]
fn several_colon_in_header() {
let documents = r#"city:love:string,country:state,pop
"Boston","United States","4628910""#;
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
assert_eq!(
Value::Object(csv_iter.next().unwrap().unwrap()),
json!({
"city:love": "Boston",
"country:state": "United States",
"pop": "4628910",
})
);
}
#[test]
fn ending_by_colon_in_header() {
let documents = r#"city:,country,pop
"Boston","United States","4628910""#;
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
assert_eq!(
Value::Object(csv_iter.next().unwrap().unwrap()),
json!({
"city:": "Boston",
"country": "United States",
"pop": "4628910",
})
);
}
#[test]
fn starting_by_colon_in_header() {
let documents = r#":city,country,pop
"Boston","United States","4628910""#;
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
assert_eq!(
Value::Object(csv_iter.next().unwrap().unwrap()),
json!({
":city": "Boston",
"country": "United States",
"pop": "4628910",
})
);
}
#[ignore]
#[test]
fn starting_by_colon_in_header2() {
let documents = r#":string,country,pop
"Boston","United States","4628910""#;
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
assert!(dbg!(csv_iter.next().unwrap()).is_err());
}
#[test]
fn double_colon_in_header() {
let documents = r#"city::string,country,pop
"Boston","United States","4628910""#;
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
assert_eq!(
Value::Object(csv_iter.next().unwrap().unwrap()),
json!({
"city:": "Boston",
"country": "United States",
"pop": "4628910",
})
);
}
#[test]
fn bad_type_in_header() {
let documents = r#"city,country:number,pop
"Boston","United States","4628910""#;
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
assert!(csv_iter.next().unwrap().is_err());
}
#[test]
fn bad_column_count1() {
let documents = r#"city,country,pop
"Boston","United States","4628910", "too much""#;
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
assert!(csv_iter.next().unwrap().is_err());
}
#[test]
fn bad_column_count2() {
let documents = r#"city,country,pop
"Boston","United States""#;
let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap();
assert!(csv_iter.next().unwrap().is_err());
}
}

View File

@ -0,0 +1,62 @@
use std::error::Error;
use std::fmt;
use meilisearch_error::{Code, ErrorCode};
use milli::UserError;
macro_rules! internal_error {
($target:ty : $($other:path), *) => {
$(
impl From<$other> for $target {
fn from(other: $other) -> Self {
Self::Internal(Box::new(other))
}
}
)*
}
}
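For illustration, an invocation such as `internal_error!(IndexError: heed::Error);` (the error type here is hypothetical) expands to roughly:
impl From<heed::Error> for IndexError {
    fn from(other: heed::Error) -> Self {
        Self::Internal(Box::new(other))
    }
}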
#[derive(Debug)]
pub struct MilliError<'a>(pub &'a milli::Error);
impl Error for MilliError<'_> {}
impl fmt::Display for MilliError<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.0.fmt(f)
}
}
impl ErrorCode for MilliError<'_> {
fn error_code(&self) -> Code {
match self.0 {
milli::Error::InternalError(_) => Code::Internal,
milli::Error::IoError(_) => Code::Internal,
milli::Error::UserError(ref error) => {
match error {
// TODO: wait for spec for new error codes.
UserError::SerdeJson(_)
| UserError::MaxDatabaseSizeReached
| UserError::InvalidDocumentId { .. }
| UserError::InvalidStoreFile
| UserError::NoSpaceLeftOnDevice
| UserError::DocumentLimitReached => Code::Internal,
UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded,
UserError::InvalidFilter(_) => Code::Filter,
UserError::InvalidFilterAttribute(_) => Code::Filter,
UserError::MissingDocumentId { .. } => Code::MissingDocumentId,
UserError::MissingPrimaryKey => Code::MissingPrimaryKey,
UserError::PrimaryKeyCannotBeChanged => Code::PrimaryKeyAlreadyPresent,
UserError::PrimaryKeyCannotBeReset => Code::PrimaryKeyAlreadyPresent,
UserError::SortRankingRuleMissing => Code::Sort,
UserError::UnknownInternalDocumentId { .. } => Code::DocumentNotFound,
UserError::InvalidFacetsDistribution { .. } => Code::BadRequest,
UserError::InvalidSortableAttribute { .. } => Code::Sort,
UserError::CriterionError(_) => Code::InvalidRankingRule,
UserError::InvalidGeoField { .. } => Code::InvalidGeoField,
UserError::SortError(_) => Code::Sort,
}
}
}
}
}

View File

@ -1,20 +1,21 @@
use std::fs::{create_dir_all, File};
use std::io::{BufRead, BufReader, Write};
use std::io::{BufReader, Seek, SeekFrom, Write};
use std::path::Path;
use std::sync::Arc;
use anyhow::{bail, Context};
use heed::RoTxn;
use anyhow::Context;
use heed::{EnvOpenOptions, RoTxn};
use indexmap::IndexMap;
use milli::update::{IndexDocumentsMethod, UpdateFormat::JsonStream};
use milli::documents::DocumentBatchReader;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use crate::document_formats::read_ndjson;
use crate::index::update_handler::UpdateHandler;
use crate::index::updates::apply_settings_to_builder;
use crate::index_controller::{asc_ranking_rule, desc_ranking_rule};
use crate::option::IndexerOpts;
use super::error::Result;
use super::{update_handler::UpdateHandler, Index, Settings, Unchecked};
use super::{Index, Settings, Unchecked};
#[derive(Serialize, Deserialize)]
struct DumpMeta {
@ -29,6 +30,11 @@ impl Index {
pub fn dump(&self, path: impl AsRef<Path>) -> Result<()> {
// acquire a write txn to make sure any ongoing write is finished before we start.
let txn = self.env.write_txn()?;
let path = path
.as_ref()
.join(format!("indexes/{}", self.uuid.to_string()));
create_dir_all(&path)?;
self.dump_documents(&txn, &path)?;
self.dump_meta(&txn, &path)?;
@ -83,7 +89,7 @@ impl Index {
src: impl AsRef<Path>,
dst: impl AsRef<Path>,
size: usize,
indexing_options: &IndexerOpts,
update_handler: &UpdateHandler,
) -> anyhow::Result<()> {
let dir_name = src
.as_ref()
@ -112,36 +118,48 @@ impl Index {
primary_key,
} = serde_json::from_slice(&patched_meta)?;
let settings = settings.check();
let index = Self::open(&dst_dir_path, size)?;
let mut options = EnvOpenOptions::new();
options.map_size(size);
let index = milli::Index::new(options, &dst_dir_path)?;
let mut txn = index.write_txn()?;
let handler = UpdateHandler::new(indexing_options)?;
// Apply settings first
let builder = update_handler.update_builder(0);
let mut builder = builder.settings(&mut txn, &index);
index.update_settings_txn(&mut txn, &settings, handler.update_builder(0))?;
if let Some(primary_key) = primary_key {
builder.set_primary_key(primary_key);
}
apply_settings_to_builder(&settings, &mut builder);
builder.execute(|_, _| ())?;
let document_file_path = src.as_ref().join(DATA_FILE_NAME);
let reader = File::open(&document_file_path)?;
let mut reader = BufReader::new(reader);
reader.fill_buf()?;
// If the document file is empty, we don't perform the document addition, to prevent
// a primary key error from being thrown.
if !reader.buffer().is_empty() {
index.update_documents_txn(
&mut txn,
JsonStream,
IndexDocumentsMethod::UpdateDocuments,
Some(reader),
handler.update_builder(0),
primary_key.as_deref(),
)?;
let reader = BufReader::new(File::open(&document_file_path)?);
let mut tmp_doc_file = tempfile::tempfile()?;
read_ndjson(reader, &mut tmp_doc_file)?;
tmp_doc_file.seek(SeekFrom::Start(0))?;
let documents_reader = DocumentBatchReader::from_reader(tmp_doc_file)?;
// If the document file is empty, we don't perform the document addition, to prevent
// a primary key error from being thrown.
if !documents_reader.is_empty() {
let builder = update_handler
.update_builder(0)
.index_documents(&mut txn, &index);
builder.execute(documents_reader, |_, _| ())?;
}
txn.commit()?;
match Arc::try_unwrap(index.0) {
Ok(inner) => inner.prepare_for_closing().wait(),
Err(_) => bail!("Could not close index properly."),
}
index.prepare_for_closing().wait();
Ok(())
}

View File

@ -17,6 +17,8 @@ pub enum IndexError {
Facet(#[from] FacetError),
#[error("{0}")]
Milli(#[from] milli::Error),
#[error("A primary key is already present. It's impossible to update it")]
ExistingPrimaryKey,
}
internal_error!(
@ -33,6 +35,7 @@ impl ErrorCode for IndexError {
IndexError::DocumentNotFound(_) => Code::DocumentNotFound,
IndexError::Facet(e) => e.error_code(),
IndexError::Milli(e) => MilliError(e).error_code(),
IndexError::ExistingPrimaryKey => Code::PrimaryKeyAlreadyPresent,
}
}
}

View File

@ -5,18 +5,23 @@ use std::ops::Deref;
use std::path::Path;
use std::sync::Arc;
use chrono::{DateTime, Utc};
use heed::{EnvOpenOptions, RoTxn};
use milli::update::Setting;
use milli::{obkv_to_json, FieldId};
use milli::{obkv_to_json, FieldDistribution, FieldId};
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use error::Result;
pub use search::{default_crop_length, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
pub use updates::{Checked, Facets, Settings, Unchecked};
pub use updates::{apply_settings_to_builder, Checked, Facets, Settings, Unchecked};
use uuid::Uuid;
use crate::helpers::EnvSizer;
use crate::index_controller::update_file_store::UpdateFileStore;
use crate::EnvSizer;
use self::error::IndexError;
use self::update_handler::UpdateHandler;
pub mod error;
pub mod update_handler;
@ -27,26 +32,99 @@ mod updates;
pub type Document = Map<String, Value>;
#[derive(Clone)]
pub struct Index(pub Arc<milli::Index>);
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexMeta {
created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
pub primary_key: Option<String>,
}
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct IndexStats {
#[serde(skip)]
pub size: u64,
pub number_of_documents: u64,
/// Whether the current index is performing an update. It is initially `None` when the
/// index returns it, since it is the `UpdateStore` that knows which index is currently indexing. It is
/// later set to either true or false once we retrieve the information from the `UpdateStore`.
pub is_indexing: Option<bool>,
pub field_distribution: FieldDistribution,
}
impl IndexMeta {
pub fn new(index: &Index) -> Result<Self> {
let txn = index.read_txn()?;
Self::new_txn(index, &txn)
}
fn new_txn(index: &Index, txn: &heed::RoTxn) -> Result<Self> {
let created_at = index.created_at(txn)?;
let updated_at = index.updated_at(txn)?;
let primary_key = index.primary_key(txn)?.map(String::from);
Ok(Self {
created_at,
updated_at,
primary_key,
})
}
}
#[derive(Clone, derivative::Derivative)]
#[derivative(Debug)]
pub struct Index {
pub uuid: Uuid,
#[derivative(Debug = "ignore")]
pub inner: Arc<milli::Index>,
#[derivative(Debug = "ignore")]
update_file_store: Arc<UpdateFileStore>,
#[derivative(Debug = "ignore")]
update_handler: Arc<UpdateHandler>,
}
impl Deref for Index {
type Target = milli::Index;
fn deref(&self) -> &Self::Target {
self.0.as_ref()
self.inner.as_ref()
}
}
impl Index {
pub fn open(path: impl AsRef<Path>, size: usize) -> Result<Self> {
pub fn open(
path: impl AsRef<Path>,
size: usize,
update_file_store: Arc<UpdateFileStore>,
uuid: Uuid,
update_handler: Arc<UpdateHandler>,
) -> Result<Self> {
create_dir_all(&path)?;
let mut options = EnvOpenOptions::new();
options.map_size(size);
let index = milli::Index::new(options, &path)?;
Ok(Index(Arc::new(index)))
let inner = Arc::new(milli::Index::new(options, &path)?);
Ok(Index {
inner,
update_file_store,
uuid,
update_handler,
})
}
pub fn stats(&self) -> Result<IndexStats> {
let rtxn = self.read_txn()?;
Ok(IndexStats {
size: self.size(),
number_of_documents: self.number_of_documents(&rtxn)?,
is_indexing: None,
field_distribution: self.field_distribution(&rtxn)?,
})
}
pub fn meta(&self) -> Result<IndexMeta> {
IndexMeta::new(self)
}
pub fn settings(&self) -> Result<Settings<Checked>> {
let txn = self.read_txn()?;
self.settings_txn(&txn)
@ -195,4 +273,15 @@ impl Index {
displayed_fields_ids.retain(|fid| attributes_to_retrieve_ids.contains(fid));
Ok(displayed_fields_ids)
}
pub fn snapshot(&self, path: impl AsRef<Path>) -> Result<()> {
let mut dst = path.as_ref().join(format!("indexes/{}/", self.uuid));
create_dir_all(&dst)?;
dst.push("data.mdb");
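// Hold the write txn for the duration of the copy so no writer can run
// while the env is being snapshotted.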
let _txn = self.write_txn()?;
self.inner
.env
.copy_to_path(dst, heed::CompactionOption::Enabled)?;
Ok(())
}
}

View File

@ -6,9 +6,10 @@ use either::Either;
use heed::RoTxn;
use indexmap::IndexMap;
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token};
use milli::{AscDesc, FieldId, FieldsIdsMap, FilterCondition, MatchingWords, UserError};
use milli::{AscDesc, FieldId, FieldsIdsMap, FilterCondition, MatchingWords, SortError};
use regex::Regex;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use serde_json::{json, Value};
use crate::index::error::FacetError;
use crate::index::IndexError;
@ -110,12 +111,9 @@ impl Index {
if let Some(ref sort) = query.sort {
let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() {
Ok(sorts) => sorts,
Err(UserError::InvalidAscDescSyntax { name }) => {
return Err(IndexError::Milli(
UserError::InvalidSortName { name }.into(),
))
Err(asc_desc_error) => {
return Err(IndexError::Milli(SortError::from(asc_desc_error).into()))
}
Err(err) => return Err(IndexError::Milli(err.into())),
};
search.sort_criteria(sort);
@ -193,7 +191,7 @@ impl Index {
let documents_iter = self.documents(&rtxn, documents_ids)?;
for (_id, obkv) in documents_iter {
let document = make_document(&to_retrieve_ids, &fields_ids_map, obkv)?;
let mut document = make_document(&to_retrieve_ids, &fields_ids_map, obkv)?;
let matches_info = query
.matches
@ -207,6 +205,10 @@ impl Index {
&formatted_options,
)?;
if let Some(sort) = query.sort.as_ref() {
insert_geo_distance(sort, &mut document);
}
let hit = SearchHit {
document,
formatted,
@ -247,6 +249,25 @@ impl Index {
}
}
fn insert_geo_distance(sorts: &[String], document: &mut Document) {
lazy_static::lazy_static! {
static ref GEO_REGEX: Regex =
Regex::new(r"_geoPoint\(\s*([[:digit:].\-]+)\s*,\s*([[:digit:].\-]+)\s*\)").unwrap();
};
if let Some(capture_group) = sorts.iter().find_map(|sort| GEO_REGEX.captures(sort)) {
// TODO: TAMO: milli encountered an internal error, what do we want to do?
let base = [
capture_group[1].parse().unwrap(),
capture_group[2].parse().unwrap(),
];
let geo_point = &document.get("_geo").unwrap_or(&json!(null));
if let Some((lat, lng)) = geo_point["lat"].as_f64().zip(geo_point["lng"].as_f64()) {
let distance = milli::distance_between_two_points(&base, &[lat, lng]);
document.insert("_geoDistance".to_string(), json!(distance.round() as usize));
}
}
}
fn compute_matches<A: AsRef<[u8]>>(
matcher: &impl Matcher,
document: &Document,
@ -662,7 +683,7 @@ fn parse_filter_array(
}
}
Ok(FilterCondition::from_array(txn, &index.0, ands)?)
Ok(FilterCondition::from_array(txn, index, ands)?)
}
#[cfg(test)]
@ -1332,4 +1353,65 @@ mod test {
r##"{"about": [MatchInfo { start: 0, length: 6 }, MatchInfo { start: 31, length: 7 }, MatchInfo { start: 191, length: 7 }, MatchInfo { start: 225, length: 7 }, MatchInfo { start: 233, length: 6 }], "color": [MatchInfo { start: 0, length: 3 }]}"##
);
}
#[test]
fn test_insert_geo_distance() {
let value: Document = serde_json::from_str(
r#"{
"_geo": {
"lat": 50.629973371633746,
"lng": 3.0569447399419567
},
"city": "Lille",
"id": "1"
}"#,
)
.unwrap();
let sorters = &["_geoPoint(50.629973371633746,3.0569447399419567):desc".to_string()];
let mut document = value.clone();
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
let sorters = &["_geoPoint(50.629973371633746, 3.0569447399419567):asc".to_string()];
let mut document = value.clone();
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
let sorters =
&["_geoPoint( 50.629973371633746 , 3.0569447399419567 ):desc".to_string()];
let mut document = value.clone();
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
let sorters = &[
"prix:asc",
"villeneuve:desc",
"_geoPoint(50.629973371633746, 3.0569447399419567):asc",
"ubu:asc",
]
.map(|s| s.to_string());
let mut document = value.clone();
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
// only the first geoPoint is used to compute the distance
let sorters = &[
"chien:desc",
"_geoPoint(50.629973371633746, 3.0569447399419567):asc",
"pangolin:desc",
"_geoPoint(100.0, -80.0):asc",
"chat:asc",
]
.map(|s| s.to_string());
let mut document = value.clone();
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
// there was no _geoPoint so nothing is inserted in the document
let sorters = &["chien:asc".to_string()];
let mut document = value;
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), None);
}
}

View File

@ -1,13 +1,8 @@
use std::fs::File;
use crate::index::Index;
use milli::update::UpdateBuilder;
use milli::CompressionType;
use rayon::ThreadPool;
use crate::index_controller::UpdateMeta;
use crate::index_controller::{Failed, Processed, Processing};
use crate::option::IndexerOpts;
use crate::options::IndexerOpts;
pub struct UpdateHandler {
max_nb_chunks: Option<usize>,
@ -51,39 +46,4 @@ impl UpdateHandler {
update_builder.chunk_compression_type(self.chunk_compression_type);
update_builder
}
pub fn handle_update(
&self,
meta: Processing,
content: Option<File>,
index: Index,
) -> Result<Processed, Failed> {
use UpdateMeta::*;
let update_id = meta.id();
let update_builder = self.update_builder(update_id);
let result = match meta.meta() {
DocumentsAddition {
method,
format,
primary_key,
} => index.update_documents(
*format,
*method,
content,
update_builder,
primary_key.as_deref(),
),
ClearDocuments => index.clear_documents(update_builder),
DeleteDocuments { ids } => index.delete_documents(ids, update_builder),
Settings(settings) => index.update_settings(&settings.clone().check(), update_builder),
};
match result {
Ok(result) => Ok(meta.process(result)),
Err(e) => Err(meta.fail(e.into())),
}
}
}

View File

@ -1,17 +1,18 @@
use std::collections::{BTreeMap, BTreeSet};
use std::io;
use std::marker::PhantomData;
use std::num::NonZeroUsize;
use flate2::read::GzDecoder;
use log::{debug, info, trace};
use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder, UpdateFormat};
use milli::documents::DocumentBatchReader;
use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder};
use serde::{Deserialize, Serialize, Serializer};
use uuid::Uuid;
use crate::index_controller::UpdateResult;
use crate::index_controller::updates::status::{Failed, Processed, Processing, UpdateResult};
use crate::Update;
use super::error::Result;
use super::Index;
use super::error::{IndexError, Result};
use super::{Index, IndexMeta};
fn serialize_with_wildcard<S>(
field: &Setting<Vec<String>>,
@ -35,6 +36,9 @@ pub struct Checked;
#[derive(Clone, Default, Debug, Serialize, Deserialize)]
pub struct Unchecked;
/// Holds all the settings for an index. `T` can either be `Checked` if it represents settings
/// whose validity is guaranteed, or `Unchecked` if they still need to be validated. In the latter case, a
/// call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
@ -160,33 +164,84 @@ pub struct Facets {
}
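A minimal sketch of the `Settings<T>` typestate flow described above (`payload` is illustrative, standing in for a request body):
// Deserialize user input as unchecked settings, then validate them.
fn check_settings(payload: &[u8]) -> serde_json::Result<Settings<Checked>> {
    let unchecked: Settings<Unchecked> = serde_json::from_slice(payload)?;
    Ok(unchecked.check())
}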
impl Index {
pub fn update_documents(
&self,
format: UpdateFormat,
method: IndexDocumentsMethod,
content: Option<impl io::Read>,
update_builder: UpdateBuilder,
primary_key: Option<&str>,
) -> Result<UpdateResult> {
let mut txn = self.write_txn()?;
let result = self.update_documents_txn(
&mut txn,
format,
method,
content,
update_builder,
primary_key,
)?;
txn.commit()?;
Ok(result)
pub fn handle_update(&self, update: Processing) -> std::result::Result<Processed, Failed> {
let update_id = update.id();
let update_builder = self.update_handler.update_builder(update_id);
let result = (|| {
let mut txn = self.write_txn()?;
let result = match update.meta() {
Update::DocumentAddition {
primary_key,
content_uuid,
method,
} => self.update_documents(
&mut txn,
*method,
*content_uuid,
update_builder,
primary_key.as_deref(),
),
Update::Settings(settings) => {
let settings = settings.clone().check();
self.update_settings(&mut txn, &settings, update_builder)
}
Update::ClearDocuments => {
let builder = update_builder.clear_documents(&mut txn, self);
let _count = builder.execute()?;
Ok(UpdateResult::Other)
}
Update::DeleteDocuments(ids) => {
let mut builder = update_builder.delete_documents(&mut txn, self)?;
// We ignore nonexistent document ids
ids.iter().for_each(|id| {
builder.delete_external_id(id);
});
let deleted = builder.execute()?;
Ok(UpdateResult::DocumentDeletion { deleted })
}
};
txn.commit()?;
result
})();
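// Whether the update succeeded or failed, the temporary content file is no
// longer needed, so it is deleted on a best-effort basis.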
if let Update::DocumentAddition { content_uuid, .. } = update.from.meta() {
let _ = self.update_file_store.delete(*content_uuid);
}
match result {
Ok(result) => Ok(update.process(result)),
Err(e) => Err(update.fail(e)),
}
}
pub fn update_documents_txn<'a, 'b>(
pub fn update_primary_key(&self, primary_key: Option<String>) -> Result<IndexMeta> {
match primary_key {
Some(primary_key) => {
let mut txn = self.write_txn()?;
if self.primary_key(&txn)?.is_some() {
return Err(IndexError::ExistingPrimaryKey);
}
let mut builder = UpdateBuilder::new(0).settings(&mut txn, self);
builder.set_primary_key(primary_key);
builder.execute(|_, _| ())?;
let meta = IndexMeta::new_txn(self, &txn)?;
txn.commit()?;
Ok(meta)
}
None => {
let meta = IndexMeta::new(self)?;
Ok(meta)
}
}
}
fn update_documents<'a, 'b>(
&'a self,
txn: &mut heed::RwTxn<'a, 'b>,
format: UpdateFormat,
method: IndexDocumentsMethod,
content: Option<impl io::Read>,
content_uuid: Uuid,
update_builder: UpdateBuilder,
primary_key: Option<&str>,
) -> Result<UpdateResult> {
@ -199,40 +254,22 @@ impl Index {
builder.execute(|_, _| ())?;
}
let mut builder = update_builder.index_documents(txn, self);
builder.update_format(format);
builder.index_documents_method(method);
let indexing_callback =
|indexing_step, update_id| debug!("update {}: {:?}", update_id, indexing_step);
let gzipped = false;
let addition = match content {
Some(content) if gzipped => {
builder.execute(GzDecoder::new(content), indexing_callback)?
}
Some(content) => builder.execute(content, indexing_callback)?,
None => builder.execute(std::io::empty(), indexing_callback)?,
};
let content_file = self.update_file_store.get_update(content_uuid).unwrap();
let reader = DocumentBatchReader::from_reader(content_file).unwrap();
let mut builder = update_builder.index_documents(txn, self);
builder.index_documents_method(method);
let addition = builder.execute(reader, indexing_callback)?;
info!("document addition done: {:?}", addition);
Ok(UpdateResult::DocumentsAddition(addition))
}
pub fn clear_documents(&self, update_builder: UpdateBuilder) -> Result<UpdateResult> {
// We must use the write transaction of the update here.
let mut wtxn = self.write_txn()?;
let builder = update_builder.clear_documents(&mut wtxn, self);
let _count = builder.execute()?;
wtxn.commit()
.and(Ok(UpdateResult::Other))
.map_err(Into::into)
}
pub fn update_settings_txn<'a, 'b>(
fn update_settings<'a, 'b>(
&'a self,
txn: &mut heed::RwTxn<'a, 'b>,
settings: &Settings<Checked>,
@ -241,59 +278,7 @@ impl Index {
// We must use the write transaction of the update here.
let mut builder = update_builder.settings(txn, self);
match settings.searchable_attributes {
Setting::Set(ref names) => builder.set_searchable_fields(names.clone()),
Setting::Reset => builder.reset_searchable_fields(),
Setting::NotSet => (),
}
match settings.displayed_attributes {
Setting::Set(ref names) => builder.set_displayed_fields(names.clone()),
Setting::Reset => builder.reset_displayed_fields(),
Setting::NotSet => (),
}
match settings.filterable_attributes {
Setting::Set(ref facets) => {
builder.set_filterable_fields(facets.clone().into_iter().collect())
}
Setting::Reset => builder.reset_filterable_fields(),
Setting::NotSet => (),
}
match settings.sortable_attributes {
Setting::Set(ref fields) => {
builder.set_sortable_fields(fields.iter().cloned().collect())
}
Setting::Reset => builder.reset_sortable_fields(),
Setting::NotSet => (),
}
match settings.ranking_rules {
Setting::Set(ref criteria) => builder.set_criteria(criteria.clone()),
Setting::Reset => builder.reset_criteria(),
Setting::NotSet => (),
}
match settings.stop_words {
Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()),
Setting::Reset => builder.reset_stop_words(),
Setting::NotSet => (),
}
match settings.synonyms {
Setting::Set(ref synonyms) => {
builder.set_synonyms(synonyms.clone().into_iter().collect())
}
Setting::Reset => builder.reset_synonyms(),
Setting::NotSet => (),
}
match settings.distinct_attribute {
Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()),
Setting::Reset => builder.reset_distinct_field(),
Setting::NotSet => (),
}
apply_settings_to_builder(settings, &mut builder);
builder.execute(|indexing_step, update_id| {
debug!("update {}: {:?}", update_id, indexing_step)
@ -301,35 +286,60 @@ impl Index {
Ok(UpdateResult::Other)
}
}
pub fn update_settings(
&self,
settings: &Settings<Checked>,
update_builder: UpdateBuilder,
) -> Result<UpdateResult> {
let mut txn = self.write_txn()?;
let result = self.update_settings_txn(&mut txn, settings, update_builder)?;
txn.commit()?;
Ok(result)
pub fn apply_settings_to_builder(
settings: &Settings<Checked>,
builder: &mut milli::update::Settings,
) {
match settings.searchable_attributes {
Setting::Set(ref names) => builder.set_searchable_fields(names.clone()),
Setting::Reset => builder.reset_searchable_fields(),
Setting::NotSet => (),
}
pub fn delete_documents(
&self,
document_ids: &[String],
update_builder: UpdateBuilder,
) -> Result<UpdateResult> {
let mut txn = self.write_txn()?;
let mut builder = update_builder.delete_documents(&mut txn, self)?;
match settings.displayed_attributes {
Setting::Set(ref names) => builder.set_displayed_fields(names.clone()),
Setting::Reset => builder.reset_displayed_fields(),
Setting::NotSet => (),
}
// We ignore nonexistent document ids
document_ids.iter().for_each(|id| {
builder.delete_external_id(id);
});
match settings.filterable_attributes {
Setting::Set(ref facets) => {
builder.set_filterable_fields(facets.clone().into_iter().collect())
}
Setting::Reset => builder.reset_filterable_fields(),
Setting::NotSet => (),
}
let deleted = builder.execute()?;
txn.commit()
.and(Ok(UpdateResult::DocumentDeletion { deleted }))
.map_err(Into::into)
match settings.sortable_attributes {
Setting::Set(ref fields) => builder.set_sortable_fields(fields.iter().cloned().collect()),
Setting::Reset => builder.reset_sortable_fields(),
Setting::NotSet => (),
}
match settings.ranking_rules {
Setting::Set(ref criteria) => builder.set_criteria(criteria.clone()),
Setting::Reset => builder.reset_criteria(),
Setting::NotSet => (),
}
match settings.stop_words {
Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()),
Setting::Reset => builder.reset_stop_words(),
Setting::NotSet => (),
}
match settings.synonyms {
Setting::Set(ref synonyms) => builder.set_synonyms(synonyms.clone().into_iter().collect()),
Setting::Reset => builder.reset_synonyms(),
Setting::NotSet => (),
}
match settings.distinct_attribute {
Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()),
Setting::Reset => builder.reset_distinct_field(),
Setting::NotSet => (),
}
}

View File

@ -7,19 +7,18 @@ use chrono::Utc;
use futures::{lock::Mutex, stream::StreamExt};
use log::{error, trace};
use tokio::sync::{mpsc, oneshot, RwLock};
use update_actor::UpdateActorHandle;
use uuid_resolver::UuidResolverHandle;
use super::error::{DumpActorError, Result};
use super::{DumpInfo, DumpMsg, DumpStatus, DumpTask};
use crate::index_controller::{update_actor, uuid_resolver};
use crate::index_controller::index_resolver::HardStateIndexResolver;
use crate::index_controller::updates::UpdateSender;
pub const CONCURRENT_DUMP_MSG: usize = 10;
pub struct DumpActor<UuidResolver, Update> {
pub struct DumpActor {
inbox: Option<mpsc::Receiver<DumpMsg>>,
uuid_resolver: UuidResolver,
update: Update,
index_resolver: Arc<HardStateIndexResolver>,
update: UpdateSender,
dump_path: PathBuf,
lock: Arc<Mutex<()>>,
dump_infos: Arc<RwLock<HashMap<String, DumpInfo>>>,
@ -32,15 +31,11 @@ fn generate_uid() -> String {
Utc::now().format("%Y%m%d-%H%M%S%3f").to_string()
}
impl<UuidResolver, Update> DumpActor<UuidResolver, Update>
where
UuidResolver: UuidResolverHandle + Send + Sync + Clone + 'static,
Update: UpdateActorHandle + Send + Sync + Clone + 'static,
{
impl DumpActor {
pub fn new(
inbox: mpsc::Receiver<DumpMsg>,
uuid_resolver: UuidResolver,
update: Update,
index_resolver: Arc<HardStateIndexResolver>,
update: UpdateSender,
dump_path: impl AsRef<Path>,
index_db_size: usize,
update_db_size: usize,
@ -49,7 +44,7 @@ where
let lock = Arc::new(Mutex::new(()));
Self {
inbox: Some(inbox),
uuid_resolver,
index_resolver,
update,
dump_path: dump_path.as_ref().into(),
dump_infos,
@ -118,7 +113,7 @@ where
let task = DumpTask {
path: self.dump_path.clone(),
uuid_resolver: self.uuid_resolver.clone(),
index_resolver: self.index_resolver.clone(),
update_handle: self.update.clone(),
uid: uid.clone(),
update_db_size: self.update_db_size,

View File

@ -1,7 +1,7 @@
use meilisearch_error::{Code, ErrorCode};
use crate::index_controller::update_actor::error::UpdateActorError;
use crate::index_controller::uuid_resolver::error::UuidResolverError;
use crate::index_controller::index_resolver::error::IndexResolverError;
use crate::index_controller::updates::error::UpdateLoopError;
pub type Result<T> = std::result::Result<T, DumpActorError>;
@ -14,9 +14,9 @@ pub enum DumpActorError {
#[error("Internal error: {0}")]
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
#[error("{0}")]
UuidResolver(#[from] UuidResolverError),
IndexResolver(#[from] IndexResolverError),
#[error("{0}")]
UpdateActor(#[from] UpdateActorError),
UpdateLoop(#[from] UpdateLoopError),
}
macro_rules! internal_error {
@ -45,8 +45,8 @@ impl ErrorCode for DumpActorError {
DumpActorError::DumpAlreadyRunning => Code::DumpAlreadyInProgress,
DumpActorError::DumpDoesNotExist(_) => Code::NotFound,
DumpActorError::Internal(_) => Code::Internal,
DumpActorError::UuidResolver(e) => e.error_code(),
DumpActorError::UpdateActor(e) => e.error_code(),
DumpActorError::IndexResolver(e) => e.error_code(),
DumpActorError::UpdateLoop(e) => e.error_code(),
}
}
}

View File

@ -1,8 +1,10 @@
use std::path::Path;
use std::sync::Arc;
use actix_web::web::Bytes;
use tokio::sync::{mpsc, oneshot};
use crate::index_controller::index_resolver::HardStateIndexResolver;
use super::error::Result;
use super::{DumpActor, DumpActorHandle, DumpInfo, DumpMsg};
@ -31,15 +33,15 @@ impl DumpActorHandle for DumpActorHandleImpl {
impl DumpActorHandleImpl {
pub fn new(
path: impl AsRef<Path>,
uuid_resolver: crate::index_controller::uuid_resolver::UuidResolverHandleImpl,
update: crate::index_controller::update_actor::UpdateActorHandleImpl<Bytes>,
index_resolver: Arc<HardStateIndexResolver>,
update: crate::index_controller::updates::UpdateSender,
index_db_size: usize,
update_db_size: usize,
) -> anyhow::Result<Self> {
let (sender, receiver) = mpsc::channel(10);
let actor = DumpActor::new(
receiver,
uuid_resolver,
index_resolver,
update,
path,
index_db_size,

View File

@ -1,22 +1,22 @@
use std::collections::{BTreeMap, BTreeSet};
use std::fs::{create_dir_all, File};
use std::io::BufRead;
use std::io::{BufReader, Seek, SeekFrom};
use std::marker::PhantomData;
use std::path::Path;
use std::sync::Arc;
use heed::EnvOpenOptions;
use log::{error, info, warn};
use milli::update::{IndexDocumentsMethod, Setting, UpdateFormat};
use milli::documents::DocumentBatchReader;
use milli::update::Setting;
use serde::{Deserialize, Deserializer, Serialize};
use uuid::Uuid;
use crate::index_controller::{self, uuid_resolver::HeedUuidStore, IndexMetadata};
use crate::index_controller::{asc_ranking_rule, desc_ranking_rule};
use crate::{
index::{update_handler::UpdateHandler, Index, Unchecked},
option::IndexerOpts,
};
use crate::document_formats::read_ndjson;
use crate::index::apply_settings_to_builder;
use crate::index::update_handler::UpdateHandler;
use crate::index_controller::index_resolver::uuid_store::HeedUuidStore;
use crate::index_controller::{self, asc_ranking_rule, desc_ranking_rule, IndexMetadata};
use crate::{index::Unchecked, options::IndexerOpts};
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
@ -99,40 +99,49 @@ fn load_index(
let mut options = EnvOpenOptions::new();
options.map_size(size);
let index = milli::Index::new(options, index_path)?;
let index = Index(Arc::new(index));
let update_handler = UpdateHandler::new(indexer_options)?;
let mut txn = index.write_txn()?;
// extract `settings.json` file and import content
let settings = import_settings(&src)?;
let settings: index_controller::Settings<Unchecked> = settings.into();
let mut txn = index.write_txn()?;
let handler = UpdateHandler::new(indexer_options)?;
index.update_settings_txn(&mut txn, &settings.check(), handler.update_builder(0))?;
let mut builder = handler.update_builder(0).settings(&mut txn, &index);
let file = File::open(&src.as_ref().join("documents.jsonl"))?;
let mut reader = std::io::BufReader::new(file);
reader.fill_buf()?;
if !reader.buffer().is_empty() {
index.update_documents_txn(
&mut txn,
UpdateFormat::JsonStream,
IndexDocumentsMethod::ReplaceDocuments,
Some(reader),
handler.update_builder(0),
primary_key,
)?;
if let Some(primary_key) = primary_key {
builder.set_primary_key(primary_key.to_string());
}
apply_settings_to_builder(&settings.check(), &mut builder);
builder.execute(|_, _| ())?;
let reader = BufReader::new(File::open(&src.as_ref().join("documents.jsonl"))?);
let mut tmp_doc_file = tempfile::tempfile()?;
read_ndjson(reader, &mut tmp_doc_file)?;
tmp_doc_file.seek(SeekFrom::Start(0))?;
let documents_reader = DocumentBatchReader::from_reader(tmp_doc_file)?;
// If the document file is empty, we don't perform the document addition, to prevent
// a primary key error from being thrown.
if !documents_reader.is_empty() {
let builder = update_handler
.update_builder(0)
.index_documents(&mut txn, &index);
builder.execute(documents_reader, |_, _| ())?;
}
txn.commit()?;
// Finally, we extract the original milli::Index and close it
Arc::try_unwrap(index.0)
.map_err(|_e| "Couldn't close the index properly")
.unwrap()
.prepare_for_closing()
.wait();
index.prepare_for_closing().wait();
// Updates are ignored in dumps V1.

View File

@ -4,9 +4,10 @@ use chrono::{DateTime, Utc};
use log::info;
use serde::{Deserialize, Serialize};
use crate::index::Index;
use crate::index_controller::{update_actor::UpdateStore, uuid_resolver::HeedUuidStore};
use crate::option::IndexerOpts;
use crate::index_controller::index_resolver::IndexResolver;
use crate::index_controller::update_file_store::UpdateFileStore;
use crate::index_controller::updates::store::UpdateStore;
use crate::options::IndexerOpts;
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
@ -40,19 +41,11 @@ impl MetadataV2 {
self.dump_date, self.db_version
);
info!("Loading index database.");
HeedUuidStore::load_dump(src.as_ref(), &dst)?;
info!("Loading updates.");
IndexResolver::load_dump(src.as_ref(), &dst, index_db_size, indexing_options)?;
UpdateFileStore::load_dump(src.as_ref(), &dst)?;
UpdateStore::load_dump(&src, &dst, update_db_size)?;
info!("Loading indexes.");
let indexes_path = src.as_ref().join("indexes");
let indexes = indexes_path.read_dir()?;
for index in indexes {
let index = index?;
Index::load_dump(&index.path(), &dst, index_db_size, indexing_options)?;
}
Ok(())
}
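For orientation, a hedged sketch of a call site for this loader — the exact argument order of `load_dump` and a `Default` impl on `IndexerOpts` are assumptions, the paths and sizes are illustrative:

    // Deserialize the dump metadata, then replay the dump into a fresh db path.
    let meta: MetadataV2 = serde_json::from_reader(File::open(&meta_path)?)?;
    meta.load_dump(
        "dump-v2/",           // extracted dump directory
        "data.ms/",           // destination database directory
        100 * 1024 * 1024,    // index_db_size
        100 * 1024 * 1024,    // update_db_size
        &IndexerOpts::default(),
    )?;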

View File

@ -1,11 +1,9 @@
use std::fs::File;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use anyhow::Context;
use chrono::{DateTime, Utc};
use log::{info, trace, warn};
#[cfg(test)]
use mockall::automock;
use serde::{Deserialize, Serialize};
use tokio::fs::create_dir_all;
@ -16,9 +14,12 @@ pub use actor::DumpActor;
pub use handle_impl::*;
pub use message::DumpMsg;
use super::{update_actor::UpdateActorHandle, uuid_resolver::UuidResolverHandle};
use super::index_resolver::HardStateIndexResolver;
use super::updates::UpdateSender;
use crate::compression::{from_tar_gz, to_tar_gz};
use crate::index_controller::dump_actor::error::DumpActorError;
use crate::{helpers::compression, option::IndexerOpts};
use crate::index_controller::updates::UpdateMsg;
use crate::options::IndexerOpts;
use error::Result;
mod actor;
@ -30,7 +31,6 @@ mod message;
const META_FILE_NAME: &str = "metadata.json";
#[async_trait::async_trait]
#[cfg_attr(test, automock)]
pub trait DumpActorHandle {
/// Start the creation of a dump
/// Implementation: [handle_impl::DumpActorHandleImpl::create_dump]
@ -109,21 +109,16 @@ pub fn load_dump(
update_db_size: usize,
indexer_opts: &IndexerOpts,
) -> anyhow::Result<()> {
let tmp_src = tempfile::tempdir_in(".")?;
let tmp_src = tempfile::tempdir()?;
let tmp_src_path = tmp_src.path();
compression::from_tar_gz(&src_path, tmp_src_path)?;
from_tar_gz(&src_path, tmp_src_path)?;
let meta_path = tmp_src_path.join(META_FILE_NAME);
let mut meta_file = File::open(&meta_path)?;
let meta: Metadata = serde_json::from_reader(&mut meta_file)?;
let dst_dir = dst_path
.as_ref()
.parent()
.with_context(|| format!("Invalid db path: {}", dst_path.as_ref().display()))?;
let tmp_dst = tempfile::tempdir_in(dst_dir)?;
let tmp_dst = tempfile::tempdir()?;
match meta {
Metadata::V1(meta) => {
@ -149,28 +144,22 @@ pub fn load_dump(
Ok(())
}
struct DumpTask<U, P> {
struct DumpTask {
path: PathBuf,
uuid_resolver: U,
update_handle: P,
index_resolver: Arc<HardStateIndexResolver>,
update_handle: UpdateSender,
uid: String,
update_db_size: usize,
index_db_size: usize,
}
impl<U, P> DumpTask<U, P>
where
U: UuidResolverHandle + Send + Sync + Clone + 'static,
P: UpdateActorHandle + Send + Sync + Clone + 'static,
{
impl DumpTask {
async fn run(self) -> Result<()> {
trace!("Performing dump.");
create_dir_all(&self.path).await?;
let path_clone = self.path.clone();
let temp_dump_dir =
tokio::task::spawn_blocking(|| tempfile::TempDir::new_in(path_clone)).await??;
let temp_dump_dir = tokio::task::spawn_blocking(tempfile::TempDir::new).await??;
let temp_dump_path = temp_dump_dir.path().to_owned();
let meta = Metadata::new_v2(self.index_db_size, self.update_db_size);
@ -178,15 +167,13 @@ where
let mut meta_file = File::create(&meta_path)?;
serde_json::to_writer(&mut meta_file, &meta)?;
let uuids = self.uuid_resolver.dump(temp_dump_path.clone()).await?;
let uuids = self.index_resolver.dump(temp_dump_path.clone()).await?;
self.update_handle
.dump(uuids, temp_dump_path.clone())
.await?;
UpdateMsg::dump(&self.update_handle, uuids, temp_dump_path.clone()).await?;
let dump_path = tokio::task::spawn_blocking(move || -> Result<PathBuf> {
let temp_dump_file = tempfile::NamedTempFile::new_in(&self.path)?;
compression::to_tar_gz(temp_dump_path, temp_dump_file.path())
let temp_dump_file = tempfile::NamedTempFile::new()?;
to_tar_gz(temp_dump_path, temp_dump_file.path())
.map_err(|e| DumpActorError::Internal(e.into()))?;
let dump_path = self.path.join(self.uid).with_extension("dump");

View File

@ -1,12 +1,14 @@
use std::error::Error;
use meilisearch_error::Code;
use meilisearch_error::ErrorCode;
use tokio::task::JoinError;
use crate::index::error::IndexError;
use super::dump_actor::error::DumpActorError;
use super::index_actor::error::IndexActorError;
use super::update_actor::error::UpdateActorError;
use super::uuid_resolver::error::UuidResolverError;
use super::index_resolver::error::IndexResolverError;
use super::updates::error::UpdateLoopError;
pub type Result<T> = std::result::Result<T, IndexControllerError>;
@ -15,26 +17,28 @@ pub enum IndexControllerError {
#[error("Index creation must have an uid")]
MissingUid,
#[error("{0}")]
Uuid(#[from] UuidResolverError),
IndexResolver(#[from] IndexResolverError),
#[error("{0}")]
IndexActor(#[from] IndexActorError),
#[error("{0}")]
UpdateActor(#[from] UpdateActorError),
UpdateLoop(#[from] UpdateLoopError),
#[error("{0}")]
DumpActor(#[from] DumpActorError),
#[error("{0}")]
IndexError(#[from] IndexError),
#[error("Internal error: {0}")]
Internal(Box<dyn Error + Send + Sync + 'static>),
}
internal_error!(IndexControllerError: JoinError);
impl ErrorCode for IndexControllerError {
fn error_code(&self) -> Code {
match self {
IndexControllerError::MissingUid => Code::BadRequest,
IndexControllerError::Uuid(e) => e.error_code(),
IndexControllerError::IndexActor(e) => e.error_code(),
IndexControllerError::UpdateActor(e) => e.error_code(),
IndexControllerError::IndexResolver(e) => e.error_code(),
IndexControllerError::UpdateLoop(e) => e.error_code(),
IndexControllerError::DumpActor(e) => e.error_code(),
IndexControllerError::IndexError(e) => e.error_code(),
IndexControllerError::Internal(_) => Code::Internal,
}
}
}

View File

@ -0,0 +1,64 @@
use std::fmt;
use meilisearch_error::{Code, ErrorCode};
use tokio::sync::mpsc::error::SendError as MpscSendError;
use tokio::sync::oneshot::error::RecvError as OneshotRecvError;
use crate::{error::MilliError, index::error::IndexError};
pub type Result<T> = std::result::Result<T, IndexResolverError>;
#[derive(thiserror::Error, Debug)]
pub enum IndexResolverError {
#[error("{0}")]
IndexError(#[from] IndexError),
#[error("Index already exists")]
IndexAlreadyExists,
#[error("Index {0} not found")]
UnexistingIndex(String),
#[error("A primary key is already present. It's impossible to update it")]
ExistingPrimaryKey,
#[error("Internal Error: {0}")]
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
#[error("{0}")]
Milli(#[from] milli::Error),
#[error("Index must have a valid uid; Index uid can be of type integer or string only composed of alphanumeric characters, hyphens (-) and underscores (_).")]
BadlyFormatted(String),
}
impl<T> From<MpscSendError<T>> for IndexResolverError
where
T: Send + Sync + 'static + fmt::Debug,
{
fn from(other: tokio::sync::mpsc::error::SendError<T>) -> Self {
Self::Internal(Box::new(other))
}
}
impl From<OneshotRecvError> for IndexResolverError {
fn from(other: tokio::sync::oneshot::error::RecvError) -> Self {
Self::Internal(Box::new(other))
}
}
internal_error!(
IndexResolverError: heed::Error,
uuid::Error,
std::io::Error,
tokio::task::JoinError,
serde_json::Error
);
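The `internal_error!` macro presumably expands to one `From` impl per listed type, boxing the error into the `Internal` variant — the same shape as the hand-written impls just above. A sketch of the assumed expansion for one of the types:

    // Assumed expansion for `heed::Error`; the macro is not shown in this diff.
    impl From<heed::Error> for IndexResolverError {
        fn from(other: heed::Error) -> Self {
            Self::Internal(Box::new(other))
        }
    }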
impl ErrorCode for IndexResolverError {
fn error_code(&self) -> Code {
match self {
IndexResolverError::IndexError(e) => e.error_code(),
IndexResolverError::IndexAlreadyExists => Code::IndexAlreadyExists,
IndexResolverError::UnexistingIndex(_) => Code::IndexNotFound,
IndexResolverError::ExistingPrimaryKey => Code::PrimaryKeyAlreadyPresent,
IndexResolverError::Internal(_) => Code::Internal,
IndexResolverError::Milli(e) => MilliError(e).error_code(),
IndexResolverError::BadlyFormatted(_) => Code::InvalidIndexUid,
}
}
}

View File

@ -8,8 +8,11 @@ use tokio::sync::RwLock;
use tokio::task::spawn_blocking;
use uuid::Uuid;
use super::error::{IndexActorError, Result};
use super::error::{IndexResolverError, Result};
use crate::index::update_handler::UpdateHandler;
use crate::index::Index;
use crate::index_controller::update_file_store::UpdateFileStore;
use crate::options::IndexerOpts;
type AsyncMap<K, V> = Arc<RwLock<HashMap<K, V>>>;
@ -24,17 +27,27 @@ pub struct MapIndexStore {
index_store: AsyncMap<Uuid, Index>,
path: PathBuf,
index_size: usize,
update_file_store: Arc<UpdateFileStore>,
update_handler: Arc<UpdateHandler>,
}
impl MapIndexStore {
pub fn new(path: impl AsRef<Path>, index_size: usize) -> Self {
pub fn new(
path: impl AsRef<Path>,
index_size: usize,
indexer_opts: &IndexerOpts,
) -> anyhow::Result<Self> {
let update_handler = Arc::new(UpdateHandler::new(indexer_opts)?);
let update_file_store = Arc::new(UpdateFileStore::new(path.as_ref()).unwrap());
let path = path.as_ref().join("indexes/");
let index_store = Arc::new(RwLock::new(HashMap::new()));
Self {
Ok(Self {
index_store,
path,
index_size,
}
update_file_store,
update_handler,
})
}
}
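A minimal construction sketch — whether `IndexerOpts` implements `Default` is an assumption here, and the size is illustrative:

    // The store now builds its shared UpdateHandler and UpdateFileStore up
    // front, which is why `new` became fallible.
    let indexer_opts = IndexerOpts::default(); // assumed Default impl
    let store = MapIndexStore::new("data.ms", 100 * 1024 * 1024, &indexer_opts)?;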
@ -48,14 +61,16 @@ impl IndexStore for MapIndexStore {
if let Some(index) = lock.get(&uuid) {
return Ok(index.clone());
}
let path = self.path.join(format!("index-{}", uuid));
let path = self.path.join(format!("{}", uuid));
if path.exists() {
return Err(IndexActorError::IndexAlreadyExists);
return Err(IndexResolverError::IndexAlreadyExists);
}
let index_size = self.index_size;
let file_store = self.update_file_store.clone();
let update_handler = self.update_handler.clone();
let index = spawn_blocking(move || -> Result<Index> {
let index = Index::open(path, index_size)?;
let index = Index::open(path, index_size, file_store, uuid, update_handler)?;
if let Some(primary_key) = primary_key {
let mut txn = index.write_txn()?;
@ -81,13 +96,18 @@ impl IndexStore for MapIndexStore {
None => {
                // drop the guard here so we can perform the write afterwards without deadlocking
drop(guard);
let path = self.path.join(format!("index-{}", uuid));
let path = self.path.join(format!("{}", uuid));
if !path.exists() {
return Ok(None);
}
let index_size = self.index_size;
let index = spawn_blocking(move || Index::open(path, index_size)).await??;
let file_store = self.update_file_store.clone();
let update_handler = self.update_handler.clone();
let index = spawn_blocking(move || {
Index::open(path, index_size, file_store, uuid, update_handler)
})
.await??;
self.index_store.write().await.insert(uuid, index.clone());
Ok(Some(index))
}
@ -95,7 +115,7 @@ impl IndexStore for MapIndexStore {
}
async fn delete(&self, uuid: Uuid) -> Result<Option<Index>> {
let db_path = self.path.join(format!("index-{}", uuid));
let db_path = self.path.join(format!("{}", uuid));
fs::remove_dir_all(db_path).await?;
let index = self.index_store.write().await.remove(&uuid);
Ok(index)

View File

@ -1,22 +1,22 @@
use std::collections::HashSet;
use std::path::PathBuf;
use std::{collections::HashSet, path::PathBuf};
use tokio::sync::oneshot;
use uuid::Uuid;
use super::Result;
use crate::index::Index;
use super::error::Result;
pub enum UuidResolveMsg {
pub enum IndexResolverMsg {
Get {
uid: String,
ret: oneshot::Sender<Result<Uuid>>,
ret: oneshot::Sender<Result<Index>>,
},
Delete {
uid: String,
ret: oneshot::Sender<Result<Uuid>>,
ret: oneshot::Sender<Result<Index>>,
},
List {
ret: oneshot::Sender<Result<Vec<(String, Uuid)>>>,
ret: oneshot::Sender<Result<Vec<(String, Index)>>>,
},
Insert {
uuid: Uuid,
@ -25,13 +25,13 @@ pub enum UuidResolveMsg {
},
SnapshotRequest {
path: PathBuf,
ret: oneshot::Sender<Result<HashSet<Uuid>>>,
ret: oneshot::Sender<Result<HashSet<Index>>>,
},
GetSize {
ret: oneshot::Sender<Result<u64>>,
},
DumpRequest {
path: PathBuf,
ret: oneshot::Sender<Result<HashSet<Uuid>>>,
ret: oneshot::Sender<Result<HashSet<Index>>>,
},
}

View File

@ -0,0 +1,167 @@
pub mod error;
mod index_store;
pub mod uuid_store;
use std::path::Path;
use error::{IndexResolverError, Result};
use index_store::{IndexStore, MapIndexStore};
use uuid::Uuid;
use uuid_store::{HeedUuidStore, UuidStore};
use crate::{
index::{update_handler::UpdateHandler, Index},
options::IndexerOpts,
};
pub type HardStateIndexResolver = IndexResolver<HeedUuidStore, MapIndexStore>;
pub fn create_index_resolver(
path: impl AsRef<Path>,
index_size: usize,
indexer_opts: &IndexerOpts,
) -> anyhow::Result<HardStateIndexResolver> {
let uuid_store = HeedUuidStore::new(&path)?;
let index_store = MapIndexStore::new(&path, index_size, indexer_opts)?;
Ok(IndexResolver::new(uuid_store, index_store))
}
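A hedged end-to-end sketch of the resolver, inside an async context (names and sizes are illustrative; the `uuid` field is used this way elsewhere in this diff):

    // Create a named index, then resolve it back through the uuid store.
    let resolver = create_index_resolver("data.ms", 100 * 1024 * 1024, &indexer_opts)?;
    let index = resolver.create_index("movies".to_string(), None).await?;
    let same = resolver.get_index("movies".to_string()).await?;
    assert_eq!(index.uuid, same.uuid);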
pub struct IndexResolver<U, I> {
index_uuid_store: U,
index_store: I,
}
impl IndexResolver<HeedUuidStore, MapIndexStore> {
pub fn load_dump(
src: impl AsRef<Path>,
dst: impl AsRef<Path>,
index_db_size: usize,
indexer_opts: &IndexerOpts,
) -> anyhow::Result<()> {
HeedUuidStore::load_dump(&src, &dst)?;
let indexes_path = src.as_ref().join("indexes");
let indexes = indexes_path.read_dir()?;
let update_handler = UpdateHandler::new(indexer_opts)?;
for index in indexes {
let index = index?;
Index::load_dump(&index.path(), &dst, index_db_size, &update_handler)?;
}
Ok(())
}
}
impl<U, I> IndexResolver<U, I>
where
U: UuidStore,
I: IndexStore,
{
pub fn new(index_uuid_store: U, index_store: I) -> Self {
Self {
index_uuid_store,
index_store,
}
}
pub async fn dump(&self, path: impl AsRef<Path>) -> Result<Vec<Index>> {
let uuids = self.index_uuid_store.dump(path.as_ref().to_owned()).await?;
let mut indexes = Vec::new();
for uuid in uuids {
indexes.push(self.get_index_by_uuid(uuid).await?);
}
Ok(indexes)
}
pub async fn get_uuids_size(&self) -> Result<u64> {
Ok(self.index_uuid_store.get_size().await?)
}
pub async fn snapshot(&self, path: impl AsRef<Path>) -> Result<Vec<Index>> {
let uuids = self
.index_uuid_store
.snapshot(path.as_ref().to_owned())
.await?;
let mut indexes = Vec::new();
for uuid in uuids {
indexes.push(self.get_index_by_uuid(uuid).await?);
}
Ok(indexes)
}
pub async fn create_index(&self, uid: String, primary_key: Option<String>) -> Result<Index> {
if !is_index_uid_valid(&uid) {
return Err(IndexResolverError::BadlyFormatted(uid));
}
let uuid = Uuid::new_v4();
let index = self.index_store.create(uuid, primary_key).await?;
self.index_uuid_store.insert(uid, uuid).await?;
Ok(index)
}
pub async fn list(&self) -> Result<Vec<(String, Index)>> {
let uuids = self.index_uuid_store.list().await?;
let mut indexes = Vec::new();
for (name, uuid) in uuids {
match self.index_store.get(uuid).await? {
Some(index) => indexes.push((name, index)),
None => {
                    // we found a uuid for an index that no longer exists, so we remove it from the uuid store
let _ = self.index_uuid_store.delete(name).await;
}
}
}
Ok(indexes)
}
pub async fn delete_index(&self, uid: String) -> Result<Uuid> {
match self.index_uuid_store.delete(uid.clone()).await? {
Some(uuid) => {
let _ = self.index_store.delete(uuid).await;
Ok(uuid)
}
None => Err(IndexResolverError::UnexistingIndex(uid)),
}
}
pub async fn get_index_by_uuid(&self, uuid: Uuid) -> Result<Index> {
// TODO: Handle this error better.
self.index_store
.get(uuid)
.await?
.ok_or_else(|| IndexResolverError::UnexistingIndex(String::new()))
}
pub async fn get_index(&self, uid: String) -> Result<Index> {
match self.index_uuid_store.get_uuid(uid).await? {
(name, Some(uuid)) => {
match self.index_store.get(uuid).await? {
Some(index) => Ok(index),
None => {
                        // For some reason the uuid points to an index that no longer exists;
                        // we remove the uuid from the uuid store and return an error.
let _ = self.index_uuid_store.delete(name.clone()).await;
Err(IndexResolverError::UnexistingIndex(name))
}
}
}
(name, _) => Err(IndexResolverError::UnexistingIndex(name)),
}
}
pub async fn get_uuid(&self, uid: String) -> Result<Uuid> {
match self.index_uuid_store.get_uuid(uid).await? {
(_, Some(uuid)) => Ok(uuid),
(name, _) => Err(IndexResolverError::UnexistingIndex(name)),
}
}
}
fn is_index_uid_valid(uid: &str) -> bool {
uid.chars()
.all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_')
}
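Concretely:

    assert!(is_index_uid_valid("movies-2021"));
    assert!(is_index_uid_valid("user_42"));
    assert!(!is_index_uid_valid("movies/2021")); // '/' is rejected

Note that an empty uid also satisfies this character check, since `all` is vacuously true on an empty iterator.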

View File

@ -8,8 +8,10 @@ use heed::{CompactionOption, Database, Env, EnvOpenOptions};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use super::{error::UuidResolverError, Result, UUID_STORE_SIZE};
use crate::helpers::EnvSizer;
use super::error::{IndexResolverError, Result};
use crate::EnvSizer;
const UUID_STORE_SIZE: usize = 1_073_741_824; //1GiB
#[derive(Serialize, Deserialize)]
struct DumpEntry {
@ -23,7 +25,7 @@ const UUIDS_DB_PATH: &str = "index_uuids";
pub trait UuidStore: Sized {
// Create a new entry for `name`. Return an error if the entry already exists;
// return the uuid otherwise.
async fn get_uuid(&self, uid: String) -> Result<Option<Uuid>>;
async fn get_uuid(&self, uid: String) -> Result<(String, Option<Uuid>)>;
async fn delete(&self, uid: String) -> Result<Option<Uuid>>;
async fn list(&self) -> Result<Vec<(String, Uuid)>>;
async fn insert(&self, name: String, uuid: Uuid) -> Result<()>;
@ -44,16 +46,17 @@ impl HeedUuidStore {
create_dir_all(&path)?;
let mut options = EnvOpenOptions::new();
options.map_size(UUID_STORE_SIZE); // 1GB
options.max_dbs(1);
let env = options.open(path)?;
let db = env.create_database(None)?;
let db = env.create_database(Some("uuids"))?;
Ok(Self { env, db })
}
pub fn get_uuid(&self, name: String) -> Result<Option<Uuid>> {
pub fn get_uuid(&self, name: &str) -> Result<Option<Uuid>> {
let env = self.env.clone();
let db = self.db;
let txn = env.read_txn()?;
match db.get(&txn, &name)? {
match db.get(&txn, name)? {
Some(uuid) => {
let uuid = Uuid::from_slice(uuid)?;
Ok(Some(uuid))
@ -96,7 +99,7 @@ impl HeedUuidStore {
let mut txn = env.write_txn()?;
if db.get(&txn, &name)?.is_some() {
return Err(UuidResolverError::NameAlreadyExist);
return Err(IndexResolverError::IndexAlreadyExists);
}
db.put(&mut txn, &name, uuid.as_bytes())?;
@ -188,9 +191,9 @@ impl HeedUuidStore {
#[async_trait::async_trait]
impl UuidStore for HeedUuidStore {
async fn get_uuid(&self, name: String) -> Result<Option<Uuid>> {
async fn get_uuid(&self, name: String) -> Result<(String, Option<Uuid>)> {
let this = self.clone();
tokio::task::spawn_blocking(move || this.get_uuid(name)).await?
tokio::task::spawn_blocking(move || this.get_uuid(&name).map(|res| (name, res))).await?
}
async fn delete(&self, uid: String) -> Result<Option<Uuid>> {

View File

@ -0,0 +1,504 @@
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
use actix_web::error::PayloadError;
use bytes::Bytes;
use chrono::{DateTime, Utc};
use futures::Stream;
use log::info;
use milli::update::IndexDocumentsMethod;
use serde::{Deserialize, Serialize};
use tokio::task::spawn_blocking;
use tokio::time::sleep;
use uuid::Uuid;
use dump_actor::DumpActorHandle;
pub use dump_actor::{DumpInfo, DumpStatus};
use snapshot::load_snapshot;
use crate::index::error::Result as IndexResult;
use crate::index::{
Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked,
};
use crate::index_controller::index_resolver::create_index_resolver;
use crate::index_controller::snapshot::SnapshotService;
use crate::options::IndexerOpts;
use error::Result;
use self::dump_actor::load_dump;
use self::index_resolver::error::IndexResolverError;
use self::index_resolver::HardStateIndexResolver;
use self::updates::status::UpdateStatus;
use self::updates::UpdateMsg;
mod dump_actor;
pub mod error;
mod index_resolver;
mod snapshot;
pub mod update_file_store;
pub mod updates;
pub type Payload = Box<
dyn Stream<Item = std::result::Result<Bytes, PayloadError>> + Send + Sync + 'static + Unpin,
>;
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexMetadata {
#[serde(skip)]
pub uuid: Uuid,
pub uid: String,
name: String,
#[serde(flatten)]
pub meta: IndexMeta,
}
#[derive(Clone, Debug)]
pub struct IndexSettings {
pub uid: Option<String>,
pub primary_key: Option<String>,
}
#[derive(Clone)]
pub struct IndexController {
index_resolver: Arc<HardStateIndexResolver>,
update_sender: updates::UpdateSender,
dump_handle: dump_actor::DumpActorHandleImpl,
}
#[derive(Debug)]
pub enum DocumentAdditionFormat {
Json,
Csv,
Ndjson,
}
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct Stats {
pub database_size: u64,
pub last_update: Option<DateTime<Utc>>,
pub indexes: BTreeMap<String, IndexStats>,
}
#[allow(clippy::large_enum_variant)]
#[derive(derivative::Derivative)]
#[derivative(Debug)]
pub enum Update {
DeleteDocuments(Vec<String>),
ClearDocuments,
Settings(Settings<Unchecked>),
DocumentAddition {
#[derivative(Debug = "ignore")]
payload: Payload,
primary_key: Option<String>,
method: IndexDocumentsMethod,
format: DocumentAdditionFormat,
},
}
#[derive(Default, Debug)]
pub struct IndexControllerBuilder {
max_index_size: Option<usize>,
max_update_store_size: Option<usize>,
snapshot_dir: Option<PathBuf>,
import_snapshot: Option<PathBuf>,
snapshot_interval: Option<Duration>,
ignore_snapshot_if_db_exists: bool,
ignore_missing_snapshot: bool,
schedule_snapshot: bool,
dump_src: Option<PathBuf>,
dump_dst: Option<PathBuf>,
}
impl IndexControllerBuilder {
pub fn build(
self,
db_path: impl AsRef<Path>,
indexer_options: IndexerOpts,
) -> anyhow::Result<IndexController> {
let index_size = self
.max_index_size
.ok_or_else(|| anyhow::anyhow!("Missing index size"))?;
let update_store_size = self
.max_index_size
.ok_or_else(|| anyhow::anyhow!("Missing update database size"))?;
if let Some(ref path) = self.import_snapshot {
info!("Loading from snapshot {:?}", path);
load_snapshot(
db_path.as_ref(),
path,
self.ignore_snapshot_if_db_exists,
self.ignore_missing_snapshot,
)?;
} else if let Some(ref src_path) = self.dump_src {
load_dump(
db_path.as_ref(),
src_path,
index_size,
update_store_size,
&indexer_options,
)?;
}
std::fs::create_dir_all(db_path.as_ref())?;
let index_resolver = Arc::new(create_index_resolver(
&db_path,
index_size,
&indexer_options,
)?);
#[allow(unreachable_code)]
let update_sender =
updates::create_update_handler(index_resolver.clone(), &db_path, update_store_size)?;
let dump_path = self
.dump_dst
.ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?;
let dump_handle = dump_actor::DumpActorHandleImpl::new(
dump_path,
index_resolver.clone(),
update_sender.clone(),
index_size,
update_store_size,
)?;
if self.schedule_snapshot {
let snapshot_service = SnapshotService::new(
index_resolver.clone(),
update_sender.clone(),
self.snapshot_interval
.ok_or_else(|| anyhow::anyhow!("Snapshot interval not provided."))?,
self.snapshot_dir
.ok_or_else(|| anyhow::anyhow!("Snapshot path not provided."))?,
db_path
.as_ref()
.file_name()
.map(|n| n.to_owned().into_string().expect("invalid path"))
.unwrap_or_else(|| String::from("data.ms")),
);
tokio::task::spawn(snapshot_service.run());
}
Ok(IndexController {
index_resolver,
update_sender,
dump_handle,
})
}
/// Set the index controller builder's max update store size.
pub fn set_max_update_store_size(&mut self, max_update_store_size: usize) -> &mut Self {
self.max_update_store_size.replace(max_update_store_size);
self
}
pub fn set_max_index_size(&mut self, size: usize) -> &mut Self {
self.max_index_size.replace(size);
self
}
/// Set the index controller builder's snapshot path.
pub fn set_snapshot_dir(&mut self, snapshot_dir: PathBuf) -> &mut Self {
self.snapshot_dir.replace(snapshot_dir);
self
}
/// Set the index controller builder's ignore snapshot if db exists.
pub fn set_ignore_snapshot_if_db_exists(
&mut self,
ignore_snapshot_if_db_exists: bool,
) -> &mut Self {
self.ignore_snapshot_if_db_exists = ignore_snapshot_if_db_exists;
self
}
/// Set the index controller builder's ignore missing snapshot.
pub fn set_ignore_missing_snapshot(&mut self, ignore_missing_snapshot: bool) -> &mut Self {
self.ignore_missing_snapshot = ignore_missing_snapshot;
self
}
/// Set the index controller builder's dump src.
pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self {
self.dump_src.replace(dump_src);
self
}
/// Set the index controller builder's dump dst.
pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self {
self.dump_dst.replace(dump_dst);
self
}
/// Set the index controller builder's import snapshot.
pub fn set_import_snapshot(&mut self, import_snapshot: PathBuf) -> &mut Self {
self.import_snapshot.replace(import_snapshot);
self
}
/// Set the index controller builder's snapshot interval sec.
pub fn set_snapshot_interval(&mut self, snapshot_interval: Duration) -> &mut Self {
self.snapshot_interval = Some(snapshot_interval);
self
}
/// Set the index controller builder's schedule snapshot.
pub fn set_schedule_snapshot(&mut self) -> &mut Self {
self.schedule_snapshot = true;
self
}
}
impl IndexController {
pub fn builder() -> IndexControllerBuilder {
IndexControllerBuilder::default()
}
pub async fn register_update(
&self,
uid: String,
update: Update,
create_index: bool,
) -> Result<UpdateStatus> {
match self.index_resolver.get_uuid(uid).await {
Ok(uuid) => {
let update_result = UpdateMsg::update(&self.update_sender, uuid, update).await?;
Ok(update_result)
}
Err(IndexResolverError::UnexistingIndex(name)) => {
if create_index {
let index = self.index_resolver.create_index(name, None).await?;
let update_result =
UpdateMsg::update(&self.update_sender, index.uuid, update).await?;
// ignore if index creation fails now, since it may already have been created
Ok(update_result)
} else {
Err(IndexResolverError::UnexistingIndex(name).into())
}
}
Err(e) => Err(e.into()),
}
}
pub async fn update_status(&self, uid: String, id: u64) -> Result<UpdateStatus> {
let uuid = self.index_resolver.get_uuid(uid).await?;
let result = UpdateMsg::get_update(&self.update_sender, uuid, id).await?;
Ok(result)
}
pub async fn all_update_status(&self, uid: String) -> Result<Vec<UpdateStatus>> {
let uuid = self.index_resolver.get_uuid(uid).await?;
let result = UpdateMsg::list_updates(&self.update_sender, uuid).await?;
Ok(result)
}
pub async fn list_indexes(&self) -> Result<Vec<IndexMetadata>> {
let indexes = self.index_resolver.list().await?;
let mut ret = Vec::new();
for (uid, index) in indexes {
let meta = index.meta()?;
let meta = IndexMetadata {
uuid: index.uuid,
name: uid.clone(),
uid,
meta,
};
ret.push(meta);
}
Ok(ret)
}
pub async fn settings(&self, uid: String) -> Result<Settings<Checked>> {
let index = self.index_resolver.get_index(uid).await?;
let settings = spawn_blocking(move || index.settings()).await??;
Ok(settings)
}
pub async fn documents(
&self,
uid: String,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>> {
let index = self.index_resolver.get_index(uid).await?;
let documents =
spawn_blocking(move || index.retrieve_documents(offset, limit, attributes_to_retrieve))
.await??;
Ok(documents)
}
pub async fn document(
&self,
uid: String,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document> {
let index = self.index_resolver.get_index(uid).await?;
let document =
spawn_blocking(move || index.retrieve_document(doc_id, attributes_to_retrieve))
.await??;
Ok(document)
}
pub async fn update_index(
&self,
uid: String,
mut index_settings: IndexSettings,
) -> Result<IndexMetadata> {
index_settings.uid.take();
let index = self.index_resolver.get_index(uid.clone()).await?;
let uuid = index.uuid;
let meta =
spawn_blocking(move || index.update_primary_key(index_settings.primary_key)).await??;
let meta = IndexMetadata {
uuid,
name: uid.clone(),
uid,
meta,
};
Ok(meta)
}
pub async fn search(&self, uid: String, query: SearchQuery) -> Result<SearchResult> {
let index = self.index_resolver.get_index(uid.clone()).await?;
let result = spawn_blocking(move || index.perform_search(query)).await??;
Ok(result)
}
pub async fn get_index(&self, uid: String) -> Result<IndexMetadata> {
let index = self.index_resolver.get_index(uid.clone()).await?;
let uuid = index.uuid;
let meta = spawn_blocking(move || index.meta()).await??;
let meta = IndexMetadata {
uuid,
name: uid.clone(),
uid,
meta,
};
Ok(meta)
}
pub async fn get_index_stats(&self, uid: String) -> Result<IndexStats> {
let update_infos = UpdateMsg::get_info(&self.update_sender).await?;
let index = self.index_resolver.get_index(uid).await?;
let uuid = index.uuid;
let mut stats = spawn_blocking(move || index.stats()).await??;
// Check if the currently indexing update is from our index.
stats.is_indexing = Some(Some(uuid) == update_infos.processing);
Ok(stats)
}
pub async fn get_all_stats(&self) -> Result<Stats> {
let update_infos = UpdateMsg::get_info(&self.update_sender).await?;
let mut database_size = self.index_resolver.get_uuids_size().await? + update_infos.size;
let mut last_update: Option<DateTime<_>> = None;
let mut indexes = BTreeMap::new();
for (index_uid, index) in self.index_resolver.list().await? {
let uuid = index.uuid;
let (mut stats, meta) = spawn_blocking::<_, IndexResult<_>>(move || {
let stats = index.stats()?;
let meta = index.meta()?;
Ok((stats, meta))
})
.await??;
database_size += stats.size;
last_update = last_update.map_or(Some(meta.updated_at), |last| {
Some(last.max(meta.updated_at))
});
// Check if the currently indexing update is from our index.
stats.is_indexing = Some(Some(uuid) == update_infos.processing);
indexes.insert(index_uid, stats);
}
Ok(Stats {
database_size,
last_update,
indexes,
})
}
pub async fn create_dump(&self) -> Result<DumpInfo> {
Ok(self.dump_handle.create_dump().await?)
}
pub async fn dump_info(&self, uid: String) -> Result<DumpInfo> {
Ok(self.dump_handle.dump_info(uid).await?)
}
pub async fn create_index(
&self,
uid: String,
primary_key: Option<String>,
) -> Result<IndexMetadata> {
let index = self
.index_resolver
.create_index(uid.clone(), primary_key)
.await?;
let meta = spawn_blocking(move || -> IndexResult<_> {
let meta = index.meta()?;
let meta = IndexMetadata {
uuid: index.uuid,
uid: uid.clone(),
name: uid,
meta,
};
Ok(meta)
})
.await??;
Ok(meta)
}
pub async fn delete_index(&self, uid: String) -> Result<()> {
let uuid = self.index_resolver.delete_index(uid).await?;
let update_sender = self.update_sender.clone();
tokio::spawn(async move {
let _ = UpdateMsg::delete(&update_sender, uuid).await;
});
Ok(())
}
}
pub async fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T {
loop {
match Arc::try_unwrap(item) {
Ok(item) => return item,
Err(item_arc) => {
item = item_arc;
sleep(Duration::from_millis(100)).await;
continue;
}
}
}
}
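A hedged usage sketch — poll until every other clone of the Arc has been dropped, then take back exclusive ownership:

    // Hypothetical shutdown path: wait for all holders to release the store.
    let store: UpdateStore = get_arc_ownership_blocking(store_arc).await;
    drop(store); // no clones remain; safe to tear down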
/// Parses the v1 version of the Asc ranking rule `asc(price)` and returns the field name.
pub fn asc_ranking_rule(text: &str) -> Option<&str> {
text.split_once("asc(")
.and_then(|(_, tail)| tail.rsplit_once(")"))
.map(|(field, _)| field)
}
/// Parses the v1 version of the Desc ranking rule `desc(price)` and returns the field name.
pub fn desc_ranking_rule(text: &str) -> Option<&str> {
text.split_once("desc(")
.and_then(|(_, tail)| tail.rsplit_once(")"))
.map(|(field, _)| field)
}
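For example:

    assert_eq!(asc_ranking_rule("asc(price)"), Some("price"));
    assert_eq!(desc_ranking_rule("desc(release_date)"), Some("release_date"));
    assert_eq!(asc_ranking_rule("price:asc"), None); // v2 syntax is not matched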

View File

@ -0,0 +1,257 @@
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
use anyhow::bail;
use log::{error, info, trace};
use tokio::fs;
use tokio::task::spawn_blocking;
use tokio::time::sleep;
use crate::compression::from_tar_gz;
use crate::index_controller::updates::UpdateMsg;
use super::index_resolver::HardStateIndexResolver;
use super::updates::UpdateSender;
pub struct SnapshotService {
index_resolver: Arc<HardStateIndexResolver>,
update_sender: UpdateSender,
snapshot_period: Duration,
snapshot_path: PathBuf,
db_name: String,
}
impl SnapshotService {
pub fn new(
index_resolver: Arc<HardStateIndexResolver>,
update_sender: UpdateSender,
snapshot_period: Duration,
snapshot_path: PathBuf,
db_name: String,
) -> Self {
Self {
index_resolver,
update_sender,
snapshot_period,
snapshot_path,
db_name,
}
}
pub async fn run(self) {
info!(
"Snapshot scheduled every {}s.",
self.snapshot_period.as_secs()
);
loop {
if let Err(e) = self.perform_snapshot().await {
error!("Error while performing snapshot: {}", e);
}
sleep(self.snapshot_period).await;
}
}
async fn perform_snapshot(&self) -> anyhow::Result<()> {
trace!("Performing snapshot.");
let snapshot_dir = self.snapshot_path.clone();
fs::create_dir_all(&snapshot_dir).await?;
let temp_snapshot_dir = spawn_blocking(tempfile::tempdir).await??;
let temp_snapshot_path = temp_snapshot_dir.path().to_owned();
let indexes = self
.index_resolver
.snapshot(temp_snapshot_path.clone())
.await?;
if indexes.is_empty() {
return Ok(());
}
UpdateMsg::snapshot(&self.update_sender, temp_snapshot_path.clone(), indexes).await?;
let snapshot_path = self
.snapshot_path
.join(format!("{}.snapshot", self.db_name));
let snapshot_path = spawn_blocking(move || -> anyhow::Result<PathBuf> {
let temp_snapshot_file = tempfile::NamedTempFile::new()?;
let temp_snapshot_file_path = temp_snapshot_file.path().to_owned();
crate::compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?;
temp_snapshot_file.persist(&snapshot_path)?;
Ok(snapshot_path)
})
.await??;
trace!("Created snapshot in {:?}.", snapshot_path);
Ok(())
}
}
pub fn load_snapshot(
db_path: impl AsRef<Path>,
snapshot_path: impl AsRef<Path>,
ignore_snapshot_if_db_exists: bool,
ignore_missing_snapshot: bool,
) -> anyhow::Result<()> {
if !db_path.as_ref().exists() && snapshot_path.as_ref().exists() {
match from_tar_gz(snapshot_path, &db_path) {
Ok(()) => Ok(()),
Err(e) => {
                // clean up the created db folder
std::fs::remove_dir_all(&db_path)?;
Err(e)
}
}
} else if db_path.as_ref().exists() && !ignore_snapshot_if_db_exists {
bail!(
"database already exists at {:?}, try to delete it or rename it",
db_path
.as_ref()
.canonicalize()
.unwrap_or_else(|_| db_path.as_ref().to_owned())
)
} else if !snapshot_path.as_ref().exists() && !ignore_missing_snapshot {
bail!(
"snapshot doesn't exist at {:?}",
snapshot_path
.as_ref()
.canonicalize()
.unwrap_or_else(|_| snapshot_path.as_ref().to_owned())
)
} else {
Ok(())
}
}
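A sketch of the intended startup call (paths and flag values are illustrative):

    // Import "backup.snapshot" into a fresh "data.ms"; bail if the db already
    // exists, but tolerate a missing snapshot file.
    load_snapshot("data.ms", "backup.snapshot", false, true)?;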
//#[cfg(test)]
//mod test {
//use std::iter::FromIterator;
//use std::{collections::HashSet, sync::Arc};
//use futures::future::{err, ok};
//use rand::Rng;
//use tokio::time::timeout;
//use uuid::Uuid;
//use super::*;
//#[actix_rt::test]
//async fn test_normal() {
//let mut rng = rand::thread_rng();
//let uuids_num: usize = rng.gen_range(5..10);
//let uuids = (0..uuids_num)
//.map(|_| Uuid::new_v4())
//.collect::<HashSet<_>>();
//let mut uuid_resolver = MockUuidResolverHandle::new();
//let uuids_clone = uuids.clone();
//uuid_resolver
//.expect_snapshot()
//.times(1)
//.returning(move |_| Box::pin(ok(uuids_clone.clone())));
//let uuids_clone = uuids.clone();
//let mut index_handle = MockIndexActorHandle::new();
//index_handle
//.expect_snapshot()
//.withf(move |uuid, _path| uuids_clone.contains(uuid))
//.times(uuids_num)
//.returning(move |_, _| Box::pin(ok(())));
//let dir = tempfile::tempdir_in(".").unwrap();
//let handle = Arc::new(index_handle);
//let update_handle =
//UpdateActorHandleImpl::<Vec<u8>>::new(handle.clone(), dir.path(), 4096 * 100).unwrap();
//let snapshot_path = tempfile::tempdir_in(".").unwrap();
//let snapshot_service = SnapshotService::new(
//uuid_resolver,
//update_handle,
//Duration::from_millis(100),
//snapshot_path.path().to_owned(),
//"data.ms".to_string(),
//);
//snapshot_service.perform_snapshot().await.unwrap();
//}
//#[actix_rt::test]
//async fn error_performing_uuid_snapshot() {
//let mut uuid_resolver = MockUuidResolverHandle::new();
//uuid_resolver
//.expect_snapshot()
//.times(1)
////arbitrary error
//.returning(|_| Box::pin(err(UuidResolverError::NameAlreadyExist)));
//let update_handle = MockUpdateActorHandle::new();
//let snapshot_path = tempfile::tempdir_in(".").unwrap();
//let snapshot_service = SnapshotService::new(
//uuid_resolver,
//update_handle,
//Duration::from_millis(100),
//snapshot_path.path().to_owned(),
//"data.ms".to_string(),
//);
//assert!(snapshot_service.perform_snapshot().await.is_err());
////Nothing was written to the file
//assert!(!snapshot_path.path().join("data.ms.snapshot").exists());
//}
//#[actix_rt::test]
//async fn error_performing_index_snapshot() {
//let uuid = Uuid::new_v4();
//let mut uuid_resolver = MockUuidResolverHandle::new();
//uuid_resolver
//.expect_snapshot()
//.times(1)
//.returning(move |_| Box::pin(ok(HashSet::from_iter(Some(uuid)))));
//let mut update_handle = MockUpdateActorHandle::new();
//update_handle
//.expect_snapshot()
////arbitrary error
//.returning(|_, _| Box::pin(err(UpdateActorError::UnexistingUpdate(0))));
//let snapshot_path = tempfile::tempdir_in(".").unwrap();
//let snapshot_service = SnapshotService::new(
//uuid_resolver,
//update_handle,
//Duration::from_millis(100),
//snapshot_path.path().to_owned(),
//"data.ms".to_string(),
//);
//assert!(snapshot_service.perform_snapshot().await.is_err());
////Nothing was written to the file
//assert!(!snapshot_path.path().join("data.ms.snapshot").exists());
//}
//#[actix_rt::test]
//async fn test_loop() {
//let mut uuid_resolver = MockUuidResolverHandle::new();
//uuid_resolver
//.expect_snapshot()
////we expect the function to be called between 2 and 3 times in the given interval.
//.times(2..4)
////arbitrary error, to short-circuit the function
//.returning(move |_| Box::pin(err(UuidResolverError::NameAlreadyExist)));
//let update_handle = MockUpdateActorHandle::new();
//let snapshot_path = tempfile::tempdir_in(".").unwrap();
//let snapshot_service = SnapshotService::new(
//uuid_resolver,
//update_handle,
//Duration::from_millis(100),
//snapshot_path.path().to_owned(),
//"data.ms".to_string(),
//);
//let _ = timeout(Duration::from_millis(300), snapshot_service.run()).await;
//}
//}

View File

@ -0,0 +1,172 @@
use std::fs::{create_dir_all, File};
use std::io::{self, BufReader, BufWriter, Write};
use std::ops::{Deref, DerefMut};
use std::path::{Path, PathBuf};
use milli::documents::DocumentBatchReader;
use serde_json::Map;
use tempfile::{NamedTempFile, PersistError};
use uuid::Uuid;
const UPDATE_FILES_PATH: &str = "updates/updates_files";
use crate::document_formats::read_ndjson;
pub struct UpdateFile {
path: PathBuf,
file: NamedTempFile,
}
#[derive(Debug, thiserror::Error)]
#[error("Error while persisting update to disk: {0}")]
pub struct UpdateFileStoreError(Box<dyn std::error::Error + Sync + Send + 'static>);
type Result<T> = std::result::Result<T, UpdateFileStoreError>;
macro_rules! into_update_store_error {
($($other:path),*) => {
$(
impl From<$other> for UpdateFileStoreError {
fn from(other: $other) -> Self {
Self(Box::new(other))
}
}
)*
};
}
into_update_store_error!(
PersistError,
io::Error,
serde_json::Error,
milli::documents::Error
);
impl UpdateFile {
pub fn persist(self) -> Result<()> {
self.file.persist(&self.path)?;
Ok(())
}
}
impl Deref for UpdateFile {
type Target = NamedTempFile;
fn deref(&self) -> &Self::Target {
&self.file
}
}
impl DerefMut for UpdateFile {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.file
}
}
#[derive(Clone, Debug)]
pub struct UpdateFileStore {
path: PathBuf,
}
impl UpdateFileStore {
pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> anyhow::Result<()> {
let src_update_files_path = src.as_ref().join(UPDATE_FILES_PATH);
let dst_update_files_path = dst.as_ref().join(UPDATE_FILES_PATH);
create_dir_all(&dst_update_files_path)?;
let entries = std::fs::read_dir(src_update_files_path)?;
for entry in entries {
let entry = entry?;
let update_file = BufReader::new(File::open(entry.path())?);
let file_uuid = entry.file_name();
let file_uuid = file_uuid
.to_str()
.ok_or_else(|| anyhow::anyhow!("invalid update file name"))?;
let dst_path = dst_update_files_path.join(file_uuid);
let dst_file = BufWriter::new(File::create(dst_path)?);
read_ndjson(update_file, dst_file)?;
}
Ok(())
}
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
let path = path.as_ref().join(UPDATE_FILES_PATH);
std::fs::create_dir_all(&path)?;
Ok(Self { path })
}
/// Creates a new temporary update file.
///
/// A call to `persist` is needed to persist the file in the database.
pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> {
let file = NamedTempFile::new()?;
let uuid = Uuid::new_v4();
let path = self.path.join(uuid.to_string());
let update_file = UpdateFile { file, path };
Ok((uuid, update_file))
}
/// Returns the file corresponding to the requested uuid.
pub fn get_update(&self, uuid: Uuid) -> Result<File> {
let path = self.path.join(uuid.to_string());
let file = File::open(path)?;
Ok(file)
}
/// Copies the content of the update file pointed to by `uuid` to the `dst` directory.
pub fn snapshot(&self, uuid: Uuid, dst: impl AsRef<Path>) -> Result<()> {
let src = self.path.join(uuid.to_string());
let mut dst = dst.as_ref().join(UPDATE_FILES_PATH);
std::fs::create_dir_all(&dst)?;
dst.push(uuid.to_string());
std::fs::copy(src, dst)?;
Ok(())
}
    /// Performs a dump of the given update file uuid into the provided dump path.
pub fn dump(&self, uuid: Uuid, dump_path: impl AsRef<Path>) -> Result<()> {
let uuid_string = uuid.to_string();
let update_file_path = self.path.join(&uuid_string);
let mut dst = dump_path.as_ref().join(UPDATE_FILES_PATH);
std::fs::create_dir_all(&dst)?;
dst.push(&uuid_string);
let update_file = File::open(update_file_path)?;
let mut dst_file = NamedTempFile::new()?;
let mut document_reader = DocumentBatchReader::from_reader(update_file)?;
let mut document_buffer = Map::new();
// TODO: we need to find a way to do this more efficiently. (create a custom serializer
// for jsonl for example...)
while let Some((index, document)) = document_reader.next_document_with_index()? {
for (field_id, content) in document.iter() {
if let Some(field_name) = index.get_by_left(&field_id) {
let content = serde_json::from_slice(content)?;
document_buffer.insert(field_name.to_string(), content);
}
}
serde_json::to_writer(&mut dst_file, &document_buffer)?;
dst_file.write_all(b"\n")?;
document_buffer.clear();
}
dst_file.persist(dst)?;
Ok(())
}
pub fn get_size(&self, uuid: Uuid) -> Result<u64> {
Ok(self.get_update(uuid)?.metadata()?.len())
}
pub fn delete(&self, uuid: Uuid) -> Result<()> {
let path = self.path.join(uuid.to_string());
std::fs::remove_file(path)?;
Ok(())
}
}
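A minimal sketch of the create-write-persist cycle this store expects (the payload line is hypothetical):

    use std::io::Write;

    let store = UpdateFileStore::new("data.ms")?;
    let (uuid, mut update_file) = store.new_update()?;
    // UpdateFile derefs to NamedTempFile, so writes go straight through.
    update_file.write_all(b"{\"id\": 1, \"title\": \"Carol\"}\n")?;
    update_file.persist()?; // moves the temp file to updates/updates_files/<uuid>
    let reopened = store.get_update(uuid)?;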

View File

@ -0,0 +1,71 @@
use std::error::Error;
use std::fmt;
use meilisearch_error::{Code, ErrorCode};
use crate::{
document_formats::DocumentFormatError,
index_controller::update_file_store::UpdateFileStoreError,
};
pub type Result<T> = std::result::Result<T, UpdateLoopError>;
#[derive(Debug, thiserror::Error)]
#[allow(clippy::large_enum_variant)]
pub enum UpdateLoopError {
#[error("Update {0} not found.")]
UnexistingUpdate(u64),
#[error("Internal error: {0}")]
Internal(Box<dyn Error + Send + Sync + 'static>),
#[error(
"update store was shut down due to a fatal error, please check your logs for more info."
)]
FatalUpdateStoreError,
#[error("{0}")]
InvalidPayload(#[from] DocumentFormatError),
#[error("{0}")]
MalformedPayload(Box<dyn Error + Send + Sync + 'static>),
// TODO: The reference to actix has to go.
#[error("{0}")]
PayloadError(#[from] actix_web::error::PayloadError),
}
impl<T> From<tokio::sync::mpsc::error::SendError<T>> for UpdateLoopError
where
T: Sync + Send + 'static + fmt::Debug,
{
fn from(other: tokio::sync::mpsc::error::SendError<T>) -> Self {
Self::Internal(Box::new(other))
}
}
impl From<tokio::sync::oneshot::error::RecvError> for UpdateLoopError {
fn from(other: tokio::sync::oneshot::error::RecvError) -> Self {
Self::Internal(Box::new(other))
}
}
internal_error!(
UpdateLoopError: heed::Error,
std::io::Error,
serde_json::Error,
tokio::task::JoinError,
UpdateFileStoreError
);
impl ErrorCode for UpdateLoopError {
fn error_code(&self) -> Code {
match self {
Self::UnexistingUpdate(_) => Code::NotFound,
Self::Internal(_) => Code::Internal,
//Self::IndexActor(e) => e.error_code(),
Self::FatalUpdateStoreError => Code::Internal,
Self::InvalidPayload(_) => Code::BadRequest,
Self::MalformedPayload(_) => Code::BadRequest,
Self::PayloadError(error) => match error {
actix_web::error::PayloadError::Overflow => Code::PayloadTooLarge,
_ => Code::Internal,
},
}
}
}

View File

@ -0,0 +1,113 @@
use std::path::PathBuf;
use tokio::sync::{mpsc, oneshot};
use uuid::Uuid;
use crate::index::Index;
use super::error::Result;
use super::{Update, UpdateStatus, UpdateStoreInfo};
#[derive(Debug)]
pub enum UpdateMsg {
Update {
uuid: Uuid,
update: Update,
ret: oneshot::Sender<Result<UpdateStatus>>,
},
ListUpdates {
uuid: Uuid,
ret: oneshot::Sender<Result<Vec<UpdateStatus>>>,
},
GetUpdate {
uuid: Uuid,
ret: oneshot::Sender<Result<UpdateStatus>>,
id: u64,
},
DeleteIndex {
uuid: Uuid,
ret: oneshot::Sender<Result<()>>,
},
Snapshot {
indexes: Vec<Index>,
path: PathBuf,
ret: oneshot::Sender<Result<()>>,
},
Dump {
indexes: Vec<Index>,
path: PathBuf,
ret: oneshot::Sender<Result<()>>,
},
GetInfo {
ret: oneshot::Sender<Result<UpdateStoreInfo>>,
},
}
impl UpdateMsg {
pub async fn snapshot(
sender: &mpsc::Sender<Self>,
path: PathBuf,
indexes: Vec<Index>,
) -> Result<()> {
let (ret, rcv) = oneshot::channel();
let msg = Self::Snapshot { path, indexes, ret };
sender.send(msg).await?;
rcv.await?
}
pub async fn dump(
sender: &mpsc::Sender<Self>,
indexes: Vec<Index>,
path: PathBuf,
) -> Result<()> {
let (ret, rcv) = oneshot::channel();
let msg = Self::Dump { path, indexes, ret };
sender.send(msg).await?;
rcv.await?
}
pub async fn update(
sender: &mpsc::Sender<Self>,
uuid: Uuid,
update: Update,
) -> Result<UpdateStatus> {
let (ret, rcv) = oneshot::channel();
let msg = Self::Update { uuid, update, ret };
sender.send(msg).await?;
rcv.await?
}
pub async fn get_update(
sender: &mpsc::Sender<Self>,
uuid: Uuid,
id: u64,
) -> Result<UpdateStatus> {
let (ret, rcv) = oneshot::channel();
let msg = Self::GetUpdate { uuid, id, ret };
sender.send(msg).await?;
rcv.await?
}
pub async fn list_updates(
sender: &mpsc::Sender<Self>,
uuid: Uuid,
) -> Result<Vec<UpdateStatus>> {
let (ret, rcv) = oneshot::channel();
let msg = Self::ListUpdates { uuid, ret };
sender.send(msg).await?;
rcv.await?
}
pub async fn get_info(sender: &mpsc::Sender<Self>) -> Result<UpdateStoreInfo> {
let (ret, rcv) = oneshot::channel();
let msg = Self::GetInfo { ret };
sender.send(msg).await?;
rcv.await?
}
pub async fn delete(sender: &mpsc::Sender<Self>, uuid: Uuid) -> Result<()> {
let (ret, rcv) = oneshot::channel();
let msg = Self::DeleteIndex { ret, uuid };
sender.send(msg).await?;
rcv.await?
}
}
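Every helper follows the same ask pattern: build a oneshot channel, send the message carrying the `ret` half, then await the reply. A hedged caller sketch:

    // Hypothetical: fetch the status of update #3 for an index.
    let status = UpdateMsg::get_update(&update_sender, index_uuid, 3).await?;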

View File

@ -0,0 +1,286 @@
pub mod error;
mod message;
pub mod status;
pub mod store;
use std::io;
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
use actix_web::error::PayloadError;
use async_stream::stream;
use bytes::Bytes;
use futures::{Stream, StreamExt};
use log::trace;
use milli::update::IndexDocumentsMethod;
use serde::{Deserialize, Serialize};
use tokio::sync::mpsc;
use uuid::Uuid;
use self::error::{Result, UpdateLoopError};
pub use self::message::UpdateMsg;
use self::store::{UpdateStore, UpdateStoreInfo};
use crate::document_formats::{read_csv, read_json, read_ndjson};
use crate::index::{Index, Settings, Unchecked};
use crate::index_controller::update_file_store::UpdateFileStore;
use status::UpdateStatus;
use super::index_resolver::HardStateIndexResolver;
use super::{DocumentAdditionFormat, Update};
pub type UpdateSender = mpsc::Sender<UpdateMsg>;
pub fn create_update_handler(
index_resolver: Arc<HardStateIndexResolver>,
db_path: impl AsRef<Path>,
update_store_size: usize,
) -> anyhow::Result<UpdateSender> {
let path = db_path.as_ref().to_owned();
let (sender, receiver) = mpsc::channel(100);
let actor = UpdateLoop::new(update_store_size, receiver, path, index_resolver)?;
tokio::task::spawn(actor.run());
Ok(sender)
}
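A condensed sketch of the wiring done in `IndexControllerBuilder::build` earlier in this diff (size illustrative):

    let update_sender =
        create_update_handler(index_resolver.clone(), "data.ms", 100 * 1024 * 1024)?;
    // The sender is cheaply cloneable; it is the only handle the rest of the
    // system needs to reach the update loop.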
/// A wrapper type implementing `io::Read` on a `Stream<Result<Bytes, PayloadError>>`.
struct StreamReader<S> {
stream: S,
current: Option<Bytes>,
}
impl<S> StreamReader<S> {
fn new(stream: S) -> Self {
Self {
stream,
current: None,
}
}
}
impl<S: Stream<Item = std::result::Result<Bytes, PayloadError>> + Unpin> io::Read
for StreamReader<S>
{
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
// TODO: optimize buf filling
match self.current.take() {
Some(mut bytes) => {
let split_at = bytes.len().min(buf.len());
let copied = bytes.split_to(split_at);
buf[..split_at].copy_from_slice(&copied);
if !bytes.is_empty() {
self.current.replace(bytes);
}
Ok(copied.len())
}
None => match tokio::runtime::Handle::current().block_on(self.stream.next()) {
Some(Ok(bytes)) => {
self.current.replace(bytes);
self.read(buf)
}
Some(Err(e)) => Err(io::Error::new(io::ErrorKind::BrokenPipe, e)),
None => Ok(0),
},
}
}
}
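Because `read` calls `Handle::current().block_on`, this reader must only be driven from a blocking thread spawned from inside the runtime — which is exactly how `handle_update` below uses it. A condensed sketch:

    // Move the reader into spawn_blocking so block_on inside `read` cannot
    // stall the async executor.
    let reader = StreamReader::new(payload);
    tokio::task::spawn_blocking(move || read_ndjson(reader, &mut *update_file)).await??;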
pub struct UpdateLoop {
store: Arc<UpdateStore>,
inbox: Option<mpsc::Receiver<UpdateMsg>>,
update_file_store: UpdateFileStore,
must_exit: Arc<AtomicBool>,
}
impl UpdateLoop {
pub fn new(
update_db_size: usize,
inbox: mpsc::Receiver<UpdateMsg>,
path: impl AsRef<Path>,
index_resolver: Arc<HardStateIndexResolver>,
) -> anyhow::Result<Self> {
let path = path.as_ref().to_owned();
std::fs::create_dir_all(&path)?;
let mut options = heed::EnvOpenOptions::new();
options.map_size(update_db_size);
let must_exit = Arc::new(AtomicBool::new(false));
let update_file_store = UpdateFileStore::new(&path).unwrap();
let store = UpdateStore::open(
options,
&path,
index_resolver,
must_exit.clone(),
update_file_store.clone(),
)?;
let inbox = Some(inbox);
Ok(Self {
store,
inbox,
must_exit,
update_file_store,
})
}
pub async fn run(mut self) {
use UpdateMsg::*;
trace!("Started update actor.");
let mut inbox = self
.inbox
.take()
.expect("A receiver should be present by now.");
let must_exit = self.must_exit.clone();
let stream = stream! {
loop {
let msg = inbox.recv().await;
if must_exit.load(std::sync::atomic::Ordering::Relaxed) {
break;
}
match msg {
Some(msg) => yield msg,
None => break,
}
}
};
stream
.for_each_concurrent(Some(10), |msg| async {
match msg {
Update { uuid, update, ret } => {
let _ = ret.send(self.handle_update(uuid, update).await);
}
ListUpdates { uuid, ret } => {
let _ = ret.send(self.handle_list_updates(uuid).await);
}
GetUpdate { uuid, ret, id } => {
let _ = ret.send(self.handle_get_update(uuid, id).await);
}
DeleteIndex { uuid, ret } => {
let _ = ret.send(self.handle_delete(uuid).await);
}
Snapshot { indexes, path, ret } => {
let _ = ret.send(self.handle_snapshot(indexes, path).await);
}
GetInfo { ret } => {
let _ = ret.send(self.handle_get_info().await);
}
Dump { indexes, path, ret } => {
let _ = ret.send(self.handle_dump(indexes, path).await);
}
}
})
.await;
}
async fn handle_update(&self, index_uuid: Uuid, update: Update) -> Result<UpdateStatus> {
let registration = match update {
Update::DocumentAddition {
payload,
primary_key,
method,
format,
} => {
let reader = StreamReader::new(payload);
let (content_uuid, mut update_file) = self.update_file_store.new_update()?;
tokio::task::spawn_blocking(move || -> Result<_> {
match format {
DocumentAdditionFormat::Json => read_json(reader, &mut *update_file)?,
DocumentAdditionFormat::Csv => read_csv(reader, &mut *update_file)?,
DocumentAdditionFormat::Ndjson => read_ndjson(reader, &mut *update_file)?,
}
update_file.persist()?;
Ok(())
})
.await??;
store::Update::DocumentAddition {
primary_key,
method,
content_uuid,
}
}
Update::Settings(settings) => store::Update::Settings(settings),
Update::ClearDocuments => store::Update::ClearDocuments,
Update::DeleteDocuments(ids) => store::Update::DeleteDocuments(ids),
};
let store = self.store.clone();
let status =
tokio::task::spawn_blocking(move || store.register_update(index_uuid, registration))
.await??;
Ok(status.into())
}
async fn handle_list_updates(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>> {
let update_store = self.store.clone();
tokio::task::spawn_blocking(move || {
let result = update_store.list(uuid)?;
Ok(result)
})
.await?
}
async fn handle_get_update(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus> {
let store = self.store.clone();
tokio::task::spawn_blocking(move || {
let result = store
.meta(uuid, id)?
.ok_or(UpdateLoopError::UnexistingUpdate(id))?;
Ok(result)
})
.await?
}
async fn handle_delete(&self, uuid: Uuid) -> Result<()> {
let store = self.store.clone();
tokio::task::spawn_blocking(move || store.delete_all(uuid)).await??;
Ok(())
}
async fn handle_snapshot(&self, indexes: Vec<Index>, path: PathBuf) -> Result<()> {
let update_store = self.store.clone();
tokio::task::spawn_blocking(move || update_store.snapshot(indexes, path)).await??;
Ok(())
}
async fn handle_dump(&self, indexes: Vec<Index>, path: PathBuf) -> Result<()> {
let update_store = self.store.clone();
tokio::task::spawn_blocking(move || -> Result<()> {
update_store.dump(&indexes, path.to_path_buf())?;
Ok(())
})
.await??;
Ok(())
}
async fn handle_get_info(&self) -> Result<UpdateStoreInfo> {
let update_store = self.store.clone();
let info = tokio::task::spawn_blocking(move || -> Result<UpdateStoreInfo> {
let info = update_store.get_info()?;
Ok(info)
})
.await??;
Ok(info)
}
}

View File

@ -1,11 +1,14 @@
use std::{error::Error, fmt::Display};
use chrono::{DateTime, Utc};
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod, UpdateFormat};
use meilisearch_error::{Code, ErrorCode};
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::{
error::ResponseError,
index::{Settings, Unchecked},
Update,
};
#[derive(Debug, Clone, Serialize, Deserialize)]
@ -21,7 +24,6 @@ pub enum UpdateResult {
pub enum UpdateMeta {
DocumentsAddition {
method: IndexDocumentsMethod,
format: UpdateFormat,
primary_key: Option<String>,
},
ClearDocuments,
@ -35,18 +37,16 @@ pub enum UpdateMeta {
#[serde(rename_all = "camelCase")]
pub struct Enqueued {
pub update_id: u64,
pub meta: UpdateMeta,
pub meta: Update,
pub enqueued_at: DateTime<Utc>,
pub content: Option<Uuid>,
}
impl Enqueued {
pub fn new(meta: UpdateMeta, update_id: u64, content: Option<Uuid>) -> Self {
pub fn new(meta: Update, update_id: u64) -> Self {
Self {
enqueued_at: Utc::now(),
meta,
update_id,
content,
}
}
@ -64,7 +64,7 @@ impl Enqueued {
}
}
pub fn meta(&self) -> &UpdateMeta {
pub fn meta(&self) -> &Update {
&self.meta
}
@ -87,7 +87,7 @@ impl Processed {
self.from.id()
}
pub fn meta(&self) -> &UpdateMeta {
pub fn meta(&self) -> &Update {
self.from.meta()
}
}
@ -105,7 +105,7 @@ impl Processing {
self.from.id()
}
pub fn meta(&self) -> &UpdateMeta {
pub fn meta(&self) -> &Update {
self.from.meta()
}
@ -117,10 +117,13 @@ impl Processing {
}
}
pub fn fail(self, error: ResponseError) -> Failed {
pub fn fail(self, error: impl ErrorCode) -> Failed {
let msg = error.to_string();
let code = error.error_code();
Failed {
from: self,
error,
msg,
code,
failed_at: Utc::now(),
}
}
@ -139,7 +142,7 @@ impl Aborted {
self.from.id()
}
pub fn meta(&self) -> &UpdateMeta {
pub fn meta(&self) -> &Update {
self.from.meta()
}
}
@ -149,16 +152,31 @@ impl Aborted {
pub struct Failed {
#[serde(flatten)]
pub from: Processing,
pub error: ResponseError,
pub msg: String,
pub code: Code,
pub failed_at: DateTime<Utc>,
}
impl Display for Failed {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.msg.fmt(f)
}
}
impl Error for Failed {}
impl ErrorCode for Failed {
fn error_code(&self) -> Code {
self.code
}
}
impl Failed {
pub fn id(&self) -> u64 {
self.from.id()
}
pub fn meta(&self) -> &UpdateMeta {
pub fn meta(&self) -> &Update {
self.from.meta()
}
}
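With `fail` now taking any `impl ErrorCode`, a failure can be recorded straight from a domain error; a hedged sketch (the `processing` value is hypothetical):

    let failed = processing.fail(IndexResolverError::UnexistingIndex("movies".into()));
    // `msg` keeps the Display text, `code` the serializable error code.
    assert_eq!(failed.msg, "Index movies not found");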
@ -184,7 +202,7 @@ impl UpdateStatus {
}
}
pub fn meta(&self) -> &UpdateMeta {
pub fn meta(&self) -> &Update {
match self {
UpdateStatus::Processing(u) => u.meta(),
UpdateStatus::Enqueued(u) => u.meta(),

View File

@ -0,0 +1,159 @@
use std::collections::HashSet;
use std::fs::{create_dir_all, File};
use std::io::{BufReader, Write};
use std::path::{Path, PathBuf};
use heed::{EnvOpenOptions, RoTxn};
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use serde_json::Deserializer;
use tempfile::{NamedTempFile, TempDir};
use uuid::Uuid;
use super::{Result, State, UpdateStore};
use crate::{
index::Index,
index_controller::{
update_file_store::UpdateFileStore,
updates::status::{Enqueued, UpdateStatus},
},
Update,
};
#[derive(Serialize, Deserialize)]
struct UpdateEntry {
uuid: Uuid,
update: UpdateStatus,
}
impl UpdateStore {
pub fn dump(&self, indexes: &[Index], path: PathBuf) -> Result<()> {
let state_lock = self.state.write();
state_lock.swap(State::Dumping);
        // txn must *always* be acquired after the state lock, or it will deadlock.
let txn = self.env.write_txn()?;
let uuids = indexes.iter().map(|i| i.uuid).collect();
self.dump_updates(&txn, &uuids, &path)?;
indexes
.par_iter()
.try_for_each(|index| index.dump(&path))
.unwrap();
Ok(())
}
fn dump_updates(
&self,
txn: &RoTxn,
uuids: &HashSet<Uuid>,
path: impl AsRef<Path>,
) -> Result<()> {
let mut dump_data_file = NamedTempFile::new()?;
self.dump_pending(txn, uuids, &mut dump_data_file, &path)?;
self.dump_completed(txn, uuids, &mut dump_data_file)?;
let mut dst_path = path.as_ref().join("updates");
create_dir_all(&dst_path)?;
dst_path.push("data.jsonl");
dump_data_file.persist(dst_path).unwrap();
Ok(())
}
fn dump_pending(
&self,
txn: &RoTxn,
uuids: &HashSet<Uuid>,
mut file: impl Write,
dst_path: impl AsRef<Path>,
) -> Result<()> {
let pendings = self.pending_queue.iter(txn)?.lazily_decode_data();
for pending in pendings {
let ((_, uuid, _), data) = pending?;
if uuids.contains(&uuid) {
let update = data.decode()?;
if let Enqueued {
meta: Update::DocumentAddition { content_uuid, .. },
..
} = update
{
self.update_file_store
.dump(content_uuid, &dst_path)
.unwrap();
}
let update_json = UpdateEntry {
uuid,
update: update.into(),
};
serde_json::to_writer(&mut file, &update_json)?;
file.write_all(b"\n")?;
}
}
Ok(())
}
fn dump_completed(
&self,
txn: &RoTxn,
uuids: &HashSet<Uuid>,
mut file: impl Write,
) -> Result<()> {
let updates = self.updates.iter(txn)?.lazily_decode_data();
for update in updates {
let ((uuid, _), data) = update?;
if uuids.contains(&uuid) {
let update = data.decode()?;
let update_json = UpdateEntry { uuid, update };
serde_json::to_writer(&mut file, &update_json)?;
file.write_all(b"\n")?;
}
}
Ok(())
}
pub fn load_dump(
src: impl AsRef<Path>,
dst: impl AsRef<Path>,
db_size: usize,
) -> anyhow::Result<()> {
println!("target path: {}", dst.as_ref().display());
let mut options = EnvOpenOptions::new();
options.map_size(db_size as usize);
// create a dummy update file store, since it is not needed right now.
let tmp = TempDir::new().unwrap();
let update_file_store = UpdateFileStore::new(tmp.path()).unwrap();
let (store, _) = UpdateStore::new(options, &dst, update_file_store)?;
let src_update_path = src.as_ref().join("updates");
let update_data = File::open(&src_update_path.join("data.jsonl"))?;
let update_data = BufReader::new(update_data);
let stream = Deserializer::from_reader(update_data).into_iter::<UpdateEntry>();
let mut wtxn = store.env.write_txn()?;
for entry in stream {
let UpdateEntry { uuid, update } = entry?;
store.register_raw_updates(&mut wtxn, &update, uuid)?;
}
wtxn.commit()?;
Ok(())
}
}
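
As a rough usage sketch, the dump produced above is an `updates/data.jsonl` file with one serialized `UpdateEntry` per line, which can be streamed back exactly as `load_dump` does (the path handling below is illustrative):

use std::fs::File;
use std::io::BufReader;
use std::path::Path;

use serde_json::Deserializer;

// Stream every update entry back out of a dump directory.
fn read_entries(dump: &Path) -> anyhow::Result<Vec<UpdateEntry>> {
    let data = File::open(dump.join("updates").join("data.jsonl"))?;
    let reader = BufReader::new(data);
    Deserializer::from_reader(reader)
        .into_iter::<UpdateEntry>()
        .map(|entry| entry.map_err(Into::into))
        .collect()
}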

View File

@ -1,7 +1,7 @@
mod codec;
pub mod dump;
use std::fs::{copy, create_dir_all, remove_file, File};
use std::fs::create_dir_all;
use std::path::Path;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
@ -12,12 +12,12 @@ use std::{
};
use arc_swap::ArcSwap;
use futures::StreamExt;
use heed::types::{ByteSlice, OwnedType, SerdeJson};
use heed::zerocopy::U64;
use heed::{CompactionOption, Database, Env, EnvOpenOptions};
use log::error;
use parking_lot::{Mutex, MutexGuard};
use rayon::prelude::*;
use tokio::runtime::Handle;
use tokio::sync::mpsc;
use tokio::sync::mpsc::error::TrySendError;
@ -27,15 +27,28 @@ use uuid::Uuid;
use codec::*;
use super::error::Result;
use super::UpdateMeta;
use crate::helpers::EnvSizer;
use crate::index_controller::{index_actor::CONCURRENT_INDEX_MSG, updates::*, IndexActorHandle};
use super::status::{Enqueued, Processing};
use crate::index::Index;
use crate::index_controller::updates::*;
use crate::EnvSizer;
#[allow(clippy::upper_case_acronyms)]
type BEU64 = U64<heed::byteorder::BE>;
const UPDATE_DIR: &str = "update_files";
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Update {
DeleteDocuments(Vec<String>),
DocumentAddition {
primary_key: Option<String>,
method: IndexDocumentsMethod,
content_uuid: Uuid,
},
Settings(Settings<Unchecked>),
ClearDocuments,
}
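
A small sketch of how a document addition is described after this change: only metadata plus the uuid of the payload in the update file store, no inline content. The primary key value is illustrative, and `IndexDocumentsMethod` is assumed to be milli's.

use milli::update::IndexDocumentsMethod;
use uuid::Uuid;

fn document_addition(content_uuid: Uuid) -> Update {
    Update::DocumentAddition {
        // Illustrative primary key; `None` would let the index infer it.
        primary_key: Some("id".to_string()),
        method: IndexDocumentsMethod::ReplaceDocuments,
        content_uuid,
    }
}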
#[derive(Debug)]
pub struct UpdateStoreInfo {
/// Size of the update store in bytes.
pub size: u64,
@ -106,6 +119,7 @@ pub struct UpdateStore {
state: Arc<StateLock>,
/// Wake up the loop when a new event occurs.
notification_sender: mpsc::Sender<()>,
update_file_store: UpdateFileStore,
path: PathBuf,
}
@ -113,10 +127,13 @@ impl UpdateStore {
fn new(
mut options: EnvOpenOptions,
path: impl AsRef<Path>,
update_file_store: UpdateFileStore,
) -> anyhow::Result<(Self, mpsc::Receiver<()>)> {
options.max_dbs(5);
let env = options.open(&path)?;
let update_path = path.as_ref().join("updates");
std::fs::create_dir_all(&update_path)?;
let env = options.open(update_path)?;
let pending_queue = env.create_database(Some("pending-queue"))?;
let next_update_id = env.create_database(Some("next-update-id"))?;
let updates = env.create_database(Some("updates"))?;
@ -134,6 +151,7 @@ impl UpdateStore {
state,
notification_sender,
path: path.as_ref().to_owned(),
update_file_store,
},
notification_receiver,
))
@ -142,10 +160,12 @@ impl UpdateStore {
pub fn open(
options: EnvOpenOptions,
path: impl AsRef<Path>,
index_handle: impl IndexActorHandle + Clone + Sync + Send + 'static,
index_resolver: Arc<HardStateIndexResolver>,
must_exit: Arc<AtomicBool>,
update_file_store: UpdateFileStore,
) -> anyhow::Result<Arc<Self>> {
let (update_store, mut notification_receiver) = Self::new(options, path)?;
let (update_store, mut notification_receiver) =
Self::new(options, path, update_file_store)?;
let update_store = Arc::new(update_store);
// Send a first notification to trigger the process.
@ -157,7 +177,7 @@ impl UpdateStore {
// want to close the index.
let duration = Duration::from_secs(10 * 60); // 10 minutes
let update_store_weak = Arc::downgrade(&update_store);
tokio::task::spawn(async move {
tokio::task::spawn_local(async move {
// Block and wait for something to process, with a timeout. The timeout
// function returns a Result; we only use it to decide whether to exit the loop.
'outer: while timeout(duration, notification_receiver.recv())
@ -167,7 +187,7 @@ impl UpdateStore {
loop {
match update_store_weak.upgrade() {
Some(update_store) => {
let handler = index_handle.clone();
let handler = index_resolver.clone();
let res = tokio::task::spawn_blocking(move || {
update_store.process_pending_update(handler)
})
@ -231,16 +251,10 @@ impl UpdateStore {
/// Registers the update content in the pending store and the meta
/// into the pending-meta store. Returns the new unique update id.
pub fn register_update(
&self,
meta: UpdateMeta,
content: Option<Uuid>,
index_uuid: Uuid,
) -> heed::Result<Enqueued> {
pub fn register_update(&self, index_uuid: Uuid, update: Update) -> heed::Result<Enqueued> {
let mut txn = self.env.write_txn()?;
let (global_id, update_id) = self.next_update_id(&mut txn, index_uuid)?;
let meta = Enqueued::new(meta, update_id, content);
let meta = Enqueued::new(update, update_id);
self.pending_queue
.put(&mut txn, &(global_id, index_uuid, update_id), &meta)?;
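
A quick sketch of the slimmed-down registration call site; there is no longer a separate `content: Option<Uuid>` argument, since document payloads are referenced by `content_uuid` inside the `Update` itself (the wrapper function is illustrative):

// Enqueue a ClearDocuments update for one index.
fn enqueue_clear(store: &UpdateStore, index_uuid: uuid::Uuid) -> heed::Result<Enqueued> {
    store.register_update(index_uuid, Update::ClearDocuments)
}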
@ -282,7 +296,10 @@ impl UpdateStore {
/// Executes the user provided function on the next pending update (the one with the lowest id).
/// This is asynchronous as it lets the user process the update with a read-only txn,
/// writing the result meta to the processed-meta store only *after* it has been processed.
fn process_pending_update(&self, index_handle: impl IndexActorHandle) -> Result<Option<()>> {
fn process_pending_update(
&self,
index_resolver: Arc<HardStateIndexResolver>,
) -> Result<Option<()>> {
// Create a read transaction to be able to retrieve the pending update in order.
let rtxn = self.env.read_txn()?;
let first_meta = self.pending_queue.first(&rtxn)?;
@ -291,16 +308,14 @@ impl UpdateStore {
// If there is a pending update we process and only keep
// a reader while processing it, not a writer.
match first_meta {
Some(((global_id, index_uuid, _), mut pending)) => {
let content = pending.content.take();
Some(((global_id, index_uuid, _), pending)) => {
let processing = pending.processing();
// Acquire the state lock and set the current state to processing.
// txn must *always* be acquired after the state lock, or it will deadlock.
let state = self.state.write();
state.swap(State::Processing(index_uuid, processing.clone()));
let result =
self.perform_update(content, processing, index_handle, index_uuid, global_id);
let result = self.perform_update(processing, index_resolver, index_uuid, global_id);
state.swap(State::Idle);
@ -312,30 +327,19 @@ impl UpdateStore {
fn perform_update(
&self,
content: Option<Uuid>,
processing: Processing,
index_handle: impl IndexActorHandle,
index_resolver: Arc<HardStateIndexResolver>,
index_uuid: Uuid,
global_id: u64,
) -> Result<Option<()>> {
let content_path = content.map(|uuid| update_uuid_to_file_path(&self.path, uuid));
let update_id = processing.id();
let file = match content_path {
Some(ref path) => {
let file = File::open(path)?;
Some(file)
}
None => None,
};
// Process the pending update using the provided user function.
let handle = Handle::current();
let result =
match handle.block_on(index_handle.update(index_uuid, processing.clone(), file)) {
Ok(result) => result,
Err(e) => Err(processing.fail(e.into())),
};
let update_id = processing.id();
//IndexMsg::update(index_resolver, index_uuid, processing.clone()
let result = match handle.block_on(index_resolver.get_index_by_uuid(index_uuid)) {
Ok(index) => index.handle_update(processing),
Err(e) => Err(processing.fail(e)),
};
// Once the pending update have been successfully processed
// we must remove the content from the pending and processing stores and
@ -354,10 +358,6 @@ impl UpdateStore {
wtxn.commit()?;
if let Some(ref path) = content_path {
remove_file(&path)?;
}
Ok(Some(()))
}
@ -441,13 +441,13 @@ impl UpdateStore {
while let Some(Ok(((_, uuid, _), pending))) = pendings.next() {
if uuid == index_uuid {
let mut pending = pending.decode()?;
if let Some(update_uuid) = pending.content.take() {
uuids_to_remove.push(update_uuid);
let pending = pending.decode()?;
if let Update::DocumentAddition { content_uuid, .. } = pending.meta() {
uuids_to_remove.push(*content_uuid);
}
// Invariant check: we can only delete the current entry when we don't hold
// references to it anymore. This must be done after we have retrieved its content.
//Invariant check: we can only delete the current entry when we don't hold
//references to it anymore. This must be done after we have retrieved its content.
unsafe {
pendings.del_current()?;
}
@ -484,22 +484,14 @@ impl UpdateStore {
// Finally, remove any outstanding update files. This must be done after waiting for the
// last update to ensure that the update files are not deleted before the update needs
// them.
uuids_to_remove
.iter()
.map(|uuid| update_uuid_to_file_path(&self.path, *uuid))
.for_each(|path| {
let _ = remove_file(path);
});
uuids_to_remove.iter().for_each(|uuid| {
let _ = self.update_file_store.delete(*uuid);
});
Ok(())
}
pub fn snapshot(
&self,
uuids: &HashSet<Uuid>,
path: impl AsRef<Path>,
handle: impl IndexActorHandle + Clone,
) -> Result<()> {
pub fn snapshot(&self, indexes: Vec<Index>, path: impl AsRef<Path>) -> Result<()> {
let state_lock = self.state.write();
state_lock.swap(State::Snapshoting);
@ -515,39 +507,29 @@ impl UpdateStore {
// create db snapshot
self.env.copy_to_path(&db_path, CompactionOption::Enabled)?;
let update_files_path = update_path.join(UPDATE_DIR);
create_dir_all(&update_files_path)?;
let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data();
let uuids: HashSet<_> = indexes.iter().map(|i| i.uuid).collect();
for entry in pendings {
let ((_, uuid, _), pending) = entry?;
if uuids.contains(&uuid) {
if let Enqueued {
content: Some(uuid),
meta: Update::DocumentAddition { content_uuid, .. },
..
} = pending.decode()?
{
let path = update_uuid_to_file_path(&self.path, uuid);
copy(path, &update_files_path)?;
self.update_file_store
.snapshot(content_uuid, &path)
.unwrap();
}
}
}
let path = &path.as_ref().to_path_buf();
let handle = &handle;
// Perform the snapshot of each index concurrently. Only a third of the index
// actor's capacity is used at a time, so as not to put too much pressure on it.
let mut stream = futures::stream::iter(uuids.iter())
.map(move |uuid| handle.snapshot(*uuid, path.clone()))
.buffer_unordered(CONCURRENT_INDEX_MSG / 3);
Handle::current().block_on(async {
while let Some(res) = stream.next().await {
res?;
}
Ok(()) as Result<()>
})?;
let path = path.as_ref().to_owned();
indexes
.par_iter()
.try_for_each(|index| index.snapshot(path.clone()))
.unwrap();
Ok(())
}
@ -558,12 +540,12 @@ impl UpdateStore {
for entry in self.pending_queue.iter(&txn)? {
let (_, pending) = entry?;
if let Enqueued {
content: Some(uuid),
meta: store::Update::DocumentAddition { content_uuid, .. },
..
} = pending
{
let path = update_uuid_to_file_path(&self.path, uuid);
size += File::open(path)?.metadata()?.len();
let len = self.update_file_store.get_size(content_uuid)?;
size += len;
}
}
let processing = match *self.state.read() {
@ -575,155 +557,149 @@ impl UpdateStore {
}
}
fn update_uuid_to_file_path(root: impl AsRef<Path>, uuid: Uuid) -> PathBuf {
root.as_ref()
.join(UPDATE_DIR)
.join(format!("update_{}", uuid))
}
//#[cfg(test)]
//mod test {
//use super::*;
//use crate::index_controller::{
//index_actor::{error::IndexActorError, MockIndexActorHandle},
//UpdateResult,
//};
#[cfg(test)]
mod test {
use super::*;
use crate::index_controller::{
index_actor::{error::IndexActorError, MockIndexActorHandle},
UpdateResult,
};
//use futures::future::ok;
use futures::future::ok;
//#[actix_rt::test]
//async fn test_next_id() {
//let dir = tempfile::tempdir_in(".").unwrap();
//let mut options = EnvOpenOptions::new();
//let handle = Arc::new(MockIndexActorHandle::new());
//options.map_size(4096 * 100);
//let update_store = UpdateStore::open(
//options,
//dir.path(),
//handle,
//Arc::new(AtomicBool::new(false)),
//)
//.unwrap();
#[actix_rt::test]
async fn test_next_id() {
let dir = tempfile::tempdir_in(".").unwrap();
let mut options = EnvOpenOptions::new();
let handle = Arc::new(MockIndexActorHandle::new());
options.map_size(4096 * 100);
let update_store = UpdateStore::open(
options,
dir.path(),
handle,
Arc::new(AtomicBool::new(false)),
)
.unwrap();
//let index1_uuid = Uuid::new_v4();
//let index2_uuid = Uuid::new_v4();
let index1_uuid = Uuid::new_v4();
let index2_uuid = Uuid::new_v4();
//let mut txn = update_store.env.write_txn().unwrap();
//let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap();
//txn.commit().unwrap();
//assert_eq!((0, 0), ids);
let mut txn = update_store.env.write_txn().unwrap();
let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap();
txn.commit().unwrap();
assert_eq!((0, 0), ids);
//let mut txn = update_store.env.write_txn().unwrap();
//let ids = update_store.next_update_id(&mut txn, index2_uuid).unwrap();
//txn.commit().unwrap();
//assert_eq!((1, 0), ids);
let mut txn = update_store.env.write_txn().unwrap();
let ids = update_store.next_update_id(&mut txn, index2_uuid).unwrap();
txn.commit().unwrap();
assert_eq!((1, 0), ids);
//let mut txn = update_store.env.write_txn().unwrap();
//let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap();
//txn.commit().unwrap();
//assert_eq!((2, 1), ids);
//}
let mut txn = update_store.env.write_txn().unwrap();
let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap();
txn.commit().unwrap();
assert_eq!((2, 1), ids);
}
//#[actix_rt::test]
//async fn test_register_update() {
//let dir = tempfile::tempdir_in(".").unwrap();
//let mut options = EnvOpenOptions::new();
//let handle = Arc::new(MockIndexActorHandle::new());
//options.map_size(4096 * 100);
//let update_store = UpdateStore::open(
//options,
//dir.path(),
//handle,
//Arc::new(AtomicBool::new(false)),
//)
//.unwrap();
//let meta = UpdateMeta::ClearDocuments;
//let uuid = Uuid::new_v4();
//let store_clone = update_store.clone();
//tokio::task::spawn_blocking(move || {
//store_clone.register_update(meta, None, uuid).unwrap();
//})
//.await
//.unwrap();
#[actix_rt::test]
async fn test_register_update() {
let dir = tempfile::tempdir_in(".").unwrap();
let mut options = EnvOpenOptions::new();
let handle = Arc::new(MockIndexActorHandle::new());
options.map_size(4096 * 100);
let update_store = UpdateStore::open(
options,
dir.path(),
handle,
Arc::new(AtomicBool::new(false)),
)
.unwrap();
let meta = UpdateMeta::ClearDocuments;
let uuid = Uuid::new_v4();
let store_clone = update_store.clone();
tokio::task::spawn_blocking(move || {
store_clone.register_update(meta, None, uuid).unwrap();
})
.await
.unwrap();
//let txn = update_store.env.read_txn().unwrap();
//assert!(update_store
//.pending_queue
//.get(&txn, &(0, uuid, 0))
//.unwrap()
//.is_some());
//}
let txn = update_store.env.read_txn().unwrap();
assert!(update_store
.pending_queue
.get(&txn, &(0, uuid, 0))
.unwrap()
.is_some());
}
//#[actix_rt::test]
//async fn test_process_update() {
//let dir = tempfile::tempdir_in(".").unwrap();
//let mut handle = MockIndexActorHandle::new();
#[actix_rt::test]
async fn test_process_update() {
let dir = tempfile::tempdir_in(".").unwrap();
let mut handle = MockIndexActorHandle::new();
//handle
//.expect_update()
//.times(2)
//.returning(|_index_uuid, processing, _file| {
//if processing.id() == 0 {
//Box::pin(ok(Ok(processing.process(UpdateResult::Other))))
//} else {
//Box::pin(ok(Err(
//processing.fail(IndexActorError::ExistingPrimaryKey.into())
//)))
//}
//});
handle
.expect_update()
.times(2)
.returning(|_index_uuid, processing, _file| {
if processing.id() == 0 {
Box::pin(ok(Ok(processing.process(UpdateResult::Other))))
} else {
Box::pin(ok(Err(
processing.fail(IndexActorError::ExistingPrimaryKey.into())
)))
}
});
//let handle = Arc::new(handle);
let handle = Arc::new(handle);
//let mut options = EnvOpenOptions::new();
//options.map_size(4096 * 100);
//let store = UpdateStore::open(
//options,
//dir.path(),
//handle.clone(),
//Arc::new(AtomicBool::new(false)),
//)
//.unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(4096 * 100);
let store = UpdateStore::open(
options,
dir.path(),
handle.clone(),
Arc::new(AtomicBool::new(false)),
)
.unwrap();
//// wait a bit for the event loop exit.
//tokio::time::sleep(std::time::Duration::from_millis(50)).await;
// wait a bit for the event loop exit.
tokio::time::sleep(std::time::Duration::from_millis(50)).await;
//let mut txn = store.env.write_txn().unwrap();
let mut txn = store.env.write_txn().unwrap();
//let update = Enqueued::new(UpdateMeta::ClearDocuments, 0, None);
//let uuid = Uuid::new_v4();
let update = Enqueued::new(UpdateMeta::ClearDocuments, 0, None);
let uuid = Uuid::new_v4();
//store
//.pending_queue
//.put(&mut txn, &(0, uuid, 0), &update)
//.unwrap();
store
.pending_queue
.put(&mut txn, &(0, uuid, 0), &update)
.unwrap();
//let update = Enqueued::new(UpdateMeta::ClearDocuments, 1, None);
let update = Enqueued::new(UpdateMeta::ClearDocuments, 1, None);
//store
//.pending_queue
//.put(&mut txn, &(1, uuid, 1), &update)
//.unwrap();
store
.pending_queue
.put(&mut txn, &(1, uuid, 1), &update)
.unwrap();
//txn.commit().unwrap();
txn.commit().unwrap();
//// Process the pending, and check that it has been moved to the update databases, and
//// removed from the pending database.
//let store_clone = store.clone();
//tokio::task::spawn_blocking(move || {
//store_clone.process_pending_update(handle.clone()).unwrap();
//store_clone.process_pending_update(handle).unwrap();
//})
//.await
//.unwrap();
// Process the pending, and check that it has been moved to the update databases, and
// removed from the pending database.
let store_clone = store.clone();
tokio::task::spawn_blocking(move || {
store_clone.process_pending_update(handle.clone()).unwrap();
store_clone.process_pending_update(handle).unwrap();
})
.await
.unwrap();
//let txn = store.env.read_txn().unwrap();
let txn = store.env.read_txn().unwrap();
//assert!(store.pending_queue.first(&txn).unwrap().is_none());
//let update = store.updates.get(&txn, &(uuid, 0)).unwrap().unwrap();
assert!(store.pending_queue.first(&txn).unwrap().is_none());
let update = store.updates.get(&txn, &(uuid, 0)).unwrap().unwrap();
//assert!(matches!(update, UpdateStatus::Processed(_)));
//let update = store.updates.get(&txn, &(uuid, 1)).unwrap().unwrap();
assert!(matches!(update, UpdateStatus::Processed(_)));
let update = store.updates.get(&txn, &(uuid, 1)).unwrap().unwrap();
assert!(matches!(update, UpdateStatus::Failed(_)));
}
}
//assert!(matches!(update, UpdateStatus::Failed(_)));
//}
//}

View File

@ -0,0 +1,30 @@
#[macro_use]
pub mod error;
pub mod options;
pub mod index;
pub mod index_controller;
pub use index_controller::{updates::store::Update, IndexController as MeiliSearch};
pub use milli;
mod compression;
mod document_formats;
use walkdir::WalkDir;
pub trait EnvSizer {
fn size(&self) -> u64;
}
impl EnvSizer for heed::Env {
fn size(&self) -> u64 {
WalkDir::new(self.path())
.into_iter()
.filter_map(|entry| entry.ok())
.filter_map(|entry| entry.metadata().ok())
.filter(|metadata| metadata.is_file())
.fold(0, |acc, m| acc + m.len())
}
}
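
A minimal usage sketch of `EnvSizer`, assuming an already-created heed environment (the path and map size are illustrative):

use heed::EnvOpenOptions;
use meilisearch_lib::EnvSizer;

fn main() -> anyhow::Result<()> {
    let mut options = EnvOpenOptions::new();
    options.map_size(100 * 1024 * 1024); // 100 MiB, illustrative
    std::fs::create_dir_all("./data.ms")?;
    let env = options.open("./data.ms")?;
    // Walks the env's directory and sums the size of every file on disk.
    println!("env size: {} bytes", env.size());
    Ok(())
}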

View File

@ -0,0 +1,114 @@
use core::fmt;
use std::{ops::Deref, str::FromStr};
use byte_unit::{Byte, ByteError};
use milli::CompressionType;
use structopt::StructOpt;
use sysinfo::{RefreshKind, System, SystemExt};
#[derive(Debug, Clone, StructOpt)]
pub struct IndexerOpts {
/// The number of documents to skip before printing
/// a log about the indexing progress.
#[structopt(long, default_value = "100000")] // 100k
pub log_every_n: usize,
/// Grenad max number of chunks in bytes.
#[structopt(long)]
pub max_nb_chunks: Option<usize>,
/// The maximum amount of memory the indexer will use. It defaults to 2/3
/// of the available memory. It is recommended to use something like 80%-90%
/// of the available memory, no more.
///
/// If the engine is unable to retrieve the available memory, it will try to use
/// as much memory as it needs, without any real limit; this can lead to
/// Out-Of-Memory issues, so it is recommended to specify the amount of memory to use.
#[structopt(long, default_value)]
pub max_memory: MaxMemory,
/// The name of the compression algorithm to use when compressing intermediate
/// Grenad chunks while indexing documents.
///
/// Choosing a fast algorithm will make the indexing faster but may consume more memory.
#[structopt(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])]
pub chunk_compression_type: CompressionType,
/// The level of compression of the chosen algorithm.
#[structopt(long, requires = "chunk-compression-type")]
pub chunk_compression_level: Option<u32>,
/// Number of parallel jobs for indexing, defaults to # of CPUs.
#[structopt(long)]
pub indexing_jobs: Option<usize>,
}
impl Default for IndexerOpts {
fn default() -> Self {
Self {
log_every_n: 100_000,
max_nb_chunks: None,
max_memory: MaxMemory::default(),
chunk_compression_type: CompressionType::None,
chunk_compression_level: None,
indexing_jobs: None,
}
}
}
/// A type used to detect the max memory available and use 2/3 of it.
#[derive(Debug, Clone, Copy)]
pub struct MaxMemory(Option<Byte>);
impl FromStr for MaxMemory {
type Err = ByteError;
fn from_str(s: &str) -> Result<MaxMemory, ByteError> {
Byte::from_str(s).map(Some).map(MaxMemory)
}
}
impl Default for MaxMemory {
fn default() -> MaxMemory {
MaxMemory(
total_memory_bytes()
.map(|bytes| bytes * 2 / 3)
.map(Byte::from_bytes),
)
}
}
impl fmt::Display for MaxMemory {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.0 {
Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)),
None => f.write_str("unknown"),
}
}
}
impl Deref for MaxMemory {
type Target = Option<Byte>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl MaxMemory {
pub fn unlimited() -> Self {
Self(None)
}
}
/// Returns the total number of bytes available, or `None` if this system isn't supported.
fn total_memory_bytes() -> Option<u64> {
if System::IS_SUPPORTED {
let memory_kind = RefreshKind::new().with_memory();
let mut system = System::new_with_specifics(memory_kind);
system.refresh_memory();
Some(system.total_memory() * 1024) // KiB into bytes
} else {
None
}
}
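
A quick behavioral sketch of `MaxMemory` (the string value is illustrative of what `byte_unit::Byte` can parse):

use std::str::FromStr;

fn main() {
    // Explicit value, e.g. from `--max-memory "4 GiB"` on the CLI.
    let explicit = MaxMemory::from_str("4 GiB").unwrap();
    assert!(explicit.is_some()); // via Deref<Target = Option<Byte>>

    // Default: two thirds of the detected total memory, or unlimited
    // (`None`, displayed as "unknown") when sysinfo isn't supported.
    let default = MaxMemory::default();
    println!("default max memory: {}", default);
}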