gets rid of meilisearch-lib

Tamo 2022-09-27 16:33:37 +02:00 committed by Clément Renault
parent 0ba1c46e19
commit 2c8f1a43e9
GPG Key ID: 92ADA4E935E71FA4
38 changed files with 398 additions and 1620 deletions

2
Cargo.lock generated
View File

@ -2352,6 +2352,7 @@ dependencies = [
"document-formats",
"either",
"env_logger",
"file-store",
"flate2",
"fst",
"futures",
@ -2486,6 +2487,7 @@ dependencies = [
"proptest-derive",
"serde",
"serde_json",
"tokio",
]
[[package]]

View File

@ -3,7 +3,6 @@ resolver = "2"
members = [
"meilisearch-http",
"meilisearch-types",
"meilisearch-lib",
"meilisearch-auth",
"index-scheduler",
"document-formats",

View File

@ -4,11 +4,8 @@ use crate::{
Error, IndexScheduler, Result, TaskId,
};
use index::{Settings, Unchecked};
use milli::{
heed::{RoTxn, RwTxn},
update::{DocumentAdditionResult, IndexDocumentsMethod},
DocumentId,
};
use milli::heed::RoTxn;
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod};
use uuid::Uuid;
pub(crate) enum Batch {

View File

@ -6,6 +6,8 @@ mod index_scheduler;
pub mod task;
mod utils;
pub use milli;
pub type Result<T> = std::result::Result<T, Error>;
pub type TaskId = u32;

View File

@ -268,7 +268,7 @@ impl Index {
pub fn retrieve_document<S: AsRef<str>>(
&self,
doc_id: String,
doc_id: &str,
attributes_to_retrieve: Option<Vec<S>>,
) -> Result<Document> {
let txn = self.read_txn()?;
@ -279,14 +279,14 @@ impl Index {
let internal_id = self
.external_documents_ids(&txn)?
.get(doc_id.as_bytes())
.ok_or_else(|| IndexError::DocumentNotFound(doc_id.clone()))?;
.ok_or_else(|| IndexError::DocumentNotFound(doc_id.to_string()))?;
let document = self
.documents(&txn, std::iter::once(internal_id))?
.into_iter()
.next()
.map(|(_, d)| d)
.ok_or(IndexError::DocumentNotFound(doc_id))?;
.ok_or_else(|| IndexError::DocumentNotFound(doc_id.to_string()))?;
let document = obkv_to_json(&all_fields, &fields_ids_map, document)?;
let document = match &attributes_to_retrieve {

View File

@ -107,7 +107,7 @@ pub mod test {
pub fn retrieve_document<S: AsRef<str>>(
&self,
doc_id: String,
doc_id: &str,
attributes_to_retrieve: Option<Vec<S>>,
) -> Result<Document> {
match self {

View File

@ -50,6 +50,7 @@ meilisearch-types = { path = "../meilisearch-types" }
meilisearch-lib = { path = "../meilisearch-lib", default-features = false }
index = { path = "../index" }
index-scheduler = { path = "../index-scheduler" }
file-store = { path = "../file-store" }
document-formats = { path = "../document-formats" }
mimalloc = { version = "0.1.29", default-features = false }
mime = "0.3.16"

View File

@ -1,7 +1,6 @@
use actix_web as aweb;
use aweb::error::{JsonPayloadError, QueryPayloadError};
use document_formats::DocumentFormatError;
use meilisearch_lib::IndexControllerError;
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use tokio::task::JoinError;
@ -20,9 +19,9 @@ pub enum MeilisearchHttpError {
#[error(transparent)]
Payload(#[from] PayloadError),
#[error(transparent)]
DocumentFormat(#[from] DocumentFormatError),
FileStore(#[from] file_store::Error),
#[error(transparent)]
IndexController(#[from] IndexControllerError),
DocumentFormat(#[from] DocumentFormatError),
#[error(transparent)]
Join(#[from] JoinError),
}
@ -34,8 +33,8 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
MeilisearchHttpError::IndexScheduler(e) => e.error_code(),
MeilisearchHttpError::Payload(e) => e.error_code(),
MeilisearchHttpError::FileStore(_) => Code::Internal,
MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
MeilisearchHttpError::IndexController(e) => e.error_code(),
MeilisearchHttpError::Join(_) => Code::Internal,
}
}

View File

@ -17,28 +17,36 @@ use std::time::Duration;
use crate::error::MeilisearchHttpError;
use actix_web::error::JsonPayloadError;
use actix_web::web::Data;
use analytics::Analytics;
use error::PayloadError;
use http::header::CONTENT_TYPE;
use index_scheduler::milli::update::IndexerConfig;
pub use option::Opt;
use actix_web::{web, HttpRequest};
use extractors::payload::PayloadConfig;
use index_scheduler::IndexScheduler;
use meilisearch_auth::AuthController;
use meilisearch_lib::MeiliSearch;
use sysinfo::{RefreshKind, System, SystemExt};
pub static AUTOBATCHING_ENABLED: AtomicBool = AtomicBool::new(false);
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<MeiliSearch> {
let mut meilisearch = MeiliSearch::builder();
// disable autobatching?
AUTOBATCHING_ENABLED.store(
!opt.scheduler_options.disable_auto_batching,
std::sync::atomic::Ordering::Relaxed,
);
// TODO: TAMO: Finish setting up things
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<IndexScheduler> {
let meilisearch = IndexScheduler::new(
opt.db_path.join("tasks"),
opt.db_path.join("update_files"),
opt.db_path.join("indexes"),
opt.max_index_size.get_bytes() as usize,
(&opt.indexer_options).try_into()?,
#[cfg(test)]
todo!("We'll see later"),
)?;
/*
TODO: We should start a thread to handle the snapshots.
meilisearch
.set_max_index_size(opt.max_index_size.get_bytes() as usize)
.set_max_task_store_size(opt.max_task_db_size.get_bytes() as usize)
@ -63,24 +71,21 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<MeiliSearch> {
if opt.schedule_snapshot {
meilisearch.set_schedule_snapshot();
}
*/
meilisearch.build(
opt.db_path.clone(),
opt.indexer_options.clone(),
opt.scheduler_options.clone(),
)
Ok(meilisearch)
}
pub fn configure_data(
config: &mut web::ServiceConfig,
data: MeiliSearch,
index_scheduler: Data<IndexScheduler>,
auth: AuthController,
opt: &Opt,
analytics: Arc<dyn Analytics>,
) {
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
config
.app_data(data)
.app_data(index_scheduler)
.app_data(auth)
.app_data(web::Data::from(analytics))
.app_data(
@ -170,7 +175,7 @@ macro_rules! create_app {
use meilisearch_types::error::ResponseError;
let app = App::new()
.configure(|s| configure_data(s, $data.clone(), $auth.clone(), &$opt, $analytics))
.configure(|s| configure_data(s, $data, $auth.clone(), &$opt, $analytics))
.configure(routes::configure)
.configure(|s| dashboard(s, $enable_frontend));

View File

@ -2,13 +2,16 @@ use std::env;
use std::path::PathBuf;
use std::sync::Arc;
use actix_cors::Cors;
use actix_web::http::KeepAlive;
use actix_web::HttpServer;
use actix_web::web::Data;
use actix_web::{middleware, HttpServer};
use clap::Parser;
use index_scheduler::IndexScheduler;
use meilisearch_auth::AuthController;
use meilisearch_http::analytics;
use meilisearch_http::analytics::Analytics;
use meilisearch_http::{create_app, setup_meilisearch, Opt};
use meilisearch_lib::MeiliSearch;
use meilisearch_http::{analytics, configure_data, create_app, dashboard, routes};
use meilisearch_http::{setup_meilisearch, Opt};
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
@ -45,9 +48,7 @@ async fn main() -> anyhow::Result<()> {
_ => unreachable!(),
}
let meilisearch = setup_meilisearch(&opt)?;
let m = meilisearch.clone();
tokio::task::spawn_blocking(move || m.run());
let index_scheduler = setup_meilisearch(&opt)?;
let auth_controller = AuthController::new(&opt.db_path, &opt.master_key)?;
@ -62,39 +63,81 @@ async fn main() -> anyhow::Result<()> {
print_launch_resume(&opt, &user, config_read_from);
run_http(meilisearch, auth_controller, opt, analytics).await?;
run_http(index_scheduler, auth_controller, opt, analytics).await?;
Ok(())
}
async fn run_http(
data: MeiliSearch,
index_scheduler: IndexScheduler,
auth_controller: AuthController,
opt: Opt,
analytics: Arc<dyn Analytics>,
) -> anyhow::Result<()> {
let _enable_dashboard = &opt.env == "development";
let enable_dashboard = &opt.env == "development";
let opt_clone = opt.clone();
let index_scheduler = Data::new(index_scheduler);
let http_server = HttpServer::new(move || {
let app = actix_web::App::new()
.configure(|s| {
configure_data(
s,
index_scheduler.clone(),
auth_controller.clone(),
&opt,
analytics.clone(),
)
})
.configure(routes::configure)
.configure(|s| dashboard(s, enable_dashboard));
#[cfg(feature = "metrics")]
let app = app.configure(|s| configure_metrics_route(s, opt.enable_metrics_route));
let app = app
.wrap(
Cors::default()
.send_wildcard()
.allow_any_header()
.allow_any_origin()
.allow_any_method()
.max_age(86_400), // 24h
)
.wrap(middleware::Logger::default())
.wrap(middleware::Compress::default())
.wrap(middleware::NormalizePath::new(
middleware::TrailingSlash::Trim,
));
#[cfg(feature = "metrics")]
let app = app.wrap(Condition::new(
opt.enable_metrics_route,
route_metrics::RouteMetrics,
));
app
/*
create_app!(
data,
auth_controller,
_enable_dashboard,
opt_clone,
index_scheduler.clone(),
auth_controller.clone(),
enable_dashboard,
opt,
analytics.clone()
)
*/
})
// Disabling signals allows the server to terminate immediately when a user enters CTRL-C
.disable_signals()
.keep_alive(KeepAlive::Os);
if let Some(config) = opt.get_ssl_config()? {
if let Some(config) = opt_clone.get_ssl_config()? {
http_server
.bind_rustls(opt.http_addr, config)?
.bind_rustls(opt_clone.http_addr, config)?
.run()
.await?;
} else {
http_server.bind(&opt.http_addr)?.run().await?;
http_server.bind(&opt_clone.http_addr)?.run().await?;
}
Ok(())
}
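
One detail worth spelling out in the hand-written run_http above: `opt` is moved into the `HttpServer::new` factory closure, so the SSL and bind configuration that runs after the factory is built has to go through `opt_clone`, taken before the move. A minimal, Meilisearch-agnostic illustration of that clone-before-move pattern (nothing below is real Meilisearch code):

fn clone_before_move_demo() {
    let opt = String::from("127.0.0.1:7700");
    let opt_clone = opt.clone(); // keep a handle for use after the move

    // `opt` is moved into the factory closure, like in `run_http` above.
    let factory = move || println!("building app for {opt}");
    factory();

    // The original binding has been moved, but the clone is still usable here.
    println!("binding to {opt_clone}");
}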

View File

@ -1,15 +1,21 @@
use std::convert::TryFrom;
use std::env;
use std::fs;
use std::io::{BufReader, Read};
use std::num::ParseIntError;
use std::ops::Deref;
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Arc;
use std::{fmt, fs};
use byte_unit::Byte;
use byte_unit::{Byte, ByteError};
use clap::Parser;
use meilisearch_lib::{
export_to_env_if_not_present,
options::{IndexerOpts, SchedulerConfig},
};
use index_scheduler::milli::update::IndexerConfig;
use rustls::{
server::{
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient,
@ -19,6 +25,7 @@ use rustls::{
};
use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
use serde::{Deserialize, Serialize};
use sysinfo::{RefreshKind, System, SystemExt};
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
@ -434,6 +441,164 @@ impl Opt {
}
}
#[derive(Debug, Clone, Parser, Serialize)]
pub struct IndexerOpts {
/// The number of documents to process before printing
/// a log about the indexing progress.
#[serde(skip)]
#[clap(long, default_value = "100000", hide = true)] // 100k
pub log_every_n: usize,
/// Grenad max number of chunks in bytes.
#[serde(skip)]
#[clap(long, hide = true)]
pub max_nb_chunks: Option<usize>,
/// The maximum amount of memory the indexer will use. It defaults to 2/3
/// of the available memory. It is recommended to use something like 80%-90%
/// of the available memory, no more.
///
/// If the engine is unable to retrieve the available memory, it will try to use
/// the memory it needs without any real limit, which can lead to Out-Of-Memory
/// issues. In that case it is recommended to specify the amount of memory to use.
#[clap(long, env = "MEILI_MAX_INDEXING_MEMORY", default_value_t)]
pub max_indexing_memory: MaxMemory,
/// The maximum number of threads the indexer will use.
/// If the specified number is higher than the number of cores available on the machine,
/// it will use all of the available cores.
///
/// It defaults to half of the available threads.
#[clap(long, env = "MEILI_MAX_INDEXING_THREADS", default_value_t)]
pub max_indexing_threads: MaxThreads,
}
#[derive(Debug, Clone, Parser, Default, Serialize)]
pub struct SchedulerConfig {
/// The engine will disable task auto-batching,
/// and will sequentially compute each task one by one.
#[clap(long, env = "DISABLE_AUTO_BATCHING")]
pub disable_auto_batching: bool,
}
impl TryFrom<&IndexerOpts> for IndexerConfig {
type Error = anyhow::Error;
fn try_from(other: &IndexerOpts) -> Result<Self, Self::Error> {
let thread_pool = rayon::ThreadPoolBuilder::new()
.num_threads(*other.max_indexing_threads)
.build()?;
Ok(Self {
log_every_n: Some(other.log_every_n),
max_nb_chunks: other.max_nb_chunks,
max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize),
thread_pool: Some(thread_pool),
max_positions_per_attributes: None,
..Default::default()
})
}
}
impl Default for IndexerOpts {
fn default() -> Self {
Self {
log_every_n: 100_000,
max_nb_chunks: None,
max_indexing_memory: MaxMemory::default(),
max_indexing_threads: MaxThreads::default(),
}
}
}
/// A type used to detect the max memory available and use 2/3 of it.
#[derive(Debug, Clone, Copy, Serialize)]
pub struct MaxMemory(Option<Byte>);
impl FromStr for MaxMemory {
type Err = ByteError;
fn from_str(s: &str) -> Result<MaxMemory, ByteError> {
Byte::from_str(s).map(Some).map(MaxMemory)
}
}
impl Default for MaxMemory {
fn default() -> MaxMemory {
MaxMemory(
total_memory_bytes()
.map(|bytes| bytes * 2 / 3)
.map(Byte::from_bytes),
)
}
}
impl fmt::Display for MaxMemory {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.0 {
Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)),
None => f.write_str("unknown"),
}
}
}
impl Deref for MaxMemory {
type Target = Option<Byte>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl MaxMemory {
pub fn unlimited() -> Self {
Self(None)
}
}
/// Returns the total amount of memory in bytes, or `None` if this system isn't supported.
fn total_memory_bytes() -> Option<u64> {
if System::IS_SUPPORTED {
let memory_kind = RefreshKind::new().with_memory();
let mut system = System::new_with_specifics(memory_kind);
system.refresh_memory();
Some(system.total_memory() * 1024) // KiB into bytes
} else {
None
}
}
#[derive(Debug, Clone, Copy, Serialize)]
pub struct MaxThreads(usize);
impl FromStr for MaxThreads {
type Err = ParseIntError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
usize::from_str(s).map(Self)
}
}
impl Default for MaxThreads {
fn default() -> Self {
MaxThreads(num_cpus::get() / 2)
}
}
impl fmt::Display for MaxThreads {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.0)
}
}
impl Deref for MaxThreads {
type Target = usize;
fn deref(&self) -> &Self::Target {
&self.0
}
}
fn load_certs(filename: PathBuf) -> anyhow::Result<Vec<rustls::Certificate>> {
let certfile =
fs::File::open(filename).map_err(|_| anyhow::anyhow!("cannot open certificate file"))?;
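
With `IndexerOpts` and `SchedulerConfig` now living in meilisearch-http, the custom clap value types defined above are parsed here as well. A rough usage sketch, assuming the `FromStr` and `Display` impls shown in this diff (the input strings are arbitrary examples, not defaults):

fn parse_examples() {
    use std::str::FromStr;

    // MaxMemory accepts any size `byte_unit::Byte` understands, e.g. the value
    // of MEILI_MAX_INDEXING_MEMORY; MaxThreads parses a plain integer.
    let mem = MaxMemory::from_str("4 GiB").unwrap();
    let threads = MaxThreads::from_str("8").unwrap();
    println!("indexing budget: {mem}, indexing threads: {threads}");
}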

View File

@ -1,7 +1,8 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use index_scheduler::KindWithContent;
use log::debug;
use meilisearch_lib::MeiliSearch;
use meilisearch_types::error::ResponseError;
use serde_json::json;
@ -14,16 +15,16 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
}
pub async fn create_dump(
meilisearch: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, MeiliSearch>,
index_scheduler: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<IndexScheduler>>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
analytics.publish("Dump Created".to_string(), json!({}), Some(&req));
let task = KindWithContent::DumpExport {
output: "toto".to_string().into(),
output: "todo".to_string().into(),
};
let res = meilisearch.register_task(task).await?;
let res = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
debug!("returns: {:?}", res);
Ok(HttpResponse::Accepted().json(res))
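
The `spawn_blocking` plus double `?` shape introduced here reappears in every route of this commit that registers a task: `IndexScheduler::register` is a synchronous call (presumably a blocking heed write), so it is pushed onto Tokio's blocking thread pool; the first `?` handles the `JoinError`, the second the scheduler's own error (the real handlers get the `JoinError` conversion for free via the `#[from] JoinError` variant in error.rs above). A self-contained sketch of the same shape, with stand-in types rather than the real index-scheduler API:

use tokio::task::JoinError;

// Stand-in for the blocking registration call; not the real index-scheduler API.
fn register(task: &'static str) -> Result<String, String> {
    Ok(format!("registered {task}"))
}

async fn register_on_blocking_pool() -> Result<String, String> {
    // First `?`: the JoinError from spawn_blocking (converted by hand here);
    // second `?`: the error returned by `register` itself.
    let res = tokio::task::spawn_blocking(move || register("DumpExport"))
        .await
        .map_err(|e: JoinError| e.to_string())??;
    Ok(res)
}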

View File

@ -2,16 +2,16 @@ use std::io::Cursor;
use actix_web::error::PayloadError;
use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::Bytes;
use actix_web::web::{Bytes, Data};
use actix_web::HttpMessage;
use actix_web::{web, HttpRequest, HttpResponse};
use bstr::ByteSlice;
use document_formats::{read_csv, read_json, read_ndjson, PayloadType};
use futures::{Stream, StreamExt};
use index_scheduler::milli::update::IndexDocumentsMethod;
use index_scheduler::IndexScheduler;
use index_scheduler::{KindWithContent, TaskView};
use log::debug;
use meilisearch_lib::milli::update::IndexDocumentsMethod;
use meilisearch_lib::MeiliSearch;
use meilisearch_types::error::ResponseError;
use meilisearch_types::star_or::StarOr;
use mime::Mime;
@ -95,24 +95,21 @@ pub struct GetDocument {
}
pub async fn get_document(
meilisearch: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, MeiliSearch>,
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
path: web::Path<DocumentParam>,
params: web::Query<GetDocument>,
) -> Result<HttpResponse, ResponseError> {
let index = path.index_uid.clone();
let id = path.document_id.clone();
let GetDocument { fields } = params.into_inner();
let attributes_to_retrieve = fields.and_then(fold_star_or);
let document = meilisearch
.document(index, id, attributes_to_retrieve)
.await?;
let index = index_scheduler.index(&path.index_uid)?;
let document = index.retrieve_document(&path.document_id, attributes_to_retrieve)?;
debug!("returns: {:?}", document);
Ok(HttpResponse::Ok().json(document))
}
pub async fn delete_document(
meilisearch: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, MeiliSearch>,
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
path: web::Path<DocumentParam>,
) -> Result<HttpResponse, ResponseError> {
let DocumentParam {
@ -123,7 +120,7 @@ pub async fn delete_document(
index_uid,
documents_ids: vec![document_id],
};
let task = meilisearch.register_task(task).await?;
let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
@ -139,8 +136,8 @@ pub struct BrowseQuery {
}
pub async fn get_all_documents(
meilisearch: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, MeiliSearch>,
path: web::Path<String>,
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: web::Query<BrowseQuery>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", params);
@ -151,9 +148,8 @@ pub async fn get_all_documents(
} = params.into_inner();
let attributes_to_retrieve = fields.and_then(fold_star_or);
let (total, documents) = meilisearch
.documents(path.into_inner(), offset, limit, attributes_to_retrieve)
.await?;
let index = index_scheduler.index(&index_uid)?;
let (total, documents) = index.retrieve_documents(offset, limit, attributes_to_retrieve)?;
let ret = PaginationView::new(offset, limit, total as usize, documents);
@ -168,8 +164,8 @@ pub struct UpdateDocumentsQuery {
}
pub async fn add_documents(
meilisearch: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, MeiliSearch>,
path: web::Path<String>,
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: web::Query<UpdateDocumentsQuery>,
body: Payload,
req: HttpRequest,
@ -177,19 +173,14 @@ pub async fn add_documents(
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", params);
let params = params.into_inner();
let index_uid = path.into_inner();
analytics.add_documents(
&params,
meilisearch.get_index(index_uid.clone()).await.is_err(),
&req,
);
analytics.add_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
let allow_index_creation = meilisearch.filters().allow_index_creation;
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let task = document_addition(
extract_mime_type(&req)?,
meilisearch,
index_uid,
index_scheduler,
index_uid.into_inner(),
params.primary_key,
body,
IndexDocumentsMethod::ReplaceDocuments,
@ -201,7 +192,7 @@ pub async fn add_documents(
}
pub async fn update_documents(
meilisearch: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, MeiliSearch>,
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
path: web::Path<String>,
params: web::Query<UpdateDocumentsQuery>,
body: Payload,
@ -211,16 +202,12 @@ pub async fn update_documents(
debug!("called with params: {:?}", params);
let index_uid = path.into_inner();
analytics.update_documents(
&params,
meilisearch.get_index(index_uid.clone()).await.is_err(),
&req,
);
analytics.update_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
let allow_index_creation = meilisearch.filters().allow_index_creation;
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let task = document_addition(
extract_mime_type(&req)?,
meilisearch,
index_scheduler,
index_uid,
params.into_inner().primary_key,
body,
@ -234,7 +221,7 @@ pub async fn update_documents(
async fn document_addition(
mime_type: Option<Mime>,
meilisearch: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, MeiliSearch>,
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: String,
primary_key: Option<String>,
mut body: Payload,
@ -262,7 +249,7 @@ async fn document_addition(
}
};
let (uuid, mut update_file) = meilisearch.create_update_file()?;
let (uuid, mut update_file) = index_scheduler.create_update_file()?;
// push the entire stream into a `Vec`.
// TODO: Maybe we should write it to a file to reduce the RAM consumption
@ -281,7 +268,7 @@ async fn document_addition(
PayloadType::Ndjson => read_ndjson(reader, update_file.as_file_mut())?,
};
// we NEED to persist the file here because we moved the `update_file` into another task.
update_file.persist();
update_file.persist()?;
Ok(documents_count)
})
.await;
@ -289,11 +276,11 @@ async fn document_addition(
let documents_count = match documents_count {
Ok(Ok(documents_count)) => documents_count,
Ok(Err(e)) => {
meilisearch.delete_update_file(uuid)?;
index_scheduler.delete_update_file(uuid)?;
return Err(e.into());
}
Err(e) => {
meilisearch.delete_update_file(uuid)?;
index_scheduler.delete_update_file(uuid)?;
return Err(e.into());
}
};
@ -318,10 +305,11 @@ async fn document_addition(
_ => todo!(),
};
let task = match meilisearch.register_task(task).await {
let scheduler = index_scheduler.clone();
let task = match tokio::task::spawn_blocking(move || scheduler.register(task)).await? {
Ok(task) => task,
Err(e) => {
meilisearch.delete_update_file(uuid)?;
index_scheduler.delete_update_file(uuid)?;
return Err(e.into());
}
};
@ -331,7 +319,7 @@ async fn document_addition(
}
pub async fn delete_documents(
meilisearch: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, MeiliSearch>,
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
path: web::Path<String>,
body: web::Json<Vec<Value>>,
) -> Result<HttpResponse, ResponseError> {
@ -349,20 +337,20 @@ pub async fn delete_documents(
index_uid: path.into_inner(),
documents_ids: ids,
};
let task = meilisearch.register_task(task).await?;
let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
pub async fn clear_all_documents(
meilisearch: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, MeiliSearch>,
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
path: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let task = KindWithContent::DocumentClear {
index_uid: path.into_inner(),
};
let task = meilisearch.register_task(task).await?;
let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
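
To summarize the `document_addition` flow after this change: the handler asks the scheduler for a fresh update file, fills it from the payload inside `spawn_blocking`, persists it, registers the task, and deletes the file again if anything fails along the way. A compressed illustration of that create/clean-up discipline (the `FileStore` type, the byte-copying step, and the error types below are simplified stand-ins, with the async and persist machinery left out):

use std::collections::HashMap;

struct FileStore {
    files: HashMap<u64, Vec<u8>>,
    next_uuid: u64,
}

impl FileStore {
    fn create_update_file(&mut self) -> (u64, &mut Vec<u8>) {
        let uuid = self.next_uuid;
        self.next_uuid += 1;
        (uuid, self.files.entry(uuid).or_default())
    }

    fn delete_update_file(&mut self, uuid: u64) {
        self.files.remove(&uuid);
    }
}

fn register_task(_uuid: u64) -> Result<u64, String> {
    // Stand-in for `index_scheduler.register(task)`.
    Ok(0)
}

fn add_documents(store: &mut FileStore, payload: &[u8]) -> Result<u64, String> {
    let (uuid, file) = store.create_update_file();
    // Stand-in for read_csv/read_json/read_ndjson writing into the update file.
    file.extend_from_slice(payload);

    // If registration fails, the update file must not be leaked.
    match register_task(uuid) {
        Ok(task_uid) => Ok(task_uid),
        Err(e) => {
            store.delete_update_file(uuid);
            Err(e)
        }
    }
}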

View File

@ -1,7 +1,7 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::KindWithContent;
use index_scheduler::{IndexScheduler, KindWithContent};
use log::debug;
use meilisearch_lib::MeiliSearch;
use meilisearch_types::error::ResponseError;
use serde::{Deserialize, Serialize};
use serde_json::json;
@ -40,17 +40,17 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
}
pub async fn list_indexes(
data: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, MeiliSearch>,
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
paginate: web::Query<Pagination>,
) -> Result<HttpResponse, ResponseError> {
let search_rules = &data.filters().search_rules;
let indexes: Vec<_> = data.list_indexes().await?;
let search_rules = &index_scheduler.filters().search_rules;
let indexes: Vec<_> = index_scheduler.indexes()?;
let nb_indexes = indexes.len();
let iter = indexes
.into_iter()
.filter(|index| search_rules.is_index_authorized(&index.name));
/*
TODO: TAMO: implements me
TODO: TAMO: implement me. It's missing a kind of IndexView or something
let ret = paginate
.into_inner()
.auto_paginate_unsized(nb_indexes, iter);
@ -69,7 +69,7 @@ pub struct IndexCreateRequest {
}
pub async fn create_index(
meilisearch: GuardedData<ActionPolicy<{ actions::INDEXES_CREATE }>, MeiliSearch>,
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_CREATE }>, Data<IndexScheduler>>,
body: web::Json<IndexCreateRequest>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
@ -88,7 +88,7 @@ pub async fn create_index(
index_uid: uid,
primary_key,
};
let task = meilisearch.register_task(task).await?;
let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
Ok(HttpResponse::Accepted().json(task))
} else {
@ -118,10 +118,10 @@ pub struct UpdateIndexResponse {
}
pub async fn get_index(
meilisearch: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, MeiliSearch>,
path: web::Path<String>,
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let meta = meilisearch.get_index(path.into_inner()).await?;
let meta = index_scheduler.index(&index_uid)?;
debug!("returns: {:?}", meta);
// TODO: TAMO: do this as well
@ -130,7 +130,7 @@ pub async fn get_index(
}
pub async fn update_index(
meilisearch: GuardedData<ActionPolicy<{ actions::INDEXES_UPDATE }>, MeiliSearch>,
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_UPDATE }>, Data<IndexScheduler>>,
path: web::Path<String>,
body: web::Json<UpdateIndexRequest>,
req: HttpRequest,
@ -149,26 +149,27 @@ pub async fn update_index(
primary_key: body.primary_key,
};
let task = meilisearch.register_task(task).await?;
let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
pub async fn delete_index(
meilisearch: GuardedData<ActionPolicy<{ actions::INDEXES_DELETE }>, MeiliSearch>,
path: web::Path<String>,
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_DELETE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = path.into_inner();
let task = KindWithContent::IndexDeletion { index_uid };
let task = meilisearch.register_task(task).await?;
let task = KindWithContent::IndexDeletion {
index_uid: index_uid.into_inner(),
};
let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
Ok(HttpResponse::Accepted().json(task))
}
pub async fn get_index_stats(
meilisearch: GuardedData<ActionPolicy<{ actions::STATS_GET }>, MeiliSearch>,
path: web::Path<String>,
index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
@ -177,7 +178,10 @@ pub async fn get_index_stats(
json!({ "per_index_uid": true }),
Some(&req),
);
let response = meilisearch.get_index_stats(path.into_inner()).await?;
let index = index_scheduler.index(&index_uid)?;
// TODO: TAMO: Bring the index_stats into meilisearch-http
// let response = index.get_index_stats()?;
let response = todo!();
debug!("returns: {:?}", response);
Ok(HttpResponse::Ok().json(response))

View File

@ -1,12 +1,13 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index::{
MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
DEFAULT_SEARCH_OFFSET,
};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_auth::IndexSearchRules;
use meilisearch_lib::MeiliSearch;
use meilisearch_types::error::ResponseError;
use serde::Deserialize;
use serde_cs::vec::CS;
@ -136,8 +137,8 @@ fn fix_sort_query_parameters(sort_query: &str) -> Vec<String> {
}
pub async fn search_with_url_query(
meilisearch: GuardedData<ActionPolicy<{ actions::SEARCH }>, MeiliSearch>,
path: web::Path<String>,
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: web::Query<SearchQueryGet>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
@ -145,9 +146,8 @@ pub async fn search_with_url_query(
debug!("called with params: {:?}", params);
let mut query: SearchQuery = params.into_inner().into();
let index_uid = path.into_inner();
// Tenant token search_rules.
if let Some(search_rules) = meilisearch
if let Some(search_rules) = index_scheduler
.filters()
.search_rules
.get_index_search_rules(&index_uid)
@ -157,7 +157,8 @@ pub async fn search_with_url_query(
let mut aggregate = SearchAggregator::from_query(&query, &req);
let search_result = meilisearch.search(index_uid, query).await;
let index = index_scheduler.index(&index_uid)?;
let search_result = index.perform_search(query);
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
}
@ -170,8 +171,8 @@ pub async fn search_with_url_query(
}
pub async fn search_with_post(
meilisearch: GuardedData<ActionPolicy<{ actions::SEARCH }>, MeiliSearch>,
path: web::Path<String>,
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: web::Json<SearchQuery>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
@ -179,9 +180,8 @@ pub async fn search_with_post(
let mut query = params.into_inner();
debug!("search called with params: {:?}", query);
let index_uid = path.into_inner();
// Tenant token search_rules.
if let Some(search_rules) = meilisearch
if let Some(search_rules) = index_scheduler
.filters()
.search_rules
.get_index_search_rules(&index_uid)
@ -191,7 +191,8 @@ pub async fn search_with_post(
let mut aggregate = SearchAggregator::from_query(&query, &req);
let search_result = meilisearch.search(index_uid, query).await;
let index = index_scheduler.index(&index_uid)?;
let search_result = index.perform_search(query);
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
}

View File

@ -1,9 +1,9 @@
use actix_web::web::Data;
use log::debug;
use actix_web::{web, HttpRequest, HttpResponse};
use index::{Settings, Unchecked};
use index_scheduler::KindWithContent;
use meilisearch_lib::MeiliSearch;
use index_scheduler::{IndexScheduler, KindWithContent};
use meilisearch_types::error::ResponseError;
use serde_json::json;
@ -14,13 +14,13 @@ use crate::extractors::authentication::{policies::*, GuardedData};
macro_rules! make_setting_route {
($route:literal, $update_verb:ident, $type:ty, $attr:ident, $camelcase_attr:literal, $analytics_var:ident, $analytics:expr) => {
pub mod $attr {
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse, Resource};
use log::debug;
use index::Settings;
use index_scheduler::KindWithContent;
use meilisearch_lib::milli::update::Setting;
use meilisearch_lib::MeiliSearch;
use index_scheduler::milli::update::Setting;
use index_scheduler::{IndexScheduler, KindWithContent};
use meilisearch_types::error::ResponseError;
use $crate::analytics::Analytics;
@ -28,7 +28,10 @@ macro_rules! make_setting_route {
use $crate::extractors::sequential_extractor::SeqHandler;
pub async fn delete(
meilisearch: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, MeiliSearch>,
index_scheduler: GuardedData<
ActionPolicy<{ actions::SETTINGS_UPDATE }>,
Data<IndexScheduler>,
>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let new_settings = Settings {
@ -36,21 +39,25 @@ macro_rules! make_setting_route {
..Default::default()
};
let allow_index_creation = meilisearch.filters().allow_index_creation;
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let task = KindWithContent::Settings {
index_uid: index_uid.into_inner(),
new_settings,
is_deletion: true,
allow_index_creation,
};
let task = meilisearch.register_task(task).await?;
let task =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
pub async fn update(
meilisearch: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, MeiliSearch>,
index_scheduler: GuardedData<
ActionPolicy<{ actions::SETTINGS_UPDATE }>,
Data<IndexScheduler>,
>,
index_uid: actix_web::web::Path<String>,
body: actix_web::web::Json<Option<$type>>,
req: HttpRequest,
@ -68,24 +75,28 @@ macro_rules! make_setting_route {
..Default::default()
};
let allow_index_creation = meilisearch.filters().allow_index_creation;
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let task = KindWithContent::Settings {
index_uid: index_uid.into_inner(),
new_settings,
is_deletion: false,
allow_index_creation,
};
let task = meilisearch.register_task(task).await?;
let task =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
pub async fn get(
meilisearch: GuardedData<ActionPolicy<{ actions::SETTINGS_GET }>, MeiliSearch>,
index_scheduler: GuardedData<
ActionPolicy<{ actions::SETTINGS_GET }>,
Data<IndexScheduler>,
>,
index_uid: actix_web::web::Path<String>,
) -> std::result::Result<HttpResponse, ResponseError> {
let index = meilisearch.get_index(index_uid.into_inner()).await?;
let index = index_scheduler.index(&index_uid)?;
let settings = index.settings()?;
debug!("returns: {:?}", settings);
@ -353,7 +364,7 @@ generate_configure!(
);
pub async fn update_all(
meilisearch: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, MeiliSearch>,
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
body: web::Json<Settings<Unchecked>>,
req: HttpRequest,
@ -425,43 +436,43 @@ pub async fn update_all(
Some(&req),
);
let allow_index_creation = meilisearch.filters().allow_index_creation;
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let task = KindWithContent::Settings {
index_uid: index_uid.into_inner(),
new_settings,
is_deletion: false,
allow_index_creation,
};
let task = meilisearch.register_task(task).await?;
let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
pub async fn get_all(
meilisearch: GuardedData<ActionPolicy<{ actions::SETTINGS_GET }>, MeiliSearch>,
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_GET }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let index = meilisearch.get_index(index_uid.into_inner()).await?;
let index = index_scheduler.index(&index_uid)?;
let new_settings = index.settings()?;
debug!("returns: {:?}", new_settings);
Ok(HttpResponse::Ok().json(new_settings))
}
pub async fn delete_all(
data: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, MeiliSearch>,
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let new_settings = Settings::cleared().into_unchecked();
let allow_index_creation = data.filters().allow_index_creation;
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let task = KindWithContent::Settings {
index_uid: index_uid.into_inner(),
new_settings,
is_deletion: true,
allow_index_creation,
};
let task = data.register_task(task).await?;
let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))

View File

@ -1,4 +1,6 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use log::debug;
use serde::{Deserialize, Serialize};
@ -6,7 +8,6 @@ use serde_json::json;
use time::OffsetDateTime;
use index::{Settings, Unchecked};
use meilisearch_lib::MeiliSearch;
use meilisearch_types::error::ResponseError;
use meilisearch_types::star_or::StarOr;
@ -232,7 +233,7 @@ pub async fn running() -> HttpResponse {
}
async fn get_stats(
meilisearch: GuardedData<ActionPolicy<{ actions::STATS_GET }>, MeiliSearch>,
index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
@ -241,8 +242,9 @@ async fn get_stats(
json!({ "per_index_uid": false }),
Some(&req),
);
let search_rules = &meilisearch.filters().search_rules;
let response = meilisearch.get_all_stats(search_rules).await?;
let search_rules = &index_scheduler.filters().search_rules;
// let response = index_scheduler.get_all_stats(search_rules).await?;
let response = todo!();
debug!("returns: {:?}", response);
Ok(HttpResponse::Ok().json(response))
@ -257,7 +259,7 @@ struct VersionResponse {
}
async fn get_version(
_meilisearch: GuardedData<ActionPolicy<{ actions::VERSION }>, MeiliSearch>,
_index_scheduler: GuardedData<ActionPolicy<{ actions::VERSION }>, Data<IndexScheduler>>,
) -> HttpResponse {
let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");

View File

@ -1,7 +1,7 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::TaskId;
use index_scheduler::{IndexScheduler, TaskId};
use index_scheduler::{Kind, Status};
use meilisearch_lib::MeiliSearch;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::star_or::StarOr;
@ -15,7 +15,7 @@ use crate::extractors::sequential_extractor::SeqHandler;
use super::fold_star_or;
const DEFAULT_LIMIT: fn() -> usize = || 20;
const DEFAULT_LIMIT: fn() -> u32 = || 20;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::get().to(SeqHandler(get_tasks))))
@ -30,7 +30,7 @@ pub struct TasksFilterQuery {
status: Option<CS<StarOr<Status>>>,
index_uid: Option<CS<StarOr<IndexUid>>>,
#[serde(default = "DEFAULT_LIMIT")]
limit: usize,
limit: u32,
from: Option<TaskId>,
}
@ -60,7 +60,7 @@ fn task_status_matches_events(status: &TaskStatus, events: &[TaskEvent]) -> bool
}
async fn get_tasks(
meilisearch: GuardedData<ActionPolicy<{ actions::TASKS_GET }>, MeiliSearch>,
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_GET }>, Data<IndexScheduler>>,
params: web::Query<TasksFilterQuery>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
@ -73,7 +73,7 @@ async fn get_tasks(
from,
} = params.into_inner();
let search_rules = &meilisearch.filters().search_rules;
let search_rules = &index_scheduler.filters().search_rules;
// We first transform a potential indexUid=* into a "not specified indexUid filter"
// for every one of the filters: type, status, and indexUid.
@ -124,14 +124,16 @@ async fn get_tasks(
}
}
filters.from = from;
// We add 1 to the limit just to know whether there is more after this "page" or not.
let limit = limit.saturating_add(1);
filters.limit = limit;
let mut tasks_results: Vec<_> = meilisearch.list_tasks(filters).await?.into_iter().collect();
let mut tasks_results: Vec<_> = index_scheduler.get_tasks(filters)?.into_iter().collect();
// If we were able to fetch the limit + 1 tasks we asked for,
// it means that there are more tasks to come.
let next = if tasks_results.len() == limit {
let next = if tasks_results.len() == limit as usize {
tasks_results.pop().map(|t| t.uid)
} else {
None
@ -151,7 +153,7 @@ async fn get_tasks(
}
async fn get_task(
meilisearch: GuardedData<ActionPolicy<{ actions::TASKS_GET }>, MeiliSearch>,
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_GET }>, Data<IndexScheduler>>,
task_id: web::Path<TaskId>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
@ -164,7 +166,7 @@ async fn get_task(
Some(&req),
);
let search_rules = &meilisearch.filters().search_rules;
let search_rules = &index_scheduler.filters().search_rules;
let mut filters = index_scheduler::Query::default();
if !search_rules.is_index_authorized("*") {
for (index, _policy) in search_rules.clone() {
@ -174,7 +176,7 @@ async fn get_task(
filters.uid = Some(vec![task_id]);
if let Some(task) = meilisearch.list_tasks(filters).await?.first() {
if let Some(task) = index_scheduler.get_tasks(filters)?.first() {
Ok(HttpResponse::Ok().json(task))
} else {
Err(index_scheduler::Error::TaskNotFound(task_id).into())
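
The `from`/`limit` handling in `get_tasks` above relies on a classic cursor-pagination trick: ask the store for one task more than the requested limit, and if that extra task actually comes back, its uid becomes the `from` cursor of the next page. A tiny standalone illustration of the same idea, with plain integers standing in for task uids:

fn paginate(mut fetched: Vec<u32>, limit: u32) -> (Vec<u32>, Option<u32>) {
    // The caller asked the store for `limit + 1` items; receiving the extra one
    // proves there is a next page, and its uid becomes the next `from` cursor.
    let next = if fetched.len() == limit as usize + 1 {
        fetched.pop()
    } else {
        None
    };
    (fetched, next)
}

fn main() {
    // The user asked for 2 tasks, so 3 were fetched from the store.
    let (page, next) = paginate(vec![10, 9, 8], 2);
    assert_eq!(page, vec![10, 9]);
    assert_eq!(next, Some(8));
}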

View File

@ -1,19 +0,0 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 6f3ae3cba934ba3e328e2306218c32f27a46ce2d54a1258b05fef65663208662 # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: DocumentAddition { content_uuid: 37bc137d-2038-47f0-819f-b133233daadc, merge_strategy: ReplaceDocuments, primary_key: None, documents_count: 0 }, events: [] }
cc b726f7d9f44a9216aad302ddba0f04e7108817e741d656a4759aea8562de4d63 # shrinks to task = Task { id: 0, index_uid: IndexUid("_"), content: IndexDeletion, events: [] }, index_exists = false, index_op_fails = false, any_int = 0
cc 427ec2dde3260b1ab334207bdc22adef28a5b8532b9902c84b55fd2c017ea7e1 # shrinks to task = Task { id: 0, index_uid: IndexUid("A"), content: IndexDeletion, events: [] }, index_exists = true, index_op_fails = false, any_int = 0
cc c24f3d42f0f36fbdbf4e9d4327e75529b163ac580d63a5934ca05e9b5bd23a65 # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: IndexDeletion, events: [] }, index_exists = true, index_op_fails = true, any_int = 0
cc 8084e2410801b997533b0bcbad75cd212873cfc2677f26847f68c568ead1604c # shrinks to task = Task { id: 0, index_uid: IndexUid("A"), content: SettingsUpdate { settings: Settings { displayed_attributes: NotSet, searchable_attributes: NotSet, filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, synonyms: NotSet, distinct_attribute: NotSet, _kind: PhantomData }, is_deletion: false }, events: [] }, index_exists = false, index_op_fails = false, any_int = 0
cc 330085e0200a9a2ddfdd764a03d768aa95c431bcaafbd530c8c949425beed18b # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: CreateIndex { primary_key: None }, events: [] }, index_exists = false, index_op_fails = true, any_int = 0
cc c70e901576ef2fb9622e814bdecd11e4747cd70d71a9a6ce771b5b7256a187c0 # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: SettingsUpdate { settings: Settings { displayed_attributes: NotSet, searchable_attributes: NotSet, filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, synonyms: NotSet, distinct_attribute: NotSet, _kind: PhantomData }, is_deletion: true }, events: [] }, index_exists = false, index_op_fails = false, any_int = 0
cc 3fe2c38cbc2cca34ecde321472141d386056f0cd332cbf700773657715a382b5 # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: UpdateIndex { primary_key: None }, events: [] }, index_exists = false, index_op_fails = false, any_int = 0
cc c31cf86692968483f1ab08a6a9d4667ccb9635c306998551bf1eb1f135ef0d4b # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: UpdateIndex { primary_key: Some("") }, events: [] }, index_exists = true, index_op_fails = false, any_int = 0
cc 3a01c78db082434b8a4f8914abf0d1059d39f4426d16df20d72e1bd7ebb94a6a # shrinks to task = Task { id: 0, index_uid: IndexUid("0"), content: UpdateIndex { primary_key: None }, events: [] }, index_exists = true, index_op_fails = true, any_int = 0
cc c450806df3921d1e6fe9b6af93d999e8196d0175b69b64f1810802582421e94a # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: CreateIndex { primary_key: Some("") }, events: [] }, index_exists = false, index_op_fails = false, any_int = 0
cc fb6b98947cbdbdee05ed3c0bf2923aad2c311edc276253642eb43a0c0ec4888a # shrinks to task = Task { id: 0, index_uid: IndexUid("A"), content: CreateIndex { primary_key: Some("") }, events: [] }, index_exists = false, index_op_fails = true, any_int = 0
cc 1aa59d8e22484e9915efbb5818e1e1ab684aa61b166dc82130d6221663ba00bf # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: DocumentDeletion(Clear), events: [] }, index_exists = true, index_op_fails = false, any_int = 0

View File

@ -1,7 +0,0 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 8cbd6c45ce8c5611ec3f2f94fd485f6a8eeccc470fa426e59bdfd4d9e7fce0e1 # shrinks to bytes = []

View File

@ -1,8 +0,0 @@
use std::{fs, path::Path};
/// Copy the `instance-uid` contained in one db to another. Ignore all errors.
pub fn copy_user_id(src: &Path, dst: &Path) {
if let Ok(user_id) = fs::read_to_string(src.join("instance-uid")) {
let _ = fs::write(dst.join("instance-uid"), &user_id);
}
}

View File

@ -1,26 +0,0 @@
use std::fs::{create_dir_all, File};
use std::io::Write;
use std::path::Path;
use flate2::{read::GzDecoder, write::GzEncoder, Compression};
use tar::{Archive, Builder};
pub fn to_tar_gz(src: impl AsRef<Path>, dest: impl AsRef<Path>) -> anyhow::Result<()> {
let mut f = File::create(dest)?;
let gz_encoder = GzEncoder::new(&mut f, Compression::default());
let mut tar_encoder = Builder::new(gz_encoder);
tar_encoder.append_dir_all(".", src)?;
let gz_encoder = tar_encoder.into_inner()?;
gz_encoder.finish()?;
f.flush()?;
Ok(())
}
pub fn from_tar_gz(src: impl AsRef<Path>, dest: impl AsRef<Path>) -> anyhow::Result<()> {
let f = File::open(&src)?;
let gz = GzDecoder::new(f);
let mut ar = Archive::new(gz);
create_dir_all(&dest)?;
ar.unpack(&dest)?;
Ok(())
}

View File

@ -1,17 +0,0 @@
pub mod v2;
pub mod v3;
pub mod v4;
/// Parses the v1 version of the Asc ranking rule `asc(price)` and returns the field name.
pub fn asc_ranking_rule(text: &str) -> Option<&str> {
text.split_once("asc(")
.and_then(|(_, tail)| tail.rsplit_once(')'))
.map(|(field, _)| field)
}
/// Parses the v1 version of the Desc ranking rule `desc(price)` and returns the field name.
pub fn desc_ranking_rule(text: &str) -> Option<&str> {
text.split_once("desc(")
.and_then(|(_, tail)| tail.rsplit_once(')'))
.map(|(field, _)| field)
}

View File

@ -1,205 +0,0 @@
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::index_uid::IndexUid;
use milli::update::IndexDocumentsMethod;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use uuid::Uuid;
use super::v4::{Task, TaskContent, TaskEvent};
use crate::index::{Settings, Unchecked};
use crate::tasks::task::{DocumentDeletion, TaskId, TaskResult};
use super::v2;
#[derive(Serialize, Deserialize)]
pub struct DumpEntry {
pub uuid: Uuid,
pub uid: String,
}
#[derive(Serialize, Deserialize)]
pub struct UpdateEntry {
pub uuid: Uuid,
pub update: UpdateStatus,
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "status", rename_all = "camelCase")]
pub enum UpdateStatus {
Processing(Processing),
Enqueued(Enqueued),
Processed(Processed),
Failed(Failed),
}
impl From<v2::UpdateResult> for TaskResult {
fn from(other: v2::UpdateResult) -> Self {
match other {
v2::UpdateResult::DocumentsAddition(result) => TaskResult::DocumentAddition {
indexed_documents: result.nb_documents as u64,
},
v2::UpdateResult::DocumentDeletion { deleted } => TaskResult::DocumentDeletion {
deleted_documents: deleted,
},
v2::UpdateResult::Other => TaskResult::Other,
}
}
}
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Update {
DeleteDocuments(Vec<String>),
DocumentAddition {
primary_key: Option<String>,
method: IndexDocumentsMethod,
content_uuid: Uuid,
},
Settings(Settings<Unchecked>),
ClearDocuments,
}
impl From<Update> for super::v4::TaskContent {
fn from(update: Update) -> Self {
match update {
Update::DeleteDocuments(ids) => {
TaskContent::DocumentDeletion(DocumentDeletion::Ids(ids))
}
Update::DocumentAddition {
primary_key,
method,
..
} => TaskContent::DocumentAddition {
content_uuid: Uuid::default(),
merge_strategy: method,
primary_key,
// document count is unknown for legacy updates
documents_count: 0,
allow_index_creation: true,
},
Update::Settings(settings) => TaskContent::SettingsUpdate {
settings,
// There is no way to know now, so we assume it isn't
is_deletion: false,
allow_index_creation: true,
},
Update::ClearDocuments => TaskContent::DocumentDeletion(DocumentDeletion::Clear),
}
}
}
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum UpdateMeta {
DocumentsAddition {
method: IndexDocumentsMethod,
primary_key: Option<String>,
},
ClearDocuments,
DeleteDocuments {
ids: Vec<String>,
},
Settings(Settings<Unchecked>),
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Enqueued {
pub update_id: u64,
pub meta: Update,
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
}
impl Enqueued {
fn update_task(self, task: &mut Task) {
// we do not erase the `TaskId` that was given to us.
task.content = self.meta.into();
task.events.push(TaskEvent::Created(self.enqueued_at));
}
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Processed {
pub success: v2::UpdateResult,
#[serde(with = "time::serde::rfc3339")]
pub processed_at: OffsetDateTime,
#[serde(flatten)]
pub from: Processing,
}
impl Processed {
fn update_task(self, task: &mut Task) {
self.from.update_task(task);
let event = TaskEvent::Succeded {
result: TaskResult::from(self.success),
timestamp: self.processed_at,
};
task.events.push(event);
}
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Processing {
#[serde(flatten)]
pub from: Enqueued,
#[serde(with = "time::serde::rfc3339")]
pub started_processing_at: OffsetDateTime,
}
impl Processing {
fn update_task(self, task: &mut Task) {
self.from.update_task(task);
let event = TaskEvent::Processing(self.started_processing_at);
task.events.push(event);
}
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Failed {
#[serde(flatten)]
pub from: Processing,
pub msg: String,
pub code: Code,
#[serde(with = "time::serde::rfc3339")]
pub failed_at: OffsetDateTime,
}
impl Failed {
fn update_task(self, task: &mut Task) {
self.from.update_task(task);
let event = TaskEvent::Failed {
error: ResponseError::from_msg(self.msg, self.code),
timestamp: self.failed_at,
};
task.events.push(event);
}
}
impl From<(UpdateStatus, String, TaskId)> for Task {
fn from((update, uid, task_id): (UpdateStatus, String, TaskId)) -> Self {
// Dummy task
let mut task = super::v4::Task {
id: task_id,
index_uid: IndexUid::new_unchecked(uid),
content: super::v4::TaskContent::IndexDeletion,
events: Vec::new(),
};
match update {
UpdateStatus::Processing(u) => u.update_task(&mut task),
UpdateStatus::Enqueued(u) => u.update_task(&mut task),
UpdateStatus::Processed(u) => u.update_task(&mut task),
UpdateStatus::Failed(u) => u.update_task(&mut task),
}
task
}
}

View File

@ -1,42 +0,0 @@
use meilisearch_auth::error::AuthControllerError;
use meilisearch_types::error::{Code, ErrorCode};
use meilisearch_types::internal_error;
use crate::{index_resolver::error::IndexResolverError, tasks::error::TaskError};
pub type Result<T> = std::result::Result<T, DumpError>;
#[derive(thiserror::Error, Debug)]
pub enum DumpError {
#[error("An internal error has occurred. `{0}`.")]
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
#[error("{0}")]
IndexResolver(Box<IndexResolverError>),
}
internal_error!(
DumpError: milli::heed::Error,
std::io::Error,
tokio::task::JoinError,
tokio::sync::oneshot::error::RecvError,
serde_json::error::Error,
tempfile::PersistError,
fs_extra::error::Error,
AuthControllerError,
TaskError
);
impl From<IndexResolverError> for DumpError {
fn from(e: IndexResolverError) -> Self {
Self::IndexResolver(Box::new(e))
}
}
impl ErrorCode for DumpError {
fn error_code(&self) -> Code {
match self {
DumpError::Internal(_) => Code::Internal,
DumpError::IndexResolver(e) => e.error_code(),
}
}
}

View File

@ -1,188 +0,0 @@
#[cfg(not(test))]
pub use real::DumpHandler;
#[cfg(test)]
pub use test::MockDumpHandler as DumpHandler;
use time::{macros::format_description, OffsetDateTime};
/// Generate uid from creation date
pub fn generate_uid() -> String {
OffsetDateTime::now_utc()
.format(format_description!(
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
))
.unwrap()
}
mod real {
use std::path::PathBuf;
use std::sync::Arc;
use log::{info, trace};
use meilisearch_auth::AuthController;
use milli::heed::Env;
use tokio::fs::create_dir_all;
use tokio::io::AsyncWriteExt;
use crate::analytics;
use crate::compression::to_tar_gz;
use crate::dump::error::{DumpError, Result};
use crate::dump::{MetadataVersion, META_FILE_NAME};
use crate::index_resolver::{
index_store::IndexStore, meta_store::IndexMetaStore, IndexResolver,
};
use crate::tasks::TaskStore;
use crate::update_file_store::UpdateFileStore;
pub struct DumpHandler<U, I> {
dump_path: PathBuf,
db_path: PathBuf,
update_file_store: UpdateFileStore,
task_store_size: usize,
index_db_size: usize,
env: Arc<Env>,
index_resolver: Arc<IndexResolver<U, I>>,
}
impl<U, I> DumpHandler<U, I>
where
U: IndexMetaStore + Sync + Send + 'static,
I: IndexStore + Sync + Send + 'static,
{
pub fn new(
dump_path: PathBuf,
db_path: PathBuf,
update_file_store: UpdateFileStore,
task_store_size: usize,
index_db_size: usize,
env: Arc<Env>,
index_resolver: Arc<IndexResolver<U, I>>,
) -> Self {
Self {
dump_path,
db_path,
update_file_store,
task_store_size,
index_db_size,
env,
index_resolver,
}
}
pub async fn run(&self, uid: String) -> Result<()> {
trace!("Performing dump.");
create_dir_all(&self.dump_path).await?;
let temp_dump_dir = tokio::task::spawn_blocking(tempfile::TempDir::new).await??;
let temp_dump_path = temp_dump_dir.path().to_owned();
let meta = MetadataVersion::new_v5(self.index_db_size, self.task_store_size);
let meta_path = temp_dump_path.join(META_FILE_NAME);
let meta_bytes = serde_json::to_vec(&meta)?;
let mut meta_file = tokio::fs::File::create(&meta_path).await?;
meta_file.write_all(&meta_bytes).await?;
analytics::copy_user_id(&self.db_path, &temp_dump_path);
create_dir_all(&temp_dump_path.join("indexes")).await?;
let db_path = self.db_path.clone();
let temp_dump_path_clone = temp_dump_path.clone();
tokio::task::spawn_blocking(move || -> Result<()> {
AuthController::dump(db_path, temp_dump_path_clone)?;
Ok(())
})
.await??;
TaskStore::dump(
self.env.clone(),
&temp_dump_path,
self.update_file_store.clone(),
)
.await?;
self.index_resolver.dump(&temp_dump_path).await?;
let dump_path = self.dump_path.clone();
let dump_path = tokio::task::spawn_blocking(move || -> Result<PathBuf> {
// for now we simply copy the updates/updates_files
// FIXME: We may copy more files than necessary, if new files are added while we are
// performing the dump. We need a way to filter them out.
let temp_dump_file = tempfile::NamedTempFile::new_in(&dump_path)?;
to_tar_gz(temp_dump_path, temp_dump_file.path())
.map_err(|e| DumpError::Internal(e.into()))?;
let dump_path = dump_path.join(uid).with_extension("dump");
temp_dump_file.persist(&dump_path)?;
Ok(dump_path)
})
.await??;
info!("Created dump in {:?}.", dump_path);
Ok(())
}
}
}
#[cfg(test)]
mod test {
use std::path::PathBuf;
use std::sync::Arc;
use milli::heed::Env;
use nelson::Mocker;
use crate::dump::error::Result;
use crate::index_resolver::IndexResolver;
use crate::index_resolver::{index_store::IndexStore, meta_store::IndexMetaStore};
use crate::update_file_store::UpdateFileStore;
use super::*;
pub enum MockDumpHandler<U, I> {
Real(super::real::DumpHandler<U, I>),
Mock(Mocker),
}
impl<U, I> MockDumpHandler<U, I> {
pub fn mock(mocker: Mocker) -> Self {
Self::Mock(mocker)
}
}
impl<U, I> MockDumpHandler<U, I>
where
U: IndexMetaStore + Sync + Send + 'static,
I: IndexStore + Sync + Send + 'static,
{
pub fn new(
dump_path: PathBuf,
db_path: PathBuf,
update_file_store: UpdateFileStore,
task_store_size: usize,
index_db_size: usize,
env: Arc<Env>,
index_resolver: Arc<IndexResolver<U, I>>,
) -> Self {
Self::Real(super::real::DumpHandler::new(
dump_path,
db_path,
update_file_store,
task_store_size,
index_db_size,
env,
index_resolver,
))
}
pub async fn run(&self, uid: String) -> Result<()> {
match self {
Self::Real(real) => real.run(uid).await,
Self::Mock(mocker) => unsafe { mocker.get("run").call(uid) },
}
}
}
}

View File

@ -1,4 +0,0 @@
pub mod v2;
pub mod v3;
pub mod v4;
pub mod v5;

View File

@ -1,216 +0,0 @@
use std::fs::{File, OpenOptions};
use std::io::Write;
use std::path::{Path, PathBuf};
use serde_json::{Deserializer, Value};
use tempfile::NamedTempFile;
use crate::dump::compat::{self, v2, v3};
use crate::dump::Metadata;
use crate::options::IndexerOpts;
/// The dump v2 reads the dump folder and patches all the needed files to make it compatible with a
/// dump v3, then calls the dump v3 loader to actually handle the dump.
pub fn load_dump(
meta: Metadata,
src: impl AsRef<Path>,
dst: impl AsRef<Path>,
index_db_size: usize,
update_db_size: usize,
indexing_options: &IndexerOpts,
) -> anyhow::Result<()> {
log::info!("Patching dump V2 to dump V3...");
let indexes_path = src.as_ref().join("indexes");
let dir_entries = std::fs::read_dir(indexes_path)?;
for entry in dir_entries {
let entry = entry?;
// rename the index folder
let path = entry.path();
let new_path = patch_index_uuid_path(&path).expect("invalid index folder.");
std::fs::rename(path, &new_path)?;
let settings_path = new_path.join("meta.json");
patch_settings(settings_path)?;
}
let update_dir = src.as_ref().join("updates");
let update_path = update_dir.join("data.jsonl");
patch_updates(update_dir, update_path)?;
super::v3::load_dump(
meta,
src,
dst,
index_db_size,
update_db_size,
indexing_options,
)
}
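/// Renames a v2 index directory to its v3 name by stripping the `index-` prefix, e.g.
/// `indexes/index-25f10bb8-6ea8-42f0-bd48-ad5857f77648` becomes
/// `indexes/25f10bb8-6ea8-42f0-bd48-ad5857f77648` (the uuid here is only illustrative).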
fn patch_index_uuid_path(path: &Path) -> Option<PathBuf> {
let uuid = path.file_name()?.to_str()?.trim_start_matches("index-");
let new_path = path.parent()?.join(uuid);
Some(new_path)
}
fn patch_settings(path: impl AsRef<Path>) -> anyhow::Result<()> {
let mut meta_file = File::open(&path)?;
let mut meta: Value = serde_json::from_reader(&mut meta_file)?;
// We first deserialize the dump meta into a serde_json::Value and change
// the custom ranking rules settings from the old format to the new format.
if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") {
patch_custom_ranking_rules(ranking_rules);
}
let mut meta_file = OpenOptions::new().truncate(true).write(true).open(path)?;
serde_json::to_writer(&mut meta_file, &meta)?;
Ok(())
}
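/// Rewrites `updates/data.jsonl` in place: every line is parsed as a v2 update entry,
/// converted to its v3 representation, and written back as one JSON object per line before
/// the temporary file replaces the original.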
fn patch_updates(dir: impl AsRef<Path>, path: impl AsRef<Path>) -> anyhow::Result<()> {
let mut output_update_file = NamedTempFile::new_in(&dir)?;
let update_file = File::open(&path)?;
let stream = Deserializer::from_reader(update_file).into_iter::<v2::UpdateEntry>();
for update in stream {
let update_entry = update?;
let update_entry = v3::UpdateEntry::from(update_entry);
serde_json::to_writer(&mut output_update_file, &update_entry)?;
output_update_file.write_all(b"\n")?;
}
output_update_file.flush()?;
output_update_file.persist(path)?;
Ok(())
}
/// Converts the ranking rules from the format `asc(_)`, `desc(_)` to the format `_:asc`, `_:desc`.
///
/// This is done for compatibility reasons, and to avoid a new dump version,
/// since the new syntax was introduced soon after the new dump version.
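///
/// For illustration (values are hypothetical): `["typo", "asc(price)", "desc(rank)"]` is
/// rewritten to `["typo", "price:asc", "rank:desc"]`; rules that are not `asc(_)`/`desc(_)`
/// are kept as they are.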
fn patch_custom_ranking_rules(ranking_rules: &mut Value) {
*ranking_rules = match ranking_rules.take() {
Value::Array(values) => values
.into_iter()
.filter_map(|value| match value {
Value::String(s) if s.starts_with("asc") => compat::asc_ranking_rule(&s)
.map(|f| format!("{}:asc", f))
.map(Value::String),
Value::String(s) if s.starts_with("desc") => compat::desc_ranking_rule(&s)
.map(|f| format!("{}:desc", f))
.map(Value::String),
otherwise => Some(otherwise),
})
.collect(),
otherwise => otherwise,
}
}
impl From<v2::UpdateEntry> for v3::UpdateEntry {
fn from(v2::UpdateEntry { uuid, update }: v2::UpdateEntry) -> Self {
let update = match update {
v2::UpdateStatus::Processing(meta) => v3::UpdateStatus::Processing(meta.into()),
v2::UpdateStatus::Enqueued(meta) => v3::UpdateStatus::Enqueued(meta.into()),
v2::UpdateStatus::Processed(meta) => v3::UpdateStatus::Processed(meta.into()),
v2::UpdateStatus::Aborted(_) => unreachable!("Updates could never be aborted."),
v2::UpdateStatus::Failed(meta) => v3::UpdateStatus::Failed(meta.into()),
};
Self { uuid, update }
}
}
impl From<v2::Failed> for v3::Failed {
fn from(other: v2::Failed) -> Self {
let v2::Failed {
from,
error,
failed_at,
} = other;
Self {
from: from.into(),
msg: error.message,
code: v2::error_code_from_str(&error.error_code)
.expect("Invalid update: Invalid error code"),
failed_at,
}
}
}
impl From<v2::Processing> for v3::Processing {
fn from(other: v2::Processing) -> Self {
let v2::Processing {
from,
started_processing_at,
} = other;
Self {
from: from.into(),
started_processing_at,
}
}
}
impl From<v2::Enqueued> for v3::Enqueued {
fn from(other: v2::Enqueued) -> Self {
let v2::Enqueued {
update_id,
meta,
enqueued_at,
content,
} = other;
let meta = match meta {
v2::UpdateMeta::DocumentsAddition {
method,
primary_key,
..
} => {
v3::Update::DocumentAddition {
primary_key,
method,
// Just ignore it if the uuid is not present. If it is needed later, an error will
// be returned.
content_uuid: content.unwrap_or_default(),
}
}
v2::UpdateMeta::ClearDocuments => v3::Update::ClearDocuments,
v2::UpdateMeta::DeleteDocuments { ids } => v3::Update::DeleteDocuments(ids),
v2::UpdateMeta::Settings(settings) => v3::Update::Settings(settings),
};
Self {
update_id,
meta,
enqueued_at,
}
}
}
impl From<v2::Processed> for v3::Processed {
fn from(other: v2::Processed) -> Self {
let v2::Processed {
from,
success,
processed_at,
} = other;
Self {
success,
processed_at,
from: from.into(),
}
}
}

View File

@ -1,136 +0,0 @@
use std::collections::HashMap;
use std::fs::{self, File};
use std::io::{BufReader, BufWriter, Write};
use std::path::Path;
use anyhow::Context;
use fs_extra::dir::{self, CopyOptions};
use log::info;
use tempfile::tempdir;
use uuid::Uuid;
use crate::dump::compat::{self, v3};
use crate::dump::Metadata;
use crate::index_resolver::meta_store::{DumpEntry, IndexMeta};
use crate::options::IndexerOpts;
use crate::tasks::task::TaskId;
/// dump structure for V3:
/// .
/// ├── indexes
/// │   └── 25f10bb8-6ea8-42f0-bd48-ad5857f77648
/// │   ├── documents.jsonl
/// │   └── meta.json
/// ├── index_uuids
/// │   └── data.jsonl
/// ├── metadata.json
/// └── updates
/// └── data.jsonl
pub fn load_dump(
meta: Metadata,
src: impl AsRef<Path>,
dst: impl AsRef<Path>,
index_db_size: usize,
meta_env_size: usize,
indexing_options: &IndexerOpts,
) -> anyhow::Result<()> {
info!("Patching dump V3 to dump V4...");
let patched_dir = tempdir()?;
let options = CopyOptions::default();
dir::copy(src.as_ref().join("indexes"), patched_dir.path(), &options)?;
dir::copy(
src.as_ref().join("index_uuids"),
patched_dir.path(),
&options,
)?;
let uuid_map = patch_index_meta(
src.as_ref().join("index_uuids/data.jsonl"),
patched_dir.path(),
)?;
fs::copy(
src.as_ref().join("metadata.json"),
patched_dir.path().join("metadata.json"),
)?;
patch_updates(&src, patched_dir.path(), uuid_map)?;
super::v4::load_dump(
meta,
patched_dir.path(),
dst,
index_db_size,
meta_env_size,
indexing_options,
)
}
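/// Reads the v3 `index_uuids/data.jsonl`, writes the patched entries into the destination
/// (the creation task id is not recoverable, so it is set to 0), and returns the uuid to
/// index uid mapping needed to patch the updates.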
fn patch_index_meta(
path: impl AsRef<Path>,
dst: impl AsRef<Path>,
) -> anyhow::Result<HashMap<Uuid, String>> {
let file = BufReader::new(File::open(path)?);
let dst = dst.as_ref().join("index_uuids");
fs::create_dir_all(&dst)?;
let mut dst_file = File::create(dst.join("data.jsonl"))?;
let map = serde_json::Deserializer::from_reader(file)
.into_iter::<v3::DumpEntry>()
.try_fold(HashMap::new(), |mut map, entry| -> anyhow::Result<_> {
let entry = entry?;
map.insert(entry.uuid, entry.uid.clone());
let meta = IndexMeta {
uuid: entry.uuid,
// This information is lost, so we patch it to 0.
creation_task_id: 0,
};
let entry = DumpEntry {
uid: entry.uid,
index_meta: meta,
};
serde_json::to_writer(&mut dst_file, &entry)?;
dst_file.write_all(b"\n")?;
Ok(map)
})?;
dst_file.flush()?;
Ok(map)
}
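/// Converts `updates/data.jsonl` into the v4 task format: each v3 update entry is resolved to
/// its index uid through `uuid_map`, and its position in the file is used as the task id.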
fn patch_updates(
src: impl AsRef<Path>,
dst: impl AsRef<Path>,
uuid_map: HashMap<Uuid, String>,
) -> anyhow::Result<()> {
let dst = dst.as_ref().join("updates");
fs::create_dir_all(&dst)?;
let mut dst_file = BufWriter::new(File::create(dst.join("data.jsonl"))?);
let src_file = BufReader::new(File::open(src.as_ref().join("updates/data.jsonl"))?);
serde_json::Deserializer::from_reader(src_file)
.into_iter::<v3::UpdateEntry>()
.enumerate()
.try_for_each(|(task_id, entry)| -> anyhow::Result<()> {
let entry = entry?;
let name = uuid_map
.get(&entry.uuid)
.with_context(|| format!("Unknown index uuid: {}", entry.uuid))?
.clone();
serde_json::to_writer(
&mut dst_file,
&compat::v4::Task::from((entry.update, name, task_id as TaskId)),
)?;
dst_file.write_all(b"\n")?;
Ok(())
})?;
dst_file.flush()?;
Ok(())
}

View File

@ -1,47 +0,0 @@
use std::{path::Path, sync::Arc};
use log::info;
use meilisearch_auth::AuthController;
use milli::heed::EnvOpenOptions;
use crate::analytics;
use crate::dump::Metadata;
use crate::index_resolver::IndexResolver;
use crate::options::IndexerOpts;
use crate::tasks::TaskStore;
use crate::update_file_store::UpdateFileStore;
pub fn load_dump(
meta: Metadata,
src: impl AsRef<Path>,
dst: impl AsRef<Path>,
index_db_size: usize,
meta_env_size: usize,
indexing_options: &IndexerOpts,
) -> anyhow::Result<()> {
info!(
"Loading dump from {}, dump database version: {}, dump version: V5",
meta.dump_date, meta.db_version
);
let mut options = EnvOpenOptions::new();
options.map_size(meta_env_size);
options.max_dbs(100);
let env = Arc::new(options.open(&dst)?);
IndexResolver::load_dump(
src.as_ref(),
&dst,
index_db_size,
env.clone(),
indexing_options,
)?;
UpdateFileStore::load_dump(src.as_ref(), &dst)?;
TaskStore::load_dump(&src, env)?;
AuthController::load_dump(&src, &dst)?;
analytics::copy_user_id(src.as_ref(), dst.as_ref());
info!("Loading indexes.");
Ok(())
}

View File

@ -1,262 +0,0 @@
use std::fs::File;
use std::path::Path;
use anyhow::bail;
use log::info;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use tempfile::TempDir;
use crate::compression::from_tar_gz;
use crate::options::IndexerOpts;
use self::loaders::{v2, v3, v4, v5};
pub use handler::{generate_uid, DumpHandler};
mod compat;
pub mod error;
mod handler;
mod loaders;
const META_FILE_NAME: &str = "metadata.json";
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct Metadata {
db_version: String,
index_db_size: usize,
update_db_size: usize,
#[serde(with = "time::serde::rfc3339")]
dump_date: OffsetDateTime,
}
impl Metadata {
pub fn new(index_db_size: usize, update_db_size: usize) -> Self {
Self {
db_version: env!("CARGO_PKG_VERSION").to_string(),
index_db_size,
update_db_size,
dump_date: OffsetDateTime::now_utc(),
}
}
}
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct MetadataV1 {
pub db_version: String,
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "dumpVersion")]
pub enum MetadataVersion {
V1(MetadataV1),
V2(Metadata),
V3(Metadata),
V4(Metadata),
// V5 is forward compatible with V4 but not backward compatible.
V5(Metadata),
}
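// With the serde attributes above, a v5 `metadata.json` looks roughly like this
// (illustrative values only):
// {
//   "dumpVersion": "V5",
//   "dbVersion": "0.29.0",
//   "indexDbSize": 107374182400,
//   "updateDbSize": 10737418240,
//   "dumpDate": "2022-09-27T14:33:37Z"
// }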
impl MetadataVersion {
pub fn load_dump(
self,
src: impl AsRef<Path>,
dst: impl AsRef<Path>,
index_db_size: usize,
meta_env_size: usize,
indexing_options: &IndexerOpts,
) -> anyhow::Result<()> {
match self {
MetadataVersion::V1(_meta) => {
anyhow::bail!("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.")
}
MetadataVersion::V2(meta) => v2::load_dump(
meta,
src,
dst,
index_db_size,
meta_env_size,
indexing_options,
)?,
MetadataVersion::V3(meta) => v3::load_dump(
meta,
src,
dst,
index_db_size,
meta_env_size,
indexing_options,
)?,
MetadataVersion::V4(meta) => v4::load_dump(
meta,
src,
dst,
index_db_size,
meta_env_size,
indexing_options,
)?,
MetadataVersion::V5(meta) => v5::load_dump(
meta,
src,
dst,
index_db_size,
meta_env_size,
indexing_options,
)?,
}
Ok(())
}
pub fn new_v5(index_db_size: usize, update_db_size: usize) -> Self {
let meta = Metadata::new(index_db_size, update_db_size);
Self::V5(meta)
}
pub fn db_version(&self) -> &str {
match self {
Self::V1(meta) => &meta.db_version,
Self::V2(meta) | Self::V3(meta) | Self::V4(meta) | Self::V5(meta) => &meta.db_version,
}
}
pub fn version(&self) -> &'static str {
match self {
MetadataVersion::V1(_) => "V1",
MetadataVersion::V2(_) => "V2",
MetadataVersion::V3(_) => "V3",
MetadataVersion::V4(_) => "V4",
MetadataVersion::V5(_) => "V5",
}
}
pub fn dump_date(&self) -> Option<&OffsetDateTime> {
match self {
MetadataVersion::V1(_) => None,
MetadataVersion::V2(meta)
| MetadataVersion::V3(meta)
| MetadataVersion::V4(meta)
| MetadataVersion::V5(meta) => Some(&meta.dump_date),
}
}
}
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
#[serde(rename_all = "snake_case")]
pub enum DumpStatus {
Done,
InProgress,
Failed,
}
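/// Public entry point for importing a dump: the dump at `src_path` is loaded only when the
/// database at `dst_path` is empty and the dump file exists; otherwise the
/// `ignore_dump_if_db_exists` and `ignore_missing_dump` flags decide whether this is an error
/// or a no-op.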
pub fn load_dump(
dst_path: impl AsRef<Path>,
src_path: impl AsRef<Path>,
ignore_dump_if_db_exists: bool,
ignore_missing_dump: bool,
index_db_size: usize,
update_db_size: usize,
indexer_opts: &IndexerOpts,
) -> anyhow::Result<()> {
let empty_db = crate::is_empty_db(&dst_path);
let src_path_exists = src_path.as_ref().exists();
if empty_db && src_path_exists {
let (tmp_src, tmp_dst, meta) = extract_dump(&dst_path, &src_path)?;
meta.load_dump(
tmp_src.path(),
tmp_dst.path(),
index_db_size,
update_db_size,
indexer_opts,
)?;
persist_dump(&dst_path, tmp_dst)?;
Ok(())
} else if !empty_db && !ignore_dump_if_db_exists {
bail!(
"database already exists at {:?}, try to delete it or rename it",
dst_path
.as_ref()
.canonicalize()
.unwrap_or_else(|_| dst_path.as_ref().to_owned())
)
} else if !src_path_exists && !ignore_missing_dump {
bail!("dump doesn't exist at {:?}", src_path.as_ref())
} else {
// there is nothing to do
Ok(())
}
}
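/// Extracts the `.dump` tarball into a temporary directory created next to the database
/// directory (to avoid cross-device moves later on) and reads its `metadata.json` so the
/// matching versioned loader can be selected.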
fn extract_dump(
dst_path: impl AsRef<Path>,
src_path: impl AsRef<Path>,
) -> anyhow::Result<(TempDir, TempDir, MetadataVersion)> {
// Set up a temp directory path in the same location as the database, to prevent cross-device
// references.
let temp_path = dst_path
.as_ref()
.parent()
.map(ToOwned::to_owned)
.unwrap_or_else(|| ".".into());
let tmp_src = tempfile::tempdir_in(temp_path)?;
let tmp_src_path = tmp_src.path();
from_tar_gz(&src_path, tmp_src_path)?;
let meta_path = tmp_src_path.join(META_FILE_NAME);
let mut meta_file = File::open(&meta_path)?;
let meta: MetadataVersion = serde_json::from_reader(&mut meta_file)?;
if !dst_path.as_ref().exists() {
std::fs::create_dir_all(dst_path.as_ref())?;
}
let tmp_dst = tempfile::tempdir_in(dst_path.as_ref())?;
info!(
"Loading dump {}, dump database version: {}, dump version: {}",
meta.dump_date()
.map(|t| format!("from {}", t))
.unwrap_or_else(String::new),
meta.db_version(),
meta.version()
);
Ok((tmp_src, tmp_dst, meta))
}
fn persist_dump(dst_path: impl AsRef<Path>, tmp_dst: TempDir) -> anyhow::Result<()> {
let persisted_dump = tmp_dst.into_path();
// Delete everything in the `data.ms` except the tempdir.
if dst_path.as_ref().exists() {
for file in dst_path.as_ref().read_dir().unwrap() {
let file = file.unwrap().path();
if file.file_name() == persisted_dump.file_name() {
continue;
}
if file.is_file() {
std::fs::remove_file(&file)?;
} else {
std::fs::remove_dir_all(&file)?;
}
}
}
// Move the whole content of the tempdir into the `data.ms`.
for file in persisted_dump.read_dir().unwrap() {
let file = file.unwrap().path();
std::fs::rename(&file, &dst_path.as_ref().join(file.file_name().unwrap()))?;
}
// Delete the empty tempdir.
std::fs::remove_dir_all(&persisted_dump)?;
Ok(())
}

View File

@ -1,55 +0,0 @@
use std::error::Error;
use std::fmt;
use meilisearch_types::error::{Code, ErrorCode};
use milli::UserError;
#[derive(Debug)]
pub struct MilliError<'a>(pub &'a milli::Error);
impl Error for MilliError<'_> {}
impl fmt::Display for MilliError<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.0.fmt(f)
}
}
impl ErrorCode for MilliError<'_> {
fn error_code(&self) -> Code {
match self.0 {
milli::Error::InternalError(_) => Code::Internal,
milli::Error::IoError(_) => Code::Internal,
milli::Error::UserError(ref error) => {
match error {
// TODO: wait for spec for new error codes.
UserError::SerdeJson(_)
| UserError::InvalidLmdbOpenOptions
| UserError::DocumentLimitReached
| UserError::AccessingSoftDeletedDocument { .. }
| UserError::UnknownInternalDocumentId { .. } => Code::Internal,
UserError::InvalidStoreFile => Code::InvalidStore,
UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice,
UserError::MaxDatabaseSizeReached => Code::DatabaseSizeLimitReached,
UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded,
UserError::InvalidFilter(_) => Code::Filter,
UserError::MissingDocumentId { .. } => Code::MissingDocumentId,
UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => {
Code::InvalidDocumentId
}
UserError::MissingPrimaryKey => Code::MissingPrimaryKey,
UserError::PrimaryKeyCannotBeChanged(_) => Code::PrimaryKeyAlreadyPresent,
UserError::SortRankingRuleMissing => Code::Sort,
UserError::InvalidFacetsDistribution { .. } => Code::BadRequest,
UserError::InvalidSortableAttribute { .. } => Code::Sort,
UserError::CriterionError(_) => Code::InvalidRankingRule,
UserError::InvalidGeoField { .. } => Code::InvalidGeoField,
UserError::SortError(_) => Code::Sort,
UserError::InvalidMinTypoWordLenSetting(_, _) => {
Code::InvalidMinWordLengthForTypo
}
}
}
}
}
}

View File

@ -1,66 +0,0 @@
use std::error::Error;
use meilisearch_types::error::{Code, ErrorCode};
use meilisearch_types::index_uid::IndexUidFormatError;
use meilisearch_types::internal_error;
use tokio::task::JoinError;
use super::DocumentAdditionFormat;
use crate::document_formats::DocumentFormatError;
// use crate::dump::error::DumpError;
use index::error::IndexError;
pub type Result<T> = std::result::Result<T, IndexControllerError>;
#[derive(Debug, thiserror::Error)]
pub enum IndexControllerError {
#[error("Index creation must have an uid")]
MissingUid,
#[error(transparent)]
IndexResolver(#[from] index_scheduler::Error),
#[error(transparent)]
IndexError(#[from] IndexError),
#[error("An internal error has occurred. `{0}`.")]
Internal(Box<dyn Error + Send + Sync + 'static>),
// #[error("{0}")]
// DumpError(#[from] DumpError),
#[error(transparent)]
DocumentFormatError(#[from] DocumentFormatError),
#[error("A {0} payload is missing.")]
MissingPayload(DocumentAdditionFormat),
#[error("The provided payload reached the size limit.")]
PayloadTooLarge,
}
internal_error!(IndexControllerError: JoinError, file_store::Error);
impl From<actix_web::error::PayloadError> for IndexControllerError {
fn from(other: actix_web::error::PayloadError) -> Self {
match other {
actix_web::error::PayloadError::Overflow => Self::PayloadTooLarge,
_ => Self::Internal(Box::new(other)),
}
}
}
impl ErrorCode for IndexControllerError {
fn error_code(&self) -> Code {
match self {
IndexControllerError::MissingUid => Code::BadRequest,
IndexControllerError::Internal(_) => Code::Internal,
IndexControllerError::DocumentFormatError(e) => e.error_code(),
IndexControllerError::MissingPayload(_) => Code::MissingPayload,
IndexControllerError::PayloadTooLarge => Code::PayloadTooLarge,
IndexControllerError::IndexResolver(e) => e.error_code(),
IndexControllerError::IndexError(e) => e.error_code(),
}
}
}
/*
impl From<IndexUidFormatError> for IndexControllerError {
fn from(err: IndexUidFormatError) -> Self {
index_scheduler::Error::from(err).into()
}
}
*/

View File

@ -1,79 +0,0 @@
use std::error::Error;
use std::fmt;
use meilisearch_types::{internal_error, Code, ErrorCode};
use crate::{
document_formats::DocumentFormatError,
index::error::IndexError,
index_controller::{update_file_store::UpdateFileStoreError, DocumentAdditionFormat},
};
pub type Result<T> = std::result::Result<T, UpdateLoopError>;
#[derive(Debug, thiserror::Error)]
#[allow(clippy::large_enum_variant)]
pub enum UpdateLoopError {
#[error("Task `{0}` not found.")]
UnexistingUpdate(u64),
#[error("An internal error has occurred. `{0}`.")]
Internal(Box<dyn Error + Send + Sync + 'static>),
#[error(
"update store was shut down due to a fatal error, please check your logs for more info."
)]
FatalUpdateStoreError,
#[error("{0}")]
DocumentFormatError(#[from] DocumentFormatError),
#[error("The provided payload reached the size limit.")]
PayloadTooLarge,
#[error("A {0} payload is missing.")]
MissingPayload(DocumentAdditionFormat),
#[error("{0}")]
IndexError(#[from] IndexError),
}
impl<T> From<tokio::sync::mpsc::error::SendError<T>> for UpdateLoopError
where
T: Sync + Send + 'static + fmt::Debug,
{
fn from(other: tokio::sync::mpsc::error::SendError<T>) -> Self {
Self::Internal(Box::new(other))
}
}
impl From<tokio::sync::oneshot::error::RecvError> for UpdateLoopError {
fn from(other: tokio::sync::oneshot::error::RecvError) -> Self {
Self::Internal(Box::new(other))
}
}
impl From<actix_web::error::PayloadError> for UpdateLoopError {
fn from(other: actix_web::error::PayloadError) -> Self {
match other {
actix_web::error::PayloadError::Overflow => Self::PayloadTooLarge,
_ => Self::Internal(Box::new(other)),
}
}
}
internal_error!(
UpdateLoopError: heed::Error,
std::io::Error,
serde_json::Error,
tokio::task::JoinError,
UpdateFileStoreError
);
impl ErrorCode for UpdateLoopError {
fn error_code(&self) -> Code {
match self {
Self::UnexistingUpdate(_) => Code::TaskNotFound,
Self::Internal(_) => Code::Internal,
Self::FatalUpdateStoreError => Code::Internal,
Self::DocumentFormatError(error) => error.error_code(),
Self::PayloadTooLarge => Code::PayloadTooLarge,
Self::MissingPayload(_) => Code::MissingPayload,
Self::IndexError(e) => e.error_code(),
}
}
}

View File

@ -1,19 +0,0 @@
#[derive(thiserror::Error, Debug)]
pub enum VersionFileError {
#[error(
"Meilisearch (v{}) failed to infer the version of the database. Please consider using a dump to load your data.",
env!("CARGO_PKG_VERSION").to_string()
)]
MissingVersionFile,
#[error("Version file is corrupted and thus Meilisearch is unable to determine the version of the database.")]
MalformedVersionFile,
#[error(
"Expected Meilisearch engine version: {major}.{minor}.{patch}, current engine version: {}. To update Meilisearch use a dump.",
env!("CARGO_PKG_VERSION").to_string()
)]
VersionMismatch {
major: String,
minor: String,
patch: String,
},
}

View File

@ -1,56 +0,0 @@
use std::fs;
use std::io::ErrorKind;
use std::path::Path;
use self::error::VersionFileError;
mod error;
pub const VERSION_FILE_NAME: &str = "VERSION";
static VERSION_MAJOR: &str = env!("CARGO_PKG_VERSION_MAJOR");
static VERSION_MINOR: &str = env!("CARGO_PKG_VERSION_MINOR");
static VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH");
// Persists the version of the current Meilisearch binary to a VERSION file
pub fn create_version_file(db_path: &Path) -> anyhow::Result<()> {
let version_path = db_path.join(VERSION_FILE_NAME);
fs::write(
version_path,
format!("{}.{}.{}", VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH),
)?;
Ok(())
}
// Ensures the Meilisearch version is compatible with the database; returns an error on version mismatch.
pub fn check_version_file(db_path: &Path) -> anyhow::Result<()> {
let version_path = db_path.join(VERSION_FILE_NAME);
match fs::read_to_string(&version_path) {
Ok(version) => {
let version_components = version.split('.').collect::<Vec<_>>();
let (major, minor, patch) = match &version_components[..] {
[major, minor, patch] => (major.to_string(), minor.to_string(), patch.to_string()),
_ => return Err(VersionFileError::MalformedVersionFile.into()),
};
if major != VERSION_MAJOR || minor != VERSION_MINOR {
return Err(VersionFileError::VersionMismatch {
major,
minor,
patch,
}
.into());
}
}
Err(error) => {
return match error.kind() {
ErrorKind::NotFound => Err(VersionFileError::MissingVersionFile.into()),
_ => Err(error.into()),
}
}
}
Ok(())
}
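// A minimal usage sketch (the surrounding startup sequence is an assumption, not shown in this
// file): an existing database is verified with `check_version_file(&db_path)`, while a freshly
// created database gets its VERSION file written with `create_version_file(&db_path)`; the file
// then contains e.g. `0.29.1` (the current `CARGO_PKG_VERSION`).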

View File

@ -6,6 +6,7 @@ edition = "2021"
[dependencies]
actix-web = { version = "4.2.1", default-features = false }
tokio = "1.0"
proptest = { version = "1.0.0", optional = true }
proptest-derive = { version = "0.3.0", optional = true }
serde = { version = "1.0.145", features = ["derive"] }

View File

@ -1,6 +1,7 @@
use std::fmt;
use actix_web::{self as aweb, http::StatusCode, HttpResponseBuilder};
use aweb::rt::task::JoinError;
use serde::{Deserialize, Serialize};
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
@ -344,6 +345,12 @@ impl ErrCode {
}
}
impl ErrorCode for JoinError {
fn error_code(&self) -> Code {
Code::Internal
}
}
#[cfg(feature = "test-traits")]
mod strategy {
use proptest::strategy::Strategy;