2021-07-27 16:01:06 +02:00
|
|
|
#![allow(rustdoc::private_intra_doc_links)]
|
2021-06-15 17:55:27 +02:00
|
|
|
#[macro_use]
|
2020-12-12 13:32:06 +01:00
|
|
|
pub mod error;
|
2021-12-02 16:03:26 +01:00
|
|
|
pub mod analytics;
|
2021-06-24 14:22:12 +02:00
|
|
|
#[macro_use]
|
2021-06-23 14:56:02 +02:00
|
|
|
pub mod extractors;
|
2023-02-21 18:18:47 +01:00
|
|
|
pub mod metrics;
|
|
|
|
pub mod middleware;
|
2021-03-15 18:11:10 +01:00
|
|
|
pub mod option;
|
|
|
|
pub mod routes;
|
2022-10-11 17:42:43 +02:00
|
|
|
pub mod search;
|
2024-03-26 15:56:43 +01:00
|
|
|
pub mod search_queue;
|
2021-12-02 16:03:26 +01:00
|
|
|
|
2022-10-20 18:00:07 +02:00
|
|
|
use std::fs::File;
|
|
|
|
use std::io::{BufReader, BufWriter};
|
2024-03-26 17:28:03 +01:00
|
|
|
use std::num::NonZeroUsize;
|
2022-10-20 18:00:07 +02:00
|
|
|
use std::path::Path;
|
|
|
|
use std::sync::Arc;
|
2024-03-26 17:28:03 +01:00
|
|
|
use std::thread::{self, available_parallelism};
|
2022-10-25 16:28:33 +02:00
|
|
|
use std::time::Duration;
|
2021-09-28 18:10:09 +02:00
|
|
|
|
2022-10-18 11:57:00 +02:00
|
|
|
use actix_cors::Cors;
|
|
|
|
use actix_http::body::MessageBody;
|
2022-10-20 18:00:07 +02:00
|
|
|
use actix_web::dev::{ServiceFactory, ServiceResponse};
|
|
|
|
use actix_web::error::JsonPayloadError;
|
|
|
|
use actix_web::web::Data;
|
2023-02-21 18:18:47 +01:00
|
|
|
use actix_web::{web, HttpRequest};
|
2021-10-12 14:32:44 +02:00
|
|
|
use analytics::Analytics;
|
2022-10-16 01:39:01 +02:00
|
|
|
use anyhow::bail;
|
2021-10-06 11:49:34 +02:00
|
|
|
use error::PayloadError;
|
2021-06-24 16:25:52 +02:00
|
|
|
use extractors::payload::PayloadConfig;
|
2022-10-20 18:00:07 +02:00
|
|
|
use http::header::CONTENT_TYPE;
|
2022-10-26 11:47:49 +02:00
|
|
|
use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
|
2021-11-08 18:31:27 +01:00
|
|
|
use meilisearch_auth::AuthController;
|
2022-10-20 18:00:07 +02:00
|
|
|
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
|
|
|
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
|
|
|
|
use meilisearch_types::settings::apply_settings_to_builder;
|
2022-10-25 16:28:33 +02:00
|
|
|
use meilisearch_types::tasks::KindWithContent;
|
2022-10-25 15:51:15 +02:00
|
|
|
use meilisearch_types::versioning::{check_version_file, create_version_file};
|
2022-10-25 16:28:33 +02:00
|
|
|
use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
|
2022-10-20 18:00:07 +02:00
|
|
|
pub use option::Opt;
|
2022-12-27 18:17:29 +01:00
|
|
|
use option::ScheduleSnapshot;
|
2024-03-26 17:28:03 +01:00
|
|
|
use search_queue::SearchQueue;
|
2024-02-08 13:49:13 +01:00
|
|
|
use tracing::{error, info_span};
|
2024-01-30 16:31:42 +01:00
|
|
|
use tracing_subscriber::filter::Targets;
|
2022-10-20 18:00:07 +02:00
|
|
|
|
|
|
|
use crate::error::MeilisearchHttpError;
|
2021-09-20 15:31:03 +02:00
|
|
|
|
2023-02-22 09:04:52 +01:00
|
|
|
/// Default number of simultaneously opened indexes.
///
/// This value is used when dynamic computation of how many indexes can be opened at once was skipped (e.g., in tests).
///
/// Lower for Windows that dedicates a smaller virtual address space to processes.
///
/// The value was chosen this way:
///
/// - Windows provides a small virtual address space of about 10TiB to processes.
/// - The chosen value allows for indexes to use the default map size of 2TiB safely
///   (4 x 2TiB = 8TiB, which stays below the limit above).
#[cfg(windows)]
const DEFAULT_INDEX_COUNT: usize = 4;

/// Default number of simultaneously opened indexes.
///
/// This value is used when dynamic computation of how many indexes can be opened at once was skipped (e.g., in tests).
///
/// The higher, the better for avoiding reopening indexes.
///
/// The value was chosen this way:
///
/// - Opening an index consumes a file descriptor.
/// - The default on many unices is about 256 file descriptors for a process.
/// - 20 is well below this value.
/// - The chosen value allows for indexes to use the default map size of 2TiB safely.
#[cfg(not(windows))]
const DEFAULT_INDEX_COUNT: usize = 20;
|
2023-01-11 17:34:46 +01:00
|
|
|
|
2022-10-16 01:39:01 +02:00
|
|
|
/// Check if a db is empty. It does not provide any information on the
/// validity of the data in it.
///
/// We consider a database as non empty only when it's a non empty directory.
/// Every other situation is reported as empty:
///
/// - the path does not exist,
/// - the path is a directory without any entry,
/// - the path cannot be listed as a directory (e.g. it is a regular file, or
///   reading it fails).
fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
    let db_path = db_path.as_ref();

    // Nothing on disk yet: trivially empty.
    if !db_path.exists() {
        return true;
    }

    match db_path.read_dir() {
        // A single entry is enough to call the directory non empty, there is
        // no need to walk through all of them.
        Ok(mut entries) => entries.next().is_none(),
        // The path exists but cannot be listed as a directory; such a path is
        // treated as empty.
        Err(_) => true,
    }
}
|
|
|
|
|
2024-01-29 17:56:43 +01:00
|
|
|
/// The handle used to update the logs at runtime. Must be accessible from the `main.rs` and the `route/logs.rs`.
|
|
|
|
pub type LogRouteHandle =
|
2024-01-29 18:45:55 +01:00
|
|
|
tracing_subscriber::reload::Handle<LogRouteType, tracing_subscriber::Registry>;
|
2024-01-30 16:31:42 +01:00
|
|
|
|
2024-01-29 18:45:55 +01:00
|
|
|
pub type LogRouteType = tracing_subscriber::filter::Filtered<
|
|
|
|
Option<Box<dyn tracing_subscriber::Layer<tracing_subscriber::Registry> + Send + Sync>>,
|
2024-01-30 16:31:42 +01:00
|
|
|
Targets,
|
2024-01-29 18:45:55 +01:00
|
|
|
tracing_subscriber::Registry,
|
|
|
|
>;
|
2024-01-29 17:56:43 +01:00
|
|
|
|
2024-02-12 11:06:37 +01:00
|
|
|
pub type SubscriberForSecondLayer = tracing_subscriber::layer::Layered<
|
|
|
|
tracing_subscriber::reload::Layer<LogRouteType, tracing_subscriber::Registry>,
|
|
|
|
tracing_subscriber::Registry,
|
|
|
|
>;
|
|
|
|
|
|
|
|
pub type LogStderrHandle =
|
|
|
|
tracing_subscriber::reload::Handle<LogStderrType, SubscriberForSecondLayer>;
|
|
|
|
|
|
|
|
pub type LogStderrType = tracing_subscriber::filter::Filtered<
|
|
|
|
Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>,
|
|
|
|
Targets,
|
|
|
|
SubscriberForSecondLayer,
|
|
|
|
>;
|
|
|
|
|
2022-10-18 11:57:00 +02:00
|
|
|
pub fn create_app(
|
|
|
|
index_scheduler: Data<IndexScheduler>,
|
2023-04-06 13:38:47 +02:00
|
|
|
auth_controller: Data<AuthController>,
|
2022-10-18 11:57:00 +02:00
|
|
|
opt: Opt,
|
2024-02-12 11:06:37 +01:00
|
|
|
logs: (LogRouteHandle, LogStderrHandle),
|
2022-10-18 11:57:00 +02:00
|
|
|
analytics: Arc<dyn Analytics>,
|
|
|
|
enable_dashboard: bool,
|
|
|
|
) -> actix_web::App<
|
|
|
|
impl ServiceFactory<
|
|
|
|
actix_web::dev::ServiceRequest,
|
|
|
|
Config = (),
|
|
|
|
Response = ServiceResponse<impl MessageBody>,
|
|
|
|
Error = actix_web::Error,
|
|
|
|
InitError = (),
|
|
|
|
>,
|
|
|
|
> {
|
|
|
|
let app = actix_web::App::new()
|
|
|
|
.configure(|s| {
|
|
|
|
configure_data(
|
|
|
|
s,
|
|
|
|
index_scheduler.clone(),
|
|
|
|
auth_controller.clone(),
|
|
|
|
&opt,
|
2024-01-29 17:56:43 +01:00
|
|
|
logs,
|
2022-10-18 11:57:00 +02:00
|
|
|
analytics.clone(),
|
|
|
|
)
|
|
|
|
})
|
2023-06-22 23:14:01 +02:00
|
|
|
.configure(routes::configure)
|
2022-10-18 11:57:00 +02:00
|
|
|
.configure(|s| dashboard(s, enable_dashboard));
|
2022-10-22 16:35:42 +02:00
|
|
|
|
2023-10-17 17:17:13 +02:00
|
|
|
let app = app.wrap(middleware::RouteMetrics);
|
2022-10-22 16:35:42 +02:00
|
|
|
app.wrap(
|
|
|
|
Cors::default()
|
|
|
|
.send_wildcard()
|
|
|
|
.allow_any_header()
|
|
|
|
.allow_any_origin()
|
|
|
|
.allow_any_method()
|
|
|
|
.max_age(86_400), // 24h
|
|
|
|
)
|
2024-02-08 13:49:13 +01:00
|
|
|
.wrap(tracing_actix_web::TracingLogger::<AwebTracingLogger>::new())
|
2023-02-21 18:18:47 +01:00
|
|
|
.wrap(actix_web::middleware::Compress::default())
|
|
|
|
.wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim))
|
2022-10-18 11:57:00 +02:00
|
|
|
}
|
|
|
|
|
2024-02-08 13:49:13 +01:00
|
|
|
struct AwebTracingLogger;
|
|
|
|
|
|
|
|
impl tracing_actix_web::RootSpanBuilder for AwebTracingLogger {
|
|
|
|
fn on_request_start(request: &actix_web::dev::ServiceRequest) -> tracing::Span {
|
|
|
|
use tracing::field::Empty;
|
|
|
|
|
|
|
|
let conn_info = request.connection_info();
|
|
|
|
let headers = request.headers();
|
|
|
|
let user_agent = headers
|
|
|
|
.get(http::header::USER_AGENT)
|
|
|
|
.map(|value| String::from_utf8_lossy(value.as_bytes()).into_owned())
|
|
|
|
.unwrap_or_default();
|
|
|
|
info_span!("HTTP request", method = %request.method(), host = conn_info.host(), route = %request.path(), query_parameters = %request.query_string(), %user_agent, status_code = Empty, error = Empty)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn on_request_end<B: MessageBody>(
|
|
|
|
span: tracing::Span,
|
|
|
|
outcome: &Result<ServiceResponse<B>, actix_web::Error>,
|
|
|
|
) {
|
|
|
|
match &outcome {
|
|
|
|
Ok(response) => {
|
|
|
|
let code: i32 = response.response().status().as_u16().into();
|
|
|
|
span.record("status_code", code);
|
|
|
|
|
|
|
|
if let Some(error) = response.response().error() {
|
|
|
|
// use the status code already constructed for the outgoing HTTP response
|
|
|
|
span.record("error", &tracing::field::display(error.as_response_error()));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Err(error) => {
|
|
|
|
let code: i32 = error.error_response().status().as_u16().into();
|
|
|
|
span.record("status_code", code);
|
|
|
|
span.record("error", &tracing::field::display(error.as_response_error()));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-12-13 16:33:07 +01:00
|
|
|
/// What to do with the database directory when opening or creating it fails.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum OnFailure {
    /// Remove the whole database directory.
    RemoveDb,
    /// Leave the database directory untouched.
    KeepDb,
}
|
2022-10-16 01:39:01 +02:00
|
|
|
|
2023-04-06 13:38:47 +02:00
|
|
|
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
|
2022-10-25 15:51:15 +02:00
|
|
|
let empty_db = is_empty_db(&opt.db_path);
|
2022-10-25 16:28:33 +02:00
|
|
|
let (index_scheduler, auth_controller) = if let Some(ref snapshot_path) = opt.import_snapshot {
|
|
|
|
let snapshot_path_exists = snapshot_path.exists();
|
2022-12-13 16:33:07 +01:00
|
|
|
// the db is empty and the snapshot exists, import it
|
2022-10-25 16:28:33 +02:00
|
|
|
if empty_db && snapshot_path_exists {
|
|
|
|
match compression::from_tar_gz(snapshot_path, &opt.db_path) {
|
2022-12-13 17:25:49 +01:00
|
|
|
Ok(()) => open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?,
|
2022-10-25 16:28:33 +02:00
|
|
|
Err(e) => {
|
|
|
|
std::fs::remove_dir_all(&opt.db_path)?;
|
|
|
|
return Err(e);
|
|
|
|
}
|
|
|
|
}
|
2022-12-13 16:33:07 +01:00
|
|
|
// the db already exists and we should not ignore the snapshot => throw an error
|
2022-10-25 16:28:33 +02:00
|
|
|
} else if !empty_db && !opt.ignore_snapshot_if_db_exists {
|
|
|
|
bail!(
|
|
|
|
"database already exists at {:?}, try to delete it or rename it",
|
|
|
|
opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned())
|
|
|
|
)
|
2022-12-13 17:02:07 +01:00
|
|
|
// the snapshot doesn't exist and we can't ignore it => throw an error
|
2022-10-25 16:28:33 +02:00
|
|
|
} else if !snapshot_path_exists && !opt.ignore_missing_snapshot {
|
|
|
|
bail!("snapshot doesn't exist at {}", snapshot_path.display())
|
2022-12-13 17:02:07 +01:00
|
|
|
// the snapshot and the db exist, and we can ignore the snapshot because of the ignore_snapshot_if_db_exists flag
|
2022-10-25 16:28:33 +02:00
|
|
|
} else {
|
2022-12-13 17:25:49 +01:00
|
|
|
open_or_create_database(opt, empty_db)?
|
2022-10-25 16:28:33 +02:00
|
|
|
}
|
2022-10-16 01:39:01 +02:00
|
|
|
} else if let Some(ref path) = opt.import_dump {
|
|
|
|
let src_path_exists = path.exists();
|
2022-12-13 16:33:07 +01:00
|
|
|
// the db is empty and the dump exists, import it
|
2022-10-16 01:39:01 +02:00
|
|
|
if empty_db && src_path_exists {
|
2022-11-28 16:27:41 +01:00
|
|
|
let (mut index_scheduler, mut auth_controller) =
|
2022-12-13 17:25:49 +01:00
|
|
|
open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?;
|
2022-10-21 18:09:51 +02:00
|
|
|
match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
|
|
|
|
Ok(()) => (index_scheduler, auth_controller),
|
|
|
|
Err(e) => {
|
|
|
|
std::fs::remove_dir_all(&opt.db_path)?;
|
2022-10-22 16:35:42 +02:00
|
|
|
return Err(e);
|
2022-10-21 18:09:51 +02:00
|
|
|
}
|
|
|
|
}
|
2022-12-13 16:33:07 +01:00
|
|
|
// the db already exists and we should not ignore the dump option => throw an error
|
2022-10-16 01:39:01 +02:00
|
|
|
} else if !empty_db && !opt.ignore_dump_if_db_exists {
|
|
|
|
bail!(
|
|
|
|
"database already exists at {:?}, try to delete it or rename it",
|
2022-10-20 18:00:07 +02:00
|
|
|
opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned())
|
2022-10-16 01:39:01 +02:00
|
|
|
)
|
2022-12-13 17:02:07 +01:00
|
|
|
// the dump doesn't exist and we can't ignore it => throw an error
|
2022-10-16 01:39:01 +02:00
|
|
|
} else if !src_path_exists && !opt.ignore_missing_dump {
|
|
|
|
bail!("dump doesn't exist at {:?}", path)
|
2022-12-13 17:02:07 +01:00
|
|
|
// the dump and the db exist and we can ignore the dump because of the ignore_dump_if_db_exists flag
|
|
|
|
// or, the dump is missing but we can ignore that because of the ignore_missing_dump flag
|
2022-10-16 01:39:01 +02:00
|
|
|
} else {
|
2022-12-13 17:25:49 +01:00
|
|
|
open_or_create_database(opt, empty_db)?
|
2022-10-16 01:39:01 +02:00
|
|
|
}
|
|
|
|
} else {
|
2022-12-13 17:25:49 +01:00
|
|
|
open_or_create_database(opt, empty_db)?
|
2022-10-16 01:39:01 +02:00
|
|
|
};
|
2022-09-27 16:33:37 +02:00
|
|
|
|
2022-10-25 16:28:33 +02:00
|
|
|
// We create a loop in a thread that registers snapshotCreation tasks
|
|
|
|
let index_scheduler = Arc::new(index_scheduler);
|
2023-04-06 13:38:47 +02:00
|
|
|
let auth_controller = Arc::new(auth_controller);
|
2022-12-27 18:17:29 +01:00
|
|
|
if let ScheduleSnapshot::Enabled(snapshot_delay) = opt.schedule_snapshot {
|
|
|
|
let snapshot_delay = Duration::from_secs(snapshot_delay);
|
2022-10-25 16:28:33 +02:00
|
|
|
let index_scheduler = index_scheduler.clone();
|
2022-11-28 16:27:41 +01:00
|
|
|
thread::Builder::new()
|
|
|
|
.name(String::from("register-snapshot-tasks"))
|
|
|
|
.spawn(move || loop {
|
|
|
|
thread::sleep(snapshot_delay);
|
2024-02-21 11:21:26 +01:00
|
|
|
if let Err(e) =
|
|
|
|
index_scheduler.register(KindWithContent::SnapshotCreation, None, false)
|
|
|
|
{
|
2022-11-28 16:27:41 +01:00
|
|
|
error!("Error while registering snapshot: {}", e);
|
|
|
|
}
|
|
|
|
})
|
|
|
|
.unwrap();
|
2021-09-28 18:10:09 +02:00
|
|
|
}
|
|
|
|
|
2022-10-16 01:39:01 +02:00
|
|
|
Ok((index_scheduler, auth_controller))
|
|
|
|
}
|
|
|
|
|
2022-12-13 17:25:49 +01:00
|
|
|
/// Try to start the IndexScheduler and AuthController without checking the VERSION file or anything.
|
|
|
|
fn open_or_create_database_unchecked(
|
2022-12-13 16:33:07 +01:00
|
|
|
opt: &Opt,
|
|
|
|
on_failure: OnFailure,
|
|
|
|
) -> anyhow::Result<(IndexScheduler, AuthController)> {
|
|
|
|
// we don't want to create anything in the data.ms yet, thus we
|
|
|
|
// wrap our two builders in a closure that'll be executed later.
|
|
|
|
let auth_controller = AuthController::new(&opt.db_path, &opt.master_key);
|
2023-06-22 23:10:22 +02:00
|
|
|
let instance_features = opt.to_instance_features();
|
2022-12-13 16:33:07 +01:00
|
|
|
let index_scheduler_builder = || -> anyhow::Result<_> {
|
|
|
|
Ok(IndexScheduler::new(IndexSchedulerOptions {
|
|
|
|
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
|
|
|
|
auth_path: opt.db_path.join("auth"),
|
|
|
|
tasks_path: opt.db_path.join("tasks"),
|
|
|
|
update_file_path: opt.db_path.join("update_files"),
|
|
|
|
indexes_path: opt.db_path.join("indexes"),
|
|
|
|
snapshots_path: opt.snapshot_dir.clone(),
|
2022-12-14 20:02:39 +01:00
|
|
|
dumps_path: opt.dump_dir.clone(),
|
2023-11-28 16:28:11 +01:00
|
|
|
webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
|
2023-12-19 12:18:45 +01:00
|
|
|
webhook_authorization_header: opt.task_webhook_authorization_header.clone(),
|
2022-12-13 16:33:07 +01:00
|
|
|
task_db_size: opt.max_task_db_size.get_bytes() as usize,
|
2023-01-11 17:34:46 +01:00
|
|
|
index_base_map_size: opt.max_index_size.get_bytes() as usize,
|
2023-05-15 11:23:58 +02:00
|
|
|
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
|
2022-12-13 16:33:07 +01:00
|
|
|
indexer_config: (&opt.indexer_options).try_into()?,
|
2023-01-03 14:26:37 +01:00
|
|
|
autobatching_enabled: true,
|
2024-02-21 14:33:40 +01:00
|
|
|
cleanup_enabled: !opt.experimental_replication_parameters,
|
2023-04-25 17:26:34 +02:00
|
|
|
max_number_of_tasks: 1_000_000,
|
2023-12-12 10:55:33 +01:00
|
|
|
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
|
2023-01-11 17:34:46 +01:00
|
|
|
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
|
|
|
|
index_count: DEFAULT_INDEX_COUNT,
|
2023-06-22 23:10:22 +02:00
|
|
|
instance_features,
|
2022-12-13 16:33:07 +01:00
|
|
|
})?)
|
|
|
|
};
|
|
|
|
|
|
|
|
match (
|
|
|
|
index_scheduler_builder(),
|
|
|
|
auth_controller.map_err(anyhow::Error::from),
|
|
|
|
create_version_file(&opt.db_path).map_err(anyhow::Error::from),
|
|
|
|
) {
|
|
|
|
(Ok(i), Ok(a), Ok(())) => Ok((i, a)),
|
|
|
|
(Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => {
|
|
|
|
if matches!(on_failure, OnFailure::RemoveDb) {
|
|
|
|
std::fs::remove_dir_all(&opt.db_path)?;
|
|
|
|
}
|
|
|
|
Err(e)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-12-13 17:25:49 +01:00
|
|
|
/// Ensure you're in a valid state and open the IndexScheduler + AuthController for you.
|
|
|
|
fn open_or_create_database(
|
2022-12-13 16:33:07 +01:00
|
|
|
opt: &Opt,
|
|
|
|
empty_db: bool,
|
|
|
|
) -> anyhow::Result<(IndexScheduler, AuthController)> {
|
|
|
|
if !empty_db {
|
|
|
|
check_version_file(&opt.db_path)?;
|
|
|
|
}
|
|
|
|
|
2022-12-13 17:25:49 +01:00
|
|
|
open_or_create_database_unchecked(opt, OnFailure::KeepDb)
|
2022-12-13 16:33:07 +01:00
|
|
|
}
|
|
|
|
|
2022-10-16 01:39:01 +02:00
|
|
|
fn import_dump(
|
|
|
|
db_path: &Path,
|
|
|
|
dump_path: &Path,
|
|
|
|
index_scheduler: &mut IndexScheduler,
|
|
|
|
auth: &mut AuthController,
|
|
|
|
) -> Result<(), anyhow::Error> {
|
|
|
|
let reader = File::open(dump_path)?;
|
|
|
|
let mut dump_reader = dump::DumpReader::open(reader)?;
|
|
|
|
|
|
|
|
if let Some(date) = dump_reader.date() {
|
2024-02-07 15:51:38 +01:00
|
|
|
tracing::info!(
|
2024-02-07 17:55:40 +01:00
|
|
|
version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
|
|
|
|
%date,
|
|
|
|
"Importing a dump of meilisearch"
|
2022-10-16 01:39:01 +02:00
|
|
|
);
|
|
|
|
} else {
|
2024-02-07 15:51:38 +01:00
|
|
|
tracing::info!(
|
2024-02-07 17:55:40 +01:00
|
|
|
version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
|
|
|
|
"Importing a dump of meilisearch",
|
2022-10-16 01:39:01 +02:00
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
let instance_uid = dump_reader.instance_uid()?;
|
|
|
|
|
|
|
|
// 1. Import the instance-uid.
|
|
|
|
if let Some(ref instance_uid) = instance_uid {
|
|
|
|
// we don't want to panic if there is an error with the instance-uid.
|
2022-10-20 18:00:07 +02:00
|
|
|
let _ = std::fs::write(db_path.join("instance-uid"), instance_uid.to_string().as_bytes());
|
2022-10-16 01:39:01 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
// 2. Import the `Key`s.
|
|
|
|
let mut keys = Vec::new();
|
|
|
|
auth.raw_delete_all_keys()?;
|
2022-10-17 17:38:31 +02:00
|
|
|
for key in dump_reader.keys()? {
|
2022-10-16 01:39:01 +02:00
|
|
|
let key = key?;
|
|
|
|
auth.raw_insert_key(key.clone())?;
|
|
|
|
keys.push(key);
|
|
|
|
}
|
|
|
|
|
2023-06-26 12:24:55 +02:00
|
|
|
// 3. Import the runtime features.
|
|
|
|
let features = dump_reader.features()?.unwrap_or_default();
|
|
|
|
index_scheduler.put_runtime_features(features)?;
|
|
|
|
|
2022-10-22 16:35:42 +02:00
|
|
|
let indexer_config = index_scheduler.indexer_config();
|
2022-10-16 01:39:01 +02:00
|
|
|
|
2022-10-17 13:11:12 +02:00
|
|
|
// /!\ The tasks must be imported AFTER importing the indexes or else the scheduler might
|
|
|
|
// try to process tasks while we're trying to import the indexes.
|
2022-10-16 01:39:01 +02:00
|
|
|
|
2023-06-26 12:24:55 +02:00
|
|
|
// 4. Import the indexes.
|
2022-10-16 01:39:01 +02:00
|
|
|
for index_reader in dump_reader.indexes()? {
|
|
|
|
let mut index_reader = index_reader?;
|
|
|
|
let metadata = index_reader.metadata();
|
2024-02-07 15:51:38 +01:00
|
|
|
tracing::info!("Importing index `{}`.", metadata.uid);
|
2022-12-21 15:16:31 +01:00
|
|
|
|
2022-12-22 11:46:17 +01:00
|
|
|
let date = Some((metadata.created_at, metadata.updated_at));
|
2022-12-21 14:28:00 +01:00
|
|
|
let index = index_scheduler.create_raw_index(&metadata.uid, date)?;
|
2022-10-16 01:39:01 +02:00
|
|
|
|
|
|
|
let mut wtxn = index.write_txn()?;
|
|
|
|
|
|
|
|
let mut builder = milli::update::Settings::new(&mut wtxn, &index, indexer_config);
|
2023-06-26 12:24:55 +02:00
|
|
|
// 4.1 Import the primary key if there is one.
|
2022-10-16 01:39:01 +02:00
|
|
|
if let Some(ref primary_key) = metadata.primary_key {
|
|
|
|
builder.set_primary_key(primary_key.to_string());
|
|
|
|
}
|
|
|
|
|
2023-06-26 12:24:55 +02:00
|
|
|
// 4.2 Import the settings.
|
2024-02-07 15:51:38 +01:00
|
|
|
tracing::info!("Importing the settings.");
|
2022-10-16 01:39:01 +02:00
|
|
|
let settings = index_reader.settings()?;
|
|
|
|
apply_settings_to_builder(&settings, &mut builder);
|
2024-02-07 15:51:38 +01:00
|
|
|
builder
|
|
|
|
.execute(|indexing_step| tracing::debug!("update: {:?}", indexing_step), || false)?;
|
2022-10-16 01:39:01 +02:00
|
|
|
|
2023-06-26 12:24:55 +02:00
|
|
|
// 4.3 Import the documents.
|
|
|
|
// 4.3.1 We need to recreate the grenad+obkv format accepted by the index.
|
2024-02-07 15:51:38 +01:00
|
|
|
tracing::info!("Importing the documents.");
|
2022-10-17 17:12:37 +02:00
|
|
|
let file = tempfile::tempfile()?;
|
|
|
|
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
|
2022-10-16 01:39:01 +02:00
|
|
|
for document in index_reader.documents()? {
|
|
|
|
builder.append_json_object(&document?)?;
|
|
|
|
}
|
2022-10-17 17:12:37 +02:00
|
|
|
|
|
|
|
// This flush the content of the batch builder.
|
|
|
|
let file = builder.into_inner()?.into_inner()?;
|
2022-10-16 01:39:01 +02:00
|
|
|
|
2023-06-26 12:24:55 +02:00
|
|
|
// 4.3.2 We feed it to the milli index.
|
2022-10-16 01:39:01 +02:00
|
|
|
let reader = BufReader::new(file);
|
|
|
|
let reader = DocumentsBatchReader::from_reader(reader)?;
|
|
|
|
|
2024-02-26 22:15:57 +01:00
|
|
|
let embedder_configs = index.embedding_configs(&wtxn)?;
|
|
|
|
let embedders = index_scheduler.embedders(embedder_configs)?;
|
|
|
|
|
2022-10-16 01:39:01 +02:00
|
|
|
let builder = milli::update::IndexDocuments::new(
|
|
|
|
&mut wtxn,
|
|
|
|
&index,
|
|
|
|
indexer_config,
|
|
|
|
IndexDocumentsConfig {
|
|
|
|
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
|
|
|
..Default::default()
|
|
|
|
},
|
2024-02-07 15:51:38 +01:00
|
|
|
|indexing_step| tracing::trace!("update: {:?}", indexing_step),
|
2022-10-18 13:47:22 +02:00
|
|
|
|| false,
|
2022-10-16 01:39:01 +02:00
|
|
|
)?;
|
|
|
|
|
2024-02-26 22:15:57 +01:00
|
|
|
let builder = builder.with_embedders(embedders);
|
|
|
|
|
2022-10-16 01:39:01 +02:00
|
|
|
let (builder, user_result) = builder.add_documents(reader)?;
|
2024-02-07 17:55:40 +01:00
|
|
|
let user_result = user_result?;
|
|
|
|
tracing::info!(documents_found = user_result, "{} documents found.", user_result);
|
2022-10-16 01:39:01 +02:00
|
|
|
builder.execute()?;
|
|
|
|
wtxn.commit()?;
|
2024-02-07 15:51:38 +01:00
|
|
|
tracing::info!("All documents successfully imported.");
|
2022-10-16 01:39:01 +02:00
|
|
|
}
|
2022-10-17 13:11:12 +02:00
|
|
|
|
2023-03-29 14:27:40 +02:00
|
|
|
let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
|
|
|
|
|
2023-06-26 12:24:55 +02:00
|
|
|
// 5. Import the tasks.
|
2022-10-17 17:38:31 +02:00
|
|
|
for ret in dump_reader.tasks()? {
|
2022-10-17 13:11:12 +02:00
|
|
|
let (task, file) = ret?;
|
2023-03-29 14:27:40 +02:00
|
|
|
index_scheduler_dump.register_dumped_task(task, file)?;
|
2022-10-17 13:11:12 +02:00
|
|
|
}
|
2023-03-29 14:27:40 +02:00
|
|
|
Ok(index_scheduler_dump.finish()?)
|
2021-09-28 18:10:09 +02:00
|
|
|
}
|
|
|
|
|
2021-10-12 14:32:44 +02:00
|
|
|
pub fn configure_data(
|
|
|
|
config: &mut web::ServiceConfig,
|
2022-09-27 16:33:37 +02:00
|
|
|
index_scheduler: Data<IndexScheduler>,
|
2023-04-06 13:38:47 +02:00
|
|
|
auth: Data<AuthController>,
|
2021-10-12 14:32:44 +02:00
|
|
|
opt: &Opt,
|
2024-02-12 11:06:37 +01:00
|
|
|
(logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle),
|
2021-10-29 15:58:06 +02:00
|
|
|
analytics: Arc<dyn Analytics>,
|
2021-10-12 14:32:44 +02:00
|
|
|
) {
|
2024-03-26 17:28:03 +01:00
|
|
|
let search_queue = SearchQueue::new(
|
|
|
|
opt.experimental_search_queue_size,
|
|
|
|
available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap()),
|
|
|
|
);
|
2021-09-20 15:31:03 +02:00
|
|
|
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
|
2021-06-23 13:21:48 +02:00
|
|
|
config
|
2022-09-27 16:33:37 +02:00
|
|
|
.app_data(index_scheduler)
|
2021-11-08 18:31:27 +01:00
|
|
|
.app_data(auth)
|
2024-03-26 17:28:03 +01:00
|
|
|
.app_data(web::Data::new(search_queue))
|
2021-10-29 15:58:06 +02:00
|
|
|
.app_data(web::Data::from(analytics))
|
2024-02-12 11:06:37 +01:00
|
|
|
.app_data(web::Data::new(logs_route))
|
|
|
|
.app_data(web::Data::new(logs_stderr))
|
2024-02-20 11:24:44 +01:00
|
|
|
.app_data(web::Data::new(opt.clone()))
|
2021-06-23 13:21:48 +02:00
|
|
|
.app_data(
|
|
|
|
web::JsonConfig::default()
|
2023-11-25 19:41:16 +01:00
|
|
|
.limit(http_payload_size_limit)
|
2021-10-05 13:30:53 +02:00
|
|
|
.content_type(|mime| mime == mime::APPLICATION_JSON)
|
2021-10-06 11:49:34 +02:00
|
|
|
.error_handler(|err, req: &HttpRequest| match err {
|
|
|
|
JsonPayloadError::ContentType => match req.headers().get(CONTENT_TYPE) {
|
|
|
|
Some(content_type) => MeilisearchHttpError::InvalidContentType(
|
|
|
|
content_type.to_str().unwrap_or("unknown").to_string(),
|
2021-10-05 13:30:53 +02:00
|
|
|
vec![mime::APPLICATION_JSON.to_string()],
|
2021-10-06 11:49:34 +02:00
|
|
|
)
|
|
|
|
.into(),
|
|
|
|
None => MeilisearchHttpError::MissingContentType(vec![
|
|
|
|
mime::APPLICATION_JSON.to_string(),
|
|
|
|
])
|
|
|
|
.into(),
|
|
|
|
},
|
|
|
|
err => PayloadError::from(err).into(),
|
2021-10-05 13:30:53 +02:00
|
|
|
}),
|
2021-06-23 13:21:48 +02:00
|
|
|
)
|
2021-06-23 13:58:22 +02:00
|
|
|
.app_data(PayloadConfig::new(http_payload_size_limit))
|
2021-06-23 13:21:48 +02:00
|
|
|
.app_data(
|
2021-10-06 11:49:34 +02:00
|
|
|
web::QueryConfig::default().error_handler(|err, _req| PayloadError::from(err).into()),
|
2021-06-23 13:21:48 +02:00
|
|
|
);
|
|
|
|
}
|
2021-03-10 11:56:51 +01:00
|
|
|
|
2021-06-23 13:21:48 +02:00
|
|
|
/// Registers the handler of the root (`/`) route.
///
/// When `enable_frontend` is set, every generated mini-dashboard asset gets
/// its own `GET` route and `index.html` is served on `/`. Otherwise `/` is
/// handled by `routes::running`.
#[cfg(feature = "mini-dashboard")]
pub fn dashboard(config: &mut web::ServiceConfig, enable_frontend: bool) {
    use actix_web::HttpResponse;
    use static_files::Resource;

    mod generated {
        include!(concat!(env!("OUT_DIR"), "/generated.rs"));
    }

    if enable_frontend {
        let generated = generated::generate();
        // Generate routes for mini-dashboard assets
        for (path, resource) in generated.into_iter() {
            let Resource { mime_type, data, .. } = resource;
            // `index.html` is served on `/`, every other asset on its own path.
            let route = if path == "index.html" { "/" } else { path };
            config.service(web::resource(route).route(web::get().to(move || async move {
                HttpResponse::Ok().content_type(mime_type).body(data)
            })));
        }
    } else {
        config.service(web::resource("/").route(web::get().to(routes::running)));
    }
}
|
|
|
|
|
|
|
|
#[cfg(not(feature = "mini-dashboard"))]
|
|
|
|
pub fn dashboard(config: &mut web::ServiceConfig, _enable_frontend: bool) {
|
2021-06-24 19:02:28 +02:00
|
|
|
config.service(web::resource("/").route(web::get().to(routes::running)));
|
2021-06-23 13:21:48 +02:00
|
|
|
}
|