MeiliSearch/meilisearch-http/src/lib.rs

405 lines
15 KiB
Rust
Raw Normal View History

#![allow(rustdoc::private_intra_doc_links)]
2021-06-15 17:55:27 +02:00
#[macro_use]
2020-12-12 13:32:06 +01:00
pub mod error;
pub mod analytics;
2021-06-24 14:22:12 +02:00
#[macro_use]
2021-06-23 14:56:02 +02:00
pub mod extractors;
2021-03-15 18:11:10 +01:00
pub mod option;
pub mod routes;
pub mod search;
2022-08-29 12:36:54 +02:00
#[cfg(feature = "metrics")]
pub mod metrics;
#[cfg(feature = "metrics")]
pub mod route_metrics;
2022-10-20 18:00:07 +02:00
use std::fs::File;
use std::io::{BufReader, BufWriter};
use std::path::Path;
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
use std::thread;
use std::time::Duration;
2021-09-28 18:10:09 +02:00
use actix_cors::Cors;
use actix_http::body::MessageBody;
2022-10-20 18:00:07 +02:00
use actix_web::dev::{ServiceFactory, ServiceResponse};
use actix_web::error::JsonPayloadError;
use actix_web::web::Data;
use actix_web::{middleware, web, HttpRequest};
use analytics::Analytics;
2022-10-16 01:39:01 +02:00
use anyhow::bail;
use error::PayloadError;
2021-06-24 16:25:52 +02:00
use extractors::payload::PayloadConfig;
2022-10-20 18:00:07 +02:00
use http::header::CONTENT_TYPE;
2022-09-27 16:33:37 +02:00
use index_scheduler::IndexScheduler;
use log::error;
use meilisearch_auth::AuthController;
2022-10-20 18:00:07 +02:00
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
use meilisearch_types::settings::apply_settings_to_builder;
use meilisearch_types::tasks::KindWithContent;
2022-10-25 15:51:15 +02:00
use meilisearch_types::versioning::{check_version_file, create_version_file};
use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
2022-10-20 18:00:07 +02:00
pub use option::Opt;
use crate::error::MeilisearchHttpError;
2021-09-20 15:31:03 +02:00
2022-01-19 11:21:19 +01:00
/// Process-wide boolean flag, initialized to `false`.
///
/// NOTE(review): nothing in this file reads or writes this flag; presumably it
/// toggles the autobatching of tasks — confirm against the code that uses it.
pub static AUTOBATCHING_ENABLED: AtomicBool = AtomicBool::new(false);
2022-10-16 01:39:01 +02:00
/// Check if a db is empty. It does not provide any information on the
/// validity of the data in it.
///
/// A database is considered empty only when its path does not exist or when
/// it is a directory without any entry. Everything else is reported as non
/// empty: a directory with at least one entry, a regular file, or a path whose
/// content cannot be listed.
fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
    let db_path = db_path.as_ref();

    if !db_path.exists() {
        return true;
    }

    match db_path.read_dir() {
        // Looking at the first entry is enough, there is no need to walk the whole directory.
        Ok(mut entries) => entries.next().is_none(),
        // If we encounter an error or if the db is a file we consider the db non empty:
        // a path we cannot inspect must never be mistaken for a fresh location.
        Err(_) => false,
    }
}
pub fn create_app(
index_scheduler: Data<IndexScheduler>,
auth_controller: AuthController,
opt: Opt,
analytics: Arc<dyn Analytics>,
enable_dashboard: bool,
) -> actix_web::App<
impl ServiceFactory<
actix_web::dev::ServiceRequest,
Config = (),
Response = ServiceResponse<impl MessageBody>,
Error = actix_web::Error,
InitError = (),
>,
> {
let app = actix_web::App::new()
.configure(|s| {
configure_data(
s,
index_scheduler.clone(),
auth_controller.clone(),
&opt,
analytics.clone(),
)
})
.configure(routes::configure)
.configure(|s| dashboard(s, enable_dashboard));
#[cfg(feature = "metrics")]
let app = app.configure(|s| configure_metrics_route(s, opt.enable_metrics_route));
2022-10-22 16:35:42 +02:00
#[cfg(feature = "metrics")]
2022-10-20 18:00:07 +02:00
let app = app.wrap(Condition::new(opt.enable_metrics_route, route_metrics::RouteMetrics));
2022-10-22 16:35:42 +02:00
app.wrap(
Cors::default()
.send_wildcard()
.allow_any_header()
.allow_any_origin()
.allow_any_method()
.max_age(86_400), // 24h
)
.wrap(middleware::Logger::default())
.wrap(middleware::Compress::default())
.wrap(middleware::NormalizePath::new(middleware::TrailingSlash::Trim))
}
2022-09-27 16:33:37 +02:00
// TODO: TAMO: Finish setting up things
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, AuthController)> {
2022-10-16 01:39:01 +02:00
// we don't want to create anything in the data.ms yet, thus we
// wrap our two builders in a closure that'll be executed later.
let auth_controller_builder = || AuthController::new(&opt.db_path, &opt.master_key);
let index_scheduler_builder = || {
IndexScheduler::new(
2022-10-25 15:06:28 +02:00
opt.db_path.join(VERSION_FILE_NAME),
opt.db_path.join("auth"),
2022-10-16 01:39:01 +02:00
opt.db_path.join("tasks"),
opt.db_path.join("update_files"),
opt.db_path.join("indexes"),
opt.snapshot_dir.clone(),
2022-10-16 01:39:01 +02:00
opt.dumps_dir.clone(),
opt.max_task_db_size.get_bytes() as usize,
2022-10-16 01:39:01 +02:00
opt.max_index_size.get_bytes() as usize,
(&opt.indexer_options).try_into()?,
true,
)
};
let meilisearch_builder = || -> anyhow::Result<_> {
// if anything wrong happens we delete the `data.ms` entirely.
match (
index_scheduler_builder().map_err(anyhow::Error::from),
auth_controller_builder().map_err(anyhow::Error::from),
2022-10-25 15:51:15 +02:00
create_version_file(&opt.db_path).map_err(anyhow::Error::from),
) {
2022-10-25 15:51:15 +02:00
(Ok(i), Ok(a), Ok(())) => Ok((i, a)),
(Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => {
std::fs::remove_dir_all(&opt.db_path)?;
Err(e)
}
}
};
2022-10-16 01:39:01 +02:00
2022-10-25 15:51:15 +02:00
let empty_db = is_empty_db(&opt.db_path);
let (index_scheduler, auth_controller) = if let Some(ref snapshot_path) = opt.import_snapshot {
let snapshot_path_exists = snapshot_path.exists();
if empty_db && snapshot_path_exists {
match compression::from_tar_gz(snapshot_path, &opt.db_path) {
Ok(()) => meilisearch_builder()?,
Err(e) => {
std::fs::remove_dir_all(&opt.db_path)?;
return Err(e);
}
}
} else if !empty_db && !opt.ignore_snapshot_if_db_exists {
bail!(
"database already exists at {:?}, try to delete it or rename it",
opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned())
)
} else if !snapshot_path_exists && !opt.ignore_missing_snapshot {
bail!("snapshot doesn't exist at {}", snapshot_path.display())
} else {
meilisearch_builder()?
}
2022-10-16 01:39:01 +02:00
} else if let Some(ref path) = opt.import_dump {
let src_path_exists = path.exists();
if empty_db && src_path_exists {
let (mut index_scheduler, mut auth_controller) = meilisearch_builder()?;
match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
Ok(()) => (index_scheduler, auth_controller),
Err(e) => {
std::fs::remove_dir_all(&opt.db_path)?;
2022-10-22 16:35:42 +02:00
return Err(e);
}
}
2022-10-16 01:39:01 +02:00
} else if !empty_db && !opt.ignore_dump_if_db_exists {
bail!(
"database already exists at {:?}, try to delete it or rename it",
2022-10-20 18:00:07 +02:00
opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned())
2022-10-16 01:39:01 +02:00
)
} else if !src_path_exists && !opt.ignore_missing_dump {
bail!("dump doesn't exist at {:?}", path)
} else {
let (mut index_scheduler, mut auth_controller) = meilisearch_builder()?;
match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
Ok(()) => (index_scheduler, auth_controller),
Err(e) => {
std::fs::remove_dir_all(&opt.db_path)?;
2022-10-22 16:35:42 +02:00
return Err(e);
}
}
2022-10-16 01:39:01 +02:00
}
} else {
2022-10-25 15:51:15 +02:00
if !empty_db {
check_version_file(&opt.db_path)?;
}
meilisearch_builder()?
2022-10-16 01:39:01 +02:00
};
2022-09-27 16:33:37 +02:00
// We create a loop in a thread that registers snapshotCreation tasks
let index_scheduler = Arc::new(index_scheduler);
2021-09-28 18:10:09 +02:00
if opt.schedule_snapshot {
let snapshot_delay = Duration::from_secs(opt.snapshot_interval_sec);
let index_scheduler = index_scheduler.clone();
thread::spawn(move || loop {
thread::sleep(snapshot_delay);
if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation) {
error!("Error while registering snapshot: {}", e);
}
});
2021-09-28 18:10:09 +02:00
}
2022-10-16 01:39:01 +02:00
Ok((index_scheduler, auth_controller))
}
fn import_dump(
db_path: &Path,
dump_path: &Path,
index_scheduler: &mut IndexScheduler,
auth: &mut AuthController,
) -> Result<(), anyhow::Error> {
let reader = File::open(dump_path)?;
let mut dump_reader = dump::DumpReader::open(reader)?;
if let Some(date) = dump_reader.date() {
log::info!(
"Importing a dump of meilisearch `{:?}` from the {}",
dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
date
);
} else {
log::info!(
"Importing a dump of meilisearch `{:?}`",
dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
);
}
let instance_uid = dump_reader.instance_uid()?;
// 1. Import the instance-uid.
if let Some(ref instance_uid) = instance_uid {
// we don't want to panic if there is an error with the instance-uid.
2022-10-20 18:00:07 +02:00
let _ = std::fs::write(db_path.join("instance-uid"), instance_uid.to_string().as_bytes());
2022-10-16 01:39:01 +02:00
};
// 2. Import the `Key`s.
let mut keys = Vec::new();
auth.raw_delete_all_keys()?;
for key in dump_reader.keys()? {
2022-10-16 01:39:01 +02:00
let key = key?;
auth.raw_insert_key(key.clone())?;
keys.push(key);
}
2022-10-22 16:35:42 +02:00
let indexer_config = index_scheduler.indexer_config();
2022-10-16 01:39:01 +02:00
// /!\ The tasks must be imported AFTER importing the indexes or else the scheduler might
// try to process tasks while we're trying to import the indexes.
2022-10-16 01:39:01 +02:00
// 3. Import the indexes.
2022-10-16 01:39:01 +02:00
for index_reader in dump_reader.indexes()? {
let mut index_reader = index_reader?;
let metadata = index_reader.metadata();
log::info!("Importing index `{}`.", metadata.uid);
let index = index_scheduler.create_raw_index(&metadata.uid)?;
let mut wtxn = index.write_txn()?;
let mut builder = milli::update::Settings::new(&mut wtxn, &index, indexer_config);
// 3.1 Import the primary key if there is one.
2022-10-16 01:39:01 +02:00
if let Some(ref primary_key) = metadata.primary_key {
builder.set_primary_key(primary_key.to_string());
}
// 3.2 Import the settings.
2022-10-16 01:39:01 +02:00
log::info!("Importing the settings.");
let settings = index_reader.settings()?;
apply_settings_to_builder(&settings, &mut builder);
2022-10-20 18:00:07 +02:00
builder.execute(|indexing_step| log::debug!("update: {:?}", indexing_step), || false)?;
2022-10-16 01:39:01 +02:00
// 3.3 Import the documents.
// 3.3.1 We need to recreate the grenad+obkv format accepted by the index.
2022-10-16 01:39:01 +02:00
log::info!("Importing the documents.");
let file = tempfile::tempfile()?;
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
2022-10-16 01:39:01 +02:00
for document in index_reader.documents()? {
builder.append_json_object(&document?)?;
}
// This flush the content of the batch builder.
let file = builder.into_inner()?.into_inner()?;
2022-10-16 01:39:01 +02:00
// 3.3.2 We feed it to the milli index.
2022-10-16 01:39:01 +02:00
let reader = BufReader::new(file);
let reader = DocumentsBatchReader::from_reader(reader)?;
let builder = milli::update::IndexDocuments::new(
&mut wtxn,
&index,
indexer_config,
IndexDocumentsConfig {
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
},
|indexing_step| log::debug!("update: {:?}", indexing_step),
|| false,
2022-10-16 01:39:01 +02:00
)?;
let (builder, user_result) = builder.add_documents(reader)?;
log::info!("{} documents found.", user_result?);
builder.execute()?;
wtxn.commit()?;
log::info!("All documents successfully imported.");
}
// 4. Import the tasks.
for ret in dump_reader.tasks()? {
let (task, file) = ret?;
index_scheduler.register_dumped_task(task, file)?;
}
2022-10-16 01:39:01 +02:00
Ok(())
2021-09-28 18:10:09 +02:00
}
pub fn configure_data(
config: &mut web::ServiceConfig,
2022-09-27 16:33:37 +02:00
index_scheduler: Data<IndexScheduler>,
auth: AuthController,
opt: &Opt,
analytics: Arc<dyn Analytics>,
) {
2021-09-20 15:31:03 +02:00
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
2021-06-23 13:21:48 +02:00
config
2022-09-27 16:33:37 +02:00
.app_data(index_scheduler)
.app_data(auth)
.app_data(web::Data::from(analytics))
2021-06-23 13:21:48 +02:00
.app_data(
web::JsonConfig::default()
.content_type(|mime| mime == mime::APPLICATION_JSON)
.error_handler(|err, req: &HttpRequest| match err {
JsonPayloadError::ContentType => match req.headers().get(CONTENT_TYPE) {
Some(content_type) => MeilisearchHttpError::InvalidContentType(
content_type.to_str().unwrap_or("unknown").to_string(),
vec![mime::APPLICATION_JSON.to_string()],
)
.into(),
None => MeilisearchHttpError::MissingContentType(vec![
mime::APPLICATION_JSON.to_string(),
])
.into(),
},
err => PayloadError::from(err).into(),
}),
2021-06-23 13:21:48 +02:00
)
2021-06-23 13:58:22 +02:00
.app_data(PayloadConfig::new(http_payload_size_limit))
2021-06-23 13:21:48 +02:00
.app_data(
web::QueryConfig::default().error_handler(|err, _req| PayloadError::from(err).into()),
2021-06-23 13:21:48 +02:00
);
}
2021-03-10 11:56:51 +01:00
2021-06-23 13:21:48 +02:00
/// Registers the route served on `/`, and the mini-dashboard assets when enabled.
///
/// When `enable_frontend` is `false`, `/` is handled by `routes::running`.
/// Otherwise every generated asset is served on a `GET` route at its own path,
/// except `index.html` which is served on `/`.
#[cfg(feature = "mini-dashboard")]
pub fn dashboard(config: &mut web::ServiceConfig, enable_frontend: bool) {
    use actix_web::HttpResponse;
    use static_files::Resource;

    mod generated {
        include!(concat!(env!("OUT_DIR"), "/generated.rs"));
    }

    if !enable_frontend {
        config.service(web::resource("/").route(web::get().to(routes::running)));
        return;
    }

    // Generate routes for mini-dashboard assets
    for (path, Resource { mime_type, data, .. }) in generated::generate() {
        let serve_asset = web::get()
            .to(move || async move { HttpResponse::Ok().content_type(mime_type).body(data) });

        // `index.html` is exposed on `/`, every other asset on its own path.
        if path == "index.html" {
            config.service(web::resource("/").route(serve_asset));
        } else {
            config.service(web::resource(path).route(serve_asset));
        }
    }
}
#[cfg(not(feature = "mini-dashboard"))]
pub fn dashboard(config: &mut web::ServiceConfig, _enable_frontend: bool) {
2021-06-24 19:02:28 +02:00
config.service(web::resource("/").route(web::get().to(routes::running)));
2021-06-23 13:21:48 +02:00
}
2021-04-21 13:49:21 +02:00
2022-08-29 12:36:54 +02:00
/// Registers the `GET /metrics` route, handled by `route_metrics::get_metrics`,
/// when `enable_metrics_route` is `true`. Does nothing otherwise.
#[cfg(feature = "metrics")]
pub fn configure_metrics_route(config: &mut web::ServiceConfig, enable_metrics_route: bool) {
    if !enable_metrics_route {
        return;
    }

    let metrics =
        web::resource("/metrics").route(web::get().to(crate::route_metrics::get_metrics));
    config.service(metrics);
}