diff --git a/Cargo.lock b/Cargo.lock index 809535e1d..33660f836 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -872,6 +872,15 @@ dependencies = [ "termcolor", ] +[[package]] +name = "erased-serde" +version = "0.3.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3de9ad4541d99dc22b59134e7ff8dc3d6c988c89ecd7324bf10a8362b07a2afa" +dependencies = [ + "serde", +] + [[package]] name = "fake-simd" version = "0.1.2" @@ -1571,6 +1580,7 @@ name = "meilisearch-error" version = "0.22.0" dependencies = [ "actix-http", + "serde", ] [[package]] @@ -1606,6 +1616,7 @@ dependencies = [ "log", "main_error", "meilisearch-error", + "meilisearch-lib", "meilisearch-tokenizer", "memmap", "milli", @@ -1646,6 +1657,74 @@ dependencies = [ "zip", ] +[[package]] +name = "meilisearch-lib" +version = "0.1.0" +dependencies = [ + "actix-cors", + "actix-rt", + "actix-web", + "actix-web-static-files", + "anyhow", + "arc-swap", + "assert-json-diff", + "async-stream", + "async-trait", + "byte-unit", + "bytes", + "chrono", + "crossbeam-channel", + "either", + "env_logger", + "erased-serde", + "flate2", + "fst", + "futures", + "futures-util", + "heed", + "http", + "indexmap", + "itertools", + "log", + "main_error", + "meilisearch-error", + "meilisearch-tokenizer", + "memmap", + "milli", + "mime", + "mockall", + "num_cpus", + "obkv", + "once_cell", + "parking_lot", + "paste", + "pin-project", + "rand 0.8.4", + "rayon", + "regex", + "reqwest", + "rustls", + "serde", + "serde_json", + "serde_url_params", + "serdeval", + "sha2", + "siphasher", + "slice-group-by", + "structopt", + "sysinfo", + "tar", + "tempdir", + "tempfile", + "thiserror", + "tokio", + "tokio-stream", + "urlencoding", + "uuid", + "walkdir", + "whoami", +] + [[package]] name = "meilisearch-tokenizer" version = "0.2.5" diff --git a/Cargo.toml b/Cargo.toml index a1dca038e..fc64a107f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,7 @@ members = [ "meilisearch-http", "meilisearch-error", + "meilisearch-lib", ] [profile.release] diff --git a/meilisearch-error/Cargo.toml b/meilisearch-error/Cargo.toml index 810270183..612e92821 100644 --- a/meilisearch-error/Cargo.toml +++ b/meilisearch-error/Cargo.toml @@ -6,3 +6,4 @@ edition = "2018" [dependencies] actix-http = "=3.0.0-beta.10" +serde = { version = "1.0.130", features = ["derive"] } diff --git a/meilisearch-error/src/lib.rs b/meilisearch-error/src/lib.rs index 5e08317a9..9d5b79f69 100644 --- a/meilisearch-error/src/lib.rs +++ b/meilisearch-error/src/lib.rs @@ -1,6 +1,7 @@ use std::fmt; use actix_http::http::StatusCode; +use serde::{Serialize, Deserialize}; pub trait ErrorCode: std::error::Error { fn error_code(&self) -> Code; @@ -45,6 +46,7 @@ impl fmt::Display for ErrorType { } } +#[derive(Serialize, Deserialize, Debug, Clone, Copy)] pub enum Code { // index related error CreateIndex, diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 02e72668b..eb3d550ab 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -45,6 +45,7 @@ indexmap = { version = "1.7.0", features = ["serde-1"] } itertools = "0.10.1" log = "0.4.14" main_error = "0.1.1" +meilisearch-lib = { path = "../meilisearch-lib" } meilisearch-error = { path = "../meilisearch-error" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" } memmap = "0.7.0" diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 11347175b..8d91c9e9c 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -4,8 +4,8 @@ use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; use log::debug; use serde::Serialize; use siphasher::sip::SipHasher; +use meilisearch_lib::MeiliSearch; -use crate::Data; use crate::Opt; const AMPLITUDE_API_KEY: &str = "f7fba398780e06d8fe6666a9be7e3d47"; @@ -18,8 +18,8 @@ struct EventProperties { } impl EventProperties { - async fn from(data: Data) -> anyhow::Result { - let stats = data.index_controller.get_all_stats().await?; + async fn from(data: MeiliSearch) -> anyhow::Result { + let stats = data.get_all_stats().await?; let database_size = stats.database_size; let last_update_timestamp = stats.last_update.map(|u| u.timestamp()); @@ -62,7 +62,7 @@ struct AmplitudeRequest<'a> { events: Vec>, } -pub async fn analytics_sender(data: Data, opt: Opt) { +pub async fn analytics_sender(data: MeiliSearch, opt: Opt) { let username = whoami::username(); let hostname = whoami::hostname(); let platform = whoami::platform(); diff --git a/meilisearch-http/src/data/mod.rs b/meilisearch-http/src/data/mod.rs deleted file mode 100644 index a4cd274ff..000000000 --- a/meilisearch-http/src/data/mod.rs +++ /dev/null @@ -1,86 +0,0 @@ -use std::ops::Deref; -use std::sync::Arc; - -use crate::index::{Checked, Settings}; -use crate::index_controller::{ - error::Result, DumpInfo, IndexController, IndexMetadata, IndexStats, Stats, -}; -use crate::option::Opt; - -pub mod search; -mod updates; - -#[derive(Clone)] -pub struct Data { - inner: Arc, -} - -impl Deref for Data { - type Target = DataInner; - - fn deref(&self) -> &Self::Target { - &self.inner - } -} - -pub struct DataInner { - pub index_controller: IndexController, - //pub api_keys: ApiKeys, -} - -impl Data { - pub fn new(options: Opt) -> anyhow::Result { - let path = options.db_path.clone(); - - let index_controller = IndexController::new(&path, &options)?; - - let inner = DataInner { - index_controller, - }; - let inner = Arc::new(inner); - - Ok(Data { inner }) - } - - pub async fn settings(&self, uid: String) -> Result> { - self.index_controller.settings(uid).await - } - - pub async fn list_indexes(&self) -> Result> { - self.index_controller.list_indexes().await - } - - pub async fn index(&self, uid: String) -> Result { - self.index_controller.get_index(uid).await - } - - //pub async fn create_index( - //&self, - //uid: String, - //primary_key: Option, - //) -> Result { - //let settings = IndexSettings { - //uid: Some(uid), - //primary_key, - //}; - - //let meta = self.index_controller.create_index(settings).await?; - //Ok(meta) - //} - - pub async fn get_index_stats(&self, uid: String) -> Result { - Ok(self.index_controller.get_index_stats(uid).await?) - } - - pub async fn get_all_stats(&self) -> Result { - Ok(self.index_controller.get_all_stats().await?) - } - - pub async fn create_dump(&self) -> Result { - Ok(self.index_controller.create_dump().await?) - } - - pub async fn dump_status(&self, uid: String) -> Result { - Ok(self.index_controller.dump_info(uid).await?) - } -} diff --git a/meilisearch-http/src/data/search.rs b/meilisearch-http/src/data/search.rs deleted file mode 100644 index 5ad8d4a07..000000000 --- a/meilisearch-http/src/data/search.rs +++ /dev/null @@ -1,34 +0,0 @@ -use serde_json::{Map, Value}; - -use super::Data; -use crate::index::{SearchQuery, SearchResult}; -use crate::index_controller::error::Result; - -impl Data { - pub async fn search(&self, index: String, search_query: SearchQuery) -> Result { - self.index_controller.search(index, search_query).await - } - - pub async fn retrieve_documents( - &self, - index: String, - offset: usize, - limit: usize, - attributes_to_retrieve: Option>, - ) -> Result>> { - self.index_controller - .documents(index, offset, limit, attributes_to_retrieve) - .await - } - - pub async fn retrieve_document( - &self, - index: String, - document_id: String, - attributes_to_retrieve: Option>, - ) -> Result> { - self.index_controller - .document(index, document_id, attributes_to_retrieve) - .await - } -} diff --git a/meilisearch-http/src/data/updates.rs b/meilisearch-http/src/data/updates.rs deleted file mode 100644 index 8228cd2b2..000000000 --- a/meilisearch-http/src/data/updates.rs +++ /dev/null @@ -1,32 +0,0 @@ -use crate::index_controller::Update; -use crate::index_controller::{error::Result, IndexMetadata, IndexSettings, UpdateStatus}; -use crate::Data; - -impl Data { - pub async fn register_update(&self, index_uid: &str, update: Update) -> Result { - let status = self.index_controller.register_update(index_uid, update).await?; - Ok(status) - } - - pub async fn get_update_status(&self, index: String, uid: u64) -> Result { - self.index_controller.update_status(index, uid).await - } - - pub async fn get_updates_status(&self, index: String) -> Result> { - self.index_controller.all_update_status(index).await - } - - pub async fn update_index( - &self, - uid: String, - primary_key: Option, - new_uid: Option, - ) -> Result { - let settings = IndexSettings { - uid: new_uid, - primary_key, - }; - - self.index_controller.update_index(uid, settings).await - } -} diff --git a/meilisearch-http/src/error.rs b/meilisearch-http/src/error.rs index 61b8dbcd9..c18c32ea5 100644 --- a/meilisearch-http/src/error.rs +++ b/meilisearch-http/src/error.rs @@ -55,18 +55,6 @@ impl aweb::error::ResponseError for ResponseError { } } -macro_rules! internal_error { - ($target:ty : $($other:path), *) => { - $( - impl From<$other> for $target { - fn from(other: $other) -> Self { - Self::Internal(Box::new(other)) - } - } - )* - } -} - #[derive(Debug)] pub struct MilliError<'a>(pub &'a milli::Error); diff --git a/meilisearch-http/src/lib.rs b/meilisearch-http/src/lib.rs index af7e776d7..307bbcefa 100644 --- a/meilisearch-http/src/lib.rs +++ b/meilisearch-http/src/lib.rs @@ -38,7 +38,6 @@ //! Most of the routes use [extractors] to handle the authentication. #![allow(rustdoc::private_intra_doc_links)] -pub mod data; #[macro_use] pub mod error; #[macro_use] @@ -46,11 +45,8 @@ pub mod extractors; #[cfg(all(not(debug_assertions), feature = "analytics"))] pub mod analytics; pub mod helpers; -mod index; -mod index_controller; pub mod option; pub mod routes; -pub use self::data::Data; use crate::extractors::authentication::AuthConfig; pub use option::Opt; @@ -58,6 +54,7 @@ use actix_web::web; use extractors::authentication::policies::*; use extractors::payload::PayloadConfig; +use meilisearch_lib::MeiliSearch; use sha2::Digest; #[derive(Clone)] @@ -86,14 +83,14 @@ impl ApiKeys { pub fn configure_data( config: &mut web::ServiceConfig, - data: Data, + data: MeiliSearch, opt: &Opt, ) { let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize; config - .app_data(web::Data::new(data.clone())) - // TODO!: Why are we passing the data with two different things? .app_data(data) + // TODO!: Why are we passing the data with two different things? + //.app_data(data) .app_data( web::JsonConfig::default() .limit(http_payload_size_limit) diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index 3c796f29d..77f439d05 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -1,7 +1,8 @@ use std::env; use actix_web::HttpServer; -use meilisearch_http::{create_app, Data, Opt}; +use meilisearch_http::{create_app, Opt}; +use meilisearch_lib::MeiliSearch; use structopt::StructOpt; #[cfg(all(not(debug_assertions), feature = "analytics"))] @@ -39,6 +40,26 @@ fn setup(opt: &Opt) -> anyhow::Result<()> { Ok(()) } +fn setup_meilisearch(opt: &Opt) -> anyhow::Result { + let mut meilisearch = MeiliSearch::builder(); + meilisearch + .set_max_index_size(opt.max_index_size.get_bytes() as usize) + .set_max_update_store_size(opt.max_udb_size.get_bytes() as usize) + .set_ignore_missing_snapshot(opt.ignore_missing_snapshot) + .set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists) + .set_dump_dst(opt.dumps_dir.clone()) + .set_snapshot_dir(opt.snapshot_dir.clone()); + + if let Some(ref path) = opt.import_snapshot { + meilisearch.set_import_snapshot(path.clone()); + } + if let Some(ref path) = opt.import_dump { + meilisearch.set_dump_src(path.clone()); + } + + meilisearch.build(opt.db_path.clone(), opt.indexer_options.clone()) +} + #[actix_web::main] async fn main() -> anyhow::Result<()> { let opt = Opt::from_args(); @@ -55,23 +76,23 @@ async fn main() -> anyhow::Result<()> { _ => unreachable!(), } - let data = Data::new(opt.clone())?; + let meilisearch = setup_meilisearch(&opt)?; #[cfg(all(not(debug_assertions), feature = "analytics"))] if !opt.no_analytics { - let analytics_data = data.clone(); + let analytics_data = meilisearch.clone(); let analytics_opt = opt.clone(); tokio::task::spawn(analytics::analytics_sender(analytics_data, analytics_opt)); } print_launch_resume(&opt); - run_http(data, opt).await?; + run_http(meilisearch, opt).await?; Ok(()) } -async fn run_http(data: Data, opt: Opt) -> anyhow::Result<()> { +async fn run_http(data: MeiliSearch, opt: Opt) -> anyhow::Result<()> { let _enable_dashboard = &opt.env == "development"; let opt_clone = opt.clone(); let http_server = HttpServer::new(move || create_app!(data, _enable_dashboard, opt_clone)) diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 3a0ab8acb..2a4d425e9 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -8,7 +8,6 @@ use std::sync::Arc; use std::fs; use byte_unit::Byte; -use milli::CompressionType; use rustls::internal::pemfile::{certs, pkcs8_private_keys, rsa_private_keys}; use rustls::{ AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, NoClientAuth, @@ -16,56 +15,7 @@ use rustls::{ }; use structopt::StructOpt; use sysinfo::{RefreshKind, System, SystemExt}; - -#[derive(Debug, Clone, StructOpt)] -pub struct IndexerOpts { - /// The amount of documents to skip before printing - /// a log regarding the indexing advancement. - #[structopt(long, default_value = "100000")] // 100k - pub log_every_n: usize, - - /// Grenad max number of chunks in bytes. - #[structopt(long)] - pub max_nb_chunks: Option, - - /// The maximum amount of memory the indexer will use. It defaults to 2/3 - /// of the available memory. It is recommended to use something like 80%-90% - /// of the available memory, no more. - /// - /// In case the engine is unable to retrieve the available memory the engine will - /// try to use the memory it needs but without real limit, this can lead to - /// Out-Of-Memory issues and it is recommended to specify the amount of memory to use. - #[structopt(long, default_value)] - pub max_memory: MaxMemory, - - /// The name of the compression algorithm to use when compressing intermediate - /// Grenad chunks while indexing documents. - /// - /// Choosing a fast algorithm will make the indexing faster but may consume more memory. - #[structopt(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])] - pub chunk_compression_type: CompressionType, - - /// The level of compression of the chosen algorithm. - #[structopt(long, requires = "chunk-compression-type")] - pub chunk_compression_level: Option, - - /// Number of parallel jobs for indexing, defaults to # of CPUs. - #[structopt(long)] - pub indexing_jobs: Option, -} - -impl Default for IndexerOpts { - fn default() -> Self { - Self { - log_every_n: 100_000, - max_nb_chunks: None, - max_memory: MaxMemory::default(), - chunk_compression_type: CompressionType::None, - chunk_compression_level: None, - indexing_jobs: None, - } - } -} +use meilisearch_lib::options::IndexerOpts; const POSSIBLE_ENV: [&str; 2] = ["development", "production"]; diff --git a/meilisearch-http/src/routes/dump.rs b/meilisearch-http/src/routes/dump.rs index 72bc55986..a598f875b 100644 --- a/meilisearch-http/src/routes/dump.rs +++ b/meilisearch-http/src/routes/dump.rs @@ -1,17 +1,17 @@ use actix_web::{web, HttpResponse}; use log::debug; +use meilisearch_lib::MeiliSearch; use serde::{Deserialize, Serialize}; use crate::error::ResponseError; use crate::extractors::authentication::{policies::*, GuardedData}; -use crate::Data; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(create_dump))) .service(web::resource("/{dump_uid}/status").route(web::get().to(get_dump_status))); } -pub async fn create_dump(data: GuardedData) -> Result { +pub async fn create_dump(data: GuardedData) -> Result { let res = data.create_dump().await?; debug!("returns: {:?}", res); @@ -30,10 +30,10 @@ struct DumpParam { } async fn get_dump_status( - data: GuardedData, + data: GuardedData, path: web::Path, ) -> Result { - let res = data.dump_status(path.dump_uid.clone()).await?; + let res = data.dump_info(path.dump_uid.clone()).await?; debug!("returns: {:?}", res); Ok(HttpResponse::Ok().json(res)) diff --git a/meilisearch-http/src/routes/indexes/documents.rs b/meilisearch-http/src/routes/indexes/documents.rs index be80a55a0..b7d13d16b 100644 --- a/meilisearch-http/src/routes/indexes/documents.rs +++ b/meilisearch-http/src/routes/indexes/documents.rs @@ -3,6 +3,8 @@ use actix_web::{web, HttpResponse}; use actix_web::web::Bytes; use futures::{Stream, StreamExt}; use log::debug; +use meilisearch_lib::MeiliSearch; +use meilisearch_lib::index_controller::{DocumentAdditionFormat, Update}; use milli::update::IndexDocumentsMethod; use serde::Deserialize; //use serde_json::Value; @@ -11,9 +13,7 @@ use tokio::sync::mpsc; use crate::error::ResponseError; use crate::extractors::authentication::{policies::*, GuardedData}; use crate::extractors::payload::Payload; -use crate::index_controller::{DocumentAdditionFormat, Update}; use crate::routes::IndexParam; -use crate::Data; const DEFAULT_RETRIEVE_DOCUMENTS_OFFSET: usize = 0; const DEFAULT_RETRIEVE_DOCUMENTS_LIMIT: usize = 20; @@ -88,20 +88,20 @@ pub fn configure(cfg: &mut web::ServiceConfig) { } pub async fn get_document( - data: GuardedData, + data: GuardedData, path: web::Path, ) -> Result { let index = path.index_uid.clone(); let id = path.document_id.clone(); let document = data - .retrieve_document(index, id, None as Option>) + .document(index, id, None as Option>) .await?; debug!("returns: {:?}", document); Ok(HttpResponse::Ok().json(document)) } //pub async fn delete_document( - //data: GuardedData, + //data: GuardedData, //path: web::Path, //) -> Result { //let update_status = data @@ -120,7 +120,7 @@ pub struct BrowseQuery { } pub async fn get_all_documents( - data: GuardedData, + data: GuardedData, path: web::Path, params: web::Query, ) -> Result { @@ -137,7 +137,7 @@ pub async fn get_all_documents( }); let documents = data - .retrieve_documents( + .documents( path.index_uid.clone(), params.offset.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_OFFSET), params.limit.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_LIMIT), @@ -157,7 +157,7 @@ pub struct UpdateDocumentsQuery { /// Route used when the payload type is "application/json" /// Used to add or replace documents pub async fn add_documents( - data: GuardedData, + data: GuardedData, path: web::Path, params: web::Query, body: Payload, @@ -180,7 +180,7 @@ pub async fn add_documents( /// Route used when the payload type is "application/json" /// Used to add or replace documents pub async fn update_documents( - data: GuardedData, + data: GuardedData, path: web::Path, params: web::Query, body: Payload, @@ -201,7 +201,7 @@ pub async fn update_documents( } //pub async fn delete_documents( - //data: GuardedData, + //data: GuardedData, //path: web::Path, //body: web::Json>, //) -> Result { @@ -221,7 +221,7 @@ pub async fn update_documents( //} //pub async fn clear_all_documents( - //data: GuardedData, + //data: GuardedData, //path: web::Path, //) -> Result { //let update_status = data.clear_documents(path.index_uid.clone()).await?; diff --git a/meilisearch-http/src/routes/indexes/mod.rs b/meilisearch-http/src/routes/indexes/mod.rs index ef68215b4..da7008640 100644 --- a/meilisearch-http/src/routes/indexes/mod.rs +++ b/meilisearch-http/src/routes/indexes/mod.rs @@ -1,12 +1,13 @@ use actix_web::{web, HttpResponse}; use chrono::{DateTime, Utc}; use log::debug; +use meilisearch_lib::MeiliSearch; +use meilisearch_lib::index_controller::IndexSettings; use serde::{Deserialize, Serialize}; use crate::error::ResponseError; use crate::extractors::authentication::{policies::*, GuardedData}; use crate::routes::IndexParam; -use crate::Data; pub mod documents; pub mod search; @@ -35,7 +36,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) { ); } -pub async fn list_indexes(data: GuardedData) -> Result { +pub async fn list_indexes(data: GuardedData) -> Result { let indexes = data.list_indexes().await?; debug!("returns: {:?}", indexes); Ok(HttpResponse::Ok().json(indexes)) @@ -49,7 +50,7 @@ pub struct IndexCreateRequest { } //pub async fn create_index( - //data: GuardedData, + //data: GuardedData, //body: web::Json, //) -> Result { //let body = body.into_inner(); @@ -75,30 +76,34 @@ pub struct UpdateIndexResponse { } pub async fn get_index( - data: GuardedData, + data: GuardedData, path: web::Path, ) -> Result { - let meta = data.index(path.index_uid.clone()).await?; + let meta = data.get_index(path.index_uid.clone()).await?; debug!("returns: {:?}", meta); Ok(HttpResponse::Ok().json(meta)) } pub async fn update_index( - data: GuardedData, + data: GuardedData, path: web::Path, body: web::Json, ) -> Result { debug!("called with params: {:?}", body); let body = body.into_inner(); + let settings = IndexSettings { + uid: body.uid, + primary_key: body.primary_key, + }; let meta = data - .update_index(path.into_inner().index_uid, body.primary_key, body.uid) + .update_index(path.into_inner().index_uid, settings) .await?; debug!("returns: {:?}", meta); Ok(HttpResponse::Ok().json(meta)) } //pub async fn delete_index( - //data: GuardedData, + //data: GuardedData, //path: web::Path, //) -> Result { //data.delete_index(path.index_uid.clone()).await?; @@ -106,7 +111,7 @@ pub async fn update_index( //} pub async fn get_index_stats( - data: GuardedData, + data: GuardedData, path: web::Path, ) -> Result { let response = data.get_index_stats(path.index_uid.clone()).await?; diff --git a/meilisearch-http/src/routes/indexes/search.rs b/meilisearch-http/src/routes/indexes/search.rs index 83f58648e..2b68e6ed6 100644 --- a/meilisearch-http/src/routes/indexes/search.rs +++ b/meilisearch-http/src/routes/indexes/search.rs @@ -1,13 +1,13 @@ use actix_web::{web, HttpResponse}; use log::debug; +use meilisearch_lib::MeiliSearch; +use meilisearch_lib::index::{default_crop_length, SearchQuery, DEFAULT_SEARCH_LIMIT}; use serde::Deserialize; use serde_json::Value; use crate::error::ResponseError; use crate::extractors::authentication::{policies::*, GuardedData}; -use crate::index::{default_crop_length, SearchQuery, DEFAULT_SEARCH_LIMIT}; use crate::routes::IndexParam; -use crate::Data; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( @@ -82,7 +82,7 @@ impl From for SearchQuery { } pub async fn search_with_url_query( - data: GuardedData, + data: GuardedData, path: web::Path, params: web::Query, ) -> Result { @@ -99,7 +99,7 @@ pub async fn search_with_url_query( } pub async fn search_with_post( - data: GuardedData, + data: GuardedData, path: web::Path, params: web::Json, ) -> Result { diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs index 051483b20..07a96003f 100644 --- a/meilisearch-http/src/routes/indexes/settings.rs +++ b/meilisearch-http/src/routes/indexes/settings.rs @@ -148,7 +148,7 @@ //); //pub async fn update_all( - //data: GuardedData, + //data: GuardedData, //index_uid: web::Path, //body: web::Json>, //) -> Result { @@ -162,7 +162,7 @@ //} //pub async fn get_all( - //data: GuardedData, + //data: GuardedData, //index_uid: web::Path, //) -> Result { //let settings = data.settings(index_uid.into_inner()).await?; @@ -171,7 +171,7 @@ //} //pub async fn delete_all( - //data: GuardedData, + //data: GuardedData, //index_uid: web::Path, //) -> Result { //let settings = Settings::cleared(); diff --git a/meilisearch-http/src/routes/indexes/updates.rs b/meilisearch-http/src/routes/indexes/updates.rs index 471636abf..547977790 100644 --- a/meilisearch-http/src/routes/indexes/updates.rs +++ b/meilisearch-http/src/routes/indexes/updates.rs @@ -1,12 +1,12 @@ use actix_web::{web, HttpResponse}; use chrono::{DateTime, Utc}; use log::debug; +use meilisearch_lib::MeiliSearch; use serde::{Deserialize, Serialize}; use crate::error::ResponseError; use crate::extractors::authentication::{policies::*, GuardedData}; use crate::routes::{IndexParam, UpdateStatusResponse}; -use crate::Data; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::get().to(get_all_updates_status))) @@ -37,12 +37,12 @@ pub struct UpdateParam { } pub async fn get_update_status( - data: GuardedData, + data: GuardedData, path: web::Path, ) -> Result { let params = path.into_inner(); let meta = data - .get_update_status(params.index_uid, params.update_id) + .update_status(params.index_uid, params.update_id) .await?; let meta = UpdateStatusResponse::from(meta); debug!("returns: {:?}", meta); @@ -50,10 +50,10 @@ pub async fn get_update_status( } pub async fn get_all_updates_status( - data: GuardedData, + data: GuardedData, path: web::Path, ) -> Result { - let metas = data.get_updates_status(path.into_inner().index_uid).await?; + let metas = data.all_update_status(path.into_inner().index_uid).await?; let metas = metas .into_iter() .map(UpdateStatusResponse::from) diff --git a/meilisearch-http/src/routes/mod.rs b/meilisearch-http/src/routes/mod.rs index 12b0612ad..6c99d1766 100644 --- a/meilisearch-http/src/routes/mod.rs +++ b/meilisearch-http/src/routes/mod.rs @@ -5,12 +5,12 @@ use chrono::{DateTime, Utc}; use log::debug; use serde::{Deserialize, Serialize}; +use meilisearch_lib::{MeiliSearch, UpdateResult, UpdateStatus, RegisterUpdate}; +use meilisearch_lib::index::{Settings, Unchecked}; + use crate::error::ResponseError; use crate::extractors::authentication::{policies::*, GuardedData}; -use crate::index::{Settings, Unchecked}; -use crate::index_controller::update_actor::RegisterUpdate; -use crate::index_controller::{UpdateResult, UpdateStatus}; -use crate::{ApiKeys, Data}; +use crate::ApiKeys; mod dump; mod indexes; @@ -187,15 +187,17 @@ impl From for UpdateStatusResponse { let duration = Duration::from_millis(duration as u64).as_secs_f64(); let update_id = failed.id(); - let response = failed.error; + let processed_at = failed.failed_at; + let enqueued_at = failed.from.from.enqueued_at; + let response = failed.into(); let content = FailedUpdateResult { update_id, update_type, response, duration, - enqueued_at: failed.from.from.enqueued_at, - processed_at: failed.failed_at, + enqueued_at, + processed_at, }; UpdateStatusResponse::Failed { content } } @@ -230,7 +232,7 @@ pub async fn running() -> HttpResponse { HttpResponse::Ok().json(serde_json::json!({ "status": "MeiliSearch is running" })) } -async fn get_stats(data: GuardedData) -> Result { +async fn get_stats(data: GuardedData) -> Result { let response = data.get_all_stats().await?; debug!("returns: {:?}", response); @@ -245,7 +247,7 @@ struct VersionResponse { pkg_version: String, } -async fn get_version(_data: GuardedData) -> HttpResponse { +async fn get_version(_data: GuardedData) -> HttpResponse { let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown"); let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown"); @@ -288,7 +290,7 @@ mod test { macro_rules! impl_is_policy { ($($param:ident)*) => { impl Is for Func - where Func: Fn(GuardedData, $($param,)*) -> Res {} + where Func: Fn(GuardedData, $($param,)*) -> Res {} }; } diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml new file mode 100644 index 000000000..7ef4ecad7 --- /dev/null +++ b/meilisearch-lib/Cargo.toml @@ -0,0 +1,72 @@ +[package] +name = "meilisearch-lib" +version = "0.1.0" +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +actix-cors = { git = "https://github.com/MarinPostma/actix-extras.git", rev = "963ac94d" } +actix-web = { version = "4.0.0-beta.9", features = ["rustls"] } +actix-web-static-files = { git = "https://github.com/MarinPostma/actix-web-static-files.git", rev = "39d8006", optional = true } +anyhow = { version = "1.0.43", features = ["backtrace"] } +async-stream = "0.3.2" +async-trait = "0.1.51" +arc-swap = "1.3.2" +byte-unit = { version = "4.0.12", default-features = false, features = ["std"] } +bytes = "1.1.0" +chrono = { version = "0.4.19", features = ["serde"] } +crossbeam-channel = "0.5.1" +either = "1.6.1" +env_logger = "0.9.0" +flate2 = "1.0.21" +fst = "0.4.7" +futures = "0.3.17" +futures-util = "0.3.17" +heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" } +http = "0.2.4" +indexmap = { version = "1.7.0", features = ["serde-1"] } +itertools = "0.10.1" +log = "0.4.14" +main_error = "0.1.1" +meilisearch-error = { path = "../meilisearch-error" } +meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" } +memmap = "0.7.0" +milli = { git = "https://github.com/meilisearch/milli.git", rev = "6de1b41" } +mime = "0.3.16" +num_cpus = "1.13.0" +once_cell = "1.8.0" +parking_lot = "0.11.2" +rand = "0.8.4" +rayon = "1.5.1" +regex = "1.5.4" +rustls = "0.19.1" +serde = { version = "1.0.130", features = ["derive"] } +serde_json = { version = "1.0.67", features = ["preserve_order"] } +sha2 = "0.9.6" +siphasher = "0.3.7" +slice-group-by = "0.2.6" +structopt = "0.3.23" +tar = "0.4.37" +tempfile = "3.2.0" +thiserror = "1.0.28" +tokio = { version = "1.11.0", features = ["full"] } +uuid = { version = "0.8.2", features = ["serde"] } +walkdir = "2.3.2" +obkv = "0.2.0" +pin-project = "1.0.8" +whoami = { version = "1.1.3", optional = true } +reqwest = { version = "0.11.4", features = ["json", "rustls-tls"], default-features = false, optional = true } +serdeval = "0.1.0" +sysinfo = "0.20.2" +tokio-stream = "0.1.7" +erased-serde = "0.3.16" + +[dev-dependencies] +actix-rt = "2.2.0" +assert-json-diff = { branch = "master", git = "https://github.com/qdequele/assert-json-diff" } +mockall = "0.10.2" +paste = "1.0.5" +serde_url_params = "0.2.1" +tempdir = "0.3.7" +urlencoding = "2.1.0" diff --git a/meilisearch-lib/src/error.rs b/meilisearch-lib/src/error.rs new file mode 100644 index 000000000..80141dae5 --- /dev/null +++ b/meilisearch-lib/src/error.rs @@ -0,0 +1,62 @@ +use std::error::Error; +use std::fmt; + +use meilisearch_error::{Code, ErrorCode}; +use milli::UserError; + +macro_rules! internal_error { + ($target:ty : $($other:path), *) => { + $( + impl From<$other> for $target { + fn from(other: $other) -> Self { + Self::Internal(Box::new(other)) + } + } + )* + } +} + +#[derive(Debug)] +pub struct MilliError<'a>(pub &'a milli::Error); + +impl Error for MilliError<'_> {} + +impl fmt::Display for MilliError<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +impl ErrorCode for MilliError<'_> { + fn error_code(&self) -> Code { + match self.0 { + milli::Error::InternalError(_) => Code::Internal, + milli::Error::IoError(_) => Code::Internal, + milli::Error::UserError(ref error) => { + match error { + // TODO: wait for spec for new error codes. + | UserError::SerdeJson(_) + | UserError::MaxDatabaseSizeReached + | UserError::InvalidCriterionName { .. } + | UserError::InvalidDocumentId { .. } + | UserError::InvalidStoreFile + | UserError::NoSpaceLeftOnDevice + | UserError::InvalidAscDescSyntax { .. } + | UserError::DocumentLimitReached => Code::Internal, + UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded, + UserError::InvalidFilter(_) => Code::Filter, + UserError::InvalidFilterAttribute(_) => Code::Filter, + UserError::InvalidSortName { .. } => Code::Sort, + UserError::MissingDocumentId { .. } => Code::MissingDocumentId, + UserError::MissingPrimaryKey => Code::MissingPrimaryKey, + UserError::PrimaryKeyCannotBeChanged => Code::PrimaryKeyAlreadyPresent, + UserError::PrimaryKeyCannotBeReset => Code::PrimaryKeyAlreadyPresent, + UserError::SortRankingRuleMissing => Code::Sort, + UserError::UnknownInternalDocumentId { .. } => Code::DocumentNotFound, + UserError::InvalidFacetsDistribution { .. } => Code::BadRequest, + UserError::InvalidSortableAttribute { .. } => Code::Sort, + } + } + } + } +} diff --git a/meilisearch-http/src/index/dump.rs b/meilisearch-lib/src/index/dump.rs similarity index 99% rename from meilisearch-http/src/index/dump.rs rename to meilisearch-lib/src/index/dump.rs index 9c8acf960..018ae6d2f 100644 --- a/meilisearch-http/src/index/dump.rs +++ b/meilisearch-lib/src/index/dump.rs @@ -6,7 +6,7 @@ use heed::RoTxn; use indexmap::IndexMap; use serde::{Deserialize, Serialize}; -use crate::option::IndexerOpts; +use crate::options::IndexerOpts; use super::error::Result; use super::{Index, Settings, Unchecked}; diff --git a/meilisearch-http/src/index/error.rs b/meilisearch-lib/src/index/error.rs similarity index 100% rename from meilisearch-http/src/index/error.rs rename to meilisearch-lib/src/index/error.rs diff --git a/meilisearch-http/src/index/mod.rs b/meilisearch-lib/src/index/mod.rs similarity index 99% rename from meilisearch-http/src/index/mod.rs rename to meilisearch-lib/src/index/mod.rs index 1ea481ec9..c05e337e2 100644 --- a/meilisearch-http/src/index/mod.rs +++ b/meilisearch-lib/src/index/mod.rs @@ -14,7 +14,7 @@ use error::Result; pub use search::{default_crop_length, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT}; pub use updates::{Checked, Facets, Settings, Unchecked}; -use crate::helpers::EnvSizer; +use crate::EnvSizer; use crate::index_controller::update_file_store::UpdateFileStore; use self::error::IndexError; diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-lib/src/index/search.rs similarity index 100% rename from meilisearch-http/src/index/search.rs rename to meilisearch-lib/src/index/search.rs diff --git a/meilisearch-http/src/index/update_handler.rs b/meilisearch-lib/src/index/update_handler.rs similarity index 96% rename from meilisearch-http/src/index/update_handler.rs rename to meilisearch-lib/src/index/update_handler.rs index 0ad71d313..8fba55341 100644 --- a/meilisearch-http/src/index/update_handler.rs +++ b/meilisearch-lib/src/index/update_handler.rs @@ -5,7 +5,7 @@ use rayon::ThreadPool; use crate::index_controller::update_actor::RegisterUpdate; use crate::index_controller::{Failed, Processed, Processing}; -use crate::option::IndexerOpts; +use crate::options::IndexerOpts; pub struct UpdateHandler { max_nb_chunks: Option, @@ -66,7 +66,7 @@ impl UpdateHandler { match result { Ok(result) => Ok(meta.process(result)), - Err(e) => Err(meta.fail(e.into())), + Err(e) => Err(meta.fail(e)), } } } diff --git a/meilisearch-http/src/index/updates.rs b/meilisearch-lib/src/index/updates.rs similarity index 100% rename from meilisearch-http/src/index/updates.rs rename to meilisearch-lib/src/index/updates.rs diff --git a/meilisearch-http/src/index_controller/dump_actor/actor.rs b/meilisearch-lib/src/index_controller/dump_actor/actor.rs similarity index 100% rename from meilisearch-http/src/index_controller/dump_actor/actor.rs rename to meilisearch-lib/src/index_controller/dump_actor/actor.rs diff --git a/meilisearch-http/src/index_controller/dump_actor/error.rs b/meilisearch-lib/src/index_controller/dump_actor/error.rs similarity index 100% rename from meilisearch-http/src/index_controller/dump_actor/error.rs rename to meilisearch-lib/src/index_controller/dump_actor/error.rs diff --git a/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs b/meilisearch-lib/src/index_controller/dump_actor/handle_impl.rs similarity index 100% rename from meilisearch-http/src/index_controller/dump_actor/handle_impl.rs rename to meilisearch-lib/src/index_controller/dump_actor/handle_impl.rs diff --git a/meilisearch-http/src/index_controller/dump_actor/loaders/mod.rs b/meilisearch-lib/src/index_controller/dump_actor/loaders/mod.rs similarity index 100% rename from meilisearch-http/src/index_controller/dump_actor/loaders/mod.rs rename to meilisearch-lib/src/index_controller/dump_actor/loaders/mod.rs diff --git a/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs b/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs similarity index 99% rename from meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs rename to meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs index 21893eb49..584828b4e 100644 --- a/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs @@ -11,7 +11,7 @@ use crate::index_controller::{self, uuid_resolver::HeedUuidStore, IndexMetadata} use crate::index_controller::{asc_ranking_rule, desc_ranking_rule}; use crate::{ index::Unchecked, - option::IndexerOpts, + options::IndexerOpts, }; #[derive(Serialize, Deserialize, Debug)] diff --git a/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs b/meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs similarity index 97% rename from meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs rename to meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs index eddd8a3b7..c39da1e44 100644 --- a/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs @@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize}; use crate::index::Index; use crate::index_controller::{update_actor::UpdateStore, uuid_resolver::HeedUuidStore}; -use crate::option::IndexerOpts; +use crate::options::IndexerOpts; #[derive(Serialize, Deserialize, Debug)] #[serde(rename_all = "camelCase")] diff --git a/meilisearch-http/src/index_controller/dump_actor/message.rs b/meilisearch-lib/src/index_controller/dump_actor/message.rs similarity index 100% rename from meilisearch-http/src/index_controller/dump_actor/message.rs rename to meilisearch-lib/src/index_controller/dump_actor/message.rs diff --git a/meilisearch-http/src/index_controller/dump_actor/mod.rs b/meilisearch-lib/src/index_controller/dump_actor/mod.rs similarity index 96% rename from meilisearch-http/src/index_controller/dump_actor/mod.rs rename to meilisearch-lib/src/index_controller/dump_actor/mod.rs index c8aad6815..e0f9535f3 100644 --- a/meilisearch-http/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/mod.rs @@ -18,7 +18,7 @@ pub use message::DumpMsg; use super::{update_actor::UpdateActorHandle, uuid_resolver::UuidResolverHandle}; use crate::index_controller::dump_actor::error::DumpActorError; -use crate::{helpers::compression, option::IndexerOpts}; +use crate::options::IndexerOpts; use error::Result; mod actor; @@ -112,7 +112,7 @@ pub fn load_dump( let tmp_src = tempfile::tempdir_in(".")?; let tmp_src_path = tmp_src.path(); - compression::from_tar_gz(&src_path, tmp_src_path)?; + crate::from_tar_gz(&src_path, tmp_src_path)?; let meta_path = tmp_src_path.join(META_FILE_NAME); let mut meta_file = File::open(&meta_path)?; @@ -162,6 +162,7 @@ impl DumpTask where U: UuidResolverHandle + Send + Sync + Clone + 'static, P: UpdateActorHandle + Send + Sync + Clone + 'static, + { async fn run(self) -> Result<()> { trace!("Performing dump."); @@ -186,7 +187,7 @@ where let dump_path = tokio::task::spawn_blocking(move || -> Result { let temp_dump_file = tempfile::NamedTempFile::new_in(&self.path)?; - compression::to_tar_gz(temp_dump_path, temp_dump_file.path()) + crate::to_tar_gz(temp_dump_path, temp_dump_file.path()) .map_err(|e| DumpActorError::Internal(e.into()))?; let dump_path = self.path.join(self.uid).with_extension("dump"); diff --git a/meilisearch-http/src/index_controller/error.rs b/meilisearch-lib/src/index_controller/error.rs similarity index 100% rename from meilisearch-http/src/index_controller/error.rs rename to meilisearch-lib/src/index_controller/error.rs diff --git a/meilisearch-http/src/index_controller/index_actor/actor.rs b/meilisearch-lib/src/index_controller/index_actor/actor.rs similarity index 99% rename from meilisearch-http/src/index_controller/index_actor/actor.rs rename to meilisearch-lib/src/index_controller/index_actor/actor.rs index abc08788e..cee656b97 100644 --- a/meilisearch-http/src/index_controller/index_actor/actor.rs +++ b/meilisearch-lib/src/index_controller/index_actor/actor.rs @@ -16,7 +16,7 @@ use crate::index::{ use crate::index_controller::{ get_arc_ownership_blocking, Failed, IndexStats, Processed, Processing, }; -use crate::option::IndexerOpts; +use crate::options::IndexerOpts; use super::error::{IndexActorError, Result}; use super::{IndexMeta, IndexMsg, IndexSettings, IndexStore}; @@ -29,7 +29,9 @@ pub struct IndexActor { store: S, } -impl IndexActor { +impl IndexActor +where S: IndexStore + Sync + Send, +{ pub fn new( receiver: mpsc::Receiver, store: S, diff --git a/meilisearch-http/src/index_controller/index_actor/error.rs b/meilisearch-lib/src/index_controller/index_actor/error.rs similarity index 100% rename from meilisearch-http/src/index_controller/index_actor/error.rs rename to meilisearch-lib/src/index_controller/index_actor/error.rs diff --git a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs b/meilisearch-lib/src/index_controller/index_actor/handle_impl.rs similarity index 99% rename from meilisearch-http/src/index_controller/index_actor/handle_impl.rs rename to meilisearch-lib/src/index_controller/index_actor/handle_impl.rs index efc104c54..8cc66edee 100644 --- a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs +++ b/meilisearch-lib/src/index_controller/index_actor/handle_impl.rs @@ -1,4 +1,4 @@ -use crate::option::IndexerOpts; +use crate::options::IndexerOpts; use std::path::{Path, PathBuf}; use tokio::sync::{mpsc, oneshot}; diff --git a/meilisearch-http/src/index_controller/index_actor/message.rs b/meilisearch-lib/src/index_controller/index_actor/message.rs similarity index 100% rename from meilisearch-http/src/index_controller/index_actor/message.rs rename to meilisearch-lib/src/index_controller/index_actor/message.rs diff --git a/meilisearch-http/src/index_controller/index_actor/mod.rs b/meilisearch-lib/src/index_controller/index_actor/mod.rs similarity index 100% rename from meilisearch-http/src/index_controller/index_actor/mod.rs rename to meilisearch-lib/src/index_controller/index_actor/mod.rs diff --git a/meilisearch-http/src/index_controller/index_actor/store.rs b/meilisearch-lib/src/index_controller/index_actor/store.rs similarity index 100% rename from meilisearch-http/src/index_controller/index_actor/store.rs rename to meilisearch-lib/src/index_controller/index_actor/store.rs diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs similarity index 82% rename from meilisearch-http/src/index_controller/mod.rs rename to meilisearch-lib/src/index_controller/mod.rs index 88a219530..73df4eee6 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -22,8 +22,8 @@ use update_actor::UpdateActorHandle; pub use updates::*; use uuid_resolver::{error::UuidResolverError, UuidResolverHandle}; +use crate::options::IndexerOpts; use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings}; -use crate::option::Opt; use error::Result; use self::dump_actor::load_dump; @@ -99,45 +99,58 @@ pub enum Update { } } -impl IndexController { - pub fn new(path: impl AsRef, options: &Opt) -> anyhow::Result { - let index_size = options.max_index_size.get_bytes() as usize; - let update_store_size = options.max_index_size.get_bytes() as usize; +#[derive(Default, Debug)] +pub struct IndexControllerBuilder { + max_index_size: Option, + max_update_store_size: Option, + snapshot_dir: Option, + import_snapshot: Option, + ignore_snapshot_if_db_exists: bool, + ignore_missing_snapshot: bool, + dump_src: Option, + dump_dst: Option, +} - if let Some(ref path) = options.import_snapshot { +impl IndexControllerBuilder { + pub fn build(self, db_path: impl AsRef, indexer_options: IndexerOpts) -> anyhow::Result { + let index_size = self.max_index_size.ok_or_else(|| anyhow::anyhow!("Missing index size"))?; + let update_store_size = self.max_index_size.ok_or_else(|| anyhow::anyhow!("Missing update database size"))?; + + if let Some(ref path) = self.import_snapshot { info!("Loading from snapshot {:?}", path); load_snapshot( - &options.db_path, + db_path.as_ref(), path, - options.ignore_snapshot_if_db_exists, - options.ignore_missing_snapshot, + self.ignore_snapshot_if_db_exists, + self.ignore_missing_snapshot, )?; - } else if let Some(ref src_path) = options.import_dump { + } else if let Some(ref src_path) = self.dump_src { load_dump( - &options.db_path, + db_path.as_ref(), src_path, - options.max_index_size.get_bytes() as usize, - options.max_udb_size.get_bytes() as usize, - &options.indexer_options, + index_size, + update_store_size, + &indexer_options, )?; } - std::fs::create_dir_all(&path)?; + std::fs::create_dir_all(db_path.as_ref())?; - let uuid_resolver = uuid_resolver::UuidResolverHandleImpl::new(&path)?; + let uuid_resolver = uuid_resolver::UuidResolverHandleImpl::new(&db_path)?; let index_handle = - index_actor::IndexActorHandleImpl::new(&path, index_size, &options.indexer_options)?; + index_actor::IndexActorHandleImpl::new(&db_path, index_size, &indexer_options)?; let update_handle = update_actor::UpdateActorHandleImpl::new( index_handle.clone(), - &path, + &db_path, update_store_size, )?; + let dump_handle = dump_actor::DumpActorHandleImpl::new( - &options.dumps_dir, + &self.dump_dst.ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?, uuid_resolver.clone(), update_handle.clone(), - options.max_index_size.get_bytes() as usize, - options.max_udb_size.get_bytes() as usize, + index_size, + update_store_size, )?; //if options.schedule_snapshot { @@ -156,7 +169,7 @@ impl IndexController { //tokio::task::spawn(snapshot_service.run()); //} - Ok(Self { + Ok(IndexController { uuid_resolver, index_handle, update_handle, @@ -164,6 +177,59 @@ impl IndexController { }) } + /// Set the index controller builder's max update store size. + pub fn set_max_update_store_size(&mut self, max_update_store_size: usize) -> &mut Self { + self.max_update_store_size.replace(max_update_store_size); + self + } + + pub fn set_max_index_size(&mut self, size: usize) -> &mut Self { + self.max_index_size.replace(size); + self + } + + /// Set the index controller builder's snapshot path. + pub fn set_snapshot_dir(&mut self, snapshot_dir: PathBuf) -> &mut Self { + self.snapshot_dir.replace(snapshot_dir); + self + } + + /// Set the index controller builder's ignore snapshot if db exists. + pub fn set_ignore_snapshot_if_db_exists(&mut self, ignore_snapshot_if_db_exists: bool) -> &mut Self { + self.ignore_snapshot_if_db_exists = ignore_snapshot_if_db_exists; + self + } + + /// Set the index controller builder's ignore missing snapshot. + pub fn set_ignore_missing_snapshot(&mut self, ignore_missing_snapshot: bool) -> &mut Self { + self.ignore_missing_snapshot = ignore_missing_snapshot; + self + } + + /// Set the index controller builder's dump src. + pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self { + self.dump_src.replace(dump_src); + self + } + + /// Set the index controller builder's dump dst. + pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self { + self.dump_dst.replace(dump_dst); + self + } + + /// Set the index controller builder's import snapshot. + pub fn set_import_snapshot(&mut self, import_snapshot: PathBuf) -> &mut Self { + self.import_snapshot.replace(import_snapshot); + self + } +} + +impl IndexController { + pub fn builder() -> IndexControllerBuilder { + IndexControllerBuilder::default() + } + pub async fn register_update(&self, uid: &str, update: Update) -> Result { match self.uuid_resolver.get(uid.to_string()).await { Ok(uuid) => { diff --git a/meilisearch-http/src/index_controller/snapshot.rs b/meilisearch-lib/src/index_controller/snapshot.rs similarity index 98% rename from meilisearch-http/src/index_controller/snapshot.rs rename to meilisearch-lib/src/index_controller/snapshot.rs index 6c5171d62..c2f600bbc 100644 --- a/meilisearch-http/src/index_controller/snapshot.rs +++ b/meilisearch-lib/src/index_controller/snapshot.rs @@ -2,8 +2,6 @@ use std::path::Path; use anyhow::bail; -use crate::helpers::compression; - //pub struct SnapshotService { //uuid_resolver_handle: R, //update_handle: U, @@ -93,7 +91,7 @@ pub fn load_snapshot( ignore_missing_snapshot: bool, ) -> anyhow::Result<()> { if !db_path.as_ref().exists() && snapshot_path.as_ref().exists() { - match compression::from_tar_gz(snapshot_path, &db_path) { + match crate::from_tar_gz(snapshot_path, &db_path) { Ok(()) => Ok(()), Err(e) => { // clean created db folder diff --git a/meilisearch-http/src/index_controller/update_actor/actor.rs b/meilisearch-lib/src/index_controller/update_actor/actor.rs similarity index 100% rename from meilisearch-http/src/index_controller/update_actor/actor.rs rename to meilisearch-lib/src/index_controller/update_actor/actor.rs diff --git a/meilisearch-http/src/index_controller/update_actor/error.rs b/meilisearch-lib/src/index_controller/update_actor/error.rs similarity index 100% rename from meilisearch-http/src/index_controller/update_actor/error.rs rename to meilisearch-lib/src/index_controller/update_actor/error.rs diff --git a/meilisearch-http/src/index_controller/update_actor/handle_impl.rs b/meilisearch-lib/src/index_controller/update_actor/handle_impl.rs similarity index 99% rename from meilisearch-http/src/index_controller/update_actor/handle_impl.rs rename to meilisearch-lib/src/index_controller/update_actor/handle_impl.rs index 5175f2eb5..e1df0b5d4 100644 --- a/meilisearch-http/src/index_controller/update_actor/handle_impl.rs +++ b/meilisearch-lib/src/index_controller/update_actor/handle_impl.rs @@ -41,6 +41,7 @@ impl UpdateActorHandle for UpdateActorHandleImpl { self.sender.send(msg).await?; receiver.await? } + async fn update_status(&self, uuid: Uuid, id: u64) -> Result { let (ret, receiver) = oneshot::channel(); let msg = UpdateMsg::GetUpdate { uuid, id, ret }; diff --git a/meilisearch-http/src/index_controller/update_actor/message.rs b/meilisearch-lib/src/index_controller/update_actor/message.rs similarity index 100% rename from meilisearch-http/src/index_controller/update_actor/message.rs rename to meilisearch-lib/src/index_controller/update_actor/message.rs diff --git a/meilisearch-http/src/index_controller/update_actor/mod.rs b/meilisearch-lib/src/index_controller/update_actor/mod.rs similarity index 100% rename from meilisearch-http/src/index_controller/update_actor/mod.rs rename to meilisearch-lib/src/index_controller/update_actor/mod.rs diff --git a/meilisearch-http/src/index_controller/update_actor/store/codec.rs b/meilisearch-lib/src/index_controller/update_actor/store/codec.rs similarity index 100% rename from meilisearch-http/src/index_controller/update_actor/store/codec.rs rename to meilisearch-lib/src/index_controller/update_actor/store/codec.rs diff --git a/meilisearch-http/src/index_controller/update_actor/store/dump.rs b/meilisearch-lib/src/index_controller/update_actor/store/dump.rs similarity index 100% rename from meilisearch-http/src/index_controller/update_actor/store/dump.rs rename to meilisearch-lib/src/index_controller/update_actor/store/dump.rs diff --git a/meilisearch-http/src/index_controller/update_actor/store/mod.rs b/meilisearch-lib/src/index_controller/update_actor/store/mod.rs similarity index 99% rename from meilisearch-http/src/index_controller/update_actor/store/mod.rs rename to meilisearch-lib/src/index_controller/update_actor/store/mod.rs index 2dd758b82..62fcbd5ad 100644 --- a/meilisearch-http/src/index_controller/update_actor/store/mod.rs +++ b/meilisearch-lib/src/index_controller/update_actor/store/mod.rs @@ -28,7 +28,7 @@ use codec::*; use super::RegisterUpdate; use super::error::Result; -use crate::helpers::EnvSizer; +use crate::EnvSizer; use crate::index_controller::update_files_path; use crate::index_controller::{index_actor::CONCURRENT_INDEX_MSG, updates::*, IndexActorHandle}; @@ -323,7 +323,7 @@ impl UpdateStore { let result = match handle.block_on(index_handle.update(index_uuid, processing.clone())) { Ok(result) => result, - Err(e) => Err(processing.fail(e.into())), + Err(e) => Err(processing.fail(e)), }; // Once the pending update have been successfully processed diff --git a/meilisearch-http/src/index_controller/update_file_store.rs b/meilisearch-lib/src/index_controller/update_file_store.rs similarity index 100% rename from meilisearch-http/src/index_controller/update_file_store.rs rename to meilisearch-lib/src/index_controller/update_file_store.rs diff --git a/meilisearch-http/src/index_controller/updates.rs b/meilisearch-lib/src/index_controller/updates.rs similarity index 89% rename from meilisearch-http/src/index_controller/updates.rs rename to meilisearch-lib/src/index_controller/updates.rs index 7065b0462..efe48e5e5 100644 --- a/meilisearch-http/src/index_controller/updates.rs +++ b/meilisearch-lib/src/index_controller/updates.rs @@ -1,11 +1,12 @@ +use std::{error::Error, fmt::Display}; + use chrono::{DateTime, Utc}; + +use meilisearch_error::{Code, ErrorCode}; use milli::update::{DocumentAdditionResult, IndexDocumentsMethod}; use serde::{Deserialize, Serialize}; -use crate::{ - error::ResponseError, - index::{Settings, Unchecked}, -}; +use crate::index::{Settings, Unchecked}; use super::update_actor::RegisterUpdate; @@ -115,10 +116,13 @@ impl Processing { } } - pub fn fail(self, error: ResponseError) -> Failed { + pub fn fail(self, error: impl ErrorCode) -> Failed { + let msg = error.to_string(); + let code = error.error_code(); Failed { from: self, - error, + msg, + code, failed_at: Utc::now(), } } @@ -147,10 +151,25 @@ impl Aborted { pub struct Failed { #[serde(flatten)] pub from: Processing, - pub error: ResponseError, + pub msg: String, + pub code: Code, pub failed_at: DateTime, } +impl Display for Failed { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.msg.fmt(f) + } +} + +impl Error for Failed { } + +impl ErrorCode for Failed { + fn error_code(&self) -> Code { + self.code + } +} + impl Failed { pub fn id(&self) -> u64 { self.from.id() diff --git a/meilisearch-http/src/index_controller/uuid_resolver/actor.rs b/meilisearch-lib/src/index_controller/uuid_resolver/actor.rs similarity index 100% rename from meilisearch-http/src/index_controller/uuid_resolver/actor.rs rename to meilisearch-lib/src/index_controller/uuid_resolver/actor.rs diff --git a/meilisearch-http/src/index_controller/uuid_resolver/error.rs b/meilisearch-lib/src/index_controller/uuid_resolver/error.rs similarity index 100% rename from meilisearch-http/src/index_controller/uuid_resolver/error.rs rename to meilisearch-lib/src/index_controller/uuid_resolver/error.rs diff --git a/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs b/meilisearch-lib/src/index_controller/uuid_resolver/handle_impl.rs similarity index 100% rename from meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs rename to meilisearch-lib/src/index_controller/uuid_resolver/handle_impl.rs diff --git a/meilisearch-http/src/index_controller/uuid_resolver/message.rs b/meilisearch-lib/src/index_controller/uuid_resolver/message.rs similarity index 100% rename from meilisearch-http/src/index_controller/uuid_resolver/message.rs rename to meilisearch-lib/src/index_controller/uuid_resolver/message.rs diff --git a/meilisearch-http/src/index_controller/uuid_resolver/mod.rs b/meilisearch-lib/src/index_controller/uuid_resolver/mod.rs similarity index 100% rename from meilisearch-http/src/index_controller/uuid_resolver/mod.rs rename to meilisearch-lib/src/index_controller/uuid_resolver/mod.rs diff --git a/meilisearch-http/src/index_controller/uuid_resolver/store.rs b/meilisearch-lib/src/index_controller/uuid_resolver/store.rs similarity index 99% rename from meilisearch-http/src/index_controller/uuid_resolver/store.rs rename to meilisearch-lib/src/index_controller/uuid_resolver/store.rs index f02d22d7f..5457ab91d 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/store.rs +++ b/meilisearch-lib/src/index_controller/uuid_resolver/store.rs @@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::{error::UuidResolverError, Result, UUID_STORE_SIZE}; -use crate::helpers::EnvSizer; +use crate::EnvSizer; #[derive(Serialize, Deserialize)] struct DumpEntry { diff --git a/meilisearch-lib/src/lib.rs b/meilisearch-lib/src/lib.rs new file mode 100644 index 000000000..9f6be4361 --- /dev/null +++ b/meilisearch-lib/src/lib.rs @@ -0,0 +1,53 @@ +#[macro_use] +pub mod error; +pub mod options; + +pub mod index; +pub mod index_controller; + +pub use index_controller::{UpdateResult, UpdateStatus, IndexController as MeiliSearch, update_actor::RegisterUpdate}; + +use walkdir::WalkDir; + +pub trait EnvSizer { + fn size(&self) -> u64; +} + +impl EnvSizer for heed::Env { + fn size(&self) -> u64 { + WalkDir::new(self.path()) + .into_iter() + .filter_map(|entry| entry.ok()) + .filter_map(|entry| entry.metadata().ok()) + .filter(|metadata| metadata.is_file()) + .fold(0, |acc, m| acc + m.len()) + } +} + +use std::fs::{create_dir_all, File}; +use std::io::Write; +use std::path::Path; + +use flate2::{read::GzDecoder, write::GzEncoder, Compression}; +use tar::{Archive, Builder}; + +pub fn to_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Result<()> { + let mut f = File::create(dest)?; + let gz_encoder = GzEncoder::new(&mut f, Compression::default()); + let mut tar_encoder = Builder::new(gz_encoder); + tar_encoder.append_dir_all(".", src)?; + let gz_encoder = tar_encoder.into_inner()?; + gz_encoder.finish()?; + f.flush()?; + Ok(()) +} + +pub fn from_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Result<()> { + let f = File::open(&src)?; + let gz = GzDecoder::new(f); + let mut ar = Archive::new(gz); + create_dir_all(&dest)?; + ar.unpack(&dest)?; + Ok(()) +} + diff --git a/meilisearch-lib/src/options.rs b/meilisearch-lib/src/options.rs new file mode 100644 index 000000000..f4b992f2e --- /dev/null +++ b/meilisearch-lib/src/options.rs @@ -0,0 +1,115 @@ +use core::fmt; +use std::{ops::Deref, str::FromStr}; + +use byte_unit::{Byte, ByteError}; +use milli::CompressionType; +use structopt::StructOpt; +use sysinfo::{RefreshKind, System, SystemExt}; + +#[derive(Debug, Clone, StructOpt)] +pub struct IndexerOpts { + /// The amount of documents to skip before printing + /// a log regarding the indexing advancement. + #[structopt(long, default_value = "100000")] // 100k + pub log_every_n: usize, + + /// Grenad max number of chunks in bytes. + #[structopt(long)] + pub max_nb_chunks: Option, + + /// The maximum amount of memory the indexer will use. It defaults to 2/3 + /// of the available memory. It is recommended to use something like 80%-90% + /// of the available memory, no more. + /// + /// In case the engine is unable to retrieve the available memory the engine will + /// try to use the memory it needs but without real limit, this can lead to + /// Out-Of-Memory issues and it is recommended to specify the amount of memory to use. + #[structopt(long, default_value)] + pub max_memory: MaxMemory, + + /// The name of the compression algorithm to use when compressing intermediate + /// Grenad chunks while indexing documents. + /// + /// Choosing a fast algorithm will make the indexing faster but may consume more memory. + #[structopt(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])] + pub chunk_compression_type: CompressionType, + + /// The level of compression of the chosen algorithm. + #[structopt(long, requires = "chunk-compression-type")] + pub chunk_compression_level: Option, + + /// Number of parallel jobs for indexing, defaults to # of CPUs. + #[structopt(long)] + pub indexing_jobs: Option, +} + +impl Default for IndexerOpts { + fn default() -> Self { + Self { + log_every_n: 100_000, + max_nb_chunks: None, + max_memory: MaxMemory::default(), + chunk_compression_type: CompressionType::None, + chunk_compression_level: None, + indexing_jobs: None, + } + } +} + +/// A type used to detect the max memory available and use 2/3 of it. +#[derive(Debug, Clone, Copy)] +pub struct MaxMemory(Option); + +impl FromStr for MaxMemory { + type Err = ByteError; + + fn from_str(s: &str) -> Result { + Byte::from_str(s).map(Some).map(MaxMemory) + } +} + +impl Default for MaxMemory { + fn default() -> MaxMemory { + MaxMemory( + total_memory_bytes() + .map(|bytes| bytes * 2 / 3) + .map(Byte::from_bytes), + ) + } +} + +impl fmt::Display for MaxMemory { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self.0 { + Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)), + None => f.write_str("unknown"), + } + } +} + +impl Deref for MaxMemory { + type Target = Option; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl MaxMemory { + pub fn unlimited() -> Self { + Self(None) + } +} + +/// Returns the total amount of bytes available or `None` if this system isn't supported. +fn total_memory_bytes() -> Option { + if System::IS_SUPPORTED { + let memory_kind = RefreshKind::new().with_memory(); + let mut system = System::new_with_specifics(memory_kind); + system.refresh_memory(); + Some(system.total_memory() * 1024) // KiB into bytes + } else { + None + } +} +