From e14640e530daca3fe9aeaf52255b5f6f4f0a25b9 Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 14 Sep 2021 18:39:02 +0200 Subject: [PATCH 01/37] refactor meilisearch --- Cargo.lock | 104 ++++-- meilisearch-http/Cargo.toml | 5 +- meilisearch-http/src/data/mod.rs | 26 +- meilisearch-http/src/data/updates.rs | 56 +-- meilisearch-http/src/error.rs | 1 - meilisearch-http/src/index/dump.rs | 154 ++++----- meilisearch-http/src/index/mod.rs | 14 +- meilisearch-http/src/index/search.rs | 2 +- meilisearch-http/src/index/update_handler.rs | 27 +- meilisearch-http/src/index/updates.rs | 230 ++++++------- .../dump_actor/handle_impl.rs | 3 +- .../index_controller/dump_actor/loaders/v1.rs | 104 +++--- .../src/index_controller/index_actor/actor.rs | 10 +- .../index_actor/handle_impl.rs | 4 +- .../index_controller/index_actor/message.rs | 1 - .../src/index_controller/index_actor/mod.rs | 2 - .../src/index_controller/index_actor/store.rs | 10 +- meilisearch-http/src/index_controller/mod.rs | 299 +++++++++------- .../src/index_controller/snapshot.rs | 153 ++++----- .../index_controller/update_actor/actor.rs | 180 +++++----- .../update_actor/handle_impl.rs | 30 +- .../index_controller/update_actor/message.rs | 9 +- .../src/index_controller/update_actor/mod.rs | 23 +- .../update_actor/store/dump.rs | 128 +++---- .../update_actor/store/mod.rs | 149 ++++---- .../src/index_controller/update_file_store.rs | 63 ++++ .../src/index_controller/updates.rs | 24 +- meilisearch-http/src/main.rs | 37 +- meilisearch-http/src/option.rs | 28 +- .../src/routes/indexes/documents.rs | 132 +++---- meilisearch-http/src/routes/indexes/mod.rs | 36 +- .../src/routes/indexes/settings.rs | 325 +++++++++--------- meilisearch-http/src/routes/mod.rs | 19 +- 33 files changed, 1222 insertions(+), 1166 deletions(-) create mode 100644 meilisearch-http/src/index_controller/update_file_store.rs diff --git a/Cargo.lock b/Cargo.lock index 1f216ffc7..809535e1d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -235,6 +235,15 @@ 
dependencies = [ "path-slash", ] +[[package]] +name = "addr2line" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e61f2b7f93d2c7d2b08263acaa4a363b3e276806c68af6134c44f523bf1aacd" +dependencies = [ + "gimli", +] + [[package]] name = "adler" version = "1.0.2" @@ -281,6 +290,9 @@ name = "anyhow" version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61604a8f862e1d5c3229fdd78f8b02c68dcf73a4c4b05fd636d12240aaa242c1" +dependencies = [ + "backtrace", +] [[package]] name = "arc-swap" @@ -346,6 +358,21 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" +[[package]] +name = "backtrace" +version = "0.3.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7a905d892734eea339e896738c14b9afce22b5318f64b951e70bf3844419b01" +dependencies = [ + "addr2line", + "cc", + "cfg-if 1.0.0", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + [[package]] name = "base-x" version = "0.2.8" @@ -358,6 +385,15 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" +[[package]] +name = "bimap" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50ae17cabbc8a38a1e3e4c1a6a664e9a09672dc14d0896fa8d865d3a5a446b07" +dependencies = [ + "serde", +] + [[package]] name = "bincode" version = "1.3.3" @@ -432,7 +468,6 @@ dependencies = [ "lazy_static", "memchr", "regex-automata", - "serde", ] [[package]] @@ -734,28 +769,6 @@ dependencies = [ "lazy_static", ] -[[package]] -name = "csv" -version = "1.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" -dependencies = [ - "bstr", - "csv-core", - "itoa", - "ryu", - 
"serde", -] - -[[package]] -name = "csv-core" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" -dependencies = [ - "memchr", -] - [[package]] name = "derive_more" version = "0.99.16" @@ -1089,6 +1102,12 @@ dependencies = [ "syn 1.0.76", ] +[[package]] +name = "gimli" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0a01e0497841a3b2db4f8afa483cce65f7e96a3498bd6c541734792aeac8fe7" + [[package]] name = "git2" version = "0.13.22" @@ -1618,6 +1637,7 @@ dependencies = [ "tempfile", "thiserror", "tokio", + "tokio-stream", "urlencoding", "uuid", "vergen", @@ -1670,14 +1690,15 @@ dependencies = [ [[package]] name = "milli" version = "0.13.1" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.13.1#90d64d257fa944ab2ee1572193e501bb231627c7" +source = "git+https://github.com/meilisearch/milli.git?rev=6de1b41#6de1b41f791e7d117634e63783d78b29b5228a99" dependencies = [ + "bimap", + "bincode", "bstr", "byteorder", "chrono", "concat-arrays", "crossbeam-channel", - "csv", "either", "flate2", "fst", @@ -1706,6 +1727,7 @@ dependencies = [ "smallvec", "tempfile", "uuid", + "vec-utils", ] [[package]] @@ -1827,6 +1849,15 @@ dependencies = [ "libc", ] +[[package]] +name = "object" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39f37e50073ccad23b6d09bcb5b263f4e76d3bb6038e4a3c08e52162ffa8abc2" +dependencies = [ + "memchr", +] + [[package]] name = "obkv" version = "0.2.0" @@ -2367,6 +2398,12 @@ dependencies = [ "retain_mut", ] +[[package]] +name = "rustc-demangle" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" + [[package]] name = "rustc_version" version = "0.2.3" @@ -2959,6 +2996,17 @@ dependencies = [ "webpki", ] +[[package]] +name = "tokio-stream" 
+version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b2f3f698253f03119ac0102beaa64f67a67e08074d03a22d18784104543727f" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "tokio-util" version = "0.6.8" @@ -3126,6 +3174,12 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "vec-utils" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dac984aa016c26ef4ed7b2c30d6a1bd570fd40a078caccaf6415a2ac5d96161" + [[package]] name = "vec_map" version = "0.8.2" diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 7d5b92a87..02e72668b 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -25,7 +25,7 @@ zip = { version = "0.5.13", optional = true } actix-cors = { git = "https://github.com/MarinPostma/actix-extras.git", rev = "963ac94d" } actix-web = { version = "4.0.0-beta.9", features = ["rustls"] } actix-web-static-files = { git = "https://github.com/MarinPostma/actix-web-static-files.git", rev = "39d8006", optional = true } -anyhow = "1.0.43" +anyhow = { version = "1.0.43", features = ["backtrace"] } async-stream = "0.3.2" async-trait = "0.1.51" arc-swap = "1.3.2" @@ -48,7 +48,7 @@ main_error = "0.1.1" meilisearch-error = { path = "../meilisearch-error" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" } memmap = "0.7.0" -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.13.1" } +milli = { git = "https://github.com/meilisearch/milli.git", rev = "6de1b41" } mime = "0.3.16" num_cpus = "1.13.0" once_cell = "1.8.0" @@ -75,6 +75,7 @@ whoami = { version = "1.1.3", optional = true } reqwest = { version = "0.11.4", features = ["json", "rustls-tls"], default-features = false, optional = true } serdeval = "0.1.0" 
sysinfo = "0.20.2" +tokio-stream = "0.1.7" [dev-dependencies] actix-rt = "2.2.0" diff --git a/meilisearch-http/src/data/mod.rs b/meilisearch-http/src/data/mod.rs index 48dfcfa06..c0e83155c 100644 --- a/meilisearch-http/src/data/mod.rs +++ b/meilisearch-http/src/data/mod.rs @@ -5,7 +5,7 @@ use sha2::Digest; use crate::index::{Checked, Settings}; use crate::index_controller::{ - error::Result, DumpInfo, IndexController, IndexMetadata, IndexSettings, IndexStats, Stats, + error::Result, DumpInfo, IndexController, IndexMetadata, IndexStats, Stats, }; use crate::option::Opt; @@ -91,19 +91,19 @@ impl Data { self.index_controller.get_index(uid).await } - pub async fn create_index( - &self, - uid: String, - primary_key: Option, - ) -> Result { - let settings = IndexSettings { - uid: Some(uid), - primary_key, - }; + //pub async fn create_index( + //&self, + //uid: String, + //primary_key: Option, + //) -> Result { + //let settings = IndexSettings { + //uid: Some(uid), + //primary_key, + //}; - let meta = self.index_controller.create_index(settings).await?; - Ok(meta) - } + //let meta = self.index_controller.create_index(settings).await?; + //Ok(meta) + //} pub async fn get_index_stats(&self, uid: String) -> Result { Ok(self.index_controller.get_index_stats(uid).await?) 
diff --git a/meilisearch-http/src/data/updates.rs b/meilisearch-http/src/data/updates.rs index 4e38294e9..8228cd2b2 100644 --- a/meilisearch-http/src/data/updates.rs +++ b/meilisearch-http/src/data/updates.rs @@ -1,59 +1,11 @@ -use milli::update::{IndexDocumentsMethod, UpdateFormat}; - -use crate::extractors::payload::Payload; -use crate::index::{Checked, Settings}; +use crate::index_controller::Update; use crate::index_controller::{error::Result, IndexMetadata, IndexSettings, UpdateStatus}; use crate::Data; impl Data { - pub async fn add_documents( - &self, - index: String, - method: IndexDocumentsMethod, - format: UpdateFormat, - stream: Payload, - primary_key: Option, - ) -> Result { - let update_status = self - .index_controller - .add_documents(index, method, format, stream, primary_key) - .await?; - Ok(update_status) - } - - pub async fn update_settings( - &self, - index: String, - settings: Settings, - create: bool, - ) -> Result { - let update = self - .index_controller - .update_settings(index, settings, create) - .await?; - Ok(update) - } - - pub async fn clear_documents(&self, index: String) -> Result { - let update = self.index_controller.clear_documents(index).await?; - Ok(update) - } - - pub async fn delete_documents( - &self, - index: String, - document_ids: Vec, - ) -> Result { - let update = self - .index_controller - .delete_documents(index, document_ids) - .await?; - Ok(update) - } - - pub async fn delete_index(&self, index: String) -> Result<()> { - self.index_controller.delete_index(index).await?; - Ok(()) + pub async fn register_update(&self, index_uid: &str, update: Update) -> Result { + let status = self.index_controller.register_update(index_uid, update).await?; + Ok(status) } pub async fn get_update_status(&self, index: String, uid: u64) -> Result { diff --git a/meilisearch-http/src/error.rs b/meilisearch-http/src/error.rs index 2ec556de2..61b8dbcd9 100644 --- a/meilisearch-http/src/error.rs +++ b/meilisearch-http/src/error.rs @@ -86,7 
+86,6 @@ impl ErrorCode for MilliError<'_> { milli::Error::UserError(ref error) => { match error { // TODO: wait for spec for new error codes. - UserError::Csv(_) | UserError::SerdeJson(_) | UserError::MaxDatabaseSizeReached | UserError::InvalidCriterionName { .. } diff --git a/meilisearch-http/src/index/dump.rs b/meilisearch-http/src/index/dump.rs index 7df704339..9c8acf960 100644 --- a/meilisearch-http/src/index/dump.rs +++ b/meilisearch-http/src/index/dump.rs @@ -1,20 +1,15 @@ -use std::fs::{create_dir_all, File}; -use std::io::{BufRead, BufReader, Write}; +use std::fs::File; +use std::io::Write; use std::path::Path; -use std::sync::Arc; -use anyhow::{bail, Context}; use heed::RoTxn; use indexmap::IndexMap; -use milli::update::{IndexDocumentsMethod, UpdateFormat::JsonStream}; use serde::{Deserialize, Serialize}; -use serde_json::Value; -use crate::index_controller::{asc_ranking_rule, desc_ranking_rule}; use crate::option::IndexerOpts; use super::error::Result; -use super::{update_handler::UpdateHandler, Index, Settings, Unchecked}; +use super::{Index, Settings, Unchecked}; #[derive(Serialize, Deserialize)] struct DumpMeta { @@ -80,91 +75,92 @@ impl Index { } pub fn load_dump( - src: impl AsRef, - dst: impl AsRef, - size: usize, - indexing_options: &IndexerOpts, + _src: impl AsRef, + _dst: impl AsRef, + _size: usize, + _indexing_options: &IndexerOpts, ) -> anyhow::Result<()> { - let dir_name = src - .as_ref() - .file_name() - .with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?; + //let dir_name = src + //.as_ref() + //.file_name() + //.with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?; - let dst_dir_path = dst.as_ref().join("indexes").join(dir_name); - create_dir_all(&dst_dir_path)?; + //let dst_dir_path = dst.as_ref().join("indexes").join(dir_name); + //create_dir_all(&dst_dir_path)?; - let meta_path = src.as_ref().join(META_FILE_NAME); - let mut meta_file = File::open(meta_path)?; + //let meta_path = 
src.as_ref().join(META_FILE_NAME); + //let mut meta_file = File::open(meta_path)?; - // We first deserialize the dump meta into a serde_json::Value and change - // the custom ranking rules settings from the old format to the new format. - let mut meta: Value = serde_json::from_reader(&mut meta_file)?; - if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") { - convert_custom_ranking_rules(ranking_rules); - } + //// We first deserialize the dump meta into a serde_json::Value and change + //// the custom ranking rules settings from the old format to the new format. + //let mut meta: Value = serde_json::from_reader(&mut meta_file)?; + //if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") { + //convert_custom_ranking_rules(ranking_rules); + //} - // Then we serialize it back into a vec to deserialize it - // into a `DumpMeta` struct with the newly patched `rankingRules` format. - let patched_meta = serde_json::to_vec(&meta)?; + //// Then we serialize it back into a vec to deserialize it + //// into a `DumpMeta` struct with the newly patched `rankingRules` format. 
+ //let patched_meta = serde_json::to_vec(&meta)?; - let DumpMeta { - settings, - primary_key, - } = serde_json::from_slice(&patched_meta)?; - let settings = settings.check(); - let index = Self::open(&dst_dir_path, size)?; - let mut txn = index.write_txn()?; + //let DumpMeta { + //settings, + //primary_key, + //} = serde_json::from_slice(&patched_meta)?; + //let settings = settings.check(); + //let index = Self::open(&dst_dir_path, size)?; + //let mut txn = index.write_txn()?; - let handler = UpdateHandler::new(indexing_options)?; + //let handler = UpdateHandler::new(indexing_options)?; - index.update_settings_txn(&mut txn, &settings, handler.update_builder(0))?; + //index.update_settings_txn(&mut txn, &settings, handler.update_builder(0))?; - let document_file_path = src.as_ref().join(DATA_FILE_NAME); - let reader = File::open(&document_file_path)?; - let mut reader = BufReader::new(reader); - reader.fill_buf()?; + //let document_file_path = src.as_ref().join(DATA_FILE_NAME); + //let reader = File::open(&document_file_path)?; + //let mut reader = BufReader::new(reader); + //reader.fill_buf()?; // If the document file is empty, we don't perform the document addition, to prevent // a primary key error to be thrown. 
- if !reader.buffer().is_empty() { - index.update_documents_txn( - &mut txn, - JsonStream, - IndexDocumentsMethod::UpdateDocuments, - Some(reader), - handler.update_builder(0), - primary_key.as_deref(), - )?; - } - txn.commit()?; + todo!("fix obk document dumps") + //if !reader.buffer().is_empty() { + //index.update_documents_txn( + //&mut txn, + //IndexDocumentsMethod::UpdateDocuments, + //Some(reader), + //handler.update_builder(0), + //primary_key.as_deref(), + //)?; + //} - match Arc::try_unwrap(index.0) { - Ok(inner) => inner.prepare_for_closing().wait(), - Err(_) => bail!("Could not close index properly."), - } + //txn.commit()?; - Ok(()) + //match Arc::try_unwrap(index.0) { + //Ok(inner) => inner.prepare_for_closing().wait(), + //Err(_) => bail!("Could not close index properly."), + //} + + //Ok(()) } } -/// Converts the ranking rules from the format `asc(_)`, `desc(_)` to the format `_:asc`, `_:desc`. -/// -/// This is done for compatibility reasons, and to avoid a new dump version, -/// since the new syntax was introduced soon after the new dump version. -fn convert_custom_ranking_rules(ranking_rules: &mut Value) { - *ranking_rules = match ranking_rules.take() { - Value::Array(values) => values - .into_iter() - .filter_map(|value| match value { - Value::String(s) if s.starts_with("asc") => asc_ranking_rule(&s) - .map(|f| format!("{}:asc", f)) - .map(Value::String), - Value::String(s) if s.starts_with("desc") => desc_ranking_rule(&s) - .map(|f| format!("{}:desc", f)) - .map(Value::String), - otherwise => Some(otherwise), - }) - .collect(), - otherwise => otherwise, - } -} +// /// Converts the ranking rules from the format `asc(_)`, `desc(_)` to the format `_:asc`, `_:desc`. +// /// +// /// This is done for compatibility reasons, and to avoid a new dump version, +// /// since the new syntax was introduced soon after the new dump version. 
+//fn convert_custom_ranking_rules(ranking_rules: &mut Value) { + //*ranking_rules = match ranking_rules.take() { + //Value::Array(values) => values + //.into_iter() + //.filter_map(|value| match value { + //Value::String(s) if s.starts_with("asc") => asc_ranking_rule(&s) + //.map(|f| format!("{}:asc", f)) + //.map(Value::String), + //Value::String(s) if s.starts_with("desc") => desc_ranking_rule(&s) + //.map(|f| format!("{}:desc", f)) + //.map(Value::String), + //otherwise => Some(otherwise), + //}) + //.collect(), + //otherwise => otherwise, + //} +//} diff --git a/meilisearch-http/src/index/mod.rs b/meilisearch-http/src/index/mod.rs index e4243aadc..1ea481ec9 100644 --- a/meilisearch-http/src/index/mod.rs +++ b/meilisearch-http/src/index/mod.rs @@ -15,6 +15,7 @@ pub use search::{default_crop_length, SearchQuery, SearchResult, DEFAULT_SEARCH_ pub use updates::{Checked, Facets, Settings, Unchecked}; use crate::helpers::EnvSizer; +use crate::index_controller::update_file_store::UpdateFileStore; use self::error::IndexError; @@ -28,23 +29,26 @@ mod updates; pub type Document = Map; #[derive(Clone)] -pub struct Index(pub Arc); +pub struct Index { + pub inner: Arc, + update_file_store: Arc, +} impl Deref for Index { type Target = milli::Index; fn deref(&self) -> &Self::Target { - self.0.as_ref() + self.inner.as_ref() } } impl Index { - pub fn open(path: impl AsRef, size: usize) -> Result { + pub fn open(path: impl AsRef, size: usize, update_file_store: Arc) -> Result { create_dir_all(&path)?; let mut options = EnvOpenOptions::new(); options.map_size(size); - let index = milli::Index::new(options, &path)?; - Ok(Index(Arc::new(index))) + let inner = Arc::new(milli::Index::new(options, &path)?); + Ok(Index { inner, update_file_store }) } pub fn settings(&self) -> Result> { diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 26eb816a0..c7949fea6 100644 --- a/meilisearch-http/src/index/search.rs +++ 
b/meilisearch-http/src/index/search.rs @@ -662,7 +662,7 @@ fn parse_filter_array( } } - Ok(FilterCondition::from_array(txn, &index.0, ands)?) + Ok(FilterCondition::from_array(txn, &index, ands)?) } #[cfg(test)] diff --git a/meilisearch-http/src/index/update_handler.rs b/meilisearch-http/src/index/update_handler.rs index f3977a00d..0ad71d313 100644 --- a/meilisearch-http/src/index/update_handler.rs +++ b/meilisearch-http/src/index/update_handler.rs @@ -1,11 +1,9 @@ -use std::fs::File; - use crate::index::Index; use milli::update::UpdateBuilder; use milli::CompressionType; use rayon::ThreadPool; -use crate::index_controller::UpdateMeta; +use crate::index_controller::update_actor::RegisterUpdate; use crate::index_controller::{Failed, Processed, Processing}; use crate::option::IndexerOpts; @@ -54,31 +52,16 @@ impl UpdateHandler { pub fn handle_update( &self, - meta: Processing, - content: Option, index: Index, + meta: Processing, ) -> Result { - use UpdateMeta::*; - let update_id = meta.id(); - let update_builder = self.update_builder(update_id); let result = match meta.meta() { - DocumentsAddition { - method, - format, - primary_key, - } => index.update_documents( - *format, - *method, - content, - update_builder, - primary_key.as_deref(), - ), - ClearDocuments => index.clear_documents(update_builder), - DeleteDocuments { ids } => index.delete_documents(ids, update_builder), - Settings(settings) => index.update_settings(&settings.clone().check(), update_builder), + RegisterUpdate::DocumentAddition { primary_key, content_uuid, method } => { + index.update_documents(*method, *content_uuid, update_builder, primary_key.as_deref()) + } }; match result { diff --git a/meilisearch-http/src/index/updates.rs b/meilisearch-http/src/index/updates.rs index 924e6b1ef..6c7ae1416 100644 --- a/meilisearch-http/src/index/updates.rs +++ b/meilisearch-http/src/index/updates.rs @@ -1,17 +1,17 @@ use std::collections::{BTreeMap, BTreeSet}; -use std::io; use std::marker::PhantomData; use 
std::num::NonZeroUsize; -use flate2::read::GzDecoder; use log::{debug, info, trace}; -use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder, UpdateFormat}; +use milli::documents::DocumentBatchReader; +use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder}; use serde::{Deserialize, Serialize, Serializer}; +use uuid::Uuid; use crate::index_controller::UpdateResult; -use super::error::Result; use super::Index; +use super::error::Result; fn serialize_with_wildcard( field: &Setting>, @@ -162,31 +162,23 @@ pub struct Facets { impl Index { pub fn update_documents( &self, - format: UpdateFormat, method: IndexDocumentsMethod, - content: Option, + content_uuid: Uuid, update_builder: UpdateBuilder, primary_key: Option<&str>, ) -> Result { let mut txn = self.write_txn()?; - let result = self.update_documents_txn( - &mut txn, - format, - method, - content, - update_builder, - primary_key, - )?; + let result = self.update_documents_txn(&mut txn, method, content_uuid, update_builder, primary_key)?; txn.commit()?; + Ok(result) } pub fn update_documents_txn<'a, 'b>( &'a self, txn: &mut heed::RwTxn<'a, 'b>, - format: UpdateFormat, method: IndexDocumentsMethod, - content: Option, + content_uuid: Uuid, update_builder: UpdateBuilder, primary_key: Option<&str>, ) -> Result { @@ -199,138 +191,132 @@ impl Index { builder.execute(|_, _| ())?; } - let mut builder = update_builder.index_documents(txn, self); - builder.update_format(format); - builder.index_documents_method(method); - let indexing_callback = |indexing_step, update_id| debug!("update {}: {:?}", update_id, indexing_step); - let gzipped = false; - let addition = match content { - Some(content) if gzipped => { - builder.execute(GzDecoder::new(content), indexing_callback)? 
- } - Some(content) => builder.execute(content, indexing_callback)?, - None => builder.execute(std::io::empty(), indexing_callback)?, - }; + let content_file = self.update_file_store.get_update(content_uuid).unwrap(); + let reader = DocumentBatchReader::from_reader(content_file).unwrap(); + + let mut builder = update_builder.index_documents(txn, self); + builder.index_documents_method(method); + let addition = builder.execute(reader, indexing_callback)?; info!("document addition done: {:?}", addition); Ok(UpdateResult::DocumentsAddition(addition)) } - pub fn clear_documents(&self, update_builder: UpdateBuilder) -> Result { - // We must use the write transaction of the update here. - let mut wtxn = self.write_txn()?; - let builder = update_builder.clear_documents(&mut wtxn, self); + //pub fn clear_documents(&self, update_builder: UpdateBuilder) -> Result { + //// We must use the write transaction of the update here. + //let mut wtxn = self.write_txn()?; + //let builder = update_builder.clear_documents(&mut wtxn, self); - let _count = builder.execute()?; + //let _count = builder.execute()?; - wtxn.commit() - .and(Ok(UpdateResult::Other)) - .map_err(Into::into) - } + //wtxn.commit() + //.and(Ok(UpdateResult::Other)) + //.map_err(Into::into) + //} - pub fn update_settings_txn<'a, 'b>( - &'a self, - txn: &mut heed::RwTxn<'a, 'b>, - settings: &Settings, - update_builder: UpdateBuilder, - ) -> Result { - // We must use the write transaction of the update here. - let mut builder = update_builder.settings(txn, self); + //pub fn update_settings_txn<'a, 'b>( + //&'a self, + //txn: &mut heed::RwTxn<'a, 'b>, + //settings: &Settings, + //update_builder: UpdateBuilder, + //) -> Result { + //// We must use the write transaction of the update here. 
+ //let mut builder = update_builder.settings(txn, self); - match settings.searchable_attributes { - Setting::Set(ref names) => builder.set_searchable_fields(names.clone()), - Setting::Reset => builder.reset_searchable_fields(), - Setting::NotSet => (), - } + //match settings.searchable_attributes { + //Setting::Set(ref names) => builder.set_searchable_fields(names.clone()), + //Setting::Reset => builder.reset_searchable_fields(), + //Setting::NotSet => (), + //} - match settings.displayed_attributes { - Setting::Set(ref names) => builder.set_displayed_fields(names.clone()), - Setting::Reset => builder.reset_displayed_fields(), - Setting::NotSet => (), - } + //match settings.displayed_attributes { + //Setting::Set(ref names) => builder.set_displayed_fields(names.clone()), + //Setting::Reset => builder.reset_displayed_fields(), + //Setting::NotSet => (), + //} - match settings.filterable_attributes { - Setting::Set(ref facets) => { - builder.set_filterable_fields(facets.clone().into_iter().collect()) - } - Setting::Reset => builder.reset_filterable_fields(), - Setting::NotSet => (), - } + //match settings.filterable_attributes { + //Setting::Set(ref facets) => { + //builder.set_filterable_fields(facets.clone().into_iter().collect()) + //} + //Setting::Reset => builder.reset_filterable_fields(), + //Setting::NotSet => (), + //} - match settings.sortable_attributes { - Setting::Set(ref fields) => { - builder.set_sortable_fields(fields.iter().cloned().collect()) - } - Setting::Reset => builder.reset_sortable_fields(), - Setting::NotSet => (), - } + //match settings.sortable_attributes { + //Setting::Set(ref fields) => { + //builder.set_sortable_fields(fields.iter().cloned().collect()) + //} + //Setting::Reset => builder.reset_sortable_fields(), + //Setting::NotSet => (), + //} - match settings.ranking_rules { - Setting::Set(ref criteria) => builder.set_criteria(criteria.clone()), - Setting::Reset => builder.reset_criteria(), - Setting::NotSet => (), - } + //match 
settings.ranking_rules { + //Setting::Set(ref criteria) => builder.set_criteria(criteria.clone()), + //Setting::Reset => builder.reset_criteria(), + //Setting::NotSet => (), + //} - match settings.stop_words { - Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()), - Setting::Reset => builder.reset_stop_words(), - Setting::NotSet => (), - } + //match settings.stop_words { + //Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()), + //Setting::Reset => builder.reset_stop_words(), + //Setting::NotSet => (), + //} - match settings.synonyms { - Setting::Set(ref synonyms) => { - builder.set_synonyms(synonyms.clone().into_iter().collect()) - } - Setting::Reset => builder.reset_synonyms(), - Setting::NotSet => (), - } + //match settings.synonyms { + //Setting::Set(ref synonyms) => { + //builder.set_synonyms(synonyms.clone().into_iter().collect()) + //} + //Setting::Reset => builder.reset_synonyms(), + //Setting::NotSet => (), + //} - match settings.distinct_attribute { - Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()), - Setting::Reset => builder.reset_distinct_field(), - Setting::NotSet => (), - } + //match settings.distinct_attribute { + //Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()), + //Setting::Reset => builder.reset_distinct_field(), + //Setting::NotSet => (), + //} - builder.execute(|indexing_step, update_id| { - debug!("update {}: {:?}", update_id, indexing_step) - })?; + //builder.execute(|indexing_step, update_id| { + //debug!("update {}: {:?}", update_id, indexing_step) + //})?; - Ok(UpdateResult::Other) - } + //Ok(UpdateResult::Other) + //} - pub fn update_settings( - &self, - settings: &Settings, - update_builder: UpdateBuilder, - ) -> Result { - let mut txn = self.write_txn()?; - let result = self.update_settings_txn(&mut txn, settings, update_builder)?; - txn.commit()?; - Ok(result) - } + //pub fn update_settings( + //&self, + //settings: &Settings, + //update_builder: 
UpdateBuilder, + //) -> Result { + //let mut txn = self.write_txn()?; + //let result = self.update_settings_txn(&mut txn, settings, update_builder)?; + //txn.commit()?; + //Ok(result) + //} - pub fn delete_documents( - &self, - document_ids: &[String], - update_builder: UpdateBuilder, - ) -> Result { - let mut txn = self.write_txn()?; - let mut builder = update_builder.delete_documents(&mut txn, self)?; + //pub fn delete_documents( + //&self, + //document_ids: &[String], + //update_builder: UpdateBuilder, + //) -> Result { + //let mut txn = self.write_txn()?; + //let mut builder = update_builder.delete_documents(&mut txn, self)?; - // We ignore unexisting document ids - document_ids.iter().for_each(|id| { - builder.delete_external_id(id); - }); + //// We ignore unexisting document ids + //document_ids.iter().for_each(|id| { + //builder.delete_external_id(id); + //}); - let deleted = builder.execute()?; - txn.commit() - .and(Ok(UpdateResult::DocumentDeletion { deleted })) - .map_err(Into::into) - } + //let deleted = builder.execute()?; + //txn.commit() + //.and(Ok(UpdateResult::DocumentDeletion { deleted })) + //.map_err(Into::into) + //} } #[cfg(test)] diff --git a/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs b/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs index db11fb8fc..649d82405 100644 --- a/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs @@ -1,6 +1,5 @@ use std::path::Path; -use actix_web::web::Bytes; use tokio::sync::{mpsc, oneshot}; use super::error::Result; @@ -32,7 +31,7 @@ impl DumpActorHandleImpl { pub fn new( path: impl AsRef, uuid_resolver: crate::index_controller::uuid_resolver::UuidResolverHandleImpl, - update: crate::index_controller::update_actor::UpdateActorHandleImpl, + update: crate::index_controller::update_actor::UpdateActorHandleImpl, index_db_size: usize, update_db_size: usize, ) -> anyhow::Result { diff --git 
a/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs b/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs index 997fd2801..21893eb49 100644 --- a/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs +++ b/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs @@ -1,20 +1,16 @@ use std::collections::{BTreeMap, BTreeSet}; -use std::fs::{create_dir_all, File}; -use std::io::BufRead; use std::marker::PhantomData; use std::path::Path; -use std::sync::Arc; -use heed::EnvOpenOptions; use log::{error, info, warn}; -use milli::update::{IndexDocumentsMethod, Setting, UpdateFormat}; +use milli::update::Setting; use serde::{Deserialize, Deserializer, Serialize}; use uuid::Uuid; use crate::index_controller::{self, uuid_resolver::HeedUuidStore, IndexMetadata}; use crate::index_controller::{asc_ranking_rule, desc_ranking_rule}; use crate::{ - index::{update_handler::UpdateHandler, Index, Unchecked}, + index::Unchecked, option::IndexerOpts, }; @@ -86,57 +82,57 @@ struct Settings { } fn load_index( - src: impl AsRef, - dst: impl AsRef, - uuid: Uuid, - primary_key: Option<&str>, - size: usize, - indexer_options: &IndexerOpts, + _src: impl AsRef, + _dst: impl AsRef, + _uuid: Uuid, + _primary_key: Option<&str>, + _size: usize, + _indexer_options: &IndexerOpts, ) -> anyhow::Result<()> { - let index_path = dst.as_ref().join(&format!("indexes/index-{}", uuid)); + todo!("fix dump obkv documents") + //let index_path = dst.as_ref().join(&format!("indexes/index-{}", uuid)); - create_dir_all(&index_path)?; - let mut options = EnvOpenOptions::new(); - options.map_size(size); - let index = milli::Index::new(options, index_path)?; - let index = Index(Arc::new(index)); + //create_dir_all(&index_path)?; + //let mut options = EnvOpenOptions::new(); + //options.map_size(size); + //let index = milli::Index::new(options, index_path)?; + //let index = Index(Arc::new(index)); - // extract `settings.json` file and import content - let settings = 
import_settings(&src)?; - let settings: index_controller::Settings = settings.into(); + //// extract `settings.json` file and import content + //let settings = import_settings(&src)?; + //let settings: index_controller::Settings = settings.into(); - let mut txn = index.write_txn()?; + //let mut txn = index.write_txn()?; - let handler = UpdateHandler::new(indexer_options)?; + //let handler = UpdateHandler::new(indexer_options)?; - index.update_settings_txn(&mut txn, &settings.check(), handler.update_builder(0))?; + //index.update_settings_txn(&mut txn, &settings.check(), handler.update_builder(0))?; - let file = File::open(&src.as_ref().join("documents.jsonl"))?; - let mut reader = std::io::BufReader::new(file); - reader.fill_buf()?; - if !reader.buffer().is_empty() { - index.update_documents_txn( - &mut txn, - UpdateFormat::JsonStream, - IndexDocumentsMethod::ReplaceDocuments, - Some(reader), - handler.update_builder(0), - primary_key, - )?; - } + //let file = File::open(&src.as_ref().join("documents.jsonl"))?; + //let mut reader = std::io::BufReader::new(file); + //reader.fill_buf()?; + //if !reader.buffer().is_empty() { + //index.update_documents_txn( + //&mut txn, + //IndexDocumentsMethod::ReplaceDocuments, + //Some(reader), + //handler.update_builder(0), + //primary_key, + //)?; + //} - txn.commit()?; + //txn.commit()?; - // Finaly, we extract the original milli::Index and close it - Arc::try_unwrap(index.0) - .map_err(|_e| "Couldn't close the index properly") - .unwrap() - .prepare_for_closing() - .wait(); + //// Finaly, we extract the original milli::Index and close it + //Arc::try_unwrap(index.0) + //.map_err(|_e| "Couldn't close the index properly") + //.unwrap() + //.prepare_for_closing() + //.wait(); - // Updates are ignored in dumps V1. + //// Updates are ignored in dumps V1. 
- Ok(()) + //Ok(()) } /// we need to **always** be able to convert the old settings to the settings currently being used @@ -203,15 +199,15 @@ impl From for index_controller::Settings { } } -/// Extract Settings from `settings.json` file present at provided `dir_path` -fn import_settings(dir_path: impl AsRef) -> anyhow::Result { - let path = dir_path.as_ref().join("settings.json"); - let file = File::open(path)?; - let reader = std::io::BufReader::new(file); - let metadata = serde_json::from_reader(reader)?; +// /// Extract Settings from `settings.json` file present at provided `dir_path` +//fn import_settings(dir_path: impl AsRef) -> anyhow::Result { + //let path = dir_path.as_ref().join("settings.json"); + //let file = File::open(path)?; + //let reader = std::io::BufReader::new(file); + //let metadata = serde_json::from_reader(reader)?; - Ok(metadata) -} + //Ok(metadata) +//} #[cfg(test)] mod test { diff --git a/meilisearch-http/src/index_controller/index_actor/actor.rs b/meilisearch-http/src/index_controller/index_actor/actor.rs index fc40a5090..abc08788e 100644 --- a/meilisearch-http/src/index_controller/index_actor/actor.rs +++ b/meilisearch-http/src/index_controller/index_actor/actor.rs @@ -1,4 +1,3 @@ -use std::fs::File; use std::path::PathBuf; use std::sync::Arc; @@ -39,6 +38,7 @@ impl IndexActor { let update_handler = UpdateHandler::new(options)?; let update_handler = Arc::new(update_handler); let receiver = Some(receiver); + Ok(Self { receiver, update_handler, @@ -82,10 +82,9 @@ impl IndexActor { Update { ret, meta, - data, uuid, } => { - let _ = ret.send(self.handle_update(uuid, meta, data).await); + let _ = ret.send(self.handle_update(uuid, meta).await); } Search { ret, query, uuid } => { let _ = ret.send(self.handle_search(uuid, query).await); @@ -165,7 +164,6 @@ impl IndexActor { &self, uuid: Uuid, meta: Processing, - data: Option, ) -> Result> { debug!("Processing update {}", meta.id()); let update_handler = self.update_handler.clone(); @@ -174,7 
+172,7 @@ impl IndexActor { None => self.store.create(uuid, None).await?, }; - Ok(spawn_blocking(move || update_handler.handle_update(meta, data, index)).await?) + Ok(spawn_blocking(move || update_handler.handle_update(index, meta)).await?) } async fn handle_settings(&self, uuid: Uuid) -> Result> { @@ -230,7 +228,7 @@ impl IndexActor { if let Some(index) = index { tokio::task::spawn(async move { - let index = index.0; + let index = index.inner; let store = get_arc_ownership_blocking(index).await; spawn_blocking(move || { store.prepare_for_closing().wait(); diff --git a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs b/meilisearch-http/src/index_controller/index_actor/handle_impl.rs index ceb2a8226..efc104c54 100644 --- a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/index_actor/handle_impl.rs @@ -38,13 +38,11 @@ impl IndexActorHandle for IndexActorHandleImpl { &self, uuid: Uuid, meta: Processing, - data: Option, ) -> Result> { let (ret, receiver) = oneshot::channel(); let msg = IndexMsg::Update { ret, meta, - data, uuid, }; let _ = self.sender.send(msg).await; @@ -156,7 +154,7 @@ impl IndexActorHandleImpl { ) -> anyhow::Result { let (sender, receiver) = mpsc::channel(100); - let store = MapIndexStore::new(path, index_size); + let store = MapIndexStore::new(&path, index_size); let actor = IndexActor::new(receiver, store, options)?; tokio::task::spawn(actor.run()); Ok(Self { sender }) diff --git a/meilisearch-http/src/index_controller/index_actor/message.rs b/meilisearch-http/src/index_controller/index_actor/message.rs index 415b90e4b..1b93ec34f 100644 --- a/meilisearch-http/src/index_controller/index_actor/message.rs +++ b/meilisearch-http/src/index_controller/index_actor/message.rs @@ -19,7 +19,6 @@ pub enum IndexMsg { Update { uuid: Uuid, meta: Processing, - data: Option, ret: oneshot::Sender>>, }, Search { diff --git a/meilisearch-http/src/index_controller/index_actor/mod.rs 
b/meilisearch-http/src/index_controller/index_actor/mod.rs index faad75e01..bf5833222 100644 --- a/meilisearch-http/src/index_controller/index_actor/mod.rs +++ b/meilisearch-http/src/index_controller/index_actor/mod.rs @@ -1,4 +1,3 @@ -use std::fs::File; use std::path::PathBuf; use chrono::{DateTime, Utc}; @@ -59,7 +58,6 @@ pub trait IndexActorHandle { &self, uuid: Uuid, meta: Processing, - data: Option, ) -> Result>; async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result; async fn settings(&self, uuid: Uuid) -> Result>; diff --git a/meilisearch-http/src/index_controller/index_actor/store.rs b/meilisearch-http/src/index_controller/index_actor/store.rs index 2cfda61b5..252271d51 100644 --- a/meilisearch-http/src/index_controller/index_actor/store.rs +++ b/meilisearch-http/src/index_controller/index_actor/store.rs @@ -10,6 +10,7 @@ use uuid::Uuid; use super::error::{IndexActorError, Result}; use crate::index::Index; +use crate::index_controller::update_file_store::UpdateFileStore; type AsyncMap = Arc>>; @@ -24,16 +25,19 @@ pub struct MapIndexStore { index_store: AsyncMap, path: PathBuf, index_size: usize, + update_file_store: Arc, } impl MapIndexStore { pub fn new(path: impl AsRef, index_size: usize) -> Self { + let update_file_store = Arc::new(UpdateFileStore::new(path.as_ref()).unwrap()); let path = path.as_ref().join("indexes/"); let index_store = Arc::new(RwLock::new(HashMap::new())); Self { index_store, path, index_size, + update_file_store, } } } @@ -54,8 +58,9 @@ impl IndexStore for MapIndexStore { } let index_size = self.index_size; + let file_store = self.update_file_store.clone(); let index = spawn_blocking(move || -> Result { - let index = Index::open(path, index_size)?; + let index = Index::open(path, index_size, file_store)?; if let Some(primary_key) = primary_key { let mut txn = index.write_txn()?; @@ -87,7 +92,8 @@ impl IndexStore for MapIndexStore { } let index_size = self.index_size; - let index = spawn_blocking(move || Index::open(path, 
index_size)).await??; + let file_store = self.update_file_store.clone(); + let index = spawn_blocking(move || Index::open(path, index_size, file_store)).await??; self.index_store.write().await.insert(uuid, index.clone()); Ok(Some(index)) } diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-http/src/index_controller/mod.rs index 4565a1dd0..88a219530 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-http/src/index_controller/mod.rs @@ -1,42 +1,43 @@ use std::collections::BTreeMap; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::Duration; -use actix_web::web::Bytes; +use actix_web::error::PayloadError; +use bytes::Bytes; use chrono::{DateTime, Utc}; -use futures::stream::StreamExt; -use log::error; +use futures::Stream; use log::info; use milli::FieldDistribution; +use milli::update::IndexDocumentsMethod; use serde::{Deserialize, Serialize}; -use tokio::sync::mpsc; use tokio::time::sleep; use uuid::Uuid; use dump_actor::DumpActorHandle; pub use dump_actor::{DumpInfo, DumpStatus}; use index_actor::IndexActorHandle; -use snapshot::{load_snapshot, SnapshotService}; +use snapshot::load_snapshot; use update_actor::UpdateActorHandle; pub use updates::*; use uuid_resolver::{error::UuidResolverError, UuidResolverHandle}; -use crate::extractors::payload::Payload; use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings}; use crate::option::Opt; use error::Result; use self::dump_actor::load_dump; -use self::error::IndexControllerError; mod dump_actor; pub mod error; pub mod index_actor; mod snapshot; -mod update_actor; +pub mod update_actor; mod updates; mod uuid_resolver; +pub mod update_file_store; + +pub type Payload = Box> + Send + Sync + 'static + Unpin>; #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(rename_all = "camelCase")] @@ -72,10 +73,15 @@ pub struct IndexStats { pub struct IndexController { uuid_resolver: uuid_resolver::UuidResolverHandleImpl, 
index_handle: index_actor::IndexActorHandleImpl, - update_handle: update_actor::UpdateActorHandleImpl, + update_handle: update_actor::UpdateActorHandleImpl, dump_handle: dump_actor::DumpActorHandleImpl, } +pub enum DocumentAdditionFormat { + Json, +} + + #[derive(Serialize, Debug)] #[serde(rename_all = "camelCase")] pub struct Stats { @@ -84,6 +90,15 @@ pub struct Stats { pub indexes: BTreeMap, } +pub enum Update { + DocumentAddition { + payload: Payload, + primary_key: Option, + method: IndexDocumentsMethod, + format: DocumentAdditionFormat, + } +} + impl IndexController { pub fn new(path: impl AsRef, options: &Opt) -> anyhow::Result { let index_size = options.max_index_size.get_bytes() as usize; @@ -125,21 +140,21 @@ impl IndexController { options.max_udb_size.get_bytes() as usize, )?; - if options.schedule_snapshot { - let snapshot_service = SnapshotService::new( - uuid_resolver.clone(), - update_handle.clone(), - Duration::from_secs(options.snapshot_interval_sec), - options.snapshot_dir.clone(), - options - .db_path - .file_name() - .map(|n| n.to_owned().into_string().expect("invalid path")) - .unwrap_or_else(|| String::from("data.ms")), - ); + //if options.schedule_snapshot { + //let snapshot_service = SnapshotService::new( + //uuid_resolver.clone(), + //update_handle.clone(), + //Duration::from_secs(options.snapshot_interval_sec), + //options.snapshot_dir.clone(), + //options + //.db_path + //.file_name() + //.map(|n| n.to_owned().into_string().expect("invalid path")) + //.unwrap_or_else(|| String::from("data.ms")), + //); - tokio::task::spawn(snapshot_service.run()); - } + //tokio::task::spawn(snapshot_service.run()); + //} Ok(Self { uuid_resolver, @@ -149,132 +164,148 @@ impl IndexController { }) } - pub async fn add_documents( - &self, - uid: String, - method: milli::update::IndexDocumentsMethod, - format: milli::update::UpdateFormat, - payload: Payload, - primary_key: Option, - ) -> Result { - let perform_update = |uuid| async move { - let meta = 
UpdateMeta::DocumentsAddition { - method, - format, - primary_key, - }; - let (sender, receiver) = mpsc::channel(10); - - // It is necessary to spawn a local task to send the payload to the update handle to - // prevent dead_locking between the update_handle::update that waits for the update to be - // registered and the update_actor that waits for the the payload to be sent to it. - tokio::task::spawn_local(async move { - payload - .for_each(|r| async { - let _ = sender.send(r).await; - }) - .await - }); - - // This must be done *AFTER* spawning the task. - self.update_handle.update(meta, receiver, uuid).await - }; - - match self.uuid_resolver.get(uid).await { - Ok(uuid) => Ok(perform_update(uuid).await?), + pub async fn register_update(&self, uid: &str, update: Update) -> Result { + match self.uuid_resolver.get(uid.to_string()).await { + Ok(uuid) => { + let update_result = self.update_handle.update(uuid, update).await?; + Ok(update_result) + }, Err(UuidResolverError::UnexistingIndex(name)) => { let uuid = Uuid::new_v4(); - let status = perform_update(uuid).await?; + let update_result = self.update_handle.update(uuid, update).await?; // ignore if index creation fails now, since it may already have been created let _ = self.index_handle.create_index(uuid, None).await; self.uuid_resolver.insert(name, uuid).await?; - Ok(status) + Ok(update_result) } Err(e) => Err(e.into()), } } - pub async fn clear_documents(&self, uid: String) -> Result { - let uuid = self.uuid_resolver.get(uid).await?; - let meta = UpdateMeta::ClearDocuments; - let (_, receiver) = mpsc::channel(1); - let status = self.update_handle.update(meta, receiver, uuid).await?; - Ok(status) - } + //pub async fn add_documents( + //&self, + //uid: String, + //method: milli::update::IndexDocumentsMethod, + //payload: Payload, + //primary_key: Option, + //) -> Result { + //let perform_update = |uuid| async move { + //let meta = UpdateMeta::DocumentsAddition { + //method, + //primary_key, + //}; + //let (sender, 
receiver) = mpsc::channel(10); - pub async fn delete_documents( - &self, - uid: String, - documents: Vec, - ) -> Result { - let uuid = self.uuid_resolver.get(uid).await?; - let meta = UpdateMeta::DeleteDocuments { ids: documents }; - let (_, receiver) = mpsc::channel(1); - let status = self.update_handle.update(meta, receiver, uuid).await?; - Ok(status) - } + //// It is necessary to spawn a local task to send the payload to the update handle to + //// prevent dead_locking between the update_handle::update that waits for the update to be + //// registered and the update_actor that waits for the the payload to be sent to it. + //tokio::task::spawn_local(async move { + //payload + //.for_each(|r| async { + //let _ = sender.send(r).await; + //}) + //.await + //}); - pub async fn update_settings( - &self, - uid: String, - settings: Settings, - create: bool, - ) -> Result { - let perform_udpate = |uuid| async move { - let meta = UpdateMeta::Settings(settings.into_unchecked()); - // Nothing so send, drop the sender right away, as not to block the update actor. - let (_, receiver) = mpsc::channel(1); - self.update_handle.update(meta, receiver, uuid).await - }; + //// This must be done *AFTER* spawning the task. 
+ //self.update_handle.update(meta, receiver, uuid).await + //}; - match self.uuid_resolver.get(uid).await { - Ok(uuid) => Ok(perform_udpate(uuid).await?), - Err(UuidResolverError::UnexistingIndex(name)) if create => { - let uuid = Uuid::new_v4(); - let status = perform_udpate(uuid).await?; - // ignore if index creation fails now, since it may already have been created - let _ = self.index_handle.create_index(uuid, None).await; - self.uuid_resolver.insert(name, uuid).await?; - Ok(status) - } - Err(e) => Err(e.into()), - } - } + //match self.uuid_resolver.get(uid).await { + //Ok(uuid) => Ok(perform_update(uuid).await?), + //Err(UuidResolverError::UnexistingIndex(name)) => { + //let uuid = Uuid::new_v4(); + //let status = perform_update(uuid).await?; + //// ignore if index creation fails now, since it may already have been created + //let _ = self.index_handle.create_index(uuid, None).await; + //self.uuid_resolver.insert(name, uuid).await?; + //Ok(status) + //} + //Err(e) => Err(e.into()), + //} + //} - pub async fn create_index(&self, index_settings: IndexSettings) -> Result { - let IndexSettings { uid, primary_key } = index_settings; - let uid = uid.ok_or(IndexControllerError::MissingUid)?; - let uuid = Uuid::new_v4(); - let meta = self.index_handle.create_index(uuid, primary_key).await?; - self.uuid_resolver.insert(uid.clone(), uuid).await?; - let meta = IndexMetadata { - uuid, - name: uid.clone(), - uid, - meta, - }; + //pub async fn clear_documents(&self, uid: String) -> Result { + //let uuid = self.uuid_resolver.get(uid).await?; + //let meta = UpdateMeta::ClearDocuments; + //let (_, receiver) = mpsc::channel(1); + //let status = self.update_handle.update(meta, receiver, uuid).await?; + //Ok(status) + //} - Ok(meta) - } + //pub async fn delete_documents( + //&self, + //uid: String, + //documents: Vec, + //) -> Result { + //let uuid = self.uuid_resolver.get(uid).await?; + //let meta = UpdateMeta::DeleteDocuments { ids: documents }; + //let (_, receiver) = 
mpsc::channel(1); + //let status = self.update_handle.update(meta, receiver, uuid).await?; + //Ok(status) + //} - pub async fn delete_index(&self, uid: String) -> Result<()> { - let uuid = self.uuid_resolver.delete(uid).await?; + //pub async fn update_settings( + //&self, + //uid: String, + //settings: Settings, + //create: bool, + //) -> Result { + //let perform_udpate = |uuid| async move { + //let meta = UpdateMeta::Settings(settings.into_unchecked()); + //// Nothing so send, drop the sender right away, as not to block the update actor. + //let (_, receiver) = mpsc::channel(1); + //self.update_handle.update(meta, receiver, uuid).await + //}; - // We remove the index from the resolver synchronously, and effectively perform the index - // deletion as a background task. - let update_handle = self.update_handle.clone(); - let index_handle = self.index_handle.clone(); - tokio::spawn(async move { - if let Err(e) = update_handle.delete(uuid).await { - error!("Error while deleting index: {}", e); - } - if let Err(e) = index_handle.delete(uuid).await { - error!("Error while deleting index: {}", e); - } - }); + //match self.uuid_resolver.get(uid).await { + //Ok(uuid) => Ok(perform_udpate(uuid).await?), + //Err(UuidResolverError::UnexistingIndex(name)) if create => { + //let uuid = Uuid::new_v4(); + //let status = perform_udpate(uuid).await?; + //// ignore if index creation fails now, since it may already have been created + //let _ = self.index_handle.create_index(uuid, None).await; + //self.uuid_resolver.insert(name, uuid).await?; + //Ok(status) + //} + //Err(e) => Err(e.into()), + //} + //} - Ok(()) - } + //pub async fn create_index(&self, index_settings: IndexSettings) -> Result { + //let IndexSettings { uid, primary_key } = index_settings; + //let uid = uid.ok_or(IndexControllerError::MissingUid)?; + //let uuid = Uuid::new_v4(); + //let meta = self.index_handle.create_index(uuid, primary_key).await?; + //self.uuid_resolver.insert(uid.clone(), uuid).await?; + //let meta 
= IndexMetadata { + //uuid, + //name: uid.clone(), + //uid, + //meta, + //}; + + //Ok(meta) + //} + + //pub async fn delete_index(&self, uid: String) -> Result<()> { + //let uuid = self.uuid_resolver.delete(uid).await?; + + //// We remove the index from the resolver synchronously, and effectively perform the index + //// deletion as a background task. + //let update_handle = self.update_handle.clone(); + //let index_handle = self.index_handle.clone(); + //tokio::spawn(async move { + //if let Err(e) = update_handle.delete(uuid).await { + //error!("Error while deleting index: {}", e); + //} + //if let Err(e) = index_handle.delete(uuid).await { + //error!("Error while deleting index: {}", e); + //} + //}); + + //Ok(()) + //} pub async fn update_status(&self, uid: String, id: u64) -> Result { let uuid = self.uuid_resolver.get(uid).await?; @@ -454,3 +485,7 @@ pub fn desc_ranking_rule(text: &str) -> Option<&str> { .and_then(|(_, tail)| tail.rsplit_once(")")) .map(|(field, _)| field) } + +fn update_files_path(path: impl AsRef) -> PathBuf { + path.as_ref().join("updates/updates_files") +} diff --git a/meilisearch-http/src/index_controller/snapshot.rs b/meilisearch-http/src/index_controller/snapshot.rs index 4c731efd8..6c5171d62 100644 --- a/meilisearch-http/src/index_controller/snapshot.rs +++ b/meilisearch-http/src/index_controller/snapshot.rs @@ -1,97 +1,90 @@ -use std::path::{Path, PathBuf}; -use std::time::Duration; +use std::path::Path; use anyhow::bail; -use log::{error, info, trace}; -use tokio::fs; -use tokio::task::spawn_blocking; -use tokio::time::sleep; -use super::update_actor::UpdateActorHandle; -use super::uuid_resolver::UuidResolverHandle; use crate::helpers::compression; -pub struct SnapshotService { - uuid_resolver_handle: R, - update_handle: U, - snapshot_period: Duration, - snapshot_path: PathBuf, - db_name: String, -} +//pub struct SnapshotService { + //uuid_resolver_handle: R, + //update_handle: U, + //snapshot_period: Duration, + //snapshot_path: 
PathBuf, + //db_name: String, +//} -impl SnapshotService -where - U: UpdateActorHandle, - R: UuidResolverHandle, -{ - pub fn new( - uuid_resolver_handle: R, - update_handle: U, - snapshot_period: Duration, - snapshot_path: PathBuf, - db_name: String, - ) -> Self { - Self { - uuid_resolver_handle, - update_handle, - snapshot_period, - snapshot_path, - db_name, - } - } +//impl SnapshotService +//where + //U: UpdateActorHandle, + //R: UuidResolverHandle, +//{ + //pub fn new( + //uuid_resolver_handle: R, + //update_handle: U, + //snapshot_period: Duration, + //snapshot_path: PathBuf, + //db_name: String, + //) -> Self { + //Self { + //uuid_resolver_handle, + //update_handle, + //snapshot_period, + //snapshot_path, + //db_name, + //} + //} - pub async fn run(self) { - info!( - "Snapshot scheduled every {}s.", - self.snapshot_period.as_secs() - ); - loop { - if let Err(e) = self.perform_snapshot().await { - error!("Error while performing snapshot: {}", e); - } - sleep(self.snapshot_period).await; - } - } + //pub async fn run(self) { + //info!( + //"Snapshot scheduled every {}s.", + //self.snapshot_period.as_secs() + //); + //loop { + //if let Err(e) = self.perform_snapshot().await { + //error!("Error while performing snapshot: {}", e); + //} + //sleep(self.snapshot_period).await; + //} + //} - async fn perform_snapshot(&self) -> anyhow::Result<()> { - trace!("Performing snapshot."); + //async fn perform_snapshot(&self) -> anyhow::Result<()> { + //trace!("Performing snapshot."); - let snapshot_dir = self.snapshot_path.clone(); - fs::create_dir_all(&snapshot_dir).await?; - let temp_snapshot_dir = - spawn_blocking(move || tempfile::tempdir_in(snapshot_dir)).await??; - let temp_snapshot_path = temp_snapshot_dir.path().to_owned(); + //let snapshot_dir = self.snapshot_path.clone(); + //fs::create_dir_all(&snapshot_dir).await?; + //let temp_snapshot_dir = + //spawn_blocking(move || tempfile::tempdir_in(snapshot_dir)).await??; + //let temp_snapshot_path = 
temp_snapshot_dir.path().to_owned(); - let uuids = self - .uuid_resolver_handle - .snapshot(temp_snapshot_path.clone()) - .await?; + //let uuids = self + //.uuid_resolver_handle + //.snapshot(temp_snapshot_path.clone()) + //.await?; - if uuids.is_empty() { - return Ok(()); - } + //if uuids.is_empty() { + //return Ok(()); + //} - self.update_handle - .snapshot(uuids, temp_snapshot_path.clone()) - .await?; - let snapshot_dir = self.snapshot_path.clone(); - let snapshot_path = self - .snapshot_path - .join(format!("{}.snapshot", self.db_name)); - let snapshot_path = spawn_blocking(move || -> anyhow::Result { - let temp_snapshot_file = tempfile::NamedTempFile::new_in(snapshot_dir)?; - let temp_snapshot_file_path = temp_snapshot_file.path().to_owned(); - compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?; - temp_snapshot_file.persist(&snapshot_path)?; - Ok(snapshot_path) - }) - .await??; + //self.update_handle + //.snapshot(uuids, temp_snapshot_path.clone()) + //.await?; + //let snapshot_dir = self.snapshot_path.clone(); + //let snapshot_path = self + //.snapshot_path + //.join(format!("{}.snapshot", self.db_name)); + //let snapshot_path = spawn_blocking(move || -> anyhow::Result { + //let temp_snapshot_file = tempfile::NamedTempFile::new_in(snapshot_dir)?; + //let temp_snapshot_file_path = temp_snapshot_file.path().to_owned(); + //compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?; + //temp_snapshot_file.persist(&snapshot_path)?; + //Ok(snapshot_path) + //}) + //.await??; - trace!("Created snapshot in {:?}.", snapshot_path); + //trace!("Created snapshot in {:?}.", snapshot_path); - Ok(()) - } -} + //Ok(()) + //} +//} pub fn load_snapshot( db_path: impl AsRef, diff --git a/meilisearch-http/src/index_controller/update_actor/actor.rs b/meilisearch-http/src/index_controller/update_actor/actor.rs index 59a22910f..01e34e000 100644 --- a/meilisearch-http/src/index_controller/update_actor/actor.rs +++ 
b/meilisearch-http/src/index_controller/update_actor/actor.rs @@ -1,44 +1,82 @@ use std::collections::HashSet; -use std::io::SeekFrom; +use std::io; use std::path::{Path, PathBuf}; use std::sync::atomic::AtomicBool; use std::sync::Arc; +use actix_web::error::PayloadError; use async_stream::stream; -use futures::StreamExt; +use bytes::Bytes; +use futures::{Stream, StreamExt}; use log::trace; -use serdeval::*; -use tokio::fs; -use tokio::io::AsyncWriteExt; +use milli::documents::DocumentBatchBuilder; +use serde_json::{Map, Value}; use tokio::sync::mpsc; use uuid::Uuid; use super::error::{Result, UpdateActorError}; -use super::{PayloadData, UpdateMsg, UpdateStore, UpdateStoreInfo}; +use super::RegisterUpdate; +use super::{UpdateMsg, UpdateStore, UpdateStoreInfo, Update}; use crate::index_controller::index_actor::IndexActorHandle; -use crate::index_controller::{UpdateMeta, UpdateStatus}; +use crate::index_controller::update_file_store::UpdateFileStore; +use crate::index_controller::{DocumentAdditionFormat, Payload, UpdateStatus}; -pub struct UpdateActor { - path: PathBuf, +pub struct UpdateActor { store: Arc, - inbox: Option>>, + inbox: Option>, + update_file_store: UpdateFileStore, index_handle: I, must_exit: Arc, } -impl UpdateActor +struct StreamReader { + stream: S, + current: Option, +} + +impl StreamReader { + fn new(stream: S) -> Self { + Self { stream, current: None } + } + +} + +impl> + Unpin> io::Read for StreamReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match self.current.take() { + Some(mut bytes) => { + let copied = bytes.split_to(buf.len()); + buf.copy_from_slice(&copied); + if !bytes.is_empty() { + self.current.replace(bytes); + } + Ok(copied.len()) + } + None => { + match tokio::runtime::Handle::current().block_on(self.stream.next()) { + Some(Ok(bytes)) => { + self.current.replace(bytes); + self.read(buf) + }, + Some(Err(e)) => Err(io::Error::new(io::ErrorKind::BrokenPipe, e)), + None => return Ok(0), + } + } + } + } +} + +impl 
UpdateActor where - D: AsRef<[u8]> + Sized + 'static, - I: IndexActorHandle + Clone + Send + Sync + 'static, + I: IndexActorHandle + Clone + Sync + Send + 'static, { pub fn new( update_db_size: usize, - inbox: mpsc::Receiver>, + inbox: mpsc::Receiver, path: impl AsRef, index_handle: I, ) -> anyhow::Result { - let path = path.as_ref().join("updates"); - + let path = path.as_ref().to_owned(); std::fs::create_dir_all(&path)?; let mut options = heed::EnvOpenOptions::new(); @@ -47,14 +85,17 @@ where let must_exit = Arc::new(AtomicBool::new(false)); let store = UpdateStore::open(options, &path, index_handle.clone(), must_exit.clone())?; - std::fs::create_dir_all(path.join("update_files"))?; + let inbox = Some(inbox); + + let update_file_store = UpdateFileStore::new(&path).unwrap(); + Ok(Self { - path, store, inbox, index_handle, must_exit, + update_file_store }) } @@ -89,11 +130,10 @@ where match msg { Update { uuid, - meta, - data, + update, ret, } => { - let _ = ret.send(self.handle_update(uuid, meta, data).await); + let _ = ret.send(self.handle_update(uuid, update).await); } ListUpdates { uuid, ret } => { let _ = ret.send(self.handle_list_updates(uuid).await); @@ -120,90 +160,39 @@ where async fn handle_update( &self, - uuid: Uuid, - meta: UpdateMeta, - payload: mpsc::Receiver>, + index_uuid: Uuid, + update: Update, ) -> Result { - let file_path = match meta { - UpdateMeta::DocumentsAddition { .. 
} => { - let update_file_id = uuid::Uuid::new_v4(); - let path = self - .path - .join(format!("update_files/update_{}", update_file_id)); - let mut file = fs::OpenOptions::new() - .read(true) - .write(true) - .create(true) - .open(&path) - .await?; + let registration = match update { + Update::DocumentAddition { payload, primary_key, method, format } => { + let content_uuid = match format { + DocumentAdditionFormat::Json => self.documents_from_json(payload).await?, + }; - async fn write_to_file( - file: &mut fs::File, - mut payload: mpsc::Receiver>, - ) -> Result - where - D: AsRef<[u8]> + Sized + 'static, - { - let mut file_len = 0; - - while let Some(bytes) = payload.recv().await { - let bytes = bytes?; - file_len += bytes.as_ref().len(); - file.write_all(bytes.as_ref()).await?; - } - - file.flush().await?; - - Ok(file_len) - } - - let file_len = write_to_file(&mut file, payload).await; - - match file_len { - Ok(len) if len > 0 => { - let file = file.into_std().await; - Some((file, update_file_id)) - } - Err(e) => { - fs::remove_file(&path).await?; - return Err(e); - } - _ => { - fs::remove_file(&path).await?; - None - } - } + RegisterUpdate::DocumentAddition { primary_key, method, content_uuid } } - _ => None, }; - let update_store = self.store.clone(); + let store = self.store.clone(); + let status = tokio::task::spawn_blocking(move || store.register_update(index_uuid, registration)).await??; + Ok(status.into()) + } + + async fn documents_from_json(&self, payload: Payload) -> Result { + let file_store = self.update_file_store.clone(); tokio::task::spawn_blocking(move || { - use std::io::{BufReader, Seek}; + let (uuid, mut file) = file_store.new_update().unwrap(); + let mut builder = DocumentBatchBuilder::new(&mut *file).unwrap(); - // If the payload is empty, ignore the check. 
- let update_uuid = if let Some((mut file, uuid)) = file_path { - // set the file back to the beginning - file.seek(SeekFrom::Start(0))?; - // Check that the json payload is valid: - let reader = BufReader::new(&mut file); - // Validate that the payload is in the correct format. - let _: Seq> = serde_json::from_reader(reader) - .map_err(|e| UpdateActorError::InvalidPayload(Box::new(e)))?; + let documents: Vec> = serde_json::from_reader(StreamReader::new(payload))?; + builder.add_documents(documents).unwrap(); + builder.finish().unwrap(); - Some(uuid) - } else { - None - }; + file.persist(); - // The payload is valid, we can register it to the update store. - let status = update_store - .register_update(meta, update_uuid, uuid) - .map(UpdateStatus::Enqueued)?; - Ok(status) - }) - .await? + Ok(uuid) + }).await? } async fn handle_list_updates(&self, uuid: Uuid) -> Result> { @@ -267,4 +256,5 @@ where Ok(info) } + } diff --git a/meilisearch-http/src/index_controller/update_actor/handle_impl.rs b/meilisearch-http/src/index_controller/update_actor/handle_impl.rs index 125c63401..5175f2eb5 100644 --- a/meilisearch-http/src/index_controller/update_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/update_actor/handle_impl.rs @@ -4,45 +4,37 @@ use std::path::{Path, PathBuf}; use tokio::sync::{mpsc, oneshot}; use uuid::Uuid; -use crate::index_controller::{IndexActorHandle, UpdateStatus}; +use crate::index_controller::{IndexActorHandle, Update, UpdateStatus}; use super::error::Result; -use super::{PayloadData, UpdateActor, UpdateActorHandle, UpdateMeta, UpdateMsg, UpdateStoreInfo}; +use super::{UpdateActor, UpdateActorHandle, UpdateMsg, UpdateStoreInfo}; #[derive(Clone)] -pub struct UpdateActorHandleImpl { - sender: mpsc::Sender>, +pub struct UpdateActorHandleImpl { + sender: mpsc::Sender, } -impl UpdateActorHandleImpl -where - D: AsRef<[u8]> + Sized + 'static + Sync + Send, -{ +impl UpdateActorHandleImpl { pub fn new( index_handle: I, path: impl AsRef, 
update_store_size: usize, ) -> anyhow::Result where - I: IndexActorHandle + Clone + Send + Sync + 'static, + I: IndexActorHandle + Clone + Sync + Send +'static, { let path = path.as_ref().to_owned(); let (sender, receiver) = mpsc::channel(100); let actor = UpdateActor::new(update_store_size, receiver, path, index_handle)?; - tokio::task::spawn(actor.run()); + tokio::task::spawn_local(actor.run()); Ok(Self { sender }) } } #[async_trait::async_trait] -impl UpdateActorHandle for UpdateActorHandleImpl -where - D: AsRef<[u8]> + Sized + 'static + Sync + Send, -{ - type Data = D; - +impl UpdateActorHandle for UpdateActorHandleImpl { async fn get_all_updates_status(&self, uuid: Uuid) -> Result> { let (ret, receiver) = oneshot::channel(); let msg = UpdateMsg::ListUpdates { uuid, ret }; @@ -86,15 +78,13 @@ where async fn update( &self, - meta: UpdateMeta, - data: mpsc::Receiver>, uuid: Uuid, + update: Update, ) -> Result { let (ret, receiver) = oneshot::channel(); let msg = UpdateMsg::Update { uuid, - data, - meta, + update, ret, }; self.sender.send(msg).await?; diff --git a/meilisearch-http/src/index_controller/update_actor/message.rs b/meilisearch-http/src/index_controller/update_actor/message.rs index 6b8a0f73f..40cc3360c 100644 --- a/meilisearch-http/src/index_controller/update_actor/message.rs +++ b/meilisearch-http/src/index_controller/update_actor/message.rs @@ -1,17 +1,16 @@ use std::collections::HashSet; use std::path::PathBuf; -use tokio::sync::{mpsc, oneshot}; +use tokio::sync::oneshot; use uuid::Uuid; use super::error::Result; -use super::{PayloadData, UpdateMeta, UpdateStatus, UpdateStoreInfo}; +use super::{UpdateStatus, UpdateStoreInfo, Update}; -pub enum UpdateMsg { +pub enum UpdateMsg { Update { uuid: Uuid, - meta: UpdateMeta, - data: mpsc::Receiver>, + update: Update, ret: oneshot::Sender>, }, ListUpdates { diff --git a/meilisearch-http/src/index_controller/update_actor/mod.rs b/meilisearch-http/src/index_controller/update_actor/mod.rs index 
ee388d2fa..b83cf491c 100644 --- a/meilisearch-http/src/index_controller/update_actor/mod.rs +++ b/meilisearch-http/src/index_controller/update_actor/mod.rs @@ -1,10 +1,11 @@ use std::{collections::HashSet, path::PathBuf}; -use actix_web::error::PayloadError; -use tokio::sync::mpsc; +use milli::update::IndexDocumentsMethod; use uuid::Uuid; +use serde::{Serialize, Deserialize}; -use crate::index_controller::{UpdateMeta, UpdateStatus}; +use crate::index_controller::UpdateStatus; +use super::Update; use actor::UpdateActor; use error::Result; @@ -19,16 +20,21 @@ mod handle_impl; mod message; pub mod store; -type PayloadData = std::result::Result; +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum RegisterUpdate { + DocumentAddition { + primary_key: Option, + method: IndexDocumentsMethod, + content_uuid: Uuid, + } +} + #[cfg(test)] use mockall::automock; #[async_trait::async_trait] -#[cfg_attr(test, automock(type Data=Vec;))] pub trait UpdateActorHandle { - type Data: AsRef<[u8]> + Sized + 'static + Sync + Send; - async fn get_all_updates_status(&self, uuid: Uuid) -> Result>; async fn update_status(&self, uuid: Uuid, id: u64) -> Result; async fn delete(&self, uuid: Uuid) -> Result<()>; @@ -37,8 +43,7 @@ pub trait UpdateActorHandle { async fn get_info(&self) -> Result; async fn update( &self, - meta: UpdateMeta, - data: mpsc::Receiver>, uuid: Uuid, + update: Update, ) -> Result; } diff --git a/meilisearch-http/src/index_controller/update_actor/store/dump.rs b/meilisearch-http/src/index_controller/update_actor/store/dump.rs index 79a3cca05..5f3605999 100644 --- a/meilisearch-http/src/index_controller/update_actor/store/dump.rs +++ b/meilisearch-http/src/index_controller/update_actor/store/dump.rs @@ -1,17 +1,17 @@ use std::{ collections::HashSet, fs::{create_dir_all, File}, - io::{BufRead, BufReader, Write}, + io::Write, path::{Path, PathBuf}, }; -use heed::{EnvOpenOptions, RoTxn}; +use heed::RoTxn; use serde::{Deserialize, Serialize}; use uuid::Uuid; use 
super::{Result, State, UpdateStore}; use crate::index_controller::{ - index_actor::IndexActorHandle, update_actor::store::update_uuid_to_file_path, Enqueued, + index_actor::IndexActorHandle, UpdateStatus, }; @@ -67,35 +67,36 @@ impl UpdateStore { fn dump_pending( &self, - txn: &RoTxn, - uuids: &HashSet, - mut file: &mut File, - dst_path: impl AsRef, + _txn: &RoTxn, + _uuids: &HashSet, + _file: &mut File, + _dst_path: impl AsRef, ) -> Result<()> { - let pendings = self.pending_queue.iter(txn)?.lazily_decode_data(); + todo!() + //let pendings = self.pending_queue.iter(txn)?.lazily_decode_data(); - for pending in pendings { - let ((_, uuid, _), data) = pending?; - if uuids.contains(&uuid) { - let update = data.decode()?; + //for pending in pendings { + //let ((_, uuid, _), data) = pending?; + //if uuids.contains(&uuid) { + //let update = data.decode()?; - if let Some(ref update_uuid) = update.content { - let src = super::update_uuid_to_file_path(&self.path, *update_uuid); - let dst = super::update_uuid_to_file_path(&dst_path, *update_uuid); - std::fs::copy(src, dst)?; - } + //if let Some(ref update_uuid) = update.content { + //let src = super::update_uuid_to_file_path(&self.path, *update_uuid); + //let dst = super::update_uuid_to_file_path(&dst_path, *update_uuid); + //std::fs::copy(src, dst)?; + //} - let update_json = UpdateEntry { - uuid, - update: update.into(), - }; + //let update_json = UpdateEntry { + //uuid, + //update: update.into(), + //}; - serde_json::to_writer(&mut file, &update_json)?; - file.write_all(b"\n")?; - } - } + //serde_json::to_writer(&mut file, &update_json)?; + //file.write_all(b"\n")?; + //} + //} - Ok(()) + //Ok(()) } fn dump_completed( @@ -122,52 +123,53 @@ impl UpdateStore { } pub fn load_dump( - src: impl AsRef, - dst: impl AsRef, - db_size: usize, + _src: impl AsRef, + _dst: impl AsRef, + _db_size: usize, ) -> anyhow::Result<()> { - let dst_update_path = dst.as_ref().join("updates/"); - create_dir_all(&dst_update_path)?; + todo!() + 
//let dst_update_path = dst.as_ref().join("updates/"); + //create_dir_all(&dst_update_path)?; - let mut options = EnvOpenOptions::new(); - options.map_size(db_size as usize); - let (store, _) = UpdateStore::new(options, &dst_update_path)?; + //let mut options = EnvOpenOptions::new(); + //options.map_size(db_size as usize); + //let (store, _) = UpdateStore::new(options, &dst_update_path)?; - let src_update_path = src.as_ref().join("updates"); - let update_data = File::open(&src_update_path.join("data.jsonl"))?; - let mut update_data = BufReader::new(update_data); + //let src_update_path = src.as_ref().join("updates"); + //let update_data = File::open(&src_update_path.join("data.jsonl"))?; + //let mut update_data = BufReader::new(update_data); - std::fs::create_dir_all(dst_update_path.join("update_files/"))?; + //std::fs::create_dir_all(dst_update_path.join("update_files/"))?; - let mut wtxn = store.env.write_txn()?; - let mut line = String::new(); - loop { - match update_data.read_line(&mut line) { - Ok(0) => break, - Ok(_) => { - let UpdateEntry { uuid, update } = serde_json::from_str(&line)?; - store.register_raw_updates(&mut wtxn, &update, uuid)?; + //let mut wtxn = store.env.write_txn()?; + //let mut line = String::new(); + //loop { + //match update_data.read_line(&mut line) { + //Ok(0) => break, + //Ok(_) => { + //let UpdateEntry { uuid, update } = serde_json::from_str(&line)?; + //store.register_raw_updates(&mut wtxn, &update, uuid)?; - // Copy ascociated update path if it exists - if let UpdateStatus::Enqueued(Enqueued { - content: Some(uuid), - .. - }) = update - { - let src = update_uuid_to_file_path(&src_update_path, uuid); - let dst = update_uuid_to_file_path(&dst_update_path, uuid); - std::fs::copy(src, dst)?; - } - } - _ => break, - } + //// Copy ascociated update path if it exists + //if let UpdateStatus::Enqueued(Enqueued { + //content: Some(uuid), + //.. 
+ //}) = update + //{ + //let src = update_uuid_to_file_path(&src_update_path, uuid); + //let dst = update_uuid_to_file_path(&dst_update_path, uuid); + //std::fs::copy(src, dst)?; + //} + //} + //_ => break, + //} - line.clear(); - } + //line.clear(); + //} - wtxn.commit()?; + //wtxn.commit()?; - Ok(()) + //Ok(()) } } diff --git a/meilisearch-http/src/index_controller/update_actor/store/mod.rs b/meilisearch-http/src/index_controller/update_actor/store/mod.rs index e23e05b52..2dd758b82 100644 --- a/meilisearch-http/src/index_controller/update_actor/store/mod.rs +++ b/meilisearch-http/src/index_controller/update_actor/store/mod.rs @@ -1,7 +1,7 @@ mod codec; pub mod dump; -use std::fs::{copy, create_dir_all, remove_file, File}; +use std::fs::{create_dir_all, remove_file}; use std::path::Path; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; @@ -26,9 +26,10 @@ use uuid::Uuid; use codec::*; +use super::RegisterUpdate; use super::error::Result; -use super::UpdateMeta; use crate::helpers::EnvSizer; +use crate::index_controller::update_files_path; use crate::index_controller::{index_actor::CONCURRENT_INDEX_MSG, updates::*, IndexActorHandle}; #[allow(clippy::upper_case_acronyms)] @@ -116,7 +117,9 @@ impl UpdateStore { ) -> anyhow::Result<(Self, mpsc::Receiver<()>)> { options.max_dbs(5); - let env = options.open(&path)?; + let update_path = path.as_ref().join("updates"); + std::fs::create_dir_all(&update_path)?; + let env = options.open(update_path)?; let pending_queue = env.create_database(Some("pending-queue"))?; let next_update_id = env.create_database(Some("next-update-id"))?; let updates = env.create_database(Some("updates"))?; @@ -157,7 +160,7 @@ impl UpdateStore { // want to close the index. let duration = Duration::from_secs(10 * 60); // 10 minutes let update_store_weak = Arc::downgrade(&update_store); - tokio::task::spawn(async move { + tokio::task::spawn_local(async move { // Block and wait for something to process with a timeout. 
The timeout // function returns a Result and we must just unlock the loop on Result. 'outer: while timeout(duration, notification_receiver.recv()) @@ -233,14 +236,12 @@ impl UpdateStore { /// into the pending-meta store. Returns the new unique update id. pub fn register_update( &self, - meta: UpdateMeta, - content: Option, index_uuid: Uuid, + update: RegisterUpdate, ) -> heed::Result { let mut txn = self.env.write_txn()?; - let (global_id, update_id) = self.next_update_id(&mut txn, index_uuid)?; - let meta = Enqueued::new(meta, update_id, content); + let meta = Enqueued::new(update, update_id); self.pending_queue .put(&mut txn, &(global_id, index_uuid, update_id), &meta)?; @@ -254,30 +255,30 @@ impl UpdateStore { Ok(meta) } - /// Push already processed update in the UpdateStore without triggering the notification - /// process. This is useful for the dumps. - pub fn register_raw_updates( - &self, - wtxn: &mut heed::RwTxn, - update: &UpdateStatus, - index_uuid: Uuid, - ) -> heed::Result<()> { - match update { - UpdateStatus::Enqueued(enqueued) => { - let (global_id, _update_id) = self.next_update_id(wtxn, index_uuid)?; - self.pending_queue.remap_key_type::().put( - wtxn, - &(global_id, index_uuid, enqueued.id()), - enqueued, - )?; - } - _ => { - let _update_id = self.next_update_id_raw(wtxn, index_uuid)?; - self.updates.put(wtxn, &(index_uuid, update.id()), update)?; - } - } - Ok(()) - } + // /// Push already processed update in the UpdateStore without triggering the notification + // /// process. This is useful for the dumps. 
+ //pub fn register_raw_updates( + //&self, + //wtxn: &mut heed::RwTxn, + //update: &UpdateStatus, + //index_uuid: Uuid, + //) -> heed::Result<()> { + //match update { + //UpdateStatus::Enqueued(enqueued) => { + //let (global_id, _update_id) = self.next_update_id(wtxn, index_uuid)?; + //self.pending_queue.remap_key_type::().put( + //wtxn, + //&(global_id, index_uuid, enqueued.id()), + //enqueued, + //)?; + //} + //_ => { + //let _update_id = self.next_update_id_raw(wtxn, index_uuid)?; + //self.updates.put(wtxn, &(index_uuid, update.id()), update)?; + //} + //} + //Ok(()) + //} /// Executes the user provided function on the next pending update (the one with the lowest id). /// This is asynchronous as it let the user process the update with a read-only txn and @@ -291,8 +292,7 @@ impl UpdateStore { // If there is a pending update we process and only keep // a reader while processing it, not a writer. match first_meta { - Some(((global_id, index_uuid, _), mut pending)) => { - let content = pending.content.take(); + Some(((global_id, index_uuid, _), pending)) => { let processing = pending.processing(); // Acquire the state lock and set the current state to processing. // txn must *always* be acquired after state lock, or it will dead lock. 
@@ -300,7 +300,7 @@ impl UpdateStore { state.swap(State::Processing(index_uuid, processing.clone())); let result = - self.perform_update(content, processing, index_handle, index_uuid, global_id); + self.perform_update(processing, index_handle, index_uuid, global_id); state.swap(State::Idle); @@ -312,27 +312,16 @@ impl UpdateStore { fn perform_update( &self, - content: Option, processing: Processing, index_handle: impl IndexActorHandle, index_uuid: Uuid, global_id: u64, ) -> Result> { - let content_path = content.map(|uuid| update_uuid_to_file_path(&self.path, uuid)); - let update_id = processing.id(); - - let file = match content_path { - Some(ref path) => { - let file = File::open(path)?; - Some(file) - } - None => None, - }; - // Process the pending update using the provided user function. let handle = Handle::current(); + let update_id = processing.id(); let result = - match handle.block_on(index_handle.update(index_uuid, processing.clone(), file)) { + match handle.block_on(index_handle.update(index_uuid, processing.clone())) { Ok(result) => result, Err(e) => Err(processing.fail(e.into())), }; @@ -354,10 +343,6 @@ impl UpdateStore { wtxn.commit()?; - if let Some(ref path) = content_path { - remove_file(&path)?; - } - Ok(Some(())) } @@ -435,16 +420,16 @@ impl UpdateStore { pub fn delete_all(&self, index_uuid: Uuid) -> Result<()> { let mut txn = self.env.write_txn()?; // Contains all the content file paths that we need to be removed if the deletion was successful. 
- let mut uuids_to_remove = Vec::new(); + let uuids_to_remove = Vec::new(); let mut pendings = self.pending_queue.iter_mut(&mut txn)?.lazily_decode_data(); while let Some(Ok(((_, uuid, _), pending))) = pendings.next() { if uuid == index_uuid { - let mut pending = pending.decode()?; - if let Some(update_uuid) = pending.content.take() { - uuids_to_remove.push(update_uuid); - } + let mut _pending = pending.decode()?; + //if let Some(update_uuid) = pending.content.take() { + //uuids_to_remove.push(update_uuid); + //} // Invariant check: we can only delete the current entry when we don't hold // references to it anymore. This must be done after we have retrieved its content. @@ -486,7 +471,7 @@ impl UpdateStore { // them. uuids_to_remove .iter() - .map(|uuid| update_uuid_to_file_path(&self.path, *uuid)) + .map(|uuid: &Uuid| update_files_path(&self.path).join(uuid.to_string())) .for_each(|path| { let _ = remove_file(path); }); @@ -521,17 +506,17 @@ impl UpdateStore { let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data(); for entry in pendings { - let ((_, uuid, _), pending) = entry?; - if uuids.contains(&uuid) { - if let Enqueued { - content: Some(uuid), - .. - } = pending.decode()? - { - let path = update_uuid_to_file_path(&self.path, uuid); - copy(path, &update_files_path)?; - } - } + let ((_, _uuid, _), _pending) = entry?; + //if uuids.contains(&uuid) { + //if let Enqueued { + //content: Some(uuid), + //.. + //} = pending.decode()? + //{ + //let path = update_uuid_to_file_path(&self.path, uuid); + //copy(path, &update_files_path)?; + //} + //} } let path = &path.as_ref().to_path_buf(); @@ -553,18 +538,18 @@ impl UpdateStore { } pub fn get_info(&self) -> Result { - let mut size = self.env.size(); + let size = self.env.size(); let txn = self.env.read_txn()?; for entry in self.pending_queue.iter(&txn)? { - let (_, pending) = entry?; - if let Enqueued { - content: Some(uuid), - .. 
- } = pending - { - let path = update_uuid_to_file_path(&self.path, uuid); - size += File::open(path)?.metadata()?.len(); - } + let (_, _pending) = entry?; + //if let Enqueued { + //content: Some(uuid), + //.. + //} = pending + //{ + //let path = update_uuid_to_file_path(&self.path, uuid); + //size += File::open(path)?.metadata()?.len(); + //} } let processing = match *self.state.read() { State::Processing(uuid, _) => Some(uuid), @@ -575,12 +560,6 @@ impl UpdateStore { } } -fn update_uuid_to_file_path(root: impl AsRef, uuid: Uuid) -> PathBuf { - root.as_ref() - .join(UPDATE_DIR) - .join(format!("update_{}", uuid)) -} - #[cfg(test)] mod test { use super::*; diff --git a/meilisearch-http/src/index_controller/update_file_store.rs b/meilisearch-http/src/index_controller/update_file_store.rs new file mode 100644 index 000000000..1c60bcec9 --- /dev/null +++ b/meilisearch-http/src/index_controller/update_file_store.rs @@ -0,0 +1,63 @@ +use std::fs::File; +use std::path::{Path, PathBuf}; +use std::ops::{Deref, DerefMut}; + +use tempfile::NamedTempFile; +use uuid::Uuid; + +use super::error::Result; + +pub struct UpdateFile { + path: PathBuf, + file: NamedTempFile, +} + +impl UpdateFile { + pub fn persist(self) { + println!("persisting in {}", self.path.display()); + self.file.persist(&self.path).unwrap(); + } +} + +impl Deref for UpdateFile { + type Target = NamedTempFile; + + fn deref(&self) -> &Self::Target { + &self.file + } +} + +impl DerefMut for UpdateFile { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.file + } +} + +#[derive(Clone, Debug)] +pub struct UpdateFileStore { + path: PathBuf, +} + +impl UpdateFileStore { + pub fn new(path: impl AsRef) -> Result { + let path = path.as_ref().join("updates/updates_files"); + std::fs::create_dir_all(&path).unwrap(); + Ok(Self { path }) + } + + pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> { + let file = NamedTempFile::new().unwrap(); + let uuid = Uuid::new_v4(); + let path = 
self.path.join(uuid.to_string()); + let update_file = UpdateFile { file, path }; + + Ok((uuid, update_file)) + } + + pub fn get_update(&self, uuid: Uuid) -> Result { + let path = self.path.join(uuid.to_string()); + println!("reading in {}", path.display()); + let file = File::open(path).unwrap(); + Ok(file) + } +} diff --git a/meilisearch-http/src/index_controller/updates.rs b/meilisearch-http/src/index_controller/updates.rs index d02438d3c..7065b0462 100644 --- a/meilisearch-http/src/index_controller/updates.rs +++ b/meilisearch-http/src/index_controller/updates.rs @@ -1,13 +1,14 @@ use chrono::{DateTime, Utc}; -use milli::update::{DocumentAdditionResult, IndexDocumentsMethod, UpdateFormat}; +use milli::update::{DocumentAdditionResult, IndexDocumentsMethod}; use serde::{Deserialize, Serialize}; -use uuid::Uuid; use crate::{ error::ResponseError, index::{Settings, Unchecked}, }; +use super::update_actor::RegisterUpdate; + #[derive(Debug, Clone, Serialize, Deserialize)] pub enum UpdateResult { DocumentsAddition(DocumentAdditionResult), @@ -21,7 +22,6 @@ pub enum UpdateResult { pub enum UpdateMeta { DocumentsAddition { method: IndexDocumentsMethod, - format: UpdateFormat, primary_key: Option, }, ClearDocuments, @@ -35,18 +35,16 @@ pub enum UpdateMeta { #[serde(rename_all = "camelCase")] pub struct Enqueued { pub update_id: u64, - pub meta: UpdateMeta, + pub meta: RegisterUpdate, pub enqueued_at: DateTime, - pub content: Option, } impl Enqueued { - pub fn new(meta: UpdateMeta, update_id: u64, content: Option) -> Self { + pub fn new(meta: RegisterUpdate, update_id: u64) -> Self { Self { enqueued_at: Utc::now(), meta, update_id, - content, } } @@ -64,7 +62,7 @@ impl Enqueued { } } - pub fn meta(&self) -> &UpdateMeta { + pub fn meta(&self) -> &RegisterUpdate { &self.meta } @@ -87,7 +85,7 @@ impl Processed { self.from.id() } - pub fn meta(&self) -> &UpdateMeta { + pub fn meta(&self) -> &RegisterUpdate { self.from.meta() } } @@ -105,7 +103,7 @@ impl Processing { 
self.from.id() } - pub fn meta(&self) -> &UpdateMeta { + pub fn meta(&self) -> &RegisterUpdate { self.from.meta() } @@ -139,7 +137,7 @@ impl Aborted { self.from.id() } - pub fn meta(&self) -> &UpdateMeta { + pub fn meta(&self) -> &RegisterUpdate { self.from.meta() } } @@ -158,7 +156,7 @@ impl Failed { self.from.id() } - pub fn meta(&self) -> &UpdateMeta { + pub fn meta(&self) -> &RegisterUpdate { self.from.meta() } } @@ -184,7 +182,7 @@ impl UpdateStatus { } } - pub fn meta(&self) -> &UpdateMeta { + pub fn meta(&self) -> &RegisterUpdate { match self { UpdateStatus::Processing(u) => u.meta(), UpdateStatus::Enqueued(u) => u.meta(), diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index 0875806ac..daa18f480 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -1,7 +1,6 @@ use std::env; use actix_web::HttpServer; -use main_error::MainError; use meilisearch_http::{create_app, Data, Opt}; use structopt::StructOpt; @@ -12,10 +11,7 @@ use meilisearch_http::analytics; #[global_allocator] static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; -#[actix_web::main] -async fn main() -> Result<(), MainError> { - let opt = Opt::from_args(); - +fn setup(opt: &Opt) -> anyhow::Result<()> { let mut log_builder = env_logger::Builder::new(); log_builder.parse_filters(&opt.log_level); if opt.log_level == "info" { @@ -25,13 +21,34 @@ async fn main() -> Result<(), MainError> { log_builder.init(); + // Set the tempfile directory in the current db path, to avoid cross device references. Also + // remove the previous outstanding files found there + // + // TODO: if two processes open the same db, one might delete the other tmpdir. Need to make + // sure that no one is using it before deleting it. 
+ let temp_path = opt.db_path.join("tmp"); + // Ignore error if tempdir doesn't exist + let _ = std::fs::remove_dir_all(&temp_path); + std::fs::create_dir_all(&temp_path)?; + if cfg!(windows) { + std::env::set_var("TMP", temp_path); + } else { + std::env::set_var("TMPDIR", temp_path); + } + + Ok(()) +} + +#[actix_web::main] +async fn main() -> anyhow::Result<()> { + let opt = Opt::from_args(); + + setup(&opt)?; + match opt.env.as_ref() { "production" => { if opt.master_key.is_none() { - return Err( - "In production mode, the environment variable MEILI_MASTER_KEY is mandatory" - .into(), - ); + anyhow::bail!("In production mode, the environment variable MEILI_MASTER_KEY is mandatory") } } "development" => (), @@ -54,7 +71,7 @@ async fn main() -> Result<(), MainError> { Ok(()) } -async fn run_http(data: Data, opt: Opt) -> Result<(), Box> { +async fn run_http(data: Data, opt: Opt) -> anyhow::Result<()> { let _enable_dashboard = &opt.env == "development"; let http_server = HttpServer::new(move || create_app!(data, _enable_dashboard)) // Disable signals allows the server to terminate immediately when a user enter CTRL-C diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 39966092e..3a0ab8acb 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -5,7 +5,7 @@ use std::ops::Deref; use std::path::PathBuf; use std::str::FromStr; use std::sync::Arc; -use std::{error, fs}; +use std::fs; use byte_unit::Byte; use milli::CompressionType; @@ -184,7 +184,7 @@ pub struct Opt { } impl Opt { - pub fn get_ssl_config(&self) -> Result, Box> { + pub fn get_ssl_config(&self) -> anyhow::Result> { if let (Some(cert_path), Some(key_path)) = (&self.ssl_cert_path, &self.ssl_key_path) { let client_auth = match &self.ssl_auth_path { Some(auth_path) => { @@ -210,7 +210,7 @@ impl Opt { let ocsp = load_ocsp(&self.ssl_ocsp_path)?; config .set_single_cert_with_ocsp_and_sct(certs, privkey, ocsp, vec![]) - .map_err(|_| "bad 
certificates/private key")?; + .map_err(|_| anyhow::anyhow!("bad certificates/private key"))?; if self.ssl_resumption { config.set_persistence(rustls::ServerSessionMemoryCache::new(256)); @@ -284,25 +284,25 @@ fn total_memory_bytes() -> Option { } } -fn load_certs(filename: PathBuf) -> Result, Box> { - let certfile = fs::File::open(filename).map_err(|_| "cannot open certificate file")?; +fn load_certs(filename: PathBuf) -> anyhow::Result> { + let certfile = fs::File::open(filename).map_err(|_| anyhow::anyhow!("cannot open certificate file"))?; let mut reader = BufReader::new(certfile); - Ok(certs(&mut reader).map_err(|_| "cannot read certificate file")?) + Ok(certs(&mut reader).map_err(|_| anyhow::anyhow!("cannot read certificate file"))?) } -fn load_private_key(filename: PathBuf) -> Result> { +fn load_private_key(filename: PathBuf) -> anyhow::Result { let rsa_keys = { let keyfile = - fs::File::open(filename.clone()).map_err(|_| "cannot open private key file")?; + fs::File::open(filename.clone()).map_err(|_| anyhow::anyhow!("cannot open private key file"))?; let mut reader = BufReader::new(keyfile); - rsa_private_keys(&mut reader).map_err(|_| "file contains invalid rsa private key")? + rsa_private_keys(&mut reader).map_err(|_| anyhow::anyhow!("file contains invalid rsa private key"))? }; let pkcs8_keys = { - let keyfile = fs::File::open(filename).map_err(|_| "cannot open private key file")?; + let keyfile = fs::File::open(filename).map_err(|_| anyhow::anyhow!("cannot open private key file"))?; let mut reader = BufReader::new(keyfile); pkcs8_private_keys(&mut reader) - .map_err(|_| "file contains invalid pkcs8 private key (encrypted keys not supported)")? + .map_err(|_| anyhow::anyhow!("file contains invalid pkcs8 private key (encrypted keys not supported)"))? 
}; // prefer to load pkcs8 keys @@ -314,14 +314,14 @@ fn load_private_key(filename: PathBuf) -> Result) -> Result, Box> { +fn load_ocsp(filename: &Option) -> anyhow::Result> { let mut ret = Vec::new(); if let Some(ref name) = filename { fs::File::open(name) - .map_err(|_| "cannot open ocsp file")? + .map_err(|_| anyhow::anyhow!("cannot open ocsp file"))? .read_to_end(&mut ret) - .map_err(|_| "cannot read oscp file")?; + .map_err(|_| anyhow::anyhow!("cannot read oscp file"))?; } Ok(ret) diff --git a/meilisearch-http/src/routes/indexes/documents.rs b/meilisearch-http/src/routes/indexes/documents.rs index a4bf465b5..be80a55a0 100644 --- a/meilisearch-http/src/routes/indexes/documents.rs +++ b/meilisearch-http/src/routes/indexes/documents.rs @@ -1,12 +1,17 @@ +use actix_web::error::PayloadError; use actix_web::{web, HttpResponse}; +use actix_web::web::Bytes; +use futures::{Stream, StreamExt}; use log::debug; -use milli::update::{IndexDocumentsMethod, UpdateFormat}; +use milli::update::IndexDocumentsMethod; use serde::Deserialize; -use serde_json::Value; +//use serde_json::Value; +use tokio::sync::mpsc; use crate::error::ResponseError; use crate::extractors::authentication::{policies::*, GuardedData}; use crate::extractors::payload::Payload; +use crate::index_controller::{DocumentAdditionFormat, Update}; use crate::routes::IndexParam; use crate::Data; @@ -32,6 +37,17 @@ macro_rules! 
guard_content_type { guard_content_type!(guard_json, "application/json"); */ +/// This is required because Payload is not Sync nor Send +fn payload_to_stream(mut payload: Payload) -> impl Stream> { + let (snd, recv) = mpsc::channel(1); + tokio::task::spawn_local(async move { + while let Some(data) = payload.next().await { + let _ = snd.send(data).await; + } + }); + tokio_stream::wrappers::ReceiverStream::new(recv) +} + fn guard_json(head: &actix_web::dev::RequestHead) -> bool { if let Some(_content_type) = head.headers.get("Content-Type") { // CURRENTLY AND FOR THIS RELEASE ONLY WE DECIDED TO INTERPRET ALL CONTENT-TYPES AS JSON @@ -60,14 +76,14 @@ pub fn configure(cfg: &mut web::ServiceConfig) { .route(web::get().to(get_all_documents)) .route(web::post().guard(guard_json).to(add_documents)) .route(web::put().guard(guard_json).to(update_documents)) - .route(web::delete().to(clear_all_documents)), + //.route(web::delete().to(clear_all_documents)), ) // this route needs to be before the /documents/{document_id} to match properly - .service(web::resource("/delete-batch").route(web::post().to(delete_documents))) + //.service(web::resource("/delete-batch").route(web::post().to(delete_documents))) .service( web::resource("/{document_id}") .route(web::get().to(get_document)) - .route(web::delete().to(delete_document)), + //.route(web::delete().to(delete_document)), ); } @@ -84,16 +100,16 @@ pub async fn get_document( Ok(HttpResponse::Ok().json(document)) } -pub async fn delete_document( - data: GuardedData, - path: web::Path, -) -> Result { - let update_status = data - .delete_documents(path.index_uid.clone(), vec![path.document_id.clone()]) - .await?; - debug!("returns: {:?}", update_status); - Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) -} +//pub async fn delete_document( + //data: GuardedData, + //path: web::Path, +//) -> Result { + //let update_status = data + //.delete_documents(path.index_uid.clone(), 
vec![path.document_id.clone()]) + //.await?; + //debug!("returns: {:?}", update_status); + //Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) +//} #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase", deny_unknown_fields)] @@ -147,14 +163,14 @@ pub async fn add_documents( body: Payload, ) -> Result { debug!("called with params: {:?}", params); + let update = Update::DocumentAddition { + payload: Box::new(payload_to_stream(body)), + primary_key: params.primary_key.clone(), + method: IndexDocumentsMethod::ReplaceDocuments, + format: DocumentAdditionFormat::Json, + }; let update_status = data - .add_documents( - path.into_inner().index_uid, - IndexDocumentsMethod::ReplaceDocuments, - UpdateFormat::Json, - body, - params.primary_key.clone(), - ) + .register_update(path.index_uid.as_str(), update) .await?; debug!("returns: {:?}", update_status); @@ -170,45 +186,45 @@ pub async fn update_documents( body: Payload, ) -> Result { debug!("called with params: {:?}", params); - let update = data - .add_documents( - path.into_inner().index_uid, - IndexDocumentsMethod::UpdateDocuments, - UpdateFormat::Json, - body, - params.primary_key.clone(), - ) + let update = Update::DocumentAddition { + payload: Box::new(payload_to_stream(body)), + primary_key: params.primary_key.clone(), + method: IndexDocumentsMethod::UpdateDocuments, + format: DocumentAdditionFormat::Json, + }; + let update_status = data + .register_update(path.index_uid.as_str(), update) .await?; - debug!("returns: {:?}", update); - Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update.id() }))) -} - -pub async fn delete_documents( - data: GuardedData, - path: web::Path, - body: web::Json>, -) -> Result { - debug!("called with params: {:?}", body); - let ids = body - .iter() - .map(|v| { - v.as_str() - .map(String::from) - .unwrap_or_else(|| v.to_string()) - }) - .collect(); - - let update_status = data.delete_documents(path.index_uid.clone(), 
ids).await?; debug!("returns: {:?}", update_status); Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) } -pub async fn clear_all_documents( - data: GuardedData, - path: web::Path, -) -> Result { - let update_status = data.clear_documents(path.index_uid.clone()).await?; - debug!("returns: {:?}", update_status); - Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) -} +//pub async fn delete_documents( + //data: GuardedData, + //path: web::Path, + //body: web::Json>, +//) -> Result { + //debug!("called with params: {:?}", body); + //let ids = body + //.iter() + //.map(|v| { + //v.as_str() + //.map(String::from) + //.unwrap_or_else(|| v.to_string()) + //}) + //.collect(); + + //let update_status = data.delete_documents(path.index_uid.clone(), ids).await?; + //debug!("returns: {:?}", update_status); + //Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) +//} + +//pub async fn clear_all_documents( + //data: GuardedData, + //path: web::Path, +//) -> Result { + //let update_status = data.clear_documents(path.index_uid.clone()).await?; + //debug!("returns: {:?}", update_status); + //Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) +//} diff --git a/meilisearch-http/src/routes/indexes/mod.rs b/meilisearch-http/src/routes/indexes/mod.rs index 8314bf032..ef68215b4 100644 --- a/meilisearch-http/src/routes/indexes/mod.rs +++ b/meilisearch-http/src/routes/indexes/mod.rs @@ -17,7 +17,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( web::resource("") .route(web::get().to(list_indexes)) - .route(web::post().to(create_index)), + //.route(web::post().to(create_index)), ) .service( web::scope("/{index_uid}") @@ -25,13 +25,13 @@ pub fn configure(cfg: &mut web::ServiceConfig) { web::resource("") .route(web::get().to(get_index)) .route(web::put().to(update_index)) - .route(web::delete().to(delete_index)), + 
//.route(web::delete().to(delete_index)), ) .service(web::resource("/stats").route(web::get().to(get_index_stats))) .service(web::scope("/documents").configure(documents::configure)) .service(web::scope("/search").configure(search::configure)) .service(web::scope("/updates").configure(updates::configure)) - .service(web::scope("/settings").configure(settings::configure)), + //.service(web::scope("/settings").configure(settings::configure)), ); } @@ -48,14 +48,14 @@ pub struct IndexCreateRequest { primary_key: Option, } -pub async fn create_index( - data: GuardedData, - body: web::Json, -) -> Result { - let body = body.into_inner(); - let meta = data.create_index(body.uid, body.primary_key).await?; - Ok(HttpResponse::Created().json(meta)) -} +//pub async fn create_index( + //data: GuardedData, + //body: web::Json, +//) -> Result { + //let body = body.into_inner(); + //let meta = data.create_index(body.uid, body.primary_key).await?; + //Ok(HttpResponse::Created().json(meta)) +//} #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase", deny_unknown_fields)] @@ -97,13 +97,13 @@ pub async fn update_index( Ok(HttpResponse::Ok().json(meta)) } -pub async fn delete_index( - data: GuardedData, - path: web::Path, -) -> Result { - data.delete_index(path.index_uid.clone()).await?; - Ok(HttpResponse::NoContent().finish()) -} +//pub async fn delete_index( + //data: GuardedData, + //path: web::Path, +//) -> Result { + //data.delete_index(path.index_uid.clone()).await?; + //Ok(HttpResponse::NoContent().finish()) +//} pub async fn get_index_stats( data: GuardedData, diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs index 05a4f308f..051483b20 100644 --- a/meilisearch-http/src/routes/indexes/settings.rs +++ b/meilisearch-http/src/routes/indexes/settings.rs @@ -1,185 +1,184 @@ -use actix_web::{web, HttpResponse}; -use log::debug; +//use log::debug; -use crate::extractors::authentication::{policies::*, GuardedData}; 
-use crate::index::Settings; -use crate::Data; -use crate::{error::ResponseError, index::Unchecked}; +//use crate::extractors::authentication::{policies::*, GuardedData}; +//use crate::index::Settings; +//use crate::Data; +//use crate::error::ResponseError; -#[macro_export] -macro_rules! make_setting_route { - ($route:literal, $type:ty, $attr:ident, $camelcase_attr:literal) => { - pub mod $attr { - use log::debug; - use actix_web::{web, HttpResponse, Resource}; +//#[macro_export] +//macro_rules! make_setting_route { + //($route:literal, $type:ty, $attr:ident, $camelcase_attr:literal) => { + //pub mod $attr { + //use log::debug; + //use actix_web::{web, HttpResponse, Resource}; - use milli::update::Setting; + //use milli::update::Setting; - use crate::data; - use crate::error::ResponseError; - use crate::index::Settings; - use crate::extractors::authentication::{GuardedData, policies::*}; + //use crate::data; + //use crate::error::ResponseError; + //use crate::index::Settings; + //use crate::extractors::authentication::{GuardedData, policies::*}; - pub async fn delete( - data: GuardedData, - index_uid: web::Path, - ) -> Result { - use crate::index::Settings; - let settings = Settings { - $attr: Setting::Reset, - ..Default::default() - }; - let update_status = data.update_settings(index_uid.into_inner(), settings, false).await?; - debug!("returns: {:?}", update_status); - Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) - } + //pub async fn delete( + //data: GuardedData, + //index_uid: web::Path, + //) -> Result { + //use crate::index::Settings; + //let settings = Settings { + //$attr: Setting::Reset, + //..Default::default() + //}; + //let update_status = data.update_settings(index_uid.into_inner(), settings, false).await?; + //debug!("returns: {:?}", update_status); + //Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) + //} - pub async fn update( - data: GuardedData, - index_uid: 
actix_web::web::Path, - body: actix_web::web::Json>, - ) -> std::result::Result { - let settings = Settings { - $attr: match body.into_inner() { - Some(inner_body) => Setting::Set(inner_body), - None => Setting::Reset - }, - ..Default::default() - }; + //pub async fn update( + //data: GuardedData, + //index_uid: actix_web::web::Path, + //body: actix_web::web::Json>, + //) -> std::result::Result { + //let settings = Settings { + //$attr: match body.into_inner() { + //Some(inner_body) => Setting::Set(inner_body), + //None => Setting::Reset + //}, + //..Default::default() + //}; - let update_status = data.update_settings(index_uid.into_inner(), settings, true).await?; - debug!("returns: {:?}", update_status); - Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) - } + //let update_status = data.update_settings(index_uid.into_inner(), settings, true).await?; + //debug!("returns: {:?}", update_status); + //Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) + //} - pub async fn get( - data: GuardedData, - index_uid: actix_web::web::Path, - ) -> std::result::Result { - let settings = data.settings(index_uid.into_inner()).await?; - debug!("returns: {:?}", settings); - let mut json = serde_json::json!(&settings); - let val = json[$camelcase_attr].take(); - Ok(HttpResponse::Ok().json(val)) - } + //pub async fn get( + //data: GuardedData, + //index_uid: actix_web::web::Path, + //) -> std::result::Result { + //let settings = data.settings(index_uid.into_inner()).await?; + //debug!("returns: {:?}", settings); + //let mut json = serde_json::json!(&settings); + //let val = json[$camelcase_attr].take(); + //Ok(HttpResponse::Ok().json(val)) + //} - pub fn resources() -> Resource { - Resource::new($route) - .route(web::get().to(get)) - .route(web::post().to(update)) - .route(web::delete().to(delete)) - } - } - }; -} + //pub fn resources() -> Resource { + //Resource::new($route) + //.route(web::get().to(get)) 
+ //.route(web::post().to(update)) + //.route(web::delete().to(delete)) + //} + //} + //}; +//} -make_setting_route!( - "/filterable-attributes", - std::collections::BTreeSet, - filterable_attributes, - "filterableAttributes" -); +//make_setting_route!( + //"/filterable-attributes", + //std::collections::BTreeSet, + //filterable_attributes, + //"filterableAttributes" +//); -make_setting_route!( - "/sortable-attributes", - std::collections::BTreeSet, - sortable_attributes, - "sortableAttributes" -); +//make_setting_route!( + //"/sortable-attributes", + //std::collections::BTreeSet, + //sortable_attributes, + //"sortableAttributes" +//); -make_setting_route!( - "/displayed-attributes", - Vec, - displayed_attributes, - "displayedAttributes" -); +//make_setting_route!( + //"/displayed-attributes", + //Vec, + //displayed_attributes, + //"displayedAttributes" +//); -make_setting_route!( - "/searchable-attributes", - Vec, - searchable_attributes, - "searchableAttributes" -); +//make_setting_route!( + //"/searchable-attributes", + //Vec, + //searchable_attributes, + //"searchableAttributes" +//); -make_setting_route!( - "/stop-words", - std::collections::BTreeSet, - stop_words, - "stopWords" -); +//make_setting_route!( + //"/stop-words", + //std::collections::BTreeSet, + //stop_words, + //"stopWords" +//); -make_setting_route!( - "/synonyms", - std::collections::BTreeMap>, - synonyms, - "synonyms" -); +//make_setting_route!( + //"/synonyms", + //std::collections::BTreeMap>, + //synonyms, + //"synonyms" +//); -make_setting_route!( - "/distinct-attribute", - String, - distinct_attribute, - "distinctAttribute" -); +//make_setting_route!( + //"/distinct-attribute", + //String, + //distinct_attribute, + //"distinctAttribute" +//); -make_setting_route!("/ranking-rules", Vec, ranking_rules, "rankingRules"); +//make_setting_route!("/ranking-rules", Vec, ranking_rules, "rankingRules"); -macro_rules! 
generate_configure { - ($($mod:ident),*) => { - pub fn configure(cfg: &mut web::ServiceConfig) { - cfg.service( - web::resource("") - .route(web::post().to(update_all)) - .route(web::get().to(get_all)) - .route(web::delete().to(delete_all))) - $(.service($mod::resources()))*; - } - }; -} +//macro_rules! generate_configure { + //($($mod:ident),*) => { + //pub fn configure(cfg: &mut web::ServiceConfig) { + //cfg.service( + //web::resource("") + ////.route(web::post().to(update_all)) + //.route(web::get().to(get_all)) + ////.route(web::delete().to(delete_all))) + //$(.service($mod::resources()))*; + //} + //}; +//} -generate_configure!( - filterable_attributes, - sortable_attributes, - displayed_attributes, - searchable_attributes, - distinct_attribute, - stop_words, - synonyms, - ranking_rules -); +//generate_configure!( + //filterable_attributes, + //sortable_attributes, + //displayed_attributes, + //searchable_attributes, + //distinct_attribute, + //stop_words, + //synonyms, + //ranking_rules +//); -pub async fn update_all( - data: GuardedData, - index_uid: web::Path, - body: web::Json>, -) -> Result { - let settings = body.into_inner().check(); - let update_result = data - .update_settings(index_uid.into_inner(), settings, true) - .await?; - let json = serde_json::json!({ "updateId": update_result.id() }); - debug!("returns: {:?}", json); - Ok(HttpResponse::Accepted().json(json)) -} +//pub async fn update_all( + //data: GuardedData, + //index_uid: web::Path, + //body: web::Json>, +//) -> Result { + //let settings = body.into_inner().check(); + //let update_result = data + //.update_settings(index_uid.into_inner(), settings, true) + //.await?; + //let json = serde_json::json!({ "updateId": update_result.id() }); + //debug!("returns: {:?}", json); + //Ok(HttpResponse::Accepted().json(json)) +//} -pub async fn get_all( - data: GuardedData, - index_uid: web::Path, -) -> Result { - let settings = data.settings(index_uid.into_inner()).await?; - debug!("returns: {:?}", 
settings); - Ok(HttpResponse::Ok().json(settings)) -} +//pub async fn get_all( + //data: GuardedData, + //index_uid: web::Path, +//) -> Result { + //let settings = data.settings(index_uid.into_inner()).await?; + //debug!("returns: {:?}", settings); + //Ok(HttpResponse::Ok().json(settings)) +//} -pub async fn delete_all( - data: GuardedData, - index_uid: web::Path, -) -> Result { - let settings = Settings::cleared(); - let update_result = data - .update_settings(index_uid.into_inner(), settings, false) - .await?; - let json = serde_json::json!({ "updateId": update_result.id() }); - debug!("returns: {:?}", json); - Ok(HttpResponse::Accepted().json(json)) -} +//pub async fn delete_all( + //data: GuardedData, + //index_uid: web::Path, +//) -> Result { + //let settings = Settings::cleared(); + //let update_result = data + //.update_settings(index_uid.into_inner(), settings, false) + //.await?; + //let json = serde_json::json!({ "updateId": update_result.id() }); + //debug!("returns: {:?}", json); + //Ok(HttpResponse::Accepted().json(json)) +//} diff --git a/meilisearch-http/src/routes/mod.rs b/meilisearch-http/src/routes/mod.rs index 2bacf9ed6..e6119ffe9 100644 --- a/meilisearch-http/src/routes/mod.rs +++ b/meilisearch-http/src/routes/mod.rs @@ -8,7 +8,8 @@ use serde::{Deserialize, Serialize}; use crate::error::ResponseError; use crate::extractors::authentication::{policies::*, GuardedData}; use crate::index::{Settings, Unchecked}; -use crate::index_controller::{UpdateMeta, UpdateResult, UpdateStatus}; +use crate::index_controller::update_actor::RegisterUpdate; +use crate::index_controller::{UpdateResult, UpdateStatus}; use crate::Data; mod dump; @@ -50,7 +51,7 @@ impl From<&UpdateStatus> for UpdateType { fn from(other: &UpdateStatus) -> Self { use milli::update::IndexDocumentsMethod::*; match other.meta() { - UpdateMeta::DocumentsAddition { method, .. } => { + RegisterUpdate::DocumentAddition{ method, .. 
} => { let number = match other { UpdateStatus::Processed(processed) => match processed.success { UpdateResult::DocumentsAddition(ref addition) => { @@ -67,13 +68,13 @@ impl From<&UpdateStatus> for UpdateType { _ => unreachable!(), } } - UpdateMeta::ClearDocuments => UpdateType::ClearAll, - UpdateMeta::DeleteDocuments { ids } => UpdateType::DocumentsDeletion { - number: Some(ids.len()), - }, - UpdateMeta::Settings(settings) => UpdateType::Settings { - settings: settings.clone(), - }, + //UpdateMeta::ClearDocuments => UpdateType::ClearAll, + //UpdateMeta::DeleteDocuments { ids } => UpdateType::DocumentsDeletion { + //number: Some(ids.len()), + //}, + //UpdateMeta::Settings(settings) => UpdateType::Settings { + //settings: settings.clone(), + //}, } } } From 09d4e37044829aa106a9924d726aa21e9d88c48e Mon Sep 17 00:00:00 2001 From: mpostma Date: Mon, 20 Sep 2021 15:31:03 +0200 Subject: [PATCH 02/37] split data and api keys --- meilisearch-http/src/data/mod.rs | 49 +-------------------------- meilisearch-http/src/lib.rs | 54 +++++++++++++++++++++++++----- meilisearch-http/src/main.rs | 11 +++--- meilisearch-http/src/routes/mod.rs | 6 ++-- 4 files changed, 56 insertions(+), 64 deletions(-) diff --git a/meilisearch-http/src/data/mod.rs b/meilisearch-http/src/data/mod.rs index c0e83155c..a4cd274ff 100644 --- a/meilisearch-http/src/data/mod.rs +++ b/meilisearch-http/src/data/mod.rs @@ -1,8 +1,6 @@ use std::ops::Deref; use std::sync::Arc; -use sha2::Digest; - use crate::index::{Checked, Settings}; use crate::index_controller::{ error::Result, DumpInfo, IndexController, IndexMetadata, IndexStats, Stats, @@ -27,32 +25,7 @@ impl Deref for Data { pub struct DataInner { pub index_controller: IndexController, - pub api_keys: ApiKeys, - options: Opt, -} - -#[derive(Clone)] -pub struct ApiKeys { - pub public: Option, - pub private: Option, - pub master: Option, -} - -impl ApiKeys { - pub fn generate_missing_api_keys(&mut self) { - if let Some(master_key) = &self.master { - if 
self.private.is_none() { - let key = format!("{}-private", master_key); - let sha = sha2::Sha256::digest(key.as_bytes()); - self.private = Some(format!("{:x}", sha)); - } - if self.public.is_none() { - let key = format!("{}-public", master_key); - let sha = sha2::Sha256::digest(key.as_bytes()); - self.public = Some(format!("{:x}", sha)); - } - } - } + //pub api_keys: ApiKeys, } impl Data { @@ -61,18 +34,8 @@ impl Data { let index_controller = IndexController::new(&path, &options)?; - let mut api_keys = ApiKeys { - master: options.clone().master_key, - private: None, - public: None, - }; - - api_keys.generate_missing_api_keys(); - let inner = DataInner { index_controller, - api_keys, - options, }; let inner = Arc::new(inner); @@ -120,14 +83,4 @@ impl Data { pub async fn dump_status(&self, uid: String) -> Result { Ok(self.index_controller.dump_info(uid).await?) } - - #[inline] - pub fn http_payload_size_limit(&self) -> usize { - self.options.http_payload_size_limit.get_bytes() as usize - } - - #[inline] - pub fn api_keys(&self) -> &ApiKeys { - &self.api_keys - } } diff --git a/meilisearch-http/src/lib.rs b/meilisearch-http/src/lib.rs index 8bd16ecc5..af7e776d7 100644 --- a/meilisearch-http/src/lib.rs +++ b/meilisearch-http/src/lib.rs @@ -58,11 +58,41 @@ use actix_web::web; use extractors::authentication::policies::*; use extractors::payload::PayloadConfig; +use sha2::Digest; -pub fn configure_data(config: &mut web::ServiceConfig, data: Data) { - let http_payload_size_limit = data.http_payload_size_limit(); +#[derive(Clone)] +pub struct ApiKeys { + pub public: Option, + pub private: Option, + pub master: Option, +} + +impl ApiKeys { + pub fn generate_missing_api_keys(&mut self) { + if let Some(master_key) = &self.master { + if self.private.is_none() { + let key = format!("{}-private", master_key); + let sha = sha2::Sha256::digest(key.as_bytes()); + self.private = Some(format!("{:x}", sha)); + } + if self.public.is_none() { + let key = format!("{}-public", master_key); 
+ let sha = sha2::Sha256::digest(key.as_bytes()); + self.public = Some(format!("{:x}", sha)); + } + } + } +} + +pub fn configure_data( + config: &mut web::ServiceConfig, + data: Data, + opt: &Opt, + ) { + let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize; config .app_data(web::Data::new(data.clone())) + // TODO!: Why are we passing the data with two different things? .app_data(data) .app_data( web::JsonConfig::default() @@ -77,8 +107,15 @@ pub fn configure_data(config: &mut web::ServiceConfig, data: Data) { ); } -pub fn configure_auth(config: &mut web::ServiceConfig, data: &Data) { - let keys = data.api_keys(); +pub fn configure_auth(config: &mut web::ServiceConfig, opts: &Opt) { + let mut keys = ApiKeys { + master: opts.master_key.clone(), + private: None, + public: None, + }; + + keys.generate_missing_api_keys(); + let auth_config = if let Some(ref master_key) = keys.master { let private_key = keys.private.as_ref().unwrap(); let public_key = keys.public.as_ref().unwrap(); @@ -94,7 +131,8 @@ pub fn configure_auth(config: &mut web::ServiceConfig, data: &Data) { AuthConfig::NoAuth }; - config.app_data(auth_config); + config.app_data(auth_config) + .app_data(keys); } #[cfg(feature = "mini-dashboard")] @@ -138,7 +176,7 @@ pub fn dashboard(config: &mut web::ServiceConfig, _enable_frontend: bool) { #[macro_export] macro_rules! create_app { - ($data:expr, $enable_frontend:expr) => {{ + ($data:expr, $enable_frontend:expr, $opt:expr) => {{ use actix_cors::Cors; use actix_web::middleware::TrailingSlash; use actix_web::App; @@ -147,8 +185,8 @@ macro_rules! 
create_app { use meilisearch_http::{configure_auth, configure_data, dashboard}; App::new() - .configure(|s| configure_data(s, $data.clone())) - .configure(|s| configure_auth(s, &$data)) + .configure(|s| configure_data(s, $data.clone(), &$opt)) + .configure(|s| configure_auth(s, &$opt)) .configure(routes::configure) .configure(|s| dashboard(s, $enable_frontend)) .wrap( diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index daa18f480..3c796f29d 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -64,7 +64,7 @@ async fn main() -> anyhow::Result<()> { tokio::task::spawn(analytics::analytics_sender(analytics_data, analytics_opt)); } - print_launch_resume(&opt, &data); + print_launch_resume(&opt); run_http(data, opt).await?; @@ -73,7 +73,8 @@ async fn main() -> anyhow::Result<()> { async fn run_http(data: Data, opt: Opt) -> anyhow::Result<()> { let _enable_dashboard = &opt.env == "development"; - let http_server = HttpServer::new(move || create_app!(data, _enable_dashboard)) + let opt_clone = opt.clone(); + let http_server = HttpServer::new(move || create_app!(data, _enable_dashboard, opt_clone)) // Disable signals allows the server to terminate immediately when a user enter CTRL-C .disable_signals(); @@ -83,12 +84,12 @@ async fn run_http(data: Data, opt: Opt) -> anyhow::Result<()> { .run() .await?; } else { - http_server.bind(opt.http_addr)?.run().await?; + http_server.bind(&opt.http_addr)?.run().await?; } Ok(()) } -pub fn print_launch_resume(opt: &Opt, data: &Data) { +pub fn print_launch_resume(opt: &Opt) { let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown"); let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown"); @@ -133,7 +134,7 @@ Anonymous telemetry: \"Enabled\"" eprintln!(); - if data.api_keys().master.is_some() { + if opt.master_key.is_some() { eprintln!("A Master Key has been set. 
Requests to MeiliSearch won't be authorized unless you provide an authentication key."); } else { eprintln!("No master key found; The server will accept unidentified requests. \ diff --git a/meilisearch-http/src/routes/mod.rs b/meilisearch-http/src/routes/mod.rs index e6119ffe9..12b0612ad 100644 --- a/meilisearch-http/src/routes/mod.rs +++ b/meilisearch-http/src/routes/mod.rs @@ -10,7 +10,7 @@ use crate::extractors::authentication::{policies::*, GuardedData}; use crate::index::{Settings, Unchecked}; use crate::index_controller::update_actor::RegisterUpdate; use crate::index_controller::{UpdateResult, UpdateStatus}; -use crate::Data; +use crate::{ApiKeys, Data}; mod dump; mod indexes; @@ -262,8 +262,8 @@ struct KeysResponse { public: Option, } -pub async fn list_keys(data: GuardedData) -> HttpResponse { - let api_keys = data.api_keys.clone(); +pub async fn list_keys(data: GuardedData) -> HttpResponse { + let api_keys = (*data).clone(); HttpResponse::Ok().json(&KeysResponse { private: api_keys.private, public: api_keys.public, From 60518449fc16f6738f2595687be861cb8bda2370 Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 21 Sep 2021 13:23:22 +0200 Subject: [PATCH 03/37] split meilisearch-http and meilisearch-lib --- Cargo.lock | 79 ++++++++++++ Cargo.toml | 1 + meilisearch-error/Cargo.toml | 1 + meilisearch-error/src/lib.rs | 2 + meilisearch-http/Cargo.toml | 1 + meilisearch-http/src/analytics.rs | 8 +- meilisearch-http/src/data/mod.rs | 86 ------------- meilisearch-http/src/data/search.rs | 34 ------ meilisearch-http/src/data/updates.rs | 32 ----- meilisearch-http/src/error.rs | 12 -- meilisearch-http/src/lib.rs | 11 +- meilisearch-http/src/main.rs | 31 ++++- meilisearch-http/src/option.rs | 52 +------- meilisearch-http/src/routes/dump.rs | 8 +- .../src/routes/indexes/documents.rs | 22 ++-- meilisearch-http/src/routes/indexes/mod.rs | 23 ++-- meilisearch-http/src/routes/indexes/search.rs | 8 +- .../src/routes/indexes/settings.rs | 6 +- 
.../src/routes/indexes/updates.rs | 10 +- meilisearch-http/src/routes/mod.rs | 22 ++-- meilisearch-lib/Cargo.toml | 72 +++++++++++ meilisearch-lib/src/error.rs | 62 ++++++++++ .../src/index/dump.rs | 2 +- .../src/index/error.rs | 0 .../src/index/mod.rs | 2 +- .../src/index/search.rs | 0 .../src/index/update_handler.rs | 4 +- .../src/index/updates.rs | 0 .../src/index_controller/dump_actor/actor.rs | 0 .../src/index_controller/dump_actor/error.rs | 0 .../dump_actor/handle_impl.rs | 0 .../dump_actor/loaders/mod.rs | 0 .../index_controller/dump_actor/loaders/v1.rs | 2 +- .../index_controller/dump_actor/loaders/v2.rs | 2 +- .../index_controller/dump_actor/message.rs | 0 .../src/index_controller/dump_actor/mod.rs | 7 +- .../src/index_controller/error.rs | 0 .../src/index_controller/index_actor/actor.rs | 6 +- .../src/index_controller/index_actor/error.rs | 0 .../index_actor/handle_impl.rs | 2 +- .../index_controller/index_actor/message.rs | 0 .../src/index_controller/index_actor/mod.rs | 0 .../src/index_controller/index_actor/store.rs | 0 .../src/index_controller/mod.rs | 110 +++++++++++++---- .../src/index_controller/snapshot.rs | 4 +- .../index_controller/update_actor/actor.rs | 0 .../index_controller/update_actor/error.rs | 0 .../update_actor/handle_impl.rs | 1 + .../index_controller/update_actor/message.rs | 0 .../src/index_controller/update_actor/mod.rs | 0 .../update_actor/store/codec.rs | 0 .../update_actor/store/dump.rs | 0 .../update_actor/store/mod.rs | 4 +- .../src/index_controller/update_file_store.rs | 0 .../src/index_controller/updates.rs | 33 +++-- .../index_controller/uuid_resolver/actor.rs | 0 .../index_controller/uuid_resolver/error.rs | 0 .../uuid_resolver/handle_impl.rs | 0 .../index_controller/uuid_resolver/message.rs | 0 .../src/index_controller/uuid_resolver/mod.rs | 0 .../index_controller/uuid_resolver/store.rs | 2 +- meilisearch-lib/src/lib.rs | 53 ++++++++ meilisearch-lib/src/options.rs | 115 ++++++++++++++++++ 63 files changed, 608 
insertions(+), 324 deletions(-) delete mode 100644 meilisearch-http/src/data/mod.rs delete mode 100644 meilisearch-http/src/data/search.rs delete mode 100644 meilisearch-http/src/data/updates.rs create mode 100644 meilisearch-lib/Cargo.toml create mode 100644 meilisearch-lib/src/error.rs rename {meilisearch-http => meilisearch-lib}/src/index/dump.rs (99%) rename {meilisearch-http => meilisearch-lib}/src/index/error.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index/mod.rs (99%) rename {meilisearch-http => meilisearch-lib}/src/index/search.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index/update_handler.rs (96%) rename {meilisearch-http => meilisearch-lib}/src/index/updates.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/dump_actor/actor.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/dump_actor/error.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/dump_actor/handle_impl.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/dump_actor/loaders/mod.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/dump_actor/loaders/v1.rs (99%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/dump_actor/loaders/v2.rs (97%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/dump_actor/message.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/dump_actor/mod.rs (96%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/error.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/index_actor/actor.rs (99%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/index_actor/error.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/index_actor/handle_impl.rs (99%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/index_actor/message.rs (100%) rename {meilisearch-http => 
meilisearch-lib}/src/index_controller/index_actor/mod.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/index_actor/store.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/mod.rs (82%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/snapshot.rs (98%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/update_actor/actor.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/update_actor/error.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/update_actor/handle_impl.rs (99%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/update_actor/message.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/update_actor/mod.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/update_actor/store/codec.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/update_actor/store/dump.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/update_actor/store/mod.rs (99%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/update_file_store.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/updates.rs (89%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/uuid_resolver/actor.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/uuid_resolver/error.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/uuid_resolver/handle_impl.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/uuid_resolver/message.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/uuid_resolver/mod.rs (100%) rename {meilisearch-http => meilisearch-lib}/src/index_controller/uuid_resolver/store.rs (99%) create mode 100644 meilisearch-lib/src/lib.rs create mode 100644 meilisearch-lib/src/options.rs diff --git a/Cargo.lock b/Cargo.lock index 
809535e1d..33660f836 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -872,6 +872,15 @@ dependencies = [ "termcolor", ] +[[package]] +name = "erased-serde" +version = "0.3.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3de9ad4541d99dc22b59134e7ff8dc3d6c988c89ecd7324bf10a8362b07a2afa" +dependencies = [ + "serde", +] + [[package]] name = "fake-simd" version = "0.1.2" @@ -1571,6 +1580,7 @@ name = "meilisearch-error" version = "0.22.0" dependencies = [ "actix-http", + "serde", ] [[package]] @@ -1606,6 +1616,7 @@ dependencies = [ "log", "main_error", "meilisearch-error", + "meilisearch-lib", "meilisearch-tokenizer", "memmap", "milli", @@ -1646,6 +1657,74 @@ dependencies = [ "zip", ] +[[package]] +name = "meilisearch-lib" +version = "0.1.0" +dependencies = [ + "actix-cors", + "actix-rt", + "actix-web", + "actix-web-static-files", + "anyhow", + "arc-swap", + "assert-json-diff", + "async-stream", + "async-trait", + "byte-unit", + "bytes", + "chrono", + "crossbeam-channel", + "either", + "env_logger", + "erased-serde", + "flate2", + "fst", + "futures", + "futures-util", + "heed", + "http", + "indexmap", + "itertools", + "log", + "main_error", + "meilisearch-error", + "meilisearch-tokenizer", + "memmap", + "milli", + "mime", + "mockall", + "num_cpus", + "obkv", + "once_cell", + "parking_lot", + "paste", + "pin-project", + "rand 0.8.4", + "rayon", + "regex", + "reqwest", + "rustls", + "serde", + "serde_json", + "serde_url_params", + "serdeval", + "sha2", + "siphasher", + "slice-group-by", + "structopt", + "sysinfo", + "tar", + "tempdir", + "tempfile", + "thiserror", + "tokio", + "tokio-stream", + "urlencoding", + "uuid", + "walkdir", + "whoami", +] + [[package]] name = "meilisearch-tokenizer" version = "0.2.5" diff --git a/Cargo.toml b/Cargo.toml index a1dca038e..fc64a107f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,7 @@ members = [ "meilisearch-http", "meilisearch-error", + "meilisearch-lib", ] [profile.release] diff --git 
a/meilisearch-error/Cargo.toml b/meilisearch-error/Cargo.toml index 810270183..612e92821 100644 --- a/meilisearch-error/Cargo.toml +++ b/meilisearch-error/Cargo.toml @@ -6,3 +6,4 @@ edition = "2018" [dependencies] actix-http = "=3.0.0-beta.10" +serde = { version = "1.0.130", features = ["derive"] } diff --git a/meilisearch-error/src/lib.rs b/meilisearch-error/src/lib.rs index 5e08317a9..9d5b79f69 100644 --- a/meilisearch-error/src/lib.rs +++ b/meilisearch-error/src/lib.rs @@ -1,6 +1,7 @@ use std::fmt; use actix_http::http::StatusCode; +use serde::{Serialize, Deserialize}; pub trait ErrorCode: std::error::Error { fn error_code(&self) -> Code; @@ -45,6 +46,7 @@ impl fmt::Display for ErrorType { } } +#[derive(Serialize, Deserialize, Debug, Clone, Copy)] pub enum Code { // index related error CreateIndex, diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 02e72668b..eb3d550ab 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -45,6 +45,7 @@ indexmap = { version = "1.7.0", features = ["serde-1"] } itertools = "0.10.1" log = "0.4.14" main_error = "0.1.1" +meilisearch-lib = { path = "../meilisearch-lib" } meilisearch-error = { path = "../meilisearch-error" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" } memmap = "0.7.0" diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 11347175b..8d91c9e9c 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -4,8 +4,8 @@ use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; use log::debug; use serde::Serialize; use siphasher::sip::SipHasher; +use meilisearch_lib::MeiliSearch; -use crate::Data; use crate::Opt; const AMPLITUDE_API_KEY: &str = "f7fba398780e06d8fe6666a9be7e3d47"; @@ -18,8 +18,8 @@ struct EventProperties { } impl EventProperties { - async fn from(data: Data) -> anyhow::Result { - let stats = data.index_controller.get_all_stats().await?; + async 
fn from(data: MeiliSearch) -> anyhow::Result { + let stats = data.get_all_stats().await?; let database_size = stats.database_size; let last_update_timestamp = stats.last_update.map(|u| u.timestamp()); @@ -62,7 +62,7 @@ struct AmplitudeRequest<'a> { events: Vec>, } -pub async fn analytics_sender(data: Data, opt: Opt) { +pub async fn analytics_sender(data: MeiliSearch, opt: Opt) { let username = whoami::username(); let hostname = whoami::hostname(); let platform = whoami::platform(); diff --git a/meilisearch-http/src/data/mod.rs b/meilisearch-http/src/data/mod.rs deleted file mode 100644 index a4cd274ff..000000000 --- a/meilisearch-http/src/data/mod.rs +++ /dev/null @@ -1,86 +0,0 @@ -use std::ops::Deref; -use std::sync::Arc; - -use crate::index::{Checked, Settings}; -use crate::index_controller::{ - error::Result, DumpInfo, IndexController, IndexMetadata, IndexStats, Stats, -}; -use crate::option::Opt; - -pub mod search; -mod updates; - -#[derive(Clone)] -pub struct Data { - inner: Arc, -} - -impl Deref for Data { - type Target = DataInner; - - fn deref(&self) -> &Self::Target { - &self.inner - } -} - -pub struct DataInner { - pub index_controller: IndexController, - //pub api_keys: ApiKeys, -} - -impl Data { - pub fn new(options: Opt) -> anyhow::Result { - let path = options.db_path.clone(); - - let index_controller = IndexController::new(&path, &options)?; - - let inner = DataInner { - index_controller, - }; - let inner = Arc::new(inner); - - Ok(Data { inner }) - } - - pub async fn settings(&self, uid: String) -> Result> { - self.index_controller.settings(uid).await - } - - pub async fn list_indexes(&self) -> Result> { - self.index_controller.list_indexes().await - } - - pub async fn index(&self, uid: String) -> Result { - self.index_controller.get_index(uid).await - } - - //pub async fn create_index( - //&self, - //uid: String, - //primary_key: Option, - //) -> Result { - //let settings = IndexSettings { - //uid: Some(uid), - //primary_key, - //}; - - //let meta = 
self.index_controller.create_index(settings).await?; - //Ok(meta) - //} - - pub async fn get_index_stats(&self, uid: String) -> Result { - Ok(self.index_controller.get_index_stats(uid).await?) - } - - pub async fn get_all_stats(&self) -> Result { - Ok(self.index_controller.get_all_stats().await?) - } - - pub async fn create_dump(&self) -> Result { - Ok(self.index_controller.create_dump().await?) - } - - pub async fn dump_status(&self, uid: String) -> Result { - Ok(self.index_controller.dump_info(uid).await?) - } -} diff --git a/meilisearch-http/src/data/search.rs b/meilisearch-http/src/data/search.rs deleted file mode 100644 index 5ad8d4a07..000000000 --- a/meilisearch-http/src/data/search.rs +++ /dev/null @@ -1,34 +0,0 @@ -use serde_json::{Map, Value}; - -use super::Data; -use crate::index::{SearchQuery, SearchResult}; -use crate::index_controller::error::Result; - -impl Data { - pub async fn search(&self, index: String, search_query: SearchQuery) -> Result { - self.index_controller.search(index, search_query).await - } - - pub async fn retrieve_documents( - &self, - index: String, - offset: usize, - limit: usize, - attributes_to_retrieve: Option>, - ) -> Result>> { - self.index_controller - .documents(index, offset, limit, attributes_to_retrieve) - .await - } - - pub async fn retrieve_document( - &self, - index: String, - document_id: String, - attributes_to_retrieve: Option>, - ) -> Result> { - self.index_controller - .document(index, document_id, attributes_to_retrieve) - .await - } -} diff --git a/meilisearch-http/src/data/updates.rs b/meilisearch-http/src/data/updates.rs deleted file mode 100644 index 8228cd2b2..000000000 --- a/meilisearch-http/src/data/updates.rs +++ /dev/null @@ -1,32 +0,0 @@ -use crate::index_controller::Update; -use crate::index_controller::{error::Result, IndexMetadata, IndexSettings, UpdateStatus}; -use crate::Data; - -impl Data { - pub async fn register_update(&self, index_uid: &str, update: Update) -> Result { - let status = 
self.index_controller.register_update(index_uid, update).await?; - Ok(status) - } - - pub async fn get_update_status(&self, index: String, uid: u64) -> Result { - self.index_controller.update_status(index, uid).await - } - - pub async fn get_updates_status(&self, index: String) -> Result> { - self.index_controller.all_update_status(index).await - } - - pub async fn update_index( - &self, - uid: String, - primary_key: Option, - new_uid: Option, - ) -> Result { - let settings = IndexSettings { - uid: new_uid, - primary_key, - }; - - self.index_controller.update_index(uid, settings).await - } -} diff --git a/meilisearch-http/src/error.rs b/meilisearch-http/src/error.rs index 61b8dbcd9..c18c32ea5 100644 --- a/meilisearch-http/src/error.rs +++ b/meilisearch-http/src/error.rs @@ -55,18 +55,6 @@ impl aweb::error::ResponseError for ResponseError { } } -macro_rules! internal_error { - ($target:ty : $($other:path), *) => { - $( - impl From<$other> for $target { - fn from(other: $other) -> Self { - Self::Internal(Box::new(other)) - } - } - )* - } -} - #[derive(Debug)] pub struct MilliError<'a>(pub &'a milli::Error); diff --git a/meilisearch-http/src/lib.rs b/meilisearch-http/src/lib.rs index af7e776d7..307bbcefa 100644 --- a/meilisearch-http/src/lib.rs +++ b/meilisearch-http/src/lib.rs @@ -38,7 +38,6 @@ //! Most of the routes use [extractors] to handle the authentication. 
#![allow(rustdoc::private_intra_doc_links)] -pub mod data; #[macro_use] pub mod error; #[macro_use] @@ -46,11 +45,8 @@ pub mod extractors; #[cfg(all(not(debug_assertions), feature = "analytics"))] pub mod analytics; pub mod helpers; -mod index; -mod index_controller; pub mod option; pub mod routes; -pub use self::data::Data; use crate::extractors::authentication::AuthConfig; pub use option::Opt; @@ -58,6 +54,7 @@ use actix_web::web; use extractors::authentication::policies::*; use extractors::payload::PayloadConfig; +use meilisearch_lib::MeiliSearch; use sha2::Digest; #[derive(Clone)] @@ -86,14 +83,14 @@ impl ApiKeys { pub fn configure_data( config: &mut web::ServiceConfig, - data: Data, + data: MeiliSearch, opt: &Opt, ) { let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize; config - .app_data(web::Data::new(data.clone())) - // TODO!: Why are we passing the data with two different things? .app_data(data) + // TODO!: Why are we passing the data with two different things? 
+ //.app_data(data) .app_data( web::JsonConfig::default() .limit(http_payload_size_limit) diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index 3c796f29d..77f439d05 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -1,7 +1,8 @@ use std::env; use actix_web::HttpServer; -use meilisearch_http::{create_app, Data, Opt}; +use meilisearch_http::{create_app, Opt}; +use meilisearch_lib::MeiliSearch; use structopt::StructOpt; #[cfg(all(not(debug_assertions), feature = "analytics"))] @@ -39,6 +40,26 @@ fn setup(opt: &Opt) -> anyhow::Result<()> { Ok(()) } +fn setup_meilisearch(opt: &Opt) -> anyhow::Result { + let mut meilisearch = MeiliSearch::builder(); + meilisearch + .set_max_index_size(opt.max_index_size.get_bytes() as usize) + .set_max_update_store_size(opt.max_udb_size.get_bytes() as usize) + .set_ignore_missing_snapshot(opt.ignore_missing_snapshot) + .set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists) + .set_dump_dst(opt.dumps_dir.clone()) + .set_snapshot_dir(opt.snapshot_dir.clone()); + + if let Some(ref path) = opt.import_snapshot { + meilisearch.set_import_snapshot(path.clone()); + } + if let Some(ref path) = opt.import_dump { + meilisearch.set_dump_src(path.clone()); + } + + meilisearch.build(opt.db_path.clone(), opt.indexer_options.clone()) +} + #[actix_web::main] async fn main() -> anyhow::Result<()> { let opt = Opt::from_args(); @@ -55,23 +76,23 @@ async fn main() -> anyhow::Result<()> { _ => unreachable!(), } - let data = Data::new(opt.clone())?; + let meilisearch = setup_meilisearch(&opt)?; #[cfg(all(not(debug_assertions), feature = "analytics"))] if !opt.no_analytics { - let analytics_data = data.clone(); + let analytics_data = meilisearch.clone(); let analytics_opt = opt.clone(); tokio::task::spawn(analytics::analytics_sender(analytics_data, analytics_opt)); } print_launch_resume(&opt); - run_http(data, opt).await?; + run_http(meilisearch, opt).await?; Ok(()) } -async fn run_http(data: 
Data, opt: Opt) -> anyhow::Result<()> { +async fn run_http(data: MeiliSearch, opt: Opt) -> anyhow::Result<()> { let _enable_dashboard = &opt.env == "development"; let opt_clone = opt.clone(); let http_server = HttpServer::new(move || create_app!(data, _enable_dashboard, opt_clone)) diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 3a0ab8acb..2a4d425e9 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -8,7 +8,6 @@ use std::sync::Arc; use std::fs; use byte_unit::Byte; -use milli::CompressionType; use rustls::internal::pemfile::{certs, pkcs8_private_keys, rsa_private_keys}; use rustls::{ AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, NoClientAuth, @@ -16,56 +15,7 @@ use rustls::{ }; use structopt::StructOpt; use sysinfo::{RefreshKind, System, SystemExt}; - -#[derive(Debug, Clone, StructOpt)] -pub struct IndexerOpts { - /// The amount of documents to skip before printing - /// a log regarding the indexing advancement. - #[structopt(long, default_value = "100000")] // 100k - pub log_every_n: usize, - - /// Grenad max number of chunks in bytes. - #[structopt(long)] - pub max_nb_chunks: Option, - - /// The maximum amount of memory the indexer will use. It defaults to 2/3 - /// of the available memory. It is recommended to use something like 80%-90% - /// of the available memory, no more. - /// - /// In case the engine is unable to retrieve the available memory the engine will - /// try to use the memory it needs but without real limit, this can lead to - /// Out-Of-Memory issues and it is recommended to specify the amount of memory to use. - #[structopt(long, default_value)] - pub max_memory: MaxMemory, - - /// The name of the compression algorithm to use when compressing intermediate - /// Grenad chunks while indexing documents. - /// - /// Choosing a fast algorithm will make the indexing faster but may consume more memory. 
- #[structopt(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])] - pub chunk_compression_type: CompressionType, - - /// The level of compression of the chosen algorithm. - #[structopt(long, requires = "chunk-compression-type")] - pub chunk_compression_level: Option, - - /// Number of parallel jobs for indexing, defaults to # of CPUs. - #[structopt(long)] - pub indexing_jobs: Option, -} - -impl Default for IndexerOpts { - fn default() -> Self { - Self { - log_every_n: 100_000, - max_nb_chunks: None, - max_memory: MaxMemory::default(), - chunk_compression_type: CompressionType::None, - chunk_compression_level: None, - indexing_jobs: None, - } - } -} +use meilisearch_lib::options::IndexerOpts; const POSSIBLE_ENV: [&str; 2] = ["development", "production"]; diff --git a/meilisearch-http/src/routes/dump.rs b/meilisearch-http/src/routes/dump.rs index 72bc55986..a598f875b 100644 --- a/meilisearch-http/src/routes/dump.rs +++ b/meilisearch-http/src/routes/dump.rs @@ -1,17 +1,17 @@ use actix_web::{web, HttpResponse}; use log::debug; +use meilisearch_lib::MeiliSearch; use serde::{Deserialize, Serialize}; use crate::error::ResponseError; use crate::extractors::authentication::{policies::*, GuardedData}; -use crate::Data; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(create_dump))) .service(web::resource("/{dump_uid}/status").route(web::get().to(get_dump_status))); } -pub async fn create_dump(data: GuardedData) -> Result { +pub async fn create_dump(data: GuardedData) -> Result { let res = data.create_dump().await?; debug!("returns: {:?}", res); @@ -30,10 +30,10 @@ struct DumpParam { } async fn get_dump_status( - data: GuardedData, + data: GuardedData, path: web::Path, ) -> Result { - let res = data.dump_status(path.dump_uid.clone()).await?; + let res = data.dump_info(path.dump_uid.clone()).await?; debug!("returns: {:?}", res); Ok(HttpResponse::Ok().json(res)) diff --git 
a/meilisearch-http/src/routes/indexes/documents.rs b/meilisearch-http/src/routes/indexes/documents.rs index be80a55a0..b7d13d16b 100644 --- a/meilisearch-http/src/routes/indexes/documents.rs +++ b/meilisearch-http/src/routes/indexes/documents.rs @@ -3,6 +3,8 @@ use actix_web::{web, HttpResponse}; use actix_web::web::Bytes; use futures::{Stream, StreamExt}; use log::debug; +use meilisearch_lib::MeiliSearch; +use meilisearch_lib::index_controller::{DocumentAdditionFormat, Update}; use milli::update::IndexDocumentsMethod; use serde::Deserialize; //use serde_json::Value; @@ -11,9 +13,7 @@ use tokio::sync::mpsc; use crate::error::ResponseError; use crate::extractors::authentication::{policies::*, GuardedData}; use crate::extractors::payload::Payload; -use crate::index_controller::{DocumentAdditionFormat, Update}; use crate::routes::IndexParam; -use crate::Data; const DEFAULT_RETRIEVE_DOCUMENTS_OFFSET: usize = 0; const DEFAULT_RETRIEVE_DOCUMENTS_LIMIT: usize = 20; @@ -88,20 +88,20 @@ pub fn configure(cfg: &mut web::ServiceConfig) { } pub async fn get_document( - data: GuardedData, + data: GuardedData, path: web::Path, ) -> Result { let index = path.index_uid.clone(); let id = path.document_id.clone(); let document = data - .retrieve_document(index, id, None as Option>) + .document(index, id, None as Option>) .await?; debug!("returns: {:?}", document); Ok(HttpResponse::Ok().json(document)) } //pub async fn delete_document( - //data: GuardedData, + //data: GuardedData, //path: web::Path, //) -> Result { //let update_status = data @@ -120,7 +120,7 @@ pub struct BrowseQuery { } pub async fn get_all_documents( - data: GuardedData, + data: GuardedData, path: web::Path, params: web::Query, ) -> Result { @@ -137,7 +137,7 @@ pub async fn get_all_documents( }); let documents = data - .retrieve_documents( + .documents( path.index_uid.clone(), params.offset.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_OFFSET), params.limit.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_LIMIT), @@ -157,7 +157,7 @@ pub 
struct UpdateDocumentsQuery { /// Route used when the payload type is "application/json" /// Used to add or replace documents pub async fn add_documents( - data: GuardedData, + data: GuardedData, path: web::Path, params: web::Query, body: Payload, @@ -180,7 +180,7 @@ pub async fn add_documents( /// Route used when the payload type is "application/json" /// Used to add or replace documents pub async fn update_documents( - data: GuardedData, + data: GuardedData, path: web::Path, params: web::Query, body: Payload, @@ -201,7 +201,7 @@ pub async fn update_documents( } //pub async fn delete_documents( - //data: GuardedData, + //data: GuardedData, //path: web::Path, //body: web::Json>, //) -> Result { @@ -221,7 +221,7 @@ pub async fn update_documents( //} //pub async fn clear_all_documents( - //data: GuardedData, + //data: GuardedData, //path: web::Path, //) -> Result { //let update_status = data.clear_documents(path.index_uid.clone()).await?; diff --git a/meilisearch-http/src/routes/indexes/mod.rs b/meilisearch-http/src/routes/indexes/mod.rs index ef68215b4..da7008640 100644 --- a/meilisearch-http/src/routes/indexes/mod.rs +++ b/meilisearch-http/src/routes/indexes/mod.rs @@ -1,12 +1,13 @@ use actix_web::{web, HttpResponse}; use chrono::{DateTime, Utc}; use log::debug; +use meilisearch_lib::MeiliSearch; +use meilisearch_lib::index_controller::IndexSettings; use serde::{Deserialize, Serialize}; use crate::error::ResponseError; use crate::extractors::authentication::{policies::*, GuardedData}; use crate::routes::IndexParam; -use crate::Data; pub mod documents; pub mod search; @@ -35,7 +36,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) { ); } -pub async fn list_indexes(data: GuardedData) -> Result { +pub async fn list_indexes(data: GuardedData) -> Result { let indexes = data.list_indexes().await?; debug!("returns: {:?}", indexes); Ok(HttpResponse::Ok().json(indexes)) @@ -49,7 +50,7 @@ pub struct IndexCreateRequest { } //pub async fn create_index( - //data: GuardedData, 
+ //data: GuardedData, //body: web::Json, //) -> Result { //let body = body.into_inner(); @@ -75,30 +76,34 @@ pub struct UpdateIndexResponse { } pub async fn get_index( - data: GuardedData, + data: GuardedData, path: web::Path, ) -> Result { - let meta = data.index(path.index_uid.clone()).await?; + let meta = data.get_index(path.index_uid.clone()).await?; debug!("returns: {:?}", meta); Ok(HttpResponse::Ok().json(meta)) } pub async fn update_index( - data: GuardedData, + data: GuardedData, path: web::Path, body: web::Json, ) -> Result { debug!("called with params: {:?}", body); let body = body.into_inner(); + let settings = IndexSettings { + uid: body.uid, + primary_key: body.primary_key, + }; let meta = data - .update_index(path.into_inner().index_uid, body.primary_key, body.uid) + .update_index(path.into_inner().index_uid, settings) .await?; debug!("returns: {:?}", meta); Ok(HttpResponse::Ok().json(meta)) } //pub async fn delete_index( - //data: GuardedData, + //data: GuardedData, //path: web::Path, //) -> Result { //data.delete_index(path.index_uid.clone()).await?; @@ -106,7 +111,7 @@ pub async fn update_index( //} pub async fn get_index_stats( - data: GuardedData, + data: GuardedData, path: web::Path, ) -> Result { let response = data.get_index_stats(path.index_uid.clone()).await?; diff --git a/meilisearch-http/src/routes/indexes/search.rs b/meilisearch-http/src/routes/indexes/search.rs index 83f58648e..2b68e6ed6 100644 --- a/meilisearch-http/src/routes/indexes/search.rs +++ b/meilisearch-http/src/routes/indexes/search.rs @@ -1,13 +1,13 @@ use actix_web::{web, HttpResponse}; use log::debug; +use meilisearch_lib::MeiliSearch; +use meilisearch_lib::index::{default_crop_length, SearchQuery, DEFAULT_SEARCH_LIMIT}; use serde::Deserialize; use serde_json::Value; use crate::error::ResponseError; use crate::extractors::authentication::{policies::*, GuardedData}; -use crate::index::{default_crop_length, SearchQuery, DEFAULT_SEARCH_LIMIT}; use crate::routes::IndexParam; 
-use crate::Data; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( @@ -82,7 +82,7 @@ impl From for SearchQuery { } pub async fn search_with_url_query( - data: GuardedData, + data: GuardedData, path: web::Path, params: web::Query, ) -> Result { @@ -99,7 +99,7 @@ pub async fn search_with_url_query( } pub async fn search_with_post( - data: GuardedData, + data: GuardedData, path: web::Path, params: web::Json, ) -> Result { diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs index 051483b20..07a96003f 100644 --- a/meilisearch-http/src/routes/indexes/settings.rs +++ b/meilisearch-http/src/routes/indexes/settings.rs @@ -148,7 +148,7 @@ //); //pub async fn update_all( - //data: GuardedData, + //data: GuardedData, //index_uid: web::Path, //body: web::Json>, //) -> Result { @@ -162,7 +162,7 @@ //} //pub async fn get_all( - //data: GuardedData, + //data: GuardedData, //index_uid: web::Path, //) -> Result { //let settings = data.settings(index_uid.into_inner()).await?; @@ -171,7 +171,7 @@ //} //pub async fn delete_all( - //data: GuardedData, + //data: GuardedData, //index_uid: web::Path, //) -> Result { //let settings = Settings::cleared(); diff --git a/meilisearch-http/src/routes/indexes/updates.rs b/meilisearch-http/src/routes/indexes/updates.rs index 471636abf..547977790 100644 --- a/meilisearch-http/src/routes/indexes/updates.rs +++ b/meilisearch-http/src/routes/indexes/updates.rs @@ -1,12 +1,12 @@ use actix_web::{web, HttpResponse}; use chrono::{DateTime, Utc}; use log::debug; +use meilisearch_lib::MeiliSearch; use serde::{Deserialize, Serialize}; use crate::error::ResponseError; use crate::extractors::authentication::{policies::*, GuardedData}; use crate::routes::{IndexParam, UpdateStatusResponse}; -use crate::Data; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::get().to(get_all_updates_status))) @@ -37,12 +37,12 @@ pub struct UpdateParam { } pub async fn 
get_update_status( - data: GuardedData, + data: GuardedData, path: web::Path, ) -> Result { let params = path.into_inner(); let meta = data - .get_update_status(params.index_uid, params.update_id) + .update_status(params.index_uid, params.update_id) .await?; let meta = UpdateStatusResponse::from(meta); debug!("returns: {:?}", meta); @@ -50,10 +50,10 @@ pub async fn get_update_status( } pub async fn get_all_updates_status( - data: GuardedData, + data: GuardedData, path: web::Path, ) -> Result { - let metas = data.get_updates_status(path.into_inner().index_uid).await?; + let metas = data.all_update_status(path.into_inner().index_uid).await?; let metas = metas .into_iter() .map(UpdateStatusResponse::from) diff --git a/meilisearch-http/src/routes/mod.rs b/meilisearch-http/src/routes/mod.rs index 12b0612ad..6c99d1766 100644 --- a/meilisearch-http/src/routes/mod.rs +++ b/meilisearch-http/src/routes/mod.rs @@ -5,12 +5,12 @@ use chrono::{DateTime, Utc}; use log::debug; use serde::{Deserialize, Serialize}; +use meilisearch_lib::{MeiliSearch, UpdateResult, UpdateStatus, RegisterUpdate}; +use meilisearch_lib::index::{Settings, Unchecked}; + use crate::error::ResponseError; use crate::extractors::authentication::{policies::*, GuardedData}; -use crate::index::{Settings, Unchecked}; -use crate::index_controller::update_actor::RegisterUpdate; -use crate::index_controller::{UpdateResult, UpdateStatus}; -use crate::{ApiKeys, Data}; +use crate::ApiKeys; mod dump; mod indexes; @@ -187,15 +187,17 @@ impl From for UpdateStatusResponse { let duration = Duration::from_millis(duration as u64).as_secs_f64(); let update_id = failed.id(); - let response = failed.error; + let processed_at = failed.failed_at; + let enqueued_at = failed.from.from.enqueued_at; + let response = failed.into(); let content = FailedUpdateResult { update_id, update_type, response, duration, - enqueued_at: failed.from.from.enqueued_at, - processed_at: failed.failed_at, + enqueued_at, + processed_at, }; 
UpdateStatusResponse::Failed { content } } @@ -230,7 +232,7 @@ pub async fn running() -> HttpResponse { HttpResponse::Ok().json(serde_json::json!({ "status": "MeiliSearch is running" })) } -async fn get_stats(data: GuardedData) -> Result { +async fn get_stats(data: GuardedData) -> Result { let response = data.get_all_stats().await?; debug!("returns: {:?}", response); @@ -245,7 +247,7 @@ struct VersionResponse { pkg_version: String, } -async fn get_version(_data: GuardedData) -> HttpResponse { +async fn get_version(_data: GuardedData) -> HttpResponse { let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown"); let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown"); @@ -288,7 +290,7 @@ mod test { macro_rules! impl_is_policy { ($($param:ident)*) => { impl Is for Func - where Func: Fn(GuardedData, $($param,)*) -> Res {} + where Func: Fn(GuardedData, $($param,)*) -> Res {} }; } diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml new file mode 100644 index 000000000..7ef4ecad7 --- /dev/null +++ b/meilisearch-lib/Cargo.toml @@ -0,0 +1,72 @@ +[package] +name = "meilisearch-lib" +version = "0.1.0" +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +actix-cors = { git = "https://github.com/MarinPostma/actix-extras.git", rev = "963ac94d" } +actix-web = { version = "4.0.0-beta.9", features = ["rustls"] } +actix-web-static-files = { git = "https://github.com/MarinPostma/actix-web-static-files.git", rev = "39d8006", optional = true } +anyhow = { version = "1.0.43", features = ["backtrace"] } +async-stream = "0.3.2" +async-trait = "0.1.51" +arc-swap = "1.3.2" +byte-unit = { version = "4.0.12", default-features = false, features = ["std"] } +bytes = "1.1.0" +chrono = { version = "0.4.19", features = ["serde"] } +crossbeam-channel = "0.5.1" +either = "1.6.1" +env_logger = "0.9.0" +flate2 = "1.0.21" +fst = "0.4.7" +futures = "0.3.17" 
+futures-util = "0.3.17" +heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" } +http = "0.2.4" +indexmap = { version = "1.7.0", features = ["serde-1"] } +itertools = "0.10.1" +log = "0.4.14" +main_error = "0.1.1" +meilisearch-error = { path = "../meilisearch-error" } +meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" } +memmap = "0.7.0" +milli = { git = "https://github.com/meilisearch/milli.git", rev = "6de1b41" } +mime = "0.3.16" +num_cpus = "1.13.0" +once_cell = "1.8.0" +parking_lot = "0.11.2" +rand = "0.8.4" +rayon = "1.5.1" +regex = "1.5.4" +rustls = "0.19.1" +serde = { version = "1.0.130", features = ["derive"] } +serde_json = { version = "1.0.67", features = ["preserve_order"] } +sha2 = "0.9.6" +siphasher = "0.3.7" +slice-group-by = "0.2.6" +structopt = "0.3.23" +tar = "0.4.37" +tempfile = "3.2.0" +thiserror = "1.0.28" +tokio = { version = "1.11.0", features = ["full"] } +uuid = { version = "0.8.2", features = ["serde"] } +walkdir = "2.3.2" +obkv = "0.2.0" +pin-project = "1.0.8" +whoami = { version = "1.1.3", optional = true } +reqwest = { version = "0.11.4", features = ["json", "rustls-tls"], default-features = false, optional = true } +serdeval = "0.1.0" +sysinfo = "0.20.2" +tokio-stream = "0.1.7" +erased-serde = "0.3.16" + +[dev-dependencies] +actix-rt = "2.2.0" +assert-json-diff = { branch = "master", git = "https://github.com/qdequele/assert-json-diff" } +mockall = "0.10.2" +paste = "1.0.5" +serde_url_params = "0.2.1" +tempdir = "0.3.7" +urlencoding = "2.1.0" diff --git a/meilisearch-lib/src/error.rs b/meilisearch-lib/src/error.rs new file mode 100644 index 000000000..80141dae5 --- /dev/null +++ b/meilisearch-lib/src/error.rs @@ -0,0 +1,62 @@ +use std::error::Error; +use std::fmt; + +use meilisearch_error::{Code, ErrorCode}; +use milli::UserError; + +macro_rules! 
internal_error { + ($target:ty : $($other:path), *) => { + $( + impl From<$other> for $target { + fn from(other: $other) -> Self { + Self::Internal(Box::new(other)) + } + } + )* + } +} + +#[derive(Debug)] +pub struct MilliError<'a>(pub &'a milli::Error); + +impl Error for MilliError<'_> {} + +impl fmt::Display for MilliError<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +impl ErrorCode for MilliError<'_> { + fn error_code(&self) -> Code { + match self.0 { + milli::Error::InternalError(_) => Code::Internal, + milli::Error::IoError(_) => Code::Internal, + milli::Error::UserError(ref error) => { + match error { + // TODO: wait for spec for new error codes. + | UserError::SerdeJson(_) + | UserError::MaxDatabaseSizeReached + | UserError::InvalidCriterionName { .. } + | UserError::InvalidDocumentId { .. } + | UserError::InvalidStoreFile + | UserError::NoSpaceLeftOnDevice + | UserError::InvalidAscDescSyntax { .. } + | UserError::DocumentLimitReached => Code::Internal, + UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded, + UserError::InvalidFilter(_) => Code::Filter, + UserError::InvalidFilterAttribute(_) => Code::Filter, + UserError::InvalidSortName { .. } => Code::Sort, + UserError::MissingDocumentId { .. } => Code::MissingDocumentId, + UserError::MissingPrimaryKey => Code::MissingPrimaryKey, + UserError::PrimaryKeyCannotBeChanged => Code::PrimaryKeyAlreadyPresent, + UserError::PrimaryKeyCannotBeReset => Code::PrimaryKeyAlreadyPresent, + UserError::SortRankingRuleMissing => Code::Sort, + UserError::UnknownInternalDocumentId { .. } => Code::DocumentNotFound, + UserError::InvalidFacetsDistribution { .. } => Code::BadRequest, + UserError::InvalidSortableAttribute { .. 
} => Code::Sort, + } + } + } + } +} diff --git a/meilisearch-http/src/index/dump.rs b/meilisearch-lib/src/index/dump.rs similarity index 99% rename from meilisearch-http/src/index/dump.rs rename to meilisearch-lib/src/index/dump.rs index 9c8acf960..018ae6d2f 100644 --- a/meilisearch-http/src/index/dump.rs +++ b/meilisearch-lib/src/index/dump.rs @@ -6,7 +6,7 @@ use heed::RoTxn; use indexmap::IndexMap; use serde::{Deserialize, Serialize}; -use crate::option::IndexerOpts; +use crate::options::IndexerOpts; use super::error::Result; use super::{Index, Settings, Unchecked}; diff --git a/meilisearch-http/src/index/error.rs b/meilisearch-lib/src/index/error.rs similarity index 100% rename from meilisearch-http/src/index/error.rs rename to meilisearch-lib/src/index/error.rs diff --git a/meilisearch-http/src/index/mod.rs b/meilisearch-lib/src/index/mod.rs similarity index 99% rename from meilisearch-http/src/index/mod.rs rename to meilisearch-lib/src/index/mod.rs index 1ea481ec9..c05e337e2 100644 --- a/meilisearch-http/src/index/mod.rs +++ b/meilisearch-lib/src/index/mod.rs @@ -14,7 +14,7 @@ use error::Result; pub use search::{default_crop_length, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT}; pub use updates::{Checked, Facets, Settings, Unchecked}; -use crate::helpers::EnvSizer; +use crate::EnvSizer; use crate::index_controller::update_file_store::UpdateFileStore; use self::error::IndexError; diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-lib/src/index/search.rs similarity index 100% rename from meilisearch-http/src/index/search.rs rename to meilisearch-lib/src/index/search.rs diff --git a/meilisearch-http/src/index/update_handler.rs b/meilisearch-lib/src/index/update_handler.rs similarity index 96% rename from meilisearch-http/src/index/update_handler.rs rename to meilisearch-lib/src/index/update_handler.rs index 0ad71d313..8fba55341 100644 --- a/meilisearch-http/src/index/update_handler.rs +++ b/meilisearch-lib/src/index/update_handler.rs @@ -5,7 +5,7 
@@ use rayon::ThreadPool; use crate::index_controller::update_actor::RegisterUpdate; use crate::index_controller::{Failed, Processed, Processing}; -use crate::option::IndexerOpts; +use crate::options::IndexerOpts; pub struct UpdateHandler { max_nb_chunks: Option, @@ -66,7 +66,7 @@ impl UpdateHandler { match result { Ok(result) => Ok(meta.process(result)), - Err(e) => Err(meta.fail(e.into())), + Err(e) => Err(meta.fail(e)), } } } diff --git a/meilisearch-http/src/index/updates.rs b/meilisearch-lib/src/index/updates.rs similarity index 100% rename from meilisearch-http/src/index/updates.rs rename to meilisearch-lib/src/index/updates.rs diff --git a/meilisearch-http/src/index_controller/dump_actor/actor.rs b/meilisearch-lib/src/index_controller/dump_actor/actor.rs similarity index 100% rename from meilisearch-http/src/index_controller/dump_actor/actor.rs rename to meilisearch-lib/src/index_controller/dump_actor/actor.rs diff --git a/meilisearch-http/src/index_controller/dump_actor/error.rs b/meilisearch-lib/src/index_controller/dump_actor/error.rs similarity index 100% rename from meilisearch-http/src/index_controller/dump_actor/error.rs rename to meilisearch-lib/src/index_controller/dump_actor/error.rs diff --git a/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs b/meilisearch-lib/src/index_controller/dump_actor/handle_impl.rs similarity index 100% rename from meilisearch-http/src/index_controller/dump_actor/handle_impl.rs rename to meilisearch-lib/src/index_controller/dump_actor/handle_impl.rs diff --git a/meilisearch-http/src/index_controller/dump_actor/loaders/mod.rs b/meilisearch-lib/src/index_controller/dump_actor/loaders/mod.rs similarity index 100% rename from meilisearch-http/src/index_controller/dump_actor/loaders/mod.rs rename to meilisearch-lib/src/index_controller/dump_actor/loaders/mod.rs diff --git a/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs b/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs similarity 
index 99% rename from meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs rename to meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs index 21893eb49..584828b4e 100644 --- a/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs @@ -11,7 +11,7 @@ use crate::index_controller::{self, uuid_resolver::HeedUuidStore, IndexMetadata} use crate::index_controller::{asc_ranking_rule, desc_ranking_rule}; use crate::{ index::Unchecked, - option::IndexerOpts, + options::IndexerOpts, }; #[derive(Serialize, Deserialize, Debug)] diff --git a/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs b/meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs similarity index 97% rename from meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs rename to meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs index eddd8a3b7..c39da1e44 100644 --- a/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs @@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize}; use crate::index::Index; use crate::index_controller::{update_actor::UpdateStore, uuid_resolver::HeedUuidStore}; -use crate::option::IndexerOpts; +use crate::options::IndexerOpts; #[derive(Serialize, Deserialize, Debug)] #[serde(rename_all = "camelCase")] diff --git a/meilisearch-http/src/index_controller/dump_actor/message.rs b/meilisearch-lib/src/index_controller/dump_actor/message.rs similarity index 100% rename from meilisearch-http/src/index_controller/dump_actor/message.rs rename to meilisearch-lib/src/index_controller/dump_actor/message.rs diff --git a/meilisearch-http/src/index_controller/dump_actor/mod.rs b/meilisearch-lib/src/index_controller/dump_actor/mod.rs similarity index 96% rename from meilisearch-http/src/index_controller/dump_actor/mod.rs rename to meilisearch-lib/src/index_controller/dump_actor/mod.rs index 
c8aad6815..e0f9535f3 100644 --- a/meilisearch-http/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/mod.rs @@ -18,7 +18,7 @@ pub use message::DumpMsg; use super::{update_actor::UpdateActorHandle, uuid_resolver::UuidResolverHandle}; use crate::index_controller::dump_actor::error::DumpActorError; -use crate::{helpers::compression, option::IndexerOpts}; +use crate::options::IndexerOpts; use error::Result; mod actor; @@ -112,7 +112,7 @@ pub fn load_dump( let tmp_src = tempfile::tempdir_in(".")?; let tmp_src_path = tmp_src.path(); - compression::from_tar_gz(&src_path, tmp_src_path)?; + crate::from_tar_gz(&src_path, tmp_src_path)?; let meta_path = tmp_src_path.join(META_FILE_NAME); let mut meta_file = File::open(&meta_path)?; @@ -162,6 +162,7 @@ impl DumpTask where U: UuidResolverHandle + Send + Sync + Clone + 'static, P: UpdateActorHandle + Send + Sync + Clone + 'static, + { async fn run(self) -> Result<()> { trace!("Performing dump."); @@ -186,7 +187,7 @@ where let dump_path = tokio::task::spawn_blocking(move || -> Result { let temp_dump_file = tempfile::NamedTempFile::new_in(&self.path)?; - compression::to_tar_gz(temp_dump_path, temp_dump_file.path()) + crate::to_tar_gz(temp_dump_path, temp_dump_file.path()) .map_err(|e| DumpActorError::Internal(e.into()))?; let dump_path = self.path.join(self.uid).with_extension("dump"); diff --git a/meilisearch-http/src/index_controller/error.rs b/meilisearch-lib/src/index_controller/error.rs similarity index 100% rename from meilisearch-http/src/index_controller/error.rs rename to meilisearch-lib/src/index_controller/error.rs diff --git a/meilisearch-http/src/index_controller/index_actor/actor.rs b/meilisearch-lib/src/index_controller/index_actor/actor.rs similarity index 99% rename from meilisearch-http/src/index_controller/index_actor/actor.rs rename to meilisearch-lib/src/index_controller/index_actor/actor.rs index abc08788e..cee656b97 100644 --- 
a/meilisearch-http/src/index_controller/index_actor/actor.rs +++ b/meilisearch-lib/src/index_controller/index_actor/actor.rs @@ -16,7 +16,7 @@ use crate::index::{ use crate::index_controller::{ get_arc_ownership_blocking, Failed, IndexStats, Processed, Processing, }; -use crate::option::IndexerOpts; +use crate::options::IndexerOpts; use super::error::{IndexActorError, Result}; use super::{IndexMeta, IndexMsg, IndexSettings, IndexStore}; @@ -29,7 +29,9 @@ pub struct IndexActor { store: S, } -impl IndexActor { +impl IndexActor +where S: IndexStore + Sync + Send, +{ pub fn new( receiver: mpsc::Receiver, store: S, diff --git a/meilisearch-http/src/index_controller/index_actor/error.rs b/meilisearch-lib/src/index_controller/index_actor/error.rs similarity index 100% rename from meilisearch-http/src/index_controller/index_actor/error.rs rename to meilisearch-lib/src/index_controller/index_actor/error.rs diff --git a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs b/meilisearch-lib/src/index_controller/index_actor/handle_impl.rs similarity index 99% rename from meilisearch-http/src/index_controller/index_actor/handle_impl.rs rename to meilisearch-lib/src/index_controller/index_actor/handle_impl.rs index efc104c54..8cc66edee 100644 --- a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs +++ b/meilisearch-lib/src/index_controller/index_actor/handle_impl.rs @@ -1,4 +1,4 @@ -use crate::option::IndexerOpts; +use crate::options::IndexerOpts; use std::path::{Path, PathBuf}; use tokio::sync::{mpsc, oneshot}; diff --git a/meilisearch-http/src/index_controller/index_actor/message.rs b/meilisearch-lib/src/index_controller/index_actor/message.rs similarity index 100% rename from meilisearch-http/src/index_controller/index_actor/message.rs rename to meilisearch-lib/src/index_controller/index_actor/message.rs diff --git a/meilisearch-http/src/index_controller/index_actor/mod.rs b/meilisearch-lib/src/index_controller/index_actor/mod.rs similarity index 
100% rename from meilisearch-http/src/index_controller/index_actor/mod.rs rename to meilisearch-lib/src/index_controller/index_actor/mod.rs diff --git a/meilisearch-http/src/index_controller/index_actor/store.rs b/meilisearch-lib/src/index_controller/index_actor/store.rs similarity index 100% rename from meilisearch-http/src/index_controller/index_actor/store.rs rename to meilisearch-lib/src/index_controller/index_actor/store.rs diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs similarity index 82% rename from meilisearch-http/src/index_controller/mod.rs rename to meilisearch-lib/src/index_controller/mod.rs index 88a219530..73df4eee6 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -22,8 +22,8 @@ use update_actor::UpdateActorHandle; pub use updates::*; use uuid_resolver::{error::UuidResolverError, UuidResolverHandle}; +use crate::options::IndexerOpts; use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings}; -use crate::option::Opt; use error::Result; use self::dump_actor::load_dump; @@ -99,45 +99,58 @@ pub enum Update { } } -impl IndexController { - pub fn new(path: impl AsRef, options: &Opt) -> anyhow::Result { - let index_size = options.max_index_size.get_bytes() as usize; - let update_store_size = options.max_index_size.get_bytes() as usize; +#[derive(Default, Debug)] +pub struct IndexControllerBuilder { + max_index_size: Option, + max_update_store_size: Option, + snapshot_dir: Option, + import_snapshot: Option, + ignore_snapshot_if_db_exists: bool, + ignore_missing_snapshot: bool, + dump_src: Option, + dump_dst: Option, +} - if let Some(ref path) = options.import_snapshot { +impl IndexControllerBuilder { + pub fn build(self, db_path: impl AsRef, indexer_options: IndexerOpts) -> anyhow::Result { + let index_size = self.max_index_size.ok_or_else(|| anyhow::anyhow!("Missing index size"))?; + let update_store_size = 
self.max_index_size.ok_or_else(|| anyhow::anyhow!("Missing update database size"))?; + + if let Some(ref path) = self.import_snapshot { info!("Loading from snapshot {:?}", path); load_snapshot( - &options.db_path, + db_path.as_ref(), path, - options.ignore_snapshot_if_db_exists, - options.ignore_missing_snapshot, + self.ignore_snapshot_if_db_exists, + self.ignore_missing_snapshot, )?; - } else if let Some(ref src_path) = options.import_dump { + } else if let Some(ref src_path) = self.dump_src { load_dump( - &options.db_path, + db_path.as_ref(), src_path, - options.max_index_size.get_bytes() as usize, - options.max_udb_size.get_bytes() as usize, - &options.indexer_options, + index_size, + update_store_size, + &indexer_options, )?; } - std::fs::create_dir_all(&path)?; + std::fs::create_dir_all(db_path.as_ref())?; - let uuid_resolver = uuid_resolver::UuidResolverHandleImpl::new(&path)?; + let uuid_resolver = uuid_resolver::UuidResolverHandleImpl::new(&db_path)?; let index_handle = - index_actor::IndexActorHandleImpl::new(&path, index_size, &options.indexer_options)?; + index_actor::IndexActorHandleImpl::new(&db_path, index_size, &indexer_options)?; let update_handle = update_actor::UpdateActorHandleImpl::new( index_handle.clone(), - &path, + &db_path, update_store_size, )?; + let dump_handle = dump_actor::DumpActorHandleImpl::new( - &options.dumps_dir, + &self.dump_dst.ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?, uuid_resolver.clone(), update_handle.clone(), - options.max_index_size.get_bytes() as usize, - options.max_udb_size.get_bytes() as usize, + index_size, + update_store_size, )?; //if options.schedule_snapshot { @@ -156,7 +169,7 @@ impl IndexController { //tokio::task::spawn(snapshot_service.run()); //} - Ok(Self { + Ok(IndexController { uuid_resolver, index_handle, update_handle, @@ -164,6 +177,59 @@ impl IndexController { }) } + /// Set the index controller builder's max update store size. 
+ pub fn set_max_update_store_size(&mut self, max_update_store_size: usize) -> &mut Self { + self.max_update_store_size.replace(max_update_store_size); + self + } + + pub fn set_max_index_size(&mut self, size: usize) -> &mut Self { + self.max_index_size.replace(size); + self + } + + /// Set the index controller builder's snapshot path. + pub fn set_snapshot_dir(&mut self, snapshot_dir: PathBuf) -> &mut Self { + self.snapshot_dir.replace(snapshot_dir); + self + } + + /// Set the index controller builder's ignore snapshot if db exists. + pub fn set_ignore_snapshot_if_db_exists(&mut self, ignore_snapshot_if_db_exists: bool) -> &mut Self { + self.ignore_snapshot_if_db_exists = ignore_snapshot_if_db_exists; + self + } + + /// Set the index controller builder's ignore missing snapshot. + pub fn set_ignore_missing_snapshot(&mut self, ignore_missing_snapshot: bool) -> &mut Self { + self.ignore_missing_snapshot = ignore_missing_snapshot; + self + } + + /// Set the index controller builder's dump src. + pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self { + self.dump_src.replace(dump_src); + self + } + + /// Set the index controller builder's dump dst. + pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self { + self.dump_dst.replace(dump_dst); + self + } + + /// Set the index controller builder's import snapshot. 
+ pub fn set_import_snapshot(&mut self, import_snapshot: PathBuf) -> &mut Self { + self.import_snapshot.replace(import_snapshot); + self + } +} + +impl IndexController { + pub fn builder() -> IndexControllerBuilder { + IndexControllerBuilder::default() + } + pub async fn register_update(&self, uid: &str, update: Update) -> Result { match self.uuid_resolver.get(uid.to_string()).await { Ok(uuid) => { diff --git a/meilisearch-http/src/index_controller/snapshot.rs b/meilisearch-lib/src/index_controller/snapshot.rs similarity index 98% rename from meilisearch-http/src/index_controller/snapshot.rs rename to meilisearch-lib/src/index_controller/snapshot.rs index 6c5171d62..c2f600bbc 100644 --- a/meilisearch-http/src/index_controller/snapshot.rs +++ b/meilisearch-lib/src/index_controller/snapshot.rs @@ -2,8 +2,6 @@ use std::path::Path; use anyhow::bail; -use crate::helpers::compression; - //pub struct SnapshotService { //uuid_resolver_handle: R, //update_handle: U, @@ -93,7 +91,7 @@ pub fn load_snapshot( ignore_missing_snapshot: bool, ) -> anyhow::Result<()> { if !db_path.as_ref().exists() && snapshot_path.as_ref().exists() { - match compression::from_tar_gz(snapshot_path, &db_path) { + match crate::from_tar_gz(snapshot_path, &db_path) { Ok(()) => Ok(()), Err(e) => { // clean created db folder diff --git a/meilisearch-http/src/index_controller/update_actor/actor.rs b/meilisearch-lib/src/index_controller/update_actor/actor.rs similarity index 100% rename from meilisearch-http/src/index_controller/update_actor/actor.rs rename to meilisearch-lib/src/index_controller/update_actor/actor.rs diff --git a/meilisearch-http/src/index_controller/update_actor/error.rs b/meilisearch-lib/src/index_controller/update_actor/error.rs similarity index 100% rename from meilisearch-http/src/index_controller/update_actor/error.rs rename to meilisearch-lib/src/index_controller/update_actor/error.rs diff --git a/meilisearch-http/src/index_controller/update_actor/handle_impl.rs 
b/meilisearch-lib/src/index_controller/update_actor/handle_impl.rs similarity index 99% rename from meilisearch-http/src/index_controller/update_actor/handle_impl.rs rename to meilisearch-lib/src/index_controller/update_actor/handle_impl.rs index 5175f2eb5..e1df0b5d4 100644 --- a/meilisearch-http/src/index_controller/update_actor/handle_impl.rs +++ b/meilisearch-lib/src/index_controller/update_actor/handle_impl.rs @@ -41,6 +41,7 @@ impl UpdateActorHandle for UpdateActorHandleImpl { self.sender.send(msg).await?; receiver.await? } + async fn update_status(&self, uuid: Uuid, id: u64) -> Result { let (ret, receiver) = oneshot::channel(); let msg = UpdateMsg::GetUpdate { uuid, id, ret }; diff --git a/meilisearch-http/src/index_controller/update_actor/message.rs b/meilisearch-lib/src/index_controller/update_actor/message.rs similarity index 100% rename from meilisearch-http/src/index_controller/update_actor/message.rs rename to meilisearch-lib/src/index_controller/update_actor/message.rs diff --git a/meilisearch-http/src/index_controller/update_actor/mod.rs b/meilisearch-lib/src/index_controller/update_actor/mod.rs similarity index 100% rename from meilisearch-http/src/index_controller/update_actor/mod.rs rename to meilisearch-lib/src/index_controller/update_actor/mod.rs diff --git a/meilisearch-http/src/index_controller/update_actor/store/codec.rs b/meilisearch-lib/src/index_controller/update_actor/store/codec.rs similarity index 100% rename from meilisearch-http/src/index_controller/update_actor/store/codec.rs rename to meilisearch-lib/src/index_controller/update_actor/store/codec.rs diff --git a/meilisearch-http/src/index_controller/update_actor/store/dump.rs b/meilisearch-lib/src/index_controller/update_actor/store/dump.rs similarity index 100% rename from meilisearch-http/src/index_controller/update_actor/store/dump.rs rename to meilisearch-lib/src/index_controller/update_actor/store/dump.rs diff --git 
a/meilisearch-http/src/index_controller/update_actor/store/mod.rs b/meilisearch-lib/src/index_controller/update_actor/store/mod.rs similarity index 99% rename from meilisearch-http/src/index_controller/update_actor/store/mod.rs rename to meilisearch-lib/src/index_controller/update_actor/store/mod.rs index 2dd758b82..62fcbd5ad 100644 --- a/meilisearch-http/src/index_controller/update_actor/store/mod.rs +++ b/meilisearch-lib/src/index_controller/update_actor/store/mod.rs @@ -28,7 +28,7 @@ use codec::*; use super::RegisterUpdate; use super::error::Result; -use crate::helpers::EnvSizer; +use crate::EnvSizer; use crate::index_controller::update_files_path; use crate::index_controller::{index_actor::CONCURRENT_INDEX_MSG, updates::*, IndexActorHandle}; @@ -323,7 +323,7 @@ impl UpdateStore { let result = match handle.block_on(index_handle.update(index_uuid, processing.clone())) { Ok(result) => result, - Err(e) => Err(processing.fail(e.into())), + Err(e) => Err(processing.fail(e)), }; // Once the pending update have been successfully processed diff --git a/meilisearch-http/src/index_controller/update_file_store.rs b/meilisearch-lib/src/index_controller/update_file_store.rs similarity index 100% rename from meilisearch-http/src/index_controller/update_file_store.rs rename to meilisearch-lib/src/index_controller/update_file_store.rs diff --git a/meilisearch-http/src/index_controller/updates.rs b/meilisearch-lib/src/index_controller/updates.rs similarity index 89% rename from meilisearch-http/src/index_controller/updates.rs rename to meilisearch-lib/src/index_controller/updates.rs index 7065b0462..efe48e5e5 100644 --- a/meilisearch-http/src/index_controller/updates.rs +++ b/meilisearch-lib/src/index_controller/updates.rs @@ -1,11 +1,12 @@ +use std::{error::Error, fmt::Display}; + use chrono::{DateTime, Utc}; + +use meilisearch_error::{Code, ErrorCode}; use milli::update::{DocumentAdditionResult, IndexDocumentsMethod}; use serde::{Deserialize, Serialize}; -use crate::{ - 
error::ResponseError, - index::{Settings, Unchecked}, -}; +use crate::index::{Settings, Unchecked}; use super::update_actor::RegisterUpdate; @@ -115,10 +116,13 @@ impl Processing { } } - pub fn fail(self, error: ResponseError) -> Failed { + pub fn fail(self, error: impl ErrorCode) -> Failed { + let msg = error.to_string(); + let code = error.error_code(); Failed { from: self, - error, + msg, + code, failed_at: Utc::now(), } } @@ -147,10 +151,25 @@ impl Aborted { pub struct Failed { #[serde(flatten)] pub from: Processing, - pub error: ResponseError, + pub msg: String, + pub code: Code, pub failed_at: DateTime, } +impl Display for Failed { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.msg.fmt(f) + } +} + +impl Error for Failed { } + +impl ErrorCode for Failed { + fn error_code(&self) -> Code { + self.code + } +} + impl Failed { pub fn id(&self) -> u64 { self.from.id() diff --git a/meilisearch-http/src/index_controller/uuid_resolver/actor.rs b/meilisearch-lib/src/index_controller/uuid_resolver/actor.rs similarity index 100% rename from meilisearch-http/src/index_controller/uuid_resolver/actor.rs rename to meilisearch-lib/src/index_controller/uuid_resolver/actor.rs diff --git a/meilisearch-http/src/index_controller/uuid_resolver/error.rs b/meilisearch-lib/src/index_controller/uuid_resolver/error.rs similarity index 100% rename from meilisearch-http/src/index_controller/uuid_resolver/error.rs rename to meilisearch-lib/src/index_controller/uuid_resolver/error.rs diff --git a/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs b/meilisearch-lib/src/index_controller/uuid_resolver/handle_impl.rs similarity index 100% rename from meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs rename to meilisearch-lib/src/index_controller/uuid_resolver/handle_impl.rs diff --git a/meilisearch-http/src/index_controller/uuid_resolver/message.rs b/meilisearch-lib/src/index_controller/uuid_resolver/message.rs similarity index 100% 
rename from meilisearch-http/src/index_controller/uuid_resolver/message.rs rename to meilisearch-lib/src/index_controller/uuid_resolver/message.rs diff --git a/meilisearch-http/src/index_controller/uuid_resolver/mod.rs b/meilisearch-lib/src/index_controller/uuid_resolver/mod.rs similarity index 100% rename from meilisearch-http/src/index_controller/uuid_resolver/mod.rs rename to meilisearch-lib/src/index_controller/uuid_resolver/mod.rs diff --git a/meilisearch-http/src/index_controller/uuid_resolver/store.rs b/meilisearch-lib/src/index_controller/uuid_resolver/store.rs similarity index 99% rename from meilisearch-http/src/index_controller/uuid_resolver/store.rs rename to meilisearch-lib/src/index_controller/uuid_resolver/store.rs index f02d22d7f..5457ab91d 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/store.rs +++ b/meilisearch-lib/src/index_controller/uuid_resolver/store.rs @@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::{error::UuidResolverError, Result, UUID_STORE_SIZE}; -use crate::helpers::EnvSizer; +use crate::EnvSizer; #[derive(Serialize, Deserialize)] struct DumpEntry { diff --git a/meilisearch-lib/src/lib.rs b/meilisearch-lib/src/lib.rs new file mode 100644 index 000000000..9f6be4361 --- /dev/null +++ b/meilisearch-lib/src/lib.rs @@ -0,0 +1,53 @@ +#[macro_use] +pub mod error; +pub mod options; + +pub mod index; +pub mod index_controller; + +pub use index_controller::{UpdateResult, UpdateStatus, IndexController as MeiliSearch, update_actor::RegisterUpdate}; + +use walkdir::WalkDir; + +pub trait EnvSizer { + fn size(&self) -> u64; +} + +impl EnvSizer for heed::Env { + fn size(&self) -> u64 { + WalkDir::new(self.path()) + .into_iter() + .filter_map(|entry| entry.ok()) + .filter_map(|entry| entry.metadata().ok()) + .filter(|metadata| metadata.is_file()) + .fold(0, |acc, m| acc + m.len()) + } +} + +use std::fs::{create_dir_all, File}; +use std::io::Write; +use std::path::Path; + +use flate2::{read::GzDecoder, 
write::GzEncoder, Compression}; +use tar::{Archive, Builder}; + +pub fn to_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Result<()> { + let mut f = File::create(dest)?; + let gz_encoder = GzEncoder::new(&mut f, Compression::default()); + let mut tar_encoder = Builder::new(gz_encoder); + tar_encoder.append_dir_all(".", src)?; + let gz_encoder = tar_encoder.into_inner()?; + gz_encoder.finish()?; + f.flush()?; + Ok(()) +} + +pub fn from_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Result<()> { + let f = File::open(&src)?; + let gz = GzDecoder::new(f); + let mut ar = Archive::new(gz); + create_dir_all(&dest)?; + ar.unpack(&dest)?; + Ok(()) +} + diff --git a/meilisearch-lib/src/options.rs b/meilisearch-lib/src/options.rs new file mode 100644 index 000000000..f4b992f2e --- /dev/null +++ b/meilisearch-lib/src/options.rs @@ -0,0 +1,115 @@ +use core::fmt; +use std::{ops::Deref, str::FromStr}; + +use byte_unit::{Byte, ByteError}; +use milli::CompressionType; +use structopt::StructOpt; +use sysinfo::{RefreshKind, System, SystemExt}; + +#[derive(Debug, Clone, StructOpt)] +pub struct IndexerOpts { + /// The amount of documents to skip before printing + /// a log regarding the indexing advancement. + #[structopt(long, default_value = "100000")] // 100k + pub log_every_n: usize, + + /// Grenad max number of chunks in bytes. + #[structopt(long)] + pub max_nb_chunks: Option, + + /// The maximum amount of memory the indexer will use. It defaults to 2/3 + /// of the available memory. It is recommended to use something like 80%-90% + /// of the available memory, no more. + /// + /// In case the engine is unable to retrieve the available memory the engine will + /// try to use the memory it needs but without real limit, this can lead to + /// Out-Of-Memory issues and it is recommended to specify the amount of memory to use. 
+ #[structopt(long, default_value)] + pub max_memory: MaxMemory, + + /// The name of the compression algorithm to use when compressing intermediate + /// Grenad chunks while indexing documents. + /// + /// Choosing a fast algorithm will make the indexing faster but may consume more memory. + #[structopt(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])] + pub chunk_compression_type: CompressionType, + + /// The level of compression of the chosen algorithm. + #[structopt(long, requires = "chunk-compression-type")] + pub chunk_compression_level: Option, + + /// Number of parallel jobs for indexing, defaults to # of CPUs. + #[structopt(long)] + pub indexing_jobs: Option, +} + +impl Default for IndexerOpts { + fn default() -> Self { + Self { + log_every_n: 100_000, + max_nb_chunks: None, + max_memory: MaxMemory::default(), + chunk_compression_type: CompressionType::None, + chunk_compression_level: None, + indexing_jobs: None, + } + } +} + +/// A type used to detect the max memory available and use 2/3 of it. +#[derive(Debug, Clone, Copy)] +pub struct MaxMemory(Option); + +impl FromStr for MaxMemory { + type Err = ByteError; + + fn from_str(s: &str) -> Result { + Byte::from_str(s).map(Some).map(MaxMemory) + } +} + +impl Default for MaxMemory { + fn default() -> MaxMemory { + MaxMemory( + total_memory_bytes() + .map(|bytes| bytes * 2 / 3) + .map(Byte::from_bytes), + ) + } +} + +impl fmt::Display for MaxMemory { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self.0 { + Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)), + None => f.write_str("unknown"), + } + } +} + +impl Deref for MaxMemory { + type Target = Option; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl MaxMemory { + pub fn unlimited() -> Self { + Self(None) + } +} + +/// Returns the total amount of bytes available or `None` if this system isn't supported. 
+fn total_memory_bytes() -> Option { + if System::IS_SUPPORTED { + let memory_kind = RefreshKind::new().with_memory(); + let mut system = System::new_with_specifics(memory_kind); + system.refresh_memory(); + Some(system.total_memory() * 1024) // KiB into bytes + } else { + None + } +} + From def737edeeef8c37056a0644f8b3cb5e0e598396 Mon Sep 17 00:00:00 2001 From: mpostma Date: Wed, 22 Sep 2021 10:49:59 +0200 Subject: [PATCH 04/37] refactor uuid resolver --- .../src/index_controller/dump_actor/actor.rs | 13 +- .../dump_actor/handle_impl.rs | 4 +- .../index_controller/dump_actor/loaders/v1.rs | 3 +- .../index_controller/dump_actor/loaders/v2.rs | 2 +- .../src/index_controller/dump_actor/mod.rs | 13 +- meilisearch-lib/src/index_controller/error.rs | 5 + meilisearch-lib/src/index_controller/mod.rs | 38 +++--- .../index_controller/uuid_resolver/actor.rs | 98 ------------- .../index_controller/uuid_resolver/error.rs | 16 +++ .../uuid_resolver/handle_impl.rs | 87 ------------ .../index_controller/uuid_resolver/message.rs | 58 +++++++- .../src/index_controller/uuid_resolver/mod.rs | 129 ++++++++++++++---- .../index_controller/uuid_resolver/store.rs | 3 +- 13 files changed, 223 insertions(+), 246 deletions(-) delete mode 100644 meilisearch-lib/src/index_controller/uuid_resolver/actor.rs delete mode 100644 meilisearch-lib/src/index_controller/uuid_resolver/handle_impl.rs diff --git a/meilisearch-lib/src/index_controller/dump_actor/actor.rs b/meilisearch-lib/src/index_controller/dump_actor/actor.rs index eee733c4a..f82101bc1 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/actor.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/actor.rs @@ -8,17 +8,17 @@ use futures::{lock::Mutex, stream::StreamExt}; use log::{error, trace}; use tokio::sync::{mpsc, oneshot, RwLock}; use update_actor::UpdateActorHandle; -use uuid_resolver::UuidResolverHandle; use super::error::{DumpActorError, Result}; use super::{DumpInfo, DumpMsg, DumpStatus, DumpTask}; -use 
crate::index_controller::{update_actor, uuid_resolver}; +use crate::index_controller::uuid_resolver::UuidResolverSender; +use crate::index_controller::update_actor; pub const CONCURRENT_DUMP_MSG: usize = 10; -pub struct DumpActor { +pub struct DumpActor { inbox: Option>, - uuid_resolver: UuidResolver, + uuid_resolver: UuidResolverSender, update: Update, dump_path: PathBuf, lock: Arc>, @@ -32,14 +32,13 @@ fn generate_uid() -> String { Utc::now().format("%Y%m%d-%H%M%S%3f").to_string() } -impl DumpActor +impl DumpActor where - UuidResolver: UuidResolverHandle + Send + Sync + Clone + 'static, Update: UpdateActorHandle + Send + Sync + Clone + 'static, { pub fn new( inbox: mpsc::Receiver, - uuid_resolver: UuidResolver, + uuid_resolver: UuidResolverSender, update: Update, dump_path: impl AsRef, index_db_size: usize, diff --git a/meilisearch-lib/src/index_controller/dump_actor/handle_impl.rs b/meilisearch-lib/src/index_controller/dump_actor/handle_impl.rs index 649d82405..544cb89c6 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/handle_impl.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/handle_impl.rs @@ -2,6 +2,8 @@ use std::path::Path; use tokio::sync::{mpsc, oneshot}; +use crate::index_controller::uuid_resolver::UuidResolverSender; + use super::error::Result; use super::{DumpActor, DumpActorHandle, DumpInfo, DumpMsg}; @@ -30,7 +32,7 @@ impl DumpActorHandle for DumpActorHandleImpl { impl DumpActorHandleImpl { pub fn new( path: impl AsRef, - uuid_resolver: crate::index_controller::uuid_resolver::UuidResolverHandleImpl, + uuid_resolver: UuidResolverSender, update: crate::index_controller::update_actor::UpdateActorHandleImpl, index_db_size: usize, update_db_size: usize, diff --git a/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs b/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs index 584828b4e..b489b2107 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs +++ 
b/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs @@ -7,7 +7,8 @@ use milli::update::Setting; use serde::{Deserialize, Deserializer, Serialize}; use uuid::Uuid; -use crate::index_controller::{self, uuid_resolver::HeedUuidStore, IndexMetadata}; +use crate::index_controller::uuid_resolver::store::HeedUuidStore; +use crate::index_controller::{self, IndexMetadata}; use crate::index_controller::{asc_ranking_rule, desc_ranking_rule}; use crate::{ index::Unchecked, diff --git a/meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs b/meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs index c39da1e44..7b7a8236c 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs @@ -5,7 +5,7 @@ use log::info; use serde::{Deserialize, Serialize}; use crate::index::Index; -use crate::index_controller::{update_actor::UpdateStore, uuid_resolver::HeedUuidStore}; +use crate::index_controller::{update_actor::UpdateStore, uuid_resolver::store::HeedUuidStore}; use crate::options::IndexerOpts; #[derive(Serialize, Deserialize, Debug)] diff --git a/meilisearch-lib/src/index_controller/dump_actor/mod.rs b/meilisearch-lib/src/index_controller/dump_actor/mod.rs index e0f9535f3..445966a56 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/mod.rs @@ -16,8 +16,10 @@ pub use actor::DumpActor; pub use handle_impl::*; pub use message::DumpMsg; -use super::{update_actor::UpdateActorHandle, uuid_resolver::UuidResolverHandle}; +use super::update_actor::UpdateActorHandle; +use super::uuid_resolver::UuidResolverSender; use crate::index_controller::dump_actor::error::DumpActorError; +use crate::index_controller::uuid_resolver::UuidResolverMsg; use crate::options::IndexerOpts; use error::Result; @@ -149,18 +151,17 @@ pub fn load_dump( Ok(()) } -struct DumpTask { +struct DumpTask

{ path: PathBuf, - uuid_resolver: U, + uuid_resolver: UuidResolverSender, update_handle: P, uid: String, update_db_size: usize, index_db_size: usize, } -impl DumpTask +impl

DumpTask

where - U: UuidResolverHandle + Send + Sync + Clone + 'static, P: UpdateActorHandle + Send + Sync + Clone + 'static, { @@ -179,7 +180,7 @@ where let mut meta_file = File::create(&meta_path)?; serde_json::to_writer(&mut meta_file, &meta)?; - let uuids = self.uuid_resolver.dump(temp_dump_path.clone()).await?; + let uuids = UuidResolverMsg::dump(&self.uuid_resolver, temp_dump_path.clone()).await?; self.update_handle .dump(uuids, temp_dump_path.clone()) diff --git a/meilisearch-lib/src/index_controller/error.rs b/meilisearch-lib/src/index_controller/error.rs index 00f6b8656..d3be7d7b7 100644 --- a/meilisearch-lib/src/index_controller/error.rs +++ b/meilisearch-lib/src/index_controller/error.rs @@ -1,3 +1,5 @@ +use std::error::Error; + use meilisearch_error::Code; use meilisearch_error::ErrorCode; @@ -24,6 +26,8 @@ pub enum IndexControllerError { DumpActor(#[from] DumpActorError), #[error("{0}")] IndexError(#[from] IndexError), + #[error("Internal error: {0}")] + Internal(Box), } impl ErrorCode for IndexControllerError { @@ -35,6 +39,7 @@ impl ErrorCode for IndexControllerError { IndexControllerError::UpdateActor(e) => e.error_code(), IndexControllerError::DumpActor(e) => e.error_code(), IndexControllerError::IndexError(e) => e.error_code(), + IndexControllerError::Internal(_) => Code::Internal, } } } diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index 73df4eee6..f22fec33f 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -20,13 +20,14 @@ use index_actor::IndexActorHandle; use snapshot::load_snapshot; use update_actor::UpdateActorHandle; pub use updates::*; -use uuid_resolver::{error::UuidResolverError, UuidResolverHandle}; +use uuid_resolver::error::UuidResolverError; use crate::options::IndexerOpts; use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings}; use error::Result; use self::dump_actor::load_dump; +use 
self::uuid_resolver::UuidResolverMsg; mod dump_actor; pub mod error; @@ -71,7 +72,7 @@ pub struct IndexStats { #[derive(Clone)] pub struct IndexController { - uuid_resolver: uuid_resolver::UuidResolverHandleImpl, + uuid_resolver: uuid_resolver::UuidResolverSender, index_handle: index_actor::IndexActorHandleImpl, update_handle: update_actor::UpdateActorHandleImpl, dump_handle: dump_actor::DumpActorHandleImpl, @@ -136,7 +137,7 @@ impl IndexControllerBuilder { std::fs::create_dir_all(db_path.as_ref())?; - let uuid_resolver = uuid_resolver::UuidResolverHandleImpl::new(&db_path)?; + let uuid_resolver = uuid_resolver::create_uuid_resolver(&db_path)?; let index_handle = index_actor::IndexActorHandleImpl::new(&db_path, index_size, &indexer_options)?; let update_handle = update_actor::UpdateActorHandleImpl::new( @@ -231,7 +232,8 @@ impl IndexController { } pub async fn register_update(&self, uid: &str, update: Update) -> Result { - match self.uuid_resolver.get(uid.to_string()).await { + let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid.to_string()).await; + match uuid { Ok(uuid) => { let update_result = self.update_handle.update(uuid, update).await?; Ok(update_result) @@ -241,7 +243,8 @@ impl IndexController { let update_result = self.update_handle.update(uuid, update).await?; // ignore if index creation fails now, since it may already have been created let _ = self.index_handle.create_index(uuid, None).await; - self.uuid_resolver.insert(name, uuid).await?; + UuidResolverMsg::insert(&self.uuid_resolver, uuid, name).await?; + Ok(update_result) } Err(e) => Err(e.into()), @@ -374,22 +377,20 @@ impl IndexController { //} pub async fn update_status(&self, uid: String, id: u64) -> Result { - let uuid = self.uuid_resolver.get(uid).await?; + let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?; let result = self.update_handle.update_status(uuid, id).await?; Ok(result) } pub async fn all_update_status(&self, uid: String) -> Result> { - let uuid = 
self.uuid_resolver.get(uid).await?; + let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?; let result = self.update_handle.get_all_updates_status(uuid).await?; Ok(result) } pub async fn list_indexes(&self) -> Result> { - let uuids = self.uuid_resolver.list().await?; - + let uuids = UuidResolverMsg::list(&self.uuid_resolver).await?; let mut ret = Vec::new(); - for (uid, uuid) in uuids { let meta = self.index_handle.get_index_meta(uuid).await?; let meta = IndexMetadata { @@ -405,7 +406,7 @@ impl IndexController { } pub async fn settings(&self, uid: String) -> Result> { - let uuid = self.uuid_resolver.get(uid.clone()).await?; + let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?; let settings = self.index_handle.settings(uuid).await?; Ok(settings) } @@ -417,7 +418,7 @@ impl IndexController { limit: usize, attributes_to_retrieve: Option>, ) -> Result> { - let uuid = self.uuid_resolver.get(uid.clone()).await?; + let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?; let documents = self .index_handle .documents(uuid, offset, limit, attributes_to_retrieve) @@ -431,7 +432,7 @@ impl IndexController { doc_id: String, attributes_to_retrieve: Option>, ) -> Result { - let uuid = self.uuid_resolver.get(uid.clone()).await?; + let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?; let document = self .index_handle .document(uuid, doc_id, attributes_to_retrieve) @@ -448,7 +449,7 @@ impl IndexController { index_settings.uid.take(); } - let uuid = self.uuid_resolver.get(uid.clone()).await?; + let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid.clone()).await?; let meta = self.index_handle.update_index(uuid, index_settings).await?; let meta = IndexMetadata { uuid, @@ -460,13 +461,13 @@ impl IndexController { } pub async fn search(&self, uid: String, query: SearchQuery) -> Result { - let uuid = self.uuid_resolver.get(uid).await?; + let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?; let result = 
self.index_handle.search(uuid, query).await?; Ok(result) } pub async fn get_index(&self, uid: String) -> Result { - let uuid = self.uuid_resolver.get(uid.clone()).await?; + let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid.clone()).await?; let meta = self.index_handle.get_index_meta(uuid).await?; let meta = IndexMetadata { uuid, @@ -478,11 +479,12 @@ impl IndexController { } pub async fn get_uuids_size(&self) -> Result { - Ok(self.uuid_resolver.get_size().await?) + let size = UuidResolverMsg::get_size(&self.uuid_resolver).await?; + Ok(size) } pub async fn get_index_stats(&self, uid: String) -> Result { - let uuid = self.uuid_resolver.get(uid).await?; + let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?; let update_infos = self.update_handle.get_info().await?; let mut stats = self.index_handle.get_index_stats(uuid).await?; // Check if the currently indexing update is from out index. diff --git a/meilisearch-lib/src/index_controller/uuid_resolver/actor.rs b/meilisearch-lib/src/index_controller/uuid_resolver/actor.rs deleted file mode 100644 index d221bd4f2..000000000 --- a/meilisearch-lib/src/index_controller/uuid_resolver/actor.rs +++ /dev/null @@ -1,98 +0,0 @@ -use std::{collections::HashSet, path::PathBuf}; - -use log::{trace, warn}; -use tokio::sync::mpsc; -use uuid::Uuid; - -use super::{error::UuidResolverError, Result, UuidResolveMsg, UuidStore}; - -pub struct UuidResolverActor { - inbox: mpsc::Receiver, - store: S, -} - -impl UuidResolverActor { - pub fn new(inbox: mpsc::Receiver, store: S) -> Self { - Self { inbox, store } - } - - pub async fn run(mut self) { - use UuidResolveMsg::*; - - trace!("uuid resolver started"); - - loop { - match self.inbox.recv().await { - Some(Get { uid: name, ret }) => { - let _ = ret.send(self.handle_get(name).await); - } - Some(Delete { uid: name, ret }) => { - let _ = ret.send(self.handle_delete(name).await); - } - Some(List { ret }) => { - let _ = ret.send(self.handle_list().await); - } - Some(Insert { 
ret, uuid, name }) => { - let _ = ret.send(self.handle_insert(name, uuid).await); - } - Some(SnapshotRequest { path, ret }) => { - let _ = ret.send(self.handle_snapshot(path).await); - } - Some(GetSize { ret }) => { - let _ = ret.send(self.handle_get_size().await); - } - Some(DumpRequest { path, ret }) => { - let _ = ret.send(self.handle_dump(path).await); - } - // all senders have been dropped, need to quit. - None => break, - } - } - - warn!("exiting uuid resolver loop"); - } - - async fn handle_get(&self, uid: String) -> Result { - self.store - .get_uuid(uid.clone()) - .await? - .ok_or(UuidResolverError::UnexistingIndex(uid)) - } - - async fn handle_delete(&self, uid: String) -> Result { - self.store - .delete(uid.clone()) - .await? - .ok_or(UuidResolverError::UnexistingIndex(uid)) - } - - async fn handle_list(&self) -> Result> { - let result = self.store.list().await?; - Ok(result) - } - - async fn handle_snapshot(&self, path: PathBuf) -> Result> { - self.store.snapshot(path).await - } - - async fn handle_dump(&self, path: PathBuf) -> Result> { - self.store.dump(path).await - } - - async fn handle_insert(&self, uid: String, uuid: Uuid) -> Result<()> { - if !is_index_uid_valid(&uid) { - return Err(UuidResolverError::BadlyFormatted(uid)); - } - self.store.insert(uid, uuid).await?; - Ok(()) - } - - async fn handle_get_size(&self) -> Result { - self.store.get_size().await - } -} - -fn is_index_uid_valid(uid: &str) -> bool { - uid.chars() - .all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') -} diff --git a/meilisearch-lib/src/index_controller/uuid_resolver/error.rs b/meilisearch-lib/src/index_controller/uuid_resolver/error.rs index de3dc662e..8f32fa35d 100644 --- a/meilisearch-lib/src/index_controller/uuid_resolver/error.rs +++ b/meilisearch-lib/src/index_controller/uuid_resolver/error.rs @@ -1,4 +1,8 @@ +use std::fmt; + use meilisearch_error::{Code, ErrorCode}; +use tokio::sync::mpsc::error::SendError as MpscSendError; +use 
tokio::sync::oneshot::error::RecvError as OneshotRecvError; pub type Result = std::result::Result; @@ -22,6 +26,18 @@ internal_error!( serde_json::Error ); +impl From> for UuidResolverError { + fn from(other: MpscSendError) -> Self { + Self::Internal(Box::new(other)) + } +} + +impl From for UuidResolverError { + fn from(other: OneshotRecvError) -> Self { + Self::Internal(Box::new(other)) + } +} + impl ErrorCode for UuidResolverError { fn error_code(&self) -> Code { match self { diff --git a/meilisearch-lib/src/index_controller/uuid_resolver/handle_impl.rs b/meilisearch-lib/src/index_controller/uuid_resolver/handle_impl.rs deleted file mode 100644 index 1296264e0..000000000 --- a/meilisearch-lib/src/index_controller/uuid_resolver/handle_impl.rs +++ /dev/null @@ -1,87 +0,0 @@ -use std::collections::HashSet; -use std::path::{Path, PathBuf}; - -use tokio::sync::{mpsc, oneshot}; -use uuid::Uuid; - -use super::{HeedUuidStore, Result, UuidResolveMsg, UuidResolverActor, UuidResolverHandle}; - -#[derive(Clone)] -pub struct UuidResolverHandleImpl { - sender: mpsc::Sender, -} - -impl UuidResolverHandleImpl { - pub fn new(path: impl AsRef) -> Result { - let (sender, reveiver) = mpsc::channel(100); - let store = HeedUuidStore::new(path)?; - let actor = UuidResolverActor::new(reveiver, store); - tokio::spawn(actor.run()); - Ok(Self { sender }) - } -} - -#[async_trait::async_trait] -impl UuidResolverHandle for UuidResolverHandleImpl { - async fn get(&self, name: String) -> Result { - let (ret, receiver) = oneshot::channel(); - let msg = UuidResolveMsg::Get { uid: name, ret }; - let _ = self.sender.send(msg).await; - Ok(receiver - .await - .expect("Uuid resolver actor has been killed")?) - } - - async fn delete(&self, name: String) -> Result { - let (ret, receiver) = oneshot::channel(); - let msg = UuidResolveMsg::Delete { uid: name, ret }; - let _ = self.sender.send(msg).await; - Ok(receiver - .await - .expect("Uuid resolver actor has been killed")?) 
- } - - async fn list(&self) -> Result> { - let (ret, receiver) = oneshot::channel(); - let msg = UuidResolveMsg::List { ret }; - let _ = self.sender.send(msg).await; - Ok(receiver - .await - .expect("Uuid resolver actor has been killed")?) - } - - async fn insert(&self, name: String, uuid: Uuid) -> Result<()> { - let (ret, receiver) = oneshot::channel(); - let msg = UuidResolveMsg::Insert { ret, name, uuid }; - let _ = self.sender.send(msg).await; - Ok(receiver - .await - .expect("Uuid resolver actor has been killed")?) - } - - async fn snapshot(&self, path: PathBuf) -> Result> { - let (ret, receiver) = oneshot::channel(); - let msg = UuidResolveMsg::SnapshotRequest { path, ret }; - let _ = self.sender.send(msg).await; - Ok(receiver - .await - .expect("Uuid resolver actor has been killed")?) - } - - async fn get_size(&self) -> Result { - let (ret, receiver) = oneshot::channel(); - let msg = UuidResolveMsg::GetSize { ret }; - let _ = self.sender.send(msg).await; - Ok(receiver - .await - .expect("Uuid resolver actor has been killed")?) - } - async fn dump(&self, path: PathBuf) -> Result> { - let (ret, receiver) = oneshot::channel(); - let msg = UuidResolveMsg::DumpRequest { ret, path }; - let _ = self.sender.send(msg).await; - Ok(receiver - .await - .expect("Uuid resolver actor has been killed")?) 
- } -} diff --git a/meilisearch-lib/src/index_controller/uuid_resolver/message.rs b/meilisearch-lib/src/index_controller/uuid_resolver/message.rs index 46d9b585f..e9da56d5e 100644 --- a/meilisearch-lib/src/index_controller/uuid_resolver/message.rs +++ b/meilisearch-lib/src/index_controller/uuid_resolver/message.rs @@ -1,12 +1,13 @@ use std::collections::HashSet; use std::path::PathBuf; -use tokio::sync::oneshot; +use tokio::sync::{mpsc, oneshot}; use uuid::Uuid; -use super::Result; +use super::error::Result; -pub enum UuidResolveMsg { +#[derive(Debug)] +pub enum UuidResolverMsg { Get { uid: String, ret: oneshot::Sender>, @@ -35,3 +36,54 @@ pub enum UuidResolveMsg { ret: oneshot::Sender>>, }, } + +impl UuidResolverMsg { + pub async fn get(channel: &mpsc::Sender, uid: String) -> Result { + let (ret, recv) = oneshot::channel(); + let msg = Self::Get { uid, ret }; + channel.send(msg).await?; + recv.await? + } + + pub async fn insert(channel: &mpsc::Sender, uuid: Uuid, name: String) -> Result<()> { + let (ret, recv) = oneshot::channel(); + let msg = Self::Insert { name, uuid, ret }; + channel.send(msg).await?; + recv.await? + } + + pub async fn list(channel: &mpsc::Sender) -> Result> { + let (ret, recv) = oneshot::channel(); + let msg = Self::List { ret }; + channel.send(msg).await?; + recv.await? + } + + pub async fn get_size(channel: &mpsc::Sender) -> Result { + let (ret, recv) = oneshot::channel(); + let msg = Self::GetSize { ret }; + channel.send(msg).await?; + recv.await? + } + + pub async fn dump(channel: &mpsc::Sender, path: PathBuf) -> Result> { + let (ret, recv) = oneshot::channel(); + let msg = Self::DumpRequest { ret, path }; + channel.send(msg).await?; + recv.await? + } + + pub async fn snapshot(channel: &mpsc::Sender, path: PathBuf) -> Result> { + let (ret, recv) = oneshot::channel(); + let msg = Self::SnapshotRequest { ret, path }; + channel.send(msg).await?; + recv.await? 
+ } + + pub async fn delete(channel: &mpsc::Sender, uid: String) -> Result { + let (ret, recv) = oneshot::channel(); + let msg = Self::Delete { ret, uid }; + channel.send(msg).await?; + recv.await? + } +} diff --git a/meilisearch-lib/src/index_controller/uuid_resolver/mod.rs b/meilisearch-lib/src/index_controller/uuid_resolver/mod.rs index da6c1264d..7157c1b41 100644 --- a/meilisearch-lib/src/index_controller/uuid_resolver/mod.rs +++ b/meilisearch-lib/src/index_controller/uuid_resolver/mod.rs @@ -1,35 +1,118 @@ -mod actor; pub mod error; -mod handle_impl; mod message; pub mod store; -use std::collections::HashSet; -use std::path::PathBuf; +use std::path::Path; +use std::{collections::HashSet, path::PathBuf}; +use log::{trace, warn}; +use tokio::sync::mpsc; use uuid::Uuid; -use actor::UuidResolverActor; -use error::Result; -use message::UuidResolveMsg; -use store::UuidStore; +pub use self::error::UuidResolverError; +pub use self::message::UuidResolverMsg; +pub use self::store::{HeedUuidStore, UuidStore}; +use self::error::Result; -#[cfg(test)] -use mockall::automock; - -pub use handle_impl::UuidResolverHandleImpl; -pub use store::HeedUuidStore; +pub type UuidResolverSender = mpsc::Sender; const UUID_STORE_SIZE: usize = 1_073_741_824; //1GiB -#[async_trait::async_trait] -#[cfg_attr(test, automock)] -pub trait UuidResolverHandle { - async fn get(&self, name: String) -> Result; - async fn insert(&self, name: String, uuid: Uuid) -> Result<()>; - async fn delete(&self, name: String) -> Result; - async fn list(&self) -> Result>; - async fn snapshot(&self, path: PathBuf) -> Result>; - async fn get_size(&self) -> Result; - async fn dump(&self, path: PathBuf) -> Result>; +pub fn create_uuid_resolver(path: impl AsRef) -> Result> { + let (sender, reveiver) = mpsc::channel(100); + let store = HeedUuidStore::new(path)?; + let actor = UuidResolver::new(reveiver, store); + tokio::spawn(actor.run()); + Ok(sender) +} + +pub struct UuidResolver { + inbox: mpsc::Receiver, + store: S, 
+} + +impl UuidResolver { + pub fn new(inbox: mpsc::Receiver, store: S) -> Self { + Self { inbox, store } + } + + pub async fn run(mut self) { + use UuidResolverMsg::*; + + trace!("uuid resolver started"); + + loop { + match self.inbox.recv().await { + Some(Get { uid: name, ret }) => { + let _ = ret.send(self.handle_get(name).await); + } + Some(Delete { uid: name, ret }) => { + let _ = ret.send(self.handle_delete(name).await); + } + Some(List { ret }) => { + let _ = ret.send(self.handle_list().await); + } + Some(Insert { ret, uuid, name }) => { + let _ = ret.send(self.handle_insert(name, uuid).await); + } + Some(SnapshotRequest { path, ret }) => { + let _ = ret.send(self.handle_snapshot(path).await); + } + Some(GetSize { ret }) => { + let _ = ret.send(self.handle_get_size().await); + } + Some(DumpRequest { path, ret }) => { + let _ = ret.send(self.handle_dump(path).await); + } + // all senders have been dropped, need to quit. + None => break, + } + } + + warn!("exiting uuid resolver loop"); + } + + async fn handle_get(&self, uid: String) -> Result { + self.store + .get_uuid(uid.clone()) + .await? + .ok_or(UuidResolverError::UnexistingIndex(uid)) + } + + async fn handle_delete(&self, uid: String) -> Result { + self.store + .delete(uid.clone()) + .await? 
+ .ok_or(UuidResolverError::UnexistingIndex(uid)) + } + + async fn handle_list(&self) -> Result> { + let result = self.store.list().await?; + Ok(result) + } + + async fn handle_snapshot(&self, path: PathBuf) -> Result> { + self.store.snapshot(path).await + } + + async fn handle_dump(&self, path: PathBuf) -> Result> { + self.store.dump(path).await + } + + async fn handle_insert(&self, uid: String, uuid: Uuid) -> Result<()> { + if !is_index_uid_valid(&uid) { + return Err(UuidResolverError::BadlyFormatted(uid)); + } + self.store.insert(uid, uuid).await?; + Ok(()) + } + + async fn handle_get_size(&self) -> Result { + self.store.get_size().await + } +} + +fn is_index_uid_valid(uid: &str) -> bool { + uid.chars() + .all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') } diff --git a/meilisearch-lib/src/index_controller/uuid_resolver/store.rs b/meilisearch-lib/src/index_controller/uuid_resolver/store.rs index 5457ab91d..34ba8ced5 100644 --- a/meilisearch-lib/src/index_controller/uuid_resolver/store.rs +++ b/meilisearch-lib/src/index_controller/uuid_resolver/store.rs @@ -8,7 +8,8 @@ use heed::{CompactionOption, Database, Env, EnvOpenOptions}; use serde::{Deserialize, Serialize}; use uuid::Uuid; -use super::{error::UuidResolverError, Result, UUID_STORE_SIZE}; +use super::UUID_STORE_SIZE; +use super::error::{UuidResolverError, Result}; use crate::EnvSizer; #[derive(Serialize, Deserialize)] From 12542bf9229a1b1298d16e95a33a7c6d6c3fcaa4 Mon Sep 17 00:00:00 2001 From: mpostma Date: Wed, 22 Sep 2021 11:52:29 +0200 Subject: [PATCH 05/37] refactor update actor --- meilisearch-lib/src/index/update_handler.rs | 4 +- meilisearch-lib/src/index/updates.rs | 2 +- .../src/index_controller/dump_actor/actor.rs | 14 +- .../src/index_controller/dump_actor/error.rs | 2 +- .../dump_actor/handle_impl.rs | 2 +- .../index_controller/dump_actor/loaders/v2.rs | 3 +- .../src/index_controller/dump_actor/mod.rs | 17 +-- meilisearch-lib/src/index_controller/error.rs | 2 +- 
.../src/index_controller/index_actor/actor.rs | 3 +- .../index_actor/handle_impl.rs | 5 +- .../index_controller/index_actor/message.rs | 3 +- .../src/index_controller/index_actor/mod.rs | 3 +- meilisearch-lib/src/index_controller/mod.rs | 27 ++-- .../src/index_controller/snapshot.rs | 2 +- .../update_actor/handle_impl.rs | 94 ------------- .../index_controller/update_actor/message.rs | 42 ------ .../src/index_controller/update_actor/mod.rs | 49 ------- .../{update_actor => updates}/error.rs | 0 .../src/index_controller/updates/message.rs | 112 +++++++++++++++ .../{update_actor/actor.rs => updates/mod.rs} | 131 ++++++++++++------ .../{updates.rs => updates/status.rs} | 4 +- .../{update_actor => updates}/store/codec.rs | 0 .../{update_actor => updates}/store/dump.rs | 12 +- .../{update_actor => updates}/store/mod.rs | 15 +- meilisearch-lib/src/lib.rs | 2 +- 25 files changed, 253 insertions(+), 297 deletions(-) delete mode 100644 meilisearch-lib/src/index_controller/update_actor/handle_impl.rs delete mode 100644 meilisearch-lib/src/index_controller/update_actor/message.rs delete mode 100644 meilisearch-lib/src/index_controller/update_actor/mod.rs rename meilisearch-lib/src/index_controller/{update_actor => updates}/error.rs (100%) create mode 100644 meilisearch-lib/src/index_controller/updates/message.rs rename meilisearch-lib/src/index_controller/{update_actor/actor.rs => updates/mod.rs} (71%) rename meilisearch-lib/src/index_controller/{updates.rs => updates/status.rs} (98%) rename meilisearch-lib/src/index_controller/{update_actor => updates}/store/codec.rs (100%) rename meilisearch-lib/src/index_controller/{update_actor => updates}/store/dump.rs (95%) rename meilisearch-lib/src/index_controller/{update_actor => updates}/store/mod.rs (98%) diff --git a/meilisearch-lib/src/index/update_handler.rs b/meilisearch-lib/src/index/update_handler.rs index 8fba55341..95ae2f556 100644 --- a/meilisearch-lib/src/index/update_handler.rs +++ 
b/meilisearch-lib/src/index/update_handler.rs @@ -3,8 +3,8 @@ use milli::update::UpdateBuilder; use milli::CompressionType; use rayon::ThreadPool; -use crate::index_controller::update_actor::RegisterUpdate; -use crate::index_controller::{Failed, Processed, Processing}; +use crate::index_controller::updates::RegisterUpdate; +use crate::index_controller::updates::status::{Failed, Processed, Processing}; use crate::options::IndexerOpts; pub struct UpdateHandler { diff --git a/meilisearch-lib/src/index/updates.rs b/meilisearch-lib/src/index/updates.rs index 6c7ae1416..e6012f4ab 100644 --- a/meilisearch-lib/src/index/updates.rs +++ b/meilisearch-lib/src/index/updates.rs @@ -8,7 +8,7 @@ use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder}; use serde::{Deserialize, Serialize, Serializer}; use uuid::Uuid; -use crate::index_controller::UpdateResult; +use crate::index_controller::updates::status::UpdateResult; use super::Index; use super::error::Result; diff --git a/meilisearch-lib/src/index_controller/dump_actor/actor.rs b/meilisearch-lib/src/index_controller/dump_actor/actor.rs index f82101bc1..881f3e5b8 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/actor.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/actor.rs @@ -7,19 +7,18 @@ use chrono::Utc; use futures::{lock::Mutex, stream::StreamExt}; use log::{error, trace}; use tokio::sync::{mpsc, oneshot, RwLock}; -use update_actor::UpdateActorHandle; use super::error::{DumpActorError, Result}; use super::{DumpInfo, DumpMsg, DumpStatus, DumpTask}; use crate::index_controller::uuid_resolver::UuidResolverSender; -use crate::index_controller::update_actor; +use crate::index_controller::updates::UpdateSender; pub const CONCURRENT_DUMP_MSG: usize = 10; -pub struct DumpActor { +pub struct DumpActor { inbox: Option>, uuid_resolver: UuidResolverSender, - update: Update, + update: UpdateSender, dump_path: PathBuf, lock: Arc>, dump_infos: Arc>>, @@ -32,14 +31,11 @@ fn generate_uid() -> String { 
Utc::now().format("%Y%m%d-%H%M%S%3f").to_string() } -impl DumpActor -where - Update: UpdateActorHandle + Send + Sync + Clone + 'static, -{ +impl DumpActor { pub fn new( inbox: mpsc::Receiver, uuid_resolver: UuidResolverSender, - update: Update, + update: UpdateSender, dump_path: impl AsRef, index_db_size: usize, update_db_size: usize, diff --git a/meilisearch-lib/src/index_controller/dump_actor/error.rs b/meilisearch-lib/src/index_controller/dump_actor/error.rs index b6bddb5ea..eb6f08c00 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/error.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/error.rs @@ -1,6 +1,6 @@ use meilisearch_error::{Code, ErrorCode}; -use crate::index_controller::update_actor::error::UpdateActorError; +use crate::index_controller::updates::error::UpdateActorError; use crate::index_controller::uuid_resolver::error::UuidResolverError; pub type Result = std::result::Result; diff --git a/meilisearch-lib/src/index_controller/dump_actor/handle_impl.rs b/meilisearch-lib/src/index_controller/dump_actor/handle_impl.rs index 544cb89c6..a629ff753 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/handle_impl.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/handle_impl.rs @@ -33,7 +33,7 @@ impl DumpActorHandleImpl { pub fn new( path: impl AsRef, uuid_resolver: UuidResolverSender, - update: crate::index_controller::update_actor::UpdateActorHandleImpl, + update: crate::index_controller::updates::UpdateSender, index_db_size: usize, update_db_size: usize, ) -> anyhow::Result { diff --git a/meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs b/meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs index 7b7a8236c..c50e8a722 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs @@ -5,7 +5,8 @@ use log::info; use serde::{Deserialize, Serialize}; use crate::index::Index; -use 
crate::index_controller::{update_actor::UpdateStore, uuid_resolver::store::HeedUuidStore}; +use crate::index_controller::updates::store::UpdateStore; +use crate::index_controller::{uuid_resolver::store::HeedUuidStore}; use crate::options::IndexerOpts; #[derive(Serialize, Deserialize, Debug)] diff --git a/meilisearch-lib/src/index_controller/dump_actor/mod.rs b/meilisearch-lib/src/index_controller/dump_actor/mod.rs index 445966a56..7db682e98 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/mod.rs @@ -16,9 +16,10 @@ pub use actor::DumpActor; pub use handle_impl::*; pub use message::DumpMsg; -use super::update_actor::UpdateActorHandle; +use super::updates::UpdateSender; use super::uuid_resolver::UuidResolverSender; use crate::index_controller::dump_actor::error::DumpActorError; +use crate::index_controller::updates::UpdateMsg; use crate::index_controller::uuid_resolver::UuidResolverMsg; use crate::options::IndexerOpts; use error::Result; @@ -151,20 +152,16 @@ pub fn load_dump( Ok(()) } -struct DumpTask

{ +struct DumpTask { path: PathBuf, uuid_resolver: UuidResolverSender, - update_handle: P, + update_handle: UpdateSender, uid: String, update_db_size: usize, index_db_size: usize, } -impl

DumpTask

-where - P: UpdateActorHandle + Send + Sync + Clone + 'static, - -{ +impl DumpTask { async fn run(self) -> Result<()> { trace!("Performing dump."); @@ -182,9 +179,7 @@ where let uuids = UuidResolverMsg::dump(&self.uuid_resolver, temp_dump_path.clone()).await?; - self.update_handle - .dump(uuids, temp_dump_path.clone()) - .await?; + UpdateMsg::dump(&self.update_handle, uuids, temp_dump_path.clone()).await?; let dump_path = tokio::task::spawn_blocking(move || -> Result { let temp_dump_file = tempfile::NamedTempFile::new_in(&self.path)?; diff --git a/meilisearch-lib/src/index_controller/error.rs b/meilisearch-lib/src/index_controller/error.rs index d3be7d7b7..ddf698d29 100644 --- a/meilisearch-lib/src/index_controller/error.rs +++ b/meilisearch-lib/src/index_controller/error.rs @@ -7,7 +7,7 @@ use crate::index::error::IndexError; use super::dump_actor::error::DumpActorError; use super::index_actor::error::IndexActorError; -use super::update_actor::error::UpdateActorError; +use super::updates::error::UpdateActorError; use super::uuid_resolver::error::UuidResolverError; pub type Result = std::result::Result; diff --git a/meilisearch-lib/src/index_controller/index_actor/actor.rs b/meilisearch-lib/src/index_controller/index_actor/actor.rs index cee656b97..6e7d13760 100644 --- a/meilisearch-lib/src/index_controller/index_actor/actor.rs +++ b/meilisearch-lib/src/index_controller/index_actor/actor.rs @@ -14,8 +14,9 @@ use crate::index::{ update_handler::UpdateHandler, Checked, Document, SearchQuery, SearchResult, Settings, }; use crate::index_controller::{ - get_arc_ownership_blocking, Failed, IndexStats, Processed, Processing, + get_arc_ownership_blocking, IndexStats, }; +use crate::index_controller::updates::status::{Failed, Processed, Processing}; use crate::options::IndexerOpts; use super::error::{IndexActorError, Result}; diff --git a/meilisearch-lib/src/index_controller/index_actor/handle_impl.rs b/meilisearch-lib/src/index_controller/index_actor/handle_impl.rs index 
8cc66edee..de295af6d 100644 --- a/meilisearch-lib/src/index_controller/index_actor/handle_impl.rs +++ b/meilisearch-lib/src/index_controller/index_actor/handle_impl.rs @@ -1,4 +1,4 @@ -use crate::options::IndexerOpts; +use crate::{index_controller::updates::status::{Failed, Processed, Processing}, options::IndexerOpts}; use std::path::{Path, PathBuf}; use tokio::sync::{mpsc, oneshot}; @@ -6,11 +6,10 @@ use uuid::Uuid; use crate::{ index::Checked, - index_controller::{IndexSettings, IndexStats, Processing}, + index_controller::{IndexSettings, IndexStats}, }; use crate::{ index::{Document, SearchQuery, SearchResult, Settings}, - index_controller::{Failed, Processed}, }; use super::error::Result; diff --git a/meilisearch-lib/src/index_controller/index_actor/message.rs b/meilisearch-lib/src/index_controller/index_actor/message.rs index 1b93ec34f..55aaf5bc7 100644 --- a/meilisearch-lib/src/index_controller/index_actor/message.rs +++ b/meilisearch-lib/src/index_controller/index_actor/message.rs @@ -5,7 +5,8 @@ use uuid::Uuid; use super::error::Result as IndexResult; use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings}; -use crate::index_controller::{Failed, IndexStats, Processed, Processing}; +use crate::index_controller::IndexStats; +use crate::index_controller::updates::status::{Failed, Processed, Processing}; use super::{IndexMeta, IndexSettings}; diff --git a/meilisearch-lib/src/index_controller/index_actor/mod.rs b/meilisearch-lib/src/index_controller/index_actor/mod.rs index bf5833222..8f2ac4d2d 100644 --- a/meilisearch-lib/src/index_controller/index_actor/mod.rs +++ b/meilisearch-lib/src/index_controller/index_actor/mod.rs @@ -13,10 +13,9 @@ use message::IndexMsg; use store::{IndexStore, MapIndexStore}; use crate::index::{Checked, Document, Index, SearchQuery, SearchResult, Settings}; -use crate::index_controller::{Failed, IndexStats, Processed, Processing}; use error::Result; -use super::IndexSettings; +use super::{IndexSettings, IndexStats, 
updates::status::{Failed, Processed, Processing}}; mod actor; pub mod error; diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index f22fec33f..da92eca20 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -18,8 +18,6 @@ use dump_actor::DumpActorHandle; pub use dump_actor::{DumpInfo, DumpStatus}; use index_actor::IndexActorHandle; use snapshot::load_snapshot; -use update_actor::UpdateActorHandle; -pub use updates::*; use uuid_resolver::error::UuidResolverError; use crate::options::IndexerOpts; @@ -27,14 +25,15 @@ use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings}; use error::Result; use self::dump_actor::load_dump; +use self::updates::UpdateMsg; +use self::updates::status::UpdateStatus; use self::uuid_resolver::UuidResolverMsg; mod dump_actor; pub mod error; pub mod index_actor; mod snapshot; -pub mod update_actor; -mod updates; +pub mod updates; mod uuid_resolver; pub mod update_file_store; @@ -74,7 +73,7 @@ pub struct IndexStats { pub struct IndexController { uuid_resolver: uuid_resolver::UuidResolverSender, index_handle: index_actor::IndexActorHandleImpl, - update_handle: update_actor::UpdateActorHandleImpl, + update_handle: updates::UpdateSender, dump_handle: dump_actor::DumpActorHandleImpl, } @@ -140,8 +139,10 @@ impl IndexControllerBuilder { let uuid_resolver = uuid_resolver::create_uuid_resolver(&db_path)?; let index_handle = index_actor::IndexActorHandleImpl::new(&db_path, index_size, &indexer_options)?; - let update_handle = update_actor::UpdateActorHandleImpl::new( - index_handle.clone(), + + #[allow(unreachable_code)] + let update_handle = updates::create_update_handler( + todo!(), &db_path, update_store_size, )?; @@ -235,12 +236,12 @@ impl IndexController { let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid.to_string()).await; match uuid { Ok(uuid) => { - let update_result = self.update_handle.update(uuid, 
update).await?; + let update_result = UpdateMsg::update(&self.update_handle, uuid, update).await?; Ok(update_result) }, Err(UuidResolverError::UnexistingIndex(name)) => { let uuid = Uuid::new_v4(); - let update_result = self.update_handle.update(uuid, update).await?; + let update_result = UpdateMsg::update(&self.update_handle, uuid, update).await?; // ignore if index creation fails now, since it may already have been created let _ = self.index_handle.create_index(uuid, None).await; UuidResolverMsg::insert(&self.uuid_resolver, uuid, name).await?; @@ -378,13 +379,13 @@ impl IndexController { pub async fn update_status(&self, uid: String, id: u64) -> Result { let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?; - let result = self.update_handle.update_status(uuid, id).await?; + let result = UpdateMsg::get_update(&self.update_handle, uuid, id).await?; Ok(result) } pub async fn all_update_status(&self, uid: String) -> Result> { let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?; - let result = self.update_handle.get_all_updates_status(uuid).await?; + let result = UpdateMsg::list_updates(&self.update_handle, uuid).await?; Ok(result) } @@ -485,7 +486,7 @@ impl IndexController { pub async fn get_index_stats(&self, uid: String) -> Result { let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?; - let update_infos = self.update_handle.get_info().await?; + let update_infos = UpdateMsg::get_info(&self.update_handle).await?; let mut stats = self.index_handle.get_index_stats(uuid).await?; // Check if the currently indexing update is from out index. stats.is_indexing = Some(Some(uuid) == update_infos.processing); @@ -493,7 +494,7 @@ impl IndexController { } pub async fn get_all_stats(&self) -> Result { - let update_infos = self.update_handle.get_info().await?; + let update_infos = UpdateMsg::get_info(&self.update_handle).await?; let mut database_size = self.get_uuids_size().await? 
+ update_infos.size; let mut last_update: Option> = None; let mut indexes = BTreeMap::new(); diff --git a/meilisearch-lib/src/index_controller/snapshot.rs b/meilisearch-lib/src/index_controller/snapshot.rs index c2f600bbc..7c999fd74 100644 --- a/meilisearch-lib/src/index_controller/snapshot.rs +++ b/meilisearch-lib/src/index_controller/snapshot.rs @@ -132,7 +132,7 @@ mod test { use super::*; use crate::index_controller::index_actor::MockIndexActorHandle; - use crate::index_controller::update_actor::{ + use crate::index_controller::updates::{ error::UpdateActorError, MockUpdateActorHandle, UpdateActorHandleImpl, }; use crate::index_controller::uuid_resolver::{ diff --git a/meilisearch-lib/src/index_controller/update_actor/handle_impl.rs b/meilisearch-lib/src/index_controller/update_actor/handle_impl.rs deleted file mode 100644 index e1df0b5d4..000000000 --- a/meilisearch-lib/src/index_controller/update_actor/handle_impl.rs +++ /dev/null @@ -1,94 +0,0 @@ -use std::collections::HashSet; -use std::path::{Path, PathBuf}; - -use tokio::sync::{mpsc, oneshot}; -use uuid::Uuid; - -use crate::index_controller::{IndexActorHandle, Update, UpdateStatus}; - -use super::error::Result; -use super::{UpdateActor, UpdateActorHandle, UpdateMsg, UpdateStoreInfo}; - -#[derive(Clone)] -pub struct UpdateActorHandleImpl { - sender: mpsc::Sender, -} - -impl UpdateActorHandleImpl { - pub fn new( - index_handle: I, - path: impl AsRef, - update_store_size: usize, - ) -> anyhow::Result - where - I: IndexActorHandle + Clone + Sync + Send +'static, - { - let path = path.as_ref().to_owned(); - let (sender, receiver) = mpsc::channel(100); - let actor = UpdateActor::new(update_store_size, receiver, path, index_handle)?; - - tokio::task::spawn_local(actor.run()); - - Ok(Self { sender }) - } -} - -#[async_trait::async_trait] -impl UpdateActorHandle for UpdateActorHandleImpl { - async fn get_all_updates_status(&self, uuid: Uuid) -> Result> { - let (ret, receiver) = oneshot::channel(); - let msg = 
UpdateMsg::ListUpdates { uuid, ret }; - self.sender.send(msg).await?; - receiver.await? - } - - async fn update_status(&self, uuid: Uuid, id: u64) -> Result { - let (ret, receiver) = oneshot::channel(); - let msg = UpdateMsg::GetUpdate { uuid, id, ret }; - self.sender.send(msg).await?; - receiver.await? - } - - async fn delete(&self, uuid: Uuid) -> Result<()> { - let (ret, receiver) = oneshot::channel(); - let msg = UpdateMsg::Delete { uuid, ret }; - self.sender.send(msg).await?; - receiver.await? - } - - async fn snapshot(&self, uuids: HashSet, path: PathBuf) -> Result<()> { - let (ret, receiver) = oneshot::channel(); - let msg = UpdateMsg::Snapshot { uuids, path, ret }; - self.sender.send(msg).await?; - receiver.await? - } - - async fn dump(&self, uuids: HashSet, path: PathBuf) -> Result<()> { - let (ret, receiver) = oneshot::channel(); - let msg = UpdateMsg::Dump { uuids, path, ret }; - self.sender.send(msg).await?; - receiver.await? - } - - async fn get_info(&self) -> Result { - let (ret, receiver) = oneshot::channel(); - let msg = UpdateMsg::GetInfo { ret }; - self.sender.send(msg).await?; - receiver.await? - } - - async fn update( - &self, - uuid: Uuid, - update: Update, - ) -> Result { - let (ret, receiver) = oneshot::channel(); - let msg = UpdateMsg::Update { - uuid, - update, - ret, - }; - self.sender.send(msg).await?; - receiver.await? 
- } -} diff --git a/meilisearch-lib/src/index_controller/update_actor/message.rs b/meilisearch-lib/src/index_controller/update_actor/message.rs deleted file mode 100644 index 40cc3360c..000000000 --- a/meilisearch-lib/src/index_controller/update_actor/message.rs +++ /dev/null @@ -1,42 +0,0 @@ -use std::collections::HashSet; -use std::path::PathBuf; - -use tokio::sync::oneshot; -use uuid::Uuid; - -use super::error::Result; -use super::{UpdateStatus, UpdateStoreInfo, Update}; - -pub enum UpdateMsg { - Update { - uuid: Uuid, - update: Update, - ret: oneshot::Sender>, - }, - ListUpdates { - uuid: Uuid, - ret: oneshot::Sender>>, - }, - GetUpdate { - uuid: Uuid, - ret: oneshot::Sender>, - id: u64, - }, - Delete { - uuid: Uuid, - ret: oneshot::Sender>, - }, - Snapshot { - uuids: HashSet, - path: PathBuf, - ret: oneshot::Sender>, - }, - Dump { - uuids: HashSet, - path: PathBuf, - ret: oneshot::Sender>, - }, - GetInfo { - ret: oneshot::Sender>, - }, -} diff --git a/meilisearch-lib/src/index_controller/update_actor/mod.rs b/meilisearch-lib/src/index_controller/update_actor/mod.rs deleted file mode 100644 index b83cf491c..000000000 --- a/meilisearch-lib/src/index_controller/update_actor/mod.rs +++ /dev/null @@ -1,49 +0,0 @@ -use std::{collections::HashSet, path::PathBuf}; - -use milli::update::IndexDocumentsMethod; -use uuid::Uuid; -use serde::{Serialize, Deserialize}; - -use crate::index_controller::UpdateStatus; -use super::Update; - -use actor::UpdateActor; -use error::Result; -use message::UpdateMsg; - -pub use handle_impl::UpdateActorHandleImpl; -pub use store::{UpdateStore, UpdateStoreInfo}; - -mod actor; -pub mod error; -mod handle_impl; -mod message; -pub mod store; - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum RegisterUpdate { - DocumentAddition { - primary_key: Option, - method: IndexDocumentsMethod, - content_uuid: Uuid, - } -} - - -#[cfg(test)] -use mockall::automock; - -#[async_trait::async_trait] -pub trait UpdateActorHandle { - async fn 
get_all_updates_status(&self, uuid: Uuid) -> Result>; - async fn update_status(&self, uuid: Uuid, id: u64) -> Result; - async fn delete(&self, uuid: Uuid) -> Result<()>; - async fn snapshot(&self, uuid: HashSet, path: PathBuf) -> Result<()>; - async fn dump(&self, uuids: HashSet, path: PathBuf) -> Result<()>; - async fn get_info(&self) -> Result; - async fn update( - &self, - uuid: Uuid, - update: Update, - ) -> Result; -} diff --git a/meilisearch-lib/src/index_controller/update_actor/error.rs b/meilisearch-lib/src/index_controller/updates/error.rs similarity index 100% rename from meilisearch-lib/src/index_controller/update_actor/error.rs rename to meilisearch-lib/src/index_controller/updates/error.rs diff --git a/meilisearch-lib/src/index_controller/updates/message.rs b/meilisearch-lib/src/index_controller/updates/message.rs new file mode 100644 index 000000000..fe6e1360b --- /dev/null +++ b/meilisearch-lib/src/index_controller/updates/message.rs @@ -0,0 +1,112 @@ +use std::collections::HashSet; +use std::path::PathBuf; + +use tokio::sync::{mpsc, oneshot}; +use uuid::Uuid; + +use super::error::Result; +use super::{Update, UpdateStatus, UpdateStoreInfo}; + +pub enum UpdateMsg { + Update { + uuid: Uuid, + update: Update, + ret: oneshot::Sender>, + }, + ListUpdates { + uuid: Uuid, + ret: oneshot::Sender>>, + }, + GetUpdate { + uuid: Uuid, + ret: oneshot::Sender>, + id: u64, + }, + Delete { + uuid: Uuid, + ret: oneshot::Sender>, + }, + Snapshot { + uuids: HashSet, + path: PathBuf, + ret: oneshot::Sender>, + }, + Dump { + uuids: HashSet, + path: PathBuf, + ret: oneshot::Sender>, + }, + GetInfo { + ret: oneshot::Sender>, + }, +} + +impl UpdateMsg { + pub async fn dump( + sender: &mpsc::Sender, + uuids: HashSet, + path: PathBuf, + ) -> Result<()> { + let (ret, rcv) = oneshot::channel(); + let msg = Self::Dump { + path, + uuids, + ret, + }; + sender.send(msg).await?; + rcv.await? 
+ } + pub async fn update( + sender: &mpsc::Sender, + uuid: Uuid, + update: Update, + ) -> Result { + let (ret, rcv) = oneshot::channel(); + let msg = Self::Update { + uuid, + update, + ret, + }; + sender.send(msg).await?; + rcv.await? + } + + pub async fn get_update( + sender: &mpsc::Sender, + uuid: Uuid, + id: u64, + ) -> Result { + let (ret, rcv) = oneshot::channel(); + let msg = Self::GetUpdate { + uuid, + id, + ret, + }; + sender.send(msg).await?; + rcv.await? + } + + pub async fn list_updates( + sender: &mpsc::Sender, + uuid: Uuid, + ) -> Result> { + let (ret, rcv) = oneshot::channel(); + let msg = Self::ListUpdates { + uuid, + ret, + }; + sender.send(msg).await?; + rcv.await? + } + + pub async fn get_info( + sender: &mpsc::Sender, + ) -> Result { + let (ret, rcv) = oneshot::channel(); + let msg = Self::GetInfo { + ret, + }; + sender.send(msg).await?; + rcv.await? + } +} diff --git a/meilisearch-lib/src/index_controller/update_actor/actor.rs b/meilisearch-lib/src/index_controller/updates/mod.rs similarity index 71% rename from meilisearch-lib/src/index_controller/update_actor/actor.rs rename to meilisearch-lib/src/index_controller/updates/mod.rs index 01e34e000..f281250a6 100644 --- a/meilisearch-lib/src/index_controller/update_actor/actor.rs +++ b/meilisearch-lib/src/index_controller/updates/mod.rs @@ -1,3 +1,8 @@ +pub mod error; +mod message; +pub mod status; +pub mod store; + use std::collections::HashSet; use std::io; use std::path::{Path, PathBuf}; @@ -10,25 +15,47 @@ use bytes::Bytes; use futures::{Stream, StreamExt}; use log::trace; use milli::documents::DocumentBatchBuilder; +use milli::update::IndexDocumentsMethod; +use serde::{Deserialize, Serialize}; use serde_json::{Map, Value}; use tokio::sync::mpsc; use uuid::Uuid; -use super::error::{Result, UpdateActorError}; -use super::RegisterUpdate; -use super::{UpdateMsg, UpdateStore, UpdateStoreInfo, Update}; -use crate::index_controller::index_actor::IndexActorHandle; +use self::error::{Result, 
UpdateActorError}; +pub use self::message::UpdateMsg; +use self::store::{UpdateStore, UpdateStoreInfo}; use crate::index_controller::update_file_store::UpdateFileStore; -use crate::index_controller::{DocumentAdditionFormat, Payload, UpdateStatus}; +use status::UpdateStatus; -pub struct UpdateActor { - store: Arc, - inbox: Option>, - update_file_store: UpdateFileStore, - index_handle: I, - must_exit: Arc, +use super::{DocumentAdditionFormat, Payload, Update}; + +pub type UpdateSender = mpsc::Sender; +type IndexSender = mpsc::Sender<()>; + +pub fn create_update_handler( + index_sender: IndexSender, + db_path: impl AsRef, + update_store_size: usize, +) -> anyhow::Result { + let path = db_path.as_ref().to_owned(); + let (sender, receiver) = mpsc::channel(100); + let actor = UpdateHandler::new(update_store_size, receiver, path, index_sender)?; + + tokio::task::spawn_local(actor.run()); + + Ok(sender) } +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum RegisterUpdate { + DocumentAddition { + primary_key: Option, + method: IndexDocumentsMethod, + content_uuid: Uuid, + }, +} + +/// A wrapper type to implement read on a `Stream>`. 
struct StreamReader { stream: S, current: Option, @@ -36,13 +63,18 @@ struct StreamReader { impl StreamReader { fn new(stream: S) -> Self { - Self { stream, current: None } + Self { + stream, + current: None, + } } - } -impl> + Unpin> io::Read for StreamReader { +impl> + Unpin> io::Read + for StreamReader +{ fn read(&mut self, buf: &mut [u8]) -> io::Result { + // TODO: optimize buf filling match self.current.take() { Some(mut bytes) => { let copied = bytes.split_to(buf.len()); @@ -52,29 +84,32 @@ impl> + Unpin> io::Rea } Ok(copied.len()) } - None => { - match tokio::runtime::Handle::current().block_on(self.stream.next()) { - Some(Ok(bytes)) => { - self.current.replace(bytes); - self.read(buf) - }, - Some(Err(e)) => Err(io::Error::new(io::ErrorKind::BrokenPipe, e)), - None => return Ok(0), + None => match tokio::runtime::Handle::current().block_on(self.stream.next()) { + Some(Ok(bytes)) => { + self.current.replace(bytes); + self.read(buf) } - } + Some(Err(e)) => Err(io::Error::new(io::ErrorKind::BrokenPipe, e)), + None => return Ok(0), + }, } } } -impl UpdateActor -where - I: IndexActorHandle + Clone + Sync + Send + 'static, -{ +pub struct UpdateHandler { + store: Arc, + inbox: Option>, + update_file_store: UpdateFileStore, + index_handle: IndexSender, + must_exit: Arc, +} + +impl UpdateHandler { pub fn new( update_db_size: usize, inbox: mpsc::Receiver, path: impl AsRef, - index_handle: I, + index_handle: IndexSender, ) -> anyhow::Result { let path = path.as_ref().to_owned(); std::fs::create_dir_all(&path)?; @@ -88,14 +123,14 @@ where let inbox = Some(inbox); - let update_file_store = UpdateFileStore::new(&path).unwrap(); + let update_file_store = UpdateFileStore::new(&path).unwrap(); Ok(Self { store, inbox, index_handle, must_exit, - update_file_store + update_file_store, }) } @@ -128,11 +163,7 @@ where stream .for_each_concurrent(Some(10), |msg| async { match msg { - Update { - uuid, - update, - ret, - } => { + Update { uuid, update, ret } => { let _ = 
ret.send(self.handle_update(uuid, update).await); } ListUpdates { uuid, ret } => { @@ -158,23 +189,30 @@ where .await; } - async fn handle_update( - &self, - index_uuid: Uuid, - update: Update, - ) -> Result { + async fn handle_update(&self, index_uuid: Uuid, update: Update) -> Result { let registration = match update { - Update::DocumentAddition { payload, primary_key, method, format } => { + Update::DocumentAddition { + payload, + primary_key, + method, + format, + } => { let content_uuid = match format { DocumentAdditionFormat::Json => self.documents_from_json(payload).await?, }; - RegisterUpdate::DocumentAddition { primary_key, method, content_uuid } + RegisterUpdate::DocumentAddition { + primary_key, + method, + content_uuid, + } } }; let store = self.store.clone(); - let status = tokio::task::spawn_blocking(move || store.register_update(index_uuid, registration)).await??; + let status = + tokio::task::spawn_blocking(move || store.register_update(index_uuid, registration)) + .await??; Ok(status.into()) } @@ -185,14 +223,16 @@ where let (uuid, mut file) = file_store.new_update().unwrap(); let mut builder = DocumentBatchBuilder::new(&mut *file).unwrap(); - let documents: Vec> = serde_json::from_reader(StreamReader::new(payload))?; + let documents: Vec> = + serde_json::from_reader(StreamReader::new(payload))?; builder.add_documents(documents).unwrap(); builder.finish().unwrap(); file.persist(); Ok(uuid) - }).await? + }) + .await? 
} async fn handle_list_updates(&self, uuid: Uuid) -> Result> { @@ -256,5 +296,4 @@ where Ok(info) } - } diff --git a/meilisearch-lib/src/index_controller/updates.rs b/meilisearch-lib/src/index_controller/updates/status.rs similarity index 98% rename from meilisearch-lib/src/index_controller/updates.rs rename to meilisearch-lib/src/index_controller/updates/status.rs index efe48e5e5..7716473ab 100644 --- a/meilisearch-lib/src/index_controller/updates.rs +++ b/meilisearch-lib/src/index_controller/updates/status.rs @@ -6,9 +6,7 @@ use meilisearch_error::{Code, ErrorCode}; use milli::update::{DocumentAdditionResult, IndexDocumentsMethod}; use serde::{Deserialize, Serialize}; -use crate::index::{Settings, Unchecked}; - -use super::update_actor::RegisterUpdate; +use crate::{RegisterUpdate, index::{Settings, Unchecked}}; #[derive(Debug, Clone, Serialize, Deserialize)] pub enum UpdateResult { diff --git a/meilisearch-lib/src/index_controller/update_actor/store/codec.rs b/meilisearch-lib/src/index_controller/updates/store/codec.rs similarity index 100% rename from meilisearch-lib/src/index_controller/update_actor/store/codec.rs rename to meilisearch-lib/src/index_controller/updates/store/codec.rs diff --git a/meilisearch-lib/src/index_controller/update_actor/store/dump.rs b/meilisearch-lib/src/index_controller/updates/store/dump.rs similarity index 95% rename from meilisearch-lib/src/index_controller/update_actor/store/dump.rs rename to meilisearch-lib/src/index_controller/updates/store/dump.rs index 5f3605999..ccb09a309 100644 --- a/meilisearch-lib/src/index_controller/update_actor/store/dump.rs +++ b/meilisearch-lib/src/index_controller/updates/store/dump.rs @@ -10,10 +10,7 @@ use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::{Result, State, UpdateStore}; -use crate::index_controller::{ - index_actor::IndexActorHandle, - UpdateStatus, -}; +use crate::index_controller::{updates::{IndexSender, status::UpdateStatus}}; #[derive(Serialize, Deserialize)] struct 
UpdateEntry { @@ -26,7 +23,7 @@ impl UpdateStore { &self, uuids: &HashSet, path: PathBuf, - handle: impl IndexActorHandle, + handle: IndexSender, ) -> Result<()> { let state_lock = self.state.write(); state_lock.swap(State::Dumping); @@ -175,11 +172,12 @@ impl UpdateStore { async fn dump_indexes( uuids: &HashSet, - handle: impl IndexActorHandle, + handle: IndexSender, path: impl AsRef, ) -> Result<()> { for uuid in uuids { - handle.dump(*uuid, path.as_ref().to_owned()).await?; + //handle.dump(*uuid, path.as_ref().to_owned()).await?; + todo!() } Ok(()) diff --git a/meilisearch-lib/src/index_controller/update_actor/store/mod.rs b/meilisearch-lib/src/index_controller/updates/store/mod.rs similarity index 98% rename from meilisearch-lib/src/index_controller/update_actor/store/mod.rs rename to meilisearch-lib/src/index_controller/updates/store/mod.rs index 62fcbd5ad..be8c5f859 100644 --- a/meilisearch-lib/src/index_controller/update_actor/store/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/store/mod.rs @@ -28,9 +28,10 @@ use codec::*; use super::RegisterUpdate; use super::error::Result; +use super::status::{Enqueued, Processing}; use crate::EnvSizer; use crate::index_controller::update_files_path; -use crate::index_controller::{index_actor::CONCURRENT_INDEX_MSG, updates::*, IndexActorHandle}; +use crate::index_controller::{index_actor::CONCURRENT_INDEX_MSG, updates::*}; #[allow(clippy::upper_case_acronyms)] type BEU64 = U64; @@ -145,7 +146,7 @@ impl UpdateStore { pub fn open( options: EnvOpenOptions, path: impl AsRef, - index_handle: impl IndexActorHandle + Clone + Sync + Send + 'static, + index_handle: IndexSender, must_exit: Arc, ) -> anyhow::Result> { let (update_store, mut notification_receiver) = Self::new(options, path)?; @@ -283,7 +284,7 @@ impl UpdateStore { /// Executes the user provided function on the next pending update (the one with the lowest id). 
/// This is asynchronous as it let the user process the update with a read-only txn and /// only writing the result meta to the processed-meta store *after* it has been processed. - fn process_pending_update(&self, index_handle: impl IndexActorHandle) -> Result> { + fn process_pending_update(&self, index_handle: IndexSender) -> Result> { // Create a read transaction to be able to retrieve the pending update in order. let rtxn = self.env.read_txn()?; let first_meta = self.pending_queue.first(&rtxn)?; @@ -313,7 +314,7 @@ impl UpdateStore { fn perform_update( &self, processing: Processing, - index_handle: impl IndexActorHandle, + index_handle: IndexSender, index_uuid: Uuid, global_id: u64, ) -> Result> { @@ -321,7 +322,7 @@ impl UpdateStore { let handle = Handle::current(); let update_id = processing.id(); let result = - match handle.block_on(index_handle.update(index_uuid, processing.clone())) { + match handle.block_on(/*index_handle.update(index_uuid, processing.clone())*/ todo!()) { Ok(result) => result, Err(e) => Err(processing.fail(e)), }; @@ -483,7 +484,7 @@ impl UpdateStore { &self, uuids: &HashSet, path: impl AsRef, - handle: impl IndexActorHandle + Clone, + handle: IndexSender, ) -> Result<()> { let state_lock = self.state.write(); state_lock.swap(State::Snapshoting); @@ -524,7 +525,7 @@ impl UpdateStore { // Perform the snapshot of each index concurently. 
Only a third of the capabilities of // the index actor at a time not to put too much pressure on the index actor let mut stream = futures::stream::iter(uuids.iter()) - .map(move |uuid| handle.snapshot(*uuid, path.clone())) + .map(move |uuid| todo!() /*handle.snapshot(*uuid, path.clone())*/) .buffer_unordered(CONCURRENT_INDEX_MSG / 3); Handle::current().block_on(async { diff --git a/meilisearch-lib/src/lib.rs b/meilisearch-lib/src/lib.rs index 9f6be4361..64f93695e 100644 --- a/meilisearch-lib/src/lib.rs +++ b/meilisearch-lib/src/lib.rs @@ -5,7 +5,7 @@ pub mod options; pub mod index; pub mod index_controller; -pub use index_controller::{UpdateResult, UpdateStatus, IndexController as MeiliSearch, update_actor::RegisterUpdate}; +pub use index_controller::{IndexController as MeiliSearch, updates::RegisterUpdate}; use walkdir::WalkDir; From 5353be74c351cb524226fb211aae4982ee258b60 Mon Sep 17 00:00:00 2001 From: mpostma Date: Wed, 22 Sep 2021 15:07:04 +0200 Subject: [PATCH 06/37] refactor index actor --- Cargo.lock | 12 + meilisearch-http/src/routes/mod.rs | 3 +- meilisearch-lib/Cargo.toml | 1 + meilisearch-lib/src/index/updates.rs | 3 + meilisearch-lib/src/index_controller/error.rs | 2 +- .../index_actor/handle_impl.rs | 161 --------- .../index_controller/index_actor/message.rs | 74 ----- .../src/index_controller/index_actor/mod.rs | 166 ---------- .../{index_actor => indexes}/error.rs | 16 + .../src/index_controller/indexes/message.rs | 212 ++++++++++++ .../{index_actor/actor.rs => indexes/mod.rs} | 169 ++++++++-- .../{index_actor => indexes}/store.rs | 0 meilisearch-lib/src/index_controller/mod.rs | 312 +++++++++--------- .../src/index_controller/updates/error.rs | 15 +- .../src/index_controller/updates/message.rs | 1 + .../src/index_controller/updates/mod.rs | 14 +- .../index_controller/updates/store/dump.rs | 9 +- .../src/index_controller/updates/store/mod.rs | 16 +- 18 files changed, 590 insertions(+), 596 deletions(-) delete mode 100644 
meilisearch-lib/src/index_controller/index_actor/handle_impl.rs delete mode 100644 meilisearch-lib/src/index_controller/index_actor/message.rs delete mode 100644 meilisearch-lib/src/index_controller/index_actor/mod.rs rename meilisearch-lib/src/index_controller/{index_actor => indexes}/error.rs (77%) create mode 100644 meilisearch-lib/src/index_controller/indexes/message.rs rename meilisearch-lib/src/index_controller/{index_actor/actor.rs => indexes/mod.rs} (70%) rename meilisearch-lib/src/index_controller/{index_actor => indexes}/store.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index 33660f836..d08b1a83c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -769,6 +769,17 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "derivative" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" +dependencies = [ + "proc-macro2 1.0.29", + "quote 1.0.9", + "syn 1.0.76", +] + [[package]] name = "derive_more" version = "0.99.16" @@ -1674,6 +1685,7 @@ dependencies = [ "bytes", "chrono", "crossbeam-channel", + "derivative", "either", "env_logger", "erased-serde", diff --git a/meilisearch-http/src/routes/mod.rs b/meilisearch-http/src/routes/mod.rs index 6c99d1766..a38689bd9 100644 --- a/meilisearch-http/src/routes/mod.rs +++ b/meilisearch-http/src/routes/mod.rs @@ -3,9 +3,10 @@ use std::time::Duration; use actix_web::{web, HttpResponse}; use chrono::{DateTime, Utc}; use log::debug; +use meilisearch_lib::index_controller::updates::status::{UpdateResult, UpdateStatus}; use serde::{Deserialize, Serialize}; -use meilisearch_lib::{MeiliSearch, UpdateResult, UpdateStatus, RegisterUpdate}; +use meilisearch_lib::{MeiliSearch, RegisterUpdate}; use meilisearch_lib::index::{Settings, Unchecked}; use crate::error::ResponseError; diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml index 7ef4ecad7..0d9f6520b 100644 --- a/meilisearch-lib/Cargo.toml +++ 
b/meilisearch-lib/Cargo.toml @@ -61,6 +61,7 @@ serdeval = "0.1.0" sysinfo = "0.20.2" tokio-stream = "0.1.7" erased-serde = "0.3.16" +derivative = "2.2.0" [dev-dependencies] actix-rt = "2.2.0" diff --git a/meilisearch-lib/src/index/updates.rs b/meilisearch-lib/src/index/updates.rs index e6012f4ab..c83862f9b 100644 --- a/meilisearch-lib/src/index/updates.rs +++ b/meilisearch-lib/src/index/updates.rs @@ -35,6 +35,9 @@ pub struct Checked; #[derive(Clone, Default, Debug, Serialize, Deserialize)] pub struct Unchecked; +/// Holds all the settings for an index. `T` can either be `Checked` if they represents settings +/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a +/// call to `check` will return a `Settings` from a `Settings`. #[derive(Debug, Clone, Default, Serialize, Deserialize)] #[serde(deny_unknown_fields)] #[serde(rename_all = "camelCase")] diff --git a/meilisearch-lib/src/index_controller/error.rs b/meilisearch-lib/src/index_controller/error.rs index ddf698d29..8c60e9103 100644 --- a/meilisearch-lib/src/index_controller/error.rs +++ b/meilisearch-lib/src/index_controller/error.rs @@ -6,7 +6,7 @@ use meilisearch_error::ErrorCode; use crate::index::error::IndexError; use super::dump_actor::error::DumpActorError; -use super::index_actor::error::IndexActorError; +use super::indexes::error::IndexActorError; use super::updates::error::UpdateActorError; use super::uuid_resolver::error::UuidResolverError; diff --git a/meilisearch-lib/src/index_controller/index_actor/handle_impl.rs b/meilisearch-lib/src/index_controller/index_actor/handle_impl.rs deleted file mode 100644 index de295af6d..000000000 --- a/meilisearch-lib/src/index_controller/index_actor/handle_impl.rs +++ /dev/null @@ -1,161 +0,0 @@ -use crate::{index_controller::updates::status::{Failed, Processed, Processing}, options::IndexerOpts}; -use std::path::{Path, PathBuf}; - -use tokio::sync::{mpsc, oneshot}; -use uuid::Uuid; - -use crate::{ - index::Checked, - 
index_controller::{IndexSettings, IndexStats}, -}; -use crate::{ - index::{Document, SearchQuery, SearchResult, Settings}, -}; - -use super::error::Result; -use super::{IndexActor, IndexActorHandle, IndexMeta, IndexMsg, MapIndexStore}; - -#[derive(Clone)] -pub struct IndexActorHandleImpl { - sender: mpsc::Sender, -} - -#[async_trait::async_trait] -impl IndexActorHandle for IndexActorHandleImpl { - async fn create_index(&self, uuid: Uuid, primary_key: Option) -> Result { - let (ret, receiver) = oneshot::channel(); - let msg = IndexMsg::CreateIndex { - ret, - uuid, - primary_key, - }; - let _ = self.sender.send(msg).await; - receiver.await.expect("IndexActor has been killed") - } - - async fn update( - &self, - uuid: Uuid, - meta: Processing, - ) -> Result> { - let (ret, receiver) = oneshot::channel(); - let msg = IndexMsg::Update { - ret, - meta, - uuid, - }; - let _ = self.sender.send(msg).await; - Ok(receiver.await.expect("IndexActor has been killed")?) - } - - async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result { - let (ret, receiver) = oneshot::channel(); - let msg = IndexMsg::Search { uuid, query, ret }; - let _ = self.sender.send(msg).await; - Ok(receiver.await.expect("IndexActor has been killed")?) - } - - async fn settings(&self, uuid: Uuid) -> Result> { - let (ret, receiver) = oneshot::channel(); - let msg = IndexMsg::Settings { uuid, ret }; - let _ = self.sender.send(msg).await; - Ok(receiver.await.expect("IndexActor has been killed")?) - } - - async fn documents( - &self, - uuid: Uuid, - offset: usize, - limit: usize, - attributes_to_retrieve: Option>, - ) -> Result> { - let (ret, receiver) = oneshot::channel(); - let msg = IndexMsg::Documents { - uuid, - ret, - offset, - attributes_to_retrieve, - limit, - }; - let _ = self.sender.send(msg).await; - Ok(receiver.await.expect("IndexActor has been killed")?) 
- } - - async fn document( - &self, - uuid: Uuid, - doc_id: String, - attributes_to_retrieve: Option>, - ) -> Result { - let (ret, receiver) = oneshot::channel(); - let msg = IndexMsg::Document { - uuid, - ret, - doc_id, - attributes_to_retrieve, - }; - let _ = self.sender.send(msg).await; - Ok(receiver.await.expect("IndexActor has been killed")?) - } - - async fn delete(&self, uuid: Uuid) -> Result<()> { - let (ret, receiver) = oneshot::channel(); - let msg = IndexMsg::Delete { uuid, ret }; - let _ = self.sender.send(msg).await; - Ok(receiver.await.expect("IndexActor has been killed")?) - } - - async fn get_index_meta(&self, uuid: Uuid) -> Result { - let (ret, receiver) = oneshot::channel(); - let msg = IndexMsg::GetMeta { uuid, ret }; - let _ = self.sender.send(msg).await; - Ok(receiver.await.expect("IndexActor has been killed")?) - } - - async fn update_index(&self, uuid: Uuid, index_settings: IndexSettings) -> Result { - let (ret, receiver) = oneshot::channel(); - let msg = IndexMsg::UpdateIndex { - uuid, - index_settings, - ret, - }; - let _ = self.sender.send(msg).await; - Ok(receiver.await.expect("IndexActor has been killed")?) - } - - async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> { - let (ret, receiver) = oneshot::channel(); - let msg = IndexMsg::Snapshot { uuid, path, ret }; - let _ = self.sender.send(msg).await; - Ok(receiver.await.expect("IndexActor has been killed")?) - } - - async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> { - let (ret, receiver) = oneshot::channel(); - let msg = IndexMsg::Dump { uuid, path, ret }; - let _ = self.sender.send(msg).await; - Ok(receiver.await.expect("IndexActor has been killed")?) - } - - async fn get_index_stats(&self, uuid: Uuid) -> Result { - let (ret, receiver) = oneshot::channel(); - let msg = IndexMsg::GetStats { uuid, ret }; - let _ = self.sender.send(msg).await; - Ok(receiver.await.expect("IndexActor has been killed")?) 
- } -} - -impl IndexActorHandleImpl { - pub fn new( - path: impl AsRef, - index_size: usize, - options: &IndexerOpts, - ) -> anyhow::Result { - let (sender, receiver) = mpsc::channel(100); - - let store = MapIndexStore::new(&path, index_size); - let actor = IndexActor::new(receiver, store, options)?; - tokio::task::spawn(actor.run()); - Ok(Self { sender }) - } -} diff --git a/meilisearch-lib/src/index_controller/index_actor/message.rs b/meilisearch-lib/src/index_controller/index_actor/message.rs deleted file mode 100644 index 55aaf5bc7..000000000 --- a/meilisearch-lib/src/index_controller/index_actor/message.rs +++ /dev/null @@ -1,74 +0,0 @@ -use std::path::PathBuf; - -use tokio::sync::oneshot; -use uuid::Uuid; - -use super::error::Result as IndexResult; -use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings}; -use crate::index_controller::IndexStats; -use crate::index_controller::updates::status::{Failed, Processed, Processing}; - -use super::{IndexMeta, IndexSettings}; - -#[allow(clippy::large_enum_variant)] -pub enum IndexMsg { - CreateIndex { - uuid: Uuid, - primary_key: Option, - ret: oneshot::Sender>, - }, - Update { - uuid: Uuid, - meta: Processing, - ret: oneshot::Sender>>, - }, - Search { - uuid: Uuid, - query: SearchQuery, - ret: oneshot::Sender>, - }, - Settings { - uuid: Uuid, - ret: oneshot::Sender>>, - }, - Documents { - uuid: Uuid, - attributes_to_retrieve: Option>, - offset: usize, - limit: usize, - ret: oneshot::Sender>>, - }, - Document { - uuid: Uuid, - attributes_to_retrieve: Option>, - doc_id: String, - ret: oneshot::Sender>, - }, - Delete { - uuid: Uuid, - ret: oneshot::Sender>, - }, - GetMeta { - uuid: Uuid, - ret: oneshot::Sender>, - }, - UpdateIndex { - uuid: Uuid, - index_settings: IndexSettings, - ret: oneshot::Sender>, - }, - Snapshot { - uuid: Uuid, - path: PathBuf, - ret: oneshot::Sender>, - }, - Dump { - uuid: Uuid, - path: PathBuf, - ret: oneshot::Sender>, - }, - GetStats { - uuid: Uuid, - ret: oneshot::Sender>, - 
}, -} diff --git a/meilisearch-lib/src/index_controller/index_actor/mod.rs b/meilisearch-lib/src/index_controller/index_actor/mod.rs deleted file mode 100644 index 8f2ac4d2d..000000000 --- a/meilisearch-lib/src/index_controller/index_actor/mod.rs +++ /dev/null @@ -1,166 +0,0 @@ -use std::path::PathBuf; - -use chrono::{DateTime, Utc}; -#[cfg(test)] -use mockall::automock; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use actor::IndexActor; -pub use actor::CONCURRENT_INDEX_MSG; -pub use handle_impl::IndexActorHandleImpl; -use message::IndexMsg; -use store::{IndexStore, MapIndexStore}; - -use crate::index::{Checked, Document, Index, SearchQuery, SearchResult, Settings}; -use error::Result; - -use super::{IndexSettings, IndexStats, updates::status::{Failed, Processed, Processing}}; - -mod actor; -pub mod error; -mod handle_impl; -mod message; -mod store; - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct IndexMeta { - created_at: DateTime, - pub updated_at: DateTime, - pub primary_key: Option, -} - -impl IndexMeta { - fn new(index: &Index) -> Result { - let txn = index.read_txn()?; - Self::new_txn(index, &txn) - } - - fn new_txn(index: &Index, txn: &heed::RoTxn) -> Result { - let created_at = index.created_at(txn)?; - let updated_at = index.updated_at(txn)?; - let primary_key = index.primary_key(txn)?.map(String::from); - Ok(Self { - created_at, - updated_at, - primary_key, - }) - } -} - -#[async_trait::async_trait] -#[cfg_attr(test, automock)] -pub trait IndexActorHandle { - async fn create_index(&self, uuid: Uuid, primary_key: Option) -> Result; - async fn update( - &self, - uuid: Uuid, - meta: Processing, - ) -> Result>; - async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result; - async fn settings(&self, uuid: Uuid) -> Result>; - - async fn documents( - &self, - uuid: Uuid, - offset: usize, - limit: usize, - attributes_to_retrieve: Option>, - ) -> Result>; - async fn document( - &self, - uuid: 
Uuid, - doc_id: String, - attributes_to_retrieve: Option>, - ) -> Result; - async fn delete(&self, uuid: Uuid) -> Result<()>; - async fn get_index_meta(&self, uuid: Uuid) -> Result; - async fn update_index(&self, uuid: Uuid, index_settings: IndexSettings) -> Result; - async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()>; - async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()>; - async fn get_index_stats(&self, uuid: Uuid) -> Result; -} - -#[cfg(test)] -mod test { - use std::sync::Arc; - - use super::*; - - #[async_trait::async_trait] - /// Useful for passing around an `Arc` in tests. - impl IndexActorHandle for Arc { - async fn create_index(&self, uuid: Uuid, primary_key: Option) -> Result { - self.as_ref().create_index(uuid, primary_key).await - } - - async fn update( - &self, - uuid: Uuid, - meta: Processing, - data: Option, - ) -> Result> { - self.as_ref().update(uuid, meta, data).await - } - - async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result { - self.as_ref().search(uuid, query).await - } - - async fn settings(&self, uuid: Uuid) -> Result> { - self.as_ref().settings(uuid).await - } - - async fn documents( - &self, - uuid: Uuid, - offset: usize, - limit: usize, - attributes_to_retrieve: Option>, - ) -> Result> { - self.as_ref() - .documents(uuid, offset, limit, attributes_to_retrieve) - .await - } - - async fn document( - &self, - uuid: Uuid, - doc_id: String, - attributes_to_retrieve: Option>, - ) -> Result { - self.as_ref() - .document(uuid, doc_id, attributes_to_retrieve) - .await - } - - async fn delete(&self, uuid: Uuid) -> Result<()> { - self.as_ref().delete(uuid).await - } - - async fn get_index_meta(&self, uuid: Uuid) -> Result { - self.as_ref().get_index_meta(uuid).await - } - - async fn update_index( - &self, - uuid: Uuid, - index_settings: IndexSettings, - ) -> Result { - self.as_ref().update_index(uuid, index_settings).await - } - - async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> { - 
self.as_ref().snapshot(uuid, path).await - } - - async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> { - self.as_ref().dump(uuid, path).await - } - - async fn get_index_stats(&self, uuid: Uuid) -> Result { - self.as_ref().get_index_stats(uuid).await - } - } -} diff --git a/meilisearch-lib/src/index_controller/index_actor/error.rs b/meilisearch-lib/src/index_controller/indexes/error.rs similarity index 77% rename from meilisearch-lib/src/index_controller/index_actor/error.rs rename to meilisearch-lib/src/index_controller/indexes/error.rs index 12a81796b..51fe273f7 100644 --- a/meilisearch-lib/src/index_controller/index_actor/error.rs +++ b/meilisearch-lib/src/index_controller/indexes/error.rs @@ -1,3 +1,5 @@ +use std::fmt; + use meilisearch_error::{Code, ErrorCode}; use crate::{error::MilliError, index::error::IndexError}; @@ -20,6 +22,20 @@ pub enum IndexActorError { Milli(#[from] milli::Error), } +impl From> for IndexActorError +where T: Send + Sync + 'static + fmt::Debug +{ + fn from(other: tokio::sync::mpsc::error::SendError) -> Self { + Self::Internal(Box::new(other)) + } +} + +impl From for IndexActorError { + fn from(other: tokio::sync::oneshot::error::RecvError) -> Self { + Self::Internal(Box::new(other)) + } +} + macro_rules! 
internal_error { ($($other:path), *) => { $( diff --git a/meilisearch-lib/src/index_controller/indexes/message.rs b/meilisearch-lib/src/index_controller/indexes/message.rs new file mode 100644 index 000000000..e9c67d0ab --- /dev/null +++ b/meilisearch-lib/src/index_controller/indexes/message.rs @@ -0,0 +1,212 @@ +use std::path::PathBuf; + +use tokio::sync::{mpsc, oneshot}; +use uuid::Uuid; + +use super::error::Result; +use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings}; +use crate::index_controller::updates::status::{Failed, Processed, Processing}; +use crate::index_controller::{IndexSettings, IndexStats}; + +use super::IndexMeta; + +#[allow(clippy::large_enum_variant)] +#[derive(Debug)] +pub enum IndexMsg { + CreateIndex { + uuid: Uuid, + primary_key: Option, + ret: oneshot::Sender>, + }, + Update { + uuid: Uuid, + meta: Processing, + ret: oneshot::Sender>>, + }, + Search { + uuid: Uuid, + query: SearchQuery, + ret: oneshot::Sender>, + }, + Settings { + uuid: Uuid, + ret: oneshot::Sender>>, + }, + Documents { + uuid: Uuid, + attributes_to_retrieve: Option>, + offset: usize, + limit: usize, + ret: oneshot::Sender>>, + }, + Document { + uuid: Uuid, + attributes_to_retrieve: Option>, + doc_id: String, + ret: oneshot::Sender>, + }, + Delete { + uuid: Uuid, + ret: oneshot::Sender>, + }, + GetMeta { + uuid: Uuid, + ret: oneshot::Sender>, + }, + UpdateIndex { + uuid: Uuid, + index_settings: IndexSettings, + ret: oneshot::Sender>, + }, + Snapshot { + uuid: Uuid, + path: PathBuf, + ret: oneshot::Sender>, + }, + Dump { + uuid: Uuid, + path: PathBuf, + ret: oneshot::Sender>, + }, + GetStats { + uuid: Uuid, + ret: oneshot::Sender>, + }, +} + +impl IndexMsg { + pub async fn search( + sender: &mpsc::Sender, + uuid: Uuid, + query: SearchQuery, + ) -> Result { + let (ret, rcv) = oneshot::channel(); + let msg = Self::Search { + ret, + uuid, + query, + }; + sender.send(msg).await?; + rcv.await? 
+ } + + pub async fn update_index( + sender: &mpsc::Sender, + uuid: Uuid, + index_settings: IndexSettings, + ) -> Result { + let (ret, rcv) = oneshot::channel(); + let msg = Self::UpdateIndex { + ret, + uuid, + index_settings, + }; + sender.send(msg).await?; + rcv.await? + } + + pub async fn create_index( + sender: &mpsc::Sender, + uuid: Uuid, + primary_key: Option, + ) -> Result { + let (ret, rcv) = oneshot::channel(); + let msg = Self::CreateIndex { + ret, + uuid, + primary_key, + }; + sender.send(msg).await?; + rcv.await? + } + + pub async fn index_meta(sender: &mpsc::Sender, uuid: Uuid) -> Result { + let (ret, rcv) = oneshot::channel(); + let msg = Self::GetMeta { ret, uuid }; + sender.send(msg).await?; + rcv.await? + } + + pub async fn index_stats(sender: &mpsc::Sender, uuid: Uuid) -> Result { + let (ret, rcv) = oneshot::channel(); + let msg = Self::GetStats { ret, uuid }; + sender.send(msg).await?; + rcv.await? + } + + pub async fn settings(sender: &mpsc::Sender, uuid: Uuid) -> Result> { + let (ret, rcv) = oneshot::channel(); + let msg = Self::Settings { ret, uuid }; + sender.send(msg).await?; + rcv.await? + } + + pub async fn documents( + sender: &mpsc::Sender, + uuid: Uuid, + offset: usize, + limit: usize, + attributes_to_retrieve: Option>, + ) -> Result> { + let (ret, rcv) = oneshot::channel(); + let msg = Self::Documents { + ret, + uuid, + attributes_to_retrieve, + offset, + limit, + }; + sender.send(msg).await?; + rcv.await? + } + + pub async fn document( + sender: &mpsc::Sender, + uuid: Uuid, + attributes_to_retrieve: Option>, + doc_id: String, + ) -> Result { + let (ret, rcv) = oneshot::channel(); + let msg = Self::Document { + ret, + uuid, + attributes_to_retrieve, + doc_id, + }; + sender.send(msg).await?; + rcv.await? + } + + pub async fn update(sender: &mpsc::Sender, uuid: Uuid, meta: Processing) -> Result> { + let (ret, rcv) = oneshot::channel(); + let msg = Self::Update { + ret, + uuid, + meta, + }; + sender.send(msg).await?; + rcv.await? 
+ } + + pub async fn snapshot(sender: &mpsc::Sender, uuid: Uuid, path: PathBuf) -> Result<()> { + let (ret, rcv) = oneshot::channel(); + let msg = Self::Snapshot { + uuid, + path, + ret, + }; + sender.send(msg).await?; + rcv.await? + } + + pub async fn dump(sender: &mpsc::Sender, uuid: Uuid, path: PathBuf) -> Result<()> { + let (ret, rcv) = oneshot::channel(); + let msg = Self::Dump { + uuid, + ret, + path, + }; + sender.send(msg).await?; + rcv.await? + } +} diff --git a/meilisearch-lib/src/index_controller/index_actor/actor.rs b/meilisearch-lib/src/index_controller/indexes/mod.rs similarity index 70% rename from meilisearch-lib/src/index_controller/index_actor/actor.rs rename to meilisearch-lib/src/index_controller/indexes/mod.rs index 6e7d13760..bac492364 100644 --- a/meilisearch-lib/src/index_controller/index_actor/actor.rs +++ b/meilisearch-lib/src/index_controller/indexes/mod.rs @@ -1,4 +1,4 @@ -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::sync::Arc; use async_stream::stream; @@ -8,22 +8,74 @@ use log::debug; use milli::update::UpdateBuilder; use tokio::task::spawn_blocking; use tokio::{fs, sync::mpsc}; -use uuid::Uuid; -use crate::index::{ - update_handler::UpdateHandler, Checked, Document, SearchQuery, SearchResult, Settings, -}; -use crate::index_controller::{ - get_arc_ownership_blocking, IndexStats, -}; +use crate::index::update_handler::UpdateHandler; use crate::index_controller::updates::status::{Failed, Processed, Processing}; +use crate::index_controller::{get_arc_ownership_blocking, IndexStats}; use crate::options::IndexerOpts; -use super::error::{IndexActorError, Result}; -use super::{IndexMeta, IndexMsg, IndexSettings, IndexStore}; - pub const CONCURRENT_INDEX_MSG: usize = 10; +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +pub use message::IndexMsg; + +use crate::index::{Checked, Document, Index, SearchQuery, SearchResult, Settings}; +use error::Result; + +use 
self::error::IndexActorError; +use self::store::{IndexStore, MapIndexStore}; + +use super::IndexSettings; + +pub mod error; +mod message; +mod store; + +pub type IndexHandlerSender = mpsc::Sender; + +pub fn create_indexes_handler( + db_path: impl AsRef, + index_size: usize, + indexer_options: &IndexerOpts, +) -> anyhow::Result { + let (sender, receiver) = mpsc::channel(100); + let store = MapIndexStore::new(&db_path, index_size); + let actor = IndexActor::new(receiver, store, indexer_options)?; + + tokio::task::spawn(actor.run()); + + Ok(sender) +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "camelCase")] +pub struct IndexMeta { + created_at: DateTime, + pub updated_at: DateTime, + pub primary_key: Option, +} + +impl IndexMeta { + fn new(index: &Index) -> Result { + let txn = index.read_txn()?; + Self::new_txn(index, &txn) + } + + fn new_txn(index: &Index, txn: &heed::RoTxn) -> Result { + let created_at = index.created_at(txn)?; + let updated_at = index.updated_at(txn)?; + let primary_key = index.primary_key(txn)?.map(String::from); + Ok(Self { + created_at, + updated_at, + primary_key, + }) + } +} + pub struct IndexActor { receiver: Option>, update_handler: Arc, @@ -31,15 +83,15 @@ pub struct IndexActor { } impl IndexActor -where S: IndexStore + Sync + Send, +where + S: IndexStore + Sync + Send, { pub fn new( receiver: mpsc::Receiver, store: S, options: &IndexerOpts, ) -> anyhow::Result { - let update_handler = UpdateHandler::new(options)?; - let update_handler = Arc::new(update_handler); + let update_handler = Arc::new(UpdateHandler::new(options)?); let receiver = Some(receiver); Ok(Self { @@ -82,11 +134,7 @@ where S: IndexStore + Sync + Send, } => { let _ = ret.send(self.handle_create_index(uuid, primary_key).await); } - Update { - ret, - meta, - uuid, - } => { + Update { ret, meta, uuid } => { let _ = ret.send(self.handle_update(uuid, meta).await); } Search { ret, query, uuid } => { @@ -350,3 +398,86 @@ where S: IndexStore + Sync + 
Send, .await? } } + +#[cfg(test)] +mod test { + use std::sync::Arc; + + use super::*; + + #[async_trait::async_trait] + /// Useful for passing around an `Arc` in tests. + impl IndexActorHandle for Arc { + async fn create_index(&self, uuid: Uuid, primary_key: Option) -> Result { + self.as_ref().create_index(uuid, primary_key).await + } + + async fn update( + &self, + uuid: Uuid, + meta: Processing, + data: Option, + ) -> Result> { + self.as_ref().update(uuid, meta, data).await + } + + async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result { + self.as_ref().search(uuid, query).await + } + + async fn settings(&self, uuid: Uuid) -> Result> { + self.as_ref().settings(uuid).await + } + + async fn documents( + &self, + uuid: Uuid, + offset: usize, + limit: usize, + attributes_to_retrieve: Option>, + ) -> Result> { + self.as_ref() + .documents(uuid, offset, limit, attributes_to_retrieve) + .await + } + + async fn document( + &self, + uuid: Uuid, + doc_id: String, + attributes_to_retrieve: Option>, + ) -> Result { + self.as_ref() + .document(uuid, doc_id, attributes_to_retrieve) + .await + } + + async fn delete(&self, uuid: Uuid) -> Result<()> { + self.as_ref().delete(uuid).await + } + + async fn get_index_meta(&self, uuid: Uuid) -> Result { + self.as_ref().get_index_meta(uuid).await + } + + async fn update_index( + &self, + uuid: Uuid, + index_settings: IndexSettings, + ) -> Result { + self.as_ref().update_index(uuid, index_settings).await + } + + async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> { + self.as_ref().snapshot(uuid, path).await + } + + async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> { + self.as_ref().dump(uuid, path).await + } + + async fn get_index_stats(&self, uuid: Uuid) -> Result { + self.as_ref().get_index_stats(uuid).await + } + } +} diff --git a/meilisearch-lib/src/index_controller/index_actor/store.rs b/meilisearch-lib/src/index_controller/indexes/store.rs similarity index 100% rename from 
meilisearch-lib/src/index_controller/index_actor/store.rs rename to meilisearch-lib/src/index_controller/indexes/store.rs diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index da92eca20..bd3f4c07b 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -8,36 +8,38 @@ use bytes::Bytes; use chrono::{DateTime, Utc}; use futures::Stream; use log::info; -use milli::FieldDistribution; use milli::update::IndexDocumentsMethod; +use milli::FieldDistribution; use serde::{Deserialize, Serialize}; use tokio::time::sleep; use uuid::Uuid; use dump_actor::DumpActorHandle; pub use dump_actor::{DumpInfo, DumpStatus}; -use index_actor::IndexActorHandle; use snapshot::load_snapshot; use uuid_resolver::error::UuidResolverError; -use crate::options::IndexerOpts; use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings}; +use crate::options::IndexerOpts; use error::Result; use self::dump_actor::load_dump; -use self::updates::UpdateMsg; +use self::indexes::IndexMsg; use self::updates::status::UpdateStatus; +use self::updates::UpdateMsg; use self::uuid_resolver::UuidResolverMsg; mod dump_actor; pub mod error; -pub mod index_actor; +pub mod indexes; mod snapshot; +pub mod update_file_store; pub mod updates; mod uuid_resolver; -pub mod update_file_store; -pub type Payload = Box> + Send + Sync + 'static + Unpin>; +pub type Payload = Box< + dyn Stream> + Send + Sync + 'static + Unpin, +>; #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(rename_all = "camelCase")] @@ -47,7 +49,7 @@ pub struct IndexMetadata { pub uid: String, name: String, #[serde(flatten)] - pub meta: index_actor::IndexMeta, + pub meta: indexes::IndexMeta, } #[derive(Clone, Debug)] @@ -72,16 +74,16 @@ pub struct IndexStats { #[derive(Clone)] pub struct IndexController { uuid_resolver: uuid_resolver::UuidResolverSender, - index_handle: index_actor::IndexActorHandleImpl, + index_handle: 
indexes::IndexHandlerSender, update_handle: updates::UpdateSender, dump_handle: dump_actor::DumpActorHandleImpl, } +#[derive(Debug)] pub enum DocumentAdditionFormat { Json, } - #[derive(Serialize, Debug)] #[serde(rename_all = "camelCase")] pub struct Stats { @@ -90,13 +92,16 @@ pub struct Stats { pub indexes: BTreeMap, } +#[derive(derivative::Derivative)] +#[derivative(Debug)] pub enum Update { DocumentAddition { + #[derivative(Debug="ignore")] payload: Payload, primary_key: Option, method: IndexDocumentsMethod, format: DocumentAdditionFormat, - } + }, } #[derive(Default, Debug)] @@ -112,9 +117,17 @@ pub struct IndexControllerBuilder { } impl IndexControllerBuilder { - pub fn build(self, db_path: impl AsRef, indexer_options: IndexerOpts) -> anyhow::Result { - let index_size = self.max_index_size.ok_or_else(|| anyhow::anyhow!("Missing index size"))?; - let update_store_size = self.max_index_size.ok_or_else(|| anyhow::anyhow!("Missing update database size"))?; + pub fn build( + self, + db_path: impl AsRef, + indexer_options: IndexerOpts, + ) -> anyhow::Result { + let index_size = self + .max_index_size + .ok_or_else(|| anyhow::anyhow!("Missing index size"))?; + let update_store_size = self + .max_index_size + .ok_or_else(|| anyhow::anyhow!("Missing update database size"))?; if let Some(ref path) = self.import_snapshot { info!("Loading from snapshot {:?}", path); @@ -137,18 +150,15 @@ impl IndexControllerBuilder { std::fs::create_dir_all(db_path.as_ref())?; let uuid_resolver = uuid_resolver::create_uuid_resolver(&db_path)?; - let index_handle = - index_actor::IndexActorHandleImpl::new(&db_path, index_size, &indexer_options)?; + let index_handle = indexes::create_indexes_handler(&db_path, index_size, &indexer_options)?; #[allow(unreachable_code)] - let update_handle = updates::create_update_handler( - todo!(), - &db_path, - update_store_size, - )?; + let update_handle = updates::create_update_handler(index_handle.clone(), &db_path, update_store_size)?; let dump_handle 
= dump_actor::DumpActorHandleImpl::new( - &self.dump_dst.ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?, + &self + .dump_dst + .ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?, uuid_resolver.clone(), update_handle.clone(), index_size, @@ -156,19 +166,19 @@ impl IndexControllerBuilder { )?; //if options.schedule_snapshot { - //let snapshot_service = SnapshotService::new( - //uuid_resolver.clone(), - //update_handle.clone(), - //Duration::from_secs(options.snapshot_interval_sec), - //options.snapshot_dir.clone(), - //options - //.db_path - //.file_name() - //.map(|n| n.to_owned().into_string().expect("invalid path")) - //.unwrap_or_else(|| String::from("data.ms")), - //); + //let snapshot_service = SnapshotService::new( + //uuid_resolver.clone(), + //update_handle.clone(), + //Duration::from_secs(options.snapshot_interval_sec), + //options.snapshot_dir.clone(), + //options + //.db_path + //.file_name() + //.map(|n| n.to_owned().into_string().expect("invalid path")) + //.unwrap_or_else(|| String::from("data.ms")), + //); - //tokio::task::spawn(snapshot_service.run()); + //tokio::task::spawn(snapshot_service.run()); //} Ok(IndexController { @@ -197,7 +207,10 @@ impl IndexControllerBuilder { } /// Set the index controller builder's ignore snapshot if db exists. 
- pub fn set_ignore_snapshot_if_db_exists(&mut self, ignore_snapshot_if_db_exists: bool) -> &mut Self { + pub fn set_ignore_snapshot_if_db_exists( + &mut self, + ignore_snapshot_if_db_exists: bool, + ) -> &mut Self { self.ignore_snapshot_if_db_exists = ignore_snapshot_if_db_exists; self } @@ -238,12 +251,12 @@ impl IndexController { Ok(uuid) => { let update_result = UpdateMsg::update(&self.update_handle, uuid, update).await?; Ok(update_result) - }, + } Err(UuidResolverError::UnexistingIndex(name)) => { let uuid = Uuid::new_v4(); let update_result = UpdateMsg::update(&self.update_handle, uuid, update).await?; // ignore if index creation fails now, since it may already have been created - let _ = self.index_handle.create_index(uuid, None).await; + let _ = IndexMsg::create_index(&self.index_handle, uuid, None).await?; UuidResolverMsg::insert(&self.uuid_resolver, uuid, name).await?; Ok(update_result) @@ -253,128 +266,128 @@ impl IndexController { } //pub async fn add_documents( - //&self, - //uid: String, - //method: milli::update::IndexDocumentsMethod, - //payload: Payload, - //primary_key: Option, + //&self, + //uid: String, + //method: milli::update::IndexDocumentsMethod, + //payload: Payload, + //primary_key: Option, //) -> Result { - //let perform_update = |uuid| async move { - //let meta = UpdateMeta::DocumentsAddition { - //method, - //primary_key, - //}; - //let (sender, receiver) = mpsc::channel(10); + //let perform_update = |uuid| async move { + //let meta = UpdateMeta::DocumentsAddition { + //method, + //primary_key, + //}; + //let (sender, receiver) = mpsc::channel(10); - //// It is necessary to spawn a local task to send the payload to the update handle to - //// prevent dead_locking between the update_handle::update that waits for the update to be - //// registered and the update_actor that waits for the the payload to be sent to it. 
- //tokio::task::spawn_local(async move { - //payload - //.for_each(|r| async { - //let _ = sender.send(r).await; - //}) - //.await - //}); + //// It is necessary to spawn a local task to send the payload to the update handle to + //// prevent dead_locking between the update_handle::update that waits for the update to be + //// registered and the update_actor that waits for the the payload to be sent to it. + //tokio::task::spawn_local(async move { + //payload + //.for_each(|r| async { + //let _ = sender.send(r).await; + //}) + //.await + //}); - //// This must be done *AFTER* spawning the task. - //self.update_handle.update(meta, receiver, uuid).await - //}; + //// This must be done *AFTER* spawning the task. + //self.update_handle.update(meta, receiver, uuid).await + //}; - //match self.uuid_resolver.get(uid).await { - //Ok(uuid) => Ok(perform_update(uuid).await?), - //Err(UuidResolverError::UnexistingIndex(name)) => { - //let uuid = Uuid::new_v4(); - //let status = perform_update(uuid).await?; - //// ignore if index creation fails now, since it may already have been created - //let _ = self.index_handle.create_index(uuid, None).await; - //self.uuid_resolver.insert(name, uuid).await?; - //Ok(status) - //} - //Err(e) => Err(e.into()), - //} + //match self.uuid_resolver.get(uid).await { + //Ok(uuid) => Ok(perform_update(uuid).await?), + //Err(UuidResolverError::UnexistingIndex(name)) => { + //let uuid = Uuid::new_v4(); + //let status = perform_update(uuid).await?; + //// ignore if index creation fails now, since it may already have been created + //let _ = self.index_handle.create_index(uuid, None).await; + //self.uuid_resolver.insert(name, uuid).await?; + //Ok(status) + //} + //Err(e) => Err(e.into()), + //} //} //pub async fn clear_documents(&self, uid: String) -> Result { - //let uuid = self.uuid_resolver.get(uid).await?; - //let meta = UpdateMeta::ClearDocuments; - //let (_, receiver) = mpsc::channel(1); - //let status = self.update_handle.update(meta, 
receiver, uuid).await?; - //Ok(status) + //let uuid = self.uuid_resolver.get(uid).await?; + //let meta = UpdateMeta::ClearDocuments; + //let (_, receiver) = mpsc::channel(1); + //let status = self.update_handle.update(meta, receiver, uuid).await?; + //Ok(status) //} //pub async fn delete_documents( - //&self, - //uid: String, - //documents: Vec, + //&self, + //uid: String, + //documents: Vec, //) -> Result { - //let uuid = self.uuid_resolver.get(uid).await?; - //let meta = UpdateMeta::DeleteDocuments { ids: documents }; - //let (_, receiver) = mpsc::channel(1); - //let status = self.update_handle.update(meta, receiver, uuid).await?; - //Ok(status) + //let uuid = self.uuid_resolver.get(uid).await?; + //let meta = UpdateMeta::DeleteDocuments { ids: documents }; + //let (_, receiver) = mpsc::channel(1); + //let status = self.update_handle.update(meta, receiver, uuid).await?; + //Ok(status) //} //pub async fn update_settings( - //&self, - //uid: String, - //settings: Settings, - //create: bool, + //&self, + //uid: String, + //settings: Settings, + //create: bool, //) -> Result { - //let perform_udpate = |uuid| async move { - //let meta = UpdateMeta::Settings(settings.into_unchecked()); - //// Nothing so send, drop the sender right away, as not to block the update actor. - //let (_, receiver) = mpsc::channel(1); - //self.update_handle.update(meta, receiver, uuid).await - //}; + //let perform_udpate = |uuid| async move { + //let meta = UpdateMeta::Settings(settings.into_unchecked()); + //// Nothing so send, drop the sender right away, as not to block the update actor. 
+ //let (_, receiver) = mpsc::channel(1); + //self.update_handle.update(meta, receiver, uuid).await + //}; - //match self.uuid_resolver.get(uid).await { - //Ok(uuid) => Ok(perform_udpate(uuid).await?), - //Err(UuidResolverError::UnexistingIndex(name)) if create => { - //let uuid = Uuid::new_v4(); - //let status = perform_udpate(uuid).await?; - //// ignore if index creation fails now, since it may already have been created - //let _ = self.index_handle.create_index(uuid, None).await; - //self.uuid_resolver.insert(name, uuid).await?; - //Ok(status) - //} - //Err(e) => Err(e.into()), - //} + //match self.uuid_resolver.get(uid).await { + //Ok(uuid) => Ok(perform_udpate(uuid).await?), + //Err(UuidResolverError::UnexistingIndex(name)) if create => { + //let uuid = Uuid::new_v4(); + //let status = perform_udpate(uuid).await?; + //// ignore if index creation fails now, since it may already have been created + //let _ = self.index_handle.create_index(uuid, None).await; + //self.uuid_resolver.insert(name, uuid).await?; + //Ok(status) + //} + //Err(e) => Err(e.into()), + //} //} //pub async fn create_index(&self, index_settings: IndexSettings) -> Result { - //let IndexSettings { uid, primary_key } = index_settings; - //let uid = uid.ok_or(IndexControllerError::MissingUid)?; - //let uuid = Uuid::new_v4(); - //let meta = self.index_handle.create_index(uuid, primary_key).await?; - //self.uuid_resolver.insert(uid.clone(), uuid).await?; - //let meta = IndexMetadata { - //uuid, - //name: uid.clone(), - //uid, - //meta, - //}; + //let IndexSettings { uid, primary_key } = index_settings; + //let uid = uid.ok_or(IndexControllerError::MissingUid)?; + //let uuid = Uuid::new_v4(); + //let meta = self.index_handle.create_index(uuid, primary_key).await?; + //self.uuid_resolver.insert(uid.clone(), uuid).await?; + //let meta = IndexMetadata { + //uuid, + //name: uid.clone(), + //uid, + //meta, + //}; - //Ok(meta) + //Ok(meta) //} //pub async fn delete_index(&self, uid: String) -> Result<()> 
{ - //let uuid = self.uuid_resolver.delete(uid).await?; + //let uuid = self.uuid_resolver.delete(uid).await?; - //// We remove the index from the resolver synchronously, and effectively perform the index - //// deletion as a background task. - //let update_handle = self.update_handle.clone(); - //let index_handle = self.index_handle.clone(); - //tokio::spawn(async move { - //if let Err(e) = update_handle.delete(uuid).await { - //error!("Error while deleting index: {}", e); - //} - //if let Err(e) = index_handle.delete(uuid).await { - //error!("Error while deleting index: {}", e); - //} - //}); + //// We remove the index from the resolver synchronously, and effectively perform the index + //// deletion as a background task. + //let update_handle = self.update_handle.clone(); + //let index_handle = self.index_handle.clone(); + //tokio::spawn(async move { + //if let Err(e) = update_handle.delete(uuid).await { + //error!("Error while deleting index: {}", e); + //} + //if let Err(e) = index_handle.delete(uuid).await { + //error!("Error while deleting index: {}", e); + //} + //}); - //Ok(()) + //Ok(()) //} pub async fn update_status(&self, uid: String, id: u64) -> Result { @@ -393,7 +406,7 @@ impl IndexController { let uuids = UuidResolverMsg::list(&self.uuid_resolver).await?; let mut ret = Vec::new(); for (uid, uuid) in uuids { - let meta = self.index_handle.get_index_meta(uuid).await?; + let meta = IndexMsg::index_meta(&self.index_handle, uuid).await?; let meta = IndexMetadata { uuid, name: uid.clone(), @@ -408,7 +421,7 @@ impl IndexController { pub async fn settings(&self, uid: String) -> Result> { let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?; - let settings = self.index_handle.settings(uuid).await?; + let settings = IndexMsg::settings(&self.index_handle, uuid).await?; Ok(settings) } @@ -420,10 +433,14 @@ impl IndexController { attributes_to_retrieve: Option>, ) -> Result> { let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?; - let 
documents = self - .index_handle - .documents(uuid, offset, limit, attributes_to_retrieve) - .await?; + let documents = IndexMsg::documents( + &self.index_handle, + uuid, + offset, + limit, + attributes_to_retrieve, + ) + .await?; Ok(documents) } @@ -434,10 +451,7 @@ impl IndexController { attributes_to_retrieve: Option>, ) -> Result { let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?; - let document = self - .index_handle - .document(uuid, doc_id, attributes_to_retrieve) - .await?; + let document = IndexMsg::document(&self.index_handle, uuid, attributes_to_retrieve, doc_id).await?; Ok(document) } @@ -451,7 +465,7 @@ impl IndexController { } let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid.clone()).await?; - let meta = self.index_handle.update_index(uuid, index_settings).await?; + let meta = IndexMsg::update_index(&self.index_handle, uuid, index_settings).await?; let meta = IndexMetadata { uuid, name: uid.clone(), @@ -463,13 +477,13 @@ impl IndexController { pub async fn search(&self, uid: String, query: SearchQuery) -> Result { let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?; - let result = self.index_handle.search(uuid, query).await?; + let result = IndexMsg::search(&self.index_handle, uuid, query).await?; Ok(result) } pub async fn get_index(&self, uid: String) -> Result { let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid.clone()).await?; - let meta = self.index_handle.get_index_meta(uuid).await?; + let meta = IndexMsg::index_meta(&self.index_handle, uuid).await?; let meta = IndexMetadata { uuid, name: uid.clone(), @@ -487,7 +501,7 @@ impl IndexController { pub async fn get_index_stats(&self, uid: String) -> Result { let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?; let update_infos = UpdateMsg::get_info(&self.update_handle).await?; - let mut stats = self.index_handle.get_index_stats(uuid).await?; + let mut stats = IndexMsg::index_stats(&self.index_handle, uuid).await?; // Check if the currently 
indexing update is from out index. stats.is_indexing = Some(Some(uuid) == update_infos.processing); Ok(stats) @@ -500,7 +514,7 @@ impl IndexController { let mut indexes = BTreeMap::new(); for index in self.list_indexes().await? { - let mut index_stats = self.index_handle.get_index_stats(index.uuid).await?; + let mut index_stats = IndexMsg::index_stats(&self.index_handle, index.uuid).await?; database_size += index_stats.size; last_update = last_update.map_or(Some(index.meta.updated_at), |last| { diff --git a/meilisearch-lib/src/index_controller/updates/error.rs b/meilisearch-lib/src/index_controller/updates/error.rs index 0a457c977..858631f69 100644 --- a/meilisearch-lib/src/index_controller/updates/error.rs +++ b/meilisearch-lib/src/index_controller/updates/error.rs @@ -1,8 +1,9 @@ +use std::fmt; use std::error::Error; use meilisearch_error::{Code, ErrorCode}; -use crate::index_controller::index_actor::error::IndexActorError; +use crate::index_controller::indexes::error::IndexActorError; pub type Result = std::result::Result; @@ -25,15 +26,17 @@ pub enum UpdateActorError { PayloadError(#[from] actix_web::error::PayloadError), } -impl From> for UpdateActorError { - fn from(_: tokio::sync::mpsc::error::SendError) -> Self { - Self::FatalUpdateStoreError +impl From> for UpdateActorError +where T: Sync + Send + 'static + fmt::Debug +{ + fn from(other: tokio::sync::mpsc::error::SendError) -> Self { + Self::Internal(Box::new(other)) } } impl From for UpdateActorError { - fn from(_: tokio::sync::oneshot::error::RecvError) -> Self { - Self::FatalUpdateStoreError + fn from(other: tokio::sync::oneshot::error::RecvError) -> Self { + Self::Internal(Box::new(other)) } } diff --git a/meilisearch-lib/src/index_controller/updates/message.rs b/meilisearch-lib/src/index_controller/updates/message.rs index fe6e1360b..09dc7443a 100644 --- a/meilisearch-lib/src/index_controller/updates/message.rs +++ b/meilisearch-lib/src/index_controller/updates/message.rs @@ -7,6 +7,7 @@ use 
uuid::Uuid; use super::error::Result; use super::{Update, UpdateStatus, UpdateStoreInfo}; +#[derive(Debug)] pub enum UpdateMsg { Update { uuid: Uuid, diff --git a/meilisearch-lib/src/index_controller/updates/mod.rs b/meilisearch-lib/src/index_controller/updates/mod.rs index f281250a6..750ca7c46 100644 --- a/meilisearch-lib/src/index_controller/updates/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/mod.rs @@ -27,19 +27,19 @@ use self::store::{UpdateStore, UpdateStoreInfo}; use crate::index_controller::update_file_store::UpdateFileStore; use status::UpdateStatus; +use super::indexes::IndexHandlerSender; use super::{DocumentAdditionFormat, Payload, Update}; pub type UpdateSender = mpsc::Sender; -type IndexSender = mpsc::Sender<()>; pub fn create_update_handler( - index_sender: IndexSender, + index_sender: IndexHandlerSender, db_path: impl AsRef, update_store_size: usize, ) -> anyhow::Result { let path = db_path.as_ref().to_owned(); let (sender, receiver) = mpsc::channel(100); - let actor = UpdateHandler::new(update_store_size, receiver, path, index_sender)?; + let actor = UpdateLoop::new(update_store_size, receiver, path, index_sender)?; tokio::task::spawn_local(actor.run()); @@ -96,20 +96,20 @@ impl> + Unpin> io::Rea } } -pub struct UpdateHandler { +pub struct UpdateLoop { store: Arc, inbox: Option>, update_file_store: UpdateFileStore, - index_handle: IndexSender, + index_handle: IndexHandlerSender, must_exit: Arc, } -impl UpdateHandler { +impl UpdateLoop { pub fn new( update_db_size: usize, inbox: mpsc::Receiver, path: impl AsRef, - index_handle: IndexSender, + index_handle: IndexHandlerSender, ) -> anyhow::Result { let path = path.as_ref().to_owned(); std::fs::create_dir_all(&path)?; diff --git a/meilisearch-lib/src/index_controller/updates/store/dump.rs b/meilisearch-lib/src/index_controller/updates/store/dump.rs index ccb09a309..689678cc4 100644 --- a/meilisearch-lib/src/index_controller/updates/store/dump.rs +++ 
b/meilisearch-lib/src/index_controller/updates/store/dump.rs @@ -10,7 +10,7 @@ use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::{Result, State, UpdateStore}; -use crate::index_controller::{updates::{IndexSender, status::UpdateStatus}}; +use crate::index_controller::{indexes::{IndexHandlerSender, IndexMsg}, updates::{status::UpdateStatus}}; #[derive(Serialize, Deserialize)] struct UpdateEntry { @@ -23,7 +23,7 @@ impl UpdateStore { &self, uuids: &HashSet, path: PathBuf, - handle: IndexSender, + handle: IndexHandlerSender, ) -> Result<()> { let state_lock = self.state.write(); state_lock.swap(State::Dumping); @@ -172,12 +172,11 @@ impl UpdateStore { async fn dump_indexes( uuids: &HashSet, - handle: IndexSender, + handle: IndexHandlerSender, path: impl AsRef, ) -> Result<()> { for uuid in uuids { - //handle.dump(*uuid, path.as_ref().to_owned()).await?; - todo!() + IndexMsg::dump(&handle, *uuid, path.as_ref().to_owned()).await?; } Ok(()) diff --git a/meilisearch-lib/src/index_controller/updates/store/mod.rs b/meilisearch-lib/src/index_controller/updates/store/mod.rs index be8c5f859..25eb840c9 100644 --- a/meilisearch-lib/src/index_controller/updates/store/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/store/mod.rs @@ -30,14 +30,16 @@ use super::RegisterUpdate; use super::error::Result; use super::status::{Enqueued, Processing}; use crate::EnvSizer; +use crate::index_controller::indexes::{CONCURRENT_INDEX_MSG, IndexHandlerSender, IndexMsg}; use crate::index_controller::update_files_path; -use crate::index_controller::{index_actor::CONCURRENT_INDEX_MSG, updates::*}; +use crate::index_controller::updates::*; #[allow(clippy::upper_case_acronyms)] type BEU64 = U64; const UPDATE_DIR: &str = "update_files"; +#[derive(Debug)] pub struct UpdateStoreInfo { /// Size of the update store in bytes. 
pub size: u64, @@ -146,7 +148,7 @@ impl UpdateStore { pub fn open( options: EnvOpenOptions, path: impl AsRef, - index_handle: IndexSender, + index_handle: IndexHandlerSender, must_exit: Arc, ) -> anyhow::Result> { let (update_store, mut notification_receiver) = Self::new(options, path)?; @@ -284,7 +286,7 @@ impl UpdateStore { /// Executes the user provided function on the next pending update (the one with the lowest id). /// This is asynchronous as it let the user process the update with a read-only txn and /// only writing the result meta to the processed-meta store *after* it has been processed. - fn process_pending_update(&self, index_handle: IndexSender) -> Result> { + fn process_pending_update(&self, index_handle: IndexHandlerSender) -> Result> { // Create a read transaction to be able to retrieve the pending update in order. let rtxn = self.env.read_txn()?; let first_meta = self.pending_queue.first(&rtxn)?; @@ -314,7 +316,7 @@ impl UpdateStore { fn perform_update( &self, processing: Processing, - index_handle: IndexSender, + index_handle: IndexHandlerSender, index_uuid: Uuid, global_id: u64, ) -> Result> { @@ -322,7 +324,7 @@ impl UpdateStore { let handle = Handle::current(); let update_id = processing.id(); let result = - match handle.block_on(/*index_handle.update(index_uuid, processing.clone())*/ todo!()) { + match handle.block_on(IndexMsg::update(&index_handle, index_uuid, processing.clone())) { Ok(result) => result, Err(e) => Err(processing.fail(e)), }; @@ -484,7 +486,7 @@ impl UpdateStore { &self, uuids: &HashSet, path: impl AsRef, - handle: IndexSender, + handle: IndexHandlerSender, ) -> Result<()> { let state_lock = self.state.write(); state_lock.swap(State::Snapshoting); @@ -525,7 +527,7 @@ impl UpdateStore { // Perform the snapshot of each index concurently. 
Only a third of the capabilities of // the index actor at a time not to put too much pressure on the index actor let mut stream = futures::stream::iter(uuids.iter()) - .map(move |uuid| todo!() /*handle.snapshot(*uuid, path.clone())*/) + .map(move |uuid| IndexMsg::snapshot(handle,*uuid, path.clone())) .buffer_unordered(CONCURRENT_INDEX_MSG / 3); Handle::current().block_on(async { From 42a6260b656a3c045579f59d40b80e1be5600f2e Mon Sep 17 00:00:00 2001 From: mpostma Date: Fri, 24 Sep 2021 11:53:11 +0200 Subject: [PATCH 07/37] introduce index resolver --- meilisearch-lib/src/index/error.rs | 3 + meilisearch-lib/src/index/mod.rs | 65 ++++- meilisearch-lib/src/index/update_handler.rs | 2 +- meilisearch-lib/src/index/updates.rs | 31 ++- .../src/index_controller/dump_actor/actor.rs | 10 +- .../src/index_controller/dump_actor/error.rs | 12 +- .../dump_actor/handle_impl.rs | 7 +- .../index_controller/dump_actor/loaders/v1.rs | 2 +- .../index_controller/dump_actor/loaders/v2.rs | 2 +- .../src/index_controller/dump_actor/mod.rs | 10 +- meilisearch-lib/src/index_controller/error.rs | 19 +- .../index_controller/index_resolver/error.rs | 63 +++++ .../index_resolver/index_store.rs | 116 +++++++++ .../index_resolver/message.rs | 37 +++ .../index_controller/index_resolver/mod.rs | 117 +++++++++ .../index_resolver/uuid_store.rs | 226 ++++++++++++++++++ .../src/index_controller/indexes/mod.rs | 6 +- .../src/index_controller/indexes/store.rs | 8 +- meilisearch-lib/src/index_controller/mod.rs | 150 ++++++------ .../src/index_controller/updates/error.rs | 30 ++- .../src/index_controller/updates/mod.rs | 33 +-- .../index_controller/updates/store/dump.rs | 26 +- .../src/index_controller/updates/store/mod.rs | 51 ++-- 23 files changed, 833 insertions(+), 193 deletions(-) create mode 100644 meilisearch-lib/src/index_controller/index_resolver/error.rs create mode 100644 meilisearch-lib/src/index_controller/index_resolver/index_store.rs create mode 100644 
meilisearch-lib/src/index_controller/index_resolver/message.rs create mode 100644 meilisearch-lib/src/index_controller/index_resolver/mod.rs create mode 100644 meilisearch-lib/src/index_controller/index_resolver/uuid_store.rs diff --git a/meilisearch-lib/src/index/error.rs b/meilisearch-lib/src/index/error.rs index cfae11a1f..5899b9356 100644 --- a/meilisearch-lib/src/index/error.rs +++ b/meilisearch-lib/src/index/error.rs @@ -17,6 +17,8 @@ pub enum IndexError { Facet(#[from] FacetError), #[error("{0}")] Milli(#[from] milli::Error), + #[error("A primary key is already present. It's impossible to update it")] + ExistingPrimaryKey, } internal_error!( @@ -33,6 +35,7 @@ impl ErrorCode for IndexError { IndexError::DocumentNotFound(_) => Code::DocumentNotFound, IndexError::Facet(e) => e.error_code(), IndexError::Milli(e) => MilliError(e).error_code(), + IndexError::ExistingPrimaryKey => Code::PrimaryKeyAlreadyPresent, } } } diff --git a/meilisearch-lib/src/index/mod.rs b/meilisearch-lib/src/index/mod.rs index c05e337e2..911a22464 100644 --- a/meilisearch-lib/src/index/mod.rs +++ b/meilisearch-lib/src/index/mod.rs @@ -5,19 +5,23 @@ use std::ops::Deref; use std::path::Path; use std::sync::Arc; +use chrono::{DateTime, Utc}; use heed::{EnvOpenOptions, RoTxn}; use milli::update::Setting; -use milli::{obkv_to_json, FieldId}; +use milli::{FieldDistribution, FieldId, obkv_to_json}; use serde_json::{Map, Value}; +use serde::{Serialize, Deserialize}; use error::Result; pub use search::{default_crop_length, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT}; pub use updates::{Checked, Facets, Settings, Unchecked}; +use uuid::Uuid; use crate::EnvSizer; use crate::index_controller::update_file_store::UpdateFileStore; use self::error::IndexError; +use self::update_handler::UpdateHandler; pub mod error; pub mod update_handler; @@ -28,10 +32,51 @@ mod updates; pub type Document = Map; +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "camelCase")] +pub struct 
IndexMeta { + created_at: DateTime, + pub updated_at: DateTime, + pub primary_key: Option, +} + +#[derive(Serialize, Debug)] +#[serde(rename_all = "camelCase")] +pub struct IndexStats { + #[serde(skip)] + pub size: u64, + pub number_of_documents: u64, + /// Whether the current index is performing an update. It is initially `None` when the + /// index returns it, since it is the `UpdateStore` that knows what index is currently indexing. It is + /// later set to either true or false, we we retrieve the information from the `UpdateStore` + pub is_indexing: Option, + pub field_distribution: FieldDistribution, +} + +impl IndexMeta { + pub fn new(index: &Index) -> Result { + let txn = index.read_txn()?; + Self::new_txn(index, &txn) + } + + fn new_txn(index: &Index, txn: &heed::RoTxn) -> Result { + let created_at = index.created_at(txn)?; + let updated_at = index.updated_at(txn)?; + let primary_key = index.primary_key(txn)?.map(String::from); + Ok(Self { + created_at, + updated_at, + primary_key, + }) + } +} + #[derive(Clone)] pub struct Index { + pub uuid: Uuid, pub inner: Arc, update_file_store: Arc, + update_handler: Arc, } impl Deref for Index { @@ -43,14 +88,28 @@ impl Deref for Index { } impl Index { - pub fn open(path: impl AsRef, size: usize, update_file_store: Arc) -> Result { + pub fn open(path: impl AsRef, size: usize, update_file_store: Arc, uuid: Uuid, update_handler: Arc) -> Result { create_dir_all(&path)?; let mut options = EnvOpenOptions::new(); options.map_size(size); let inner = Arc::new(milli::Index::new(options, &path)?); - Ok(Index { inner, update_file_store }) + Ok(Index { inner, update_file_store, uuid, update_handler }) } + pub fn stats(&self) -> Result { + let rtxn = self.read_txn()?; + + Ok(IndexStats { + size: self.size(), + number_of_documents: self.number_of_documents(&rtxn)?, + is_indexing: None, + field_distribution: self.field_distribution(&rtxn)?, + }) + } + + pub fn meta(&self) -> Result { + IndexMeta::new(self) + } pub fn settings(&self) 
-> Result> { let txn = self.read_txn()?; self.settings_txn(&txn) diff --git a/meilisearch-lib/src/index/update_handler.rs b/meilisearch-lib/src/index/update_handler.rs index 95ae2f556..6969b4567 100644 --- a/meilisearch-lib/src/index/update_handler.rs +++ b/meilisearch-lib/src/index/update_handler.rs @@ -52,7 +52,7 @@ impl UpdateHandler { pub fn handle_update( &self, - index: Index, + index: &Index, meta: Processing, ) -> Result { let update_id = meta.id(); diff --git a/meilisearch-lib/src/index/updates.rs b/meilisearch-lib/src/index/updates.rs index c83862f9b..20cf6b2ec 100644 --- a/meilisearch-lib/src/index/updates.rs +++ b/meilisearch-lib/src/index/updates.rs @@ -8,10 +8,10 @@ use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder}; use serde::{Deserialize, Serialize, Serializer}; use uuid::Uuid; -use crate::index_controller::updates::status::UpdateResult; +use crate::index_controller::updates::status::{Failed, Processed, Processing, UpdateResult}; -use super::Index; -use super::error::Result; +use super::{Index, IndexMeta}; +use super::error::{IndexError, Result}; fn serialize_with_wildcard( field: &Setting>, @@ -163,6 +163,31 @@ pub struct Facets { } impl Index { + pub fn handle_update(&self, update: Processing) -> std::result::Result { + self.update_handler.handle_update(self, update) + } + + pub fn update_primary_key(&self, primary_key: Option) -> Result { + match primary_key { + Some(primary_key) => { + let mut txn = self.write_txn()?; + if self.primary_key(&txn)?.is_some() { + return Err(IndexError::ExistingPrimaryKey); + } + let mut builder = UpdateBuilder::new(0).settings(&mut txn, self); + builder.set_primary_key(primary_key); + builder.execute(|_, _| ())?; + let meta = IndexMeta::new_txn(self, &txn)?; + txn.commit()?; + Ok(meta) + } + None => { + let meta = IndexMeta::new(self)?; + Ok(meta) + } + } + } + pub fn update_documents( &self, method: IndexDocumentsMethod, diff --git a/meilisearch-lib/src/index_controller/dump_actor/actor.rs 
b/meilisearch-lib/src/index_controller/dump_actor/actor.rs index 881f3e5b8..bfde3896c 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/actor.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/actor.rs @@ -10,14 +10,14 @@ use tokio::sync::{mpsc, oneshot, RwLock}; use super::error::{DumpActorError, Result}; use super::{DumpInfo, DumpMsg, DumpStatus, DumpTask}; -use crate::index_controller::uuid_resolver::UuidResolverSender; +use crate::index_controller::index_resolver::HardStateIndexResolver; use crate::index_controller::updates::UpdateSender; pub const CONCURRENT_DUMP_MSG: usize = 10; pub struct DumpActor { inbox: Option>, - uuid_resolver: UuidResolverSender, + index_resolver: Arc, update: UpdateSender, dump_path: PathBuf, lock: Arc>, @@ -34,7 +34,7 @@ fn generate_uid() -> String { impl DumpActor { pub fn new( inbox: mpsc::Receiver, - uuid_resolver: UuidResolverSender, + index_resolver: Arc, update: UpdateSender, dump_path: impl AsRef, index_db_size: usize, @@ -44,7 +44,7 @@ impl DumpActor { let lock = Arc::new(Mutex::new(())); Self { inbox: Some(inbox), - uuid_resolver, + index_resolver, update, dump_path: dump_path.as_ref().into(), dump_infos, @@ -113,7 +113,7 @@ impl DumpActor { let task = DumpTask { path: self.dump_path.clone(), - uuid_resolver: self.uuid_resolver.clone(), + index_resolver: self.index_resolver.clone(), update_handle: self.update.clone(), uid: uid.clone(), update_db_size: self.update_db_size, diff --git a/meilisearch-lib/src/index_controller/dump_actor/error.rs b/meilisearch-lib/src/index_controller/dump_actor/error.rs index eb6f08c00..9831f3931 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/error.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/error.rs @@ -1,7 +1,7 @@ use meilisearch_error::{Code, ErrorCode}; -use crate::index_controller::updates::error::UpdateActorError; -use crate::index_controller::uuid_resolver::error::UuidResolverError; +use 
crate::index_controller::index_resolver::error::IndexResolverError; +use crate::index_controller::updates::error::UpdateLoopError; pub type Result = std::result::Result; @@ -14,9 +14,9 @@ pub enum DumpActorError { #[error("Internal error: {0}")] Internal(Box), #[error("{0}")] - UuidResolver(#[from] UuidResolverError), + IndexResolver(#[from] IndexResolverError), #[error("{0}")] - UpdateActor(#[from] UpdateActorError), + UpdateLoop(#[from] UpdateLoopError), } macro_rules! internal_error { @@ -45,8 +45,8 @@ impl ErrorCode for DumpActorError { DumpActorError::DumpAlreadyRunning => Code::DumpAlreadyInProgress, DumpActorError::DumpDoesNotExist(_) => Code::NotFound, DumpActorError::Internal(_) => Code::Internal, - DumpActorError::UuidResolver(e) => e.error_code(), - DumpActorError::UpdateActor(e) => e.error_code(), + DumpActorError::IndexResolver(e) => e.error_code(), + DumpActorError::UpdateLoop(e) => e.error_code(), } } } diff --git a/meilisearch-lib/src/index_controller/dump_actor/handle_impl.rs b/meilisearch-lib/src/index_controller/dump_actor/handle_impl.rs index a629ff753..5acee2f81 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/handle_impl.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/handle_impl.rs @@ -1,8 +1,9 @@ use std::path::Path; +use std::sync::Arc; use tokio::sync::{mpsc, oneshot}; -use crate::index_controller::uuid_resolver::UuidResolverSender; +use crate::index_controller::index_resolver::HardStateIndexResolver; use super::error::Result; use super::{DumpActor, DumpActorHandle, DumpInfo, DumpMsg}; @@ -32,7 +33,7 @@ impl DumpActorHandle for DumpActorHandleImpl { impl DumpActorHandleImpl { pub fn new( path: impl AsRef, - uuid_resolver: UuidResolverSender, + index_resolver: Arc, update: crate::index_controller::updates::UpdateSender, index_db_size: usize, update_db_size: usize, @@ -40,7 +41,7 @@ impl DumpActorHandleImpl { let (sender, receiver) = mpsc::channel(10); let actor = DumpActor::new( receiver, - uuid_resolver, + 
index_resolver, update, path, index_db_size, diff --git a/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs b/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs index b489b2107..1add2709b 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs @@ -7,7 +7,7 @@ use milli::update::Setting; use serde::{Deserialize, Deserializer, Serialize}; use uuid::Uuid; -use crate::index_controller::uuid_resolver::store::HeedUuidStore; +use crate::index_controller::index_resolver::uuid_store::HeedUuidStore; use crate::index_controller::{self, IndexMetadata}; use crate::index_controller::{asc_ranking_rule, desc_ranking_rule}; use crate::{ diff --git a/meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs b/meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs index c50e8a722..94b7321ae 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs @@ -5,8 +5,8 @@ use log::info; use serde::{Deserialize, Serialize}; use crate::index::Index; +use crate::index_controller::index_resolver::uuid_store::HeedUuidStore; use crate::index_controller::updates::store::UpdateStore; -use crate::index_controller::{uuid_resolver::store::HeedUuidStore}; use crate::options::IndexerOpts; #[derive(Serialize, Deserialize, Debug)] diff --git a/meilisearch-lib/src/index_controller/dump_actor/mod.rs b/meilisearch-lib/src/index_controller/dump_actor/mod.rs index 7db682e98..b7c61f568 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/mod.rs @@ -1,5 +1,6 @@ use std::fs::File; use std::path::{Path, PathBuf}; +use std::sync::Arc; use anyhow::Context; use chrono::{DateTime, Utc}; @@ -16,11 +17,10 @@ pub use actor::DumpActor; pub use handle_impl::*; pub use message::DumpMsg; +use super::index_resolver::HardStateIndexResolver; use 
super::updates::UpdateSender; -use super::uuid_resolver::UuidResolverSender; use crate::index_controller::dump_actor::error::DumpActorError; use crate::index_controller::updates::UpdateMsg; -use crate::index_controller::uuid_resolver::UuidResolverMsg; use crate::options::IndexerOpts; use error::Result; @@ -154,7 +154,7 @@ pub fn load_dump( struct DumpTask { path: PathBuf, - uuid_resolver: UuidResolverSender, + index_resolver: Arc, update_handle: UpdateSender, uid: String, update_db_size: usize, @@ -177,9 +177,9 @@ impl DumpTask { let mut meta_file = File::create(&meta_path)?; serde_json::to_writer(&mut meta_file, &meta)?; - let uuids = UuidResolverMsg::dump(&self.uuid_resolver, temp_dump_path.clone()).await?; + let uuids = self.index_resolver.dump(temp_dump_path.clone()).await?; - UpdateMsg::dump(&self.update_handle, uuids, temp_dump_path.clone()).await?; + UpdateMsg::dump(&self.update_handle, uuids.into_iter().collect(), temp_dump_path.clone()).await?; let dump_path = tokio::task::spawn_blocking(move || -> Result { let temp_dump_file = tempfile::NamedTempFile::new_in(&self.path)?; diff --git a/meilisearch-lib/src/index_controller/error.rs b/meilisearch-lib/src/index_controller/error.rs index 8c60e9103..417bda01b 100644 --- a/meilisearch-lib/src/index_controller/error.rs +++ b/meilisearch-lib/src/index_controller/error.rs @@ -2,13 +2,13 @@ use std::error::Error; use meilisearch_error::Code; use meilisearch_error::ErrorCode; +use tokio::task::JoinError; use crate::index::error::IndexError; use super::dump_actor::error::DumpActorError; -use super::indexes::error::IndexActorError; -use super::updates::error::UpdateActorError; -use super::uuid_resolver::error::UuidResolverError; +use super::index_resolver::error::IndexResolverError; +use super::updates::error::UpdateLoopError; pub type Result = std::result::Result; @@ -17,11 +17,9 @@ pub enum IndexControllerError { #[error("Index creation must have an uid")] MissingUid, #[error("{0}")] - Uuid(#[from] 
UuidResolverError), + IndexResolver(#[from] IndexResolverError), #[error("{0}")] - IndexActor(#[from] IndexActorError), - #[error("{0}")] - UpdateActor(#[from] UpdateActorError), + UpdateLoop(#[from] UpdateLoopError), #[error("{0}")] DumpActor(#[from] DumpActorError), #[error("{0}")] @@ -30,13 +28,14 @@ pub enum IndexControllerError { Internal(Box), } +internal_error!(IndexControllerError: JoinError); + impl ErrorCode for IndexControllerError { fn error_code(&self) -> Code { match self { IndexControllerError::MissingUid => Code::BadRequest, - IndexControllerError::Uuid(e) => e.error_code(), - IndexControllerError::IndexActor(e) => e.error_code(), - IndexControllerError::UpdateActor(e) => e.error_code(), + IndexControllerError::IndexResolver(e) => e.error_code(), + IndexControllerError::UpdateLoop(e) => e.error_code(), IndexControllerError::DumpActor(e) => e.error_code(), IndexControllerError::IndexError(e) => e.error_code(), IndexControllerError::Internal(_) => Code::Internal, diff --git a/meilisearch-lib/src/index_controller/index_resolver/error.rs b/meilisearch-lib/src/index_controller/index_resolver/error.rs new file mode 100644 index 000000000..af61a99de --- /dev/null +++ b/meilisearch-lib/src/index_controller/index_resolver/error.rs @@ -0,0 +1,63 @@ +use std::fmt; + +use meilisearch_error::{Code, ErrorCode}; +use tokio::sync::mpsc::error::SendError as MpscSendError; +use tokio::sync::oneshot::error::RecvError as OneshotRecvError; + +use crate::{error::MilliError, index::error::IndexError}; + +pub type Result = std::result::Result; + +#[derive(thiserror::Error, Debug)] +pub enum IndexResolverError { + #[error("{0}")] + IndexError(#[from] IndexError), + #[error("Index already exists")] + IndexAlreadyExists, + #[error("Index {0} not found")] + UnexistingIndex(String), + #[error("A primary key is already present. 
It's impossible to update it")] + ExistingPrimaryKey, + #[error("Internal Error: {0}")] + Internal(Box), + #[error("{0}")] + Milli(#[from] milli::Error), + #[error("Index must have a valid uid; Index uid can be of type integer or string only composed of alphanumeric characters, hyphens (-) and underscores (_).")] + BadlyFormatted(String), +} + +impl From> for IndexResolverError +where T: Send + Sync + 'static + fmt::Debug +{ + fn from(other: tokio::sync::mpsc::error::SendError) -> Self { + Self::Internal(Box::new(other)) + } +} + +impl From for IndexResolverError { + fn from(other: tokio::sync::oneshot::error::RecvError) -> Self { + Self::Internal(Box::new(other)) + } +} + +internal_error!( + IndexResolverError: heed::Error, + uuid::Error, + std::io::Error, + tokio::task::JoinError, + serde_json::Error +); + +impl ErrorCode for IndexResolverError { + fn error_code(&self) -> Code { + match self { + IndexResolverError::IndexError(e) => e.error_code(), + IndexResolverError::IndexAlreadyExists => Code::IndexAlreadyExists, + IndexResolverError::UnexistingIndex(_) => Code::IndexNotFound, + IndexResolverError::ExistingPrimaryKey => Code::PrimaryKeyAlreadyPresent, + IndexResolverError::Internal(_) => Code::Internal, + IndexResolverError::Milli(e) => MilliError(e).error_code(), + IndexResolverError::BadlyFormatted(_) => Code::InvalidIndexUid, + } + } +} diff --git a/meilisearch-lib/src/index_controller/index_resolver/index_store.rs b/meilisearch-lib/src/index_controller/index_resolver/index_store.rs new file mode 100644 index 000000000..c038ceb20 --- /dev/null +++ b/meilisearch-lib/src/index_controller/index_resolver/index_store.rs @@ -0,0 +1,116 @@ +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +use milli::update::UpdateBuilder; +use tokio::fs; +use tokio::sync::RwLock; +use tokio::task::spawn_blocking; +use uuid::Uuid; + +use super::error::{IndexResolverError, Result}; +use crate::index::Index; +use 
crate::index::update_handler::UpdateHandler; +use crate::index_controller::update_file_store::UpdateFileStore; +use crate::options::IndexerOpts; + +type AsyncMap = Arc>>; + +#[async_trait::async_trait] +pub trait IndexStore { + async fn create(&self, uuid: Uuid, primary_key: Option) -> Result; + async fn get(&self, uuid: Uuid) -> Result>; + async fn delete(&self, uuid: Uuid) -> Result>; +} + +pub struct MapIndexStore { + index_store: AsyncMap, + path: PathBuf, + index_size: usize, + update_file_store: Arc, + update_handler: Arc, +} + +impl MapIndexStore { + pub fn new(path: impl AsRef, index_size: usize, indexer_opts: &IndexerOpts) -> anyhow::Result { + let update_handler = Arc::new(UpdateHandler::new(indexer_opts)?); + let update_file_store = Arc::new(UpdateFileStore::new(path.as_ref()).unwrap()); + let path = path.as_ref().join("indexes/"); + let index_store = Arc::new(RwLock::new(HashMap::new())); + Ok(Self { + index_store, + path, + index_size, + update_file_store, + update_handler, + }) + } +} + +#[async_trait::async_trait] +impl IndexStore for MapIndexStore { + async fn create(&self, uuid: Uuid, primary_key: Option) -> Result { + // We need to keep the lock until we are sure the db file has been opened correclty, to + // ensure that another db is not created at the same time. 
+ let mut lock = self.index_store.write().await; + + if let Some(index) = lock.get(&uuid) { + return Ok(index.clone()); + } + let path = self.path.join(format!("index-{}", uuid)); + if path.exists() { + return Err(IndexResolverError::IndexAlreadyExists); + } + + let index_size = self.index_size; + let file_store = self.update_file_store.clone(); + let update_handler = self.update_handler.clone(); + let index = spawn_blocking(move || -> Result { + let index = Index::open(path, index_size, file_store, uuid, update_handler)?; + if let Some(primary_key) = primary_key { + let mut txn = index.write_txn()?; + + let mut builder = UpdateBuilder::new(0).settings(&mut txn, &index); + builder.set_primary_key(primary_key); + builder.execute(|_, _| ())?; + + txn.commit()?; + } + Ok(index) + }) + .await??; + + lock.insert(uuid, index.clone()); + + Ok(index) + } + + async fn get(&self, uuid: Uuid) -> Result> { + let guard = self.index_store.read().await; + match guard.get(&uuid) { + Some(index) => Ok(Some(index.clone())), + None => { + // drop the guard here so we can perform the write after without deadlocking; + drop(guard); + let path = self.path.join(format!("index-{}", uuid)); + if !path.exists() { + return Ok(None); + } + + let index_size = self.index_size; + let file_store = self.update_file_store.clone(); + let update_handler = self.update_handler.clone(); + let index = spawn_blocking(move || Index::open(path, index_size, file_store, uuid, update_handler)).await??; + self.index_store.write().await.insert(uuid, index.clone()); + Ok(Some(index)) + } + } + } + + async fn delete(&self, uuid: Uuid) -> Result> { + let db_path = self.path.join(format!("index-{}", uuid)); + fs::remove_dir_all(db_path).await?; + let index = self.index_store.write().await.remove(&uuid); + Ok(index) + } +} diff --git a/meilisearch-lib/src/index_controller/index_resolver/message.rs b/meilisearch-lib/src/index_controller/index_resolver/message.rs new file mode 100644 index 000000000..25a0d64a9 --- 
/dev/null +++ b/meilisearch-lib/src/index_controller/index_resolver/message.rs @@ -0,0 +1,37 @@ +use std::{collections::HashSet, path::PathBuf}; + +use tokio::sync::oneshot; +use uuid::Uuid; + +use crate::index::Index; +use super::error::Result; + +pub enum IndexResolverMsg { + Get { + uid: String, + ret: oneshot::Sender>, + }, + Delete { + uid: String, + ret: oneshot::Sender>, + }, + List { + ret: oneshot::Sender>>, + }, + Insert { + uuid: Uuid, + name: String, + ret: oneshot::Sender>, + }, + SnapshotRequest { + path: PathBuf, + ret: oneshot::Sender>>, + }, + GetSize { + ret: oneshot::Sender>, + }, + DumpRequest { + path: PathBuf, + ret: oneshot::Sender>>, + }, +} diff --git a/meilisearch-lib/src/index_controller/index_resolver/mod.rs b/meilisearch-lib/src/index_controller/index_resolver/mod.rs new file mode 100644 index 000000000..eebb8ef95 --- /dev/null +++ b/meilisearch-lib/src/index_controller/index_resolver/mod.rs @@ -0,0 +1,117 @@ +pub mod uuid_store; +mod index_store; +//mod message; +pub mod error; + +use std::path::Path; + +use uuid::Uuid; +use uuid_store::{UuidStore, HeedUuidStore}; +use index_store::{IndexStore, MapIndexStore}; +use error::{Result, IndexResolverError}; + +use crate::{index::Index, options::IndexerOpts}; + +pub type HardStateIndexResolver = IndexResolver; + +pub fn create_index_resolver(path: impl AsRef, index_size: usize, indexer_opts: &IndexerOpts) -> anyhow::Result { + let uuid_store = HeedUuidStore::new(&path)?; + let index_store = MapIndexStore::new(&path, index_size, indexer_opts)?; + Ok(IndexResolver::new(uuid_store, index_store)) +} + +pub struct IndexResolver { + index_uuid_store: U, + index_store: I, +} + +impl IndexResolver +where U: UuidStore, + I: IndexStore, +{ + pub fn new( + index_uuid_store: U, + index_store: I, + ) -> Self { + Self { + index_uuid_store, + index_store, + } + } + + pub async fn dump(&self, _path: impl AsRef) -> Result> { + todo!() + } + + pub async fn get_size(&self) -> Result { + todo!() + } + + pub 
async fn perform_snapshot(&self, _path: impl AsRef) -> Result<()> { + todo!() + } + + pub async fn create_index(&self, uid: String, primary_key: Option) -> Result<(Uuid, Index)> { + let uuid = Uuid::new_v4(); + let index = self.index_store.create(uuid, primary_key).await?; + self.index_uuid_store.insert(uid, uuid).await?; + Ok((uuid, index)) + } + + pub async fn list(&self) -> Result> { + let uuids = self.index_uuid_store.list().await?; + let mut indexes = Vec::new(); + for (name, uuid) in uuids { + match self.index_store.get(uuid).await? { + Some(index) => { + indexes.push((name, index)) + }, + None => { + // we found an unexisting index, we remove it from the uuid store + let _ = self.index_uuid_store.delete(name).await; + }, + } + } + + Ok(indexes) + } + + pub async fn delete_index(&self, uid: String) -> Result<()> { + match self.index_uuid_store.delete(uid.clone()).await? { + Some(uuid) => { + let _ = self.index_store.delete(uuid).await; + Ok(()) + } + None => Err(IndexResolverError::UnexistingIndex(uid)), + } + } + + pub async fn get_index_by_uuid(&self, uuid: Uuid) -> Result { + // TODO: Handle this error better. + self.index_store.get(uuid).await?.ok_or(IndexResolverError::UnexistingIndex(String::new())) + } + + pub async fn get_index(&self, uid: String) -> Result { + match self.index_uuid_store.get_uuid(uid).await? { + (name, Some(uuid)) => { + match self.index_store.get(uuid).await? { + Some(index) => Ok(index), + None => { + // For some reason we got a uuid to an unexisting index, we return an error, + // and remove the uuid from th uuid store. + let _ = self.index_uuid_store.delete(name.clone()).await; + Err(IndexResolverError::UnexistingIndex(name)) + }, + } + } + (name, _) => Err(IndexResolverError::UnexistingIndex(name)) + } + } + + pub async fn get_uuid(&self, uid: String) -> Result { + match self.index_uuid_store.get_uuid(uid).await? 
{ + (_, Some(uuid)) => Ok(uuid), + (name, _) => Err(IndexResolverError::UnexistingIndex(name)) + } + } +} diff --git a/meilisearch-lib/src/index_controller/index_resolver/uuid_store.rs b/meilisearch-lib/src/index_controller/index_resolver/uuid_store.rs new file mode 100644 index 000000000..7974bf4ae --- /dev/null +++ b/meilisearch-lib/src/index_controller/index_resolver/uuid_store.rs @@ -0,0 +1,226 @@ +use std::collections::HashSet; +use std::fs::{create_dir_all, File}; +use std::io::{BufRead, BufReader, Write}; +use std::path::{Path, PathBuf}; + +use heed::types::{ByteSlice, Str}; +use heed::{CompactionOption, Database, Env, EnvOpenOptions}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use super::error::{Result, IndexResolverError}; +use crate::EnvSizer; + +const UUID_STORE_SIZE: usize = 1_073_741_824; //1GiB + +#[derive(Serialize, Deserialize)] +struct DumpEntry { + uuid: Uuid, + uid: String, +} + +const UUIDS_DB_PATH: &str = "index_uuids"; + +#[async_trait::async_trait] +pub trait UuidStore: Sized { + // Create a new entry for `name`. Return an error if `err` and the entry already exists, return + // the uuid otherwise. 
+ async fn get_uuid(&self, uid: String) -> Result<(String, Option)>; + async fn delete(&self, uid: String) -> Result>; + async fn list(&self) -> Result>; + async fn insert(&self, name: String, uuid: Uuid) -> Result<()>; + async fn snapshot(&self, path: PathBuf) -> Result>; + async fn get_size(&self) -> Result; + async fn dump(&self, path: PathBuf) -> Result>; +} + +#[derive(Clone)] +pub struct HeedUuidStore { + env: Env, + db: Database, +} + +impl HeedUuidStore { + pub fn new(path: impl AsRef) -> Result { + let path = path.as_ref().join(UUIDS_DB_PATH); + create_dir_all(&path)?; + let mut options = EnvOpenOptions::new(); + options.map_size(UUID_STORE_SIZE); // 1GB + let env = options.open(path)?; + let db = env.create_database(None)?; + Ok(Self { env, db }) + } + + pub fn get_uuid(&self, name: &str) -> Result> { + let env = self.env.clone(); + let db = self.db; + let txn = env.read_txn()?; + match db.get(&txn, name)? { + Some(uuid) => { + let uuid = Uuid::from_slice(uuid)?; + Ok(Some(uuid)) + } + None => Ok(None), + } + } + + pub fn delete(&self, uid: String) -> Result> { + let env = self.env.clone(); + let db = self.db; + let mut txn = env.write_txn()?; + match db.get(&txn, &uid)? { + Some(uuid) => { + let uuid = Uuid::from_slice(uuid)?; + db.delete(&mut txn, &uid)?; + txn.commit()?; + Ok(Some(uuid)) + } + None => Ok(None), + } + } + + pub fn list(&self) -> Result> { + let env = self.env.clone(); + let db = self.db; + let txn = env.read_txn()?; + let mut entries = Vec::new(); + for entry in db.iter(&txn)? 
{ + let (name, uuid) = entry?; + let uuid = Uuid::from_slice(uuid)?; + entries.push((name.to_owned(), uuid)) + } + Ok(entries) + } + + pub fn insert(&self, name: String, uuid: Uuid) -> Result<()> { + let env = self.env.clone(); + let db = self.db; + let mut txn = env.write_txn()?; + + if db.get(&txn, &name)?.is_some() { + return Err(IndexResolverError::IndexAlreadyExists); + } + + db.put(&mut txn, &name, uuid.as_bytes())?; + txn.commit()?; + Ok(()) + } + + pub fn snapshot(&self, mut path: PathBuf) -> Result> { + let env = self.env.clone(); + let db = self.db; + // Write transaction to acquire a lock on the database. + let txn = env.write_txn()?; + let mut entries = HashSet::new(); + for entry in db.iter(&txn)? { + let (_, uuid) = entry?; + let uuid = Uuid::from_slice(uuid)?; + entries.insert(uuid); + } + + // only perform snapshot if there are indexes + if !entries.is_empty() { + path.push(UUIDS_DB_PATH); + create_dir_all(&path).unwrap(); + path.push("data.mdb"); + env.copy_to_path(path, CompactionOption::Enabled)?; + } + Ok(entries) + } + + pub fn get_size(&self) -> Result { + Ok(self.env.size()) + } + + pub fn dump(&self, path: PathBuf) -> Result> { + let dump_path = path.join(UUIDS_DB_PATH); + create_dir_all(&dump_path)?; + let dump_file_path = dump_path.join("data.jsonl"); + let mut dump_file = File::create(&dump_file_path)?; + let mut uuids = HashSet::new(); + + let txn = self.env.read_txn()?; + for entry in self.db.iter(&txn)? 
{ + let (uid, uuid) = entry?; + let uid = uid.to_string(); + let uuid = Uuid::from_slice(uuid)?; + + let entry = DumpEntry { uuid, uid }; + serde_json::to_writer(&mut dump_file, &entry)?; + dump_file.write_all(b"\n").unwrap(); + + uuids.insert(uuid); + } + + Ok(uuids) + } + + pub fn load_dump(src: impl AsRef, dst: impl AsRef) -> Result<()> { + let uuid_resolver_path = dst.as_ref().join(UUIDS_DB_PATH); + std::fs::create_dir_all(&uuid_resolver_path)?; + + let src_indexes = src.as_ref().join(UUIDS_DB_PATH).join("data.jsonl"); + let indexes = File::open(&src_indexes)?; + let mut indexes = BufReader::new(indexes); + let mut line = String::new(); + + let db = Self::new(dst)?; + let mut txn = db.env.write_txn()?; + + loop { + match indexes.read_line(&mut line) { + Ok(0) => break, + Ok(_) => { + let DumpEntry { uuid, uid } = serde_json::from_str(&line)?; + println!("importing {} {}", uid, uuid); + db.db.put(&mut txn, &uid, uuid.as_bytes())?; + } + Err(e) => return Err(e.into()), + } + + line.clear(); + } + txn.commit()?; + + db.env.prepare_for_closing().wait(); + + Ok(()) + } +} + +#[async_trait::async_trait] +impl UuidStore for HeedUuidStore { + async fn get_uuid(&self, name: String) -> Result<(String, Option)> { + let this = self.clone(); + tokio::task::spawn_blocking(move || this.get_uuid(&name).map(|res| (name, res))).await? + } + + async fn delete(&self, uid: String) -> Result> { + let this = self.clone(); + tokio::task::spawn_blocking(move || this.delete(uid)).await? + } + + async fn list(&self) -> Result> { + let this = self.clone(); + tokio::task::spawn_blocking(move || this.list()).await? + } + + async fn insert(&self, name: String, uuid: Uuid) -> Result<()> { + let this = self.clone(); + tokio::task::spawn_blocking(move || this.insert(name, uuid)).await? + } + + async fn snapshot(&self, path: PathBuf) -> Result> { + let this = self.clone(); + tokio::task::spawn_blocking(move || this.snapshot(path)).await? 
+ } + + async fn get_size(&self) -> Result { + self.get_size() + } + + async fn dump(&self, path: PathBuf) -> Result> { + let this = self.clone(); + tokio::task::spawn_blocking(move || this.dump(path)).await? + } +} diff --git a/meilisearch-lib/src/index_controller/indexes/mod.rs b/meilisearch-lib/src/index_controller/indexes/mod.rs index bac492364..48649cf40 100644 --- a/meilisearch-lib/src/index_controller/indexes/mod.rs +++ b/meilisearch-lib/src/index_controller/indexes/mod.rs @@ -42,7 +42,7 @@ pub fn create_indexes_handler( indexer_options: &IndexerOpts, ) -> anyhow::Result { let (sender, receiver) = mpsc::channel(100); - let store = MapIndexStore::new(&db_path, index_size); + let store = MapIndexStore::new(&db_path, index_size, indexer_options); let actor = IndexActor::new(receiver, store, indexer_options)?; tokio::task::spawn(actor.run()); @@ -59,7 +59,7 @@ pub struct IndexMeta { } impl IndexMeta { - fn new(index: &Index) -> Result { + pub fn new(index: &Index) -> Result { let txn = index.read_txn()?; Self::new_txn(index, &txn) } @@ -223,7 +223,7 @@ where None => self.store.create(uuid, None).await?, }; - Ok(spawn_blocking(move || update_handler.handle_update(index, meta)).await?) + Ok(spawn_blocking(move || update_handler.handle_update(&index, meta)).await?) 
} async fn handle_settings(&self, uuid: Uuid) -> Result> { diff --git a/meilisearch-lib/src/index_controller/indexes/store.rs b/meilisearch-lib/src/index_controller/indexes/store.rs index 252271d51..336ff6e0a 100644 --- a/meilisearch-lib/src/index_controller/indexes/store.rs +++ b/meilisearch-lib/src/index_controller/indexes/store.rs @@ -10,6 +10,7 @@ use uuid::Uuid; use super::error::{IndexActorError, Result}; use crate::index::Index; +use crate::index::update_handler::UpdateHandler; use crate::index_controller::update_file_store::UpdateFileStore; type AsyncMap = Arc>>; @@ -26,10 +27,11 @@ pub struct MapIndexStore { path: PathBuf, index_size: usize, update_file_store: Arc, + update_handler: Arc, } impl MapIndexStore { - pub fn new(path: impl AsRef, index_size: usize) -> Self { + pub fn new(path: impl AsRef, index_size: usize, update_handler: Arc) -> Self { let update_file_store = Arc::new(UpdateFileStore::new(path.as_ref()).unwrap()); let path = path.as_ref().join("indexes/"); let index_store = Arc::new(RwLock::new(HashMap::new())); @@ -38,6 +40,7 @@ impl MapIndexStore { path, index_size, update_file_store, + update_handler, } } } @@ -59,8 +62,9 @@ impl IndexStore for MapIndexStore { let index_size = self.index_size; let file_store = self.update_file_store.clone(); + let update_handler = self.update_handler.clone(); let index = spawn_blocking(move || -> Result { - let index = Index::open(path, index_size, file_store)?; + let index = Index::open(path, index_size, file_store, uuid, update_handler)?; if let Some(primary_key) = primary_key { let mut txn = index.write_txn()?; diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index bd3f4c07b..da108fe68 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -9,38 +9,52 @@ use chrono::{DateTime, Utc}; use futures::Stream; use log::info; use milli::update::IndexDocumentsMethod; -use milli::FieldDistribution; use 
serde::{Deserialize, Serialize}; +use tokio::task::spawn_blocking; use tokio::time::sleep; use uuid::Uuid; use dump_actor::DumpActorHandle; pub use dump_actor::{DumpInfo, DumpStatus}; use snapshot::load_snapshot; -use uuid_resolver::error::UuidResolverError; -use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings}; +use crate::index::{Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings}; +use crate::index_controller::index_resolver::create_index_resolver; use crate::options::IndexerOpts; use error::Result; +use crate::index::error::Result as IndexResult; use self::dump_actor::load_dump; -use self::indexes::IndexMsg; +use self::index_resolver::HardStateIndexResolver; +use self::index_resolver::error::IndexResolverError; use self::updates::status::UpdateStatus; use self::updates::UpdateMsg; -use self::uuid_resolver::UuidResolverMsg; mod dump_actor; pub mod error; -pub mod indexes; +//pub mod indexes; mod snapshot; pub mod update_file_store; pub mod updates; -mod uuid_resolver; +//mod uuid_resolver; +mod index_resolver; pub type Payload = Box< dyn Stream> + Send + Sync + 'static + Unpin, >; +macro_rules! time { + ($e:expr) => { + { + let now = std::time::Instant::now(); + let result = $e; + let elapsed = now.elapsed(); + println!("elapsed at line {}: {}ms ({}ns)", line!(), elapsed.as_millis(), elapsed.as_nanos()); + result + } + }; +} + #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(rename_all = "camelCase")] pub struct IndexMetadata { @@ -49,7 +63,7 @@ pub struct IndexMetadata { pub uid: String, name: String, #[serde(flatten)] - pub meta: indexes::IndexMeta, + pub meta: IndexMeta, } #[derive(Clone, Debug)] @@ -58,23 +72,9 @@ pub struct IndexSettings { pub primary_key: Option, } -#[derive(Serialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct IndexStats { - #[serde(skip)] - pub size: u64, - pub number_of_documents: u64, - /// Whether the current index is performing an update. 
It is initially `None` when the - /// index returns it, since it is the `UpdateStore` that knows what index is currently indexing. It is - /// later set to either true or false, we we retrieve the information from the `UpdateStore` - pub is_indexing: Option, - pub field_distribution: FieldDistribution, -} - #[derive(Clone)] pub struct IndexController { - uuid_resolver: uuid_resolver::UuidResolverSender, - index_handle: indexes::IndexHandlerSender, + index_resolver: Arc, update_handle: updates::UpdateSender, dump_handle: dump_actor::DumpActorHandleImpl, } @@ -149,17 +149,15 @@ impl IndexControllerBuilder { std::fs::create_dir_all(db_path.as_ref())?; - let uuid_resolver = uuid_resolver::create_uuid_resolver(&db_path)?; - let index_handle = indexes::create_indexes_handler(&db_path, index_size, &indexer_options)?; + let index_resolver = Arc::new(create_index_resolver(&db_path, index_size, &indexer_options)?); #[allow(unreachable_code)] - let update_handle = updates::create_update_handler(index_handle.clone(), &db_path, update_store_size)?; + let update_handle = updates::create_update_handler(index_resolver.clone(), &db_path, update_store_size)?; + let dump_path = self.dump_dst.ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?; let dump_handle = dump_actor::DumpActorHandleImpl::new( - &self - .dump_dst - .ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?, - uuid_resolver.clone(), + dump_path, + index_resolver.clone(), update_handle.clone(), index_size, update_store_size, @@ -182,8 +180,7 @@ impl IndexControllerBuilder { //} Ok(IndexController { - uuid_resolver, - index_handle, + index_resolver, update_handle, dump_handle, }) @@ -246,18 +243,15 @@ impl IndexController { } pub async fn register_update(&self, uid: &str, update: Update) -> Result { - let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid.to_string()).await; - match uuid { + match self.index_resolver.get_uuid(uid.to_string()).await { Ok(uuid) => { let update_result = 
UpdateMsg::update(&self.update_handle, uuid, update).await?; Ok(update_result) } - Err(UuidResolverError::UnexistingIndex(name)) => { - let uuid = Uuid::new_v4(); + Err(IndexResolverError::UnexistingIndex(name)) => { + let (uuid, _) = self.index_resolver.create_index(name, None).await?; let update_result = UpdateMsg::update(&self.update_handle, uuid, update).await?; // ignore if index creation fails now, since it may already have been created - let _ = IndexMsg::create_index(&self.index_handle, uuid, None).await?; - UuidResolverMsg::insert(&self.uuid_resolver, uuid, name).await?; Ok(update_result) } @@ -391,24 +385,24 @@ impl IndexController { //} pub async fn update_status(&self, uid: String, id: u64) -> Result { - let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?; + let uuid = self.index_resolver.get_uuid(uid).await?; let result = UpdateMsg::get_update(&self.update_handle, uuid, id).await?; Ok(result) } pub async fn all_update_status(&self, uid: String) -> Result> { - let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?; + let uuid = self.index_resolver.get_uuid(uid).await?; let result = UpdateMsg::list_updates(&self.update_handle, uuid).await?; Ok(result) } pub async fn list_indexes(&self) -> Result> { - let uuids = UuidResolverMsg::list(&self.uuid_resolver).await?; + let indexes = self.index_resolver.list().await?; let mut ret = Vec::new(); - for (uid, uuid) in uuids { - let meta = IndexMsg::index_meta(&self.index_handle, uuid).await?; + for (uid, index) in indexes { + let meta = index.meta()?; let meta = IndexMetadata { - uuid, + uuid: index.uuid, name: uid.clone(), uid, meta, @@ -420,8 +414,8 @@ impl IndexController { } pub async fn settings(&self, uid: String) -> Result> { - let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?; - let settings = IndexMsg::settings(&self.index_handle, uuid).await?; + let index = self.index_resolver.get_index(uid).await?; + let settings = spawn_blocking(move || index.settings()).await??; 
Ok(settings) } @@ -432,15 +426,8 @@ impl IndexController { limit: usize, attributes_to_retrieve: Option>, ) -> Result> { - let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?; - let documents = IndexMsg::documents( - &self.index_handle, - uuid, - offset, - limit, - attributes_to_retrieve, - ) - .await?; + let index = self.index_resolver.get_index(uid).await?; + let documents = spawn_blocking(move || index.retrieve_documents(offset, limit, attributes_to_retrieve)).await??; Ok(documents) } @@ -450,8 +437,8 @@ impl IndexController { doc_id: String, attributes_to_retrieve: Option>, ) -> Result { - let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?; - let document = IndexMsg::document(&self.index_handle, uuid, attributes_to_retrieve, doc_id).await?; + let index = self.index_resolver.get_index(uid).await?; + let document = spawn_blocking(move || index.retrieve_document(doc_id, attributes_to_retrieve)).await??; Ok(document) } @@ -460,12 +447,12 @@ impl IndexController { uid: String, mut index_settings: IndexSettings, ) -> Result { - if index_settings.uid.is_some() { - index_settings.uid.take(); - } - let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid.clone()).await?; - let meta = IndexMsg::update_index(&self.index_handle, uuid, index_settings).await?; + index_settings.uid.take(); + + let index = self.index_resolver.get_index(uid.clone()).await?; + let uuid = index.uuid; + let meta = spawn_blocking(move || index.update_primary_key(index_settings.primary_key)).await??; let meta = IndexMetadata { uuid, name: uid.clone(), @@ -476,14 +463,15 @@ impl IndexController { } pub async fn search(&self, uid: String, query: SearchQuery) -> Result { - let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?; - let result = IndexMsg::search(&self.index_handle, uuid, query).await?; + let index = time!(self.index_resolver.get_index(uid.clone()).await?); + let result = time!(spawn_blocking(move || time!(index.perform_search(query))).await??); 
Ok(result) } pub async fn get_index(&self, uid: String) -> Result { - let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid.clone()).await?; - let meta = IndexMsg::index_meta(&self.index_handle, uuid).await?; + let index = self.index_resolver.get_index(uid.clone()).await?; + let uuid = index.uuid; + let meta = spawn_blocking(move || index.meta()).await??; let meta = IndexMetadata { uuid, name: uid.clone(), @@ -494,15 +482,16 @@ impl IndexController { } pub async fn get_uuids_size(&self) -> Result { - let size = UuidResolverMsg::get_size(&self.uuid_resolver).await?; + let size = self.index_resolver.get_size().await?; Ok(size) } pub async fn get_index_stats(&self, uid: String) -> Result { - let uuid = UuidResolverMsg::get(&self.uuid_resolver, uid).await?; let update_infos = UpdateMsg::get_info(&self.update_handle).await?; - let mut stats = IndexMsg::index_stats(&self.index_handle, uuid).await?; - // Check if the currently indexing update is from out index. + let index = self.index_resolver.get_index(uid).await?; + let uuid = index.uuid; + let mut stats = spawn_blocking(move || index.stats()).await??; + // Check if the currently indexing update is from our index. stats.is_indexing = Some(Some(uuid) == update_infos.processing); Ok(stats) } @@ -513,17 +502,24 @@ impl IndexController { let mut last_update: Option> = None; let mut indexes = BTreeMap::new(); - for index in self.list_indexes().await? { - let mut index_stats = IndexMsg::index_stats(&self.index_handle, index.uuid).await?; - database_size += index_stats.size; + for (index_uid, index) in self.index_resolver.list().await? 
{ + let uuid = index.uuid; + let (mut stats, meta) = spawn_blocking::<_, IndexResult<_>>(move || { + let stats = index.stats()?; + let meta = index.meta()?; + Ok((stats, meta)) + }).await??; - last_update = last_update.map_or(Some(index.meta.updated_at), |last| { - Some(last.max(index.meta.updated_at)) + database_size += stats.size; + + last_update = last_update.map_or(Some(meta.updated_at), |last| { + Some(last.max(meta.updated_at)) }); - index_stats.is_indexing = Some(Some(index.uuid) == update_infos.processing); + // Check if the currently indexing update is from our index. + stats.is_indexing = Some(Some(uuid) == update_infos.processing); - indexes.insert(index.uid, index_stats); + indexes.insert(index_uid, stats); } Ok(Stats { diff --git a/meilisearch-lib/src/index_controller/updates/error.rs b/meilisearch-lib/src/index_controller/updates/error.rs index 858631f69..58635b3df 100644 --- a/meilisearch-lib/src/index_controller/updates/error.rs +++ b/meilisearch-lib/src/index_controller/updates/error.rs @@ -3,19 +3,17 @@ use std::error::Error; use meilisearch_error::{Code, ErrorCode}; -use crate::index_controller::indexes::error::IndexActorError; - -pub type Result = std::result::Result; +pub type Result = std::result::Result; #[derive(Debug, thiserror::Error)] #[allow(clippy::large_enum_variant)] -pub enum UpdateActorError { +pub enum UpdateLoopError { #[error("Update {0} not found.")] UnexistingUpdate(u64), #[error("Internal error: {0}")] Internal(Box), - #[error("{0}")] - IndexActor(#[from] IndexActorError), + //#[error("{0}")] + //IndexActor(#[from] IndexActorError), #[error( "update store was shut down due to a fatal error, please check your logs for more info." 
)] @@ -26,7 +24,7 @@ pub enum UpdateActorError { PayloadError(#[from] actix_web::error::PayloadError), } -impl From> for UpdateActorError +impl From> for UpdateLoopError where T: Sync + Send + 'static + fmt::Debug { fn from(other: tokio::sync::mpsc::error::SendError) -> Self { @@ -34,28 +32,28 @@ where T: Sync + Send + 'static + fmt::Debug } } -impl From for UpdateActorError { +impl From for UpdateLoopError { fn from(other: tokio::sync::oneshot::error::RecvError) -> Self { Self::Internal(Box::new(other)) } } internal_error!( - UpdateActorError: heed::Error, + UpdateLoopError: heed::Error, std::io::Error, serde_json::Error, tokio::task::JoinError ); -impl ErrorCode for UpdateActorError { +impl ErrorCode for UpdateLoopError { fn error_code(&self) -> Code { match self { - UpdateActorError::UnexistingUpdate(_) => Code::NotFound, - UpdateActorError::Internal(_) => Code::Internal, - UpdateActorError::IndexActor(e) => e.error_code(), - UpdateActorError::FatalUpdateStoreError => Code::Internal, - UpdateActorError::InvalidPayload(_) => Code::BadRequest, - UpdateActorError::PayloadError(error) => match error { + UpdateLoopError::UnexistingUpdate(_) => Code::NotFound, + UpdateLoopError::Internal(_) => Code::Internal, + //UpdateLoopError::IndexActor(e) => e.error_code(), + UpdateLoopError::FatalUpdateStoreError => Code::Internal, + UpdateLoopError::InvalidPayload(_) => Code::BadRequest, + UpdateLoopError::PayloadError(error) => match error { actix_web::error::PayloadError::Overflow => Code::PayloadTooLarge, _ => Code::Internal, }, diff --git a/meilisearch-lib/src/index_controller/updates/mod.rs b/meilisearch-lib/src/index_controller/updates/mod.rs index 750ca7c46..7cc38490f 100644 --- a/meilisearch-lib/src/index_controller/updates/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/mod.rs @@ -21,25 +21,25 @@ use serde_json::{Map, Value}; use tokio::sync::mpsc; use uuid::Uuid; -use self::error::{Result, UpdateActorError}; +use self::error::{Result, UpdateLoopError}; pub 
use self::message::UpdateMsg; use self::store::{UpdateStore, UpdateStoreInfo}; use crate::index_controller::update_file_store::UpdateFileStore; use status::UpdateStatus; -use super::indexes::IndexHandlerSender; +use super::index_resolver::HardStateIndexResolver; use super::{DocumentAdditionFormat, Payload, Update}; pub type UpdateSender = mpsc::Sender; pub fn create_update_handler( - index_sender: IndexHandlerSender, + index_resolver: Arc, db_path: impl AsRef, update_store_size: usize, ) -> anyhow::Result { let path = db_path.as_ref().to_owned(); let (sender, receiver) = mpsc::channel(100); - let actor = UpdateLoop::new(update_store_size, receiver, path, index_sender)?; + let actor = UpdateLoop::new(update_store_size, receiver, path, index_resolver)?; tokio::task::spawn_local(actor.run()); @@ -100,7 +100,7 @@ pub struct UpdateLoop { store: Arc, inbox: Option>, update_file_store: UpdateFileStore, - index_handle: IndexHandlerSender, + index_resolver: Arc, must_exit: Arc, } @@ -109,7 +109,7 @@ impl UpdateLoop { update_db_size: usize, inbox: mpsc::Receiver, path: impl AsRef, - index_handle: IndexHandlerSender, + index_resolver: Arc, ) -> anyhow::Result { let path = path.as_ref().to_owned(); std::fs::create_dir_all(&path)?; @@ -119,7 +119,7 @@ impl UpdateLoop { let must_exit = Arc::new(AtomicBool::new(false)); - let store = UpdateStore::open(options, &path, index_handle.clone(), must_exit.clone())?; + let store = UpdateStore::open(options, &path, index_resolver.clone(), must_exit.clone())?; let inbox = Some(inbox); @@ -128,9 +128,9 @@ impl UpdateLoop { Ok(Self { store, inbox, - index_handle, must_exit, update_file_store, + index_resolver, }) } @@ -249,7 +249,7 @@ impl UpdateLoop { tokio::task::spawn_blocking(move || { let result = store .meta(uuid, id)? - .ok_or(UpdateActorError::UnexistingUpdate(id))?; + .ok_or(UpdateLoopError::UnexistingUpdate(id))?; Ok(result) }) .await? 
@@ -263,18 +263,19 @@ impl UpdateLoop { Ok(()) } - async fn handle_snapshot(&self, uuids: HashSet, path: PathBuf) -> Result<()> { - let index_handle = self.index_handle.clone(); - let update_store = self.store.clone(); + async fn handle_snapshot(&self, _uuids: HashSet,_pathh: PathBuf) -> Result<()> { + todo!() + //let index_handle = self.index_resolver.clone(); + //let update_store = self.store.clone(); - tokio::task::spawn_blocking(move || update_store.snapshot(&uuids, &path, index_handle)) - .await??; + //tokio::task::spawn_blocking(move || update_store.snapshot(&uuids, &path, index_handle)) + //.await??; - Ok(()) + //Ok(()) } async fn handle_dump(&self, uuids: HashSet, path: PathBuf) -> Result<()> { - let index_handle = self.index_handle.clone(); + let index_handle = self.index_resolver.clone(); let update_store = self.store.clone(); tokio::task::spawn_blocking(move || -> Result<()> { diff --git a/meilisearch-lib/src/index_controller/updates/store/dump.rs b/meilisearch-lib/src/index_controller/updates/store/dump.rs index 689678cc4..cf5d7e842 100644 --- a/meilisearch-lib/src/index_controller/updates/store/dump.rs +++ b/meilisearch-lib/src/index_controller/updates/store/dump.rs @@ -1,16 +1,11 @@ -use std::{ - collections::HashSet, - fs::{create_dir_all, File}, - io::Write, - path::{Path, PathBuf}, -}; +use std::{collections::HashSet, fs::{create_dir_all, File}, io::Write, path::{Path, PathBuf}, sync::Arc}; use heed::RoTxn; use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::{Result, State, UpdateStore}; -use crate::index_controller::{indexes::{IndexHandlerSender, IndexMsg}, updates::{status::UpdateStatus}}; +use crate::index_controller::{index_resolver::HardStateIndexResolver, updates::status::UpdateStatus}; #[derive(Serialize, Deserialize)] struct UpdateEntry { @@ -23,7 +18,7 @@ impl UpdateStore { &self, uuids: &HashSet, path: PathBuf, - handle: IndexHandlerSender, + handle: Arc, ) -> Result<()> { let state_lock = self.state.write(); 
state_lock.swap(State::Dumping); @@ -171,13 +166,14 @@ impl UpdateStore { } async fn dump_indexes( - uuids: &HashSet, - handle: IndexHandlerSender, - path: impl AsRef, + _uuids: &HashSet, + _handle: Arc, + _path: impl AsRef, ) -> Result<()> { - for uuid in uuids { - IndexMsg::dump(&handle, *uuid, path.as_ref().to_owned()).await?; - } + todo!() + //for uuid in uuids { + //IndexMsg::dump(&handle, *uuid, path.as_ref().to_owned()).await?; + //} - Ok(()) + //Ok(()) } diff --git a/meilisearch-lib/src/index_controller/updates/store/mod.rs b/meilisearch-lib/src/index_controller/updates/store/mod.rs index 25eb840c9..8d40d8309 100644 --- a/meilisearch-lib/src/index_controller/updates/store/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/store/mod.rs @@ -12,7 +12,6 @@ use std::{ }; use arc_swap::ArcSwap; -use futures::StreamExt; use heed::types::{ByteSlice, OwnedType, SerdeJson}; use heed::zerocopy::U64; use heed::{CompactionOption, Database, Env, EnvOpenOptions}; @@ -30,7 +29,6 @@ use super::RegisterUpdate; use super::error::Result; use super::status::{Enqueued, Processing}; use crate::EnvSizer; -use crate::index_controller::indexes::{CONCURRENT_INDEX_MSG, IndexHandlerSender, IndexMsg}; use crate::index_controller::update_files_path; use crate::index_controller::updates::*; @@ -148,7 +146,7 @@ impl UpdateStore { pub fn open( options: EnvOpenOptions, path: impl AsRef, - index_handle: IndexHandlerSender, + index_resolver: Arc, must_exit: Arc, ) -> anyhow::Result> { let (update_store, mut notification_receiver) = Self::new(options, path)?; @@ -173,7 +171,7 @@ impl UpdateStore { loop { match update_store_weak.upgrade() { Some(update_store) => { - let handler = index_handle.clone(); + let handler = index_resolver.clone(); let res = tokio::task::spawn_blocking(move || { update_store.process_pending_update(handler) }) @@ -286,7 +284,7 @@ impl UpdateStore { /// Executes the user provided function on the next pending update (the one with the lowest id). 
/// This is asynchronous as it let the user process the update with a read-only txn and /// only writing the result meta to the processed-meta store *after* it has been processed. - fn process_pending_update(&self, index_handle: IndexHandlerSender) -> Result> { + fn process_pending_update(&self, index_resolver: Arc) -> Result> { // Create a read transaction to be able to retrieve the pending update in order. let rtxn = self.env.read_txn()?; let first_meta = self.pending_queue.first(&rtxn)?; @@ -303,7 +301,7 @@ impl UpdateStore { state.swap(State::Processing(index_uuid, processing.clone())); let result = - self.perform_update(processing, index_handle, index_uuid, global_id); + self.perform_update(processing, index_resolver, index_uuid, global_id); state.swap(State::Idle); @@ -316,18 +314,18 @@ impl UpdateStore { fn perform_update( &self, processing: Processing, - index_handle: IndexHandlerSender, + index_resolver: Arc, index_uuid: Uuid, global_id: u64, ) -> Result> { // Process the pending update using the provided user function. 
let handle = Handle::current(); let update_id = processing.id(); - let result = - match handle.block_on(IndexMsg::update(&index_handle, index_uuid, processing.clone())) { - Ok(result) => result, - Err(e) => Err(processing.fail(e)), - }; + //IndexMsg::update(index_resolver, index_uuid, processing.clone() + let result = match handle.block_on(index_resolver.get_index_by_uuid(index_uuid)) { + Ok(index) => index.handle_update(processing), + Err(e) => Err(processing.fail(e)), + }; // Once the pending update have been successfully processed // we must remove the content from the pending and processing stores and @@ -484,9 +482,9 @@ impl UpdateStore { pub fn snapshot( &self, - uuids: &HashSet, + _uuids: &HashSet, path: impl AsRef, - handle: IndexHandlerSender, + handle: Arc, ) -> Result<()> { let state_lock = self.state.write(); state_lock.swap(State::Snapshoting); @@ -522,22 +520,23 @@ impl UpdateStore { //} } - let path = &path.as_ref().to_path_buf(); - let handle = &handle; + let _path = &path.as_ref().to_path_buf(); + let _handle = &handle; // Perform the snapshot of each index concurently. 
Only a third of the capabilities of // the index actor at a time not to put too much pressure on the index actor - let mut stream = futures::stream::iter(uuids.iter()) - .map(move |uuid| IndexMsg::snapshot(handle,*uuid, path.clone())) - .buffer_unordered(CONCURRENT_INDEX_MSG / 3); + todo!() + //let mut stream = futures::stream::iter(uuids.iter()) + //.map(move |uuid| IndexMsg::snapshot(handle,*uuid, path.clone())) + //.buffer_unordered(CONCURRENT_INDEX_MSG / 3); - Handle::current().block_on(async { - while let Some(res) = stream.next().await { - res?; - } - Ok(()) as Result<()> - })?; + //Handle::current().block_on(async { + //while let Some(res) = stream.next().await { + //res?; + //} + //Ok(()) as Result<()> + //})?; - Ok(()) + //Ok(()) } pub fn get_info(&self) -> Result { From dfce44fa3bb1a9dbfa1663e31b58b6443abc7d7e Mon Sep 17 00:00:00 2001 From: mpostma Date: Fri, 24 Sep 2021 12:03:16 +0200 Subject: [PATCH 08/37] rename data to meilisearch --- meilisearch-http/src/routes/dump.rs | 8 ++++---- meilisearch-http/src/routes/indexes/documents.rs | 16 ++++++++-------- meilisearch-http/src/routes/indexes/mod.rs | 12 ++++++------ meilisearch-http/src/routes/indexes/search.rs | 8 ++++---- meilisearch-http/src/routes/indexes/updates.rs | 8 ++++---- meilisearch-http/src/routes/mod.rs | 10 +++++----- 6 files changed, 31 insertions(+), 31 deletions(-) diff --git a/meilisearch-http/src/routes/dump.rs b/meilisearch-http/src/routes/dump.rs index a598f875b..494e97516 100644 --- a/meilisearch-http/src/routes/dump.rs +++ b/meilisearch-http/src/routes/dump.rs @@ -11,8 +11,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) { .service(web::resource("/{dump_uid}/status").route(web::get().to(get_dump_status))); } -pub async fn create_dump(data: GuardedData) -> Result { - let res = data.create_dump().await?; +pub async fn create_dump(meilisearch: GuardedData) -> Result { + let res = meilisearch.create_dump().await?; debug!("returns: {:?}", res); Ok(HttpResponse::Accepted().json(res)) 
@@ -30,10 +30,10 @@ struct DumpParam { } async fn get_dump_status( - data: GuardedData, + meilisearch: GuardedData, path: web::Path, ) -> Result { - let res = data.dump_info(path.dump_uid.clone()).await?; + let res = meilisearch.dump_info(path.dump_uid.clone()).await?; debug!("returns: {:?}", res); Ok(HttpResponse::Ok().json(res)) diff --git a/meilisearch-http/src/routes/indexes/documents.rs b/meilisearch-http/src/routes/indexes/documents.rs index b7d13d16b..b5437c093 100644 --- a/meilisearch-http/src/routes/indexes/documents.rs +++ b/meilisearch-http/src/routes/indexes/documents.rs @@ -88,12 +88,12 @@ pub fn configure(cfg: &mut web::ServiceConfig) { } pub async fn get_document( - data: GuardedData, + meilisearch: GuardedData, path: web::Path, ) -> Result { let index = path.index_uid.clone(); let id = path.document_id.clone(); - let document = data + let document = meilisearch .document(index, id, None as Option>) .await?; debug!("returns: {:?}", document); @@ -120,7 +120,7 @@ pub struct BrowseQuery { } pub async fn get_all_documents( - data: GuardedData, + meilisearch: GuardedData, path: web::Path, params: web::Query, ) -> Result { @@ -136,7 +136,7 @@ pub async fn get_all_documents( Some(names) }); - let documents = data + let documents = meilisearch .documents( path.index_uid.clone(), params.offset.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_OFFSET), @@ -157,7 +157,7 @@ pub struct UpdateDocumentsQuery { /// Route used when the payload type is "application/json" /// Used to add or replace documents pub async fn add_documents( - data: GuardedData, + meilisearch: GuardedData, path: web::Path, params: web::Query, body: Payload, @@ -169,7 +169,7 @@ pub async fn add_documents( method: IndexDocumentsMethod::ReplaceDocuments, format: DocumentAdditionFormat::Json, }; - let update_status = data + let update_status = meilisearch .register_update(path.index_uid.as_str(), update) .await?; @@ -180,7 +180,7 @@ pub async fn add_documents( /// Route used when the payload type is 
"application/json" /// Used to add or replace documents pub async fn update_documents( - data: GuardedData, + meilisearch: GuardedData, path: web::Path, params: web::Query, body: Payload, @@ -192,7 +192,7 @@ pub async fn update_documents( method: IndexDocumentsMethod::UpdateDocuments, format: DocumentAdditionFormat::Json, }; - let update_status = data + let update_status = meilisearch .register_update(path.index_uid.as_str(), update) .await?; diff --git a/meilisearch-http/src/routes/indexes/mod.rs b/meilisearch-http/src/routes/indexes/mod.rs index da7008640..2bf8afa08 100644 --- a/meilisearch-http/src/routes/indexes/mod.rs +++ b/meilisearch-http/src/routes/indexes/mod.rs @@ -76,16 +76,16 @@ pub struct UpdateIndexResponse { } pub async fn get_index( - data: GuardedData, + meilisearch: GuardedData, path: web::Path, ) -> Result { - let meta = data.get_index(path.index_uid.clone()).await?; + let meta = meilisearch.get_index(path.index_uid.clone()).await?; debug!("returns: {:?}", meta); Ok(HttpResponse::Ok().json(meta)) } pub async fn update_index( - data: GuardedData, + meilisearch: GuardedData, path: web::Path, body: web::Json, ) -> Result { @@ -95,7 +95,7 @@ pub async fn update_index( uid: body.uid, primary_key: body.primary_key, }; - let meta = data + let meta = meilisearch .update_index(path.into_inner().index_uid, settings) .await?; debug!("returns: {:?}", meta); @@ -111,10 +111,10 @@ pub async fn update_index( //} pub async fn get_index_stats( - data: GuardedData, + meilisearch: GuardedData, path: web::Path, ) -> Result { - let response = data.get_index_stats(path.index_uid.clone()).await?; + let response = meilisearch.get_index_stats(path.index_uid.clone()).await?; debug!("returns: {:?}", response); Ok(HttpResponse::Ok().json(response)) diff --git a/meilisearch-http/src/routes/indexes/search.rs b/meilisearch-http/src/routes/indexes/search.rs index 2b68e6ed6..1ae8eb2f7 100644 --- a/meilisearch-http/src/routes/indexes/search.rs +++ 
b/meilisearch-http/src/routes/indexes/search.rs @@ -82,13 +82,13 @@ impl From for SearchQuery { } pub async fn search_with_url_query( - data: GuardedData, + meilisearch: GuardedData, path: web::Path, params: web::Query, ) -> Result { debug!("called with params: {:?}", params); let query = params.into_inner().into(); - let search_result = data.search(path.into_inner().index_uid, query).await?; + let search_result = meilisearch.search(path.into_inner().index_uid, query).await?; // Tests that the nb_hits is always set to false #[cfg(test)] @@ -99,12 +99,12 @@ pub async fn search_with_url_query( } pub async fn search_with_post( - data: GuardedData, + meilisearch: GuardedData, path: web::Path, params: web::Json, ) -> Result { debug!("search called with params: {:?}", params); - let search_result = data + let search_result = meilisearch .search(path.into_inner().index_uid, params.into_inner()) .await?; diff --git a/meilisearch-http/src/routes/indexes/updates.rs b/meilisearch-http/src/routes/indexes/updates.rs index 547977790..cfef5ba63 100644 --- a/meilisearch-http/src/routes/indexes/updates.rs +++ b/meilisearch-http/src/routes/indexes/updates.rs @@ -37,11 +37,11 @@ pub struct UpdateParam { } pub async fn get_update_status( - data: GuardedData, + meilisearch: GuardedData, path: web::Path, ) -> Result { let params = path.into_inner(); - let meta = data + let meta = meilisearch .update_status(params.index_uid, params.update_id) .await?; let meta = UpdateStatusResponse::from(meta); @@ -50,10 +50,10 @@ pub async fn get_update_status( } pub async fn get_all_updates_status( - data: GuardedData, + meilisearch: GuardedData, path: web::Path, ) -> Result { - let metas = data.all_update_status(path.into_inner().index_uid).await?; + let metas = meilisearch.all_update_status(path.into_inner().index_uid).await?; let metas = metas .into_iter() .map(UpdateStatusResponse::from) diff --git a/meilisearch-http/src/routes/mod.rs b/meilisearch-http/src/routes/mod.rs index a38689bd9..ff8681498 
100644 --- a/meilisearch-http/src/routes/mod.rs +++ b/meilisearch-http/src/routes/mod.rs @@ -233,8 +233,8 @@ pub async fn running() -> HttpResponse { HttpResponse::Ok().json(serde_json::json!({ "status": "MeiliSearch is running" })) } -async fn get_stats(data: GuardedData) -> Result { - let response = data.get_all_stats().await?; +async fn get_stats(meilisearch: GuardedData) -> Result { + let response = meilisearch.get_all_stats().await?; debug!("returns: {:?}", response); Ok(HttpResponse::Ok().json(response)) @@ -248,7 +248,7 @@ struct VersionResponse { pkg_version: String, } -async fn get_version(_data: GuardedData) -> HttpResponse { +async fn get_version(_meilisearch: GuardedData) -> HttpResponse { let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown"); let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown"); @@ -265,8 +265,8 @@ struct KeysResponse { public: Option, } -pub async fn list_keys(data: GuardedData) -> HttpResponse { - let api_keys = (*data).clone(); +pub async fn list_keys(meilisearch: GuardedData) -> HttpResponse { + let api_keys = (*meilisearch).clone(); HttpResponse::Ok().json(&KeysResponse { private: api_keys.private, public: api_keys.public, From c32012c44a5f54736e4f0b134f957ebae265bbd5 Mon Sep 17 00:00:00 2001 From: mpostma Date: Fri, 24 Sep 2021 14:55:57 +0200 Subject: [PATCH 09/37] restore settings updates --- .../src/routes/indexes/documents.rs | 4 +- meilisearch-http/src/routes/indexes/mod.rs | 2 +- .../src/routes/indexes/settings.rs | 331 +++++++++--------- meilisearch-http/src/routes/mod.rs | 6 +- meilisearch-lib/src/index/update_handler.rs | 23 -- meilisearch-lib/src/index/updates.rs | 168 +++++---- meilisearch-lib/src/index_controller/mod.rs | 7 +- .../src/index_controller/updates/mod.rs | 3 + 8 files changed, 264 insertions(+), 280 deletions(-) diff --git a/meilisearch-http/src/routes/indexes/documents.rs b/meilisearch-http/src/routes/indexes/documents.rs index b5437c093..821b962ea 100644 --- 
a/meilisearch-http/src/routes/indexes/documents.rs +++ b/meilisearch-http/src/routes/indexes/documents.rs @@ -170,7 +170,7 @@ pub async fn add_documents( format: DocumentAdditionFormat::Json, }; let update_status = meilisearch - .register_update(path.index_uid.as_str(), update) + .register_update(path.into_inner().index_uid, update) .await?; debug!("returns: {:?}", update_status); @@ -193,7 +193,7 @@ pub async fn update_documents( format: DocumentAdditionFormat::Json, }; let update_status = meilisearch - .register_update(path.index_uid.as_str(), update) + .register_update(path.into_inner().index_uid, update) .await?; debug!("returns: {:?}", update_status); diff --git a/meilisearch-http/src/routes/indexes/mod.rs b/meilisearch-http/src/routes/indexes/mod.rs index 2bf8afa08..59ad6fa0f 100644 --- a/meilisearch-http/src/routes/indexes/mod.rs +++ b/meilisearch-http/src/routes/indexes/mod.rs @@ -32,7 +32,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) { .service(web::scope("/documents").configure(documents::configure)) .service(web::scope("/search").configure(search::configure)) .service(web::scope("/updates").configure(updates::configure)) - //.service(web::scope("/settings").configure(settings::configure)), + .service(web::scope("/settings").configure(settings::configure)), ); } diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs index 07a96003f..180be4108 100644 --- a/meilisearch-http/src/routes/indexes/settings.rs +++ b/meilisearch-http/src/routes/indexes/settings.rs @@ -1,184 +1,191 @@ -//use log::debug; +use log::debug; -//use crate::extractors::authentication::{policies::*, GuardedData}; -//use crate::index::Settings; -//use crate::Data; -//use crate::error::ResponseError; +use actix_web::{HttpResponse, web}; +use meilisearch_lib::MeiliSearch; +use meilisearch_lib::index::{Settings, Unchecked}; +use meilisearch_lib::index_controller::Update; -//#[macro_export] -//macro_rules! 
make_setting_route { - //($route:literal, $type:ty, $attr:ident, $camelcase_attr:literal) => { - //pub mod $attr { - //use log::debug; - //use actix_web::{web, HttpResponse, Resource}; +use crate::extractors::authentication::{policies::*, GuardedData}; +use crate::error::ResponseError; - //use milli::update::Setting; +#[macro_export] +macro_rules! make_setting_route { + ($route:literal, $type:ty, $attr:ident, $camelcase_attr:literal) => { + pub mod $attr { + use log::debug; + use actix_web::{web, HttpResponse, Resource}; - //use crate::data; - //use crate::error::ResponseError; - //use crate::index::Settings; - //use crate::extractors::authentication::{GuardedData, policies::*}; + use milli::update::Setting; + use meilisearch_lib::{MeiliSearch, index::Settings, index_controller::Update}; - //pub async fn delete( - //data: GuardedData, - //index_uid: web::Path, - //) -> Result { - //use crate::index::Settings; - //let settings = Settings { - //$attr: Setting::Reset, - //..Default::default() - //}; - //let update_status = data.update_settings(index_uid.into_inner(), settings, false).await?; - //debug!("returns: {:?}", update_status); - //Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) - //} + use crate::error::ResponseError; + use crate::extractors::authentication::{GuardedData, policies::*}; - //pub async fn update( - //data: GuardedData, - //index_uid: actix_web::web::Path, - //body: actix_web::web::Json>, - //) -> std::result::Result { - //let settings = Settings { - //$attr: match body.into_inner() { - //Some(inner_body) => Setting::Set(inner_body), - //None => Setting::Reset - //}, - //..Default::default() - //}; + pub async fn delete( + meilisearch: GuardedData, + index_uid: web::Path, + ) -> Result { + let settings = Settings { + $attr: Setting::Reset, + ..Default::default() + }; + let update = Update::Settings(settings); + let update_status = meilisearch.register_update(index_uid.into_inner(), update).await?; + 
debug!("returns: {:?}", update_status); + Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) + } - //let update_status = data.update_settings(index_uid.into_inner(), settings, true).await?; - //debug!("returns: {:?}", update_status); - //Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) - //} + pub async fn update( + meilisearch: GuardedData, + index_uid: actix_web::web::Path, + body: actix_web::web::Json>, + ) -> std::result::Result { + let settings = Settings { + $attr: match body.into_inner() { + Some(inner_body) => Setting::Set(inner_body), + None => Setting::Reset + }, + ..Default::default() + }; - //pub async fn get( - //data: GuardedData, - //index_uid: actix_web::web::Path, - //) -> std::result::Result { - //let settings = data.settings(index_uid.into_inner()).await?; - //debug!("returns: {:?}", settings); - //let mut json = serde_json::json!(&settings); - //let val = json[$camelcase_attr].take(); - //Ok(HttpResponse::Ok().json(val)) - //} + let update = Update::Settings(settings); + let update_status = meilisearch.register_update(index_uid.into_inner(), update).await?; + debug!("returns: {:?}", update_status); + Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) + } - //pub fn resources() -> Resource { - //Resource::new($route) - //.route(web::get().to(get)) - //.route(web::post().to(update)) - //.route(web::delete().to(delete)) - //} - //} - //}; -//} + pub async fn get( + meilisearch: GuardedData, + index_uid: actix_web::web::Path, + ) -> std::result::Result { + let settings = meilisearch.settings(index_uid.into_inner()).await?; + debug!("returns: {:?}", settings); + let mut json = serde_json::json!(&settings); + let val = json[$camelcase_attr].take(); + Ok(HttpResponse::Ok().json(val)) + } -//make_setting_route!( - //"/filterable-attributes", - //std::collections::BTreeSet, - //filterable_attributes, - //"filterableAttributes" -//); + pub 
fn resources() -> Resource { + Resource::new($route) + .route(web::get().to(get)) + .route(web::post().to(update)) + .route(web::delete().to(delete)) + } + } + }; +} -//make_setting_route!( - //"/sortable-attributes", - //std::collections::BTreeSet, - //sortable_attributes, - //"sortableAttributes" -//); +make_setting_route!( + "/filterable-attributes", + std::collections::BTreeSet, + filterable_attributes, + "filterableAttributes" +); -//make_setting_route!( - //"/displayed-attributes", - //Vec, - //displayed_attributes, - //"displayedAttributes" -//); +make_setting_route!( + "/sortable-attributes", + std::collections::BTreeSet, + sortable_attributes, + "sortableAttributes" +); -//make_setting_route!( - //"/searchable-attributes", - //Vec, - //searchable_attributes, - //"searchableAttributes" -//); +make_setting_route!( + "/displayed-attributes", + Vec, + displayed_attributes, + "displayedAttributes" +); -//make_setting_route!( - //"/stop-words", - //std::collections::BTreeSet, - //stop_words, - //"stopWords" -//); +make_setting_route!( + "/searchable-attributes", + Vec, + searchable_attributes, + "searchableAttributes" +); -//make_setting_route!( - //"/synonyms", - //std::collections::BTreeMap>, - //synonyms, - //"synonyms" -//); +make_setting_route!( + "/stop-words", + std::collections::BTreeSet, + stop_words, + "stopWords" +); -//make_setting_route!( - //"/distinct-attribute", - //String, - //distinct_attribute, - //"distinctAttribute" -//); +make_setting_route!( + "/synonyms", + std::collections::BTreeMap>, + synonyms, + "synonyms" +); -//make_setting_route!("/ranking-rules", Vec, ranking_rules, "rankingRules"); +make_setting_route!( + "/distinct-attribute", + String, + distinct_attribute, + "distinctAttribute" +); -//macro_rules! 
generate_configure { - //($($mod:ident),*) => { - //pub fn configure(cfg: &mut web::ServiceConfig) { - //cfg.service( - //web::resource("") - ////.route(web::post().to(update_all)) - //.route(web::get().to(get_all)) - ////.route(web::delete().to(delete_all))) - //$(.service($mod::resources()))*; - //} - //}; -//} +make_setting_route!("/ranking-rules", Vec, ranking_rules, "rankingRules"); -//generate_configure!( - //filterable_attributes, - //sortable_attributes, - //displayed_attributes, - //searchable_attributes, - //distinct_attribute, - //stop_words, - //synonyms, - //ranking_rules -//); +macro_rules! generate_configure { + ($($mod:ident),*) => { + pub fn configure(cfg: &mut web::ServiceConfig) { + cfg.service( + web::resource("") + .route(web::post().to(update_all)) + .route(web::get().to(get_all)) + .route(web::delete().to(delete_all))) + $(.service($mod::resources()))*; + } + }; +} -//pub async fn update_all( - //data: GuardedData, - //index_uid: web::Path, - //body: web::Json>, -//) -> Result { - //let settings = body.into_inner().check(); - //let update_result = data - //.update_settings(index_uid.into_inner(), settings, true) - //.await?; - //let json = serde_json::json!({ "updateId": update_result.id() }); - //debug!("returns: {:?}", json); - //Ok(HttpResponse::Accepted().json(json)) -//} +generate_configure!( + filterable_attributes, + sortable_attributes, + displayed_attributes, + searchable_attributes, + distinct_attribute, + stop_words, + synonyms, + ranking_rules +); -//pub async fn get_all( - //data: GuardedData, - //index_uid: web::Path, -//) -> Result { - //let settings = data.settings(index_uid.into_inner()).await?; - //debug!("returns: {:?}", settings); - //Ok(HttpResponse::Ok().json(settings)) -//} +pub async fn update_all( + meilisearch: GuardedData, + index_uid: web::Path, + body: web::Json>, +) -> Result { + let settings = body.into_inner(); -//pub async fn delete_all( - //data: GuardedData, - //index_uid: web::Path, -//) -> Result { - //let 
settings = Settings::cleared(); - //let update_result = data - //.update_settings(index_uid.into_inner(), settings, false) - //.await?; - //let json = serde_json::json!({ "updateId": update_result.id() }); - //debug!("returns: {:?}", json); - //Ok(HttpResponse::Accepted().json(json)) -//} + let update = Update::Settings(settings); + let update_result = meilisearch + .register_update(index_uid.into_inner(), update) + .await?; + let json = serde_json::json!({ "updateId": update_result.id() }); + debug!("returns: {:?}", json); + Ok(HttpResponse::Accepted().json(json)) +} + +pub async fn get_all( + data: GuardedData, + index_uid: web::Path, +) -> Result { + let settings = data.settings(index_uid.into_inner()).await?; + debug!("returns: {:?}", settings); + Ok(HttpResponse::Ok().json(settings)) +} + +pub async fn delete_all( + data: GuardedData, + index_uid: web::Path, +) -> Result { + let settings = Settings::cleared(); + + let update = Update::Settings(settings.into_unchecked()); + let update_result = data + .register_update(index_uid.into_inner(), update) + .await?; + let json = serde_json::json!({ "updateId": update_result.id() }); + debug!("returns: {:?}", json); + Ok(HttpResponse::Accepted().json(json)) +} diff --git a/meilisearch-http/src/routes/mod.rs b/meilisearch-http/src/routes/mod.rs index ff8681498..b30fce164 100644 --- a/meilisearch-http/src/routes/mod.rs +++ b/meilisearch-http/src/routes/mod.rs @@ -69,13 +69,13 @@ impl From<&UpdateStatus> for UpdateType { _ => unreachable!(), } } + RegisterUpdate::Settings(settings) => UpdateType::Settings { + settings: settings.clone(), + }, //UpdateMeta::ClearDocuments => UpdateType::ClearAll, //UpdateMeta::DeleteDocuments { ids } => UpdateType::DocumentsDeletion { //number: Some(ids.len()), //}, - //UpdateMeta::Settings(settings) => UpdateType::Settings { - //settings: settings.clone(), - //}, } } } diff --git a/meilisearch-lib/src/index/update_handler.rs b/meilisearch-lib/src/index/update_handler.rs index 
6969b4567..07d57376d 100644 --- a/meilisearch-lib/src/index/update_handler.rs +++ b/meilisearch-lib/src/index/update_handler.rs @@ -1,10 +1,7 @@ -use crate::index::Index; use milli::update::UpdateBuilder; use milli::CompressionType; use rayon::ThreadPool; -use crate::index_controller::updates::RegisterUpdate; -use crate::index_controller::updates::status::{Failed, Processed, Processing}; use crate::options::IndexerOpts; pub struct UpdateHandler { @@ -49,24 +46,4 @@ impl UpdateHandler { update_builder.chunk_compression_type(self.chunk_compression_type); update_builder } - - pub fn handle_update( - &self, - index: &Index, - meta: Processing, - ) -> Result { - let update_id = meta.id(); - let update_builder = self.update_builder(update_id); - - let result = match meta.meta() { - RegisterUpdate::DocumentAddition { primary_key, content_uuid, method } => { - index.update_documents(*method, *content_uuid, update_builder, primary_key.as_deref()) - } - }; - - match result { - Ok(result) => Ok(meta.process(result)), - Err(e) => Err(meta.fail(e)), - } - } } diff --git a/meilisearch-lib/src/index/updates.rs b/meilisearch-lib/src/index/updates.rs index 20cf6b2ec..fca925031 100644 --- a/meilisearch-lib/src/index/updates.rs +++ b/meilisearch-lib/src/index/updates.rs @@ -8,6 +8,7 @@ use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder}; use serde::{Deserialize, Serialize, Serializer}; use uuid::Uuid; +use crate::RegisterUpdate; use crate::index_controller::updates::status::{Failed, Processed, Processing, UpdateResult}; use super::{Index, IndexMeta}; @@ -164,7 +165,27 @@ pub struct Facets { impl Index { pub fn handle_update(&self, update: Processing) -> std::result::Result { - self.update_handler.handle_update(self, update) + let update_id = update.id(); + let update_builder = self.update_handler.update_builder(update_id); + let result = (|| { + let mut txn = self.write_txn()?; + let result = match update.meta() { + RegisterUpdate::DocumentAddition { primary_key, 
content_uuid, method } => { + self.update_documents(&mut txn, *method, *content_uuid, update_builder, primary_key.as_deref()) + } + RegisterUpdate::Settings(settings) => { + let settings = settings.clone().check(); + self.update_settings(&mut txn, &settings, update_builder) + }, + }; + txn.commit()?; + result + })(); + + match result { + Ok(result) => Ok(update.process(result)), + Err(e) => Err(update.fail(e)), + } } pub fn update_primary_key(&self, primary_key: Option) -> Result { @@ -188,21 +209,7 @@ impl Index { } } - pub fn update_documents( - &self, - method: IndexDocumentsMethod, - content_uuid: Uuid, - update_builder: UpdateBuilder, - primary_key: Option<&str>, - ) -> Result { - let mut txn = self.write_txn()?; - let result = self.update_documents_txn(&mut txn, method, content_uuid, update_builder, primary_key)?; - txn.commit()?; - - Ok(result) - } - - pub fn update_documents_txn<'a, 'b>( + fn update_documents<'a, 'b>( &'a self, txn: &mut heed::RwTxn<'a, 'b>, method: IndexDocumentsMethod, @@ -246,86 +253,75 @@ impl Index { //.map_err(Into::into) //} - //pub fn update_settings_txn<'a, 'b>( - //&'a self, - //txn: &mut heed::RwTxn<'a, 'b>, - //settings: &Settings, - //update_builder: UpdateBuilder, - //) -> Result { - //// We must use the write transaction of the update here. - //let mut builder = update_builder.settings(txn, self); + fn update_settings<'a, 'b>( + &'a self, + txn: &mut heed::RwTxn<'a, 'b>, + settings: &Settings, + update_builder: UpdateBuilder, + ) -> Result { + // We must use the write transaction of the update here. 
+ let mut builder = update_builder.settings(txn, self); - //match settings.searchable_attributes { - //Setting::Set(ref names) => builder.set_searchable_fields(names.clone()), - //Setting::Reset => builder.reset_searchable_fields(), - //Setting::NotSet => (), - //} + match settings.searchable_attributes { + Setting::Set(ref names) => builder.set_searchable_fields(names.clone()), + Setting::Reset => builder.reset_searchable_fields(), + Setting::NotSet => (), + } - //match settings.displayed_attributes { - //Setting::Set(ref names) => builder.set_displayed_fields(names.clone()), - //Setting::Reset => builder.reset_displayed_fields(), - //Setting::NotSet => (), - //} + match settings.displayed_attributes { + Setting::Set(ref names) => builder.set_displayed_fields(names.clone()), + Setting::Reset => builder.reset_displayed_fields(), + Setting::NotSet => (), + } - //match settings.filterable_attributes { - //Setting::Set(ref facets) => { - //builder.set_filterable_fields(facets.clone().into_iter().collect()) - //} - //Setting::Reset => builder.reset_filterable_fields(), - //Setting::NotSet => (), - //} + match settings.filterable_attributes { + Setting::Set(ref facets) => { + builder.set_filterable_fields(facets.clone().into_iter().collect()) + } + Setting::Reset => builder.reset_filterable_fields(), + Setting::NotSet => (), + } - //match settings.sortable_attributes { - //Setting::Set(ref fields) => { - //builder.set_sortable_fields(fields.iter().cloned().collect()) - //} - //Setting::Reset => builder.reset_sortable_fields(), - //Setting::NotSet => (), - //} + match settings.sortable_attributes { + Setting::Set(ref fields) => { + builder.set_sortable_fields(fields.iter().cloned().collect()) + } + Setting::Reset => builder.reset_sortable_fields(), + Setting::NotSet => (), + } - //match settings.ranking_rules { - //Setting::Set(ref criteria) => builder.set_criteria(criteria.clone()), - //Setting::Reset => builder.reset_criteria(), - //Setting::NotSet => (), - //} + match 
settings.ranking_rules { + Setting::Set(ref criteria) => builder.set_criteria(criteria.clone()), + Setting::Reset => builder.reset_criteria(), + Setting::NotSet => (), + } - //match settings.stop_words { - //Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()), - //Setting::Reset => builder.reset_stop_words(), - //Setting::NotSet => (), - //} + match settings.stop_words { + Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()), + Setting::Reset => builder.reset_stop_words(), + Setting::NotSet => (), + } - //match settings.synonyms { - //Setting::Set(ref synonyms) => { - //builder.set_synonyms(synonyms.clone().into_iter().collect()) - //} - //Setting::Reset => builder.reset_synonyms(), - //Setting::NotSet => (), - //} + match settings.synonyms { + Setting::Set(ref synonyms) => { + builder.set_synonyms(synonyms.clone().into_iter().collect()) + } + Setting::Reset => builder.reset_synonyms(), + Setting::NotSet => (), + } - //match settings.distinct_attribute { - //Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()), - //Setting::Reset => builder.reset_distinct_field(), - //Setting::NotSet => (), - //} + match settings.distinct_attribute { + Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()), + Setting::Reset => builder.reset_distinct_field(), + Setting::NotSet => (), + } - //builder.execute(|indexing_step, update_id| { - //debug!("update {}: {:?}", update_id, indexing_step) - //})?; + builder.execute(|indexing_step, update_id| { + debug!("update {}: {:?}", update_id, indexing_step) + })?; - //Ok(UpdateResult::Other) - //} - - //pub fn update_settings( - //&self, - //settings: &Settings, - //update_builder: UpdateBuilder, - //) -> Result { - //let mut txn = self.write_txn()?; - //let result = self.update_settings_txn(&mut txn, settings, update_builder)?; - //txn.commit()?; - //Ok(result) - //} + Ok(UpdateResult::Other) + } //pub fn delete_documents( //&self, diff --git 
a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index da108fe68..f9ff4fbbe 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -18,7 +18,7 @@ use dump_actor::DumpActorHandle; pub use dump_actor::{DumpInfo, DumpStatus}; use snapshot::load_snapshot; -use crate::index::{Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings}; +use crate::index::{Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked}; use crate::index_controller::index_resolver::create_index_resolver; use crate::options::IndexerOpts; use error::Result; @@ -95,6 +95,7 @@ pub struct Stats { #[derive(derivative::Derivative)] #[derivative(Debug)] pub enum Update { + Settings(Settings), DocumentAddition { #[derivative(Debug="ignore")] payload: Payload, @@ -242,8 +243,8 @@ impl IndexController { IndexControllerBuilder::default() } - pub async fn register_update(&self, uid: &str, update: Update) -> Result { - match self.index_resolver.get_uuid(uid.to_string()).await { + pub async fn register_update(&self, uid: String, update: Update) -> Result { + match self.index_resolver.get_uuid(uid).await { Ok(uuid) => { let update_result = UpdateMsg::update(&self.update_handle, uuid, update).await?; Ok(update_result) diff --git a/meilisearch-lib/src/index_controller/updates/mod.rs b/meilisearch-lib/src/index_controller/updates/mod.rs index 7cc38490f..30b6d98f6 100644 --- a/meilisearch-lib/src/index_controller/updates/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/mod.rs @@ -24,6 +24,7 @@ use uuid::Uuid; use self::error::{Result, UpdateLoopError}; pub use self::message::UpdateMsg; use self::store::{UpdateStore, UpdateStoreInfo}; +use crate::index::{Settings, Unchecked}; use crate::index_controller::update_file_store::UpdateFileStore; use status::UpdateStatus; @@ -53,6 +54,7 @@ pub enum RegisterUpdate { method: IndexDocumentsMethod, content_uuid: Uuid, }, 
+ Settings(Settings), } /// A wrapper type to implement read on a `Stream>`. @@ -207,6 +209,7 @@ impl UpdateLoop { content_uuid, } } + Update::Settings(settings) => RegisterUpdate::Settings(settings), }; let store = self.store.clone(); From b9d189bf12941aab11efd88bd03bc962f8a07428 Mon Sep 17 00:00:00 2001 From: mpostma Date: Fri, 24 Sep 2021 15:21:07 +0200 Subject: [PATCH 10/37] restore document deletion routes --- .../src/routes/indexes/documents.rs | 82 ++++++++++--------- meilisearch-http/src/routes/mod.rs | 8 +- meilisearch-lib/src/index/updates.rs | 47 ++++------- meilisearch-lib/src/index_controller/mod.rs | 2 + .../src/index_controller/updates/mod.rs | 4 + 5 files changed, 68 insertions(+), 75 deletions(-) diff --git a/meilisearch-http/src/routes/indexes/documents.rs b/meilisearch-http/src/routes/indexes/documents.rs index 821b962ea..ee86e12ad 100644 --- a/meilisearch-http/src/routes/indexes/documents.rs +++ b/meilisearch-http/src/routes/indexes/documents.rs @@ -7,7 +7,7 @@ use meilisearch_lib::MeiliSearch; use meilisearch_lib::index_controller::{DocumentAdditionFormat, Update}; use milli::update::IndexDocumentsMethod; use serde::Deserialize; -//use serde_json::Value; +use serde_json::Value; use tokio::sync::mpsc; use crate::error::ResponseError; @@ -76,14 +76,14 @@ pub fn configure(cfg: &mut web::ServiceConfig) { .route(web::get().to(get_all_documents)) .route(web::post().guard(guard_json).to(add_documents)) .route(web::put().guard(guard_json).to(update_documents)) - //.route(web::delete().to(clear_all_documents)), + .route(web::delete().to(clear_all_documents)), ) // this route needs to be before the /documents/{document_id} to match properly - //.service(web::resource("/delete-batch").route(web::post().to(delete_documents))) + .service(web::resource("/delete-batch").route(web::post().to(delete_documents))) .service( web::resource("/{document_id}") .route(web::get().to(get_document)) - //.route(web::delete().to(delete_document)), + 
.route(web::delete().to(delete_document)), ); } @@ -100,16 +100,16 @@ pub async fn get_document( Ok(HttpResponse::Ok().json(document)) } -//pub async fn delete_document( - //data: GuardedData, - //path: web::Path, -//) -> Result { - //let update_status = data - //.delete_documents(path.index_uid.clone(), vec![path.document_id.clone()]) - //.await?; - //debug!("returns: {:?}", update_status); - //Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) -//} +pub async fn delete_document( + meilisearch: GuardedData, + path: web::Path, +) -> Result { + let DocumentParam { document_id, index_uid } = path.into_inner(); + let update = Update::DeleteDocuments(vec![document_id]); + let update_status = meilisearch.register_update(index_uid, update).await?; + debug!("returns: {:?}", update_status); + Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) +} #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase", deny_unknown_fields)] @@ -200,31 +200,33 @@ pub async fn update_documents( Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) } -//pub async fn delete_documents( - //data: GuardedData, - //path: web::Path, - //body: web::Json>, -//) -> Result { - //debug!("called with params: {:?}", body); - //let ids = body - //.iter() - //.map(|v| { - //v.as_str() - //.map(String::from) - //.unwrap_or_else(|| v.to_string()) - //}) - //.collect(); +pub async fn delete_documents( + meilisearch: GuardedData, + path: web::Path, + body: web::Json>, +) -> Result { + debug!("called with params: {:?}", body); + let ids = body + .iter() + .map(|v| { + v.as_str() + .map(String::from) + .unwrap_or_else(|| v.to_string()) + }) + .collect(); - //let update_status = data.delete_documents(path.index_uid.clone(), ids).await?; - //debug!("returns: {:?}", update_status); - //Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) -//} + let update = 
Update::DeleteDocuments(ids); + let update_status = meilisearch.register_update(path.into_inner().index_uid, update).await?; + debug!("returns: {:?}", update_status); + Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) +} -//pub async fn clear_all_documents( - //data: GuardedData, - //path: web::Path, -//) -> Result { - //let update_status = data.clear_documents(path.index_uid.clone()).await?; - //debug!("returns: {:?}", update_status); - //Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) -//} +pub async fn clear_all_documents( + meilisearch: GuardedData, + path: web::Path, +) -> Result { + let update = Update::ClearDocuments; + let update_status = meilisearch.register_update(path.into_inner().index_uid, update).await?; + debug!("returns: {:?}", update_status); + Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) +} diff --git a/meilisearch-http/src/routes/mod.rs b/meilisearch-http/src/routes/mod.rs index b30fce164..9d99a7d0c 100644 --- a/meilisearch-http/src/routes/mod.rs +++ b/meilisearch-http/src/routes/mod.rs @@ -72,10 +72,10 @@ impl From<&UpdateStatus> for UpdateType { RegisterUpdate::Settings(settings) => UpdateType::Settings { settings: settings.clone(), }, - //UpdateMeta::ClearDocuments => UpdateType::ClearAll, - //UpdateMeta::DeleteDocuments { ids } => UpdateType::DocumentsDeletion { - //number: Some(ids.len()), - //}, + RegisterUpdate::ClearDocuments => UpdateType::ClearAll, + RegisterUpdate::DeleteDocuments(ids) => UpdateType::DocumentsDeletion { + number: Some(ids.len()), + }, } } } diff --git a/meilisearch-lib/src/index/updates.rs b/meilisearch-lib/src/index/updates.rs index fca925031..28d2734f0 100644 --- a/meilisearch-lib/src/index/updates.rs +++ b/meilisearch-lib/src/index/updates.rs @@ -177,6 +177,22 @@ impl Index { let settings = settings.clone().check(); self.update_settings(&mut txn, &settings, update_builder) }, + 
RegisterUpdate::ClearDocuments => { + let builder = update_builder.clear_documents(&mut txn, self); + let _count = builder.execute()?; + Ok(UpdateResult::Other) + }, + RegisterUpdate::DeleteDocuments(ids) => { + let mut builder = update_builder.delete_documents(&mut txn, self)?; + + // We ignore unexisting document ids + ids.iter().for_each(|id| { + builder.delete_external_id(id); + }); + + let deleted = builder.execute()?; + Ok(UpdateResult::DocumentDeletion { deleted }) + } }; txn.commit()?; result @@ -241,18 +257,6 @@ impl Index { Ok(UpdateResult::DocumentsAddition(addition)) } - //pub fn clear_documents(&self, update_builder: UpdateBuilder) -> Result { - //// We must use the write transaction of the update here. - //let mut wtxn = self.write_txn()?; - //let builder = update_builder.clear_documents(&mut wtxn, self); - - //let _count = builder.execute()?; - - //wtxn.commit() - //.and(Ok(UpdateResult::Other)) - //.map_err(Into::into) - //} - fn update_settings<'a, 'b>( &'a self, txn: &mut heed::RwTxn<'a, 'b>, @@ -322,25 +326,6 @@ impl Index { Ok(UpdateResult::Other) } - - //pub fn delete_documents( - //&self, - //document_ids: &[String], - //update_builder: UpdateBuilder, - //) -> Result { - //let mut txn = self.write_txn()?; - //let mut builder = update_builder.delete_documents(&mut txn, self)?; - - //// We ignore unexisting document ids - //document_ids.iter().for_each(|id| { - //builder.delete_external_id(id); - //}); - - //let deleted = builder.execute()?; - //txn.commit() - //.and(Ok(UpdateResult::DocumentDeletion { deleted })) - //.map_err(Into::into) - //} } #[cfg(test)] diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index f9ff4fbbe..3c53ab9eb 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -95,6 +95,8 @@ pub struct Stats { #[derive(derivative::Derivative)] #[derivative(Debug)] pub enum Update { + DeleteDocuments(Vec), + ClearDocuments, 
Settings(Settings), DocumentAddition { #[derivative(Debug="ignore")] diff --git a/meilisearch-lib/src/index_controller/updates/mod.rs b/meilisearch-lib/src/index_controller/updates/mod.rs index 30b6d98f6..2027f5245 100644 --- a/meilisearch-lib/src/index_controller/updates/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/mod.rs @@ -49,12 +49,14 @@ pub fn create_update_handler( #[derive(Debug, Clone, Serialize, Deserialize)] pub enum RegisterUpdate { + DeleteDocuments(Vec), DocumentAddition { primary_key: Option, method: IndexDocumentsMethod, content_uuid: Uuid, }, Settings(Settings), + ClearDocuments, } /// A wrapper type to implement read on a `Stream>`. @@ -210,6 +212,8 @@ impl UpdateLoop { } } Update::Settings(settings) => RegisterUpdate::Settings(settings), + Update::ClearDocuments => RegisterUpdate::ClearDocuments, + Update::DeleteDocuments(ids) => RegisterUpdate::DeleteDocuments(ids), }; let store = self.store.clone(); From 90018755c506cd96f5d2ca8bc2ba423d460b1e32 Mon Sep 17 00:00:00 2001 From: mpostma Date: Mon, 27 Sep 2021 16:48:03 +0200 Subject: [PATCH 11/37] restore snapshots --- meilisearch-http/src/helpers/mod.rs | 2 +- meilisearch-http/src/main.rs | 20 +- .../src}/compression.rs | 16 +- meilisearch-lib/src/index/mod.rs | 15 +- .../index_resolver/index_store.rs | 6 +- .../index_controller/index_resolver/mod.rs | 12 +- .../index_resolver/uuid_store.rs | 3 +- meilisearch-lib/src/index_controller/mod.rs | 63 +-- .../src/index_controller/snapshot.rs | 388 +++++++++--------- .../src/index_controller/update_file_store.rs | 47 ++- .../src/index_controller/updates/message.rs | 42 +- .../src/index_controller/updates/mod.rs | 23 +- .../index_controller/updates/store/dump.rs | 15 +- .../src/index_controller/updates/store/mod.rs | 60 ++- meilisearch-lib/src/lib.rs | 2 + 15 files changed, 397 insertions(+), 317 deletions(-) rename {meilisearch-http/src/helpers => meilisearch-lib/src}/compression.rs (67%) diff --git a/meilisearch-http/src/helpers/mod.rs 
b/meilisearch-http/src/helpers/mod.rs index c664f15aa..0b72c3694 100644 --- a/meilisearch-http/src/helpers/mod.rs +++ b/meilisearch-http/src/helpers/mod.rs @@ -1,4 +1,4 @@ -pub mod compression; +//pub mod compression; mod env; pub use env::EnvSizer; diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index 77f439d05..dfa4bcc2d 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -1,4 +1,4 @@ -use std::env; +use std::{env, path::Path, time::Duration}; use actix_web::HttpServer; use meilisearch_http::{create_app, Opt}; @@ -12,6 +12,7 @@ use meilisearch_http::analytics; #[global_allocator] static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; +/// does all the setup before meilisearch is launched fn setup(opt: &Opt) -> anyhow::Result<()> { let mut log_builder = env_logger::Builder::new(); log_builder.parse_filters(&opt.log_level); @@ -22,12 +23,19 @@ fn setup(opt: &Opt) -> anyhow::Result<()> { log_builder.init(); + + Ok(()) +} + +/// Cleans and setup the temporary file folder in the database directory. This must be done after +/// the meilisearch instance has been created, to not interfere with the snapshot and dump loading. +fn setup_temp_dir(db_path: impl AsRef) -> anyhow::Result<()> { // Set the tempfile directory in the current db path, to avoid cross device references. Also // remove the previous outstanding files found there // // TODO: if two processes open the same db, one might delete the other tmpdir. Need to make // sure that no one is using it before deleting it. 
- let temp_path = opt.db_path.join("tmp"); + let temp_path = db_path.as_ref().join("tmp"); // Ignore error if tempdir doesn't exist let _ = std::fs::remove_dir_all(&temp_path); std::fs::create_dir_all(&temp_path)?; @@ -48,15 +56,21 @@ fn setup_meilisearch(opt: &Opt) -> anyhow::Result { .set_ignore_missing_snapshot(opt.ignore_missing_snapshot) .set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists) .set_dump_dst(opt.dumps_dir.clone()) + .set_snapshot_interval(Duration::from_secs(opt.snapshot_interval_sec)) .set_snapshot_dir(opt.snapshot_dir.clone()); if let Some(ref path) = opt.import_snapshot { meilisearch.set_import_snapshot(path.clone()); } + if let Some(ref path) = opt.import_dump { meilisearch.set_dump_src(path.clone()); } + if opt.schedule_snapshot { + meilisearch.set_schedule_snapshot(); + } + meilisearch.build(opt.db_path.clone(), opt.indexer_options.clone()) } @@ -78,6 +92,8 @@ async fn main() -> anyhow::Result<()> { let meilisearch = setup_meilisearch(&opt)?; + setup_temp_dir(&opt.db_path)?; + #[cfg(all(not(debug_assertions), feature = "analytics"))] if !opt.no_analytics { let analytics_data = meilisearch.clone(); diff --git a/meilisearch-http/src/helpers/compression.rs b/meilisearch-lib/src/compression.rs similarity index 67% rename from meilisearch-http/src/helpers/compression.rs rename to meilisearch-lib/src/compression.rs index c4747cb21..f9620eb2a 100644 --- a/meilisearch-http/src/helpers/compression.rs +++ b/meilisearch-lib/src/compression.rs @@ -16,11 +16,11 @@ pub fn to_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Resul Ok(()) } -pub fn from_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Result<()> { - let f = File::open(&src)?; - let gz = GzDecoder::new(f); - let mut ar = Archive::new(gz); - create_dir_all(&dest)?; - ar.unpack(&dest)?; - Ok(()) -} +//pub fn from_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Result<()> { + //let f = File::open(&src)?; + //let gz = GzDecoder::new(f); + //let mut ar = 
Archive::new(gz); + //create_dir_all(&dest)?; + //ar.unpack(&dest)?; + //Ok(()) +//} diff --git a/meilisearch-lib/src/index/mod.rs b/meilisearch-lib/src/index/mod.rs index 911a22464..c4fa812b1 100644 --- a/meilisearch-lib/src/index/mod.rs +++ b/meilisearch-lib/src/index/mod.rs @@ -71,11 +71,15 @@ impl IndexMeta { } } -#[derive(Clone)] +#[derive(Clone, derivative::Derivative)] +#[derivative(Debug)] pub struct Index { pub uuid: Uuid, + #[derivative(Debug="ignore")] pub inner: Arc, + #[derivative(Debug="ignore")] update_file_store: Arc, + #[derivative(Debug="ignore")] update_handler: Arc, } @@ -258,4 +262,13 @@ impl Index { displayed_fields_ids.retain(|fid| attributes_to_retrieve_ids.contains(fid)); Ok(displayed_fields_ids) } + + pub fn snapshot(&self, path: impl AsRef) -> Result<()> { + let mut dst = path.as_ref().join(format!("indexes/{}/", self.uuid)); + create_dir_all(&dst)?; + dst.push("data.mdb"); + let _txn = self.write_txn()?; + self.inner.env.copy_to_path(dst, heed::CompactionOption::Enabled)?; + Ok(()) + } } diff --git a/meilisearch-lib/src/index_controller/index_resolver/index_store.rs b/meilisearch-lib/src/index_controller/index_resolver/index_store.rs index c038ceb20..5969108de 100644 --- a/meilisearch-lib/src/index_controller/index_resolver/index_store.rs +++ b/meilisearch-lib/src/index_controller/index_resolver/index_store.rs @@ -57,7 +57,7 @@ impl IndexStore for MapIndexStore { if let Some(index) = lock.get(&uuid) { return Ok(index.clone()); } - let path = self.path.join(format!("index-{}", uuid)); + let path = self.path.join(format!("{}", uuid)); if path.exists() { return Err(IndexResolverError::IndexAlreadyExists); } @@ -92,7 +92,7 @@ impl IndexStore for MapIndexStore { None => { // drop the guard here so we can perform the write after without deadlocking; drop(guard); - let path = self.path.join(format!("index-{}", uuid)); + let path = self.path.join(format!("{}", uuid)); if !path.exists() { return Ok(None); } @@ -108,7 +108,7 @@ impl IndexStore for 
MapIndexStore { } async fn delete(&self, uuid: Uuid) -> Result> { - let db_path = self.path.join(format!("index-{}", uuid)); + let db_path = self.path.join(format!("{}", uuid)); fs::remove_dir_all(db_path).await?; let index = self.index_store.write().await.remove(&uuid); Ok(index) diff --git a/meilisearch-lib/src/index_controller/index_resolver/mod.rs b/meilisearch-lib/src/index_controller/index_resolver/mod.rs index eebb8ef95..5721fce8a 100644 --- a/meilisearch-lib/src/index_controller/index_resolver/mod.rs +++ b/meilisearch-lib/src/index_controller/index_resolver/mod.rs @@ -45,10 +45,18 @@ where U: UuidStore, pub async fn get_size(&self) -> Result { todo!() + //Ok(self.index_store.get_size()? + self.index_uuid_store.get_size().await?) } - pub async fn perform_snapshot(&self, _path: impl AsRef) -> Result<()> { - todo!() + pub async fn snapshot(&self, path: impl AsRef) -> Result> { + let uuids = self.index_uuid_store.snapshot(path.as_ref().to_owned()).await?; + let mut indexes = Vec::new(); + + for uuid in uuids { + indexes.push(self.get_index_by_uuid(uuid).await?); + } + + Ok(indexes) } pub async fn create_index(&self, uid: String, primary_key: Option) -> Result<(Uuid, Index)> { diff --git a/meilisearch-lib/src/index_controller/index_resolver/uuid_store.rs b/meilisearch-lib/src/index_controller/index_resolver/uuid_store.rs index 7974bf4ae..a4bcd17d4 100644 --- a/meilisearch-lib/src/index_controller/index_resolver/uuid_store.rs +++ b/meilisearch-lib/src/index_controller/index_resolver/uuid_store.rs @@ -46,8 +46,9 @@ impl HeedUuidStore { create_dir_all(&path)?; let mut options = EnvOpenOptions::new(); options.map_size(UUID_STORE_SIZE); // 1GB + options.max_dbs(1); let env = options.open(path)?; - let db = env.create_database(None)?; + let db = env.create_database(Some("uuids"))?; Ok(Self { env, db }) } diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index 3c53ab9eb..29f5348b1 100644 --- 
a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -20,6 +20,7 @@ use snapshot::load_snapshot; use crate::index::{Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked}; use crate::index_controller::index_resolver::create_index_resolver; +use crate::index_controller::snapshot::SnapshotService; use crate::options::IndexerOpts; use error::Result; use crate::index::error::Result as IndexResult; @@ -75,7 +76,7 @@ pub struct IndexSettings { #[derive(Clone)] pub struct IndexController { index_resolver: Arc, - update_handle: updates::UpdateSender, + update_sender: updates::UpdateSender, dump_handle: dump_actor::DumpActorHandleImpl, } @@ -113,8 +114,10 @@ pub struct IndexControllerBuilder { max_update_store_size: Option, snapshot_dir: Option, import_snapshot: Option, + snapshot_interval: Option, ignore_snapshot_if_db_exists: bool, ignore_missing_snapshot: bool, + schedule_snapshot: bool, dump_src: Option, dump_dst: Option, } @@ -155,36 +158,36 @@ impl IndexControllerBuilder { let index_resolver = Arc::new(create_index_resolver(&db_path, index_size, &indexer_options)?); #[allow(unreachable_code)] - let update_handle = updates::create_update_handler(index_resolver.clone(), &db_path, update_store_size)?; + let update_sender = updates::create_update_handler(index_resolver.clone(), &db_path, update_store_size)?; let dump_path = self.dump_dst.ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?; let dump_handle = dump_actor::DumpActorHandleImpl::new( dump_path, index_resolver.clone(), - update_handle.clone(), + update_sender.clone(), index_size, update_store_size, )?; - //if options.schedule_snapshot { - //let snapshot_service = SnapshotService::new( - //uuid_resolver.clone(), - //update_handle.clone(), - //Duration::from_secs(options.snapshot_interval_sec), - //options.snapshot_dir.clone(), - //options - //.db_path - //.file_name() - //.map(|n| n.to_owned().into_string().expect("invalid 
path")) - //.unwrap_or_else(|| String::from("data.ms")), - //); + if self.schedule_snapshot { + let snapshot_service = SnapshotService::new( + index_resolver.clone(), + update_sender.clone(), + self.snapshot_interval.ok_or_else(|| anyhow::anyhow!("Snapshot interval not provided."))?, + self.snapshot_dir.ok_or_else(|| anyhow::anyhow!("Snapshot path not provided."))?, + db_path + .as_ref() + .file_name() + .map(|n| n.to_owned().into_string().expect("invalid path")) + .unwrap_or_else(|| String::from("data.ms")), + ); - //tokio::task::spawn(snapshot_service.run()); - //} + tokio::task::spawn(snapshot_service.run()); + } Ok(IndexController { index_resolver, - update_handle, + update_sender, dump_handle, }) } @@ -238,6 +241,18 @@ impl IndexControllerBuilder { self.import_snapshot.replace(import_snapshot); self } + + /// Set the index controller builder's snapshot interval sec. + pub fn set_snapshot_interval(&mut self, snapshot_interval: Duration) -> &mut Self { + self.snapshot_interval = Some(snapshot_interval); + self + } + + /// Set the index controller builder's schedule snapshot. 
+ pub fn set_schedule_snapshot(&mut self) -> &mut Self { + self.schedule_snapshot = true; + self + } } impl IndexController { @@ -248,12 +263,12 @@ impl IndexController { pub async fn register_update(&self, uid: String, update: Update) -> Result { match self.index_resolver.get_uuid(uid).await { Ok(uuid) => { - let update_result = UpdateMsg::update(&self.update_handle, uuid, update).await?; + let update_result = UpdateMsg::update(&self.update_sender, uuid, update).await?; Ok(update_result) } Err(IndexResolverError::UnexistingIndex(name)) => { let (uuid, _) = self.index_resolver.create_index(name, None).await?; - let update_result = UpdateMsg::update(&self.update_handle, uuid, update).await?; + let update_result = UpdateMsg::update(&self.update_sender, uuid, update).await?; // ignore if index creation fails now, since it may already have been created Ok(update_result) @@ -389,13 +404,13 @@ impl IndexController { pub async fn update_status(&self, uid: String, id: u64) -> Result { let uuid = self.index_resolver.get_uuid(uid).await?; - let result = UpdateMsg::get_update(&self.update_handle, uuid, id).await?; + let result = UpdateMsg::get_update(&self.update_sender, uuid, id).await?; Ok(result) } pub async fn all_update_status(&self, uid: String) -> Result> { let uuid = self.index_resolver.get_uuid(uid).await?; - let result = UpdateMsg::list_updates(&self.update_handle, uuid).await?; + let result = UpdateMsg::list_updates(&self.update_sender, uuid).await?; Ok(result) } @@ -490,7 +505,7 @@ impl IndexController { } pub async fn get_index_stats(&self, uid: String) -> Result { - let update_infos = UpdateMsg::get_info(&self.update_handle).await?; + let update_infos = UpdateMsg::get_info(&self.update_sender).await?; let index = self.index_resolver.get_index(uid).await?; let uuid = index.uuid; let mut stats = spawn_blocking(move || index.stats()).await??; @@ -500,7 +515,7 @@ impl IndexController { } pub async fn get_all_stats(&self) -> Result { - let update_infos = 
UpdateMsg::get_info(&self.update_handle).await?; + let update_infos = UpdateMsg::get_info(&self.update_sender).await?; let mut database_size = self.get_uuids_size().await? + update_infos.size; let mut last_update: Option> = None; let mut indexes = BTreeMap::new(); diff --git a/meilisearch-lib/src/index_controller/snapshot.rs b/meilisearch-lib/src/index_controller/snapshot.rs index 7c999fd74..132745c96 100644 --- a/meilisearch-lib/src/index_controller/snapshot.rs +++ b/meilisearch-lib/src/index_controller/snapshot.rs @@ -1,88 +1,94 @@ -use std::path::Path; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use std::time::Duration; use anyhow::bail; +use log::{error, info, trace}; +use tokio::task::spawn_blocking; +use tokio::time::sleep; +use tokio::fs; -//pub struct SnapshotService { - //uuid_resolver_handle: R, - //update_handle: U, - //snapshot_period: Duration, - //snapshot_path: PathBuf, - //db_name: String, -//} +use crate::index_controller::updates::UpdateMsg; -//impl SnapshotService -//where - //U: UpdateActorHandle, - //R: UuidResolverHandle, -//{ - //pub fn new( - //uuid_resolver_handle: R, - //update_handle: U, - //snapshot_period: Duration, - //snapshot_path: PathBuf, - //db_name: String, - //) -> Self { - //Self { - //uuid_resolver_handle, - //update_handle, - //snapshot_period, - //snapshot_path, - //db_name, - //} - //} +use super::updates::UpdateSender; +use super::index_resolver::HardStateIndexResolver; - //pub async fn run(self) { - //info!( - //"Snapshot scheduled every {}s.", - //self.snapshot_period.as_secs() - //); - //loop { - //if let Err(e) = self.perform_snapshot().await { - //error!("Error while performing snapshot: {}", e); - //} - //sleep(self.snapshot_period).await; - //} - //} +pub struct SnapshotService { + index_resolver: Arc, + update_sender: UpdateSender, + snapshot_period: Duration, + snapshot_path: PathBuf, + db_name: String, +} - //async fn perform_snapshot(&self) -> anyhow::Result<()> { - //trace!("Performing snapshot."); 
+impl SnapshotService { + pub fn new( + index_resolver: Arc, + update_sender: UpdateSender, + snapshot_period: Duration, + snapshot_path: PathBuf, + db_name: String, + ) -> Self { + Self { + index_resolver, + update_sender, + snapshot_period, + snapshot_path, + db_name, + } + } - //let snapshot_dir = self.snapshot_path.clone(); - //fs::create_dir_all(&snapshot_dir).await?; - //let temp_snapshot_dir = - //spawn_blocking(move || tempfile::tempdir_in(snapshot_dir)).await??; - //let temp_snapshot_path = temp_snapshot_dir.path().to_owned(); + pub async fn run(self) { + info!( + "Snapshot scheduled every {}s.", + self.snapshot_period.as_secs() + ); + loop { + if let Err(e) = self.perform_snapshot().await { + error!("Error while performing snapshot: {}", e); + } + sleep(self.snapshot_period).await; + } + } - //let uuids = self - //.uuid_resolver_handle - //.snapshot(temp_snapshot_path.clone()) - //.await?; + async fn perform_snapshot(&self) -> anyhow::Result<()> { + trace!("Performing snapshot."); - //if uuids.is_empty() { - //return Ok(()); - //} + let snapshot_dir = self.snapshot_path.clone(); + fs::create_dir_all(&snapshot_dir).await?; + let temp_snapshot_dir = + spawn_blocking(move || tempfile::tempdir_in(snapshot_dir)).await??; + let temp_snapshot_path = temp_snapshot_dir.path().to_owned(); - //self.update_handle - //.snapshot(uuids, temp_snapshot_path.clone()) - //.await?; - //let snapshot_dir = self.snapshot_path.clone(); - //let snapshot_path = self - //.snapshot_path - //.join(format!("{}.snapshot", self.db_name)); - //let snapshot_path = spawn_blocking(move || -> anyhow::Result { - //let temp_snapshot_file = tempfile::NamedTempFile::new_in(snapshot_dir)?; - //let temp_snapshot_file_path = temp_snapshot_file.path().to_owned(); - //compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?; - //temp_snapshot_file.persist(&snapshot_path)?; - //Ok(snapshot_path) - //}) - //.await??; + let indexes = self + .index_resolver + 
.snapshot(temp_snapshot_path.clone()) + .await?; - //trace!("Created snapshot in {:?}.", snapshot_path); + if indexes.is_empty() { + return Ok(()); + } - //Ok(()) - //} -//} + UpdateMsg::snapshot(&self.update_sender, temp_snapshot_path.clone(), indexes).await?; + + let snapshot_dir = self.snapshot_path.clone(); + let snapshot_path = self + .snapshot_path + .join(format!("{}.snapshot", self.db_name)); + let snapshot_path = spawn_blocking(move || -> anyhow::Result { + let temp_snapshot_file = tempfile::NamedTempFile::new_in(snapshot_dir)?; + let temp_snapshot_file_path = temp_snapshot_file.path().to_owned(); + crate::compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?; + temp_snapshot_file.persist(&snapshot_path)?; + Ok(snapshot_path) + }) + .await??; + + trace!("Created snapshot in {:?}.", snapshot_path); + + Ok(()) + } +} pub fn load_snapshot( db_path: impl AsRef, @@ -94,7 +100,7 @@ pub fn load_snapshot( match crate::from_tar_gz(snapshot_path, &db_path) { Ok(()) => Ok(()), Err(e) => { - // clean created db folder + //clean created db folder std::fs::remove_dir_all(&db_path)?; Err(e) } @@ -120,140 +126,140 @@ pub fn load_snapshot( } } -#[cfg(test)] -mod test { - use std::iter::FromIterator; - use std::{collections::HashSet, sync::Arc}; +//#[cfg(test)] +//mod test { + //use std::iter::FromIterator; + //use std::{collections::HashSet, sync::Arc}; - use futures::future::{err, ok}; - use rand::Rng; - use tokio::time::timeout; - use uuid::Uuid; + //use futures::future::{err, ok}; + //use rand::Rng; + //use tokio::time::timeout; + //use uuid::Uuid; - use super::*; - use crate::index_controller::index_actor::MockIndexActorHandle; - use crate::index_controller::updates::{ - error::UpdateActorError, MockUpdateActorHandle, UpdateActorHandleImpl, - }; - use crate::index_controller::uuid_resolver::{ - error::UuidResolverError, MockUuidResolverHandle, - }; + //use super::*; + //use crate::index_controller::index_actor::MockIndexActorHandle; + //use 
crate::index_controller::updates::{ + //error::UpdateActorError, MockUpdateActorHandle, UpdateActorHandleImpl, + //}; + //use crate::index_controller::uuid_resolver::{ + //error::UuidResolverError, MockUuidResolverHandle, + //}; - #[actix_rt::test] - async fn test_normal() { - let mut rng = rand::thread_rng(); - let uuids_num: usize = rng.gen_range(5..10); - let uuids = (0..uuids_num) - .map(|_| Uuid::new_v4()) - .collect::>(); + //#[actix_rt::test] + //async fn test_normal() { + //let mut rng = rand::thread_rng(); + //let uuids_num: usize = rng.gen_range(5..10); + //let uuids = (0..uuids_num) + //.map(|_| Uuid::new_v4()) + //.collect::>(); - let mut uuid_resolver = MockUuidResolverHandle::new(); - let uuids_clone = uuids.clone(); - uuid_resolver - .expect_snapshot() - .times(1) - .returning(move |_| Box::pin(ok(uuids_clone.clone()))); + //let mut uuid_resolver = MockUuidResolverHandle::new(); + //let uuids_clone = uuids.clone(); + //uuid_resolver + //.expect_snapshot() + //.times(1) + //.returning(move |_| Box::pin(ok(uuids_clone.clone()))); - let uuids_clone = uuids.clone(); - let mut index_handle = MockIndexActorHandle::new(); - index_handle - .expect_snapshot() - .withf(move |uuid, _path| uuids_clone.contains(uuid)) - .times(uuids_num) - .returning(move |_, _| Box::pin(ok(()))); + //let uuids_clone = uuids.clone(); + //let mut index_handle = MockIndexActorHandle::new(); + //index_handle + //.expect_snapshot() + //.withf(move |uuid, _path| uuids_clone.contains(uuid)) + //.times(uuids_num) + //.returning(move |_, _| Box::pin(ok(()))); - let dir = tempfile::tempdir_in(".").unwrap(); - let handle = Arc::new(index_handle); - let update_handle = - UpdateActorHandleImpl::>::new(handle.clone(), dir.path(), 4096 * 100).unwrap(); + //let dir = tempfile::tempdir_in(".").unwrap(); + //let handle = Arc::new(index_handle); + //let update_handle = + //UpdateActorHandleImpl::>::new(handle.clone(), dir.path(), 4096 * 100).unwrap(); - let snapshot_path = 
tempfile::tempdir_in(".").unwrap(); - let snapshot_service = SnapshotService::new( - uuid_resolver, - update_handle, - Duration::from_millis(100), - snapshot_path.path().to_owned(), - "data.ms".to_string(), - ); + //let snapshot_path = tempfile::tempdir_in(".").unwrap(); + //let snapshot_service = SnapshotService::new( + //uuid_resolver, + //update_handle, + //Duration::from_millis(100), + //snapshot_path.path().to_owned(), + //"data.ms".to_string(), + //); - snapshot_service.perform_snapshot().await.unwrap(); - } + //snapshot_service.perform_snapshot().await.unwrap(); + //} - #[actix_rt::test] - async fn error_performing_uuid_snapshot() { - let mut uuid_resolver = MockUuidResolverHandle::new(); - uuid_resolver - .expect_snapshot() - .times(1) - // abitrary error - .returning(|_| Box::pin(err(UuidResolverError::NameAlreadyExist))); + //#[actix_rt::test] + //async fn error_performing_uuid_snapshot() { + //let mut uuid_resolver = MockUuidResolverHandle::new(); + //uuid_resolver + //.expect_snapshot() + //.times(1) + //abitrary error + //.returning(|_| Box::pin(err(UuidResolverError::NameAlreadyExist))); - let update_handle = MockUpdateActorHandle::new(); + //let update_handle = MockUpdateActorHandle::new(); - let snapshot_path = tempfile::tempdir_in(".").unwrap(); - let snapshot_service = SnapshotService::new( - uuid_resolver, - update_handle, - Duration::from_millis(100), - snapshot_path.path().to_owned(), - "data.ms".to_string(), - ); + //let snapshot_path = tempfile::tempdir_in(".").unwrap(); + //let snapshot_service = SnapshotService::new( + //uuid_resolver, + //update_handle, + //Duration::from_millis(100), + //snapshot_path.path().to_owned(), + //"data.ms".to_string(), + //); - assert!(snapshot_service.perform_snapshot().await.is_err()); - // Nothing was written to the file - assert!(!snapshot_path.path().join("data.ms.snapshot").exists()); - } + //assert!(snapshot_service.perform_snapshot().await.is_err()); + //Nothing was written to the file + 
//assert!(!snapshot_path.path().join("data.ms.snapshot").exists()); + //} - #[actix_rt::test] - async fn error_performing_index_snapshot() { - let uuid = Uuid::new_v4(); - let mut uuid_resolver = MockUuidResolverHandle::new(); - uuid_resolver - .expect_snapshot() - .times(1) - .returning(move |_| Box::pin(ok(HashSet::from_iter(Some(uuid))))); + //#[actix_rt::test] + //async fn error_performing_index_snapshot() { + //let uuid = Uuid::new_v4(); + //let mut uuid_resolver = MockUuidResolverHandle::new(); + //uuid_resolver + //.expect_snapshot() + //.times(1) + //.returning(move |_| Box::pin(ok(HashSet::from_iter(Some(uuid))))); - let mut update_handle = MockUpdateActorHandle::new(); - update_handle - .expect_snapshot() - // abitrary error - .returning(|_, _| Box::pin(err(UpdateActorError::UnexistingUpdate(0)))); + //let mut update_handle = MockUpdateActorHandle::new(); + //update_handle + //.expect_snapshot() + //abitrary error + //.returning(|_, _| Box::pin(err(UpdateActorError::UnexistingUpdate(0)))); - let snapshot_path = tempfile::tempdir_in(".").unwrap(); - let snapshot_service = SnapshotService::new( - uuid_resolver, - update_handle, - Duration::from_millis(100), - snapshot_path.path().to_owned(), - "data.ms".to_string(), - ); + //let snapshot_path = tempfile::tempdir_in(".").unwrap(); + //let snapshot_service = SnapshotService::new( + //uuid_resolver, + //update_handle, + //Duration::from_millis(100), + //snapshot_path.path().to_owned(), + //"data.ms".to_string(), + //); - assert!(snapshot_service.perform_snapshot().await.is_err()); - // Nothing was written to the file - assert!(!snapshot_path.path().join("data.ms.snapshot").exists()); - } + //assert!(snapshot_service.perform_snapshot().await.is_err()); + //Nothing was written to the file + //assert!(!snapshot_path.path().join("data.ms.snapshot").exists()); + //} - #[actix_rt::test] - async fn test_loop() { - let mut uuid_resolver = MockUuidResolverHandle::new(); - uuid_resolver - .expect_snapshot() - // we 
expect the funtion to be called between 2 and 3 time in the given interval. - .times(2..4) - // abitrary error, to short-circuit the function - .returning(move |_| Box::pin(err(UuidResolverError::NameAlreadyExist))); + //#[actix_rt::test] + //async fn test_loop() { + //let mut uuid_resolver = MockUuidResolverHandle::new(); + //uuid_resolver + //.expect_snapshot() + //we expect the funtion to be called between 2 and 3 time in the given interval. + //.times(2..4) + //abitrary error, to short-circuit the function + //.returning(move |_| Box::pin(err(UuidResolverError::NameAlreadyExist))); - let update_handle = MockUpdateActorHandle::new(); + //let update_handle = MockUpdateActorHandle::new(); - let snapshot_path = tempfile::tempdir_in(".").unwrap(); - let snapshot_service = SnapshotService::new( - uuid_resolver, - update_handle, - Duration::from_millis(100), - snapshot_path.path().to_owned(), - "data.ms".to_string(), - ); + //let snapshot_path = tempfile::tempdir_in(".").unwrap(); + //let snapshot_service = SnapshotService::new( + //uuid_resolver, + //update_handle, + //Duration::from_millis(100), + //snapshot_path.path().to_owned(), + //"data.ms".to_string(), + //); - let _ = timeout(Duration::from_millis(300), snapshot_service.run()).await; - } -} + //let _ = timeout(Duration::from_millis(300), snapshot_service.run()).await; + //} +//} diff --git a/meilisearch-lib/src/index_controller/update_file_store.rs b/meilisearch-lib/src/index_controller/update_file_store.rs index 1c60bcec9..f21560f73 100644 --- a/meilisearch-lib/src/index_controller/update_file_store.rs +++ b/meilisearch-lib/src/index_controller/update_file_store.rs @@ -2,9 +2,13 @@ use std::fs::File; use std::path::{Path, PathBuf}; use std::ops::{Deref, DerefMut}; +//use milli::documents::DocumentBatchReader; +//use serde_json::Map; use tempfile::NamedTempFile; use uuid::Uuid; +const UPDATE_FILES_PATH: &str = "updates/updates_files"; + use super::error::Result; pub struct UpdateFile { @@ -14,7 +18,6 @@ pub 
struct UpdateFile { impl UpdateFile { pub fn persist(self) { - println!("persisting in {}", self.path.display()); self.file.persist(&self.path).unwrap(); } } @@ -40,11 +43,14 @@ pub struct UpdateFileStore { impl UpdateFileStore { pub fn new(path: impl AsRef) -> Result { - let path = path.as_ref().join("updates/updates_files"); + let path = path.as_ref().join(UPDATE_FILES_PATH); std::fs::create_dir_all(&path).unwrap(); Ok(Self { path }) } + /// Created a new temporary update file. + /// + /// A call to persist is needed to persist in the database. pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> { let file = NamedTempFile::new().unwrap(); let uuid = Uuid::new_v4(); @@ -54,10 +60,45 @@ impl UpdateFileStore { Ok((uuid, update_file)) } + /// Returns a the file corresponding to the requested uuid. pub fn get_update(&self, uuid: Uuid) -> Result { let path = self.path.join(uuid.to_string()); - println!("reading in {}", path.display()); let file = File::open(path).unwrap(); Ok(file) } + + /// Copies the content of the update file poited to by uuid to dst directory. + pub fn snapshot(&self, uuid: Uuid, dst: impl AsRef) -> Result<()> { + let src = self.path.join(uuid.to_string()); + let mut dst = dst.as_ref().join(UPDATE_FILES_PATH); + std::fs::create_dir_all(&dst).unwrap(); + dst.push(uuid.to_string()); + std::fs::copy(src, dst).unwrap(); + Ok(()) + } + + /// Peform a dump of the given update file uuid into the provided snapshot path. + pub fn dump(&self, _uuid: Uuid, _snapshot_path: impl AsRef) -> Result<()> { + todo!() + //let update_file_path = self.path.join(uuid.to_string()); + //let snapshot_file_path: snapshot_path.as_ref().join(format!("update_files/uuid", uuid)); + + //let update_file = File::open(update_file_path).unwrap(); + + + //let mut document_reader = DocumentBatchReader::from_reader(update_file).unwrap(); + + //let mut document_buffer = Map::new(); + //// TODO: we need to find a way to do this more efficiently. 
(create a custom serializer to + //// jsonl for example...) + //while let Some((index, document)) = document_reader.next_document_with_index().unwrap() { + //for (field_id, content) in document.iter() { + //let field_name = index.get_by_left(&field_id).unwrap(); + //let content = serde_json::from_slice(content).unwrap(); + //document_buffer.insert(field_name.to_string(), content); + //} + + //} + //Ok(()) + } } diff --git a/meilisearch-lib/src/index_controller/updates/message.rs b/meilisearch-lib/src/index_controller/updates/message.rs index 09dc7443a..f96c707fd 100644 --- a/meilisearch-lib/src/index_controller/updates/message.rs +++ b/meilisearch-lib/src/index_controller/updates/message.rs @@ -4,6 +4,8 @@ use std::path::PathBuf; use tokio::sync::{mpsc, oneshot}; use uuid::Uuid; +use crate::index::Index; + use super::error::Result; use super::{Update, UpdateStatus, UpdateStoreInfo}; @@ -28,7 +30,7 @@ pub enum UpdateMsg { ret: oneshot::Sender>, }, Snapshot { - uuids: HashSet, + indexes: Vec, path: PathBuf, ret: oneshot::Sender>, }, @@ -43,17 +45,20 @@ pub enum UpdateMsg { } impl UpdateMsg { + pub async fn snapshot(sender: &mpsc::Sender, path: PathBuf, indexes: Vec) -> Result<()> { + let (ret, rcv) = oneshot::channel(); + let msg = Self::Snapshot { path, indexes, ret }; + sender.send(msg).await?; + rcv.await? + } + pub async fn dump( sender: &mpsc::Sender, uuids: HashSet, path: PathBuf, ) -> Result<()> { let (ret, rcv) = oneshot::channel(); - let msg = Self::Dump { - path, - uuids, - ret, - }; + let msg = Self::Dump { path, uuids, ret }; sender.send(msg).await?; rcv.await? } @@ -63,11 +68,7 @@ impl UpdateMsg { update: Update, ) -> Result { let (ret, rcv) = oneshot::channel(); - let msg = Self::Update { - uuid, - update, - ret, - }; + let msg = Self::Update { uuid, update, ret }; sender.send(msg).await?; rcv.await? 
} @@ -78,11 +79,7 @@ impl UpdateMsg { id: u64, ) -> Result { let (ret, rcv) = oneshot::channel(); - let msg = Self::GetUpdate { - uuid, - id, - ret, - }; + let msg = Self::GetUpdate { uuid, id, ret }; sender.send(msg).await?; rcv.await? } @@ -92,21 +89,14 @@ impl UpdateMsg { uuid: Uuid, ) -> Result> { let (ret, rcv) = oneshot::channel(); - let msg = Self::ListUpdates { - uuid, - ret, - }; + let msg = Self::ListUpdates { uuid, ret }; sender.send(msg).await?; rcv.await? } - pub async fn get_info( - sender: &mpsc::Sender, - ) -> Result { + pub async fn get_info(sender: &mpsc::Sender) -> Result { let (ret, rcv) = oneshot::channel(); - let msg = Self::GetInfo { - ret, - }; + let msg = Self::GetInfo { ret }; sender.send(msg).await?; rcv.await? } diff --git a/meilisearch-lib/src/index_controller/updates/mod.rs b/meilisearch-lib/src/index_controller/updates/mod.rs index 2027f5245..63716928f 100644 --- a/meilisearch-lib/src/index_controller/updates/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/mod.rs @@ -24,7 +24,7 @@ use uuid::Uuid; use self::error::{Result, UpdateLoopError}; pub use self::message::UpdateMsg; use self::store::{UpdateStore, UpdateStoreInfo}; -use crate::index::{Settings, Unchecked}; +use crate::index::{Index, Settings, Unchecked}; use crate::index_controller::update_file_store::UpdateFileStore; use status::UpdateStatus; @@ -123,12 +123,11 @@ impl UpdateLoop { let must_exit = Arc::new(AtomicBool::new(false)); - let store = UpdateStore::open(options, &path, index_resolver.clone(), must_exit.clone())?; + let update_file_store = UpdateFileStore::new(&path).unwrap(); + let store = UpdateStore::open(options, &path, index_resolver.clone(), must_exit.clone(), update_file_store.clone())?; let inbox = Some(inbox); - let update_file_store = UpdateFileStore::new(&path).unwrap(); - Ok(Self { store, inbox, @@ -179,8 +178,8 @@ impl UpdateLoop { Delete { uuid, ret } => { let _ = ret.send(self.handle_delete(uuid).await); } - Snapshot { uuids, path, ret } => { - 
let _ = ret.send(self.handle_snapshot(uuids, path).await); + Snapshot { indexes, path, ret } => { + let _ = ret.send(self.handle_snapshot(indexes, path).await); } GetInfo { ret } => { let _ = ret.send(self.handle_get_info().await); @@ -270,15 +269,13 @@ impl UpdateLoop { Ok(()) } - async fn handle_snapshot(&self, _uuids: HashSet,_pathh: PathBuf) -> Result<()> { - todo!() - //let index_handle = self.index_resolver.clone(); - //let update_store = self.store.clone(); + async fn handle_snapshot(&self, indexes: Vec, path: PathBuf) -> Result<()> { + let update_store = self.store.clone(); - //tokio::task::spawn_blocking(move || update_store.snapshot(&uuids, &path, index_handle)) - //.await??; + tokio::task::spawn_blocking(move || update_store.snapshot(indexes, path)) + .await??; - //Ok(()) + Ok(()) } async fn handle_dump(&self, uuids: HashSet, path: PathBuf) -> Result<()> { diff --git a/meilisearch-lib/src/index_controller/updates/store/dump.rs b/meilisearch-lib/src/index_controller/updates/store/dump.rs index cf5d7e842..996bc3432 100644 --- a/meilisearch-lib/src/index_controller/updates/store/dump.rs +++ b/meilisearch-lib/src/index_controller/updates/store/dump.rs @@ -45,16 +45,17 @@ impl UpdateStore { uuids: &HashSet, path: impl AsRef, ) -> Result<()> { - let dump_data_path = path.as_ref().join("data.jsonl"); - let mut dump_data_file = File::create(dump_data_path)?; + //let dump_data_path = path.as_ref().join("data.jsonl"); + //let mut dump_data_file = File::create(dump_data_path)?; - let update_files_path = path.as_ref().join(super::UPDATE_DIR); - create_dir_all(&update_files_path)?; + //let update_files_path = path.as_ref().join(super::UPDATE_DIR); + //create_dir_all(&update_files_path)?; - self.dump_pending(txn, uuids, &mut dump_data_file, &path)?; - self.dump_completed(txn, uuids, &mut dump_data_file)?; + //self.dump_pending(txn, uuids, &mut dump_data_file, &path)?; + //self.dump_completed(txn, uuids, &mut dump_data_file)?; - Ok(()) + //Ok(()) + todo!() } fn 
dump_pending( diff --git a/meilisearch-lib/src/index_controller/updates/store/mod.rs b/meilisearch-lib/src/index_controller/updates/store/mod.rs index 8d40d8309..b7bf1b457 100644 --- a/meilisearch-lib/src/index_controller/updates/store/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/store/mod.rs @@ -22,6 +22,7 @@ use tokio::sync::mpsc; use tokio::sync::mpsc::error::TrySendError; use tokio::time::timeout; use uuid::Uuid; +use rayon::prelude::*; use codec::*; @@ -31,12 +32,11 @@ use super::status::{Enqueued, Processing}; use crate::EnvSizer; use crate::index_controller::update_files_path; use crate::index_controller::updates::*; +use crate::index::Index; #[allow(clippy::upper_case_acronyms)] type BEU64 = U64; -const UPDATE_DIR: &str = "update_files"; - #[derive(Debug)] pub struct UpdateStoreInfo { /// Size of the update store in bytes. @@ -108,6 +108,7 @@ pub struct UpdateStore { state: Arc, /// Wake up the loop when a new event occurs. notification_sender: mpsc::Sender<()>, + update_file_store: UpdateFileStore, path: PathBuf, } @@ -115,6 +116,7 @@ impl UpdateStore { fn new( mut options: EnvOpenOptions, path: impl AsRef, + update_file_store: UpdateFileStore, ) -> anyhow::Result<(Self, mpsc::Receiver<()>)> { options.max_dbs(5); @@ -138,6 +140,7 @@ impl UpdateStore { state, notification_sender, path: path.as_ref().to_owned(), + update_file_store, }, notification_receiver, )) @@ -148,8 +151,9 @@ impl UpdateStore { path: impl AsRef, index_resolver: Arc, must_exit: Arc, + update_file_store: UpdateFileStore, ) -> anyhow::Result> { - let (update_store, mut notification_receiver) = Self::new(options, path)?; + let (update_store, mut notification_receiver) = Self::new(options, path, update_file_store)?; let update_store = Arc::new(update_store); // Send a first notification to trigger the process. 
@@ -482,13 +486,13 @@ impl UpdateStore { pub fn snapshot( &self, - _uuids: &HashSet, + indexes: Vec, path: impl AsRef, - handle: Arc, ) -> Result<()> { let state_lock = self.state.write(); state_lock.swap(State::Snapshoting); + let txn = self.env.write_txn()?; let update_path = path.as_ref().join("updates"); @@ -501,42 +505,28 @@ impl UpdateStore { // create db snapshot self.env.copy_to_path(&db_path, CompactionOption::Enabled)?; - let update_files_path = update_path.join(UPDATE_DIR); - create_dir_all(&update_files_path)?; - let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data(); + let uuids: HashSet<_> = indexes.iter().map(|i| i.uuid).collect(); for entry in pendings { - let ((_, _uuid, _), _pending) = entry?; - //if uuids.contains(&uuid) { - //if let Enqueued { - //content: Some(uuid), - //.. - //} = pending.decode()? - //{ - //let path = update_uuid_to_file_path(&self.path, uuid); - //copy(path, &update_files_path)?; - //} - //} + let ((_, uuid, _), pending) = entry?; + if uuids.contains(&uuid) { + if let Enqueued { + meta: RegisterUpdate::DocumentAddition { + content_uuid, .. + }, + .. + } = pending.decode()? + { + self.update_file_store.snapshot(content_uuid, &path).unwrap(); + } + } } - let _path = &path.as_ref().to_path_buf(); - let _handle = &handle; - // Perform the snapshot of each index concurently. 
Only a third of the capabilities of - // the index actor at a time not to put too much pressure on the index actor - todo!() - //let mut stream = futures::stream::iter(uuids.iter()) - //.map(move |uuid| IndexMsg::snapshot(handle,*uuid, path.clone())) - //.buffer_unordered(CONCURRENT_INDEX_MSG / 3); + let path = path.as_ref().to_owned(); + indexes.par_iter().try_for_each(|index| index.snapshot(&path)).unwrap(); - //Handle::current().block_on(async { - //while let Some(res) = stream.next().await { - //res?; - //} - //Ok(()) as Result<()> - //})?; - - //Ok(()) + Ok(()) } pub fn get_info(&self) -> Result { diff --git a/meilisearch-lib/src/lib.rs b/meilisearch-lib/src/lib.rs index 64f93695e..23538099c 100644 --- a/meilisearch-lib/src/lib.rs +++ b/meilisearch-lib/src/lib.rs @@ -7,6 +7,8 @@ pub mod index_controller; pub use index_controller::{IndexController as MeiliSearch, updates::RegisterUpdate}; +mod compression; + use walkdir::WalkDir; pub trait EnvSizer { From 6a1964f146e217f9d8dfa3365ce8169f75c82463 Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 28 Sep 2021 11:59:55 +0200 Subject: [PATCH 12/37] restore dumps --- meilisearch-lib/src/compression.rs | 6 +- meilisearch-lib/src/document_formats.rs | 52 ++++++ meilisearch-lib/src/index/dump.rs | 174 ++++++++++-------- meilisearch-lib/src/index/updates.rs | 110 +++++------ .../index_controller/dump_actor/loaders/v2.rs | 16 +- .../src/index_controller/dump_actor/mod.rs | 3 +- .../index_controller/index_resolver/mod.rs | 36 +++- .../src/index_controller/update_file_store.rs | 71 +++++-- .../src/index_controller/updates/message.rs | 7 +- .../src/index_controller/updates/mod.rs | 12 +- .../index_controller/updates/store/dump.rs | 164 +++++++---------- .../src/index_controller/updates/store/mod.rs | 44 ++--- meilisearch-lib/src/lib.rs | 1 + 13 files changed, 395 insertions(+), 301 deletions(-) create mode 100644 meilisearch-lib/src/document_formats.rs diff --git a/meilisearch-lib/src/compression.rs 
b/meilisearch-lib/src/compression.rs index f9620eb2a..cd60854c6 100644 --- a/meilisearch-lib/src/compression.rs +++ b/meilisearch-lib/src/compression.rs @@ -1,9 +1,9 @@ -use std::fs::{create_dir_all, File}; +use std::fs::File; use std::io::Write; use std::path::Path; -use flate2::{read::GzDecoder, write::GzEncoder, Compression}; -use tar::{Archive, Builder}; +use flate2::{write::GzEncoder, Compression}; +use tar::Builder; pub fn to_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Result<()> { let mut f = File::create(dest)?; diff --git a/meilisearch-lib/src/document_formats.rs b/meilisearch-lib/src/document_formats.rs new file mode 100644 index 000000000..8540ce4b2 --- /dev/null +++ b/meilisearch-lib/src/document_formats.rs @@ -0,0 +1,52 @@ +use std::{fmt, io::{Read, Seek, Write}}; + +use milli::documents::DocumentBatchBuilder; +use serde_json::{Deserializer, Map, Value}; + +type Result = std::result::Result; + +#[derive(Debug)] +pub enum PayloadType { + Jsonl, +} + +impl fmt::Display for PayloadType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + PayloadType::Jsonl => write!(f, "ndjson"), + } + } +} + +#[derive(thiserror::Error, Debug)] +pub enum DocumentFormatError { + #[error("Internal error: {0}")] + Internal(Box), + #[error("{0}. The {1} payload provided is malformed.")] + MalformedPayload(Box, PayloadType), +} + +internal_error!( + DocumentFormatError: milli::documents::Error +); + +macro_rules! malformed { + ($type:path, $e:expr) => { + $e.map_err(|e| DocumentFormatError::MalformedPayload(Box::new(e), $type)) + }; +} + +/// read jsonl from input and write an obkv batch to writer. 
+pub fn read_jsonl(input: impl Read, writer: impl Write + Seek) -> Result<()> { + let mut builder = DocumentBatchBuilder::new(writer)?; + let stream = Deserializer::from_reader(input).into_iter::>(); + + for value in stream { + let value = malformed!(PayloadType::Jsonl, value)?; + builder.add_documents(&value)?; + } + + builder.finish()?; + + Ok(()) +} diff --git a/meilisearch-lib/src/index/dump.rs b/meilisearch-lib/src/index/dump.rs index 018ae6d2f..8049df500 100644 --- a/meilisearch-lib/src/index/dump.rs +++ b/meilisearch-lib/src/index/dump.rs @@ -1,12 +1,18 @@ -use std::fs::File; -use std::io::Write; +use std::fs::{create_dir_all, File}; +use std::io::{BufReader, Seek, SeekFrom, Write}; use std::path::Path; -use heed::RoTxn; +use anyhow::Context; +use heed::{EnvOpenOptions, RoTxn}; use indexmap::IndexMap; +use milli::documents::DocumentBatchReader; use serde::{Deserialize, Serialize}; +use serde_json::Value; -use crate::options::IndexerOpts; +use crate::document_formats::read_jsonl; +use crate::index::update_handler::UpdateHandler; +use crate::index::updates::apply_settings_to_builder; +use crate::index_controller::{asc_ranking_rule, desc_ranking_rule}; use super::error::Result; use super::{Index, Settings, Unchecked}; @@ -24,6 +30,11 @@ impl Index { pub fn dump(&self, path: impl AsRef) -> Result<()> { // acquire write txn make sure any ongoing write is finished before we start. 
let txn = self.env.write_txn()?; + let path = path + .as_ref() + .join(format!("indexes/{}", self.uuid.to_string())); + + create_dir_all(&path)?; self.dump_documents(&txn, &path)?; self.dump_meta(&txn, &path)?; @@ -75,92 +86,101 @@ impl Index { } pub fn load_dump( - _src: impl AsRef, - _dst: impl AsRef, - _size: usize, - _indexing_options: &IndexerOpts, + src: impl AsRef, + dst: impl AsRef, + size: usize, + update_handler: &UpdateHandler, ) -> anyhow::Result<()> { - //let dir_name = src - //.as_ref() - //.file_name() - //.with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?; + let dir_name = src + .as_ref() + .file_name() + .with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?; - //let dst_dir_path = dst.as_ref().join("indexes").join(dir_name); - //create_dir_all(&dst_dir_path)?; + let dst_dir_path = dst.as_ref().join("indexes").join(dir_name); + create_dir_all(&dst_dir_path)?; - //let meta_path = src.as_ref().join(META_FILE_NAME); - //let mut meta_file = File::open(meta_path)?; + let meta_path = src.as_ref().join(META_FILE_NAME); + let mut meta_file = File::open(meta_path)?; - //// We first deserialize the dump meta into a serde_json::Value and change - //// the custom ranking rules settings from the old format to the new format. - //let mut meta: Value = serde_json::from_reader(&mut meta_file)?; - //if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") { - //convert_custom_ranking_rules(ranking_rules); - //} + // We first deserialize the dump meta into a serde_json::Value and change + // the custom ranking rules settings from the old format to the new format. + let mut meta: Value = serde_json::from_reader(&mut meta_file)?; + if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") { + convert_custom_ranking_rules(ranking_rules); + } - //// Then we serialize it back into a vec to deserialize it - //// into a `DumpMeta` struct with the newly patched `rankingRules` format. 
- //let patched_meta = serde_json::to_vec(&meta)?; + // Then we serialize it back into a vec to deserialize it + // into a `DumpMeta` struct with the newly patched `rankingRules` format. + let patched_meta = serde_json::to_vec(&meta)?; - //let DumpMeta { - //settings, - //primary_key, - //} = serde_json::from_slice(&patched_meta)?; - //let settings = settings.check(); - //let index = Self::open(&dst_dir_path, size)?; - //let mut txn = index.write_txn()?; + let DumpMeta { + settings, + primary_key, + } = serde_json::from_slice(&patched_meta)?; + let settings = settings.check(); - //let handler = UpdateHandler::new(indexing_options)?; + let mut options = EnvOpenOptions::new(); + options.map_size(size); + let index = milli::Index::new(options, &dst_dir_path)?; - //index.update_settings_txn(&mut txn, &settings, handler.update_builder(0))?; + let mut txn = index.write_txn()?; - //let document_file_path = src.as_ref().join(DATA_FILE_NAME); - //let reader = File::open(&document_file_path)?; - //let mut reader = BufReader::new(reader); - //reader.fill_buf()?; - // If the document file is empty, we don't perform the document addition, to prevent - // a primary key error to be thrown. 
+ // Apply settings first + let builder = update_handler.update_builder(0); + let mut builder = builder.settings(&mut txn, &index); - todo!("fix obk document dumps") - //if !reader.buffer().is_empty() { - //index.update_documents_txn( - //&mut txn, - //IndexDocumentsMethod::UpdateDocuments, - //Some(reader), - //handler.update_builder(0), - //primary_key.as_deref(), - //)?; - //} + if let Some(primary_key) = primary_key { + builder.set_primary_key(primary_key); + } - //txn.commit()?; + apply_settings_to_builder(&settings, &mut builder); - //match Arc::try_unwrap(index.0) { - //Ok(inner) => inner.prepare_for_closing().wait(), - //Err(_) => bail!("Could not close index properly."), - //} + builder.execute(|_, _| ())?; - //Ok(()) + let document_file_path = src.as_ref().join(DATA_FILE_NAME); + let reader = BufReader::new(File::open(&document_file_path)?); + + let mut tmp_doc_file = tempfile::tempfile()?; + + read_jsonl(reader, &mut tmp_doc_file)?; + + tmp_doc_file.seek(SeekFrom::Start(0))?; + + let documents_reader = DocumentBatchReader::from_reader(tmp_doc_file)?; + + //If the document file is empty, we don't perform the document addition, to prevent + //a primary key error to be thrown. + if !documents_reader.is_empty() { + let builder = update_handler.update_builder(0).index_documents(&mut txn, &index); + builder.execute(documents_reader, |_, _| ())?; + } + + txn.commit()?; + + index.prepare_for_closing().wait(); + + Ok(()) } } -// /// Converts the ranking rules from the format `asc(_)`, `desc(_)` to the format `_:asc`, `_:desc`. -// /// -// /// This is done for compatibility reasons, and to avoid a new dump version, -// /// since the new syntax was introduced soon after the new dump version. 
-//fn convert_custom_ranking_rules(ranking_rules: &mut Value) { - //*ranking_rules = match ranking_rules.take() { - //Value::Array(values) => values - //.into_iter() - //.filter_map(|value| match value { - //Value::String(s) if s.starts_with("asc") => asc_ranking_rule(&s) - //.map(|f| format!("{}:asc", f)) - //.map(Value::String), - //Value::String(s) if s.starts_with("desc") => desc_ranking_rule(&s) - //.map(|f| format!("{}:desc", f)) - //.map(Value::String), - //otherwise => Some(otherwise), - //}) - //.collect(), - //otherwise => otherwise, - //} -//} +/// Converts the ranking rules from the format `asc(_)`, `desc(_)` to the format `_:asc`, `_:desc`. +/// +/// This is done for compatibility reasons, and to avoid a new dump version, +/// since the new syntax was introduced soon after the new dump version. +fn convert_custom_ranking_rules(ranking_rules: &mut Value) { + *ranking_rules = match ranking_rules.take() { + Value::Array(values) => values + .into_iter() + .filter_map(|value| match value { + Value::String(s) if s.starts_with("asc") => asc_ranking_rule(&s) + .map(|f| format!("{}:asc", f)) + .map(Value::String), + Value::String(s) if s.starts_with("desc") => desc_ranking_rule(&s) + .map(|f| format!("{}:desc", f)) + .map(Value::String), + otherwise => Some(otherwise), + }) + .collect(), + otherwise => otherwise, + } +} diff --git a/meilisearch-lib/src/index/updates.rs b/meilisearch-lib/src/index/updates.rs index 28d2734f0..44558fdae 100644 --- a/meilisearch-lib/src/index/updates.rs +++ b/meilisearch-lib/src/index/updates.rs @@ -266,59 +266,7 @@ impl Index { // We must use the write transaction of the update here. 
let mut builder = update_builder.settings(txn, self); - match settings.searchable_attributes { - Setting::Set(ref names) => builder.set_searchable_fields(names.clone()), - Setting::Reset => builder.reset_searchable_fields(), - Setting::NotSet => (), - } - - match settings.displayed_attributes { - Setting::Set(ref names) => builder.set_displayed_fields(names.clone()), - Setting::Reset => builder.reset_displayed_fields(), - Setting::NotSet => (), - } - - match settings.filterable_attributes { - Setting::Set(ref facets) => { - builder.set_filterable_fields(facets.clone().into_iter().collect()) - } - Setting::Reset => builder.reset_filterable_fields(), - Setting::NotSet => (), - } - - match settings.sortable_attributes { - Setting::Set(ref fields) => { - builder.set_sortable_fields(fields.iter().cloned().collect()) - } - Setting::Reset => builder.reset_sortable_fields(), - Setting::NotSet => (), - } - - match settings.ranking_rules { - Setting::Set(ref criteria) => builder.set_criteria(criteria.clone()), - Setting::Reset => builder.reset_criteria(), - Setting::NotSet => (), - } - - match settings.stop_words { - Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()), - Setting::Reset => builder.reset_stop_words(), - Setting::NotSet => (), - } - - match settings.synonyms { - Setting::Set(ref synonyms) => { - builder.set_synonyms(synonyms.clone().into_iter().collect()) - } - Setting::Reset => builder.reset_synonyms(), - Setting::NotSet => (), - } - - match settings.distinct_attribute { - Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()), - Setting::Reset => builder.reset_distinct_field(), - Setting::NotSet => (), - } + apply_settings_to_builder(settings, &mut builder); builder.execute(|indexing_step, update_id| { debug!("update {}: {:?}", update_id, indexing_step) @@ -328,6 +276,62 @@ impl Index { } } +pub fn apply_settings_to_builder(settings: &Settings, builder: &mut milli::update::Settings) { + match settings.searchable_attributes { 
+ Setting::Set(ref names) => builder.set_searchable_fields(names.clone()), + Setting::Reset => builder.reset_searchable_fields(), + Setting::NotSet => (), + } + + match settings.displayed_attributes { + Setting::Set(ref names) => builder.set_displayed_fields(names.clone()), + Setting::Reset => builder.reset_displayed_fields(), + Setting::NotSet => (), + } + + match settings.filterable_attributes { + Setting::Set(ref facets) => { + builder.set_filterable_fields(facets.clone().into_iter().collect()) + } + Setting::Reset => builder.reset_filterable_fields(), + Setting::NotSet => (), + } + + match settings.sortable_attributes { + Setting::Set(ref fields) => { + builder.set_sortable_fields(fields.iter().cloned().collect()) + } + Setting::Reset => builder.reset_sortable_fields(), + Setting::NotSet => (), + } + + match settings.ranking_rules { + Setting::Set(ref criteria) => builder.set_criteria(criteria.clone()), + Setting::Reset => builder.reset_criteria(), + Setting::NotSet => (), + } + + match settings.stop_words { + Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()), + Setting::Reset => builder.reset_stop_words(), + Setting::NotSet => (), + } + + match settings.synonyms { + Setting::Set(ref synonyms) => { + builder.set_synonyms(synonyms.clone().into_iter().collect()) + } + Setting::Reset => builder.reset_synonyms(), + Setting::NotSet => (), + } + + match settings.distinct_attribute { + Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()), + Setting::Reset => builder.reset_distinct_field(), + Setting::NotSet => (), + } +} + #[cfg(test)] mod test { use super::*; diff --git a/meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs b/meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs index 94b7321ae..8280e9613 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/loaders/v2.rs @@ -4,8 +4,8 @@ use chrono::{DateTime, Utc}; use log::info; use 
serde::{Deserialize, Serialize}; -use crate::index::Index; -use crate::index_controller::index_resolver::uuid_store::HeedUuidStore; +use crate::index_controller::index_resolver::IndexResolver; +use crate::index_controller::update_file_store::UpdateFileStore; use crate::index_controller::updates::store::UpdateStore; use crate::options::IndexerOpts; @@ -41,19 +41,11 @@ impl MetadataV2 { self.dump_date, self.db_version ); - info!("Loading index database."); - HeedUuidStore::load_dump(src.as_ref(), &dst)?; - - info!("Loading updates."); + IndexResolver::load_dump(src.as_ref(), &dst, index_db_size, indexing_options)?; + UpdateFileStore::load_dump(src.as_ref(), &dst)?; UpdateStore::load_dump(&src, &dst, update_db_size)?; info!("Loading indexes."); - let indexes_path = src.as_ref().join("indexes"); - let indexes = indexes_path.read_dir()?; - for index in indexes { - let index = index?; - Index::load_dump(&index.path(), &dst, index_db_size, indexing_options)?; - } Ok(()) } diff --git a/meilisearch-lib/src/index_controller/dump_actor/mod.rs b/meilisearch-lib/src/index_controller/dump_actor/mod.rs index b7c61f568..c2410107d 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/mod.rs @@ -115,6 +115,7 @@ pub fn load_dump( let tmp_src = tempfile::tempdir_in(".")?; let tmp_src_path = tmp_src.path(); + println!("importing to {}", dst_path.as_ref().display()); crate::from_tar_gz(&src_path, tmp_src_path)?; let meta_path = tmp_src_path.join(META_FILE_NAME); @@ -179,7 +180,7 @@ impl DumpTask { let uuids = self.index_resolver.dump(temp_dump_path.clone()).await?; - UpdateMsg::dump(&self.update_handle, uuids.into_iter().collect(), temp_dump_path.clone()).await?; + UpdateMsg::dump(&self.update_handle, uuids, temp_dump_path.clone()).await?; let dump_path = tokio::task::spawn_blocking(move || -> Result { let temp_dump_file = tempfile::NamedTempFile::new_in(&self.path)?; diff --git 
a/meilisearch-lib/src/index_controller/index_resolver/mod.rs b/meilisearch-lib/src/index_controller/index_resolver/mod.rs index 5721fce8a..dcd1ed512 100644 --- a/meilisearch-lib/src/index_controller/index_resolver/mod.rs +++ b/meilisearch-lib/src/index_controller/index_resolver/mod.rs @@ -1,6 +1,5 @@ pub mod uuid_store; mod index_store; -//mod message; pub mod error; use std::path::Path; @@ -10,7 +9,7 @@ use uuid_store::{UuidStore, HeedUuidStore}; use index_store::{IndexStore, MapIndexStore}; use error::{Result, IndexResolverError}; -use crate::{index::Index, options::IndexerOpts}; +use crate::{index::{Index, update_handler::UpdateHandler}, options::IndexerOpts}; pub type HardStateIndexResolver = IndexResolver; @@ -25,6 +24,28 @@ pub struct IndexResolver { index_store: I, } +impl IndexResolver { + pub fn load_dump( + src: impl AsRef, + dst: impl AsRef, + index_db_size: usize, + indexer_opts: &IndexerOpts, + ) -> anyhow::Result<()> { + HeedUuidStore::load_dump(&src, &dst)?; + + let indexes_path = src.as_ref().join("indexes"); + let indexes = indexes_path.read_dir()?; + + let update_handler = UpdateHandler::new(indexer_opts).unwrap(); + for index in indexes { + let index = index?; + Index::load_dump(&index.path(), &dst, index_db_size, &update_handler)?; + } + + Ok(()) + } +} + impl IndexResolver where U: UuidStore, I: IndexStore, @@ -39,8 +60,14 @@ where U: UuidStore, } } - pub async fn dump(&self, _path: impl AsRef) -> Result> { - todo!() + pub async fn dump(&self, path: impl AsRef) -> Result> { + let uuids = self.index_uuid_store.dump(path.as_ref().to_owned()).await?; + let mut indexes = Vec::new(); + for uuid in uuids { + indexes.push(self.get_index_by_uuid(uuid).await?); + } + + Ok(indexes) } pub async fn get_size(&self) -> Result { @@ -51,7 +78,6 @@ where U: UuidStore, pub async fn snapshot(&self, path: impl AsRef) -> Result> { let uuids = self.index_uuid_store.snapshot(path.as_ref().to_owned()).await?; let mut indexes = Vec::new(); - for uuid in uuids { 
indexes.push(self.get_index_by_uuid(uuid).await?); } diff --git a/meilisearch-lib/src/index_controller/update_file_store.rs b/meilisearch-lib/src/index_controller/update_file_store.rs index f21560f73..d7b3e2560 100644 --- a/meilisearch-lib/src/index_controller/update_file_store.rs +++ b/meilisearch-lib/src/index_controller/update_file_store.rs @@ -1,14 +1,17 @@ -use std::fs::File; +use std::fs::{File, create_dir_all}; +use std::io::{BufReader, BufWriter, Write}; use std::path::{Path, PathBuf}; use std::ops::{Deref, DerefMut}; -//use milli::documents::DocumentBatchReader; -//use serde_json::Map; +use milli::documents::DocumentBatchReader; +use serde_json::Map; use tempfile::NamedTempFile; use uuid::Uuid; const UPDATE_FILES_PATH: &str = "updates/updates_files"; +use crate::document_formats::read_jsonl; + use super::error::Result; pub struct UpdateFile { @@ -42,6 +45,27 @@ pub struct UpdateFileStore { } impl UpdateFileStore { + pub fn load_dump(src: impl AsRef, dst: impl AsRef) -> anyhow::Result<()> { + let src_update_files_path = src.as_ref().join(UPDATE_FILES_PATH); + let dst_update_files_path = dst.as_ref().join(UPDATE_FILES_PATH); + + create_dir_all(&dst_update_files_path).unwrap(); + + let entries = std::fs::read_dir(src_update_files_path).unwrap(); + + for entry in entries { + let entry = entry.unwrap(); + let update_file = BufReader::new(File::open(entry.path()).unwrap()); + let file_uuid = entry.file_name(); + let file_uuid = file_uuid.to_str().ok_or_else(|| anyhow::anyhow!("invalid update file name"))?; + let dst_path = dst_update_files_path.join(file_uuid); + let dst_file = BufWriter::new(File::create(dst_path)?); + read_jsonl(update_file, dst_file)?; + } + + Ok(()) + } + pub fn new(path: impl AsRef) -> Result { let path = path.as_ref().join(UPDATE_FILES_PATH); std::fs::create_dir_all(&path).unwrap(); @@ -78,27 +102,34 @@ impl UpdateFileStore { } /// Peform a dump of the given update file uuid into the provided snapshot path. 
- pub fn dump(&self, _uuid: Uuid, _snapshot_path: impl AsRef) -> Result<()> { - todo!() - //let update_file_path = self.path.join(uuid.to_string()); - //let snapshot_file_path: snapshot_path.as_ref().join(format!("update_files/uuid", uuid)); + pub fn dump(&self, uuid: Uuid, dump_path: impl AsRef) -> Result<()> { + let uuid_string = uuid.to_string(); + let update_file_path = self.path.join(&uuid_string); + let mut dst = dump_path.as_ref().join(UPDATE_FILES_PATH); + std::fs::create_dir_all(&dst).unwrap(); + dst.push(&uuid_string); - //let update_file = File::open(update_file_path).unwrap(); + let update_file = File::open(update_file_path).unwrap(); + let mut dst_file = NamedTempFile::new().unwrap(); + let mut document_reader = DocumentBatchReader::from_reader(update_file).unwrap(); + let mut document_buffer = Map::new(); + // TODO: we need to find a way to do this more efficiently. (create a custom serializer to + // jsonl for example...) + while let Some((index, document)) = document_reader.next_document_with_index().unwrap() { + for (field_id, content) in document.iter() { + let field_name = index.get_by_left(&field_id).unwrap(); + let content = serde_json::from_slice(content).unwrap(); + document_buffer.insert(field_name.to_string(), content); + } - //let mut document_reader = DocumentBatchReader::from_reader(update_file).unwrap(); + serde_json::to_writer(&mut dst_file, &document_buffer).unwrap(); + dst_file.write(b"\n").unwrap(); + document_buffer.clear(); + } - //let mut document_buffer = Map::new(); - //// TODO: we need to find a way to do this more efficiently. (create a custom serializer to - //// jsonl for example...) 
- //while let Some((index, document)) = document_reader.next_document_with_index().unwrap() { - //for (field_id, content) in document.iter() { - //let field_name = index.get_by_left(&field_id).unwrap(); - //let content = serde_json::from_slice(content).unwrap(); - //document_buffer.insert(field_name.to_string(), content); - //} + dst_file.persist(dst).unwrap(); - //} - //Ok(()) + Ok(()) } } diff --git a/meilisearch-lib/src/index_controller/updates/message.rs b/meilisearch-lib/src/index_controller/updates/message.rs index f96c707fd..22a920e12 100644 --- a/meilisearch-lib/src/index_controller/updates/message.rs +++ b/meilisearch-lib/src/index_controller/updates/message.rs @@ -1,4 +1,3 @@ -use std::collections::HashSet; use std::path::PathBuf; use tokio::sync::{mpsc, oneshot}; @@ -35,7 +34,7 @@ pub enum UpdateMsg { ret: oneshot::Sender>, }, Dump { - uuids: HashSet, + indexes: Vec, path: PathBuf, ret: oneshot::Sender>, }, @@ -54,11 +53,11 @@ impl UpdateMsg { pub async fn dump( sender: &mpsc::Sender, - uuids: HashSet, + indexes: Vec, path: PathBuf, ) -> Result<()> { let (ret, rcv) = oneshot::channel(); - let msg = Self::Dump { path, uuids, ret }; + let msg = Self::Dump { path, indexes, ret }; sender.send(msg).await?; rcv.await? 
} diff --git a/meilisearch-lib/src/index_controller/updates/mod.rs b/meilisearch-lib/src/index_controller/updates/mod.rs index 63716928f..733bda8e6 100644 --- a/meilisearch-lib/src/index_controller/updates/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/mod.rs @@ -3,7 +3,6 @@ mod message; pub mod status; pub mod store; -use std::collections::HashSet; use std::io; use std::path::{Path, PathBuf}; use std::sync::atomic::AtomicBool; @@ -104,7 +103,6 @@ pub struct UpdateLoop { store: Arc, inbox: Option>, update_file_store: UpdateFileStore, - index_resolver: Arc, must_exit: Arc, } @@ -133,7 +131,6 @@ impl UpdateLoop { inbox, must_exit, update_file_store, - index_resolver, }) } @@ -184,8 +181,8 @@ impl UpdateLoop { GetInfo { ret } => { let _ = ret.send(self.handle_get_info().await); } - Dump { uuids, path, ret } => { - let _ = ret.send(self.handle_dump(uuids, path).await); + Dump { indexes, path, ret } => { + let _ = ret.send(self.handle_dump(indexes, path).await); } } }) @@ -278,12 +275,11 @@ impl UpdateLoop { Ok(()) } - async fn handle_dump(&self, uuids: HashSet, path: PathBuf) -> Result<()> { - let index_handle = self.index_resolver.clone(); + async fn handle_dump(&self, indexes: Vec, path: PathBuf) -> Result<()> { let update_store = self.store.clone(); tokio::task::spawn_blocking(move || -> Result<()> { - update_store.dump(&uuids, path.to_path_buf(), index_handle)?; + update_store.dump(&indexes, path.to_path_buf())?; Ok(()) }) .await??; diff --git a/meilisearch-lib/src/index_controller/updates/store/dump.rs b/meilisearch-lib/src/index_controller/updates/store/dump.rs index 996bc3432..68380a9d4 100644 --- a/meilisearch-lib/src/index_controller/updates/store/dump.rs +++ b/meilisearch-lib/src/index_controller/updates/store/dump.rs @@ -1,11 +1,17 @@ -use std::{collections::HashSet, fs::{create_dir_all, File}, io::Write, path::{Path, PathBuf}, sync::Arc}; +use std::collections::HashSet; +use std::path::{Path, PathBuf}; +use std::io::{BufReader, Write}; +use 
std::fs::{File, create_dir_all}; -use heed::RoTxn; +use heed::{EnvOpenOptions, RoTxn}; +use rayon::prelude::*; use serde::{Deserialize, Serialize}; +use serde_json::Deserializer; +use tempfile::{NamedTempFile, TempDir}; use uuid::Uuid; use super::{Result, State, UpdateStore}; -use crate::index_controller::{index_resolver::HardStateIndexResolver, updates::status::UpdateStatus}; +use crate::{RegisterUpdate, index::Index, index_controller::{update_file_store::UpdateFileStore, updates::status::{Enqueued, UpdateStatus}}}; #[derive(Serialize, Deserialize)] struct UpdateEntry { @@ -16,9 +22,8 @@ struct UpdateEntry { impl UpdateStore { pub fn dump( &self, - uuids: &HashSet, + indexes: &[Index], path: PathBuf, - handle: Arc, ) -> Result<()> { let state_lock = self.state.write(); state_lock.swap(State::Dumping); @@ -26,15 +31,11 @@ impl UpdateStore { // txn must *always* be acquired after state lock, or it will dead lock. let txn = self.env.write_txn()?; - let dump_path = path.join("updates"); - create_dir_all(&dump_path)?; + let uuids = indexes.iter().map(|i| i.uuid).collect(); - self.dump_updates(&txn, uuids, &dump_path)?; + self.dump_updates(&txn, &uuids, &path)?; - let fut = dump_indexes(uuids, handle, &path); - tokio::runtime::Handle::current().block_on(fut)?; - - state_lock.swap(State::Idle); + indexes.par_iter().try_for_each(|index| index.dump(&path)).unwrap(); Ok(()) } @@ -45,58 +46,59 @@ impl UpdateStore { uuids: &HashSet, path: impl AsRef, ) -> Result<()> { - //let dump_data_path = path.as_ref().join("data.jsonl"); - //let mut dump_data_file = File::create(dump_data_path)?; + let mut dump_data_file = NamedTempFile::new()?; - //let update_files_path = path.as_ref().join(super::UPDATE_DIR); - //create_dir_all(&update_files_path)?; + self.dump_pending(txn, uuids, &mut dump_data_file, &path)?; + self.dump_completed(txn, uuids, &mut dump_data_file)?; - //self.dump_pending(txn, uuids, &mut dump_data_file, &path)?; - //self.dump_completed(txn, uuids, &mut 
dump_data_file)?; + let mut dst_path = path.as_ref().join("updates"); + create_dir_all(&dst_path)?; + dst_path.push("data.jsonl"); + dump_data_file.persist(dst_path).unwrap(); - //Ok(()) - todo!() + Ok(()) } fn dump_pending( &self, - _txn: &RoTxn, - _uuids: &HashSet, - _file: &mut File, - _dst_path: impl AsRef, + txn: &RoTxn, + uuids: &HashSet, + mut file: impl Write, + dst_path: impl AsRef, ) -> Result<()> { - todo!() - //let pendings = self.pending_queue.iter(txn)?.lazily_decode_data(); + let pendings = self.pending_queue.iter(txn)?.lazily_decode_data(); - //for pending in pendings { - //let ((_, uuid, _), data) = pending?; - //if uuids.contains(&uuid) { - //let update = data.decode()?; + for pending in pendings { + let ((_, uuid, _), data) = pending?; + if uuids.contains(&uuid) { + let update = data.decode()?; - //if let Some(ref update_uuid) = update.content { - //let src = super::update_uuid_to_file_path(&self.path, *update_uuid); - //let dst = super::update_uuid_to_file_path(&dst_path, *update_uuid); - //std::fs::copy(src, dst)?; - //} + if let Enqueued { + meta: RegisterUpdate::DocumentAddition { + content_uuid, .. + }, .. 
+ } = update { + self.update_file_store.dump(content_uuid, &dst_path).unwrap(); + } - //let update_json = UpdateEntry { - //uuid, - //update: update.into(), - //}; + let update_json = UpdateEntry { + uuid, + update: update.into(), + }; - //serde_json::to_writer(&mut file, &update_json)?; - //file.write_all(b"\n")?; - //} - //} + serde_json::to_writer(&mut file, &update_json)?; + file.write_all(b"\n")?; + } + } - //Ok(()) + Ok(()) } fn dump_completed( &self, txn: &RoTxn, uuids: &HashSet, - mut file: &mut File, + mut file: impl Write, ) -> Result<()> { let updates = self.updates.iter(txn)?.lazily_decode_data(); @@ -116,65 +118,35 @@ impl UpdateStore { } pub fn load_dump( - _src: impl AsRef, - _dst: impl AsRef, - _db_size: usize, + src: impl AsRef, + dst: impl AsRef, + db_size: usize, ) -> anyhow::Result<()> { - todo!() - //let dst_update_path = dst.as_ref().join("updates/"); - //create_dir_all(&dst_update_path)?; - //let mut options = EnvOpenOptions::new(); - //options.map_size(db_size as usize); - //let (store, _) = UpdateStore::new(options, &dst_update_path)?; + println!("target path: {}", dst.as_ref().display()); - //let src_update_path = src.as_ref().join("updates"); - //let update_data = File::open(&src_update_path.join("data.jsonl"))?; - //let mut update_data = BufReader::new(update_data); + let mut options = EnvOpenOptions::new(); + options.map_size(db_size as usize); - //std::fs::create_dir_all(dst_update_path.join("update_files/"))?; + // create a dummy update fiel store, since it is not needed right now. 
+ let tmp = TempDir::new().unwrap(); + let update_file_store = UpdateFileStore::new(tmp.path()).unwrap(); + let (store, _) = UpdateStore::new(options, &dst, update_file_store)?; - //let mut wtxn = store.env.write_txn()?; - //let mut line = String::new(); - //loop { - //match update_data.read_line(&mut line) { - //Ok(0) => break, - //Ok(_) => { - //let UpdateEntry { uuid, update } = serde_json::from_str(&line)?; - //store.register_raw_updates(&mut wtxn, &update, uuid)?; + let src_update_path = src.as_ref().join("updates"); + let update_data = File::open(&src_update_path.join("data.jsonl"))?; + let update_data = BufReader::new(update_data); - //// Copy ascociated update path if it exists - //if let UpdateStatus::Enqueued(Enqueued { - //content: Some(uuid), - //.. - //}) = update - //{ - //let src = update_uuid_to_file_path(&src_update_path, uuid); - //let dst = update_uuid_to_file_path(&dst_update_path, uuid); - //std::fs::copy(src, dst)?; - //} - //} - //_ => break, - //} + let stream = Deserializer::from_reader(update_data).into_iter::(); + let mut wtxn = store.env.write_txn()?; - //line.clear(); - //} + for entry in stream { + let UpdateEntry { uuid, update } = entry?; + store.register_raw_updates(&mut wtxn, &update, uuid)?; + } - //wtxn.commit()?; + wtxn.commit()?; - //Ok(()) + Ok(()) } } - -async fn dump_indexes( - _uuids: &HashSet, - _handle: Arc, - _path: impl AsRef, -) -> Result<()> { - todo!() - //for uuid in uuids { - //IndexMsg::dump(&handle, *uuid, path.as_ref().to_owned()).await?; - //} - - //Ok(()) -} diff --git a/meilisearch-lib/src/index_controller/updates/store/mod.rs b/meilisearch-lib/src/index_controller/updates/store/mod.rs index b7bf1b457..01e7fd989 100644 --- a/meilisearch-lib/src/index_controller/updates/store/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/store/mod.rs @@ -262,28 +262,28 @@ impl UpdateStore { // /// Push already processed update in the UpdateStore without triggering the notification // /// process. 
This is useful for the dumps. - //pub fn register_raw_updates( - //&self, - //wtxn: &mut heed::RwTxn, - //update: &UpdateStatus, - //index_uuid: Uuid, - //) -> heed::Result<()> { - //match update { - //UpdateStatus::Enqueued(enqueued) => { - //let (global_id, _update_id) = self.next_update_id(wtxn, index_uuid)?; - //self.pending_queue.remap_key_type::().put( - //wtxn, - //&(global_id, index_uuid, enqueued.id()), - //enqueued, - //)?; - //} - //_ => { - //let _update_id = self.next_update_id_raw(wtxn, index_uuid)?; - //self.updates.put(wtxn, &(index_uuid, update.id()), update)?; - //} - //} - //Ok(()) - //} + pub fn register_raw_updates( + &self, + wtxn: &mut heed::RwTxn, + update: &UpdateStatus, + index_uuid: Uuid, + ) -> heed::Result<()> { + match update { + UpdateStatus::Enqueued(enqueued) => { + let (global_id, _update_id) = self.next_update_id(wtxn, index_uuid)?; + self.pending_queue.remap_key_type::().put( + wtxn, + &(global_id, index_uuid, enqueued.id()), + enqueued, + )?; + } + _ => { + let _update_id = self.next_update_id_raw(wtxn, index_uuid)?; + self.updates.put(wtxn, &(index_uuid, update.id()), update)?; + } + } + Ok(()) + } /// Executes the user provided function on the next pending update (the one with the lowest id). 
/// This is asynchronous as it let the user process the update with a read-only txn and diff --git a/meilisearch-lib/src/lib.rs b/meilisearch-lib/src/lib.rs index 23538099c..93fd2f094 100644 --- a/meilisearch-lib/src/lib.rs +++ b/meilisearch-lib/src/lib.rs @@ -8,6 +8,7 @@ pub mod index_controller; pub use index_controller::{IndexController as MeiliSearch, updates::RegisterUpdate}; mod compression; +mod document_formats; use walkdir::WalkDir; From 9ac999ca59bf1d28921d369e9beffbf0e74ceb29 Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 28 Sep 2021 12:00:35 +0200 Subject: [PATCH 13/37] remove uuid resolver and index actor --- .../src/index_controller/indexes/error.rs | 64 --- .../src/index_controller/indexes/message.rs | 212 -------- .../src/index_controller/indexes/mod.rs | 483 ------------------ .../src/index_controller/indexes/store.rs | 113 ---- .../index_controller/uuid_resolver/error.rs | 50 -- .../index_controller/uuid_resolver/message.rs | 89 ---- .../src/index_controller/uuid_resolver/mod.rs | 118 ----- .../index_controller/uuid_resolver/store.rs | 225 -------- 8 files changed, 1354 deletions(-) delete mode 100644 meilisearch-lib/src/index_controller/indexes/error.rs delete mode 100644 meilisearch-lib/src/index_controller/indexes/message.rs delete mode 100644 meilisearch-lib/src/index_controller/indexes/mod.rs delete mode 100644 meilisearch-lib/src/index_controller/indexes/store.rs delete mode 100644 meilisearch-lib/src/index_controller/uuid_resolver/error.rs delete mode 100644 meilisearch-lib/src/index_controller/uuid_resolver/message.rs delete mode 100644 meilisearch-lib/src/index_controller/uuid_resolver/mod.rs delete mode 100644 meilisearch-lib/src/index_controller/uuid_resolver/store.rs diff --git a/meilisearch-lib/src/index_controller/indexes/error.rs b/meilisearch-lib/src/index_controller/indexes/error.rs deleted file mode 100644 index 51fe273f7..000000000 --- a/meilisearch-lib/src/index_controller/indexes/error.rs +++ /dev/null @@ -1,64 +0,0 @@ -use 
std::fmt; - -use meilisearch_error::{Code, ErrorCode}; - -use crate::{error::MilliError, index::error::IndexError}; - -pub type Result = std::result::Result; - -#[derive(thiserror::Error, Debug)] -pub enum IndexActorError { - #[error("{0}")] - IndexError(#[from] IndexError), - #[error("Index already exists")] - IndexAlreadyExists, - #[error("Index not found")] - UnexistingIndex, - #[error("A primary key is already present. It's impossible to update it")] - ExistingPrimaryKey, - #[error("Internal Error: {0}")] - Internal(Box), - #[error("{0}")] - Milli(#[from] milli::Error), -} - -impl From> for IndexActorError -where T: Send + Sync + 'static + fmt::Debug -{ - fn from(other: tokio::sync::mpsc::error::SendError) -> Self { - Self::Internal(Box::new(other)) - } -} - -impl From for IndexActorError { - fn from(other: tokio::sync::oneshot::error::RecvError) -> Self { - Self::Internal(Box::new(other)) - } -} - -macro_rules! internal_error { - ($($other:path), *) => { - $( - impl From<$other> for IndexActorError { - fn from(other: $other) -> Self { - Self::Internal(Box::new(other)) - } - } - )* - } -} - -internal_error!(heed::Error, tokio::task::JoinError, std::io::Error); - -impl ErrorCode for IndexActorError { - fn error_code(&self) -> Code { - match self { - IndexActorError::IndexError(e) => e.error_code(), - IndexActorError::IndexAlreadyExists => Code::IndexAlreadyExists, - IndexActorError::UnexistingIndex => Code::IndexNotFound, - IndexActorError::ExistingPrimaryKey => Code::PrimaryKeyAlreadyPresent, - IndexActorError::Internal(_) => Code::Internal, - IndexActorError::Milli(e) => MilliError(e).error_code(), - } - } -} diff --git a/meilisearch-lib/src/index_controller/indexes/message.rs b/meilisearch-lib/src/index_controller/indexes/message.rs deleted file mode 100644 index e9c67d0ab..000000000 --- a/meilisearch-lib/src/index_controller/indexes/message.rs +++ /dev/null @@ -1,212 +0,0 @@ -use std::path::PathBuf; - -use tokio::sync::{mpsc, oneshot}; -use uuid::Uuid; - 
-use super::error::Result; -use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings}; -use crate::index_controller::updates::status::{Failed, Processed, Processing}; -use crate::index_controller::{IndexSettings, IndexStats}; - -use super::IndexMeta; - -#[allow(clippy::large_enum_variant)] -#[derive(Debug)] -pub enum IndexMsg { - CreateIndex { - uuid: Uuid, - primary_key: Option, - ret: oneshot::Sender>, - }, - Update { - uuid: Uuid, - meta: Processing, - ret: oneshot::Sender>>, - }, - Search { - uuid: Uuid, - query: SearchQuery, - ret: oneshot::Sender>, - }, - Settings { - uuid: Uuid, - ret: oneshot::Sender>>, - }, - Documents { - uuid: Uuid, - attributes_to_retrieve: Option>, - offset: usize, - limit: usize, - ret: oneshot::Sender>>, - }, - Document { - uuid: Uuid, - attributes_to_retrieve: Option>, - doc_id: String, - ret: oneshot::Sender>, - }, - Delete { - uuid: Uuid, - ret: oneshot::Sender>, - }, - GetMeta { - uuid: Uuid, - ret: oneshot::Sender>, - }, - UpdateIndex { - uuid: Uuid, - index_settings: IndexSettings, - ret: oneshot::Sender>, - }, - Snapshot { - uuid: Uuid, - path: PathBuf, - ret: oneshot::Sender>, - }, - Dump { - uuid: Uuid, - path: PathBuf, - ret: oneshot::Sender>, - }, - GetStats { - uuid: Uuid, - ret: oneshot::Sender>, - }, -} - -impl IndexMsg { - pub async fn search( - sender: &mpsc::Sender, - uuid: Uuid, - query: SearchQuery, - ) -> Result { - let (ret, rcv) = oneshot::channel(); - let msg = Self::Search { - ret, - uuid, - query, - }; - sender.send(msg).await?; - rcv.await? - } - - pub async fn update_index( - sender: &mpsc::Sender, - uuid: Uuid, - index_settings: IndexSettings, - ) -> Result { - let (ret, rcv) = oneshot::channel(); - let msg = Self::UpdateIndex { - ret, - uuid, - index_settings, - }; - sender.send(msg).await?; - rcv.await? 
- } - - pub async fn create_index( - sender: &mpsc::Sender, - uuid: Uuid, - primary_key: Option, - ) -> Result { - let (ret, rcv) = oneshot::channel(); - let msg = Self::CreateIndex { - ret, - uuid, - primary_key, - }; - sender.send(msg).await?; - rcv.await? - } - - pub async fn index_meta(sender: &mpsc::Sender, uuid: Uuid) -> Result { - let (ret, rcv) = oneshot::channel(); - let msg = Self::GetMeta { ret, uuid }; - sender.send(msg).await?; - rcv.await? - } - - pub async fn index_stats(sender: &mpsc::Sender, uuid: Uuid) -> Result { - let (ret, rcv) = oneshot::channel(); - let msg = Self::GetStats { ret, uuid }; - sender.send(msg).await?; - rcv.await? - } - - pub async fn settings(sender: &mpsc::Sender, uuid: Uuid) -> Result> { - let (ret, rcv) = oneshot::channel(); - let msg = Self::Settings { ret, uuid }; - sender.send(msg).await?; - rcv.await? - } - - pub async fn documents( - sender: &mpsc::Sender, - uuid: Uuid, - offset: usize, - limit: usize, - attributes_to_retrieve: Option>, - ) -> Result> { - let (ret, rcv) = oneshot::channel(); - let msg = Self::Documents { - ret, - uuid, - attributes_to_retrieve, - offset, - limit, - }; - sender.send(msg).await?; - rcv.await? - } - - pub async fn document( - sender: &mpsc::Sender, - uuid: Uuid, - attributes_to_retrieve: Option>, - doc_id: String, - ) -> Result { - let (ret, rcv) = oneshot::channel(); - let msg = Self::Document { - ret, - uuid, - attributes_to_retrieve, - doc_id, - }; - sender.send(msg).await?; - rcv.await? - } - - pub async fn update(sender: &mpsc::Sender, uuid: Uuid, meta: Processing) -> Result> { - let (ret, rcv) = oneshot::channel(); - let msg = Self::Update { - ret, - uuid, - meta, - }; - sender.send(msg).await?; - rcv.await? - } - - pub async fn snapshot(sender: &mpsc::Sender, uuid: Uuid, path: PathBuf) -> Result<()> { - let (ret, rcv) = oneshot::channel(); - let msg = Self::Snapshot { - uuid, - path, - ret, - }; - sender.send(msg).await?; - rcv.await? 
- } - - pub async fn dump(sender: &mpsc::Sender, uuid: Uuid, path: PathBuf) -> Result<()> { - let (ret, rcv) = oneshot::channel(); - let msg = Self::Dump { - uuid, - ret, - path, - }; - sender.send(msg).await?; - rcv.await? - } -} diff --git a/meilisearch-lib/src/index_controller/indexes/mod.rs b/meilisearch-lib/src/index_controller/indexes/mod.rs deleted file mode 100644 index 48649cf40..000000000 --- a/meilisearch-lib/src/index_controller/indexes/mod.rs +++ /dev/null @@ -1,483 +0,0 @@ -use std::path::{Path, PathBuf}; -use std::sync::Arc; - -use async_stream::stream; -use futures::stream::StreamExt; -use heed::CompactionOption; -use log::debug; -use milli::update::UpdateBuilder; -use tokio::task::spawn_blocking; -use tokio::{fs, sync::mpsc}; - -use crate::index::update_handler::UpdateHandler; -use crate::index_controller::updates::status::{Failed, Processed, Processing}; -use crate::index_controller::{get_arc_ownership_blocking, IndexStats}; -use crate::options::IndexerOpts; - -pub const CONCURRENT_INDEX_MSG: usize = 10; - -use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -pub use message::IndexMsg; - -use crate::index::{Checked, Document, Index, SearchQuery, SearchResult, Settings}; -use error::Result; - -use self::error::IndexActorError; -use self::store::{IndexStore, MapIndexStore}; - -use super::IndexSettings; - -pub mod error; -mod message; -mod store; - -pub type IndexHandlerSender = mpsc::Sender; - -pub fn create_indexes_handler( - db_path: impl AsRef, - index_size: usize, - indexer_options: &IndexerOpts, -) -> anyhow::Result { - let (sender, receiver) = mpsc::channel(100); - let store = MapIndexStore::new(&db_path, index_size, indexer_options); - let actor = IndexActor::new(receiver, store, indexer_options)?; - - tokio::task::spawn(actor.run()); - - Ok(sender) -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct IndexMeta { - created_at: DateTime, - pub updated_at: 
DateTime, - pub primary_key: Option, -} - -impl IndexMeta { - pub fn new(index: &Index) -> Result { - let txn = index.read_txn()?; - Self::new_txn(index, &txn) - } - - fn new_txn(index: &Index, txn: &heed::RoTxn) -> Result { - let created_at = index.created_at(txn)?; - let updated_at = index.updated_at(txn)?; - let primary_key = index.primary_key(txn)?.map(String::from); - Ok(Self { - created_at, - updated_at, - primary_key, - }) - } -} - -pub struct IndexActor { - receiver: Option>, - update_handler: Arc, - store: S, -} - -impl IndexActor -where - S: IndexStore + Sync + Send, -{ - pub fn new( - receiver: mpsc::Receiver, - store: S, - options: &IndexerOpts, - ) -> anyhow::Result { - let update_handler = Arc::new(UpdateHandler::new(options)?); - let receiver = Some(receiver); - - Ok(Self { - receiver, - update_handler, - store, - }) - } - - /// `run` poll the write_receiver and read_receiver concurrently, but while messages send - /// through the read channel are processed concurrently, the messages sent through the write - /// channel are processed one at a time. - pub async fn run(mut self) { - let mut receiver = self - .receiver - .take() - .expect("Index Actor must have a inbox at this point."); - - let stream = stream! 
{ - loop { - match receiver.recv().await { - Some(msg) => yield msg, - None => break, - } - } - }; - - stream - .for_each_concurrent(Some(CONCURRENT_INDEX_MSG), |msg| self.handle_message(msg)) - .await; - } - - async fn handle_message(&self, msg: IndexMsg) { - use IndexMsg::*; - match msg { - CreateIndex { - uuid, - primary_key, - ret, - } => { - let _ = ret.send(self.handle_create_index(uuid, primary_key).await); - } - Update { ret, meta, uuid } => { - let _ = ret.send(self.handle_update(uuid, meta).await); - } - Search { ret, query, uuid } => { - let _ = ret.send(self.handle_search(uuid, query).await); - } - Settings { ret, uuid } => { - let _ = ret.send(self.handle_settings(uuid).await); - } - Documents { - ret, - uuid, - attributes_to_retrieve, - offset, - limit, - } => { - let _ = ret.send( - self.handle_fetch_documents(uuid, offset, limit, attributes_to_retrieve) - .await, - ); - } - Document { - uuid, - attributes_to_retrieve, - doc_id, - ret, - } => { - let _ = ret.send( - self.handle_fetch_document(uuid, doc_id, attributes_to_retrieve) - .await, - ); - } - Delete { uuid, ret } => { - let _ = ret.send(self.handle_delete(uuid).await); - } - GetMeta { uuid, ret } => { - let _ = ret.send(self.handle_get_meta(uuid).await); - } - UpdateIndex { - uuid, - index_settings, - ret, - } => { - let _ = ret.send(self.handle_update_index(uuid, index_settings).await); - } - Snapshot { uuid, path, ret } => { - let _ = ret.send(self.handle_snapshot(uuid, path).await); - } - Dump { uuid, path, ret } => { - let _ = ret.send(self.handle_dump(uuid, path).await); - } - GetStats { uuid, ret } => { - let _ = ret.send(self.handle_get_stats(uuid).await); - } - } - } - - async fn handle_search(&self, uuid: Uuid, query: SearchQuery) -> Result { - let index = self - .store - .get(uuid) - .await? 
- .ok_or(IndexActorError::UnexistingIndex)?; - let result = spawn_blocking(move || index.perform_search(query)).await??; - Ok(result) - } - - async fn handle_create_index( - &self, - uuid: Uuid, - primary_key: Option, - ) -> Result { - let index = self.store.create(uuid, primary_key).await?; - let meta = spawn_blocking(move || IndexMeta::new(&index)).await??; - Ok(meta) - } - - async fn handle_update( - &self, - uuid: Uuid, - meta: Processing, - ) -> Result> { - debug!("Processing update {}", meta.id()); - let update_handler = self.update_handler.clone(); - let index = match self.store.get(uuid).await? { - Some(index) => index, - None => self.store.create(uuid, None).await?, - }; - - Ok(spawn_blocking(move || update_handler.handle_update(&index, meta)).await?) - } - - async fn handle_settings(&self, uuid: Uuid) -> Result> { - let index = self - .store - .get(uuid) - .await? - .ok_or(IndexActorError::UnexistingIndex)?; - let result = spawn_blocking(move || index.settings()).await??; - Ok(result) - } - - async fn handle_fetch_documents( - &self, - uuid: Uuid, - offset: usize, - limit: usize, - attributes_to_retrieve: Option>, - ) -> Result> { - let index = self - .store - .get(uuid) - .await? - .ok_or(IndexActorError::UnexistingIndex)?; - let result = - spawn_blocking(move || index.retrieve_documents(offset, limit, attributes_to_retrieve)) - .await??; - - Ok(result) - } - - async fn handle_fetch_document( - &self, - uuid: Uuid, - doc_id: String, - attributes_to_retrieve: Option>, - ) -> Result { - let index = self - .store - .get(uuid) - .await? 
- .ok_or(IndexActorError::UnexistingIndex)?; - - let result = - spawn_blocking(move || index.retrieve_document(doc_id, attributes_to_retrieve)) - .await??; - - Ok(result) - } - - async fn handle_delete(&self, uuid: Uuid) -> Result<()> { - let index = self.store.delete(uuid).await?; - - if let Some(index) = index { - tokio::task::spawn(async move { - let index = index.inner; - let store = get_arc_ownership_blocking(index).await; - spawn_blocking(move || { - store.prepare_for_closing().wait(); - debug!("Index closed"); - }); - }); - } - - Ok(()) - } - - async fn handle_get_meta(&self, uuid: Uuid) -> Result { - match self.store.get(uuid).await? { - Some(index) => { - let meta = spawn_blocking(move || IndexMeta::new(&index)).await??; - Ok(meta) - } - None => Err(IndexActorError::UnexistingIndex), - } - } - - async fn handle_update_index( - &self, - uuid: Uuid, - index_settings: IndexSettings, - ) -> Result { - let index = self - .store - .get(uuid) - .await? - .ok_or(IndexActorError::UnexistingIndex)?; - - let result = spawn_blocking(move || match index_settings.primary_key { - Some(primary_key) => { - let mut txn = index.write_txn()?; - if index.primary_key(&txn)?.is_some() { - return Err(IndexActorError::ExistingPrimaryKey); - } - let mut builder = UpdateBuilder::new(0).settings(&mut txn, &index); - builder.set_primary_key(primary_key); - builder.execute(|_, _| ())?; - let meta = IndexMeta::new_txn(&index, &txn)?; - txn.commit()?; - Ok(meta) - } - None => { - let meta = IndexMeta::new(&index)?; - Ok(meta) - } - }) - .await??; - - Ok(result) - } - - async fn handle_snapshot(&self, uuid: Uuid, mut path: PathBuf) -> Result<()> { - use tokio::fs::create_dir_all; - - path.push("indexes"); - create_dir_all(&path).await?; - - if let Some(index) = self.store.get(uuid).await? 
{ - let mut index_path = path.join(format!("index-{}", uuid)); - - create_dir_all(&index_path).await?; - - index_path.push("data.mdb"); - spawn_blocking(move || -> Result<()> { - // Get write txn to wait for ongoing write transaction before snapshot. - let _txn = index.write_txn()?; - index - .env - .copy_to_path(index_path, CompactionOption::Enabled)?; - Ok(()) - }) - .await??; - } - - Ok(()) - } - - /// Create a `documents.jsonl` and a `settings.json` in `path/uid/` with a dump of all the - /// documents and all the settings. - async fn handle_dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> { - let index = self - .store - .get(uuid) - .await? - .ok_or(IndexActorError::UnexistingIndex)?; - - let path = path.join(format!("indexes/index-{}/", uuid)); - fs::create_dir_all(&path).await?; - - tokio::task::spawn_blocking(move || index.dump(path)).await??; - - Ok(()) - } - - async fn handle_get_stats(&self, uuid: Uuid) -> Result { - let index = self - .store - .get(uuid) - .await? - .ok_or(IndexActorError::UnexistingIndex)?; - - spawn_blocking(move || { - let rtxn = index.read_txn()?; - - Ok(IndexStats { - size: index.size(), - number_of_documents: index.number_of_documents(&rtxn)?, - is_indexing: None, - field_distribution: index.field_distribution(&rtxn)?, - }) - }) - .await? - } -} - -#[cfg(test)] -mod test { - use std::sync::Arc; - - use super::*; - - #[async_trait::async_trait] - /// Useful for passing around an `Arc` in tests. 
- impl IndexActorHandle for Arc { - async fn create_index(&self, uuid: Uuid, primary_key: Option) -> Result { - self.as_ref().create_index(uuid, primary_key).await - } - - async fn update( - &self, - uuid: Uuid, - meta: Processing, - data: Option, - ) -> Result> { - self.as_ref().update(uuid, meta, data).await - } - - async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result { - self.as_ref().search(uuid, query).await - } - - async fn settings(&self, uuid: Uuid) -> Result> { - self.as_ref().settings(uuid).await - } - - async fn documents( - &self, - uuid: Uuid, - offset: usize, - limit: usize, - attributes_to_retrieve: Option>, - ) -> Result> { - self.as_ref() - .documents(uuid, offset, limit, attributes_to_retrieve) - .await - } - - async fn document( - &self, - uuid: Uuid, - doc_id: String, - attributes_to_retrieve: Option>, - ) -> Result { - self.as_ref() - .document(uuid, doc_id, attributes_to_retrieve) - .await - } - - async fn delete(&self, uuid: Uuid) -> Result<()> { - self.as_ref().delete(uuid).await - } - - async fn get_index_meta(&self, uuid: Uuid) -> Result { - self.as_ref().get_index_meta(uuid).await - } - - async fn update_index( - &self, - uuid: Uuid, - index_settings: IndexSettings, - ) -> Result { - self.as_ref().update_index(uuid, index_settings).await - } - - async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> { - self.as_ref().snapshot(uuid, path).await - } - - async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> { - self.as_ref().dump(uuid, path).await - } - - async fn get_index_stats(&self, uuid: Uuid) -> Result { - self.as_ref().get_index_stats(uuid).await - } - } -} diff --git a/meilisearch-lib/src/index_controller/indexes/store.rs b/meilisearch-lib/src/index_controller/indexes/store.rs deleted file mode 100644 index 336ff6e0a..000000000 --- a/meilisearch-lib/src/index_controller/indexes/store.rs +++ /dev/null @@ -1,113 +0,0 @@ -use std::collections::HashMap; -use std::path::{Path, PathBuf}; -use std::sync::Arc; 
- -use milli::update::UpdateBuilder; -use tokio::fs; -use tokio::sync::RwLock; -use tokio::task::spawn_blocking; -use uuid::Uuid; - -use super::error::{IndexActorError, Result}; -use crate::index::Index; -use crate::index::update_handler::UpdateHandler; -use crate::index_controller::update_file_store::UpdateFileStore; - -type AsyncMap = Arc>>; - -#[async_trait::async_trait] -pub trait IndexStore { - async fn create(&self, uuid: Uuid, primary_key: Option) -> Result; - async fn get(&self, uuid: Uuid) -> Result>; - async fn delete(&self, uuid: Uuid) -> Result>; -} - -pub struct MapIndexStore { - index_store: AsyncMap, - path: PathBuf, - index_size: usize, - update_file_store: Arc, - update_handler: Arc, -} - -impl MapIndexStore { - pub fn new(path: impl AsRef, index_size: usize, update_handler: Arc) -> Self { - let update_file_store = Arc::new(UpdateFileStore::new(path.as_ref()).unwrap()); - let path = path.as_ref().join("indexes/"); - let index_store = Arc::new(RwLock::new(HashMap::new())); - Self { - index_store, - path, - index_size, - update_file_store, - update_handler, - } - } -} - -#[async_trait::async_trait] -impl IndexStore for MapIndexStore { - async fn create(&self, uuid: Uuid, primary_key: Option) -> Result { - // We need to keep the lock until we are sure the db file has been opened correclty, to - // ensure that another db is not created at the same time. 
- let mut lock = self.index_store.write().await; - - if let Some(index) = lock.get(&uuid) { - return Ok(index.clone()); - } - let path = self.path.join(format!("index-{}", uuid)); - if path.exists() { - return Err(IndexActorError::IndexAlreadyExists); - } - - let index_size = self.index_size; - let file_store = self.update_file_store.clone(); - let update_handler = self.update_handler.clone(); - let index = spawn_blocking(move || -> Result { - let index = Index::open(path, index_size, file_store, uuid, update_handler)?; - if let Some(primary_key) = primary_key { - let mut txn = index.write_txn()?; - - let mut builder = UpdateBuilder::new(0).settings(&mut txn, &index); - builder.set_primary_key(primary_key); - builder.execute(|_, _| ())?; - - txn.commit()?; - } - Ok(index) - }) - .await??; - - lock.insert(uuid, index.clone()); - - Ok(index) - } - - async fn get(&self, uuid: Uuid) -> Result> { - let guard = self.index_store.read().await; - match guard.get(&uuid) { - Some(index) => Ok(Some(index.clone())), - None => { - // drop the guard here so we can perform the write after without deadlocking; - drop(guard); - let path = self.path.join(format!("index-{}", uuid)); - if !path.exists() { - return Ok(None); - } - - let index_size = self.index_size; - let file_store = self.update_file_store.clone(); - let index = spawn_blocking(move || Index::open(path, index_size, file_store)).await??; - self.index_store.write().await.insert(uuid, index.clone()); - Ok(Some(index)) - } - } - } - - async fn delete(&self, uuid: Uuid) -> Result> { - let db_path = self.path.join(format!("index-{}", uuid)); - fs::remove_dir_all(db_path).await?; - let index = self.index_store.write().await.remove(&uuid); - Ok(index) - } -} diff --git a/meilisearch-lib/src/index_controller/uuid_resolver/error.rs b/meilisearch-lib/src/index_controller/uuid_resolver/error.rs deleted file mode 100644 index 8f32fa35d..000000000 --- a/meilisearch-lib/src/index_controller/uuid_resolver/error.rs +++ /dev/null @@ 
-1,50 +0,0 @@ -use std::fmt; - -use meilisearch_error::{Code, ErrorCode}; -use tokio::sync::mpsc::error::SendError as MpscSendError; -use tokio::sync::oneshot::error::RecvError as OneshotRecvError; - -pub type Result = std::result::Result; - -#[derive(Debug, thiserror::Error)] -pub enum UuidResolverError { - #[error("Index already exists.")] - NameAlreadyExist, - #[error("Index \"{0}\" not found.")] - UnexistingIndex(String), - #[error("Index must have a valid uid; Index uid can be of type integer or string only composed of alphanumeric characters, hyphens (-) and underscores (_).")] - BadlyFormatted(String), - #[error("Internal error: {0}")] - Internal(Box), -} - -internal_error!( - UuidResolverError: heed::Error, - uuid::Error, - std::io::Error, - tokio::task::JoinError, - serde_json::Error -); - -impl From> for UuidResolverError { - fn from(other: MpscSendError) -> Self { - Self::Internal(Box::new(other)) - } -} - -impl From for UuidResolverError { - fn from(other: OneshotRecvError) -> Self { - Self::Internal(Box::new(other)) - } -} - -impl ErrorCode for UuidResolverError { - fn error_code(&self) -> Code { - match self { - UuidResolverError::NameAlreadyExist => Code::IndexAlreadyExists, - UuidResolverError::UnexistingIndex(_) => Code::IndexNotFound, - UuidResolverError::BadlyFormatted(_) => Code::InvalidIndexUid, - UuidResolverError::Internal(_) => Code::Internal, - } - } -} diff --git a/meilisearch-lib/src/index_controller/uuid_resolver/message.rs b/meilisearch-lib/src/index_controller/uuid_resolver/message.rs deleted file mode 100644 index e9da56d5e..000000000 --- a/meilisearch-lib/src/index_controller/uuid_resolver/message.rs +++ /dev/null @@ -1,89 +0,0 @@ -use std::collections::HashSet; -use std::path::PathBuf; - -use tokio::sync::{mpsc, oneshot}; -use uuid::Uuid; - -use super::error::Result; - -#[derive(Debug)] -pub enum UuidResolverMsg { - Get { - uid: String, - ret: oneshot::Sender>, - }, - Delete { - uid: String, - ret: oneshot::Sender>, - }, - List { - 
ret: oneshot::Sender>>, - }, - Insert { - uuid: Uuid, - name: String, - ret: oneshot::Sender>, - }, - SnapshotRequest { - path: PathBuf, - ret: oneshot::Sender>>, - }, - GetSize { - ret: oneshot::Sender>, - }, - DumpRequest { - path: PathBuf, - ret: oneshot::Sender>>, - }, -} - -impl UuidResolverMsg { - pub async fn get(channel: &mpsc::Sender, uid: String) -> Result { - let (ret, recv) = oneshot::channel(); - let msg = Self::Get { uid, ret }; - channel.send(msg).await?; - recv.await? - } - - pub async fn insert(channel: &mpsc::Sender, uuid: Uuid, name: String) -> Result<()> { - let (ret, recv) = oneshot::channel(); - let msg = Self::Insert { name, uuid, ret }; - channel.send(msg).await?; - recv.await? - } - - pub async fn list(channel: &mpsc::Sender) -> Result> { - let (ret, recv) = oneshot::channel(); - let msg = Self::List { ret }; - channel.send(msg).await?; - recv.await? - } - - pub async fn get_size(channel: &mpsc::Sender) -> Result { - let (ret, recv) = oneshot::channel(); - let msg = Self::GetSize { ret }; - channel.send(msg).await?; - recv.await? - } - - pub async fn dump(channel: &mpsc::Sender, path: PathBuf) -> Result> { - let (ret, recv) = oneshot::channel(); - let msg = Self::DumpRequest { ret, path }; - channel.send(msg).await?; - recv.await? - } - - pub async fn snapshot(channel: &mpsc::Sender, path: PathBuf) -> Result> { - let (ret, recv) = oneshot::channel(); - let msg = Self::SnapshotRequest { ret, path }; - channel.send(msg).await?; - recv.await? - } - - pub async fn delete(channel: &mpsc::Sender, uid: String) -> Result { - let (ret, recv) = oneshot::channel(); - let msg = Self::Delete { ret, uid }; - channel.send(msg).await?; - recv.await? 
- } -} diff --git a/meilisearch-lib/src/index_controller/uuid_resolver/mod.rs b/meilisearch-lib/src/index_controller/uuid_resolver/mod.rs deleted file mode 100644 index 7157c1b41..000000000 --- a/meilisearch-lib/src/index_controller/uuid_resolver/mod.rs +++ /dev/null @@ -1,118 +0,0 @@ -pub mod error; -mod message; -pub mod store; - -use std::path::Path; -use std::{collections::HashSet, path::PathBuf}; - -use log::{trace, warn}; -use tokio::sync::mpsc; -use uuid::Uuid; - -pub use self::error::UuidResolverError; -pub use self::message::UuidResolverMsg; -pub use self::store::{HeedUuidStore, UuidStore}; -use self::error::Result; - -pub type UuidResolverSender = mpsc::Sender; - -const UUID_STORE_SIZE: usize = 1_073_741_824; //1GiB - -pub fn create_uuid_resolver(path: impl AsRef) -> Result> { - let (sender, reveiver) = mpsc::channel(100); - let store = HeedUuidStore::new(path)?; - let actor = UuidResolver::new(reveiver, store); - tokio::spawn(actor.run()); - Ok(sender) -} - -pub struct UuidResolver { - inbox: mpsc::Receiver, - store: S, -} - -impl UuidResolver { - pub fn new(inbox: mpsc::Receiver, store: S) -> Self { - Self { inbox, store } - } - - pub async fn run(mut self) { - use UuidResolverMsg::*; - - trace!("uuid resolver started"); - - loop { - match self.inbox.recv().await { - Some(Get { uid: name, ret }) => { - let _ = ret.send(self.handle_get(name).await); - } - Some(Delete { uid: name, ret }) => { - let _ = ret.send(self.handle_delete(name).await); - } - Some(List { ret }) => { - let _ = ret.send(self.handle_list().await); - } - Some(Insert { ret, uuid, name }) => { - let _ = ret.send(self.handle_insert(name, uuid).await); - } - Some(SnapshotRequest { path, ret }) => { - let _ = ret.send(self.handle_snapshot(path).await); - } - Some(GetSize { ret }) => { - let _ = ret.send(self.handle_get_size().await); - } - Some(DumpRequest { path, ret }) => { - let _ = ret.send(self.handle_dump(path).await); - } - // all senders have been dropped, need to quit. 
- None => break, - } - } - - warn!("exiting uuid resolver loop"); - } - - async fn handle_get(&self, uid: String) -> Result { - self.store - .get_uuid(uid.clone()) - .await? - .ok_or(UuidResolverError::UnexistingIndex(uid)) - } - - async fn handle_delete(&self, uid: String) -> Result { - self.store - .delete(uid.clone()) - .await? - .ok_or(UuidResolverError::UnexistingIndex(uid)) - } - - async fn handle_list(&self) -> Result> { - let result = self.store.list().await?; - Ok(result) - } - - async fn handle_snapshot(&self, path: PathBuf) -> Result> { - self.store.snapshot(path).await - } - - async fn handle_dump(&self, path: PathBuf) -> Result> { - self.store.dump(path).await - } - - async fn handle_insert(&self, uid: String, uuid: Uuid) -> Result<()> { - if !is_index_uid_valid(&uid) { - return Err(UuidResolverError::BadlyFormatted(uid)); - } - self.store.insert(uid, uuid).await?; - Ok(()) - } - - async fn handle_get_size(&self) -> Result { - self.store.get_size().await - } -} - -fn is_index_uid_valid(uid: &str) -> bool { - uid.chars() - .all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') -} diff --git a/meilisearch-lib/src/index_controller/uuid_resolver/store.rs b/meilisearch-lib/src/index_controller/uuid_resolver/store.rs deleted file mode 100644 index 34ba8ced5..000000000 --- a/meilisearch-lib/src/index_controller/uuid_resolver/store.rs +++ /dev/null @@ -1,225 +0,0 @@ -use std::collections::HashSet; -use std::fs::{create_dir_all, File}; -use std::io::{BufRead, BufReader, Write}; -use std::path::{Path, PathBuf}; - -use heed::types::{ByteSlice, Str}; -use heed::{CompactionOption, Database, Env, EnvOpenOptions}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::UUID_STORE_SIZE; -use super::error::{UuidResolverError, Result}; -use crate::EnvSizer; - -#[derive(Serialize, Deserialize)] -struct DumpEntry { - uuid: Uuid, - uid: String, -} - -const UUIDS_DB_PATH: &str = "index_uuids"; - -#[async_trait::async_trait] -pub trait UuidStore: Sized { - 
// Create a new entry for `name`. Return an error if `err` and the entry already exists, return - // the uuid otherwise. - async fn get_uuid(&self, uid: String) -> Result>; - async fn delete(&self, uid: String) -> Result>; - async fn list(&self) -> Result>; - async fn insert(&self, name: String, uuid: Uuid) -> Result<()>; - async fn snapshot(&self, path: PathBuf) -> Result>; - async fn get_size(&self) -> Result; - async fn dump(&self, path: PathBuf) -> Result>; -} - -#[derive(Clone)] -pub struct HeedUuidStore { - env: Env, - db: Database, -} - -impl HeedUuidStore { - pub fn new(path: impl AsRef) -> Result { - let path = path.as_ref().join(UUIDS_DB_PATH); - create_dir_all(&path)?; - let mut options = EnvOpenOptions::new(); - options.map_size(UUID_STORE_SIZE); // 1GB - let env = options.open(path)?; - let db = env.create_database(None)?; - Ok(Self { env, db }) - } - - pub fn get_uuid(&self, name: String) -> Result> { - let env = self.env.clone(); - let db = self.db; - let txn = env.read_txn()?; - match db.get(&txn, &name)? { - Some(uuid) => { - let uuid = Uuid::from_slice(uuid)?; - Ok(Some(uuid)) - } - None => Ok(None), - } - } - - pub fn delete(&self, uid: String) -> Result> { - let env = self.env.clone(); - let db = self.db; - let mut txn = env.write_txn()?; - match db.get(&txn, &uid)? { - Some(uuid) => { - let uuid = Uuid::from_slice(uuid)?; - db.delete(&mut txn, &uid)?; - txn.commit()?; - Ok(Some(uuid)) - } - None => Ok(None), - } - } - - pub fn list(&self) -> Result> { - let env = self.env.clone(); - let db = self.db; - let txn = env.read_txn()?; - let mut entries = Vec::new(); - for entry in db.iter(&txn)? 
{ - let (name, uuid) = entry?; - let uuid = Uuid::from_slice(uuid)?; - entries.push((name.to_owned(), uuid)) - } - Ok(entries) - } - - pub fn insert(&self, name: String, uuid: Uuid) -> Result<()> { - let env = self.env.clone(); - let db = self.db; - let mut txn = env.write_txn()?; - - if db.get(&txn, &name)?.is_some() { - return Err(UuidResolverError::NameAlreadyExist); - } - - db.put(&mut txn, &name, uuid.as_bytes())?; - txn.commit()?; - Ok(()) - } - - pub fn snapshot(&self, mut path: PathBuf) -> Result> { - let env = self.env.clone(); - let db = self.db; - // Write transaction to acquire a lock on the database. - let txn = env.write_txn()?; - let mut entries = HashSet::new(); - for entry in db.iter(&txn)? { - let (_, uuid) = entry?; - let uuid = Uuid::from_slice(uuid)?; - entries.insert(uuid); - } - - // only perform snapshot if there are indexes - if !entries.is_empty() { - path.push(UUIDS_DB_PATH); - create_dir_all(&path).unwrap(); - path.push("data.mdb"); - env.copy_to_path(path, CompactionOption::Enabled)?; - } - Ok(entries) - } - - pub fn get_size(&self) -> Result { - Ok(self.env.size()) - } - - pub fn dump(&self, path: PathBuf) -> Result> { - let dump_path = path.join(UUIDS_DB_PATH); - create_dir_all(&dump_path)?; - let dump_file_path = dump_path.join("data.jsonl"); - let mut dump_file = File::create(&dump_file_path)?; - let mut uuids = HashSet::new(); - - let txn = self.env.read_txn()?; - for entry in self.db.iter(&txn)? 
{ - let (uid, uuid) = entry?; - let uid = uid.to_string(); - let uuid = Uuid::from_slice(uuid)?; - - let entry = DumpEntry { uuid, uid }; - serde_json::to_writer(&mut dump_file, &entry)?; - dump_file.write_all(b"\n").unwrap(); - - uuids.insert(uuid); - } - - Ok(uuids) - } - - pub fn load_dump(src: impl AsRef, dst: impl AsRef) -> Result<()> { - let uuid_resolver_path = dst.as_ref().join(UUIDS_DB_PATH); - std::fs::create_dir_all(&uuid_resolver_path)?; - - let src_indexes = src.as_ref().join(UUIDS_DB_PATH).join("data.jsonl"); - let indexes = File::open(&src_indexes)?; - let mut indexes = BufReader::new(indexes); - let mut line = String::new(); - - let db = Self::new(dst)?; - let mut txn = db.env.write_txn()?; - - loop { - match indexes.read_line(&mut line) { - Ok(0) => break, - Ok(_) => { - let DumpEntry { uuid, uid } = serde_json::from_str(&line)?; - println!("importing {} {}", uid, uuid); - db.db.put(&mut txn, &uid, uuid.as_bytes())?; - } - Err(e) => return Err(e.into()), - } - - line.clear(); - } - txn.commit()?; - - db.env.prepare_for_closing().wait(); - - Ok(()) - } -} - -#[async_trait::async_trait] -impl UuidStore for HeedUuidStore { - async fn get_uuid(&self, name: String) -> Result> { - let this = self.clone(); - tokio::task::spawn_blocking(move || this.get_uuid(name)).await? - } - - async fn delete(&self, uid: String) -> Result> { - let this = self.clone(); - tokio::task::spawn_blocking(move || this.delete(uid)).await? - } - - async fn list(&self) -> Result> { - let this = self.clone(); - tokio::task::spawn_blocking(move || this.list()).await? - } - - async fn insert(&self, name: String, uuid: Uuid) -> Result<()> { - let this = self.clone(); - tokio::task::spawn_blocking(move || this.insert(name, uuid)).await? - } - - async fn snapshot(&self, path: PathBuf) -> Result> { - let this = self.clone(); - tokio::task::spawn_blocking(move || this.snapshot(path)).await? 
- } - - async fn get_size(&self) -> Result { - self.get_size() - } - - async fn dump(&self, path: PathBuf) -> Result> { - let this = self.clone(); - tokio::task::spawn_blocking(move || this.dump(path)).await? - } -} From c1376a9f2a9c6546626b669ea698bd0d90b01f44 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 27 Sep 2021 15:41:14 +0200 Subject: [PATCH 14/37] add the geosearch to Meilisearch --- Cargo.lock | 87 +++++++++++++-- meilisearch-error/src/lib.rs | 9 +- meilisearch-http/Cargo.toml | 2 +- meilisearch-http/src/error.rs | 9 +- meilisearch-http/src/routes/indexes/search.rs | 73 ++++++++++++- meilisearch-lib/Cargo.toml | 3 +- meilisearch-lib/src/error.rs | 9 +- meilisearch-lib/src/index/search.rs | 101 +++++++++++++++++- 8 files changed, 263 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d08b1a83c..b97e712f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -300,6 +300,18 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5ab7d9e73059c86c36473f459b52adbd99c3554a4fec492caef460806006f00" +[[package]] +name = "as-slice" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45403b49e3954a4b8428a0ac21a4b7afadccf92bfd96273f1a58cd4812496ae0" +dependencies = [ + "generic-array 0.12.4", + "generic-array 0.13.3", + "generic-array 0.14.4", + "stable_deref_trait", +] + [[package]] name = "assert-json-diff" version = "1.0.1" @@ -1089,6 +1101,15 @@ dependencies = [ "typenum", ] +[[package]] +name = "generic-array" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f797e67af32588215eaaab8327027ee8e71b9dd0b2b26996aedf20c030fce309" +dependencies = [ + "typenum", +] + [[package]] name = "generic-array" version = "0.14.4" @@ -1099,6 +1120,12 @@ dependencies = [ "version_check", ] +[[package]] +name = "geoutils" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9e006f616a407d396ace1d2ebb3f43ed73189db8b098079bd129928d7645dd1e" + [[package]] name = "getrandom" version = "0.2.3" @@ -1177,6 +1204,15 @@ dependencies = [ "tracing", ] +[[package]] +name = "hash32" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4041af86e63ac4298ce40e5cca669066e75b6f1aa3390fe2561ffa5e1d9f4cc" +dependencies = [ + "byteorder", +] + [[package]] name = "hashbrown" version = "0.7.2" @@ -1193,6 +1229,18 @@ version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +[[package]] +name = "heapless" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "634bd4d29cbf24424d0a4bfcbf80c6960129dc24424752a7d1d1390607023422" +dependencies = [ + "as-slice", + "generic-array 0.14.4", + "hash32", + "stable_deref_trait", +] + [[package]] name = "heck" version = "0.3.3" @@ -1697,6 +1745,7 @@ dependencies = [ "http", "indexmap", "itertools", + "lazy_static", "log", "main_error", "meilisearch-error", @@ -1780,8 +1829,8 @@ dependencies = [ [[package]] name = "milli" -version = "0.13.1" -source = "git+https://github.com/meilisearch/milli.git?rev=6de1b41#6de1b41f791e7d117634e63783d78b29b5228a99" +version = "0.16.0" +source = "git+https://github.com/meilisearch/milli.git#0f8320bdc24d76781e596d96d3b2e788a55655c6" dependencies = [ "bimap", "bincode", @@ -1794,6 +1843,7 @@ dependencies = [ "flate2", "fst", "fxhash", + "geoutils", "grenad", "heed", "human_format", @@ -1811,6 +1861,7 @@ dependencies = [ "pest_derive", "rayon", "roaring", + "rstar", "serde", "serde_json", "slice-group-by", @@ -1818,7 +1869,6 @@ dependencies = [ "smallvec", "tempfile", "uuid", - "vec-utils", ] [[package]] @@ -2038,6 +2088,12 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3cacbb3c4ff353b534a67fb8d7524d00229da4cb1dc8c79f4db96e375ab5b619" +[[package]] 
+name = "pdqselect" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec91767ecc0a0bbe558ce8c9da33c068066c57ecc8bb8477ef8c1ad3ef77c27" + [[package]] name = "percent-encoding" version = "2.1.0" @@ -2489,6 +2545,19 @@ dependencies = [ "retain_mut", ] +[[package]] +name = "rstar" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d535e658ada8c1987a113e5261f8b907f721b2854d666e72820671481b7ee125" +dependencies = [ + "heapless", + "num-traits", + "pdqselect", + "serde", + "smallvec", +] + [[package]] name = "rustc-demangle" version = "0.1.21" @@ -2762,6 +2831,12 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "standback" version = "0.2.17" @@ -3265,12 +3340,6 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" -[[package]] -name = "vec-utils" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dac984aa016c26ef4ed7b2c30d6a1bd570fd40a078caccaf6415a2ac5d96161" - [[package]] name = "vec_map" version = "0.8.2" diff --git a/meilisearch-error/src/lib.rs b/meilisearch-error/src/lib.rs index 9d5b79f69..2e4b50ef6 100644 --- a/meilisearch-error/src/lib.rs +++ b/meilisearch-error/src/lib.rs @@ -1,7 +1,7 @@ use std::fmt; use actix_http::http::StatusCode; -use serde::{Serialize, Deserialize}; +use serde::{Deserialize, Serialize}; pub trait ErrorCode: std::error::Error { fn error_code(&self) -> Code; @@ -71,6 +71,8 @@ pub enum Code { BadRequest, DocumentNotFound, Internal, + InvalidGeoField, + 
InvalidRankingRule, InvalidToken, MissingAuthorizationHeader, NotFound, @@ -108,6 +110,8 @@ impl Code { PrimaryKeyAlreadyPresent => { ErrCode::invalid("primary_key_already_present", StatusCode::BAD_REQUEST) } + // invalid ranking rule + InvalidRankingRule => ErrCode::invalid("invalid_request", StatusCode::BAD_REQUEST), // invalid document MaxFieldsLimitExceeded => { @@ -126,6 +130,9 @@ impl Code { BadRequest => ErrCode::invalid("bad_request", StatusCode::BAD_REQUEST), DocumentNotFound => ErrCode::invalid("document_not_found", StatusCode::NOT_FOUND), Internal => ErrCode::internal("internal", StatusCode::INTERNAL_SERVER_ERROR), + InvalidGeoField => { + ErrCode::authentication("invalid_geo_field", StatusCode::BAD_REQUEST) + } InvalidToken => ErrCode::authentication("invalid_token", StatusCode::FORBIDDEN), MissingAuthorizationHeader => { ErrCode::authentication("missing_authorization_header", StatusCode::UNAUTHORIZED) diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index eb3d550ab..f3c11e29f 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -49,7 +49,7 @@ meilisearch-lib = { path = "../meilisearch-lib" } meilisearch-error = { path = "../meilisearch-error" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" } memmap = "0.7.0" -milli = { git = "https://github.com/meilisearch/milli.git", rev = "6de1b41" } +milli = { git = "https://github.com/meilisearch/milli.git", version = "0.16.0" } mime = "0.3.16" num_cpus = "1.13.0" once_cell = "1.8.0" diff --git a/meilisearch-http/src/error.rs b/meilisearch-http/src/error.rs index c18c32ea5..52538c862 100644 --- a/meilisearch-http/src/error.rs +++ b/meilisearch-http/src/error.rs @@ -74,13 +74,11 @@ impl ErrorCode for MilliError<'_> { milli::Error::UserError(ref error) => { match error { // TODO: wait for spec for new error codes. 
- | UserError::SerdeJson(_) + UserError::SerdeJson(_) | UserError::MaxDatabaseSizeReached - | UserError::InvalidCriterionName { .. } | UserError::InvalidDocumentId { .. } | UserError::InvalidStoreFile | UserError::NoSpaceLeftOnDevice - | UserError::InvalidAscDescSyntax { .. } | UserError::DocumentLimitReached => Code::Internal, UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded, UserError::InvalidFilter(_) => Code::Filter, @@ -93,7 +91,10 @@ impl ErrorCode for MilliError<'_> { UserError::SortRankingRuleMissing => Code::Sort, UserError::UnknownInternalDocumentId { .. } => Code::DocumentNotFound, UserError::InvalidFacetsDistribution { .. } => Code::BadRequest, - UserError::InvalidSortableAttribute { .. } => Code::Sort, + UserError::InvalidGeoField { .. } => Code::InvalidGeoField, + UserError::InvalidSortableAttribute { .. } + | UserError::InvalidReservedSortName { .. } => Code::Sort, + UserError::CriterionError(_) => Code::BadRequest, } } } diff --git a/meilisearch-http/src/routes/indexes/search.rs b/meilisearch-http/src/routes/indexes/search.rs index 1ae8eb2f7..c7e987840 100644 --- a/meilisearch-http/src/routes/indexes/search.rs +++ b/meilisearch-http/src/routes/indexes/search.rs @@ -1,7 +1,7 @@ use actix_web::{web, HttpResponse}; use log::debug; +use meilisearch_lib::index::{default_crop_length, SearchQuery, DEFAULT_SEARCH_LIMIT}; use meilisearch_lib::MeiliSearch; -use meilisearch_lib::index::{default_crop_length, SearchQuery, DEFAULT_SEARCH_LIMIT}; use serde::Deserialize; use serde_json::Value; @@ -61,9 +61,7 @@ impl From for SearchQuery { None => None, }; - let sort = other - .sort - .map(|attrs| attrs.split(',').map(String::from).collect()); + let sort = other.sort.map(|attr| fix_sort_query_parameters(&attr)); Self { q: other.q, @@ -81,6 +79,30 @@ impl From for SearchQuery { } } +/// Transform the sort query parameter into something that matches the post expected format. 
+fn fix_sort_query_parameters(sort_query: &str) -> Vec { + let mut sort_parameters = Vec::new(); + let mut merge = false; + for current_sort in sort_query.trim_matches('"').split(',').map(|s| s.trim()) { + if current_sort.starts_with("_geoPoint(") { + sort_parameters.push(current_sort.to_string()); + merge = true; + } else if merge && !sort_parameters.is_empty() { + sort_parameters + .last_mut() + .unwrap() + .push_str(&format!(",{}", current_sort)); + if current_sort.ends_with("):desc") || current_sort.ends_with("):asc") { + merge = false; + } + } else { + sort_parameters.push(current_sort.to_string()); + merge = false; + } + } + sort_parameters +} + pub async fn search_with_url_query( meilisearch: GuardedData, path: web::Path, @@ -88,7 +110,9 @@ pub async fn search_with_url_query( ) -> Result { debug!("called with params: {:?}", params); let query = params.into_inner().into(); - let search_result = meilisearch.search(path.into_inner().index_uid, query).await?; + let search_result = meilisearch + .search(path.into_inner().index_uid, query) + .await?; // Tests that the nb_hits is always set to false #[cfg(test)] @@ -115,3 +139,42 @@ pub async fn search_with_post( debug!("returns: {:?}", search_result); Ok(HttpResponse::Ok().json(search_result)) } + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_fix_sort_query_parameters() { + let sort = fix_sort_query_parameters("_geoPoint(12, 13):asc"); + assert_eq!(sort, vec!["_geoPoint(12,13):asc".to_string()]); + let sort = fix_sort_query_parameters("doggo:asc,_geoPoint(12.45,13.56):desc"); + assert_eq!( + sort, + vec![ + "doggo:asc".to_string(), + "_geoPoint(12.45,13.56):desc".to_string(), + ] + ); + let sort = fix_sort_query_parameters( + "doggo:asc , _geoPoint(12.45, 13.56, 2590352):desc , catto:desc", + ); + assert_eq!( + sort, + vec![ + "doggo:asc".to_string(), + "_geoPoint(12.45,13.56,2590352):desc".to_string(), + "catto:desc".to_string(), + ] + ); + let sort = fix_sort_query_parameters("doggo:asc , 
_geoPoint(1, 2), catto:desc"); + // This is ugly but eh, I don't want to write a full parser just for this unused route + assert_eq!( + sort, + vec![ + "doggo:asc".to_string(), + "_geoPoint(1,2),catto:desc".to_string(), + ] + ); + } +} diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml index 0d9f6520b..78cbd1b96 100644 --- a/meilisearch-lib/Cargo.toml +++ b/meilisearch-lib/Cargo.toml @@ -27,12 +27,13 @@ heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" } http = "0.2.4" indexmap = { version = "1.7.0", features = ["serde-1"] } itertools = "0.10.1" +lazy_static = "1.4.0" log = "0.4.14" main_error = "0.1.1" meilisearch-error = { path = "../meilisearch-error" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" } memmap = "0.7.0" -milli = { git = "https://github.com/meilisearch/milli.git", rev = "6de1b41" } +milli = { git = "https://github.com/meilisearch/milli.git", version = "0.16.0" } mime = "0.3.16" num_cpus = "1.13.0" once_cell = "1.8.0" diff --git a/meilisearch-lib/src/error.rs b/meilisearch-lib/src/error.rs index 80141dae5..a369381fe 100644 --- a/meilisearch-lib/src/error.rs +++ b/meilisearch-lib/src/error.rs @@ -35,13 +35,11 @@ impl ErrorCode for MilliError<'_> { milli::Error::UserError(ref error) => { match error { // TODO: wait for spec for new error codes. - | UserError::SerdeJson(_) + UserError::SerdeJson(_) | UserError::MaxDatabaseSizeReached - | UserError::InvalidCriterionName { .. } | UserError::InvalidDocumentId { .. } | UserError::InvalidStoreFile | UserError::NoSpaceLeftOnDevice - | UserError::InvalidAscDescSyntax { .. } | UserError::DocumentLimitReached => Code::Internal, UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded, UserError::InvalidFilter(_) => Code::Filter, @@ -54,7 +52,10 @@ impl ErrorCode for MilliError<'_> { UserError::SortRankingRuleMissing => Code::Sort, UserError::UnknownInternalDocumentId { .. 
} => Code::DocumentNotFound, UserError::InvalidFacetsDistribution { .. } => Code::BadRequest, - UserError::InvalidSortableAttribute { .. } => Code::Sort, + UserError::InvalidSortableAttribute { .. } + | UserError::InvalidReservedSortName { .. } => Code::Sort, + UserError::CriterionError(_) => Code::InvalidRankingRule, + UserError::InvalidGeoField { .. } => Code::InvalidGeoField, } } } diff --git a/meilisearch-lib/src/index/search.rs b/meilisearch-lib/src/index/search.rs index c7949fea6..37f4e2a33 100644 --- a/meilisearch-lib/src/index/search.rs +++ b/meilisearch-lib/src/index/search.rs @@ -6,9 +6,12 @@ use either::Either; use heed::RoTxn; use indexmap::IndexMap; use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token}; -use milli::{AscDesc, FieldId, FieldsIdsMap, FilterCondition, MatchingWords, UserError}; +use milli::{ + AscDesc, AscDescError, FieldId, FieldsIdsMap, FilterCondition, MatchingWords, UserError, +}; +use regex::Regex; use serde::{Deserialize, Serialize}; -use serde_json::Value; +use serde_json::{json, Value}; use crate::index::error::FacetError; use crate::index::IndexError; @@ -110,12 +113,16 @@ impl Index { if let Some(ref sort) = query.sort { let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() { Ok(sorts) => sorts, - Err(UserError::InvalidAscDescSyntax { name }) => { + Err(AscDescError::InvalidSyntax { name }) => { return Err(IndexError::Milli( UserError::InvalidSortName { name }.into(), )) } - Err(err) => return Err(IndexError::Milli(err.into())), + Err(AscDescError::ReservedKeyword { name }) => { + return Err(IndexError::Milli( + UserError::InvalidReservedSortName { name }.into(), + )) + } }; search.sort_criteria(sort); @@ -193,7 +200,7 @@ impl Index { let documents_iter = self.documents(&rtxn, documents_ids)?; for (_id, obkv) in documents_iter { - let document = make_document(&to_retrieve_ids, &fields_ids_map, obkv)?; + let mut document = make_document(&to_retrieve_ids, &fields_ids_map, obkv)?; let matches_info = query 
.matches @@ -207,6 +214,10 @@ impl Index { &formatted_options, )?; + if let Some(sort) = query.sort.as_ref() { + insert_geo_distance(sort, &mut document); + } + let hit = SearchHit { document, formatted, @@ -247,6 +258,25 @@ impl Index { } } +fn insert_geo_distance(sorts: &[String], document: &mut Document) { + lazy_static::lazy_static! { + static ref GEO_REGEX: Regex = + Regex::new(r"_geoPoint\(\s*([[:digit:].\-]+)\s*,\s*([[:digit:].\-]+)\s*\)").unwrap(); + }; + if let Some(capture_group) = sorts.iter().find_map(|sort| GEO_REGEX.captures(sort)) { + // TODO: TAMO: milli encountered an internal error, what do we want to do? + let base = [ + capture_group[1].parse().unwrap(), + capture_group[2].parse().unwrap(), + ]; + let geo_point = &document.get("_geo").unwrap_or(&json!(null)); + if let Some((lat, lng)) = geo_point["lat"].as_f64().zip(geo_point["lng"].as_f64()) { + let distance = milli::distance_between_two_points(&base, &[lat, lng]); + document.insert("_geoDistance".to_string(), json!(distance.round() as usize)); + } + } +} + fn compute_matches>( matcher: &impl Matcher, document: &Document, @@ -1332,4 +1362,65 @@ mod test { r##"{"about": [MatchInfo { start: 0, length: 6 }, MatchInfo { start: 31, length: 7 }, MatchInfo { start: 191, length: 7 }, MatchInfo { start: 225, length: 7 }, MatchInfo { start: 233, length: 6 }], "color": [MatchInfo { start: 0, length: 3 }]}"## ); } + + #[test] + fn test_insert_geo_distance() { + let value: Document = serde_json::from_str( + r#"{ + "_geo": { + "lat": 50.629973371633746, + "lng": 3.0569447399419567 + }, + "city": "Lille", + "id": "1" + }"#, + ) + .unwrap(); + + let sorters = &["_geoPoint(50.629973371633746,3.0569447399419567):desc".to_string()]; + let mut document = value.clone(); + insert_geo_distance(sorters, &mut document); + assert_eq!(document.get("_geoDistance"), Some(&json!(0))); + + let sorters = &["_geoPoint(50.629973371633746, 3.0569447399419567):asc".to_string()]; + let mut document = value.clone(); + 
insert_geo_distance(sorters, &mut document); + assert_eq!(document.get("_geoDistance"), Some(&json!(0))); + + let sorters = + &["_geoPoint( 50.629973371633746 , 3.0569447399419567 ):desc".to_string()]; + let mut document = value.clone(); + insert_geo_distance(sorters, &mut document); + assert_eq!(document.get("_geoDistance"), Some(&json!(0))); + + let sorters = &[ + "prix:asc", + "villeneuve:desc", + "_geoPoint(50.629973371633746, 3.0569447399419567):asc", + "ubu:asc", + ] + .map(|s| s.to_string()); + let mut document = value.clone(); + insert_geo_distance(sorters, &mut document); + assert_eq!(document.get("_geoDistance"), Some(&json!(0))); + + // only the first geoPoint is used to compute the distance + let sorters = &[ + "chien:desc", + "_geoPoint(50.629973371633746, 3.0569447399419567):asc", + "pangolin:desc", + "_geoPoint(100.0, -80.0):asc", + "chat:asc", + ] + .map(|s| s.to_string()); + let mut document = value.clone(); + insert_geo_distance(sorters, &mut document); + assert_eq!(document.get("_geoDistance"), Some(&json!(0))); + + // there was no _geoPoint so nothing is inserted in the document + let sorters = &["chien:asc".to_string()]; + let mut document = value; + insert_geo_distance(sorters, &mut document); + assert_eq!(document.get("_geoDistance"), None); + } } From 654f49ccecf1f33bced6cf2cc61dd4691297d4ca Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 28 Sep 2021 14:48:15 +0200 Subject: [PATCH 15/37] [WIP] put milli on branch main --- Cargo.lock | 2 +- meilisearch-http/Cargo.toml | 2 +- meilisearch-lib/Cargo.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b97e712f8..0a97f899e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1830,7 +1830,7 @@ dependencies = [ [[package]] name = "milli" version = "0.16.0" -source = "git+https://github.com/meilisearch/milli.git#0f8320bdc24d76781e596d96d3b2e788a55655c6" +source = 
"git+https://github.com/meilisearch/milli.git?branch=main#b2a332599ebdbf492360ddd3e98c3a14fb84608e" dependencies = [ "bimap", "bincode", diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index f3c11e29f..d9f030122 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -49,7 +49,7 @@ meilisearch-lib = { path = "../meilisearch-lib" } meilisearch-error = { path = "../meilisearch-error" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" } memmap = "0.7.0" -milli = { git = "https://github.com/meilisearch/milli.git", version = "0.16.0" } +milli = { git = "https://github.com/meilisearch/milli.git", branch = "main" } mime = "0.3.16" num_cpus = "1.13.0" once_cell = "1.8.0" diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml index 78cbd1b96..54cedd6d5 100644 --- a/meilisearch-lib/Cargo.toml +++ b/meilisearch-lib/Cargo.toml @@ -33,7 +33,7 @@ main_error = "0.1.1" meilisearch-error = { path = "../meilisearch-error" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" } memmap = "0.7.0" -milli = { git = "https://github.com/meilisearch/milli.git", version = "0.16.0" } +milli = { git = "https://github.com/meilisearch/milli.git", branch = "main" } mime = "0.3.16" num_cpus = "1.13.0" once_cell = "1.8.0" From 539a57026deb5d40362246d2129b2569a1ffbeed Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 28 Sep 2021 14:49:13 +0200 Subject: [PATCH 16/37] fix the sort error messages --- meilisearch-http/src/error.rs | 7 +++---- meilisearch-lib/src/error.rs | 5 ++--- meilisearch-lib/src/index/search.rs | 14 ++++---------- 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/meilisearch-http/src/error.rs b/meilisearch-http/src/error.rs index 52538c862..00229960e 100644 --- a/meilisearch-http/src/error.rs +++ b/meilisearch-http/src/error.rs @@ -83,7 +83,6 @@ impl ErrorCode for MilliError<'_> { UserError::AttributeLimitReached => 
Code::MaxFieldsLimitExceeded, UserError::InvalidFilter(_) => Code::Filter, UserError::InvalidFilterAttribute(_) => Code::Filter, - UserError::InvalidSortName { .. } => Code::Sort, UserError::MissingDocumentId { .. } => Code::MissingDocumentId, UserError::MissingPrimaryKey => Code::MissingPrimaryKey, UserError::PrimaryKeyCannotBeChanged => Code::PrimaryKeyAlreadyPresent, @@ -92,9 +91,9 @@ impl ErrorCode for MilliError<'_> { UserError::UnknownInternalDocumentId { .. } => Code::DocumentNotFound, UserError::InvalidFacetsDistribution { .. } => Code::BadRequest, UserError::InvalidGeoField { .. } => Code::InvalidGeoField, - UserError::InvalidSortableAttribute { .. } - | UserError::InvalidReservedSortName { .. } => Code::Sort, - UserError::CriterionError(_) => Code::BadRequest, + UserError::InvalidSortableAttribute { .. } => Code::Sort, + UserError::SortError(_) => Code::Sort, + UserError::CriterionError(_) => Code::InvalidRankingRule, } } } diff --git a/meilisearch-lib/src/error.rs b/meilisearch-lib/src/error.rs index a369381fe..d29c18d25 100644 --- a/meilisearch-lib/src/error.rs +++ b/meilisearch-lib/src/error.rs @@ -44,7 +44,6 @@ impl ErrorCode for MilliError<'_> { UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded, UserError::InvalidFilter(_) => Code::Filter, UserError::InvalidFilterAttribute(_) => Code::Filter, - UserError::InvalidSortName { .. } => Code::Sort, UserError::MissingDocumentId { .. } => Code::MissingDocumentId, UserError::MissingPrimaryKey => Code::MissingPrimaryKey, UserError::PrimaryKeyCannotBeChanged => Code::PrimaryKeyAlreadyPresent, @@ -52,10 +51,10 @@ impl ErrorCode for MilliError<'_> { UserError::SortRankingRuleMissing => Code::Sort, UserError::UnknownInternalDocumentId { .. } => Code::DocumentNotFound, UserError::InvalidFacetsDistribution { .. } => Code::BadRequest, - UserError::InvalidSortableAttribute { .. } - | UserError::InvalidReservedSortName { .. } => Code::Sort, + UserError::InvalidSortableAttribute { .. 
} => Code::Sort, UserError::CriterionError(_) => Code::InvalidRankingRule, UserError::InvalidGeoField { .. } => Code::InvalidGeoField, + UserError::SortError(_) => Code::Sort, } } } diff --git a/meilisearch-lib/src/index/search.rs b/meilisearch-lib/src/index/search.rs index 37f4e2a33..edc672897 100644 --- a/meilisearch-lib/src/index/search.rs +++ b/meilisearch-lib/src/index/search.rs @@ -7,7 +7,8 @@ use heed::RoTxn; use indexmap::IndexMap; use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token}; use milli::{ - AscDesc, AscDescError, FieldId, FieldsIdsMap, FilterCondition, MatchingWords, UserError, + AscDesc, AscDescError, FieldId, FieldsIdsMap, FilterCondition, MatchingWords, SortError, + UserError, }; use regex::Regex; use serde::{Deserialize, Serialize}; @@ -113,15 +114,8 @@ impl Index { if let Some(ref sort) = query.sort { let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() { Ok(sorts) => sorts, - Err(AscDescError::InvalidSyntax { name }) => { - return Err(IndexError::Milli( - UserError::InvalidSortName { name }.into(), - )) - } - Err(AscDescError::ReservedKeyword { name }) => { - return Err(IndexError::Milli( - UserError::InvalidReservedSortName { name }.into(), - )) + Err(asc_desc_error) => { + return Err(IndexError::Milli(SortError::from(asc_desc_error).into())) } }; From bcaee4d179939126584e077447e00facd1ca4dae Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 28 Sep 2021 12:05:22 +0200 Subject: [PATCH 17/37] fix uuid store size --- meilisearch-lib/src/index_controller/index_resolver/mod.rs | 5 ++--- meilisearch-lib/src/index_controller/mod.rs | 7 +------ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/meilisearch-lib/src/index_controller/index_resolver/mod.rs b/meilisearch-lib/src/index_controller/index_resolver/mod.rs index dcd1ed512..eea8e7a2c 100644 --- a/meilisearch-lib/src/index_controller/index_resolver/mod.rs +++ b/meilisearch-lib/src/index_controller/index_resolver/mod.rs @@ -70,9 +70,8 @@ where U: UuidStore, 
Ok(indexes) } - pub async fn get_size(&self) -> Result { - todo!() - //Ok(self.index_store.get_size()? + self.index_uuid_store.get_size().await?) + pub async fn get_uuids_size(&self) -> Result { + Ok(self.index_uuid_store.get_size().await?) } pub async fn snapshot(&self, path: impl AsRef) -> Result> { diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index 29f5348b1..325082074 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -499,11 +499,6 @@ impl IndexController { Ok(meta) } - pub async fn get_uuids_size(&self) -> Result { - let size = self.index_resolver.get_size().await?; - Ok(size) - } - pub async fn get_index_stats(&self, uid: String) -> Result { let update_infos = UpdateMsg::get_info(&self.update_sender).await?; let index = self.index_resolver.get_index(uid).await?; @@ -516,7 +511,7 @@ impl IndexController { pub async fn get_all_stats(&self) -> Result { let update_infos = UpdateMsg::get_info(&self.update_sender).await?; - let mut database_size = self.get_uuids_size().await? + update_infos.size; + let mut database_size = self.index_resolver.get_uuids_size().await? 
+ update_infos.size; let mut last_update: Option> = None; let mut indexes = BTreeMap::new(); From ddfd7def358d82267a52d47d11f63359610c98f1 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 28 Sep 2021 18:10:45 +0200 Subject: [PATCH 18/37] add a TODO while waiting for the tests to be fixed --- meilisearch-http/src/routes/indexes/search.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/meilisearch-http/src/routes/indexes/search.rs b/meilisearch-http/src/routes/indexes/search.rs index c7e987840..9cbdcf2c0 100644 --- a/meilisearch-http/src/routes/indexes/search.rs +++ b/meilisearch-http/src/routes/indexes/search.rs @@ -79,6 +79,8 @@ impl From for SearchQuery { } } +// TODO: TAMO: split on :asc, and :desc, instead of doing some weird things + /// Transform the sort query parameter into something that matches the post expected format. fn fix_sort_query_parameters(sort_query: &str) -> Vec { let mut sort_parameters = Vec::new(); From 692c6766257d7cffbdb01dc69c5468c238d8ed27 Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 28 Sep 2021 18:10:09 +0200 Subject: [PATCH 19/37] fix tests --- Cargo.lock | 1 - meilisearch-http/Cargo.toml | 2 +- meilisearch-http/src/helpers/mod.rs | 1 - meilisearch-http/src/lib.rs | 50 ++++ meilisearch-http/src/main.rs | 55 +--- meilisearch-http/src/option.rs | 62 ----- .../src/routes/indexes/documents.rs | 10 +- meilisearch-http/src/routes/indexes/mod.rs | 34 +-- .../src/routes/indexes/settings.rs | 8 +- meilisearch-http/src/routes/mod.rs | 9 +- meilisearch-http/tests/common/server.rs | 32 ++- meilisearch-http/tests/common/service.rs | 19 +- .../tests/documents/add_documents.rs | 6 +- .../tests/settings/get_settings.rs | 8 +- meilisearch-http/tests/snapshot/mod.rs | 6 +- meilisearch-lib/Cargo.toml | 3 +- meilisearch-lib/src/index/search.rs | 3 +- .../src/index_controller/dump_actor/mod.rs | 16 +- .../index_controller/index_resolver/mod.rs | 16 +- meilisearch-lib/src/index_controller/mod.rs | 186 +++---------- .../src/index_controller/snapshot.rs 
| 24 +- .../src/index_controller/updates/message.rs | 9 +- .../src/index_controller/updates/mod.rs | 2 +- .../src/index_controller/updates/store/mod.rs | 244 +++++++++--------- 24 files changed, 325 insertions(+), 481 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0a97f899e..e763ecaa5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1830,7 +1830,6 @@ dependencies = [ [[package]] name = "milli" version = "0.16.0" -source = "git+https://github.com/meilisearch/milli.git?branch=main#b2a332599ebdbf492360ddd3e98c3a14fb84608e" dependencies = [ "bimap", "bincode", diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index d9f030122..085a40b09 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -49,7 +49,7 @@ meilisearch-lib = { path = "../meilisearch-lib" } meilisearch-error = { path = "../meilisearch-error" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" } memmap = "0.7.0" -milli = { git = "https://github.com/meilisearch/milli.git", branch = "main" } +milli = { path = "../../milli/milli" } mime = "0.3.16" num_cpus = "1.13.0" once_cell = "1.8.0" diff --git a/meilisearch-http/src/helpers/mod.rs b/meilisearch-http/src/helpers/mod.rs index 0b72c3694..3908c440c 100644 --- a/meilisearch-http/src/helpers/mod.rs +++ b/meilisearch-http/src/helpers/mod.rs @@ -1,4 +1,3 @@ -//pub mod compression; mod env; pub use env::EnvSizer; diff --git a/meilisearch-http/src/lib.rs b/meilisearch-http/src/lib.rs index 307bbcefa..481d38e1c 100644 --- a/meilisearch-http/src/lib.rs +++ b/meilisearch-http/src/lib.rs @@ -47,6 +47,9 @@ pub mod analytics; pub mod helpers; pub mod option; pub mod routes; +use std::path::Path; +use std::time::Duration; + use crate::extractors::authentication::AuthConfig; pub use option::Opt; @@ -81,6 +84,53 @@ impl ApiKeys { } } +pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result { + let mut meilisearch = MeiliSearch::builder(); + meilisearch + 
.set_max_index_size(opt.max_index_size.get_bytes() as usize) + .set_max_update_store_size(opt.max_udb_size.get_bytes() as usize) + .set_ignore_missing_snapshot(opt.ignore_missing_snapshot) + .set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists) + .set_dump_dst(opt.dumps_dir.clone()) + .set_snapshot_interval(Duration::from_secs(opt.snapshot_interval_sec)) + .set_snapshot_dir(opt.snapshot_dir.clone()); + + if let Some(ref path) = opt.import_snapshot { + meilisearch.set_import_snapshot(path.clone()); + } + + if let Some(ref path) = opt.import_dump { + meilisearch.set_dump_src(path.clone()); + } + + if opt.schedule_snapshot { + meilisearch.set_schedule_snapshot(); + } + + meilisearch.build(opt.db_path.clone(), opt.indexer_options.clone()) +} + +/// Cleans and setup the temporary file folder in the database directory. This must be done after +/// the meilisearch instance has been created, to not interfere with the snapshot and dump loading. +pub fn setup_temp_dir(db_path: impl AsRef) -> anyhow::Result<()> { + // Set the tempfile directory in the current db path, to avoid cross device references. Also + // remove the previous outstanding files found there + // + // TODO: if two processes open the same db, one might delete the other tmpdir. Need to make + // sure that no one is using it before deleting it. 
+ let temp_path = db_path.as_ref().join("tmp"); + // Ignore error if tempdir doesn't exist + let _ = std::fs::remove_dir_all(&temp_path); + std::fs::create_dir_all(&temp_path)?; + if cfg!(windows) { + std::env::set_var("TMP", temp_path); + } else { + std::env::set_var("TMPDIR", temp_path); + } + + Ok(()) +} + pub fn configure_data( config: &mut web::ServiceConfig, data: MeiliSearch, diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index dfa4bcc2d..3c7a34ddf 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -1,7 +1,7 @@ -use std::{env, path::Path, time::Duration}; +use std::env; use actix_web::HttpServer; -use meilisearch_http::{create_app, Opt}; +use meilisearch_http::{Opt, create_app, setup_meilisearch}; use meilisearch_lib::MeiliSearch; use structopt::StructOpt; @@ -27,53 +27,6 @@ fn setup(opt: &Opt) -> anyhow::Result<()> { Ok(()) } -/// Cleans and setup the temporary file folder in the database directory. This must be done after -/// the meilisearch instance has been created, to not interfere with the snapshot and dump loading. -fn setup_temp_dir(db_path: impl AsRef) -> anyhow::Result<()> { - // Set the tempfile directory in the current db path, to avoid cross device references. Also - // remove the previous outstanding files found there - // - // TODO: if two processes open the same db, one might delete the other tmpdir. Need to make - // sure that no one is using it before deleting it. 
- let temp_path = db_path.as_ref().join("tmp"); - // Ignore error if tempdir doesn't exist - let _ = std::fs::remove_dir_all(&temp_path); - std::fs::create_dir_all(&temp_path)?; - if cfg!(windows) { - std::env::set_var("TMP", temp_path); - } else { - std::env::set_var("TMPDIR", temp_path); - } - - Ok(()) -} - -fn setup_meilisearch(opt: &Opt) -> anyhow::Result { - let mut meilisearch = MeiliSearch::builder(); - meilisearch - .set_max_index_size(opt.max_index_size.get_bytes() as usize) - .set_max_update_store_size(opt.max_udb_size.get_bytes() as usize) - .set_ignore_missing_snapshot(opt.ignore_missing_snapshot) - .set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists) - .set_dump_dst(opt.dumps_dir.clone()) - .set_snapshot_interval(Duration::from_secs(opt.snapshot_interval_sec)) - .set_snapshot_dir(opt.snapshot_dir.clone()); - - if let Some(ref path) = opt.import_snapshot { - meilisearch.set_import_snapshot(path.clone()); - } - - if let Some(ref path) = opt.import_dump { - meilisearch.set_dump_src(path.clone()); - } - - if opt.schedule_snapshot { - meilisearch.set_schedule_snapshot(); - } - - meilisearch.build(opt.db_path.clone(), opt.indexer_options.clone()) -} - #[actix_web::main] async fn main() -> anyhow::Result<()> { let opt = Opt::from_args(); @@ -92,7 +45,9 @@ async fn main() -> anyhow::Result<()> { let meilisearch = setup_meilisearch(&opt)?; - setup_temp_dir(&opt.db_path)?; + // Setup the temp directory to be in the db folder. This is important, since temporary file + // don't support to be persisted accross filesystem boundaries. 
+ meilisearch_http::setup_temp_dir(&opt.db_path)?; #[cfg(all(not(debug_assertions), feature = "analytics"))] if !opt.no_analytics { diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 2a4d425e9..72fbeab44 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -1,9 +1,5 @@ -use byte_unit::ByteError; -use std::fmt; use std::io::{BufReader, Read}; -use std::ops::Deref; use std::path::PathBuf; -use std::str::FromStr; use std::sync::Arc; use std::fs; @@ -14,7 +10,6 @@ use rustls::{ RootCertStore, }; use structopt::StructOpt; -use sysinfo::{RefreshKind, System, SystemExt}; use meilisearch_lib::options::IndexerOpts; const POSSIBLE_ENV: [&str; 2] = ["development", "production"]; @@ -177,63 +172,6 @@ impl Opt { } } -/// A type used to detect the max memory available and use 2/3 of it. -#[derive(Debug, Clone, Copy)] -pub struct MaxMemory(Option); - -impl FromStr for MaxMemory { - type Err = ByteError; - - fn from_str(s: &str) -> Result { - Byte::from_str(s).map(Some).map(MaxMemory) - } -} - -impl Default for MaxMemory { - fn default() -> MaxMemory { - MaxMemory( - total_memory_bytes() - .map(|bytes| bytes * 2 / 3) - .map(Byte::from_bytes), - ) - } -} - -impl fmt::Display for MaxMemory { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self.0 { - Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)), - None => f.write_str("unknown"), - } - } -} - -impl Deref for MaxMemory { - type Target = Option; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl MaxMemory { - pub fn unlimited() -> Self { - Self(None) - } -} - -/// Returns the total amount of bytes available or `None` if this system isn't supported. 
-fn total_memory_bytes() -> Option { - if System::IS_SUPPORTED { - let memory_kind = RefreshKind::new().with_memory(); - let mut system = System::new_with_specifics(memory_kind); - system.refresh_memory(); - Some(system.total_memory() * 1024) // KiB into bytes - } else { - None - } -} - fn load_certs(filename: PathBuf) -> anyhow::Result> { let certfile = fs::File::open(filename).map_err(|_| anyhow::anyhow!("cannot open certificate file"))?; let mut reader = BufReader::new(certfile); diff --git a/meilisearch-http/src/routes/indexes/documents.rs b/meilisearch-http/src/routes/indexes/documents.rs index ee86e12ad..f7aa4f485 100644 --- a/meilisearch-http/src/routes/indexes/documents.rs +++ b/meilisearch-http/src/routes/indexes/documents.rs @@ -106,7 +106,7 @@ pub async fn delete_document( ) -> Result { let DocumentParam { document_id, index_uid } = path.into_inner(); let update = Update::DeleteDocuments(vec![document_id]); - let update_status = meilisearch.register_update(index_uid, update).await?; + let update_status = meilisearch.register_update(index_uid, update, false).await?; debug!("returns: {:?}", update_status); Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) } @@ -170,7 +170,7 @@ pub async fn add_documents( format: DocumentAdditionFormat::Json, }; let update_status = meilisearch - .register_update(path.into_inner().index_uid, update) + .register_update(path.into_inner().index_uid, update, true) .await?; debug!("returns: {:?}", update_status); @@ -193,7 +193,7 @@ pub async fn update_documents( format: DocumentAdditionFormat::Json, }; let update_status = meilisearch - .register_update(path.into_inner().index_uid, update) + .register_update(path.into_inner().index_uid, update, true) .await?; debug!("returns: {:?}", update_status); @@ -216,7 +216,7 @@ pub async fn delete_documents( .collect(); let update = Update::DeleteDocuments(ids); - let update_status = meilisearch.register_update(path.into_inner().index_uid, 
update).await?; + let update_status = meilisearch.register_update(path.into_inner().index_uid, update, false).await?; debug!("returns: {:?}", update_status); Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) } @@ -226,7 +226,7 @@ pub async fn clear_all_documents( path: web::Path, ) -> Result { let update = Update::ClearDocuments; - let update_status = meilisearch.register_update(path.into_inner().index_uid, update).await?; + let update_status = meilisearch.register_update(path.into_inner().index_uid, update, false).await?; debug!("returns: {:?}", update_status); Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) } diff --git a/meilisearch-http/src/routes/indexes/mod.rs b/meilisearch-http/src/routes/indexes/mod.rs index 59ad6fa0f..b10b5a004 100644 --- a/meilisearch-http/src/routes/indexes/mod.rs +++ b/meilisearch-http/src/routes/indexes/mod.rs @@ -18,7 +18,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( web::resource("") .route(web::get().to(list_indexes)) - //.route(web::post().to(create_index)), + .route(web::post().to(create_index)), ) .service( web::scope("/{index_uid}") @@ -26,7 +26,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) { web::resource("") .route(web::get().to(get_index)) .route(web::put().to(update_index)) - //.route(web::delete().to(delete_index)), + .route(web::delete().to(delete_index)), ) .service(web::resource("/stats").route(web::get().to(get_index_stats))) .service(web::scope("/documents").configure(documents::configure)) @@ -49,14 +49,14 @@ pub struct IndexCreateRequest { primary_key: Option, } -//pub async fn create_index( - //data: GuardedData, - //body: web::Json, -//) -> Result { - //let body = body.into_inner(); - //let meta = data.create_index(body.uid, body.primary_key).await?; - //Ok(HttpResponse::Created().json(meta)) -//} +pub async fn create_index( + meilisearch: GuardedData, + body: web::Json, +) -> Result { + let body = 
body.into_inner(); + let meta = meilisearch.create_index(body.uid, body.primary_key).await?; + Ok(HttpResponse::Created().json(meta)) +} #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase", deny_unknown_fields)] @@ -102,13 +102,13 @@ pub async fn update_index( Ok(HttpResponse::Ok().json(meta)) } -//pub async fn delete_index( - //data: GuardedData, - //path: web::Path, -//) -> Result { - //data.delete_index(path.index_uid.clone()).await?; - //Ok(HttpResponse::NoContent().finish()) -//} +pub async fn delete_index( + meilisearch: GuardedData, + path: web::Path, +) -> Result { + meilisearch.delete_index(path.index_uid.clone()).await?; + Ok(HttpResponse::NoContent().finish()) +} pub async fn get_index_stats( meilisearch: GuardedData, diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs index 180be4108..4a1e26426 100644 --- a/meilisearch-http/src/routes/indexes/settings.rs +++ b/meilisearch-http/src/routes/indexes/settings.rs @@ -30,7 +30,7 @@ macro_rules! make_setting_route { ..Default::default() }; let update = Update::Settings(settings); - let update_status = meilisearch.register_update(index_uid.into_inner(), update).await?; + let update_status = meilisearch.register_update(index_uid.into_inner(), update, false).await?; debug!("returns: {:?}", update_status); Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) } @@ -49,7 +49,7 @@ macro_rules! 
make_setting_route { }; let update = Update::Settings(settings); - let update_status = meilisearch.register_update(index_uid.into_inner(), update).await?; + let update_status = meilisearch.register_update(index_uid.into_inner(), update, true).await?; debug!("returns: {:?}", update_status); Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) } @@ -159,7 +159,7 @@ pub async fn update_all( let update = Update::Settings(settings); let update_result = meilisearch - .register_update(index_uid.into_inner(), update) + .register_update(index_uid.into_inner(), update, true) .await?; let json = serde_json::json!({ "updateId": update_result.id() }); debug!("returns: {:?}", json); @@ -183,7 +183,7 @@ pub async fn delete_all( let update = Update::Settings(settings.into_unchecked()); let update_result = data - .register_update(index_uid.into_inner(), update) + .register_update(index_uid.into_inner(), update, false) .await?; let json = serde_json::json!({ "updateId": update_result.id() }); debug!("returns: {:?}", json); diff --git a/meilisearch-http/src/routes/mod.rs b/meilisearch-http/src/routes/mod.rs index 9d99a7d0c..3066b3492 100644 --- a/meilisearch-http/src/routes/mod.rs +++ b/meilisearch-http/src/routes/mod.rs @@ -280,18 +280,17 @@ pub async fn get_health() -> Result { #[cfg(test)] mod test { use super::*; - use crate::data::Data; use crate::extractors::authentication::GuardedData; /// A type implemented for a route that uses a authentication policy `Policy`. /// /// This trait is used for regression testing of route authenticaton policies. - trait Is {} + trait Is {} macro_rules! 
impl_is_policy { ($($param:ident)*) => { - impl Is for Func - where Func: Fn(GuardedData, $($param,)*) -> Res {} + impl Is for Func + where Func: Fn(GuardedData, $($param,)*) -> Res {} }; } @@ -310,7 +309,7 @@ mod test { ($($policy:ident => { $($route:expr,)*})*) => { #[test] fn test_auth() { - $($(let _: &dyn Is<$policy, _> = &$route;)*)* + $($(let _: &dyn Is<$policy, _, _> = &$route;)*)* } }; } diff --git a/meilisearch-http/tests/common/server.rs b/meilisearch-http/tests/common/server.rs index 5fb423ffd..ef2e51355 100644 --- a/meilisearch-http/tests/common/server.rs +++ b/meilisearch-http/tests/common/server.rs @@ -2,12 +2,14 @@ use std::path::Path; use actix_web::http::StatusCode; use byte_unit::{Byte, ByteUnit}; +use meilisearch_http::setup_meilisearch; +use meilisearch_lib::options::{IndexerOpts, MaxMemory}; +use once_cell::sync::Lazy; use serde_json::Value; -use tempdir::TempDir; +use tempfile::TempDir; use urlencoding::encode; -use meilisearch_http::data::Data; -use meilisearch_http::option::{IndexerOpts, MaxMemory, Opt}; +use meilisearch_http::option::Opt; use super::index::Index; use super::service::Service; @@ -15,17 +17,25 @@ use super::service::Service; pub struct Server { pub service: Service, // hold ownership to the tempdir while we use the server instance. 
- _dir: Option, + _dir: Option, } +static TEST_TEMP_DIR: Lazy = Lazy::new(|| TempDir::new().unwrap()); + impl Server { pub async fn new() -> Self { - let dir = TempDir::new("meilisearch").unwrap(); + let dir = TempDir::new().unwrap(); - let opt = default_settings(dir.path()); + if cfg!(windows) { + std::env::set_var("TMP", TEST_TEMP_DIR.path()); + } else { + std::env::set_var("TMPDIR", TEST_TEMP_DIR.path()); + } - let data = Data::new(opt).unwrap(); - let service = Service(data); + let options = default_settings(dir.path()); + + let meilisearch = setup_meilisearch(&options).unwrap(); + let service = Service { meilisearch, options }; Server { service, @@ -33,9 +43,9 @@ impl Server { } } - pub async fn new_with_options(opt: Opt) -> Self { - let data = Data::new(opt).unwrap(); - let service = Service(data); + pub async fn new_with_options(options: Opt) -> Self { + let meilisearch = setup_meilisearch(&options).unwrap(); + let service = Service { meilisearch, options }; Server { service, diff --git a/meilisearch-http/tests/common/service.rs b/meilisearch-http/tests/common/service.rs index 08db5b9dc..1450a6dd9 100644 --- a/meilisearch-http/tests/common/service.rs +++ b/meilisearch-http/tests/common/service.rs @@ -1,14 +1,17 @@ use actix_web::{http::StatusCode, test}; +use meilisearch_lib::MeiliSearch; use serde_json::Value; -use meilisearch_http::create_app; -use meilisearch_http::data::Data; +use meilisearch_http::{Opt, create_app}; -pub struct Service(pub Data); +pub struct Service { + pub meilisearch: MeiliSearch, + pub options: Opt, +} impl Service { pub async fn post(&self, url: impl AsRef, body: Value) -> (Value, StatusCode) { - let app = test::init_service(create_app!(&self.0, true)).await; + let app = test::init_service(create_app!(&self.meilisearch, true, &self.options)).await; let req = test::TestRequest::post() .uri(url.as_ref()) @@ -28,7 +31,7 @@ impl Service { url: impl AsRef, body: impl AsRef, ) -> (Value, StatusCode) { - let app = 
test::init_service(create_app!(&self.0, true)).await; + let app = test::init_service(create_app!(&self.meilisearch, true, &self.options)).await; let req = test::TestRequest::post() .uri(url.as_ref()) @@ -44,7 +47,7 @@ impl Service { } pub async fn get(&self, url: impl AsRef) -> (Value, StatusCode) { - let app = test::init_service(create_app!(&self.0, true)).await; + let app = test::init_service(create_app!(&self.meilisearch, true, &self.options)).await; let req = test::TestRequest::get().uri(url.as_ref()).to_request(); let res = test::call_service(&app, req).await; @@ -56,7 +59,7 @@ impl Service { } pub async fn put(&self, url: impl AsRef, body: Value) -> (Value, StatusCode) { - let app = test::init_service(create_app!(&self.0, true)).await; + let app = test::init_service(create_app!(&self.meilisearch, true, &self.options)).await; let req = test::TestRequest::put() .uri(url.as_ref()) @@ -71,7 +74,7 @@ impl Service { } pub async fn delete(&self, url: impl AsRef) -> (Value, StatusCode) { - let app = test::init_service(create_app!(&self.0, true)).await; + let app = test::init_service(create_app!(&self.meilisearch, true, &self.options)).await; let req = test::TestRequest::delete().uri(url.as_ref()).to_request(); let res = test::call_service(&app, req).await; diff --git a/meilisearch-http/tests/documents/add_documents.rs b/meilisearch-http/tests/documents/add_documents.rs index 4c94cf194..a0436c67d 100644 --- a/meilisearch-http/tests/documents/add_documents.rs +++ b/meilisearch-http/tests/documents/add_documents.rs @@ -16,7 +16,7 @@ async fn add_documents_test_json_content_types() { // this is a what is expected and should work let server = Server::new().await; - let app = test::init_service(create_app!(&server.service.0, true)).await; + let app = test::init_service(create_app!(&server.service.meilisearch, true, &server.service.options)).await; let req = test::TestRequest::post() .uri("/indexes/dog/documents") .set_payload(document.to_string()) @@ -41,7 +41,7 @@ async 
fn add_documents_test_no_content_types() { ]); let server = Server::new().await; - let app = test::init_service(create_app!(&server.service.0, true)).await; + let app = test::init_service(create_app!(&server.service.meilisearch, true, &server.service.options)).await; let req = test::TestRequest::post() .uri("/indexes/dog/documents") .set_payload(document.to_string()) @@ -67,7 +67,7 @@ async fn add_documents_test_bad_content_types() { ]); let server = Server::new().await; - let app = test::init_service(create_app!(&server.service.0, true)).await; + let app = test::init_service(create_app!(&server.service.meilisearch, true, &server.service.options)).await; let req = test::TestRequest::post() .uri("/indexes/dog/documents") .set_payload(document.to_string()) diff --git a/meilisearch-http/tests/settings/get_settings.rs b/meilisearch-http/tests/settings/get_settings.rs index c7bcd4376..37ea6bc82 100644 --- a/meilisearch-http/tests/settings/get_settings.rs +++ b/meilisearch-http/tests/settings/get_settings.rs @@ -30,8 +30,8 @@ static DEFAULT_SETTINGS_VALUES: Lazy> = Lazy::new(| #[actix_rt::test] async fn get_settings_unexisting_index() { let server = Server::new().await; - let (_response, code) = server.index("test").settings().await; - assert_eq!(code, 404) + let (response, code) = server.index("test").settings().await; + assert_eq!(code, 404, "{}", response) } #[actix_rt::test] @@ -167,8 +167,8 @@ async fn update_setting_unexisting_index() { async fn update_setting_unexisting_index_invalid_uid() { let server = Server::new().await; let index = server.index("test##! "); - let (_response, code) = index.update_settings(json!({})).await; - assert_eq!(code, 400); + let (response, code) = index.update_settings(json!({})).await; + assert_eq!(code, 400, "{}", response); } macro_rules! 
test_setting_routes { diff --git a/meilisearch-http/tests/snapshot/mod.rs b/meilisearch-http/tests/snapshot/mod.rs index b5602c508..fb6713779 100644 --- a/meilisearch-http/tests/snapshot/mod.rs +++ b/meilisearch-http/tests/snapshot/mod.rs @@ -9,8 +9,8 @@ use meilisearch_http::Opt; #[actix_rt::test] async fn perform_snapshot() { - let temp = tempfile::tempdir_in(".").unwrap(); - let snapshot_dir = tempfile::tempdir_in(".").unwrap(); + let temp = tempfile::tempdir().unwrap(); + let snapshot_dir = tempfile::tempdir().unwrap(); let options = Opt { snapshot_dir: snapshot_dir.path().to_owned(), @@ -29,7 +29,7 @@ async fn perform_snapshot() { sleep(Duration::from_secs(2)).await; - let temp = tempfile::tempdir_in(".").unwrap(); + let temp = tempfile::tempdir().unwrap(); let snapshot_path = snapshot_dir .path() diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml index 54cedd6d5..446a20fb4 100644 --- a/meilisearch-lib/Cargo.toml +++ b/meilisearch-lib/Cargo.toml @@ -33,7 +33,8 @@ main_error = "0.1.1" meilisearch-error = { path = "../meilisearch-error" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" } memmap = "0.7.0" -milli = { git = "https://github.com/meilisearch/milli.git", branch = "main" } +#milli = { git = "https://github.com/meilisearch/milli.git", branch = "main" } +milli = { path = "../../milli/milli" } mime = "0.3.16" num_cpus = "1.13.0" once_cell = "1.8.0" diff --git a/meilisearch-lib/src/index/search.rs b/meilisearch-lib/src/index/search.rs index edc672897..70d0510ac 100644 --- a/meilisearch-lib/src/index/search.rs +++ b/meilisearch-lib/src/index/search.rs @@ -7,8 +7,7 @@ use heed::RoTxn; use indexmap::IndexMap; use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token}; use milli::{ - AscDesc, AscDescError, FieldId, FieldsIdsMap, FilterCondition, MatchingWords, SortError, - UserError, + AscDesc, FieldId, FieldsIdsMap, FilterCondition, MatchingWords, SortError }; use regex::Regex; use 
serde::{Deserialize, Serialize}; diff --git a/meilisearch-lib/src/index_controller/dump_actor/mod.rs b/meilisearch-lib/src/index_controller/dump_actor/mod.rs index c2410107d..802f872cd 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/mod.rs @@ -2,7 +2,6 @@ use std::fs::File; use std::path::{Path, PathBuf}; use std::sync::Arc; -use anyhow::Context; use chrono::{DateTime, Utc}; use log::{info, trace, warn}; #[cfg(test)] @@ -112,22 +111,16 @@ pub fn load_dump( update_db_size: usize, indexer_opts: &IndexerOpts, ) -> anyhow::Result<()> { - let tmp_src = tempfile::tempdir_in(".")?; + let tmp_src = tempfile::tempdir()?; let tmp_src_path = tmp_src.path(); - println!("importing to {}", dst_path.as_ref().display()); crate::from_tar_gz(&src_path, tmp_src_path)?; let meta_path = tmp_src_path.join(META_FILE_NAME); let mut meta_file = File::open(&meta_path)?; let meta: Metadata = serde_json::from_reader(&mut meta_file)?; - let dst_dir = dst_path - .as_ref() - .parent() - .with_context(|| format!("Invalid db path: {}", dst_path.as_ref().display()))?; - - let tmp_dst = tempfile::tempdir_in(dst_dir)?; + let tmp_dst = tempfile::tempdir()?; match meta { Metadata::V1(meta) => { @@ -168,9 +161,8 @@ impl DumpTask { create_dir_all(&self.path).await?; - let path_clone = self.path.clone(); let temp_dump_dir = - tokio::task::spawn_blocking(|| tempfile::TempDir::new_in(path_clone)).await??; + tokio::task::spawn_blocking(|| tempfile::TempDir::new()).await??; let temp_dump_path = temp_dump_dir.path().to_owned(); let meta = Metadata::new_v2(self.index_db_size, self.update_db_size); @@ -183,7 +175,7 @@ impl DumpTask { UpdateMsg::dump(&self.update_handle, uuids, temp_dump_path.clone()).await?; let dump_path = tokio::task::spawn_blocking(move || -> Result { - let temp_dump_file = tempfile::NamedTempFile::new_in(&self.path)?; + let temp_dump_file = tempfile::NamedTempFile::new()?; crate::to_tar_gz(temp_dump_path, 
temp_dump_file.path()) .map_err(|e| DumpActorError::Internal(e.into()))?; diff --git a/meilisearch-lib/src/index_controller/index_resolver/mod.rs b/meilisearch-lib/src/index_controller/index_resolver/mod.rs index eea8e7a2c..d41c37ac6 100644 --- a/meilisearch-lib/src/index_controller/index_resolver/mod.rs +++ b/meilisearch-lib/src/index_controller/index_resolver/mod.rs @@ -84,11 +84,14 @@ where U: UuidStore, Ok(indexes) } - pub async fn create_index(&self, uid: String, primary_key: Option) -> Result<(Uuid, Index)> { + pub async fn create_index(&self, uid: String, primary_key: Option) -> Result { + if !is_index_uid_valid(&uid) { + return Err(IndexResolverError::BadlyFormatted(uid)); + } let uuid = Uuid::new_v4(); let index = self.index_store.create(uuid, primary_key).await?; self.index_uuid_store.insert(uid, uuid).await?; - Ok((uuid, index)) + Ok(index) } pub async fn list(&self) -> Result> { @@ -109,11 +112,11 @@ where U: UuidStore, Ok(indexes) } - pub async fn delete_index(&self, uid: String) -> Result<()> { + pub async fn delete_index(&self, uid: String) -> Result { match self.index_uuid_store.delete(uid.clone()).await? 
{ Some(uuid) => { let _ = self.index_store.delete(uuid).await; - Ok(()) + Ok(uuid) } None => Err(IndexResolverError::UnexistingIndex(uid)), } @@ -148,3 +151,8 @@ where U: UuidStore, } } } + +fn is_index_uid_valid(uid: &str) -> bool { + uid.chars() + .all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') +} diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index 325082074..0dee6521f 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -23,7 +23,7 @@ use crate::index_controller::index_resolver::create_index_resolver; use crate::index_controller::snapshot::SnapshotService; use crate::options::IndexerOpts; use error::Result; -use crate::index::error::Result as IndexResult; +use crate::index::error::{Result as IndexResult}; use self::dump_actor::load_dump; use self::index_resolver::HardStateIndexResolver; @@ -33,29 +33,15 @@ use self::updates::UpdateMsg; mod dump_actor; pub mod error; -//pub mod indexes; mod snapshot; pub mod update_file_store; pub mod updates; -//mod uuid_resolver; mod index_resolver; pub type Payload = Box< dyn Stream> + Send + Sync + 'static + Unpin, >; -macro_rules! 
time { - ($e:expr) => { - { - let now = std::time::Instant::now(); - let result = $e; - let elapsed = now.elapsed(); - println!("elapsed at line {}: {}ms ({}ns)", line!(), elapsed.as_millis(), elapsed.as_nanos()); - result - } - }; -} - #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(rename_all = "camelCase")] pub struct IndexMetadata { @@ -260,148 +246,27 @@ impl IndexController { IndexControllerBuilder::default() } - pub async fn register_update(&self, uid: String, update: Update) -> Result { + pub async fn register_update(&self, uid: String, update: Update, create_index: bool) -> Result { match self.index_resolver.get_uuid(uid).await { Ok(uuid) => { let update_result = UpdateMsg::update(&self.update_sender, uuid, update).await?; Ok(update_result) } Err(IndexResolverError::UnexistingIndex(name)) => { - let (uuid, _) = self.index_resolver.create_index(name, None).await?; - let update_result = UpdateMsg::update(&self.update_sender, uuid, update).await?; - // ignore if index creation fails now, since it may already have been created + if create_index { + let index = self.index_resolver.create_index(name, None).await?; + let update_result = UpdateMsg::update(&self.update_sender, index.uuid, update).await?; + // ignore if index creation fails now, since it may already have been created - Ok(update_result) + Ok(update_result) + } else { + Err(IndexResolverError::UnexistingIndex(name).into()) + } } Err(e) => Err(e.into()), } } - //pub async fn add_documents( - //&self, - //uid: String, - //method: milli::update::IndexDocumentsMethod, - //payload: Payload, - //primary_key: Option, - //) -> Result { - //let perform_update = |uuid| async move { - //let meta = UpdateMeta::DocumentsAddition { - //method, - //primary_key, - //}; - //let (sender, receiver) = mpsc::channel(10); - - //// It is necessary to spawn a local task to send the payload to the update handle to - //// prevent dead_locking between the update_handle::update that waits for the update to be - //// 
registered and the update_actor that waits for the the payload to be sent to it. - //tokio::task::spawn_local(async move { - //payload - //.for_each(|r| async { - //let _ = sender.send(r).await; - //}) - //.await - //}); - - //// This must be done *AFTER* spawning the task. - //self.update_handle.update(meta, receiver, uuid).await - //}; - - //match self.uuid_resolver.get(uid).await { - //Ok(uuid) => Ok(perform_update(uuid).await?), - //Err(UuidResolverError::UnexistingIndex(name)) => { - //let uuid = Uuid::new_v4(); - //let status = perform_update(uuid).await?; - //// ignore if index creation fails now, since it may already have been created - //let _ = self.index_handle.create_index(uuid, None).await; - //self.uuid_resolver.insert(name, uuid).await?; - //Ok(status) - //} - //Err(e) => Err(e.into()), - //} - //} - - //pub async fn clear_documents(&self, uid: String) -> Result { - //let uuid = self.uuid_resolver.get(uid).await?; - //let meta = UpdateMeta::ClearDocuments; - //let (_, receiver) = mpsc::channel(1); - //let status = self.update_handle.update(meta, receiver, uuid).await?; - //Ok(status) - //} - - //pub async fn delete_documents( - //&self, - //uid: String, - //documents: Vec, - //) -> Result { - //let uuid = self.uuid_resolver.get(uid).await?; - //let meta = UpdateMeta::DeleteDocuments { ids: documents }; - //let (_, receiver) = mpsc::channel(1); - //let status = self.update_handle.update(meta, receiver, uuid).await?; - //Ok(status) - //} - - //pub async fn update_settings( - //&self, - //uid: String, - //settings: Settings, - //create: bool, - //) -> Result { - //let perform_udpate = |uuid| async move { - //let meta = UpdateMeta::Settings(settings.into_unchecked()); - //// Nothing so send, drop the sender right away, as not to block the update actor. 
- //let (_, receiver) = mpsc::channel(1); - //self.update_handle.update(meta, receiver, uuid).await - //}; - - //match self.uuid_resolver.get(uid).await { - //Ok(uuid) => Ok(perform_udpate(uuid).await?), - //Err(UuidResolverError::UnexistingIndex(name)) if create => { - //let uuid = Uuid::new_v4(); - //let status = perform_udpate(uuid).await?; - //// ignore if index creation fails now, since it may already have been created - //let _ = self.index_handle.create_index(uuid, None).await; - //self.uuid_resolver.insert(name, uuid).await?; - //Ok(status) - //} - //Err(e) => Err(e.into()), - //} - //} - - //pub async fn create_index(&self, index_settings: IndexSettings) -> Result { - //let IndexSettings { uid, primary_key } = index_settings; - //let uid = uid.ok_or(IndexControllerError::MissingUid)?; - //let uuid = Uuid::new_v4(); - //let meta = self.index_handle.create_index(uuid, primary_key).await?; - //self.uuid_resolver.insert(uid.clone(), uuid).await?; - //let meta = IndexMetadata { - //uuid, - //name: uid.clone(), - //uid, - //meta, - //}; - - //Ok(meta) - //} - - //pub async fn delete_index(&self, uid: String) -> Result<()> { - //let uuid = self.uuid_resolver.delete(uid).await?; - - //// We remove the index from the resolver synchronously, and effectively perform the index - //// deletion as a background task. 
- //let update_handle = self.update_handle.clone(); - //let index_handle = self.index_handle.clone(); - //tokio::spawn(async move { - //if let Err(e) = update_handle.delete(uuid).await { - //error!("Error while deleting index: {}", e); - //} - //if let Err(e) = index_handle.delete(uuid).await { - //error!("Error while deleting index: {}", e); - //} - //}); - - //Ok(()) - //} - pub async fn update_status(&self, uid: String, id: u64) -> Result { let uuid = self.index_resolver.get_uuid(uid).await?; let result = UpdateMsg::get_update(&self.update_sender, uuid, id).await?; @@ -481,8 +346,8 @@ impl IndexController { } pub async fn search(&self, uid: String, query: SearchQuery) -> Result { - let index = time!(self.index_resolver.get_index(uid.clone()).await?); - let result = time!(spawn_blocking(move || time!(index.perform_search(query))).await??); + let index = self.index_resolver.get_index(uid.clone()).await?; + let result = spawn_blocking(move || index.perform_search(query)).await??; Ok(result) } @@ -549,6 +414,33 @@ impl IndexController { pub async fn dump_info(&self, uid: String) -> Result { Ok(self.dump_handle.dump_info(uid).await?) 
} + + pub async fn create_index(&self, uid: String, primary_key: Option) -> Result { + let index = self.index_resolver.create_index(uid.clone(), primary_key).await?; + let meta = spawn_blocking(move || -> IndexResult<_> { + let meta = index.meta()?; + let meta = IndexMetadata { + uuid: index.uuid, + uid: uid.clone(), + name: uid, + meta, + }; + Ok(meta) + }).await??; + + Ok(meta) + } + + pub async fn delete_index(&self, uid: String) -> Result<()> { + let uuid = self.index_resolver.delete_index(uid).await?; + + let update_sender = self.update_sender.clone(); + tokio::spawn(async move { + let _ = UpdateMsg::delete(&update_sender, uuid).await; + }); + + Ok(()) + } } pub async fn get_arc_ownership_blocking(mut item: Arc) -> T { diff --git a/meilisearch-lib/src/index_controller/snapshot.rs b/meilisearch-lib/src/index_controller/snapshot.rs index 132745c96..66bdfe60e 100644 --- a/meilisearch-lib/src/index_controller/snapshot.rs +++ b/meilisearch-lib/src/index_controller/snapshot.rs @@ -57,7 +57,7 @@ impl SnapshotService { let snapshot_dir = self.snapshot_path.clone(); fs::create_dir_all(&snapshot_dir).await?; let temp_snapshot_dir = - spawn_blocking(move || tempfile::tempdir_in(snapshot_dir)).await??; + spawn_blocking(move || tempfile::tempdir()).await??; let temp_snapshot_path = temp_snapshot_dir.path().to_owned(); let indexes = self @@ -71,12 +71,11 @@ impl SnapshotService { UpdateMsg::snapshot(&self.update_sender, temp_snapshot_path.clone(), indexes).await?; - let snapshot_dir = self.snapshot_path.clone(); let snapshot_path = self .snapshot_path .join(format!("{}.snapshot", self.db_name)); let snapshot_path = spawn_blocking(move || -> anyhow::Result { - let temp_snapshot_file = tempfile::NamedTempFile::new_in(snapshot_dir)?; + let temp_snapshot_file = tempfile::NamedTempFile::new()?; let temp_snapshot_file_path = temp_snapshot_file.path().to_owned(); crate::compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?; 
temp_snapshot_file.persist(&snapshot_path)?; @@ -137,13 +136,6 @@ pub fn load_snapshot( //use uuid::Uuid; //use super::*; - //use crate::index_controller::index_actor::MockIndexActorHandle; - //use crate::index_controller::updates::{ - //error::UpdateActorError, MockUpdateActorHandle, UpdateActorHandleImpl, - //}; - //use crate::index_controller::uuid_resolver::{ - //error::UuidResolverError, MockUuidResolverHandle, - //}; //#[actix_rt::test] //async fn test_normal() { @@ -191,7 +183,7 @@ pub fn load_snapshot( //uuid_resolver //.expect_snapshot() //.times(1) - //abitrary error + ////abitrary error //.returning(|_| Box::pin(err(UuidResolverError::NameAlreadyExist))); //let update_handle = MockUpdateActorHandle::new(); @@ -206,7 +198,7 @@ pub fn load_snapshot( //); //assert!(snapshot_service.perform_snapshot().await.is_err()); - //Nothing was written to the file + ////Nothing was written to the file //assert!(!snapshot_path.path().join("data.ms.snapshot").exists()); //} @@ -222,7 +214,7 @@ pub fn load_snapshot( //let mut update_handle = MockUpdateActorHandle::new(); //update_handle //.expect_snapshot() - //abitrary error + ////abitrary error //.returning(|_, _| Box::pin(err(UpdateActorError::UnexistingUpdate(0)))); //let snapshot_path = tempfile::tempdir_in(".").unwrap(); @@ -235,7 +227,7 @@ pub fn load_snapshot( //); //assert!(snapshot_service.perform_snapshot().await.is_err()); - //Nothing was written to the file + ////Nothing was written to the file //assert!(!snapshot_path.path().join("data.ms.snapshot").exists()); //} @@ -244,9 +236,9 @@ pub fn load_snapshot( //let mut uuid_resolver = MockUuidResolverHandle::new(); //uuid_resolver //.expect_snapshot() - //we expect the funtion to be called between 2 and 3 time in the given interval. + ////we expect the funtion to be called between 2 and 3 time in the given interval. 
//.times(2..4) - //abitrary error, to short-circuit the function + ////abitrary error, to short-circuit the function //.returning(move |_| Box::pin(err(UuidResolverError::NameAlreadyExist))); //let update_handle = MockUpdateActorHandle::new(); diff --git a/meilisearch-lib/src/index_controller/updates/message.rs b/meilisearch-lib/src/index_controller/updates/message.rs index 22a920e12..3b157e568 100644 --- a/meilisearch-lib/src/index_controller/updates/message.rs +++ b/meilisearch-lib/src/index_controller/updates/message.rs @@ -24,7 +24,7 @@ pub enum UpdateMsg { ret: oneshot::Sender>, id: u64, }, - Delete { + DeleteIndex { uuid: Uuid, ret: oneshot::Sender>, }, @@ -99,4 +99,11 @@ impl UpdateMsg { sender.send(msg).await?; rcv.await? } + + pub async fn delete(sender: &mpsc::Sender, uuid: Uuid) -> Result<()> { + let (ret, rcv) = oneshot::channel(); + let msg = Self::DeleteIndex { ret, uuid }; + sender.send(msg).await?; + rcv.await? + } } diff --git a/meilisearch-lib/src/index_controller/updates/mod.rs b/meilisearch-lib/src/index_controller/updates/mod.rs index 733bda8e6..dc707c24b 100644 --- a/meilisearch-lib/src/index_controller/updates/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/mod.rs @@ -172,7 +172,7 @@ impl UpdateLoop { GetUpdate { uuid, ret, id } => { let _ = ret.send(self.handle_get_update(uuid, id).await); } - Delete { uuid, ret } => { + DeleteIndex { uuid, ret } => { let _ = ret.send(self.handle_delete(uuid).await); } Snapshot { indexes, path, ret } => { diff --git a/meilisearch-lib/src/index_controller/updates/store/mod.rs b/meilisearch-lib/src/index_controller/updates/store/mod.rs index 01e7fd989..21bfad61e 100644 --- a/meilisearch-lib/src/index_controller/updates/store/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/store/mod.rs @@ -552,149 +552,149 @@ impl UpdateStore { } } -#[cfg(test)] -mod test { - use super::*; - use crate::index_controller::{ - index_actor::{error::IndexActorError, MockIndexActorHandle}, - UpdateResult, - }; 
+//#[cfg(test)] +//mod test { + //use super::*; + //use crate::index_controller::{ + //index_actor::{error::IndexActorError, MockIndexActorHandle}, + //UpdateResult, + //}; - use futures::future::ok; + //use futures::future::ok; - #[actix_rt::test] - async fn test_next_id() { - let dir = tempfile::tempdir_in(".").unwrap(); - let mut options = EnvOpenOptions::new(); - let handle = Arc::new(MockIndexActorHandle::new()); - options.map_size(4096 * 100); - let update_store = UpdateStore::open( - options, - dir.path(), - handle, - Arc::new(AtomicBool::new(false)), - ) - .unwrap(); + //#[actix_rt::test] + //async fn test_next_id() { + //let dir = tempfile::tempdir_in(".").unwrap(); + //let mut options = EnvOpenOptions::new(); + //let handle = Arc::new(MockIndexActorHandle::new()); + //options.map_size(4096 * 100); + //let update_store = UpdateStore::open( + //options, + //dir.path(), + //handle, + //Arc::new(AtomicBool::new(false)), + //) + //.unwrap(); - let index1_uuid = Uuid::new_v4(); - let index2_uuid = Uuid::new_v4(); + //let index1_uuid = Uuid::new_v4(); + //let index2_uuid = Uuid::new_v4(); - let mut txn = update_store.env.write_txn().unwrap(); - let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap(); - txn.commit().unwrap(); - assert_eq!((0, 0), ids); + //let mut txn = update_store.env.write_txn().unwrap(); + //let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap(); + //txn.commit().unwrap(); + //assert_eq!((0, 0), ids); - let mut txn = update_store.env.write_txn().unwrap(); - let ids = update_store.next_update_id(&mut txn, index2_uuid).unwrap(); - txn.commit().unwrap(); - assert_eq!((1, 0), ids); + //let mut txn = update_store.env.write_txn().unwrap(); + //let ids = update_store.next_update_id(&mut txn, index2_uuid).unwrap(); + //txn.commit().unwrap(); + //assert_eq!((1, 0), ids); - let mut txn = update_store.env.write_txn().unwrap(); - let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap(); - txn.commit().unwrap(); - 
assert_eq!((2, 1), ids); - } + //let mut txn = update_store.env.write_txn().unwrap(); + //let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap(); + //txn.commit().unwrap(); + //assert_eq!((2, 1), ids); + //} - #[actix_rt::test] - async fn test_register_update() { - let dir = tempfile::tempdir_in(".").unwrap(); - let mut options = EnvOpenOptions::new(); - let handle = Arc::new(MockIndexActorHandle::new()); - options.map_size(4096 * 100); - let update_store = UpdateStore::open( - options, - dir.path(), - handle, - Arc::new(AtomicBool::new(false)), - ) - .unwrap(); - let meta = UpdateMeta::ClearDocuments; - let uuid = Uuid::new_v4(); - let store_clone = update_store.clone(); - tokio::task::spawn_blocking(move || { - store_clone.register_update(meta, None, uuid).unwrap(); - }) - .await - .unwrap(); + //#[actix_rt::test] + //async fn test_register_update() { + //let dir = tempfile::tempdir_in(".").unwrap(); + //let mut options = EnvOpenOptions::new(); + //let handle = Arc::new(MockIndexActorHandle::new()); + //options.map_size(4096 * 100); + //let update_store = UpdateStore::open( + //options, + //dir.path(), + //handle, + //Arc::new(AtomicBool::new(false)), + //) + //.unwrap(); + //let meta = UpdateMeta::ClearDocuments; + //let uuid = Uuid::new_v4(); + //let store_clone = update_store.clone(); + //tokio::task::spawn_blocking(move || { + //store_clone.register_update(meta, None, uuid).unwrap(); + //}) + //.await + //.unwrap(); - let txn = update_store.env.read_txn().unwrap(); - assert!(update_store - .pending_queue - .get(&txn, &(0, uuid, 0)) - .unwrap() - .is_some()); - } + //let txn = update_store.env.read_txn().unwrap(); + //assert!(update_store + //.pending_queue + //.get(&txn, &(0, uuid, 0)) + //.unwrap() + //.is_some()); + //} - #[actix_rt::test] - async fn test_process_update() { - let dir = tempfile::tempdir_in(".").unwrap(); - let mut handle = MockIndexActorHandle::new(); + //#[actix_rt::test] + //async fn test_process_update() { + //let dir = 
tempfile::tempdir_in(".").unwrap(); + //let mut handle = MockIndexActorHandle::new(); - handle - .expect_update() - .times(2) - .returning(|_index_uuid, processing, _file| { - if processing.id() == 0 { - Box::pin(ok(Ok(processing.process(UpdateResult::Other)))) - } else { - Box::pin(ok(Err( - processing.fail(IndexActorError::ExistingPrimaryKey.into()) - ))) - } - }); + //handle + //.expect_update() + //.times(2) + //.returning(|_index_uuid, processing, _file| { + //if processing.id() == 0 { + //Box::pin(ok(Ok(processing.process(UpdateResult::Other)))) + //} else { + //Box::pin(ok(Err( + //processing.fail(IndexActorError::ExistingPrimaryKey.into()) + //))) + //} + //}); - let handle = Arc::new(handle); + //let handle = Arc::new(handle); - let mut options = EnvOpenOptions::new(); - options.map_size(4096 * 100); - let store = UpdateStore::open( - options, - dir.path(), - handle.clone(), - Arc::new(AtomicBool::new(false)), - ) - .unwrap(); + //let mut options = EnvOpenOptions::new(); + //options.map_size(4096 * 100); + //let store = UpdateStore::open( + //options, + //dir.path(), + //handle.clone(), + //Arc::new(AtomicBool::new(false)), + //) + //.unwrap(); - // wait a bit for the event loop exit. - tokio::time::sleep(std::time::Duration::from_millis(50)).await; + //// wait a bit for the event loop exit. 
+ //tokio::time::sleep(std::time::Duration::from_millis(50)).await; - let mut txn = store.env.write_txn().unwrap(); + //let mut txn = store.env.write_txn().unwrap(); - let update = Enqueued::new(UpdateMeta::ClearDocuments, 0, None); - let uuid = Uuid::new_v4(); + //let update = Enqueued::new(UpdateMeta::ClearDocuments, 0, None); + //let uuid = Uuid::new_v4(); - store - .pending_queue - .put(&mut txn, &(0, uuid, 0), &update) - .unwrap(); + //store + //.pending_queue + //.put(&mut txn, &(0, uuid, 0), &update) + //.unwrap(); - let update = Enqueued::new(UpdateMeta::ClearDocuments, 1, None); + //let update = Enqueued::new(UpdateMeta::ClearDocuments, 1, None); - store - .pending_queue - .put(&mut txn, &(1, uuid, 1), &update) - .unwrap(); + //store + //.pending_queue + //.put(&mut txn, &(1, uuid, 1), &update) + //.unwrap(); - txn.commit().unwrap(); + //txn.commit().unwrap(); - // Process the pending, and check that it has been moved to the update databases, and - // removed from the pending database. - let store_clone = store.clone(); - tokio::task::spawn_blocking(move || { - store_clone.process_pending_update(handle.clone()).unwrap(); - store_clone.process_pending_update(handle).unwrap(); - }) - .await - .unwrap(); + //// Process the pending, and check that it has been moved to the update databases, and + //// removed from the pending database. 
+ //let store_clone = store.clone(); + //tokio::task::spawn_blocking(move || { + //store_clone.process_pending_update(handle.clone()).unwrap(); + //store_clone.process_pending_update(handle).unwrap(); + //}) + //.await + //.unwrap(); - let txn = store.env.read_txn().unwrap(); + //let txn = store.env.read_txn().unwrap(); - assert!(store.pending_queue.first(&txn).unwrap().is_none()); - let update = store.updates.get(&txn, &(uuid, 0)).unwrap().unwrap(); + //assert!(store.pending_queue.first(&txn).unwrap().is_none()); + //let update = store.updates.get(&txn, &(uuid, 0)).unwrap().unwrap(); - assert!(matches!(update, UpdateStatus::Processed(_))); - let update = store.updates.get(&txn, &(uuid, 1)).unwrap().unwrap(); + //assert!(matches!(update, UpdateStatus::Processed(_))); + //let update = store.updates.get(&txn, &(uuid, 1)).unwrap().unwrap(); - assert!(matches!(update, UpdateStatus::Failed(_))); - } -} + //assert!(matches!(update, UpdateStatus::Failed(_))); + //} +//} From 56766cffc397d5faa1594ef5701bc81631f92f64 Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 28 Sep 2021 18:58:56 +0200 Subject: [PATCH 20/37] remove module level doc --- meilisearch-http/src/lib.rs | 39 ------------------------------------- 1 file changed, 39 deletions(-) diff --git a/meilisearch-http/src/lib.rs b/meilisearch-http/src/lib.rs index 481d38e1c..bea39ffdd 100644 --- a/meilisearch-http/src/lib.rs +++ b/meilisearch-http/src/lib.rs @@ -1,42 +1,3 @@ -//! # MeiliSearch -//! Hello there, future contributors. If you are here and see this code, it's probably because you want to add a super new fancy feature in MeiliSearch or fix a bug and first of all, thank you for that! -//! -//! To help you in this task, we'll try to do a little overview of the project. -//! ## Milli -//! [Milli](https://github.com/meilisearch/milli) is the core library of MeiliSearch. It's where we actually index documents and perform searches. Its purpose is to do these two tasks as fast as possible. 
You can give an update to milli, and it'll uses as many cores as provided to perform it as fast as possible. Nothing more. You can perform searches at the same time (search only uses one core). -//! As you can see, we're missing quite a lot of features here; milli does not handle multiples indexes, it can't queue updates, it doesn't provide any web / API frontend, it doesn't implement dumps or snapshots, etc... -//! -//! ## `Index` module -//! The [index] module is what encapsulates one milli index. It abstracts over its transaction and isolates a task that can be run into a thread. This is the unit of interaction with milli. -//! If you add a feature to milli, you'll probably need to add it in this module too before exposing it to the rest of meilisearch. -//! -//! ## `IndexController` module -//! To handle multiple indexes, we created an [index_controller]. It's in charge of creating new indexes, keeping references to all its indexes, forward asynchronous updates to its indexes, and provide an API to search in its indexes synchronously. -//! To achieves this goal, we use an [actor model](https://en.wikipedia.org/wiki/Actor_model). -//! -//! ### The actor model -//! Every actor is composed of at least three files: -//! - `mod.rs` declare and import all the files used by the actor. We also describe the interface (= all the methods) used to interact with the actor. If you are not modifying anything inside of an actor, this is usually all you need to see. -//! - `handle_impl.rs` implements the interface described in the `mod.rs`; in reality, there is no code logic in this file. Every method is only wrapping its parameters in a structure that is sent to the actor. This is useful for test and futureproofing. -//! - `message.rs` contains an enum that describes all the interactions you can have with the actor. -//! - `actor.rs` is used to create and execute the actor. It's where we'll write the loop looking for new messages and actually perform the tasks. -//! -//! 
MeiliSearch currently uses four actors: -//! - [`uuid_resolver`](index_controller/uuid_resolver/index.html) hold the association between the user-provided indexes name and the internal [`uuid`](https://en.wikipedia.org/wiki/Universally_unique_identifier) representation we use. -//! - [`index_actor`](index_controller::index_actor) is our representation of multiples indexes. Any request made to MeiliSearch that needs to talk to milli will pass through this actor. -//! - [`update_actor`](index_controller/update_actor/index.html) is in charge of indexes updates. Since updates can take a long time to receive and process, we need to: -//! 1. Store them as fast as possible so we can continue to receive other updates even if nothing has been processed -//! 2. Feed the `index_actor` with a new update every time it finished its current job. -//! - [`dump_actor`](index_controller/dump_actor/index.html) this actor handle the [dumps](https://docs.meilisearch.com/reference/api/dump.html). It needs to contact all the others actors and create a dump of everything that was currently happening. -//! -//! ## Data module -//! The [data] module provide a unified interface to communicate with the index controller and other services (snapshot, dumps, ...), initialize the MeiliSearch instance -//! -//! ## HTTP server -//! To handle the web and API part, we are using [actix-web](https://docs.rs/actix-web/); you can find all routes in the [routes] module. -//! Currently, the configuration of actix-web is made in the [lib.rs](crate). -//! Most of the routes use [extractors] to handle the authentication. 
- #![allow(rustdoc::private_intra_doc_links)] #[macro_use] pub mod error; From 3747f5bdd8937e771403c456547038fcf48735b5 Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 28 Sep 2021 19:29:14 +0200 Subject: [PATCH 21/37] replace unwraps with correct error --- .../src/index_controller/update_file_store.rs | 85 ++++++++++++------- .../src/index_controller/updates/error.rs | 5 +- .../src/index_controller/updates/mod.rs | 2 +- 3 files changed, 61 insertions(+), 31 deletions(-) diff --git a/meilisearch-lib/src/index_controller/update_file_store.rs b/meilisearch-lib/src/index_controller/update_file_store.rs index d7b3e2560..df4014e05 100644 --- a/meilisearch-lib/src/index_controller/update_file_store.rs +++ b/meilisearch-lib/src/index_controller/update_file_store.rs @@ -1,27 +1,51 @@ -use std::fs::{File, create_dir_all}; -use std::io::{BufReader, BufWriter, Write}; -use std::path::{Path, PathBuf}; +use std::fs::{create_dir_all, File}; +use std::io::{self, BufReader, BufWriter, Write}; use std::ops::{Deref, DerefMut}; +use std::path::{Path, PathBuf}; use milli::documents::DocumentBatchReader; use serde_json::Map; -use tempfile::NamedTempFile; +use tempfile::{NamedTempFile, PersistError}; use uuid::Uuid; const UPDATE_FILES_PATH: &str = "updates/updates_files"; use crate::document_formats::read_jsonl; -use super::error::Result; - pub struct UpdateFile { path: PathBuf, file: NamedTempFile, } +#[derive(Debug, thiserror::Error)] +#[error("Error while persisting update to disk: {0}")] +pub struct UpdateFileStoreError(Box); + +type Result = std::result::Result; + +macro_rules! 
into_update_store_error { + ($($other:path),*) => { + $( + impl From<$other> for UpdateFileStoreError { + fn from(other: $other) -> Self { + Self(Box::new(other)) + } + } + )* + }; +} + +into_update_store_error!( + PersistError, + io::Error, + serde_json::Error, + milli::documents::Error +); + impl UpdateFile { - pub fn persist(self) { - self.file.persist(&self.path).unwrap(); + pub fn persist(self) -> Result<()> { + self.file.persist(&self.path)?; + Ok(()) } } @@ -49,15 +73,17 @@ impl UpdateFileStore { let src_update_files_path = src.as_ref().join(UPDATE_FILES_PATH); let dst_update_files_path = dst.as_ref().join(UPDATE_FILES_PATH); - create_dir_all(&dst_update_files_path).unwrap(); + create_dir_all(&dst_update_files_path)?; - let entries = std::fs::read_dir(src_update_files_path).unwrap(); + let entries = std::fs::read_dir(src_update_files_path)?; for entry in entries { - let entry = entry.unwrap(); - let update_file = BufReader::new(File::open(entry.path()).unwrap()); + let entry = entry?; + let update_file = BufReader::new(File::open(entry.path())?); let file_uuid = entry.file_name(); - let file_uuid = file_uuid.to_str().ok_or_else(|| anyhow::anyhow!("invalid update file name"))?; + let file_uuid = file_uuid + .to_str() + .ok_or_else(|| anyhow::anyhow!("invalid update file name"))?; let dst_path = dst_update_files_path.join(file_uuid); let dst_file = BufWriter::new(File::create(dst_path)?); read_jsonl(update_file, dst_file)?; @@ -68,7 +94,7 @@ impl UpdateFileStore { pub fn new(path: impl AsRef) -> Result { let path = path.as_ref().join(UPDATE_FILES_PATH); - std::fs::create_dir_all(&path).unwrap(); + std::fs::create_dir_all(&path)?; Ok(Self { path }) } @@ -76,7 +102,7 @@ impl UpdateFileStore { /// /// A call to persist is needed to persist in the database. 
pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> { - let file = NamedTempFile::new().unwrap(); + let file = NamedTempFile::new()?; let uuid = Uuid::new_v4(); let path = self.path.join(uuid.to_string()); let update_file = UpdateFile { file, path }; @@ -87,7 +113,7 @@ impl UpdateFileStore { /// Returns a the file corresponding to the requested uuid. pub fn get_update(&self, uuid: Uuid) -> Result { let path = self.path.join(uuid.to_string()); - let file = File::open(path).unwrap(); + let file = File::open(path)?; Ok(file) } @@ -95,9 +121,9 @@ impl UpdateFileStore { pub fn snapshot(&self, uuid: Uuid, dst: impl AsRef) -> Result<()> { let src = self.path.join(uuid.to_string()); let mut dst = dst.as_ref().join(UPDATE_FILES_PATH); - std::fs::create_dir_all(&dst).unwrap(); + std::fs::create_dir_all(&dst)?; dst.push(uuid.to_string()); - std::fs::copy(src, dst).unwrap(); + std::fs::copy(src, dst)?; Ok(()) } @@ -106,29 +132,30 @@ impl UpdateFileStore { let uuid_string = uuid.to_string(); let update_file_path = self.path.join(&uuid_string); let mut dst = dump_path.as_ref().join(UPDATE_FILES_PATH); - std::fs::create_dir_all(&dst).unwrap(); + std::fs::create_dir_all(&dst)?; dst.push(&uuid_string); - let update_file = File::open(update_file_path).unwrap(); - let mut dst_file = NamedTempFile::new().unwrap(); - let mut document_reader = DocumentBatchReader::from_reader(update_file).unwrap(); + let update_file = File::open(update_file_path)?; + let mut dst_file = NamedTempFile::new()?; + let mut document_reader = DocumentBatchReader::from_reader(update_file)?; let mut document_buffer = Map::new(); // TODO: we need to find a way to do this more efficiently. (create a custom serializer to // jsonl for example...) - while let Some((index, document)) = document_reader.next_document_with_index().unwrap() { + while let Some((index, document)) = document_reader.next_document_with_index()? 
{ for (field_id, content) in document.iter() { - let field_name = index.get_by_left(&field_id).unwrap(); - let content = serde_json::from_slice(content).unwrap(); - document_buffer.insert(field_name.to_string(), content); + if let Some(field_name) = index.get_by_left(&field_id) { + let content = serde_json::from_slice(content)?; + document_buffer.insert(field_name.to_string(), content); + } } - serde_json::to_writer(&mut dst_file, &document_buffer).unwrap(); - dst_file.write(b"\n").unwrap(); + serde_json::to_writer(&mut dst_file, &document_buffer)?; + dst_file.write(b"\n")?; document_buffer.clear(); } - dst_file.persist(dst).unwrap(); + dst_file.persist(dst)?; Ok(()) } diff --git a/meilisearch-lib/src/index_controller/updates/error.rs b/meilisearch-lib/src/index_controller/updates/error.rs index 58635b3df..0e667fe65 100644 --- a/meilisearch-lib/src/index_controller/updates/error.rs +++ b/meilisearch-lib/src/index_controller/updates/error.rs @@ -3,6 +3,8 @@ use std::error::Error; use meilisearch_error::{Code, ErrorCode}; +use crate::index_controller::update_file_store::UpdateFileStoreError; + pub type Result = std::result::Result; #[derive(Debug, thiserror::Error)] @@ -42,7 +44,8 @@ internal_error!( UpdateLoopError: heed::Error, std::io::Error, serde_json::Error, - tokio::task::JoinError + tokio::task::JoinError, + UpdateFileStoreError ); impl ErrorCode for UpdateLoopError { diff --git a/meilisearch-lib/src/index_controller/updates/mod.rs b/meilisearch-lib/src/index_controller/updates/mod.rs index dc707c24b..e9b8cdd84 100644 --- a/meilisearch-lib/src/index_controller/updates/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/mod.rs @@ -231,7 +231,7 @@ impl UpdateLoop { builder.add_documents(documents).unwrap(); builder.finish().unwrap(); - file.persist(); + file.persist()?; Ok(uuid) }) From df4e9f4e1e1bc06cc2293552bff7e235d690808f Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 28 Sep 2021 19:49:25 +0200 Subject: [PATCH 22/37] restore dump v1 --- 
meilisearch-lib/src/index/mod.rs | 2 +- .../index_controller/dump_actor/loaders/v1.rs | 110 ++++++++++-------- 2 files changed, 64 insertions(+), 48 deletions(-) diff --git a/meilisearch-lib/src/index/mod.rs b/meilisearch-lib/src/index/mod.rs index c4fa812b1..0e4375517 100644 --- a/meilisearch-lib/src/index/mod.rs +++ b/meilisearch-lib/src/index/mod.rs @@ -14,7 +14,7 @@ use serde::{Serialize, Deserialize}; use error::Result; pub use search::{default_crop_length, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT}; -pub use updates::{Checked, Facets, Settings, Unchecked}; +pub use updates::{Checked, Facets, Settings, Unchecked, apply_settings_to_builder}; use uuid::Uuid; use crate::EnvSizer; diff --git a/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs b/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs index 1add2709b..f474935f0 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs @@ -1,12 +1,19 @@ use std::collections::{BTreeMap, BTreeSet}; +use std::fs::{File, create_dir_all}; +use std::io::{BufReader, Seek, SeekFrom}; use std::marker::PhantomData; use std::path::Path; +use heed::EnvOpenOptions; use log::{error, info, warn}; +use milli::documents::DocumentBatchReader; use milli::update::Setting; use serde::{Deserialize, Deserializer, Serialize}; use uuid::Uuid; +use crate::document_formats::read_jsonl; +use crate::index::apply_settings_to_builder; +use crate::index::update_handler::UpdateHandler; use crate::index_controller::index_resolver::uuid_store::HeedUuidStore; use crate::index_controller::{self, IndexMetadata}; use crate::index_controller::{asc_ranking_rule, desc_ranking_rule}; @@ -83,57 +90,66 @@ struct Settings { } fn load_index( - _src: impl AsRef, - _dst: impl AsRef, - _uuid: Uuid, - _primary_key: Option<&str>, - _size: usize, - _indexer_options: &IndexerOpts, + src: impl AsRef, + dst: impl AsRef, + uuid: Uuid, + primary_key: Option<&str>, + 
size: usize, + indexer_options: &IndexerOpts, ) -> anyhow::Result<()> { - todo!("fix dump obkv documents") - //let index_path = dst.as_ref().join(&format!("indexes/index-{}", uuid)); + let index_path = dst.as_ref().join(&format!("indexes/index-{}", uuid)); - //create_dir_all(&index_path)?; - //let mut options = EnvOpenOptions::new(); - //options.map_size(size); - //let index = milli::Index::new(options, index_path)?; - //let index = Index(Arc::new(index)); + create_dir_all(&index_path)?; + let mut options = EnvOpenOptions::new(); + options.map_size(size); + let index = milli::Index::new(options, index_path)?; - //// extract `settings.json` file and import content - //let settings = import_settings(&src)?; - //let settings: index_controller::Settings = settings.into(); + let update_handler = UpdateHandler::new(indexer_options)?; - //let mut txn = index.write_txn()?; + let mut txn = index.write_txn()?; + // extract `settings.json` file and import content + let settings = import_settings(&src)?; + let settings: index_controller::Settings = settings.into(); - //let handler = UpdateHandler::new(indexer_options)?; + let handler = UpdateHandler::new(indexer_options)?; - //index.update_settings_txn(&mut txn, &settings.check(), handler.update_builder(0))?; + let mut builder = handler.update_builder(0).settings(&mut txn, &index); - //let file = File::open(&src.as_ref().join("documents.jsonl"))?; - //let mut reader = std::io::BufReader::new(file); - //reader.fill_buf()?; - //if !reader.buffer().is_empty() { - //index.update_documents_txn( - //&mut txn, - //IndexDocumentsMethod::ReplaceDocuments, - //Some(reader), - //handler.update_builder(0), - //primary_key, - //)?; - //} + if let Some(primary_key) = primary_key { + builder.set_primary_key(primary_key.to_string()); + } - //txn.commit()?; + apply_settings_to_builder(&settings.check(), &mut builder); - //// Finaly, we extract the original milli::Index and close it - //Arc::try_unwrap(index.0) - //.map_err(|_e| "Couldn't close 
the index properly") - //.unwrap() - //.prepare_for_closing() - //.wait(); + builder.execute(|_, _| ())?; - //// Updates are ignored in dumps V1. + let reader = BufReader::new(File::open(&src.as_ref().join("documents.jsonl"))?); - //Ok(()) + let mut tmp_doc_file = tempfile::tempfile()?; + + read_jsonl(reader, &mut tmp_doc_file)?; + + tmp_doc_file.seek(SeekFrom::Start(0))?; + + let documents_reader = DocumentBatchReader::from_reader(tmp_doc_file)?; + + //If the document file is empty, we don't perform the document addition, to prevent + //a primary key error to be thrown. + if !documents_reader.is_empty() { + let builder = update_handler.update_builder(0).index_documents(&mut txn, &index); + builder.execute(documents_reader, |_, _| ())?; + } + + txn.commit()?; + + // Finaly, we extract the original milli::Index and close it + index + .prepare_for_closing() + .wait(); + + // Updates are ignored in dumps V1. + + Ok(()) } /// we need to **always** be able to convert the old settings to the settings currently being used @@ -201,14 +217,14 @@ impl From for index_controller::Settings { } // /// Extract Settings from `settings.json` file present at provided `dir_path` -//fn import_settings(dir_path: impl AsRef) -> anyhow::Result { - //let path = dir_path.as_ref().join("settings.json"); - //let file = File::open(path)?; - //let reader = std::io::BufReader::new(file); - //let metadata = serde_json::from_reader(reader)?; +fn import_settings(dir_path: impl AsRef) -> anyhow::Result { + let path = dir_path.as_ref().join("settings.json"); + let file = File::open(path)?; + let reader = std::io::BufReader::new(file); + let metadata = serde_json::from_reader(reader)?; - //Ok(metadata) -//} + Ok(metadata) +} #[cfg(test)] mod test { From 6f8e670dee45d352c5080a9b5141e41250931e31 Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 28 Sep 2021 20:13:26 +0200 Subject: [PATCH 23/37] move json reader to document_formats module --- meilisearch-lib/src/document_formats.rs | 13 +++++++ 
.../src/index_controller/updates/error.rs | 5 ++- .../src/index_controller/updates/mod.rs | 37 +++++++------------ 3 files changed, 30 insertions(+), 25 deletions(-) diff --git a/meilisearch-lib/src/document_formats.rs b/meilisearch-lib/src/document_formats.rs index 8540ce4b2..a535ec686 100644 --- a/meilisearch-lib/src/document_formats.rs +++ b/meilisearch-lib/src/document_formats.rs @@ -8,12 +8,14 @@ type Result = std::result::Result; #[derive(Debug)] pub enum PayloadType { Jsonl, + Json, } impl fmt::Display for PayloadType { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { PayloadType::Jsonl => write!(f, "ndjson"), + PayloadType::Json => write!(f, "json"), } } } @@ -50,3 +52,14 @@ pub fn read_jsonl(input: impl Read, writer: impl Write + Seek) -> Result<()> { Ok(()) } + +/// read json from input and write an obkv batch to writer. +pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result<()> { + let mut builder = DocumentBatchBuilder::new(writer).unwrap(); + + let documents: Vec> = malformed!(PayloadType::Json, serde_json::from_reader(input))?; + builder.add_documents(documents).unwrap(); + builder.finish().unwrap(); + + Ok(()) +} diff --git a/meilisearch-lib/src/index_controller/updates/error.rs b/meilisearch-lib/src/index_controller/updates/error.rs index 0e667fe65..d6c3bcba4 100644 --- a/meilisearch-lib/src/index_controller/updates/error.rs +++ b/meilisearch-lib/src/index_controller/updates/error.rs @@ -3,7 +3,7 @@ use std::error::Error; use meilisearch_error::{Code, ErrorCode}; -use crate::index_controller::update_file_store::UpdateFileStoreError; +use crate::{document_formats::DocumentFormatError, index_controller::update_file_store::UpdateFileStoreError}; pub type Result = std::result::Result; @@ -21,7 +21,8 @@ pub enum UpdateLoopError { )] FatalUpdateStoreError, #[error("{0}")] - InvalidPayload(Box), + InvalidPayload(#[from] DocumentFormatError), + // TODO: The reference to actix has to go. 
#[error("{0}")] PayloadError(#[from] actix_web::error::PayloadError), } diff --git a/meilisearch-lib/src/index_controller/updates/mod.rs b/meilisearch-lib/src/index_controller/updates/mod.rs index e9b8cdd84..5296c03e5 100644 --- a/meilisearch-lib/src/index_controller/updates/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/mod.rs @@ -13,22 +13,21 @@ use async_stream::stream; use bytes::Bytes; use futures::{Stream, StreamExt}; use log::trace; -use milli::documents::DocumentBatchBuilder; use milli::update::IndexDocumentsMethod; use serde::{Deserialize, Serialize}; -use serde_json::{Map, Value}; use tokio::sync::mpsc; use uuid::Uuid; use self::error::{Result, UpdateLoopError}; pub use self::message::UpdateMsg; use self::store::{UpdateStore, UpdateStoreInfo}; +use crate::document_formats::read_json; use crate::index::{Index, Settings, Unchecked}; use crate::index_controller::update_file_store::UpdateFileStore; use status::UpdateStatus; use super::index_resolver::HardStateIndexResolver; -use super::{DocumentAdditionFormat, Payload, Update}; +use super::{DocumentAdditionFormat, Update}; pub type UpdateSender = mpsc::Sender; @@ -197,9 +196,18 @@ impl UpdateLoop { method, format, } => { - let content_uuid = match format { - DocumentAdditionFormat::Json => self.documents_from_json(payload).await?, - }; + let reader = StreamReader::new(payload); + let (content_uuid, mut update_file) = self.update_file_store.new_update()?; + tokio::task::spawn_blocking(move || -> Result<_> { + match format { + DocumentAdditionFormat::Json => read_json(reader, &mut *update_file)?, + } + + update_file.persist()?; + + Ok(()) + }).await??; + RegisterUpdate::DocumentAddition { primary_key, @@ -220,23 +228,6 @@ impl UpdateLoop { Ok(status.into()) } - async fn documents_from_json(&self, payload: Payload) -> Result { - let file_store = self.update_file_store.clone(); - tokio::task::spawn_blocking(move || { - let (uuid, mut file) = file_store.new_update().unwrap(); - let mut builder = 
DocumentBatchBuilder::new(&mut *file).unwrap(); - - let documents: Vec> = - serde_json::from_reader(StreamReader::new(payload))?; - builder.add_documents(documents).unwrap(); - builder.finish().unwrap(); - - file.persist()?; - - Ok(uuid) - }) - .await? - } async fn handle_list_updates(&self, uuid: Uuid) -> Result> { let update_store = self.store.clone(); From 7a27cbcc78264dc2d328238d61f3346ff64c5031 Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 28 Sep 2021 20:20:13 +0200 Subject: [PATCH 24/37] rename RegisterUpdate to store::Update --- meilisearch-http/src/routes/mod.rs | 10 +++++----- meilisearch-lib/src/index/updates.rs | 10 +++++----- .../index_controller/index_resolver/mod.rs | 2 +- .../src/index_controller/updates/mod.rs | 20 ++++--------------- .../src/index_controller/updates/status.rs | 18 ++++++++--------- .../index_controller/updates/store/dump.rs | 4 ++-- .../src/index_controller/updates/store/mod.rs | 17 +++++++++++++--- meilisearch-lib/src/lib.rs | 2 +- 8 files changed, 41 insertions(+), 42 deletions(-) diff --git a/meilisearch-http/src/routes/mod.rs b/meilisearch-http/src/routes/mod.rs index 3066b3492..aa9847e23 100644 --- a/meilisearch-http/src/routes/mod.rs +++ b/meilisearch-http/src/routes/mod.rs @@ -6,7 +6,7 @@ use log::debug; use meilisearch_lib::index_controller::updates::status::{UpdateResult, UpdateStatus}; use serde::{Deserialize, Serialize}; -use meilisearch_lib::{MeiliSearch, RegisterUpdate}; +use meilisearch_lib::{MeiliSearch, Update}; use meilisearch_lib::index::{Settings, Unchecked}; use crate::error::ResponseError; @@ -52,7 +52,7 @@ impl From<&UpdateStatus> for UpdateType { fn from(other: &UpdateStatus) -> Self { use milli::update::IndexDocumentsMethod::*; match other.meta() { - RegisterUpdate::DocumentAddition{ method, .. } => { + Update::DocumentAddition{ method, .. 
} => { let number = match other { UpdateStatus::Processed(processed) => match processed.success { UpdateResult::DocumentsAddition(ref addition) => { @@ -69,11 +69,11 @@ impl From<&UpdateStatus> for UpdateType { _ => unreachable!(), } } - RegisterUpdate::Settings(settings) => UpdateType::Settings { + Update::Settings(settings) => UpdateType::Settings { settings: settings.clone(), }, - RegisterUpdate::ClearDocuments => UpdateType::ClearAll, - RegisterUpdate::DeleteDocuments(ids) => UpdateType::DocumentsDeletion { + Update::ClearDocuments => UpdateType::ClearAll, + Update::DeleteDocuments(ids) => UpdateType::DocumentsDeletion { number: Some(ids.len()), }, } diff --git a/meilisearch-lib/src/index/updates.rs b/meilisearch-lib/src/index/updates.rs index 44558fdae..b5035443a 100644 --- a/meilisearch-lib/src/index/updates.rs +++ b/meilisearch-lib/src/index/updates.rs @@ -8,7 +8,7 @@ use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder}; use serde::{Deserialize, Serialize, Serializer}; use uuid::Uuid; -use crate::RegisterUpdate; +use crate::Update; use crate::index_controller::updates::status::{Failed, Processed, Processing, UpdateResult}; use super::{Index, IndexMeta}; @@ -170,19 +170,19 @@ impl Index { let result = (|| { let mut txn = self.write_txn()?; let result = match update.meta() { - RegisterUpdate::DocumentAddition { primary_key, content_uuid, method } => { + Update::DocumentAddition { primary_key, content_uuid, method } => { self.update_documents(&mut txn, *method, *content_uuid, update_builder, primary_key.as_deref()) } - RegisterUpdate::Settings(settings) => { + Update::Settings(settings) => { let settings = settings.clone().check(); self.update_settings(&mut txn, &settings, update_builder) }, - RegisterUpdate::ClearDocuments => { + Update::ClearDocuments => { let builder = update_builder.clear_documents(&mut txn, self); let _count = builder.execute()?; Ok(UpdateResult::Other) }, - RegisterUpdate::DeleteDocuments(ids) => { + 
Update::DeleteDocuments(ids) => { let mut builder = update_builder.delete_documents(&mut txn, self)?; // We ignore unexisting document ids diff --git a/meilisearch-lib/src/index_controller/index_resolver/mod.rs b/meilisearch-lib/src/index_controller/index_resolver/mod.rs index d41c37ac6..f04b3f42b 100644 --- a/meilisearch-lib/src/index_controller/index_resolver/mod.rs +++ b/meilisearch-lib/src/index_controller/index_resolver/mod.rs @@ -36,7 +36,7 @@ impl IndexResolver { let indexes_path = src.as_ref().join("indexes"); let indexes = indexes_path.read_dir()?; - let update_handler = UpdateHandler::new(indexer_opts).unwrap(); + let update_handler = UpdateHandler::new(indexer_opts)?; for index in indexes { let index = index?; Index::load_dump(&index.path(), &dst, index_db_size, &update_handler)?; diff --git a/meilisearch-lib/src/index_controller/updates/mod.rs b/meilisearch-lib/src/index_controller/updates/mod.rs index 5296c03e5..3823f6b70 100644 --- a/meilisearch-lib/src/index_controller/updates/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/mod.rs @@ -45,18 +45,6 @@ pub fn create_update_handler( Ok(sender) } -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum RegisterUpdate { - DeleteDocuments(Vec), - DocumentAddition { - primary_key: Option, - method: IndexDocumentsMethod, - content_uuid: Uuid, - }, - Settings(Settings), - ClearDocuments, -} - /// A wrapper type to implement read on a `Stream>`. 
struct StreamReader { stream: S, @@ -209,15 +197,15 @@ impl UpdateLoop { }).await??; - RegisterUpdate::DocumentAddition { + store::Update::DocumentAddition { primary_key, method, content_uuid, } } - Update::Settings(settings) => RegisterUpdate::Settings(settings), - Update::ClearDocuments => RegisterUpdate::ClearDocuments, - Update::DeleteDocuments(ids) => RegisterUpdate::DeleteDocuments(ids), + Update::Settings(settings) => store::Update::Settings(settings), + Update::ClearDocuments => store::Update::ClearDocuments, + Update::DeleteDocuments(ids) => store::Update::DeleteDocuments(ids), }; let store = self.store.clone(); diff --git a/meilisearch-lib/src/index_controller/updates/status.rs b/meilisearch-lib/src/index_controller/updates/status.rs index 7716473ab..3108fe638 100644 --- a/meilisearch-lib/src/index_controller/updates/status.rs +++ b/meilisearch-lib/src/index_controller/updates/status.rs @@ -6,7 +6,7 @@ use meilisearch_error::{Code, ErrorCode}; use milli::update::{DocumentAdditionResult, IndexDocumentsMethod}; use serde::{Deserialize, Serialize}; -use crate::{RegisterUpdate, index::{Settings, Unchecked}}; +use crate::{Update, index::{Settings, Unchecked}}; #[derive(Debug, Clone, Serialize, Deserialize)] pub enum UpdateResult { @@ -34,12 +34,12 @@ pub enum UpdateMeta { #[serde(rename_all = "camelCase")] pub struct Enqueued { pub update_id: u64, - pub meta: RegisterUpdate, + pub meta: Update, pub enqueued_at: DateTime, } impl Enqueued { - pub fn new(meta: RegisterUpdate, update_id: u64) -> Self { + pub fn new(meta: Update, update_id: u64) -> Self { Self { enqueued_at: Utc::now(), meta, @@ -61,7 +61,7 @@ impl Enqueued { } } - pub fn meta(&self) -> &RegisterUpdate { + pub fn meta(&self) -> &Update { &self.meta } @@ -84,7 +84,7 @@ impl Processed { self.from.id() } - pub fn meta(&self) -> &RegisterUpdate { + pub fn meta(&self) -> &Update { self.from.meta() } } @@ -102,7 +102,7 @@ impl Processing { self.from.id() } - pub fn meta(&self) -> &RegisterUpdate { + pub 
fn meta(&self) -> &Update { self.from.meta() } @@ -139,7 +139,7 @@ impl Aborted { self.from.id() } - pub fn meta(&self) -> &RegisterUpdate { + pub fn meta(&self) -> &Update { self.from.meta() } } @@ -173,7 +173,7 @@ impl Failed { self.from.id() } - pub fn meta(&self) -> &RegisterUpdate { + pub fn meta(&self) -> &Update { self.from.meta() } } @@ -199,7 +199,7 @@ impl UpdateStatus { } } - pub fn meta(&self) -> &RegisterUpdate { + pub fn meta(&self) -> &Update { match self { UpdateStatus::Processing(u) => u.meta(), UpdateStatus::Enqueued(u) => u.meta(), diff --git a/meilisearch-lib/src/index_controller/updates/store/dump.rs b/meilisearch-lib/src/index_controller/updates/store/dump.rs index 68380a9d4..3356a54b9 100644 --- a/meilisearch-lib/src/index_controller/updates/store/dump.rs +++ b/meilisearch-lib/src/index_controller/updates/store/dump.rs @@ -11,7 +11,7 @@ use tempfile::{NamedTempFile, TempDir}; use uuid::Uuid; use super::{Result, State, UpdateStore}; -use crate::{RegisterUpdate, index::Index, index_controller::{update_file_store::UpdateFileStore, updates::status::{Enqueued, UpdateStatus}}}; +use crate::{Update, index::Index, index_controller::{update_file_store::UpdateFileStore, updates::status::{Enqueued, UpdateStatus}}}; #[derive(Serialize, Deserialize)] struct UpdateEntry { @@ -74,7 +74,7 @@ impl UpdateStore { let update = data.decode()?; if let Enqueued { - meta: RegisterUpdate::DocumentAddition { + meta: Update::DocumentAddition { content_uuid, .. }, .. 
} = update { diff --git a/meilisearch-lib/src/index_controller/updates/store/mod.rs b/meilisearch-lib/src/index_controller/updates/store/mod.rs index 21bfad61e..06577985c 100644 --- a/meilisearch-lib/src/index_controller/updates/store/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/store/mod.rs @@ -26,7 +26,6 @@ use rayon::prelude::*; use codec::*; -use super::RegisterUpdate; use super::error::Result; use super::status::{Enqueued, Processing}; use crate::EnvSizer; @@ -37,6 +36,18 @@ use crate::index::Index; #[allow(clippy::upper_case_acronyms)] type BEU64 = U64; +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum Update { + DeleteDocuments(Vec), + DocumentAddition { + primary_key: Option, + method: IndexDocumentsMethod, + content_uuid: Uuid, + }, + Settings(Settings), + ClearDocuments, +} + #[derive(Debug)] pub struct UpdateStoreInfo { /// Size of the update store in bytes. @@ -242,7 +253,7 @@ impl UpdateStore { pub fn register_update( &self, index_uuid: Uuid, - update: RegisterUpdate, + update: Update, ) -> heed::Result { let mut txn = self.env.write_txn()?; let (global_id, update_id) = self.next_update_id(&mut txn, index_uuid)?; @@ -512,7 +523,7 @@ impl UpdateStore { let ((_, uuid, _), pending) = entry?; if uuids.contains(&uuid) { if let Enqueued { - meta: RegisterUpdate::DocumentAddition { + meta: Update::DocumentAddition { content_uuid, .. }, .. 
diff --git a/meilisearch-lib/src/lib.rs b/meilisearch-lib/src/lib.rs index 93fd2f094..776a17f0d 100644 --- a/meilisearch-lib/src/lib.rs +++ b/meilisearch-lib/src/lib.rs @@ -5,7 +5,7 @@ pub mod options; pub mod index; pub mod index_controller; -pub use index_controller::{IndexController as MeiliSearch, updates::RegisterUpdate}; +pub use index_controller::{IndexController as MeiliSearch, updates::store::Update}; mod compression; mod document_formats; From 1cc733f8014db3fd92a69163450cdc5d43ba12fd Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 28 Sep 2021 22:02:04 +0200 Subject: [PATCH 25/37] fix get_info --- .../src/index_controller/update_file_store.rs | 4 +++ .../src/index_controller/updates/store/mod.rs | 25 +++++++++++-------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/meilisearch-lib/src/index_controller/update_file_store.rs b/meilisearch-lib/src/index_controller/update_file_store.rs index df4014e05..fed5fe200 100644 --- a/meilisearch-lib/src/index_controller/update_file_store.rs +++ b/meilisearch-lib/src/index_controller/update_file_store.rs @@ -159,4 +159,8 @@ impl UpdateFileStore { Ok(()) } + + pub fn get_size(&self, uuid: Uuid) -> Result { + Ok(self.get_update(uuid)?.metadata()?.len()) + } } diff --git a/meilisearch-lib/src/index_controller/updates/store/mod.rs b/meilisearch-lib/src/index_controller/updates/store/mod.rs index 06577985c..46786f1ac 100644 --- a/meilisearch-lib/src/index_controller/updates/store/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/store/mod.rs @@ -535,24 +535,27 @@ impl UpdateStore { } let path = path.as_ref().to_owned(); - indexes.par_iter().try_for_each(|index| index.snapshot(&path)).unwrap(); + indexes.par_iter().try_for_each(|index| index.snapshot(path.clone())).unwrap(); Ok(()) } pub fn get_info(&self) -> Result { - let size = self.env.size(); + let mut size = self.env.size(); let txn = self.env.read_txn()?; for entry in self.pending_queue.iter(&txn)? 
{ - let (_, _pending) = entry?; - //if let Enqueued { - //content: Some(uuid), - //.. - //} = pending - //{ - //let path = update_uuid_to_file_path(&self.path, uuid); - //size += File::open(path)?.metadata()?.len(); - //} + let (_, pending) = entry?; + if let Enqueued { + meta: store::Update::DocumentAddition { + content_uuid, + .. + }, + .. + } = pending + { + let len = self.update_file_store.get_size(content_uuid)?; + size += len; + } } let processing = match *self.state.read() { State::Processing(uuid, _) => Some(uuid), From 3503fbf7feeb7f71fdd6f4abd131ffefea78ec79 Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 28 Sep 2021 22:08:03 +0200 Subject: [PATCH 26/37] re-export milli from meilisearch_lib --- Cargo.lock | 1 - meilisearch-http/Cargo.toml | 1 - meilisearch-http/src/error.rs | 4 +++- meilisearch-http/src/routes/indexes/documents.rs | 2 +- meilisearch-http/src/routes/indexes/settings.rs | 2 +- meilisearch-http/src/routes/mod.rs | 2 +- meilisearch-lib/src/lib.rs | 2 ++ 7 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e763ecaa5..ec63e5020 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1678,7 +1678,6 @@ dependencies = [ "meilisearch-lib", "meilisearch-tokenizer", "memmap", - "milli", "mime", "mockall", "num_cpus", diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 085a40b09..7875e5ae4 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -49,7 +49,6 @@ meilisearch-lib = { path = "../meilisearch-lib" } meilisearch-error = { path = "../meilisearch-error" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" } memmap = "0.7.0" -milli = { path = "../../milli/milli" } mime = "0.3.16" num_cpus = "1.13.0" once_cell = "1.8.0" diff --git a/meilisearch-http/src/error.rs b/meilisearch-http/src/error.rs index 00229960e..fb44b9a49 100644 --- a/meilisearch-http/src/error.rs +++ b/meilisearch-http/src/error.rs @@ -7,7 +7,7 @@ use 
actix_web::http::StatusCode; use actix_web::HttpResponseBuilder; use aweb::error::{JsonPayloadError, QueryPayloadError}; use meilisearch_error::{Code, ErrorCode}; -use milli::UserError; +use meilisearch_lib::milli; use serde::{Deserialize, Serialize}; #[derive(Debug, Serialize, Deserialize, Clone)] @@ -68,6 +68,8 @@ impl fmt::Display for MilliError<'_> { impl ErrorCode for MilliError<'_> { fn error_code(&self) -> Code { + use milli::UserError; + match self.0 { milli::Error::InternalError(_) => Code::Internal, milli::Error::IoError(_) => Code::Internal, diff --git a/meilisearch-http/src/routes/indexes/documents.rs b/meilisearch-http/src/routes/indexes/documents.rs index f7aa4f485..cf939bccd 100644 --- a/meilisearch-http/src/routes/indexes/documents.rs +++ b/meilisearch-http/src/routes/indexes/documents.rs @@ -5,7 +5,7 @@ use futures::{Stream, StreamExt}; use log::debug; use meilisearch_lib::MeiliSearch; use meilisearch_lib::index_controller::{DocumentAdditionFormat, Update}; -use milli::update::IndexDocumentsMethod; +use meilisearch_lib::milli::update::IndexDocumentsMethod; use serde::Deserialize; use serde_json::Value; use tokio::sync::mpsc; diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs index 4a1e26426..24fd469d3 100644 --- a/meilisearch-http/src/routes/indexes/settings.rs +++ b/meilisearch-http/src/routes/indexes/settings.rs @@ -15,7 +15,7 @@ macro_rules! 
make_setting_route { use log::debug; use actix_web::{web, HttpResponse, Resource}; - use milli::update::Setting; + use meilisearch_lib::milli::update::Setting; use meilisearch_lib::{MeiliSearch, index::Settings, index_controller::Update}; use crate::error::ResponseError; diff --git a/meilisearch-http/src/routes/mod.rs b/meilisearch-http/src/routes/mod.rs index aa9847e23..3a5f84f18 100644 --- a/meilisearch-http/src/routes/mod.rs +++ b/meilisearch-http/src/routes/mod.rs @@ -50,7 +50,7 @@ pub enum UpdateType { impl From<&UpdateStatus> for UpdateType { fn from(other: &UpdateStatus) -> Self { - use milli::update::IndexDocumentsMethod::*; + use meilisearch_lib::milli::update::IndexDocumentsMethod::*; match other.meta() { Update::DocumentAddition{ method, .. } => { let number = match other { diff --git a/meilisearch-lib/src/lib.rs b/meilisearch-lib/src/lib.rs index 776a17f0d..3b7b5e7fa 100644 --- a/meilisearch-lib/src/lib.rs +++ b/meilisearch-lib/src/lib.rs @@ -7,6 +7,8 @@ pub mod index_controller; pub use index_controller::{IndexController as MeiliSearch, updates::store::Update}; +pub use milli; + mod compression; mod document_formats; From 5fa9bc67d7801c98a419bf6ba00244ab8eb0bbc6 Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 28 Sep 2021 22:16:18 +0200 Subject: [PATCH 27/37] remove unused dependencies --- Cargo.lock | 214 +----------------- meilisearch-http/Cargo.toml | 5 - meilisearch-lib/Cargo.toml | 12 - .../src/index_controller/dump_actor/mod.rs | 3 - 4 files changed, 8 insertions(+), 226 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ec63e5020..116ffd8cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -65,7 +65,7 @@ dependencies = [ "percent-encoding", "pin-project", "pin-project-lite", - "rand 0.8.4", + "rand", "regex", "serde", "sha-1 0.9.8", @@ -312,15 +312,6 @@ dependencies = [ "stable_deref_trait", ] -[[package]] -name = "assert-json-diff" -version = "1.0.1" -source = 
"git+https://github.com/qdequele/assert-json-diff?branch=master#9012a0c8866d0f2db0ef9a6242e4a19d1e8c67e4" -dependencies = [ - "serde", - "serde_json", -] - [[package]] name = "async-stream" version = "0.3.2" @@ -811,12 +802,6 @@ version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2c9736e15e7df1638a7f6eee92a6511615c738246a052af5ba86f039b65aede" -[[package]] -name = "difference" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" - [[package]] name = "digest" version = "0.8.1" @@ -841,12 +826,6 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "212d0f5754cb6769937f4501cc0e67f4f4483c8d2c3e1e922ee9edbe4ab4c7c0" -[[package]] -name = "downcast" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bb454f0228b18c7f4c3b0ebbee346ed9c52e7443b0999cd543ff3571205701d" - [[package]] name = "either" version = "1.6.1" @@ -895,15 +874,6 @@ dependencies = [ "termcolor", ] -[[package]] -name = "erased-serde" -version = "0.3.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3de9ad4541d99dc22b59134e7ff8dc3d6c988c89ecd7324bf10a8362b07a2afa" -dependencies = [ - "serde", -] - [[package]] name = "fake-simd" version = "0.1.2" @@ -940,15 +910,6 @@ dependencies = [ "miniz_oxide", ] -[[package]] -name = "float-cmp" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1267f4ac4f343772758f7b1bdcbe767c218bbab93bb432acbf5162bbf85a6c4" -dependencies = [ - "num-traits", -] - [[package]] name = "fnv" version = "1.0.7" @@ -965,12 +926,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "fragile" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69a039c3498dc930fe810151a34ba0c1c70b02b8625035592e74432f678591f2" - 
[[package]] name = "fs_extra" version = "1.2.0" @@ -983,12 +938,6 @@ version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ab85b9b05e3978cc9a9cf8fea7f01b494e1a09ed3037e16ba39edc7a29eb61a" -[[package]] -name = "fuchsia-cprng" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" - [[package]] name = "futures" version = "0.3.17" @@ -1616,12 +1565,6 @@ dependencies = [ "syn 0.15.44", ] -[[package]] -name = "main_error" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "155db5e86c6e45ee456bf32fad5a290ee1f7151c2faca27ea27097568da67d1a" - [[package]] name = "maplit" version = "1.0.2" @@ -1652,7 +1595,6 @@ dependencies = [ "actix-web-static-files", "anyhow", "arc-swap", - "assert-json-diff", "async-stream", "async-trait", "byte-unit", @@ -1673,20 +1615,18 @@ dependencies = [ "itertools", "jemallocator", "log", - "main_error", "meilisearch-error", "meilisearch-lib", "meilisearch-tokenizer", "memmap", "mime", - "mockall", "num_cpus", "obkv", "once_cell", "parking_lot", "paste", "pin-project", - "rand 0.8.4", + "rand", "rayon", "regex", "reqwest", @@ -1694,7 +1634,6 @@ dependencies = [ "serde", "serde_json", "serde_url_params", - "serdeval", "sha-1 0.9.8", "sha2", "siphasher", @@ -1702,7 +1641,6 @@ dependencies = [ "structopt", "sysinfo", "tar", - "tempdir", "tempfile", "thiserror", "tokio", @@ -1719,13 +1657,11 @@ dependencies = [ name = "meilisearch-lib" version = "0.1.0" dependencies = [ - "actix-cors", "actix-rt", "actix-web", "actix-web-static-files", "anyhow", "arc-swap", - "assert-json-diff", "async-stream", "async-trait", "byte-unit", @@ -1734,8 +1670,6 @@ dependencies = [ "crossbeam-channel", "derivative", "either", - "env_logger", - "erased-serde", "flate2", "fst", "futures", @@ -1746,40 +1680,32 @@ dependencies = [ "itertools", "lazy_static", "log", - "main_error", 
"meilisearch-error", "meilisearch-tokenizer", "memmap", "milli", "mime", - "mockall", "num_cpus", "obkv", "once_cell", "parking_lot", "paste", "pin-project", - "rand 0.8.4", + "rand", "rayon", "regex", "reqwest", "rustls", "serde", "serde_json", - "serde_url_params", - "serdeval", - "sha2", "siphasher", "slice-group-by", "structopt", "sysinfo", "tar", - "tempdir", "tempfile", "thiserror", "tokio", - "tokio-stream", - "urlencoding", "uuid", "walkdir", "whoami", @@ -1917,39 +1843,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "mockall" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ab571328afa78ae322493cacca3efac6a0f2e0a67305b4df31fd439ef129ac0" -dependencies = [ - "cfg-if 1.0.0", - "downcast", - "fragile", - "lazy_static", - "mockall_derive", - "predicates", - "predicates-tree", -] - -[[package]] -name = "mockall_derive" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7e25b214433f669161f414959594216d8e6ba83b6679d3db96899c0b4639033" -dependencies = [ - "cfg-if 1.0.0", - "proc-macro2 1.0.29", - "quote 1.0.9", - "syn 1.0.76", -] - -[[package]] -name = "normalize-line-endings" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" - [[package]] name = "ntapi" version = "0.3.6" @@ -2175,7 +2068,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d43f3220d96e0080cc9ea234978ccd80d904eafb17be31bb0f76daaea6493082" dependencies = [ "phf_shared", - "rand 0.8.4", + "rand", ] [[package]] @@ -2231,35 +2124,6 @@ version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" -[[package]] -name = "predicates" -version = "1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f49cfaf7fdaa3bfacc6fa3e7054e65148878354a5cfddcf661df4c851f8021df" -dependencies = [ - "difference", - "float-cmp", - "normalize-line-endings", - "predicates-core", - "regex", -] - -[[package]] -name = "predicates-core" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57e35a3326b75e49aa85f5dc6ec15b41108cf5aee58eabb1f274dd18b73c2451" - -[[package]] -name = "predicates-tree" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7dd0fd014130206c9352efbdc92be592751b2b9274dff685348341082c6ea3d" -dependencies = [ - "predicates-core", - "treeline", -] - [[package]] name = "proc-macro-error" version = "1.0.4" @@ -2332,19 +2196,6 @@ dependencies = [ "proc-macro2 1.0.29", ] -[[package]] -name = "rand" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" -dependencies = [ - "fuchsia-cprng", - "libc", - "rand_core 0.3.1", - "rdrand", - "winapi", -] - [[package]] name = "rand" version = "0.8.4" @@ -2353,7 +2204,7 @@ checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8" dependencies = [ "libc", "rand_chacha", - "rand_core 0.6.3", + "rand_core", "rand_hc", ] @@ -2364,24 +2215,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core 0.6.3", + "rand_core", ] -[[package]] -name = "rand_core" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" -dependencies = [ - "rand_core 0.4.2", -] - -[[package]] -name = "rand_core" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" - [[package]] name = "rand_core" 
version = "0.6.3" @@ -2397,7 +2233,7 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d51e9f596de227fda2ea6c84607f5558e196eeaf43c986b724ba4fb8fdf497e7" dependencies = [ - "rand_core 0.6.3", + "rand_core", ] [[package]] @@ -2425,15 +2261,6 @@ dependencies = [ "num_cpus", ] -[[package]] -name = "rdrand" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" -dependencies = [ - "rand_core 0.3.1", -] - [[package]] name = "redox_syscall" version = "0.2.10" @@ -2717,15 +2544,6 @@ dependencies = [ "serde", ] -[[package]] -name = "serdeval" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94023adfd3d548a8bd9a1f09c09f44eaab7080c7a9ab20314bb65154bee62bd0" -dependencies = [ - "serde", -] - [[package]] name = "sha-1" version = "0.8.2" @@ -2992,16 +2810,6 @@ dependencies = [ "xattr", ] -[[package]] -name = "tempdir" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8" -dependencies = [ - "rand 0.4.6", - "remove_dir_all", -] - [[package]] name = "tempfile" version = "3.2.0" @@ -3010,7 +2818,7 @@ checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" dependencies = [ "cfg-if 1.0.0", "libc", - "rand 0.8.4", + "rand", "redox_syscall", "remove_dir_all", "winapi", @@ -3220,12 +3028,6 @@ dependencies = [ "lazy_static", ] -[[package]] -name = "treeline" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41" - [[package]] name = "try-lock" version = "0.2.3" diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 7875e5ae4..f2c6a12e0 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -44,7 +44,6 @@ 
http = "0.2.4" indexmap = { version = "1.7.0", features = ["serde-1"] } itertools = "0.10.1" log = "0.4.14" -main_error = "0.1.1" meilisearch-lib = { path = "../meilisearch-lib" } meilisearch-error = { path = "../meilisearch-error" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" } @@ -73,17 +72,13 @@ obkv = "0.2.0" pin-project = "1.0.8" whoami = { version = "1.1.3", optional = true } reqwest = { version = "0.11.4", features = ["json", "rustls-tls"], default-features = false, optional = true } -serdeval = "0.1.0" sysinfo = "0.20.2" tokio-stream = "0.1.7" [dev-dependencies] actix-rt = "2.2.0" -assert-json-diff = { branch = "master", git = "https://github.com/qdequele/assert-json-diff" } -mockall = "0.10.2" paste = "1.0.5" serde_url_params = "0.2.1" -tempdir = "0.3.7" urlencoding = "2.1.0" [features] diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml index 446a20fb4..df8b1e45d 100644 --- a/meilisearch-lib/Cargo.toml +++ b/meilisearch-lib/Cargo.toml @@ -6,7 +6,6 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -actix-cors = { git = "https://github.com/MarinPostma/actix-extras.git", rev = "963ac94d" } actix-web = { version = "4.0.0-beta.9", features = ["rustls"] } actix-web-static-files = { git = "https://github.com/MarinPostma/actix-web-static-files.git", rev = "39d8006", optional = true } anyhow = { version = "1.0.43", features = ["backtrace"] } @@ -18,7 +17,6 @@ bytes = "1.1.0" chrono = { version = "0.4.19", features = ["serde"] } crossbeam-channel = "0.5.1" either = "1.6.1" -env_logger = "0.9.0" flate2 = "1.0.21" fst = "0.4.7" futures = "0.3.17" @@ -29,7 +27,6 @@ indexmap = { version = "1.7.0", features = ["serde-1"] } itertools = "0.10.1" lazy_static = "1.4.0" log = "0.4.14" -main_error = "0.1.1" meilisearch-error = { path = "../meilisearch-error" } meilisearch-tokenizer = { git = 
"https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" } memmap = "0.7.0" @@ -45,7 +42,6 @@ regex = "1.5.4" rustls = "0.19.1" serde = { version = "1.0.130", features = ["derive"] } serde_json = { version = "1.0.67", features = ["preserve_order"] } -sha2 = "0.9.6" siphasher = "0.3.7" slice-group-by = "0.2.6" structopt = "0.3.23" @@ -59,17 +55,9 @@ obkv = "0.2.0" pin-project = "1.0.8" whoami = { version = "1.1.3", optional = true } reqwest = { version = "0.11.4", features = ["json", "rustls-tls"], default-features = false, optional = true } -serdeval = "0.1.0" sysinfo = "0.20.2" -tokio-stream = "0.1.7" -erased-serde = "0.3.16" derivative = "2.2.0" [dev-dependencies] actix-rt = "2.2.0" -assert-json-diff = { branch = "master", git = "https://github.com/qdequele/assert-json-diff" } -mockall = "0.10.2" paste = "1.0.5" -serde_url_params = "0.2.1" -tempdir = "0.3.7" -urlencoding = "2.1.0" diff --git a/meilisearch-lib/src/index_controller/dump_actor/mod.rs b/meilisearch-lib/src/index_controller/dump_actor/mod.rs index 802f872cd..82b8d1355 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/mod.rs @@ -4,8 +4,6 @@ use std::sync::Arc; use chrono::{DateTime, Utc}; use log::{info, trace, warn}; -#[cfg(test)] -use mockall::automock; use serde::{Deserialize, Serialize}; use tokio::fs::create_dir_all; @@ -32,7 +30,6 @@ mod message; const META_FILE_NAME: &str = "metadata.json"; #[async_trait::async_trait] -#[cfg_attr(test, automock)] pub trait DumpActorHandle { /// Start the creation of a dump /// Implementation: [handle_impl::DumpActorHandleImpl::create_dump] From 102c46f88b3674174e9025be439d7758013d7c65 Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 28 Sep 2021 22:22:59 +0200 Subject: [PATCH 28/37] clippy + fmt --- meilisearch-http/src/analytics.rs | 2 +- meilisearch-http/src/lib.rs | 13 +- meilisearch-http/src/main.rs | 7 +- meilisearch-http/src/option.rs | 26 +- meilisearch-http/src/routes/dump.rs | 
4 +- .../src/routes/indexes/documents.rs | 23 +- meilisearch-http/src/routes/indexes/mod.rs | 6 +- .../src/routes/indexes/settings.rs | 6 +- .../src/routes/indexes/updates.rs | 4 +- meilisearch-http/src/routes/mod.rs | 8 +- meilisearch-http/tests/common/server.rs | 10 +- meilisearch-http/tests/common/service.rs | 2 +- .../tests/documents/add_documents.rs | 21 +- meilisearch-lib/src/compression.rs | 12 +- meilisearch-lib/src/document_formats.rs | 17 +- meilisearch-lib/src/index/dump.rs | 4 +- meilisearch-lib/src/index/mod.rs | 33 +- meilisearch-lib/src/index/search.rs | 6 +- meilisearch-lib/src/index/updates.rs | 35 +- .../index_controller/dump_actor/loaders/v1.rs | 15 +- .../src/index_controller/dump_actor/mod.rs | 3 +- .../index_controller/index_resolver/error.rs | 3 +- .../index_resolver/index_store.rs | 13 +- .../index_controller/index_resolver/mod.rs | 57 +-- .../index_resolver/uuid_store.rs | 2 +- meilisearch-lib/src/index_controller/mod.rs | 81 +++-- .../src/index_controller/snapshot.rs | 247 +++++++------ .../src/index_controller/update_file_store.rs | 2 +- .../src/index_controller/updates/error.rs | 10 +- .../src/index_controller/updates/message.rs | 6 +- .../src/index_controller/updates/mod.rs | 18 +- .../src/index_controller/updates/status.rs | 7 +- .../index_controller/updates/store/dump.rs | 37 +- .../src/index_controller/updates/store/mod.rs | 325 +++++++++--------- meilisearch-lib/src/lib.rs | 3 +- meilisearch-lib/src/options.rs | 1 - 36 files changed, 596 insertions(+), 473 deletions(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 8d91c9e9c..596b69aa0 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -2,9 +2,9 @@ use std::hash::{Hash, Hasher}; use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; use log::debug; +use meilisearch_lib::MeiliSearch; use serde::Serialize; use siphasher::sip::SipHasher; -use meilisearch_lib::MeiliSearch; use crate::Opt; diff --git 
a/meilisearch-http/src/lib.rs b/meilisearch-http/src/lib.rs index bea39ffdd..0e479b122 100644 --- a/meilisearch-http/src/lib.rs +++ b/meilisearch-http/src/lib.rs @@ -92,11 +92,7 @@ pub fn setup_temp_dir(db_path: impl AsRef) -> anyhow::Result<()> { Ok(()) } -pub fn configure_data( - config: &mut web::ServiceConfig, - data: MeiliSearch, - opt: &Opt, - ) { +pub fn configure_data(config: &mut web::ServiceConfig, data: MeiliSearch, opt: &Opt) { let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize; config .app_data(data) @@ -120,9 +116,9 @@ pub fn configure_auth(config: &mut web::ServiceConfig, opts: &Opt) { master: opts.master_key.clone(), private: None, public: None, - }; + }; - keys.generate_missing_api_keys(); + keys.generate_missing_api_keys(); let auth_config = if let Some(ref master_key) = keys.master { let private_key = keys.private.as_ref().unwrap(); @@ -139,8 +135,7 @@ pub fn configure_auth(config: &mut web::ServiceConfig, opts: &Opt) { AuthConfig::NoAuth }; - config.app_data(auth_config) - .app_data(keys); + config.app_data(auth_config).app_data(keys); } #[cfg(feature = "mini-dashboard")] diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index 3c7a34ddf..52892c3d6 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -1,7 +1,7 @@ use std::env; use actix_web::HttpServer; -use meilisearch_http::{Opt, create_app, setup_meilisearch}; +use meilisearch_http::{create_app, setup_meilisearch, Opt}; use meilisearch_lib::MeiliSearch; use structopt::StructOpt; @@ -23,7 +23,6 @@ fn setup(opt: &Opt) -> anyhow::Result<()> { log_builder.init(); - Ok(()) } @@ -36,7 +35,9 @@ async fn main() -> anyhow::Result<()> { match opt.env.as_ref() { "production" => { if opt.master_key.is_none() { - anyhow::bail!("In production mode, the environment variable MEILI_MASTER_KEY is mandatory") + anyhow::bail!( + "In production mode, the environment variable MEILI_MASTER_KEY is mandatory" + ) } } "development" => (), 
diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 72fbeab44..20e3be38d 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -1,16 +1,16 @@ +use std::fs; use std::io::{BufReader, Read}; use std::path::PathBuf; use std::sync::Arc; -use std::fs; use byte_unit::Byte; +use meilisearch_lib::options::IndexerOpts; use rustls::internal::pemfile::{certs, pkcs8_private_keys, rsa_private_keys}; use rustls::{ AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, NoClientAuth, RootCertStore, }; use structopt::StructOpt; -use meilisearch_lib::options::IndexerOpts; const POSSIBLE_ENV: [&str; 2] = ["development", "production"]; @@ -173,24 +173,30 @@ impl Opt { } fn load_certs(filename: PathBuf) -> anyhow::Result> { - let certfile = fs::File::open(filename).map_err(|_| anyhow::anyhow!("cannot open certificate file"))?; + let certfile = + fs::File::open(filename).map_err(|_| anyhow::anyhow!("cannot open certificate file"))?; let mut reader = BufReader::new(certfile); - Ok(certs(&mut reader).map_err(|_| anyhow::anyhow!("cannot read certificate file"))?) + certs(&mut reader).map_err(|_| anyhow::anyhow!("cannot read certificate file")) } fn load_private_key(filename: PathBuf) -> anyhow::Result { let rsa_keys = { - let keyfile = - fs::File::open(filename.clone()).map_err(|_| anyhow::anyhow!("cannot open private key file"))?; + let keyfile = fs::File::open(filename.clone()) + .map_err(|_| anyhow::anyhow!("cannot open private key file"))?; let mut reader = BufReader::new(keyfile); - rsa_private_keys(&mut reader).map_err(|_| anyhow::anyhow!("file contains invalid rsa private key"))? + rsa_private_keys(&mut reader) + .map_err(|_| anyhow::anyhow!("file contains invalid rsa private key"))? 
}; let pkcs8_keys = { - let keyfile = fs::File::open(filename).map_err(|_| anyhow::anyhow!("cannot open private key file"))?; + let keyfile = fs::File::open(filename) + .map_err(|_| anyhow::anyhow!("cannot open private key file"))?; let mut reader = BufReader::new(keyfile); - pkcs8_private_keys(&mut reader) - .map_err(|_| anyhow::anyhow!("file contains invalid pkcs8 private key (encrypted keys not supported)"))? + pkcs8_private_keys(&mut reader).map_err(|_| { + anyhow::anyhow!( + "file contains invalid pkcs8 private key (encrypted keys not supported)" + ) + })? }; // prefer to load pkcs8 keys diff --git a/meilisearch-http/src/routes/dump.rs b/meilisearch-http/src/routes/dump.rs index 494e97516..cbf89ddea 100644 --- a/meilisearch-http/src/routes/dump.rs +++ b/meilisearch-http/src/routes/dump.rs @@ -11,7 +11,9 @@ pub fn configure(cfg: &mut web::ServiceConfig) { .service(web::resource("/{dump_uid}/status").route(web::get().to(get_dump_status))); } -pub async fn create_dump(meilisearch: GuardedData) -> Result { +pub async fn create_dump( + meilisearch: GuardedData, +) -> Result { let res = meilisearch.create_dump().await?; debug!("returns: {:?}", res); diff --git a/meilisearch-http/src/routes/indexes/documents.rs b/meilisearch-http/src/routes/indexes/documents.rs index cf939bccd..e89b75f28 100644 --- a/meilisearch-http/src/routes/indexes/documents.rs +++ b/meilisearch-http/src/routes/indexes/documents.rs @@ -1,11 +1,11 @@ use actix_web::error::PayloadError; -use actix_web::{web, HttpResponse}; use actix_web::web::Bytes; +use actix_web::{web, HttpResponse}; use futures::{Stream, StreamExt}; use log::debug; -use meilisearch_lib::MeiliSearch; use meilisearch_lib::index_controller::{DocumentAdditionFormat, Update}; use meilisearch_lib::milli::update::IndexDocumentsMethod; +use meilisearch_lib::MeiliSearch; use serde::Deserialize; use serde_json::Value; use tokio::sync::mpsc; @@ -38,7 +38,7 @@ guard_content_type!(guard_json, "application/json"); */ /// This is required 
because Payload is not Sync nor Send -fn payload_to_stream(mut payload: Payload) -> impl Stream> { +fn payload_to_stream(mut payload: Payload) -> impl Stream> { let (snd, recv) = mpsc::channel(1); tokio::task::spawn_local(async move { while let Some(data) = payload.next().await { @@ -104,9 +104,14 @@ pub async fn delete_document( meilisearch: GuardedData, path: web::Path, ) -> Result { - let DocumentParam { document_id, index_uid } = path.into_inner(); + let DocumentParam { + document_id, + index_uid, + } = path.into_inner(); let update = Update::DeleteDocuments(vec![document_id]); - let update_status = meilisearch.register_update(index_uid, update, false).await?; + let update_status = meilisearch + .register_update(index_uid, update, false) + .await?; debug!("returns: {:?}", update_status); Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) } @@ -216,7 +221,9 @@ pub async fn delete_documents( .collect(); let update = Update::DeleteDocuments(ids); - let update_status = meilisearch.register_update(path.into_inner().index_uid, update, false).await?; + let update_status = meilisearch + .register_update(path.into_inner().index_uid, update, false) + .await?; debug!("returns: {:?}", update_status); Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) } @@ -226,7 +233,9 @@ pub async fn clear_all_documents( path: web::Path, ) -> Result { let update = Update::ClearDocuments; - let update_status = meilisearch.register_update(path.into_inner().index_uid, update, false).await?; + let update_status = meilisearch + .register_update(path.into_inner().index_uid, update, false) + .await?; debug!("returns: {:?}", update_status); Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) } diff --git a/meilisearch-http/src/routes/indexes/mod.rs b/meilisearch-http/src/routes/indexes/mod.rs index b10b5a004..0d0132d05 100644 --- a/meilisearch-http/src/routes/indexes/mod.rs +++ 
b/meilisearch-http/src/routes/indexes/mod.rs @@ -1,8 +1,8 @@ use actix_web::{web, HttpResponse}; use chrono::{DateTime, Utc}; use log::debug; -use meilisearch_lib::MeiliSearch; use meilisearch_lib::index_controller::IndexSettings; +use meilisearch_lib::MeiliSearch; use serde::{Deserialize, Serialize}; use crate::error::ResponseError; @@ -36,7 +36,9 @@ pub fn configure(cfg: &mut web::ServiceConfig) { ); } -pub async fn list_indexes(data: GuardedData) -> Result { +pub async fn list_indexes( + data: GuardedData, +) -> Result { let indexes = data.list_indexes().await?; debug!("returns: {:?}", indexes); Ok(HttpResponse::Ok().json(indexes)) diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs index 24fd469d3..7e6033180 100644 --- a/meilisearch-http/src/routes/indexes/settings.rs +++ b/meilisearch-http/src/routes/indexes/settings.rs @@ -1,12 +1,12 @@ use log::debug; -use actix_web::{HttpResponse, web}; -use meilisearch_lib::MeiliSearch; +use actix_web::{web, HttpResponse}; use meilisearch_lib::index::{Settings, Unchecked}; use meilisearch_lib::index_controller::Update; +use meilisearch_lib::MeiliSearch; -use crate::extractors::authentication::{policies::*, GuardedData}; use crate::error::ResponseError; +use crate::extractors::authentication::{policies::*, GuardedData}; #[macro_export] macro_rules! 
make_setting_route { diff --git a/meilisearch-http/src/routes/indexes/updates.rs b/meilisearch-http/src/routes/indexes/updates.rs index cfef5ba63..2923736b7 100644 --- a/meilisearch-http/src/routes/indexes/updates.rs +++ b/meilisearch-http/src/routes/indexes/updates.rs @@ -53,7 +53,9 @@ pub async fn get_all_updates_status( meilisearch: GuardedData, path: web::Path, ) -> Result { - let metas = meilisearch.all_update_status(path.into_inner().index_uid).await?; + let metas = meilisearch + .all_update_status(path.into_inner().index_uid) + .await?; let metas = metas .into_iter() .map(UpdateStatusResponse::from) diff --git a/meilisearch-http/src/routes/mod.rs b/meilisearch-http/src/routes/mod.rs index 3a5f84f18..382147f31 100644 --- a/meilisearch-http/src/routes/mod.rs +++ b/meilisearch-http/src/routes/mod.rs @@ -6,8 +6,8 @@ use log::debug; use meilisearch_lib::index_controller::updates::status::{UpdateResult, UpdateStatus}; use serde::{Deserialize, Serialize}; -use meilisearch_lib::{MeiliSearch, Update}; use meilisearch_lib::index::{Settings, Unchecked}; +use meilisearch_lib::{MeiliSearch, Update}; use crate::error::ResponseError; use crate::extractors::authentication::{policies::*, GuardedData}; @@ -52,7 +52,7 @@ impl From<&UpdateStatus> for UpdateType { fn from(other: &UpdateStatus) -> Self { use meilisearch_lib::milli::update::IndexDocumentsMethod::*; match other.meta() { - Update::DocumentAddition{ method, .. } => { + Update::DocumentAddition { method, .. 
} => { let number = match other { UpdateStatus::Processed(processed) => match processed.success { UpdateResult::DocumentsAddition(ref addition) => { @@ -233,7 +233,9 @@ pub async fn running() -> HttpResponse { HttpResponse::Ok().json(serde_json::json!({ "status": "MeiliSearch is running" })) } -async fn get_stats(meilisearch: GuardedData) -> Result { +async fn get_stats( + meilisearch: GuardedData, +) -> Result { let response = meilisearch.get_all_stats().await?; debug!("returns: {:?}", response); diff --git a/meilisearch-http/tests/common/server.rs b/meilisearch-http/tests/common/server.rs index ef2e51355..82666fc57 100644 --- a/meilisearch-http/tests/common/server.rs +++ b/meilisearch-http/tests/common/server.rs @@ -35,7 +35,10 @@ impl Server { let options = default_settings(dir.path()); let meilisearch = setup_meilisearch(&options).unwrap(); - let service = Service { meilisearch, options }; + let service = Service { + meilisearch, + options, + }; Server { service, @@ -45,7 +48,10 @@ impl Server { pub async fn new_with_options(options: Opt) -> Self { let meilisearch = setup_meilisearch(&options).unwrap(); - let service = Service { meilisearch, options }; + let service = Service { + meilisearch, + options, + }; Server { service, diff --git a/meilisearch-http/tests/common/service.rs b/meilisearch-http/tests/common/service.rs index 1450a6dd9..8a3b07c1d 100644 --- a/meilisearch-http/tests/common/service.rs +++ b/meilisearch-http/tests/common/service.rs @@ -2,7 +2,7 @@ use actix_web::{http::StatusCode, test}; use meilisearch_lib::MeiliSearch; use serde_json::Value; -use meilisearch_http::{Opt, create_app}; +use meilisearch_http::{create_app, Opt}; pub struct Service { pub meilisearch: MeiliSearch, diff --git a/meilisearch-http/tests/documents/add_documents.rs b/meilisearch-http/tests/documents/add_documents.rs index a0436c67d..42fdc7509 100644 --- a/meilisearch-http/tests/documents/add_documents.rs +++ b/meilisearch-http/tests/documents/add_documents.rs @@ -16,7 
+16,12 @@ async fn add_documents_test_json_content_types() { // this is a what is expected and should work let server = Server::new().await; - let app = test::init_service(create_app!(&server.service.meilisearch, true, &server.service.options)).await; + let app = test::init_service(create_app!( + &server.service.meilisearch, + true, + &server.service.options + )) + .await; let req = test::TestRequest::post() .uri("/indexes/dog/documents") .set_payload(document.to_string()) @@ -41,7 +46,12 @@ async fn add_documents_test_no_content_types() { ]); let server = Server::new().await; - let app = test::init_service(create_app!(&server.service.meilisearch, true, &server.service.options)).await; + let app = test::init_service(create_app!( + &server.service.meilisearch, + true, + &server.service.options + )) + .await; let req = test::TestRequest::post() .uri("/indexes/dog/documents") .set_payload(document.to_string()) @@ -67,7 +77,12 @@ async fn add_documents_test_bad_content_types() { ]); let server = Server::new().await; - let app = test::init_service(create_app!(&server.service.meilisearch, true, &server.service.options)).await; + let app = test::init_service(create_app!( + &server.service.meilisearch, + true, + &server.service.options + )) + .await; let req = test::TestRequest::post() .uri("/indexes/dog/documents") .set_payload(document.to_string()) diff --git a/meilisearch-lib/src/compression.rs b/meilisearch-lib/src/compression.rs index cd60854c6..a71a02a55 100644 --- a/meilisearch-lib/src/compression.rs +++ b/meilisearch-lib/src/compression.rs @@ -17,10 +17,10 @@ pub fn to_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Resul } //pub fn from_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Result<()> { - //let f = File::open(&src)?; - //let gz = GzDecoder::new(f); - //let mut ar = Archive::new(gz); - //create_dir_all(&dest)?; - //ar.unpack(&dest)?; - //Ok(()) +//let f = File::open(&src)?; +//let gz = GzDecoder::new(f); +//let mut ar = Archive::new(gz); 
+//create_dir_all(&dest)?; +//ar.unpack(&dest)?; +//Ok(()) //} diff --git a/meilisearch-lib/src/document_formats.rs b/meilisearch-lib/src/document_formats.rs index a535ec686..297c89831 100644 --- a/meilisearch-lib/src/document_formats.rs +++ b/meilisearch-lib/src/document_formats.rs @@ -1,4 +1,7 @@ -use std::{fmt, io::{Read, Seek, Write}}; +use std::{ + fmt, + io::{Read, Seek, Write}, +}; use milli::documents::DocumentBatchBuilder; use serde_json::{Deserializer, Map, Value}; @@ -25,12 +28,13 @@ pub enum DocumentFormatError { #[error("Internal error: {0}")] Internal(Box), #[error("{0}. The {1} payload provided is malformed.")] - MalformedPayload(Box, PayloadType), + MalformedPayload( + Box, + PayloadType, + ), } -internal_error!( - DocumentFormatError: milli::documents::Error -); +internal_error!(DocumentFormatError: milli::documents::Error); macro_rules! malformed { ($type:path, $e:expr) => { @@ -57,7 +61,8 @@ pub fn read_jsonl(input: impl Read, writer: impl Write + Seek) -> Result<()> { pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result<()> { let mut builder = DocumentBatchBuilder::new(writer).unwrap(); - let documents: Vec> = malformed!(PayloadType::Json, serde_json::from_reader(input))?; + let documents: Vec> = + malformed!(PayloadType::Json, serde_json::from_reader(input))?; builder.add_documents(documents).unwrap(); builder.finish().unwrap(); diff --git a/meilisearch-lib/src/index/dump.rs b/meilisearch-lib/src/index/dump.rs index 8049df500..f6e081760 100644 --- a/meilisearch-lib/src/index/dump.rs +++ b/meilisearch-lib/src/index/dump.rs @@ -151,7 +151,9 @@ impl Index { //If the document file is empty, we don't perform the document addition, to prevent //a primary key error to be thrown. 
if !documents_reader.is_empty() { - let builder = update_handler.update_builder(0).index_documents(&mut txn, &index); + let builder = update_handler + .update_builder(0) + .index_documents(&mut txn, &index); builder.execute(documents_reader, |_, _| ())?; } diff --git a/meilisearch-lib/src/index/mod.rs b/meilisearch-lib/src/index/mod.rs index 0e4375517..899c830a5 100644 --- a/meilisearch-lib/src/index/mod.rs +++ b/meilisearch-lib/src/index/mod.rs @@ -8,17 +8,17 @@ use std::sync::Arc; use chrono::{DateTime, Utc}; use heed::{EnvOpenOptions, RoTxn}; use milli::update::Setting; -use milli::{FieldDistribution, FieldId, obkv_to_json}; +use milli::{obkv_to_json, FieldDistribution, FieldId}; +use serde::{Deserialize, Serialize}; use serde_json::{Map, Value}; -use serde::{Serialize, Deserialize}; use error::Result; pub use search::{default_crop_length, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT}; -pub use updates::{Checked, Facets, Settings, Unchecked, apply_settings_to_builder}; +pub use updates::{apply_settings_to_builder, Checked, Facets, Settings, Unchecked}; use uuid::Uuid; -use crate::EnvSizer; use crate::index_controller::update_file_store::UpdateFileStore; +use crate::EnvSizer; use self::error::IndexError; use self::update_handler::UpdateHandler; @@ -75,11 +75,11 @@ impl IndexMeta { #[derivative(Debug)] pub struct Index { pub uuid: Uuid, - #[derivative(Debug="ignore")] + #[derivative(Debug = "ignore")] pub inner: Arc, - #[derivative(Debug="ignore")] + #[derivative(Debug = "ignore")] update_file_store: Arc, - #[derivative(Debug="ignore")] + #[derivative(Debug = "ignore")] update_handler: Arc, } @@ -92,12 +92,23 @@ impl Deref for Index { } impl Index { - pub fn open(path: impl AsRef, size: usize, update_file_store: Arc, uuid: Uuid, update_handler: Arc) -> Result { + pub fn open( + path: impl AsRef, + size: usize, + update_file_store: Arc, + uuid: Uuid, + update_handler: Arc, + ) -> Result { create_dir_all(&path)?; let mut options = EnvOpenOptions::new(); 
options.map_size(size); let inner = Arc::new(milli::Index::new(options, &path)?); - Ok(Index { inner, update_file_store, uuid, update_handler }) + Ok(Index { + inner, + update_file_store, + uuid, + update_handler, + }) } pub fn stats(&self) -> Result { @@ -268,7 +279,9 @@ impl Index { create_dir_all(&dst)?; dst.push("data.mdb"); let _txn = self.write_txn()?; - self.inner.env.copy_to_path(dst, heed::CompactionOption::Enabled)?; + self.inner + .env + .copy_to_path(dst, heed::CompactionOption::Enabled)?; Ok(()) } } diff --git a/meilisearch-lib/src/index/search.rs b/meilisearch-lib/src/index/search.rs index 70d0510ac..a0ea26127 100644 --- a/meilisearch-lib/src/index/search.rs +++ b/meilisearch-lib/src/index/search.rs @@ -6,9 +6,7 @@ use either::Either; use heed::RoTxn; use indexmap::IndexMap; use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token}; -use milli::{ - AscDesc, FieldId, FieldsIdsMap, FilterCondition, MatchingWords, SortError -}; +use milli::{AscDesc, FieldId, FieldsIdsMap, FilterCondition, MatchingWords, SortError}; use regex::Regex; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; @@ -685,7 +683,7 @@ fn parse_filter_array( } } - Ok(FilterCondition::from_array(txn, &index, ands)?) + Ok(FilterCondition::from_array(txn, index, ands)?) 
} #[cfg(test)] diff --git a/meilisearch-lib/src/index/updates.rs b/meilisearch-lib/src/index/updates.rs index b5035443a..b5de21403 100644 --- a/meilisearch-lib/src/index/updates.rs +++ b/meilisearch-lib/src/index/updates.rs @@ -8,11 +8,11 @@ use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder}; use serde::{Deserialize, Serialize, Serializer}; use uuid::Uuid; -use crate::Update; use crate::index_controller::updates::status::{Failed, Processed, Processing, UpdateResult}; +use crate::Update; -use super::{Index, IndexMeta}; use super::error::{IndexError, Result}; +use super::{Index, IndexMeta}; fn serialize_with_wildcard( field: &Setting>, @@ -170,18 +170,26 @@ impl Index { let result = (|| { let mut txn = self.write_txn()?; let result = match update.meta() { - Update::DocumentAddition { primary_key, content_uuid, method } => { - self.update_documents(&mut txn, *method, *content_uuid, update_builder, primary_key.as_deref()) - } + Update::DocumentAddition { + primary_key, + content_uuid, + method, + } => self.update_documents( + &mut txn, + *method, + *content_uuid, + update_builder, + primary_key.as_deref(), + ), Update::Settings(settings) => { let settings = settings.clone().check(); self.update_settings(&mut txn, &settings, update_builder) - }, + } Update::ClearDocuments => { let builder = update_builder.clear_documents(&mut txn, self); let _count = builder.execute()?; Ok(UpdateResult::Other) - }, + } Update::DeleteDocuments(ids) => { let mut builder = update_builder.delete_documents(&mut txn, self)?; @@ -276,7 +284,10 @@ impl Index { } } -pub fn apply_settings_to_builder(settings: &Settings, builder: &mut milli::update::Settings) { +pub fn apply_settings_to_builder( + settings: &Settings, + builder: &mut milli::update::Settings, +) { match settings.searchable_attributes { Setting::Set(ref names) => builder.set_searchable_fields(names.clone()), Setting::Reset => builder.reset_searchable_fields(), @@ -298,9 +309,7 @@ pub fn 
apply_settings_to_builder(settings: &Settings, builder: &mut mil } match settings.sortable_attributes { - Setting::Set(ref fields) => { - builder.set_sortable_fields(fields.iter().cloned().collect()) - } + Setting::Set(ref fields) => builder.set_sortable_fields(fields.iter().cloned().collect()), Setting::Reset => builder.reset_sortable_fields(), Setting::NotSet => (), } @@ -318,9 +327,7 @@ pub fn apply_settings_to_builder(settings: &Settings, builder: &mut mil } match settings.synonyms { - Setting::Set(ref synonyms) => { - builder.set_synonyms(synonyms.clone().into_iter().collect()) - } + Setting::Set(ref synonyms) => builder.set_synonyms(synonyms.clone().into_iter().collect()), Setting::Reset => builder.reset_synonyms(), Setting::NotSet => (), } diff --git a/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs b/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs index f474935f0..840fd7ccc 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs @@ -1,5 +1,5 @@ use std::collections::{BTreeMap, BTreeSet}; -use std::fs::{File, create_dir_all}; +use std::fs::{create_dir_all, File}; use std::io::{BufReader, Seek, SeekFrom}; use std::marker::PhantomData; use std::path::Path; @@ -17,10 +17,7 @@ use crate::index::update_handler::UpdateHandler; use crate::index_controller::index_resolver::uuid_store::HeedUuidStore; use crate::index_controller::{self, IndexMetadata}; use crate::index_controller::{asc_ranking_rule, desc_ranking_rule}; -use crate::{ - index::Unchecked, - options::IndexerOpts, -}; +use crate::{index::Unchecked, options::IndexerOpts}; #[derive(Serialize, Deserialize, Debug)] #[serde(rename_all = "camelCase")] @@ -136,16 +133,16 @@ fn load_index( //If the document file is empty, we don't perform the document addition, to prevent //a primary key error to be thrown. 
if !documents_reader.is_empty() { - let builder = update_handler.update_builder(0).index_documents(&mut txn, &index); + let builder = update_handler + .update_builder(0) + .index_documents(&mut txn, &index); builder.execute(documents_reader, |_, _| ())?; } txn.commit()?; // Finaly, we extract the original milli::Index and close it - index - .prepare_for_closing() - .wait(); + index.prepare_for_closing().wait(); // Updates are ignored in dumps V1. diff --git a/meilisearch-lib/src/index_controller/dump_actor/mod.rs b/meilisearch-lib/src/index_controller/dump_actor/mod.rs index 82b8d1355..72a83a505 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/mod.rs @@ -158,8 +158,7 @@ impl DumpTask { create_dir_all(&self.path).await?; - let temp_dump_dir = - tokio::task::spawn_blocking(|| tempfile::TempDir::new()).await??; + let temp_dump_dir = tokio::task::spawn_blocking(tempfile::TempDir::new).await??; let temp_dump_path = temp_dump_dir.path().to_owned(); let meta = Metadata::new_v2(self.index_db_size, self.update_db_size); diff --git a/meilisearch-lib/src/index_controller/index_resolver/error.rs b/meilisearch-lib/src/index_controller/index_resolver/error.rs index af61a99de..661b9bde3 100644 --- a/meilisearch-lib/src/index_controller/index_resolver/error.rs +++ b/meilisearch-lib/src/index_controller/index_resolver/error.rs @@ -27,7 +27,8 @@ pub enum IndexResolverError { } impl From> for IndexResolverError -where T: Send + Sync + 'static + fmt::Debug +where + T: Send + Sync + 'static + fmt::Debug, { fn from(other: tokio::sync::mpsc::error::SendError) -> Self { Self::Internal(Box::new(other)) diff --git a/meilisearch-lib/src/index_controller/index_resolver/index_store.rs b/meilisearch-lib/src/index_controller/index_resolver/index_store.rs index 5969108de..047711a96 100644 --- a/meilisearch-lib/src/index_controller/index_resolver/index_store.rs +++ 
b/meilisearch-lib/src/index_controller/index_resolver/index_store.rs @@ -9,8 +9,8 @@ use tokio::task::spawn_blocking; use uuid::Uuid; use super::error::{IndexResolverError, Result}; -use crate::index::Index; use crate::index::update_handler::UpdateHandler; +use crate::index::Index; use crate::index_controller::update_file_store::UpdateFileStore; use crate::options::IndexerOpts; @@ -32,7 +32,11 @@ pub struct MapIndexStore { } impl MapIndexStore { - pub fn new(path: impl AsRef, index_size: usize, indexer_opts: &IndexerOpts) -> anyhow::Result { + pub fn new( + path: impl AsRef, + index_size: usize, + indexer_opts: &IndexerOpts, + ) -> anyhow::Result { let update_handler = Arc::new(UpdateHandler::new(indexer_opts)?); let update_file_store = Arc::new(UpdateFileStore::new(path.as_ref()).unwrap()); let path = path.as_ref().join("indexes/"); @@ -100,7 +104,10 @@ impl IndexStore for MapIndexStore { let index_size = self.index_size; let file_store = self.update_file_store.clone(); let update_handler = self.update_handler.clone(); - let index = spawn_blocking(move || Index::open(path, index_size, file_store, uuid, update_handler)).await??; + let index = spawn_blocking(move || { + Index::open(path, index_size, file_store, uuid, update_handler) + }) + .await??; self.index_store.write().await.insert(uuid, index.clone()); Ok(Some(index)) } diff --git a/meilisearch-lib/src/index_controller/index_resolver/mod.rs b/meilisearch-lib/src/index_controller/index_resolver/mod.rs index f04b3f42b..9f86f7b08 100644 --- a/meilisearch-lib/src/index_controller/index_resolver/mod.rs +++ b/meilisearch-lib/src/index_controller/index_resolver/mod.rs @@ -1,19 +1,26 @@ -pub mod uuid_store; -mod index_store; pub mod error; +mod index_store; +pub mod uuid_store; use std::path::Path; -use uuid::Uuid; -use uuid_store::{UuidStore, HeedUuidStore}; +use error::{IndexResolverError, Result}; use index_store::{IndexStore, MapIndexStore}; -use error::{Result, IndexResolverError}; +use uuid::Uuid; +use 
uuid_store::{HeedUuidStore, UuidStore}; -use crate::{index::{Index, update_handler::UpdateHandler}, options::IndexerOpts}; +use crate::{ + index::{update_handler::UpdateHandler, Index}, + options::IndexerOpts, +}; pub type HardStateIndexResolver = IndexResolver; -pub fn create_index_resolver(path: impl AsRef, index_size: usize, indexer_opts: &IndexerOpts) -> anyhow::Result { +pub fn create_index_resolver( + path: impl AsRef, + index_size: usize, + indexer_opts: &IndexerOpts, +) -> anyhow::Result { let uuid_store = HeedUuidStore::new(&path)?; let index_store = MapIndexStore::new(&path, index_size, indexer_opts)?; Ok(IndexResolver::new(uuid_store, index_store)) @@ -30,7 +37,7 @@ impl IndexResolver { dst: impl AsRef, index_db_size: usize, indexer_opts: &IndexerOpts, - ) -> anyhow::Result<()> { + ) -> anyhow::Result<()> { HeedUuidStore::load_dump(&src, &dst)?; let indexes_path = src.as_ref().join("indexes"); @@ -46,14 +53,12 @@ impl IndexResolver { } } -impl IndexResolver -where U: UuidStore, - I: IndexStore, +impl IndexResolver +where + U: UuidStore, + I: IndexStore, { - pub fn new( - index_uuid_store: U, - index_store: I, - ) -> Self { + pub fn new(index_uuid_store: U, index_store: I) -> Self { Self { index_uuid_store, index_store, @@ -75,7 +80,10 @@ where U: UuidStore, } pub async fn snapshot(&self, path: impl AsRef) -> Result> { - let uuids = self.index_uuid_store.snapshot(path.as_ref().to_owned()).await?; + let uuids = self + .index_uuid_store + .snapshot(path.as_ref().to_owned()) + .await?; let mut indexes = Vec::new(); for uuid in uuids { indexes.push(self.get_index_by_uuid(uuid).await?); @@ -99,13 +107,11 @@ where U: UuidStore, let mut indexes = Vec::new(); for (name, uuid) in uuids { match self.index_store.get(uuid).await? 
{ - Some(index) => { - indexes.push((name, index)) - }, + Some(index) => indexes.push((name, index)), None => { // we found an unexisting index, we remove it from the uuid store let _ = self.index_uuid_store.delete(name).await; - }, + } } } @@ -124,7 +130,10 @@ where U: UuidStore, pub async fn get_index_by_uuid(&self, uuid: Uuid) -> Result { // TODO: Handle this error better. - self.index_store.get(uuid).await?.ok_or(IndexResolverError::UnexistingIndex(String::new())) + self.index_store + .get(uuid) + .await? + .ok_or_else(|| IndexResolverError::UnexistingIndex(String::new())) } pub async fn get_index(&self, uid: String) -> Result { @@ -137,17 +146,17 @@ where U: UuidStore, // and remove the uuid from th uuid store. let _ = self.index_uuid_store.delete(name.clone()).await; Err(IndexResolverError::UnexistingIndex(name)) - }, + } } } - (name, _) => Err(IndexResolverError::UnexistingIndex(name)) + (name, _) => Err(IndexResolverError::UnexistingIndex(name)), } } pub async fn get_uuid(&self, uid: String) -> Result { match self.index_uuid_store.get_uuid(uid).await? 
{ (_, Some(uuid)) => Ok(uuid), - (name, _) => Err(IndexResolverError::UnexistingIndex(name)) + (name, _) => Err(IndexResolverError::UnexistingIndex(name)), } } } diff --git a/meilisearch-lib/src/index_controller/index_resolver/uuid_store.rs b/meilisearch-lib/src/index_controller/index_resolver/uuid_store.rs index a4bcd17d4..3e582944d 100644 --- a/meilisearch-lib/src/index_controller/index_resolver/uuid_store.rs +++ b/meilisearch-lib/src/index_controller/index_resolver/uuid_store.rs @@ -8,7 +8,7 @@ use heed::{CompactionOption, Database, Env, EnvOpenOptions}; use serde::{Deserialize, Serialize}; use uuid::Uuid; -use super::error::{Result, IndexResolverError}; +use super::error::{IndexResolverError, Result}; use crate::EnvSizer; const UUID_STORE_SIZE: usize = 1_073_741_824; //1GiB diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index 0dee6521f..f117369fd 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -18,25 +18,27 @@ use dump_actor::DumpActorHandle; pub use dump_actor::{DumpInfo, DumpStatus}; use snapshot::load_snapshot; -use crate::index::{Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked}; +use crate::index::error::Result as IndexResult; +use crate::index::{ + Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked, +}; use crate::index_controller::index_resolver::create_index_resolver; use crate::index_controller::snapshot::SnapshotService; use crate::options::IndexerOpts; use error::Result; -use crate::index::error::{Result as IndexResult}; use self::dump_actor::load_dump; -use self::index_resolver::HardStateIndexResolver; use self::index_resolver::error::IndexResolverError; +use self::index_resolver::HardStateIndexResolver; use self::updates::status::UpdateStatus; use self::updates::UpdateMsg; mod dump_actor; pub mod error; +mod index_resolver; mod snapshot; pub mod 
update_file_store; pub mod updates; -mod index_resolver; pub type Payload = Box< dyn Stream> + Send + Sync + 'static + Unpin, @@ -79,6 +81,7 @@ pub struct Stats { pub indexes: BTreeMap, } +#[allow(clippy::large_enum_variant)] #[derive(derivative::Derivative)] #[derivative(Debug)] pub enum Update { @@ -86,7 +89,7 @@ pub enum Update { ClearDocuments, Settings(Settings), DocumentAddition { - #[derivative(Debug="ignore")] + #[derivative(Debug = "ignore")] payload: Payload, primary_key: Option, method: IndexDocumentsMethod, @@ -141,12 +144,19 @@ impl IndexControllerBuilder { std::fs::create_dir_all(db_path.as_ref())?; - let index_resolver = Arc::new(create_index_resolver(&db_path, index_size, &indexer_options)?); + let index_resolver = Arc::new(create_index_resolver( + &db_path, + index_size, + &indexer_options, + )?); #[allow(unreachable_code)] - let update_sender = updates::create_update_handler(index_resolver.clone(), &db_path, update_store_size)?; + let update_sender = + updates::create_update_handler(index_resolver.clone(), &db_path, update_store_size)?; - let dump_path = self.dump_dst.ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?; + let dump_path = self + .dump_dst + .ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?; let dump_handle = dump_actor::DumpActorHandleImpl::new( dump_path, index_resolver.clone(), @@ -159,13 +169,15 @@ impl IndexControllerBuilder { let snapshot_service = SnapshotService::new( index_resolver.clone(), update_sender.clone(), - self.snapshot_interval.ok_or_else(|| anyhow::anyhow!("Snapshot interval not provided."))?, - self.snapshot_dir.ok_or_else(|| anyhow::anyhow!("Snapshot path not provided."))?, + self.snapshot_interval + .ok_or_else(|| anyhow::anyhow!("Snapshot interval not provided."))?, + self.snapshot_dir + .ok_or_else(|| anyhow::anyhow!("Snapshot path not provided."))?, db_path - .as_ref() - .file_name() - .map(|n| n.to_owned().into_string().expect("invalid path")) - .unwrap_or_else(|| 
String::from("data.ms")), + .as_ref() + .file_name() + .map(|n| n.to_owned().into_string().expect("invalid path")) + .unwrap_or_else(|| String::from("data.ms")), ); tokio::task::spawn(snapshot_service.run()); @@ -246,7 +258,12 @@ impl IndexController { IndexControllerBuilder::default() } - pub async fn register_update(&self, uid: String, update: Update, create_index: bool) -> Result { + pub async fn register_update( + &self, + uid: String, + update: Update, + create_index: bool, + ) -> Result { match self.index_resolver.get_uuid(uid).await { Ok(uuid) => { let update_result = UpdateMsg::update(&self.update_sender, uuid, update).await?; @@ -255,12 +272,13 @@ impl IndexController { Err(IndexResolverError::UnexistingIndex(name)) => { if create_index { let index = self.index_resolver.create_index(name, None).await?; - let update_result = UpdateMsg::update(&self.update_sender, index.uuid, update).await?; + let update_result = + UpdateMsg::update(&self.update_sender, index.uuid, update).await?; // ignore if index creation fails now, since it may already have been created Ok(update_result) } else { - Err(IndexResolverError::UnexistingIndex(name).into()) + Err(IndexResolverError::UnexistingIndex(name).into()) } } Err(e) => Err(e.into()), @@ -310,7 +328,9 @@ impl IndexController { attributes_to_retrieve: Option>, ) -> Result> { let index = self.index_resolver.get_index(uid).await?; - let documents = spawn_blocking(move || index.retrieve_documents(offset, limit, attributes_to_retrieve)).await??; + let documents = + spawn_blocking(move || index.retrieve_documents(offset, limit, attributes_to_retrieve)) + .await??; Ok(documents) } @@ -321,7 +341,9 @@ impl IndexController { attributes_to_retrieve: Option>, ) -> Result { let index = self.index_resolver.get_index(uid).await?; - let document = spawn_blocking(move || index.retrieve_document(doc_id, attributes_to_retrieve)).await??; + let document = + spawn_blocking(move || index.retrieve_document(doc_id, attributes_to_retrieve)) + 
.await??; Ok(document) } @@ -330,12 +352,12 @@ impl IndexController { uid: String, mut index_settings: IndexSettings, ) -> Result { - index_settings.uid.take(); let index = self.index_resolver.get_index(uid.clone()).await?; let uuid = index.uuid; - let meta = spawn_blocking(move || index.update_primary_key(index_settings.primary_key)).await??; + let meta = + spawn_blocking(move || index.update_primary_key(index_settings.primary_key)).await??; let meta = IndexMetadata { uuid, name: uid.clone(), @@ -386,7 +408,8 @@ impl IndexController { let stats = index.stats()?; let meta = index.meta()?; Ok((stats, meta)) - }).await??; + }) + .await??; database_size += stats.size; @@ -415,8 +438,15 @@ impl IndexController { Ok(self.dump_handle.dump_info(uid).await?) } - pub async fn create_index(&self, uid: String, primary_key: Option) -> Result { - let index = self.index_resolver.create_index(uid.clone(), primary_key).await?; + pub async fn create_index( + &self, + uid: String, + primary_key: Option, + ) -> Result { + let index = self + .index_resolver + .create_index(uid.clone(), primary_key) + .await?; let meta = spawn_blocking(move || -> IndexResult<_> { let meta = index.meta()?; let meta = IndexMetadata { @@ -426,7 +456,8 @@ impl IndexController { meta, }; Ok(meta) - }).await??; + }) + .await??; Ok(meta) } diff --git a/meilisearch-lib/src/index_controller/snapshot.rs b/meilisearch-lib/src/index_controller/snapshot.rs index 66bdfe60e..2d83a491c 100644 --- a/meilisearch-lib/src/index_controller/snapshot.rs +++ b/meilisearch-lib/src/index_controller/snapshot.rs @@ -4,14 +4,14 @@ use std::time::Duration; use anyhow::bail; use log::{error, info, trace}; +use tokio::fs; use tokio::task::spawn_blocking; use tokio::time::sleep; -use tokio::fs; use crate::index_controller::updates::UpdateMsg; -use super::updates::UpdateSender; use super::index_resolver::HardStateIndexResolver; +use super::updates::UpdateSender; pub struct SnapshotService { index_resolver: Arc, @@ -56,8 +56,7 @@ impl 
SnapshotService { let snapshot_dir = self.snapshot_path.clone(); fs::create_dir_all(&snapshot_dir).await?; - let temp_snapshot_dir = - spawn_blocking(move || tempfile::tempdir()).await??; + let temp_snapshot_dir = spawn_blocking(tempfile::tempdir).await??; let temp_snapshot_path = temp_snapshot_dir.path().to_owned(); let indexes = self @@ -99,7 +98,7 @@ pub fn load_snapshot( match crate::from_tar_gz(snapshot_path, &db_path) { Ok(()) => Ok(()), Err(e) => { - //clean created db folder + //clean created db folder std::fs::remove_dir_all(&db_path)?; Err(e) } @@ -127,131 +126,131 @@ pub fn load_snapshot( //#[cfg(test)] //mod test { - //use std::iter::FromIterator; - //use std::{collections::HashSet, sync::Arc}; +//use std::iter::FromIterator; +//use std::{collections::HashSet, sync::Arc}; - //use futures::future::{err, ok}; - //use rand::Rng; - //use tokio::time::timeout; - //use uuid::Uuid; +//use futures::future::{err, ok}; +//use rand::Rng; +//use tokio::time::timeout; +//use uuid::Uuid; - //use super::*; +//use super::*; - //#[actix_rt::test] - //async fn test_normal() { - //let mut rng = rand::thread_rng(); - //let uuids_num: usize = rng.gen_range(5..10); - //let uuids = (0..uuids_num) - //.map(|_| Uuid::new_v4()) - //.collect::>(); +//#[actix_rt::test] +//async fn test_normal() { +//let mut rng = rand::thread_rng(); +//let uuids_num: usize = rng.gen_range(5..10); +//let uuids = (0..uuids_num) +//.map(|_| Uuid::new_v4()) +//.collect::>(); - //let mut uuid_resolver = MockUuidResolverHandle::new(); - //let uuids_clone = uuids.clone(); - //uuid_resolver - //.expect_snapshot() - //.times(1) - //.returning(move |_| Box::pin(ok(uuids_clone.clone()))); +//let mut uuid_resolver = MockUuidResolverHandle::new(); +//let uuids_clone = uuids.clone(); +//uuid_resolver +//.expect_snapshot() +//.times(1) +//.returning(move |_| Box::pin(ok(uuids_clone.clone()))); - //let uuids_clone = uuids.clone(); - //let mut index_handle = MockIndexActorHandle::new(); - //index_handle - 
//.expect_snapshot() - //.withf(move |uuid, _path| uuids_clone.contains(uuid)) - //.times(uuids_num) - //.returning(move |_, _| Box::pin(ok(()))); +//let uuids_clone = uuids.clone(); +//let mut index_handle = MockIndexActorHandle::new(); +//index_handle +//.expect_snapshot() +//.withf(move |uuid, _path| uuids_clone.contains(uuid)) +//.times(uuids_num) +//.returning(move |_, _| Box::pin(ok(()))); - //let dir = tempfile::tempdir_in(".").unwrap(); - //let handle = Arc::new(index_handle); - //let update_handle = - //UpdateActorHandleImpl::>::new(handle.clone(), dir.path(), 4096 * 100).unwrap(); +//let dir = tempfile::tempdir_in(".").unwrap(); +//let handle = Arc::new(index_handle); +//let update_handle = +//UpdateActorHandleImpl::>::new(handle.clone(), dir.path(), 4096 * 100).unwrap(); - //let snapshot_path = tempfile::tempdir_in(".").unwrap(); - //let snapshot_service = SnapshotService::new( - //uuid_resolver, - //update_handle, - //Duration::from_millis(100), - //snapshot_path.path().to_owned(), - //"data.ms".to_string(), - //); +//let snapshot_path = tempfile::tempdir_in(".").unwrap(); +//let snapshot_service = SnapshotService::new( +//uuid_resolver, +//update_handle, +//Duration::from_millis(100), +//snapshot_path.path().to_owned(), +//"data.ms".to_string(), +//); - //snapshot_service.perform_snapshot().await.unwrap(); - //} - - //#[actix_rt::test] - //async fn error_performing_uuid_snapshot() { - //let mut uuid_resolver = MockUuidResolverHandle::new(); - //uuid_resolver - //.expect_snapshot() - //.times(1) - ////abitrary error - //.returning(|_| Box::pin(err(UuidResolverError::NameAlreadyExist))); - - //let update_handle = MockUpdateActorHandle::new(); - - //let snapshot_path = tempfile::tempdir_in(".").unwrap(); - //let snapshot_service = SnapshotService::new( - //uuid_resolver, - //update_handle, - //Duration::from_millis(100), - //snapshot_path.path().to_owned(), - //"data.ms".to_string(), - //); - - 
//assert!(snapshot_service.perform_snapshot().await.is_err()); - ////Nothing was written to the file - //assert!(!snapshot_path.path().join("data.ms.snapshot").exists()); - //} - - //#[actix_rt::test] - //async fn error_performing_index_snapshot() { - //let uuid = Uuid::new_v4(); - //let mut uuid_resolver = MockUuidResolverHandle::new(); - //uuid_resolver - //.expect_snapshot() - //.times(1) - //.returning(move |_| Box::pin(ok(HashSet::from_iter(Some(uuid))))); - - //let mut update_handle = MockUpdateActorHandle::new(); - //update_handle - //.expect_snapshot() - ////abitrary error - //.returning(|_, _| Box::pin(err(UpdateActorError::UnexistingUpdate(0)))); - - //let snapshot_path = tempfile::tempdir_in(".").unwrap(); - //let snapshot_service = SnapshotService::new( - //uuid_resolver, - //update_handle, - //Duration::from_millis(100), - //snapshot_path.path().to_owned(), - //"data.ms".to_string(), - //); - - //assert!(snapshot_service.perform_snapshot().await.is_err()); - ////Nothing was written to the file - //assert!(!snapshot_path.path().join("data.ms.snapshot").exists()); - //} - - //#[actix_rt::test] - //async fn test_loop() { - //let mut uuid_resolver = MockUuidResolverHandle::new(); - //uuid_resolver - //.expect_snapshot() - ////we expect the funtion to be called between 2 and 3 time in the given interval. 
- //.times(2..4) - ////abitrary error, to short-circuit the function - //.returning(move |_| Box::pin(err(UuidResolverError::NameAlreadyExist))); - - //let update_handle = MockUpdateActorHandle::new(); - - //let snapshot_path = tempfile::tempdir_in(".").unwrap(); - //let snapshot_service = SnapshotService::new( - //uuid_resolver, - //update_handle, - //Duration::from_millis(100), - //snapshot_path.path().to_owned(), - //"data.ms".to_string(), - //); - - //let _ = timeout(Duration::from_millis(300), snapshot_service.run()).await; - //} +//snapshot_service.perform_snapshot().await.unwrap(); +//} + +//#[actix_rt::test] +//async fn error_performing_uuid_snapshot() { +//let mut uuid_resolver = MockUuidResolverHandle::new(); +//uuid_resolver +//.expect_snapshot() +//.times(1) +////abitrary error +//.returning(|_| Box::pin(err(UuidResolverError::NameAlreadyExist))); + +//let update_handle = MockUpdateActorHandle::new(); + +//let snapshot_path = tempfile::tempdir_in(".").unwrap(); +//let snapshot_service = SnapshotService::new( +//uuid_resolver, +//update_handle, +//Duration::from_millis(100), +//snapshot_path.path().to_owned(), +//"data.ms".to_string(), +//); + +//assert!(snapshot_service.perform_snapshot().await.is_err()); +////Nothing was written to the file +//assert!(!snapshot_path.path().join("data.ms.snapshot").exists()); +//} + +//#[actix_rt::test] +//async fn error_performing_index_snapshot() { +//let uuid = Uuid::new_v4(); +//let mut uuid_resolver = MockUuidResolverHandle::new(); +//uuid_resolver +//.expect_snapshot() +//.times(1) +//.returning(move |_| Box::pin(ok(HashSet::from_iter(Some(uuid))))); + +//let mut update_handle = MockUpdateActorHandle::new(); +//update_handle +//.expect_snapshot() +////abitrary error +//.returning(|_, _| Box::pin(err(UpdateActorError::UnexistingUpdate(0)))); + +//let snapshot_path = tempfile::tempdir_in(".").unwrap(); +//let snapshot_service = SnapshotService::new( +//uuid_resolver, +//update_handle, +//Duration::from_millis(100), 
+//snapshot_path.path().to_owned(), +//"data.ms".to_string(), +//); + +//assert!(snapshot_service.perform_snapshot().await.is_err()); +////Nothing was written to the file +//assert!(!snapshot_path.path().join("data.ms.snapshot").exists()); +//} + +//#[actix_rt::test] +//async fn test_loop() { +//let mut uuid_resolver = MockUuidResolverHandle::new(); +//uuid_resolver +//.expect_snapshot() +////we expect the funtion to be called between 2 and 3 time in the given interval. +//.times(2..4) +////abitrary error, to short-circuit the function +//.returning(move |_| Box::pin(err(UuidResolverError::NameAlreadyExist))); + +//let update_handle = MockUpdateActorHandle::new(); + +//let snapshot_path = tempfile::tempdir_in(".").unwrap(); +//let snapshot_service = SnapshotService::new( +//uuid_resolver, +//update_handle, +//Duration::from_millis(100), +//snapshot_path.path().to_owned(), +//"data.ms".to_string(), +//); + +//let _ = timeout(Duration::from_millis(300), snapshot_service.run()).await; +//} //} diff --git a/meilisearch-lib/src/index_controller/update_file_store.rs b/meilisearch-lib/src/index_controller/update_file_store.rs index fed5fe200..f7a7e3a1a 100644 --- a/meilisearch-lib/src/index_controller/update_file_store.rs +++ b/meilisearch-lib/src/index_controller/update_file_store.rs @@ -151,7 +151,7 @@ impl UpdateFileStore { } serde_json::to_writer(&mut dst_file, &document_buffer)?; - dst_file.write(b"\n")?; + dst_file.write_all(b"\n")?; document_buffer.clear(); } diff --git a/meilisearch-lib/src/index_controller/updates/error.rs b/meilisearch-lib/src/index_controller/updates/error.rs index d6c3bcba4..8cbcf211a 100644 --- a/meilisearch-lib/src/index_controller/updates/error.rs +++ b/meilisearch-lib/src/index_controller/updates/error.rs @@ -1,9 +1,12 @@ -use std::fmt; use std::error::Error; +use std::fmt; use meilisearch_error::{Code, ErrorCode}; -use crate::{document_formats::DocumentFormatError, index_controller::update_file_store::UpdateFileStoreError}; +use crate::{ 
+ document_formats::DocumentFormatError, + index_controller::update_file_store::UpdateFileStoreError, +}; pub type Result = std::result::Result; @@ -28,7 +31,8 @@ pub enum UpdateLoopError { } impl From> for UpdateLoopError -where T: Sync + Send + 'static + fmt::Debug +where + T: Sync + Send + 'static + fmt::Debug, { fn from(other: tokio::sync::mpsc::error::SendError) -> Self { Self::Internal(Box::new(other)) diff --git a/meilisearch-lib/src/index_controller/updates/message.rs b/meilisearch-lib/src/index_controller/updates/message.rs index 3b157e568..4249e36f2 100644 --- a/meilisearch-lib/src/index_controller/updates/message.rs +++ b/meilisearch-lib/src/index_controller/updates/message.rs @@ -44,7 +44,11 @@ pub enum UpdateMsg { } impl UpdateMsg { - pub async fn snapshot(sender: &mpsc::Sender, path: PathBuf, indexes: Vec) -> Result<()> { + pub async fn snapshot( + sender: &mpsc::Sender, + path: PathBuf, + indexes: Vec, + ) -> Result<()> { let (ret, rcv) = oneshot::channel(); let msg = Self::Snapshot { path, indexes, ret }; sender.send(msg).await?; diff --git a/meilisearch-lib/src/index_controller/updates/mod.rs b/meilisearch-lib/src/index_controller/updates/mod.rs index 3823f6b70..fad337553 100644 --- a/meilisearch-lib/src/index_controller/updates/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/mod.rs @@ -80,7 +80,7 @@ impl> + Unpin> io::Rea self.read(buf) } Some(Err(e)) => Err(io::Error::new(io::ErrorKind::BrokenPipe, e)), - None => return Ok(0), + None => Ok(0), }, } } @@ -109,7 +109,13 @@ impl UpdateLoop { let must_exit = Arc::new(AtomicBool::new(false)); let update_file_store = UpdateFileStore::new(&path).unwrap(); - let store = UpdateStore::open(options, &path, index_resolver.clone(), must_exit.clone(), update_file_store.clone())?; + let store = UpdateStore::open( + options, + &path, + index_resolver, + must_exit.clone(), + update_file_store.clone(), + )?; let inbox = Some(inbox); @@ -194,8 +200,8 @@ impl UpdateLoop { update_file.persist()?; Ok(()) - 
}).await??; - + }) + .await??; store::Update::DocumentAddition { primary_key, @@ -216,7 +222,6 @@ impl UpdateLoop { Ok(status.into()) } - async fn handle_list_updates(&self, uuid: Uuid) -> Result> { let update_store = self.store.clone(); tokio::task::spawn_blocking(move || { @@ -248,8 +253,7 @@ impl UpdateLoop { async fn handle_snapshot(&self, indexes: Vec, path: PathBuf) -> Result<()> { let update_store = self.store.clone(); - tokio::task::spawn_blocking(move || update_store.snapshot(indexes, path)) - .await??; + tokio::task::spawn_blocking(move || update_store.snapshot(indexes, path)).await??; Ok(()) } diff --git a/meilisearch-lib/src/index_controller/updates/status.rs b/meilisearch-lib/src/index_controller/updates/status.rs index 3108fe638..e7f82b343 100644 --- a/meilisearch-lib/src/index_controller/updates/status.rs +++ b/meilisearch-lib/src/index_controller/updates/status.rs @@ -6,7 +6,10 @@ use meilisearch_error::{Code, ErrorCode}; use milli::update::{DocumentAdditionResult, IndexDocumentsMethod}; use serde::{Deserialize, Serialize}; -use crate::{Update, index::{Settings, Unchecked}}; +use crate::{ + index::{Settings, Unchecked}, + Update, +}; #[derive(Debug, Clone, Serialize, Deserialize)] pub enum UpdateResult { @@ -160,7 +163,7 @@ impl Display for Failed { } } -impl Error for Failed { } +impl Error for Failed {} impl ErrorCode for Failed { fn error_code(&self) -> Code { diff --git a/meilisearch-lib/src/index_controller/updates/store/dump.rs b/meilisearch-lib/src/index_controller/updates/store/dump.rs index 3356a54b9..298217885 100644 --- a/meilisearch-lib/src/index_controller/updates/store/dump.rs +++ b/meilisearch-lib/src/index_controller/updates/store/dump.rs @@ -1,7 +1,7 @@ use std::collections::HashSet; -use std::path::{Path, PathBuf}; +use std::fs::{create_dir_all, File}; use std::io::{BufReader, Write}; -use std::fs::{File, create_dir_all}; +use std::path::{Path, PathBuf}; use heed::{EnvOpenOptions, RoTxn}; use rayon::prelude::*; @@ -11,7 +11,14 @@ 
use tempfile::{NamedTempFile, TempDir}; use uuid::Uuid; use super::{Result, State, UpdateStore}; -use crate::{Update, index::Index, index_controller::{update_file_store::UpdateFileStore, updates::status::{Enqueued, UpdateStatus}}}; +use crate::{ + index::Index, + index_controller::{ + update_file_store::UpdateFileStore, + updates::status::{Enqueued, UpdateStatus}, + }, + Update, +}; #[derive(Serialize, Deserialize)] struct UpdateEntry { @@ -20,11 +27,7 @@ struct UpdateEntry { } impl UpdateStore { - pub fn dump( - &self, - indexes: &[Index], - path: PathBuf, - ) -> Result<()> { + pub fn dump(&self, indexes: &[Index], path: PathBuf) -> Result<()> { let state_lock = self.state.write(); state_lock.swap(State::Dumping); @@ -35,7 +38,10 @@ impl UpdateStore { self.dump_updates(&txn, &uuids, &path)?; - indexes.par_iter().try_for_each(|index| index.dump(&path)).unwrap(); + indexes + .par_iter() + .try_for_each(|index| index.dump(&path)) + .unwrap(); Ok(()) } @@ -74,11 +80,13 @@ impl UpdateStore { let update = data.decode()?; if let Enqueued { - meta: Update::DocumentAddition { - content_uuid, .. - }, .. - } = update { - self.update_file_store.dump(content_uuid, &dst_path).unwrap(); + meta: Update::DocumentAddition { content_uuid, .. }, + .. 
+ } = update + { + self.update_file_store + .dump(content_uuid, &dst_path) + .unwrap(); } let update_json = UpdateEntry { @@ -122,7 +130,6 @@ impl UpdateStore { dst: impl AsRef, db_size: usize, ) -> anyhow::Result<()> { - println!("target path: {}", dst.as_ref().display()); let mut options = EnvOpenOptions::new(); diff --git a/meilisearch-lib/src/index_controller/updates/store/mod.rs b/meilisearch-lib/src/index_controller/updates/store/mod.rs index 46786f1ac..bb77250b5 100644 --- a/meilisearch-lib/src/index_controller/updates/store/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/store/mod.rs @@ -17,25 +17,26 @@ use heed::zerocopy::U64; use heed::{CompactionOption, Database, Env, EnvOpenOptions}; use log::error; use parking_lot::{Mutex, MutexGuard}; +use rayon::prelude::*; use tokio::runtime::Handle; use tokio::sync::mpsc; use tokio::sync::mpsc::error::TrySendError; use tokio::time::timeout; use uuid::Uuid; -use rayon::prelude::*; use codec::*; use super::error::Result; use super::status::{Enqueued, Processing}; -use crate::EnvSizer; +use crate::index::Index; use crate::index_controller::update_files_path; use crate::index_controller::updates::*; -use crate::index::Index; +use crate::EnvSizer; #[allow(clippy::upper_case_acronyms)] type BEU64 = U64; +#[allow(clippy::large_enum_variant)] #[derive(Debug, Clone, Serialize, Deserialize)] pub enum Update { DeleteDocuments(Vec), @@ -164,7 +165,8 @@ impl UpdateStore { must_exit: Arc, update_file_store: UpdateFileStore, ) -> anyhow::Result> { - let (update_store, mut notification_receiver) = Self::new(options, path, update_file_store)?; + let (update_store, mut notification_receiver) = + Self::new(options, path, update_file_store)?; let update_store = Arc::new(update_store); // Send a first notification to trigger the process. @@ -250,11 +252,7 @@ impl UpdateStore { /// Registers the update content in the pending store and the meta /// into the pending-meta store. Returns the new unique update id. 
- pub fn register_update( - &self, - index_uuid: Uuid, - update: Update, - ) -> heed::Result { + pub fn register_update(&self, index_uuid: Uuid, update: Update) -> heed::Result { let mut txn = self.env.write_txn()?; let (global_id, update_id) = self.next_update_id(&mut txn, index_uuid)?; let meta = Enqueued::new(update, update_id); @@ -299,7 +297,10 @@ impl UpdateStore { /// Executes the user provided function on the next pending update (the one with the lowest id). /// This is asynchronous as it let the user process the update with a read-only txn and /// only writing the result meta to the processed-meta store *after* it has been processed. - fn process_pending_update(&self, index_resolver: Arc) -> Result> { + fn process_pending_update( + &self, + index_resolver: Arc, + ) -> Result> { // Create a read transaction to be able to retrieve the pending update in order. let rtxn = self.env.read_txn()?; let first_meta = self.pending_queue.first(&rtxn)?; @@ -315,8 +316,7 @@ impl UpdateStore { let state = self.state.write(); state.swap(State::Processing(index_uuid, processing.clone())); - let result = - self.perform_update(processing, index_resolver, index_uuid, global_id); + let result = self.perform_update(processing, index_resolver, index_uuid, global_id); state.swap(State::Idle); @@ -444,7 +444,7 @@ impl UpdateStore { if uuid == index_uuid { let mut _pending = pending.decode()?; //if let Some(update_uuid) = pending.content.take() { - //uuids_to_remove.push(update_uuid); + //uuids_to_remove.push(update_uuid); //} // Invariant check: we can only delete the current entry when we don't hold @@ -495,15 +495,10 @@ impl UpdateStore { Ok(()) } - pub fn snapshot( - &self, - indexes: Vec, - path: impl AsRef, - ) -> Result<()> { + pub fn snapshot(&self, indexes: Vec, path: impl AsRef) -> Result<()> { let state_lock = self.state.write(); state_lock.swap(State::Snapshoting); - let txn = self.env.write_txn()?; let update_path = path.as_ref().join("updates"); @@ -523,19 +518,22 @@ 
impl UpdateStore { let ((_, uuid, _), pending) = entry?; if uuids.contains(&uuid) { if let Enqueued { - meta: Update::DocumentAddition { - content_uuid, .. - }, + meta: Update::DocumentAddition { content_uuid, .. }, .. } = pending.decode()? { - self.update_file_store.snapshot(content_uuid, &path).unwrap(); + self.update_file_store + .snapshot(content_uuid, &path) + .unwrap(); } } } let path = path.as_ref().to_owned(); - indexes.par_iter().try_for_each(|index| index.snapshot(path.clone())).unwrap(); + indexes + .par_iter() + .try_for_each(|index| index.snapshot(path.clone())) + .unwrap(); Ok(()) } @@ -546,10 +544,7 @@ impl UpdateStore { for entry in self.pending_queue.iter(&txn)? { let (_, pending) = entry?; if let Enqueued { - meta: store::Update::DocumentAddition { - content_uuid, - .. - }, + meta: store::Update::DocumentAddition { content_uuid, .. }, .. } = pending { @@ -568,147 +563,147 @@ impl UpdateStore { //#[cfg(test)] //mod test { - //use super::*; - //use crate::index_controller::{ - //index_actor::{error::IndexActorError, MockIndexActorHandle}, - //UpdateResult, - //}; +//use super::*; +//use crate::index_controller::{ +//index_actor::{error::IndexActorError, MockIndexActorHandle}, +//UpdateResult, +//}; - //use futures::future::ok; +//use futures::future::ok; - //#[actix_rt::test] - //async fn test_next_id() { - //let dir = tempfile::tempdir_in(".").unwrap(); - //let mut options = EnvOpenOptions::new(); - //let handle = Arc::new(MockIndexActorHandle::new()); - //options.map_size(4096 * 100); - //let update_store = UpdateStore::open( - //options, - //dir.path(), - //handle, - //Arc::new(AtomicBool::new(false)), - //) - //.unwrap(); +//#[actix_rt::test] +//async fn test_next_id() { +//let dir = tempfile::tempdir_in(".").unwrap(); +//let mut options = EnvOpenOptions::new(); +//let handle = Arc::new(MockIndexActorHandle::new()); +//options.map_size(4096 * 100); +//let update_store = UpdateStore::open( +//options, +//dir.path(), +//handle, 
+//Arc::new(AtomicBool::new(false)), +//) +//.unwrap(); - //let index1_uuid = Uuid::new_v4(); - //let index2_uuid = Uuid::new_v4(); +//let index1_uuid = Uuid::new_v4(); +//let index2_uuid = Uuid::new_v4(); - //let mut txn = update_store.env.write_txn().unwrap(); - //let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap(); - //txn.commit().unwrap(); - //assert_eq!((0, 0), ids); +//let mut txn = update_store.env.write_txn().unwrap(); +//let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap(); +//txn.commit().unwrap(); +//assert_eq!((0, 0), ids); - //let mut txn = update_store.env.write_txn().unwrap(); - //let ids = update_store.next_update_id(&mut txn, index2_uuid).unwrap(); - //txn.commit().unwrap(); - //assert_eq!((1, 0), ids); +//let mut txn = update_store.env.write_txn().unwrap(); +//let ids = update_store.next_update_id(&mut txn, index2_uuid).unwrap(); +//txn.commit().unwrap(); +//assert_eq!((1, 0), ids); - //let mut txn = update_store.env.write_txn().unwrap(); - //let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap(); - //txn.commit().unwrap(); - //assert_eq!((2, 1), ids); - //} - - //#[actix_rt::test] - //async fn test_register_update() { - //let dir = tempfile::tempdir_in(".").unwrap(); - //let mut options = EnvOpenOptions::new(); - //let handle = Arc::new(MockIndexActorHandle::new()); - //options.map_size(4096 * 100); - //let update_store = UpdateStore::open( - //options, - //dir.path(), - //handle, - //Arc::new(AtomicBool::new(false)), - //) - //.unwrap(); - //let meta = UpdateMeta::ClearDocuments; - //let uuid = Uuid::new_v4(); - //let store_clone = update_store.clone(); - //tokio::task::spawn_blocking(move || { - //store_clone.register_update(meta, None, uuid).unwrap(); - //}) - //.await - //.unwrap(); - - //let txn = update_store.env.read_txn().unwrap(); - //assert!(update_store - //.pending_queue - //.get(&txn, &(0, uuid, 0)) - //.unwrap() - //.is_some()); - //} - - //#[actix_rt::test] - //async fn 
test_process_update() { - //let dir = tempfile::tempdir_in(".").unwrap(); - //let mut handle = MockIndexActorHandle::new(); - - //handle - //.expect_update() - //.times(2) - //.returning(|_index_uuid, processing, _file| { - //if processing.id() == 0 { - //Box::pin(ok(Ok(processing.process(UpdateResult::Other)))) - //} else { - //Box::pin(ok(Err( - //processing.fail(IndexActorError::ExistingPrimaryKey.into()) - //))) - //} - //}); - - //let handle = Arc::new(handle); - - //let mut options = EnvOpenOptions::new(); - //options.map_size(4096 * 100); - //let store = UpdateStore::open( - //options, - //dir.path(), - //handle.clone(), - //Arc::new(AtomicBool::new(false)), - //) - //.unwrap(); - - //// wait a bit for the event loop exit. - //tokio::time::sleep(std::time::Duration::from_millis(50)).await; - - //let mut txn = store.env.write_txn().unwrap(); - - //let update = Enqueued::new(UpdateMeta::ClearDocuments, 0, None); - //let uuid = Uuid::new_v4(); - - //store - //.pending_queue - //.put(&mut txn, &(0, uuid, 0), &update) - //.unwrap(); - - //let update = Enqueued::new(UpdateMeta::ClearDocuments, 1, None); - - //store - //.pending_queue - //.put(&mut txn, &(1, uuid, 1), &update) - //.unwrap(); - - //txn.commit().unwrap(); - - //// Process the pending, and check that it has been moved to the update databases, and - //// removed from the pending database. 
- //let store_clone = store.clone(); - //tokio::task::spawn_blocking(move || { - //store_clone.process_pending_update(handle.clone()).unwrap(); - //store_clone.process_pending_update(handle).unwrap(); - //}) - //.await - //.unwrap(); - - //let txn = store.env.read_txn().unwrap(); - - //assert!(store.pending_queue.first(&txn).unwrap().is_none()); - //let update = store.updates.get(&txn, &(uuid, 0)).unwrap().unwrap(); - - //assert!(matches!(update, UpdateStatus::Processed(_))); - //let update = store.updates.get(&txn, &(uuid, 1)).unwrap().unwrap(); - - //assert!(matches!(update, UpdateStatus::Failed(_))); - //} +//let mut txn = update_store.env.write_txn().unwrap(); +//let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap(); +//txn.commit().unwrap(); +//assert_eq!((2, 1), ids); +//} + +//#[actix_rt::test] +//async fn test_register_update() { +//let dir = tempfile::tempdir_in(".").unwrap(); +//let mut options = EnvOpenOptions::new(); +//let handle = Arc::new(MockIndexActorHandle::new()); +//options.map_size(4096 * 100); +//let update_store = UpdateStore::open( +//options, +//dir.path(), +//handle, +//Arc::new(AtomicBool::new(false)), +//) +//.unwrap(); +//let meta = UpdateMeta::ClearDocuments; +//let uuid = Uuid::new_v4(); +//let store_clone = update_store.clone(); +//tokio::task::spawn_blocking(move || { +//store_clone.register_update(meta, None, uuid).unwrap(); +//}) +//.await +//.unwrap(); + +//let txn = update_store.env.read_txn().unwrap(); +//assert!(update_store +//.pending_queue +//.get(&txn, &(0, uuid, 0)) +//.unwrap() +//.is_some()); +//} + +//#[actix_rt::test] +//async fn test_process_update() { +//let dir = tempfile::tempdir_in(".").unwrap(); +//let mut handle = MockIndexActorHandle::new(); + +//handle +//.expect_update() +//.times(2) +//.returning(|_index_uuid, processing, _file| { +//if processing.id() == 0 { +//Box::pin(ok(Ok(processing.process(UpdateResult::Other)))) +//} else { +//Box::pin(ok(Err( 
+//processing.fail(IndexActorError::ExistingPrimaryKey.into()) +//))) +//} +//}); + +//let handle = Arc::new(handle); + +//let mut options = EnvOpenOptions::new(); +//options.map_size(4096 * 100); +//let store = UpdateStore::open( +//options, +//dir.path(), +//handle.clone(), +//Arc::new(AtomicBool::new(false)), +//) +//.unwrap(); + +//// wait a bit for the event loop exit. +//tokio::time::sleep(std::time::Duration::from_millis(50)).await; + +//let mut txn = store.env.write_txn().unwrap(); + +//let update = Enqueued::new(UpdateMeta::ClearDocuments, 0, None); +//let uuid = Uuid::new_v4(); + +//store +//.pending_queue +//.put(&mut txn, &(0, uuid, 0), &update) +//.unwrap(); + +//let update = Enqueued::new(UpdateMeta::ClearDocuments, 1, None); + +//store +//.pending_queue +//.put(&mut txn, &(1, uuid, 1), &update) +//.unwrap(); + +//txn.commit().unwrap(); + +//// Process the pending, and check that it has been moved to the update databases, and +//// removed from the pending database. +//let store_clone = store.clone(); +//tokio::task::spawn_blocking(move || { +//store_clone.process_pending_update(handle.clone()).unwrap(); +//store_clone.process_pending_update(handle).unwrap(); +//}) +//.await +//.unwrap(); + +//let txn = store.env.read_txn().unwrap(); + +//assert!(store.pending_queue.first(&txn).unwrap().is_none()); +//let update = store.updates.get(&txn, &(uuid, 0)).unwrap().unwrap(); + +//assert!(matches!(update, UpdateStatus::Processed(_))); +//let update = store.updates.get(&txn, &(uuid, 1)).unwrap().unwrap(); + +//assert!(matches!(update, UpdateStatus::Failed(_))); +//} //} diff --git a/meilisearch-lib/src/lib.rs b/meilisearch-lib/src/lib.rs index 3b7b5e7fa..6eaaf431c 100644 --- a/meilisearch-lib/src/lib.rs +++ b/meilisearch-lib/src/lib.rs @@ -5,7 +5,7 @@ pub mod options; pub mod index; pub mod index_controller; -pub use index_controller::{IndexController as MeiliSearch, updates::store::Update}; +pub use index_controller::{updates::store::Update, IndexController 
as MeiliSearch}; pub use milli; @@ -55,4 +55,3 @@ pub fn from_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Res ar.unpack(&dest)?; Ok(()) } - diff --git a/meilisearch-lib/src/options.rs b/meilisearch-lib/src/options.rs index f4b992f2e..0e59392d6 100644 --- a/meilisearch-lib/src/options.rs +++ b/meilisearch-lib/src/options.rs @@ -112,4 +112,3 @@ fn total_memory_bytes() -> Option { None } } - From d30830a55c6786538325f039ef0f8b9fb0d86e1f Mon Sep 17 00:00:00 2001 From: many Date: Wed, 22 Sep 2021 15:46:25 +0200 Subject: [PATCH 29/37] Add csv deserializer for documents --- Cargo.lock | 24 ++ meilisearch-lib/Cargo.toml | 1 + .../updates/csv_documents_iter.rs | 282 ++++++++++++++++++ .../src/index_controller/updates/error.rs | 15 +- .../src/index_controller/updates/mod.rs | 25 +- 5 files changed, 340 insertions(+), 7 deletions(-) create mode 100644 meilisearch-lib/src/index_controller/updates/csv_documents_iter.rs diff --git a/Cargo.lock b/Cargo.lock index 116ffd8cc..ad726632b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -471,6 +471,7 @@ dependencies = [ "lazy_static", "memchr", "regex-automata", + "serde", ] [[package]] @@ -772,6 +773,28 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "csv" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +dependencies = [ + "bstr", + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + [[package]] name = "derivative" version = "2.2.0" @@ -1668,6 +1691,7 @@ dependencies = [ "bytes", "chrono", "crossbeam-channel", + "csv", "derivative", "either", "flate2", diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml index df8b1e45d..43db857d2 100644 --- 
a/meilisearch-lib/Cargo.toml +++ b/meilisearch-lib/Cargo.toml @@ -15,6 +15,7 @@ arc-swap = "1.3.2" byte-unit = { version = "4.0.12", default-features = false, features = ["std"] } bytes = "1.1.0" chrono = { version = "0.4.19", features = ["serde"] } +csv = "1.1.6" crossbeam-channel = "0.5.1" either = "1.6.1" flate2 = "1.0.21" diff --git a/meilisearch-lib/src/index_controller/updates/csv_documents_iter.rs b/meilisearch-lib/src/index_controller/updates/csv_documents_iter.rs new file mode 100644 index 000000000..837240ceb --- /dev/null +++ b/meilisearch-lib/src/index_controller/updates/csv_documents_iter.rs @@ -0,0 +1,282 @@ +use super::error::{Result, UpdateLoopError}; +use std::io::{Read, Result as IoResult}; + +use csv::{Reader as CsvReader, StringRecordsIntoIter}; +use serde_json::{Map, Value}; + +enum AllowedType { + String, + Number, +} + +fn parse_csv_header(header: &str) -> (String, AllowedType) { + // if there are several separators we only split on the last one. + match header.rsplit_once(':') { + Some((field_name, field_type)) => match field_type { + "string" => (field_name.to_string(), AllowedType::String), + "number" => (field_name.to_string(), AllowedType::Number), + // if the pattern isn't reconized, we keep the whole field. + _otherwise => (header.to_string(), AllowedType::String), + }, + None => (header.to_string(), AllowedType::String), + } +} + +pub struct CsvDocumentIter +where + R: Read, +{ + documents: StringRecordsIntoIter, + headers: Vec<(String, AllowedType)>, +} + +impl CsvDocumentIter { + pub fn from_reader(reader: R) -> IoResult { + let mut records = CsvReader::from_reader(reader); + + let headers = records + .headers()? 
+ .into_iter() + .map(parse_csv_header) + .collect(); + + Ok(Self { + documents: records.into_records(), + headers, + }) + } +} + +impl Iterator for CsvDocumentIter { + type Item = Result>; + + fn next(&mut self) -> Option { + let csv_document = self.documents.next()?; + + match csv_document { + Ok(csv_document) => { + let mut document = Map::new(); + + for ((field_name, field_type), value) in + self.headers.iter().zip(csv_document.into_iter()) + { + let parsed_value = (|| match field_type { + AllowedType::Number => value + .parse::() + .map(Value::from) + .map_err(|e| UpdateLoopError::MalformedPayload(Box::new(e))), + AllowedType::String => Ok(Value::String(value.to_string())), + })(); + + match parsed_value { + Ok(value) => drop(document.insert(field_name.to_string(), value)), + Err(e) => return Some(Err(e)), + } + } + + Some(Ok(document)) + } + Err(e) => Some(Err(UpdateLoopError::MalformedPayload(Box::new(e)))), + } + } +} + +#[cfg(test)] +mod test { + use serde_json::json; + + use super::*; + + #[test] + fn simple_csv_document() { + let documents = r#"city,country,pop +"Boston","United States","4628910""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert_eq!( + Value::Object(csv_iter.next().unwrap().unwrap()), + json!({ + "city": "Boston", + "country": "United States", + "pop": "4628910", + }) + ); + } + + #[test] + fn coma_in_field() { + let documents = r#"city,country,pop +"Boston","United, States","4628910""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert_eq!( + Value::Object(csv_iter.next().unwrap().unwrap()), + json!({ + "city": "Boston", + "country": "United, States", + "pop": "4628910", + }) + ); + } + + #[test] + fn quote_in_field() { + let documents = r#"city,country,pop +"Boston","United"" States","4628910""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert_eq!( + 
Value::Object(csv_iter.next().unwrap().unwrap()), + json!({ + "city": "Boston", + "country": "United\" States", + "pop": "4628910", + }) + ); + } + + #[test] + fn integer_in_field() { + let documents = r#"city,country,pop:number +"Boston","United States","4628910""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert_eq!( + Value::Object(csv_iter.next().unwrap().unwrap()), + json!({ + "city": "Boston", + "country": "United States", + "pop": 4628910.0, + }) + ); + } + + #[test] + fn float_in_field() { + let documents = r#"city,country,pop:number +"Boston","United States","4628910.01""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert_eq!( + Value::Object(csv_iter.next().unwrap().unwrap()), + json!({ + "city": "Boston", + "country": "United States", + "pop": 4628910.01, + }) + ); + } + + #[test] + fn several_double_dot_in_header() { + let documents = r#"city:love:string,country:state,pop +"Boston","United States","4628910""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert_eq!( + Value::Object(csv_iter.next().unwrap().unwrap()), + json!({ + "city:love": "Boston", + "country:state": "United States", + "pop": "4628910", + }) + ); + } + + #[test] + fn ending_by_double_dot_in_header() { + let documents = r#"city:,country,pop +"Boston","United States","4628910""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert_eq!( + Value::Object(csv_iter.next().unwrap().unwrap()), + json!({ + "city:": "Boston", + "country": "United States", + "pop": "4628910", + }) + ); + } + + #[test] + fn starting_by_double_dot_in_header() { + let documents = r#":city,country,pop +"Boston","United States","4628910""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert_eq!( + Value::Object(csv_iter.next().unwrap().unwrap()), + json!({ + ":city": "Boston", + "country": "United 
States", + "pop": "4628910", + }) + ); + } + + #[test] + fn starting_by_double_dot_in_header2() { + let documents = r#":string,country,pop +"Boston","United States","4628910""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert!(csv_iter.next().unwrap().is_err()); + } + + #[test] + fn double_double_dot_in_header() { + let documents = r#"city::string,country,pop +"Boston","United States","4628910""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert_eq!( + Value::Object(csv_iter.next().unwrap().unwrap()), + json!({ + "city:": "Boston", + "country": "United States", + "pop": "4628910", + }) + ); + } + + #[test] + fn bad_type_in_header() { + let documents = r#"city,country:number,pop +"Boston","United States","4628910""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert!(csv_iter.next().unwrap().is_err()); + } + + #[test] + fn bad_column_count1() { + let documents = r#"city,country,pop +"Boston","United States","4628910", "too much""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert!(csv_iter.next().unwrap().is_err()); + } + + #[test] + fn bad_column_count2() { + let documents = r#"city,country,pop +"Boston","United States""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert!(csv_iter.next().unwrap().is_err()); + } +} diff --git a/meilisearch-lib/src/index_controller/updates/error.rs b/meilisearch-lib/src/index_controller/updates/error.rs index 8cbcf211a..217567569 100644 --- a/meilisearch-lib/src/index_controller/updates/error.rs +++ b/meilisearch-lib/src/index_controller/updates/error.rs @@ -25,6 +25,8 @@ pub enum UpdateLoopError { FatalUpdateStoreError, #[error("{0}")] InvalidPayload(#[from] DocumentFormatError), + #[error("{0}")] + MalformedPayload(Box), // TODO: The reference to actix has to go. 
#[error("{0}")] PayloadError(#[from] actix_web::error::PayloadError), @@ -56,12 +58,13 @@ internal_error!( impl ErrorCode for UpdateLoopError { fn error_code(&self) -> Code { match self { - UpdateLoopError::UnexistingUpdate(_) => Code::NotFound, - UpdateLoopError::Internal(_) => Code::Internal, - //UpdateLoopError::IndexActor(e) => e.error_code(), - UpdateLoopError::FatalUpdateStoreError => Code::Internal, - UpdateLoopError::InvalidPayload(_) => Code::BadRequest, - UpdateLoopError::PayloadError(error) => match error { + Self::UnexistingUpdate(_) => Code::NotFound, + Self::Internal(_) => Code::Internal, + //Self::IndexActor(e) => e.error_code(), + Self::FatalUpdateStoreError => Code::Internal, + Self::InvalidPayload(_) => Code::BadRequest, + Self::MalformedPayload(_) => Code::BadRequest, + Self::PayloadError(error) => match error { actix_web::error::PayloadError::Overflow => Code::PayloadTooLarge, _ => Code::Internal, }, diff --git a/meilisearch-lib/src/index_controller/updates/mod.rs b/meilisearch-lib/src/index_controller/updates/mod.rs index fad337553..14f0a7c69 100644 --- a/meilisearch-lib/src/index_controller/updates/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/mod.rs @@ -1,8 +1,10 @@ +mod csv_documents_iter; pub mod error; mod message; pub mod status; pub mod store; +use crate::index_controller::updates::csv_documents_iter::CsvDocumentIter; use std::io; use std::path::{Path, PathBuf}; use std::sync::atomic::AtomicBool; @@ -13,6 +15,7 @@ use async_stream::stream; use bytes::Bytes; use futures::{Stream, StreamExt}; use log::trace; +use milli::documents::DocumentBatchBuilder; use milli::update::IndexDocumentsMethod; use serde::{Deserialize, Serialize}; use tokio::sync::mpsc; @@ -27,7 +30,7 @@ use crate::index_controller::update_file_store::UpdateFileStore; use status::UpdateStatus; use super::index_resolver::HardStateIndexResolver; -use super::{DocumentAdditionFormat, Update}; +use super::{DocumentAdditionFormat, Payload, Update}; pub type 
UpdateSender = mpsc::Sender; @@ -222,6 +225,26 @@ impl UpdateLoop { Ok(status.into()) } + async fn documents_from_csv(&self, payload: Payload) -> Result { + let file_store = self.update_file_store.clone(); + tokio::task::spawn_blocking(move || { + let (uuid, mut file) = file_store.new_update().unwrap(); + let mut builder = DocumentBatchBuilder::new(&mut *file).unwrap(); + + let iter = CsvDocumentIter::from_reader(StreamReader::new(payload))?; + for doc in iter { + let doc = doc?; + builder.add_documents(doc).unwrap(); + } + builder.finish().unwrap(); + + file.persist(); + + Ok(uuid) + }) + .await? + } + async fn handle_list_updates(&self, uuid: Uuid) -> Result> { let update_store = self.store.clone(); tokio::task::spawn_blocking(move || { From 61e5eed49374b0caf5129411b4163f7b73b0b0df Mon Sep 17 00:00:00 2001 From: many Date: Wed, 22 Sep 2021 16:01:21 +0200 Subject: [PATCH 30/37] Call csv specialized function --- meilisearch-lib/src/index_controller/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index f117369fd..438e5af3e 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -71,6 +71,7 @@ pub struct IndexController { #[derive(Debug)] pub enum DocumentAdditionFormat { Json, + Csv, } #[derive(Serialize, Debug)] From 2a149481237d83b5e979a480734eacfbe37d3c00 Mon Sep 17 00:00:00 2001 From: many Date: Tue, 28 Sep 2021 17:50:21 +0200 Subject: [PATCH 31/37] Use an existing revision of milli --- Cargo.lock | 19 +++++++++++++++++++ meilisearch-lib/Cargo.toml | 1 - 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index ad726632b..af964f2cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -305,6 +305,20 @@ name = "as-slice" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45403b49e3954a4b8428a0ac21a4b7afadccf92bfd96273f1a58cd4812496ae0" +<<<<<<< 
HEAD +======= +dependencies = [ + "generic-array 0.12.4", + "generic-array 0.13.3", + "generic-array 0.14.4", + "stable_deref_trait", +] + +[[package]] +name = "assert-json-diff" +version = "1.0.1" +source = "git+https://github.com/qdequele/assert-json-diff?branch=master#9012a0c8866d0f2db0ef9a6242e4a19d1e8c67e4" +>>>>>>> 9d9543fd (Use an existing revision of milli) dependencies = [ "generic-array 0.12.4", "generic-array 0.13.3", @@ -1778,7 +1792,12 @@ dependencies = [ [[package]] name = "milli" +<<<<<<< HEAD version = "0.16.0" +======= +version = "0.14.0" +source = "git+https://github.com/meilisearch/milli.git?rev=9d9010e#9d9010e45ff1eddd8a7715423ad0988a35ee34b6" +>>>>>>> 9d9543fd (Use an existing revision of milli) dependencies = [ "bimap", "bincode", diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml index 43db857d2..a52bd9e43 100644 --- a/meilisearch-lib/Cargo.toml +++ b/meilisearch-lib/Cargo.toml @@ -31,7 +31,6 @@ log = "0.4.14" meilisearch-error = { path = "../meilisearch-error" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" } memmap = "0.7.0" -#milli = { git = "https://github.com/meilisearch/milli.git", branch = "main" } milli = { path = "../../milli/milli" } mime = "0.3.16" num_cpus = "1.13.0" From 6e8a3fe8deea62818ac0cc9194939904a69f8e6b Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 28 Sep 2021 22:58:48 +0200 Subject: [PATCH 32/37] move csv parsing to document_formats --- Cargo.lock | 185 +++++------ meilisearch-lib/src/document_formats.rs | 303 +++++++++++++++++- .../updates/csv_documents_iter.rs | 282 ---------------- .../src/index_controller/updates/mod.rs | 28 +- 4 files changed, 384 insertions(+), 414 deletions(-) delete mode 100644 meilisearch-lib/src/index_controller/updates/csv_documents_iter.rs diff --git a/Cargo.lock b/Cargo.lock index af964f2cd..d7ab9dbe4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -82,7 +82,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "c2f86cd6857c135e6e9fe57b1619a88d1f94a7df34c00e11fe13e64fd3438837" dependencies = [ "quote 1.0.9", - "syn 1.0.76", + "syn 1.0.77", ] [[package]] @@ -218,7 +218,7 @@ dependencies = [ "actix-router", "proc-macro2 1.0.29", "quote 1.0.9", - "syn 1.0.76", + "syn 1.0.77", ] [[package]] @@ -296,29 +296,15 @@ dependencies = [ [[package]] name = "arc-swap" -version = "1.3.2" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5ab7d9e73059c86c36473f459b52adbd99c3554a4fec492caef460806006f00" +checksum = "e6df5aef5c5830360ce5218cecb8f018af3438af5686ae945094affc86fdec63" [[package]] name = "as-slice" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45403b49e3954a4b8428a0ac21a4b7afadccf92bfd96273f1a58cd4812496ae0" -<<<<<<< HEAD -======= -dependencies = [ - "generic-array 0.12.4", - "generic-array 0.13.3", - "generic-array 0.14.4", - "stable_deref_trait", -] - -[[package]] -name = "assert-json-diff" -version = "1.0.1" -source = "git+https://github.com/qdequele/assert-json-diff?branch=master#9012a0c8866d0f2db0ef9a6242e4a19d1e8c67e4" ->>>>>>> 9d9543fd (Use an existing revision of milli) dependencies = [ "generic-array 0.12.4", "generic-array 0.13.3", @@ -344,7 +330,7 @@ checksum = "648ed8c8d2ce5409ccd57453d9d1b214b342a0d69376a6feda1fd6cae3299308" dependencies = [ "proc-macro2 1.0.29", "quote 1.0.9", - "syn 1.0.76", + "syn 1.0.77", ] [[package]] @@ -355,7 +341,7 @@ checksum = "44318e776df68115a881de9a8fd1b9e53368d7a4a5ce4cc48517da3393233a5e" dependencies = [ "proc-macro2 1.0.29", "quote 1.0.9", - "syn 1.0.76", + "syn 1.0.77", ] [[package]] @@ -478,9 +464,9 @@ dependencies = [ [[package]] name = "bstr" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90682c8d613ad3373e66de8c6411e0ae2ab2571e879d2efbf73558cc66f21279" +checksum = 
"ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" dependencies = [ "lazy_static", "memchr", @@ -490,9 +476,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.7.0" +version = "3.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c59e7af012c713f529e7a3ee57ce9b31ddd858d4b512923602f74608b009631" +checksum = "d9df67f7bf9ef8498769f994239c45613ef0c5899415fb58e9add412d2c1a538" [[package]] name = "byte-tools" @@ -526,7 +512,7 @@ checksum = "8e215f8c2f9f79cb53c8335e687ffd07d5bfcb6fe5fc80723762d0be46e7cc54" dependencies = [ "proc-macro2 1.0.29", "quote 1.0.9", - "syn 1.0.76", + "syn 1.0.77", ] [[package]] @@ -593,9 +579,9 @@ dependencies = [ [[package]] name = "cedarwood" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "963e82c7b94163808ca3a452608d260b64ba5bc7b5653b4af1af59887899f48d" +checksum = "fa312498f9f41452998d984d3deb84c84f86aeb8a2499d7505bb8106d78d147d" dependencies = [ "smallvec", ] @@ -668,7 +654,7 @@ checksum = "1df715824eb382e34b7afb7463b0247bf41538aeba731fba05241ecdb5dc3747" dependencies = [ "proc-macro2 1.0.29", "quote 1.0.9", - "syn 1.0.76", + "syn 1.0.77", ] [[package]] @@ -817,7 +803,7 @@ checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" dependencies = [ "proc-macro2 1.0.29", "quote 1.0.9", - "syn 1.0.76", + "syn 1.0.77", ] [[package]] @@ -830,7 +816,7 @@ dependencies = [ "proc-macro2 1.0.29", "quote 1.0.9", "rustc_version 0.3.3", - "syn 1.0.76", + "syn 1.0.77", ] [[package]] @@ -895,7 +881,7 @@ checksum = "c134c37760b27a871ba422106eedbb8247da973a09e82558bf26d619c882b159" dependencies = [ "proc-macro2 1.0.29", "quote 1.0.9", - "syn 1.0.76", + "syn 1.0.77", ] [[package]] @@ -937,9 +923,9 @@ checksum = "31586bda1b136406162e381a3185a506cdfc1631708dd40cba2f6628d8634499" [[package]] name = "flate2" -version = "1.0.21" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "80edafed416a46fb378521624fab1cfa2eb514784fd8921adbe8a8d8321da811" +checksum = "1e6988e897c1c9c485f43b47a529cef42fde0547f9d8d41a7062518f1d8fc53f" dependencies = [ "cfg-if 1.0.0", "crc32fast", @@ -1033,7 +1019,7 @@ dependencies = [ "proc-macro-hack", "proc-macro2 1.0.29", "quote 1.0.9", - "syn 1.0.76", + "syn 1.0.77", ] [[package]] @@ -1132,7 +1118,7 @@ dependencies = [ "proc-macro-error", "proc-macro2 1.0.29", "quote 1.0.9", - "syn 1.0.76", + "syn 1.0.77", ] [[package]] @@ -1288,9 +1274,9 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "http" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "527e8c9ac747e28542699a951517aa9a6945af506cd1f2e1b53a576c17b6cc11" +checksum = "1323096b05d41827dadeaee54c9981958c0f94e670bc94ed80037d1a7b8b186b" dependencies = [ "bytes", "fnv", @@ -1334,9 +1320,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "0.14.12" +version = "0.14.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13f67199e765030fa08fe0bd581af683f0d5bc04ea09c2b1102012c5fb90e7fd" +checksum = "15d1cfb9e4f68655fa04c01f59edb405b6074a0f7118ea881e5026e4a1cd8593" dependencies = [ "bytes", "futures-channel", @@ -1395,9 +1381,9 @@ dependencies = [ [[package]] name = "instant" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bee0328b1209d157ef001c94dd85b4f8f64139adb0eac2659f4b08382b2f474d" +checksum = "716d3d89f35ac6a34fd0eed635395f4c3b76fa889338a4632e5231a8684216bd" dependencies = [ "cfg-if 1.0.0", ] @@ -1470,9 +1456,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.54" +version = "0.3.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1866b355d9c878e5e607473cbe3f63282c0b7aad2db1dbebf55076c686918254" +checksum = 
"7cc9ffccd38c451a86bf13657df244e9c3f37493cce8e5e21e940963777acc84" dependencies = [ "wasm-bindgen", ] @@ -1500,9 +1486,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.101" +version = "0.2.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cb00336871be5ed2c8ed44b60ae9959dc5b9f08539422ed43f09e34ecaeba21" +checksum = "dd8f7255a17a627354f321ef0055d63b898c6fb27eff628af4d1b66b7331edf6" [[package]] name = "libgit2-sys" @@ -1792,12 +1778,7 @@ dependencies = [ [[package]] name = "milli" -<<<<<<< HEAD version = "0.16.0" -======= -version = "0.14.0" -source = "git+https://github.com/meilisearch/milli.git?rev=9d9010e#9d9010e45ff1eddd8a7715423ad0988a35ee34b6" ->>>>>>> 9d9543fd (Use an existing revision of milli) dependencies = [ "bimap", "bincode", @@ -2071,7 +2052,7 @@ dependencies = [ "pest_meta", "proc-macro2 1.0.29", "quote 1.0.9", - "syn 1.0.76", + "syn 1.0.77", ] [[package]] @@ -2140,7 +2121,7 @@ checksum = "6e8fe8163d14ce7f0cdac2e040116f22eac817edabff0be91e8aff7e9accf389" dependencies = [ "proc-macro2 1.0.29", "quote 1.0.9", - "syn 1.0.76", + "syn 1.0.77", ] [[package]] @@ -2157,9 +2138,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.19" +version = "0.3.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c" +checksum = "7c9b1041b4387893b91ee6746cddfc28516aff326a3519fb2adf820932c5e6cb" [[package]] name = "ppv-lite86" @@ -2176,7 +2157,7 @@ dependencies = [ "proc-macro-error-attr", "proc-macro2 1.0.29", "quote 1.0.9", - "syn 1.0.76", + "syn 1.0.77", "version_check", ] @@ -2383,9 +2364,9 @@ dependencies = [ [[package]] name = "retain_mut" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9c17925a9027d298a4603d286befe3f9dc0e8ed02523141914eb628798d6e5b" +checksum = 
"448296241d034b96c11173591deaa1302f2c17b56092106c1f92c1bc0183a8c9" [[package]] name = "ring" @@ -2550,14 +2531,14 @@ checksum = "d7bc1a1ab1961464eae040d96713baa5a724a8152c1222492465b54322ec508b" dependencies = [ "proc-macro2 1.0.29", "quote 1.0.9", - "syn 1.0.76", + "syn 1.0.77", ] [[package]] name = "serde_json" -version = "1.0.67" +version = "1.0.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7f9e390c27c3c0ce8bc5d725f6e4d30a29d26659494aa4b17535f7522c5c950" +checksum = "0f690853975602e1bfe1ccbf50504d67174e3bcf340f23b5ea9992e0587a52d8" dependencies = [ "indexmap", "itoa", @@ -2670,15 +2651,15 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.6.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" +checksum = "1ecab6c735a6bb4139c0caafd0cc3635748bbb3acf4550e8138122099251f309" [[package]] name = "socket2" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "765f090f0e423d2b55843402a07915add955e7d60657db13707a159727326cad" +checksum = "5dc90fe6c7be1a323296982db1836d1ea9e47b6839496dde9a541bc496df3516" dependencies = [ "libc", "winapi", @@ -2729,7 +2710,7 @@ dependencies = [ "quote 1.0.9", "serde", "serde_derive", - "syn 1.0.76", + "syn 1.0.77", ] [[package]] @@ -2745,7 +2726,7 @@ dependencies = [ "serde_derive", "serde_json", "sha1", - "syn 1.0.76", + "syn 1.0.77", ] [[package]] @@ -2781,7 +2762,7 @@ dependencies = [ "proc-macro-error", "proc-macro2 1.0.29", "quote 1.0.9", - "syn 1.0.76", + "syn 1.0.77", ] [[package]] @@ -2797,9 +2778,9 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.76" +version = "1.0.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6f107db402c2c2055242dbf4d2af0e69197202e9faacbef9571bbe47f5a1b84" +checksum = "5239bc68e0fef57495900cfea4e8dc75596d9a319d7e16b1e0a440d24e6fe0a0" 
dependencies = [ "proc-macro2 1.0.29", "quote 1.0.9", @@ -2823,15 +2804,15 @@ checksum = "474aaa926faa1603c40b7885a9eaea29b444d1cb2850cb7c0e37bb1a4182f4fa" dependencies = [ "proc-macro2 1.0.29", "quote 1.0.9", - "syn 1.0.76", + "syn 1.0.77", "unicode-xid 0.2.2", ] [[package]] name = "sysinfo" -version = "0.20.3" +version = "0.20.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92d77883450d697c0010e60db3d940ed130b0ed81d27485edee981621b434e52" +checksum = "ffff4a02fa61eee51f95210fc9c98ea6eeb46bb071adeafd61e1a0b9b22c6a6d" dependencies = [ "cfg-if 1.0.0", "core-foundation-sys", @@ -2902,7 +2883,7 @@ checksum = "bad553cc2c78e8de258400763a647e80e6d1b31ee237275d756f6836d204494c" dependencies = [ "proc-macro2 1.0.29", "quote 1.0.9", - "syn 1.0.76", + "syn 1.0.77", ] [[package]] @@ -2951,14 +2932,14 @@ dependencies = [ "proc-macro2 1.0.29", "quote 1.0.9", "standback", - "syn 1.0.76", + "syn 1.0.77", ] [[package]] name = "tinyvec" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5241dd6f21443a3606b432718b166d3cedc962fd4b8bea54a8bc7f514ebda986" +checksum = "f83b2a3d4d9091d0abd7eba4dc2710b1718583bd4d8992e2190720ea38f391f7" dependencies = [ "tinyvec_macros", ] @@ -2971,9 +2952,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "tokio" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4efe6fc2395938c8155973d7be49fe8d03a843726e285e100a8a383cc0154ce" +checksum = "c2c2416fdedca8443ae44b4527de1ea633af61d8f7169ffa6e72c5b53d24efcc" dependencies = [ "autocfg", "bytes", @@ -2997,7 +2978,7 @@ checksum = "54473be61f4ebe4efd09cec9bd5d16fa51d70ea0192213d754d2d500457db110" dependencies = [ "proc-macro2 1.0.29", "quote 1.0.9", - "syn 1.0.76", + "syn 1.0.77", ] [[package]] @@ -3053,9 +3034,9 @@ checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6" 
[[package]] name = "tracing" -version = "0.1.26" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09adeb8c97449311ccd28a427f96fb563e7fd31aabf994189879d9da2394b89d" +checksum = "84f96e095c0c82419687c20ddf5cb3eadb61f4e1405923c9dc8e53a1adacbda8" dependencies = [ "cfg-if 1.0.0", "pin-project-lite", @@ -3121,9 +3102,9 @@ checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b" [[package]] name = "unicode-width" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" +checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" [[package]] name = "unicode-xid" @@ -3240,9 +3221,9 @@ checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" [[package]] name = "wasm-bindgen" -version = "0.2.77" +version = "0.2.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e68338db6becec24d3c7977b5bf8a48be992c934b5d07177e3931f5dc9b076c" +checksum = "632f73e236b219150ea279196e54e610f5dbafa5d61786303d4da54f84e47fce" dependencies = [ "cfg-if 1.0.0", "serde", @@ -3252,24 +3233,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.77" +version = "0.2.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f34c405b4f0658583dba0c1c7c9b694f3cac32655db463b56c254a1c75269523" +checksum = "a317bf8f9fba2476b4b2c85ef4c4af8ff39c3c7f0cdfeed4f82c34a880aa837b" dependencies = [ "bumpalo", "lazy_static", "log", "proc-macro2 1.0.29", "quote 1.0.9", - "syn 1.0.76", + "syn 1.0.77", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.27" +version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a87d738d4abc4cf22f6eb142f5b9a81301331ee3c767f2fef2fda4e325492060" +checksum = 
"8e8d7523cb1f2a4c96c1317ca690031b714a51cc14e05f712446691f413f5d39" dependencies = [ "cfg-if 1.0.0", "js-sys", @@ -3279,9 +3260,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.77" +version = "0.2.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d5a6580be83b19dc570a8f9c324251687ab2184e57086f71625feb57ec77c8" +checksum = "d56146e7c495528bf6587663bea13a8eb588d39b36b679d83972e1a2dbbdacf9" dependencies = [ "quote 1.0.9", "wasm-bindgen-macro-support", @@ -3289,28 +3270,28 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.77" +version = "0.2.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3775a030dc6f5a0afd8a84981a21cc92a781eb429acef9ecce476d0c9113e92" +checksum = "7803e0eea25835f8abdc585cd3021b3deb11543c6fe226dcd30b228857c5c5ab" dependencies = [ "proc-macro2 1.0.29", "quote 1.0.9", - "syn 1.0.76", + "syn 1.0.77", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.77" +version = "0.2.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c279e376c7a8e8752a8f1eaa35b7b0bee6bb9fb0cdacfa97cc3f1f289c87e2b4" +checksum = "0237232789cf037d5480773fe568aac745bfe2afbc11a863e97901780a6b47cc" [[package]] name = "web-sys" -version = "0.3.54" +version = "0.3.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a84d70d1ec7d2da2d26a5bd78f4bca1b8c3254805363ce743b7a05bc30d195a" +checksum = "38eb105f1c59d9eaa6b5cdc92b859d85b926e82cb2e0945cd0c9259faa6fe9fb" dependencies = [ "js-sys", "wasm-bindgen", @@ -3346,9 +3327,9 @@ dependencies = [ [[package]] name = "whoami" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7741161a40200a867c96dfa5574544efa4178cf4c8f770b62dd1cc0362d7ae1" +checksum = "cabfe22aa4936611957e0b5ad9ed0472ac52b2bfb9aedac4a3f3a91a03bd1ff0" dependencies = [ 
"wasm-bindgen", "web-sys", @@ -3420,7 +3401,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d498dbd1fd7beb83c86709ae1c33ca50942889473473d287d56ce4770a18edfb" dependencies = [ "proc-macro2 1.0.29", - "syn 1.0.76", + "syn 1.0.77", "synstructure", ] diff --git a/meilisearch-lib/src/document_formats.rs b/meilisearch-lib/src/document_formats.rs index 297c89831..f06a509c2 100644 --- a/meilisearch-lib/src/document_formats.rs +++ b/meilisearch-lib/src/document_formats.rs @@ -1,8 +1,7 @@ -use std::{ - fmt, - io::{Read, Seek, Write}, -}; +use std::io::{self, Read, Result as IoResult, Seek, Write}; +use std::fmt; +use csv::{Reader as CsvReader, StringRecordsIntoIter}; use milli::documents::DocumentBatchBuilder; use serde_json::{Deserializer, Map, Value}; @@ -12,6 +11,7 @@ type Result = std::result::Result; pub enum PayloadType { Jsonl, Json, + Csv, } impl fmt::Display for PayloadType { @@ -19,6 +19,7 @@ impl fmt::Display for PayloadType { match self { PayloadType::Jsonl => write!(f, "ndjson"), PayloadType::Json => write!(f, "json"), + PayloadType::Csv => write!(f, "csv"), } } } @@ -34,7 +35,7 @@ pub enum DocumentFormatError { ), } -internal_error!(DocumentFormatError: milli::documents::Error); +internal_error!(DocumentFormatError: milli::documents::Error, io::Error); macro_rules! malformed { ($type:path, $e:expr) => { @@ -42,6 +43,20 @@ macro_rules! malformed { }; } +pub fn read_csv(input: impl Read, writer: impl Write + Seek) -> Result<()> { + let mut builder = DocumentBatchBuilder::new(writer).unwrap(); + + let iter = CsvDocumentIter::from_reader(input)?; + for doc in iter { + let doc = doc?; + builder.add_documents(doc).unwrap(); + } + builder.finish().unwrap(); + + Ok(()) +} + + /// read jsonl from input and write an obkv batch to writer. 
pub fn read_jsonl(input: impl Read, writer: impl Write + Seek) -> Result<()> { let mut builder = DocumentBatchBuilder::new(writer)?; @@ -68,3 +83,281 @@ pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result<()> { Ok(()) } + + +enum AllowedType { + String, + Number, +} + +fn parse_csv_header(header: &str) -> (String, AllowedType) { + // if there are several separators we only split on the last one. + match header.rsplit_once(':') { + Some((field_name, field_type)) => match field_type { + "string" => (field_name.to_string(), AllowedType::String), + "number" => (field_name.to_string(), AllowedType::Number), + // if the pattern isn't reconized, we keep the whole field. + _otherwise => (header.to_string(), AllowedType::String), + }, + None => (header.to_string(), AllowedType::String), + } +} + +pub struct CsvDocumentIter +where + R: Read, +{ + documents: StringRecordsIntoIter, + headers: Vec<(String, AllowedType)>, +} + +impl CsvDocumentIter { + pub fn from_reader(reader: R) -> IoResult { + let mut records = CsvReader::from_reader(reader); + + let headers = records + .headers()? 
+ .into_iter() + .map(parse_csv_header) + .collect(); + + Ok(Self { + documents: records.into_records(), + headers, + }) + } +} + +impl Iterator for CsvDocumentIter { + type Item = Result>; + + fn next(&mut self) -> Option { + let csv_document = self.documents.next()?; + + match csv_document { + Ok(csv_document) => { + let mut document = Map::new(); + + for ((field_name, field_type), value) in + self.headers.iter().zip(csv_document.into_iter()) + { + let parsed_value = (|| match field_type { + AllowedType::Number => malformed!(PayloadType::Csv, value + .parse::() + .map(Value::from)), + AllowedType::String => Ok(Value::String(value.to_string())), + })(); + + match parsed_value { + Ok(value) => drop(document.insert(field_name.to_string(), value)), + Err(e) => return Some(Err(e)), + } + } + + Some(Ok(document)) + } + Err(e) => Some(Err(DocumentFormatError::MalformedPayload(Box::new(e), PayloadType::Csv))), + } + } +} + +#[cfg(test)] +mod test { + use serde_json::json; + + use super::*; + + #[test] + fn simple_csv_document() { + let documents = r#"city,country,pop +"Boston","United States","4628910""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert_eq!( + Value::Object(csv_iter.next().unwrap().unwrap()), + json!({ + "city": "Boston", + "country": "United States", + "pop": "4628910", + }) + ); + } + + #[test] + fn coma_in_field() { + let documents = r#"city,country,pop +"Boston","United, States","4628910""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert_eq!( + Value::Object(csv_iter.next().unwrap().unwrap()), + json!({ + "city": "Boston", + "country": "United, States", + "pop": "4628910", + }) + ); + } + + #[test] + fn quote_in_field() { + let documents = r#"city,country,pop +"Boston","United"" States","4628910""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert_eq!( + Value::Object(csv_iter.next().unwrap().unwrap()), + json!({ 
+ "city": "Boston", + "country": "United\" States", + "pop": "4628910", + }) + ); + } + + #[test] + fn integer_in_field() { + let documents = r#"city,country,pop:number +"Boston","United States","4628910""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert_eq!( + Value::Object(csv_iter.next().unwrap().unwrap()), + json!({ + "city": "Boston", + "country": "United States", + "pop": 4628910.0, + }) + ); + } + + #[test] + fn float_in_field() { + let documents = r#"city,country,pop:number +"Boston","United States","4628910.01""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert_eq!( + Value::Object(csv_iter.next().unwrap().unwrap()), + json!({ + "city": "Boston", + "country": "United States", + "pop": 4628910.01, + }) + ); + } + + #[test] + fn several_colon_in_header() { + let documents = r#"city:love:string,country:state,pop +"Boston","United States","4628910""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert_eq!( + Value::Object(csv_iter.next().unwrap().unwrap()), + json!({ + "city:love": "Boston", + "country:state": "United States", + "pop": "4628910", + }) + ); + } + + #[test] + fn ending_by_colon_in_header() { + let documents = r#"city:,country,pop +"Boston","United States","4628910""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert_eq!( + Value::Object(csv_iter.next().unwrap().unwrap()), + json!({ + "city:": "Boston", + "country": "United States", + "pop": "4628910", + }) + ); + } + + #[test] + fn starting_by_colon_in_header() { + let documents = r#":city,country,pop +"Boston","United States","4628910""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert_eq!( + Value::Object(csv_iter.next().unwrap().unwrap()), + json!({ + ":city": "Boston", + "country": "United States", + "pop": "4628910", + }) + ); + } + + #[ignore] + #[test] + fn 
starting_by_colon_in_header2() { + let documents = r#":string,country,pop +"Boston","United States","4628910""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert!(dbg!(csv_iter.next().unwrap()).is_err()); + } + + #[test] + fn double_colon_in_header() { + let documents = r#"city::string,country,pop +"Boston","United States","4628910""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert_eq!( + Value::Object(csv_iter.next().unwrap().unwrap()), + json!({ + "city:": "Boston", + "country": "United States", + "pop": "4628910", + }) + ); + } + + #[test] + fn bad_type_in_header() { + let documents = r#"city,country:number,pop +"Boston","United States","4628910""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert!(csv_iter.next().unwrap().is_err()); + } + + #[test] + fn bad_column_count1() { + let documents = r#"city,country,pop +"Boston","United States","4628910", "too much""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert!(csv_iter.next().unwrap().is_err()); + } + + #[test] + fn bad_column_count2() { + let documents = r#"city,country,pop +"Boston","United States""#; + + let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); + + assert!(csv_iter.next().unwrap().is_err()); + } +} diff --git a/meilisearch-lib/src/index_controller/updates/csv_documents_iter.rs b/meilisearch-lib/src/index_controller/updates/csv_documents_iter.rs deleted file mode 100644 index 837240ceb..000000000 --- a/meilisearch-lib/src/index_controller/updates/csv_documents_iter.rs +++ /dev/null @@ -1,282 +0,0 @@ -use super::error::{Result, UpdateLoopError}; -use std::io::{Read, Result as IoResult}; - -use csv::{Reader as CsvReader, StringRecordsIntoIter}; -use serde_json::{Map, Value}; - -enum AllowedType { - String, - Number, -} - -fn parse_csv_header(header: &str) -> (String, AllowedType) { - // if 
there are several separators we only split on the last one. - match header.rsplit_once(':') { - Some((field_name, field_type)) => match field_type { - "string" => (field_name.to_string(), AllowedType::String), - "number" => (field_name.to_string(), AllowedType::Number), - // if the pattern isn't reconized, we keep the whole field. - _otherwise => (header.to_string(), AllowedType::String), - }, - None => (header.to_string(), AllowedType::String), - } -} - -pub struct CsvDocumentIter -where - R: Read, -{ - documents: StringRecordsIntoIter, - headers: Vec<(String, AllowedType)>, -} - -impl CsvDocumentIter { - pub fn from_reader(reader: R) -> IoResult { - let mut records = CsvReader::from_reader(reader); - - let headers = records - .headers()? - .into_iter() - .map(parse_csv_header) - .collect(); - - Ok(Self { - documents: records.into_records(), - headers, - }) - } -} - -impl Iterator for CsvDocumentIter { - type Item = Result>; - - fn next(&mut self) -> Option { - let csv_document = self.documents.next()?; - - match csv_document { - Ok(csv_document) => { - let mut document = Map::new(); - - for ((field_name, field_type), value) in - self.headers.iter().zip(csv_document.into_iter()) - { - let parsed_value = (|| match field_type { - AllowedType::Number => value - .parse::() - .map(Value::from) - .map_err(|e| UpdateLoopError::MalformedPayload(Box::new(e))), - AllowedType::String => Ok(Value::String(value.to_string())), - })(); - - match parsed_value { - Ok(value) => drop(document.insert(field_name.to_string(), value)), - Err(e) => return Some(Err(e)), - } - } - - Some(Ok(document)) - } - Err(e) => Some(Err(UpdateLoopError::MalformedPayload(Box::new(e)))), - } - } -} - -#[cfg(test)] -mod test { - use serde_json::json; - - use super::*; - - #[test] - fn simple_csv_document() { - let documents = r#"city,country,pop -"Boston","United States","4628910""#; - - let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); - - assert_eq!( - 
Value::Object(csv_iter.next().unwrap().unwrap()), - json!({ - "city": "Boston", - "country": "United States", - "pop": "4628910", - }) - ); - } - - #[test] - fn coma_in_field() { - let documents = r#"city,country,pop -"Boston","United, States","4628910""#; - - let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); - - assert_eq!( - Value::Object(csv_iter.next().unwrap().unwrap()), - json!({ - "city": "Boston", - "country": "United, States", - "pop": "4628910", - }) - ); - } - - #[test] - fn quote_in_field() { - let documents = r#"city,country,pop -"Boston","United"" States","4628910""#; - - let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); - - assert_eq!( - Value::Object(csv_iter.next().unwrap().unwrap()), - json!({ - "city": "Boston", - "country": "United\" States", - "pop": "4628910", - }) - ); - } - - #[test] - fn integer_in_field() { - let documents = r#"city,country,pop:number -"Boston","United States","4628910""#; - - let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); - - assert_eq!( - Value::Object(csv_iter.next().unwrap().unwrap()), - json!({ - "city": "Boston", - "country": "United States", - "pop": 4628910.0, - }) - ); - } - - #[test] - fn float_in_field() { - let documents = r#"city,country,pop:number -"Boston","United States","4628910.01""#; - - let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); - - assert_eq!( - Value::Object(csv_iter.next().unwrap().unwrap()), - json!({ - "city": "Boston", - "country": "United States", - "pop": 4628910.01, - }) - ); - } - - #[test] - fn several_double_dot_in_header() { - let documents = r#"city:love:string,country:state,pop -"Boston","United States","4628910""#; - - let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); - - assert_eq!( - Value::Object(csv_iter.next().unwrap().unwrap()), - json!({ - "city:love": "Boston", - "country:state": "United States", - "pop": "4628910", - }) - 
); - } - - #[test] - fn ending_by_double_dot_in_header() { - let documents = r#"city:,country,pop -"Boston","United States","4628910""#; - - let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); - - assert_eq!( - Value::Object(csv_iter.next().unwrap().unwrap()), - json!({ - "city:": "Boston", - "country": "United States", - "pop": "4628910", - }) - ); - } - - #[test] - fn starting_by_double_dot_in_header() { - let documents = r#":city,country,pop -"Boston","United States","4628910""#; - - let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); - - assert_eq!( - Value::Object(csv_iter.next().unwrap().unwrap()), - json!({ - ":city": "Boston", - "country": "United States", - "pop": "4628910", - }) - ); - } - - #[test] - fn starting_by_double_dot_in_header2() { - let documents = r#":string,country,pop -"Boston","United States","4628910""#; - - let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); - - assert!(csv_iter.next().unwrap().is_err()); - } - - #[test] - fn double_double_dot_in_header() { - let documents = r#"city::string,country,pop -"Boston","United States","4628910""#; - - let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); - - assert_eq!( - Value::Object(csv_iter.next().unwrap().unwrap()), - json!({ - "city:": "Boston", - "country": "United States", - "pop": "4628910", - }) - ); - } - - #[test] - fn bad_type_in_header() { - let documents = r#"city,country:number,pop -"Boston","United States","4628910""#; - - let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); - - assert!(csv_iter.next().unwrap().is_err()); - } - - #[test] - fn bad_column_count1() { - let documents = r#"city,country,pop -"Boston","United States","4628910", "too much""#; - - let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); - - assert!(csv_iter.next().unwrap().is_err()); - } - - #[test] - fn bad_column_count2() { - let documents = 
r#"city,country,pop -"Boston","United States""#; - - let mut csv_iter = CsvDocumentIter::from_reader(documents.as_bytes()).unwrap(); - - assert!(csv_iter.next().unwrap().is_err()); - } -} diff --git a/meilisearch-lib/src/index_controller/updates/mod.rs b/meilisearch-lib/src/index_controller/updates/mod.rs index 14f0a7c69..56ea779de 100644 --- a/meilisearch-lib/src/index_controller/updates/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/mod.rs @@ -1,10 +1,8 @@ -mod csv_documents_iter; pub mod error; mod message; pub mod status; pub mod store; -use crate::index_controller::updates::csv_documents_iter::CsvDocumentIter; use std::io; use std::path::{Path, PathBuf}; use std::sync::atomic::AtomicBool; @@ -15,7 +13,6 @@ use async_stream::stream; use bytes::Bytes; use futures::{Stream, StreamExt}; use log::trace; -use milli::documents::DocumentBatchBuilder; use milli::update::IndexDocumentsMethod; use serde::{Deserialize, Serialize}; use tokio::sync::mpsc; @@ -24,13 +21,13 @@ use uuid::Uuid; use self::error::{Result, UpdateLoopError}; pub use self::message::UpdateMsg; use self::store::{UpdateStore, UpdateStoreInfo}; -use crate::document_formats::read_json; +use crate::document_formats::{read_csv, read_json}; use crate::index::{Index, Settings, Unchecked}; use crate::index_controller::update_file_store::UpdateFileStore; use status::UpdateStatus; use super::index_resolver::HardStateIndexResolver; -use super::{DocumentAdditionFormat, Payload, Update}; +use super::{DocumentAdditionFormat, Update}; pub type UpdateSender = mpsc::Sender; @@ -198,6 +195,7 @@ impl UpdateLoop { tokio::task::spawn_blocking(move || -> Result<_> { match format { DocumentAdditionFormat::Json => read_json(reader, &mut *update_file)?, + DocumentAdditionFormat::Csv => read_csv(reader, &mut *update_file)?, } update_file.persist()?; @@ -225,26 +223,6 @@ impl UpdateLoop { Ok(status.into()) } - async fn documents_from_csv(&self, payload: Payload) -> Result { - let file_store = 
self.update_file_store.clone(); - tokio::task::spawn_blocking(move || { - let (uuid, mut file) = file_store.new_update().unwrap(); - let mut builder = DocumentBatchBuilder::new(&mut *file).unwrap(); - - let iter = CsvDocumentIter::from_reader(StreamReader::new(payload))?; - for doc in iter { - let doc = doc?; - builder.add_documents(doc).unwrap(); - } - builder.finish().unwrap(); - - file.persist(); - - Ok(uuid) - }) - .await? - } - async fn handle_list_updates(&self, uuid: Uuid) -> Result> { let update_store = self.store.clone(); tokio::task::spawn_blocking(move || { From 911630000f4b5eb3d25064b813293edf1b427869 Mon Sep 17 00:00:00 2001 From: mpostma Date: Wed, 29 Sep 2021 00:12:25 +0200 Subject: [PATCH 33/37] split csv and json document routes --- .../src/routes/indexes/documents.rs | 82 +++++++++---------- meilisearch-http/src/routes/mod.rs | 6 +- .../tests/documents/add_documents.rs | 4 +- .../src/index_controller/updates/mod.rs | 5 +- 4 files changed, 49 insertions(+), 48 deletions(-) diff --git a/meilisearch-http/src/routes/indexes/documents.rs b/meilisearch-http/src/routes/indexes/documents.rs index e89b75f28..dfa1244d1 100644 --- a/meilisearch-http/src/routes/indexes/documents.rs +++ b/meilisearch-http/src/routes/indexes/documents.rs @@ -18,7 +18,6 @@ use crate::routes::IndexParam; const DEFAULT_RETRIEVE_DOCUMENTS_OFFSET: usize = 0; const DEFAULT_RETRIEVE_DOCUMENTS_LIMIT: usize = 20; -/* macro_rules! guard_content_type { ($fn_name:ident, $guard_value:literal) => { fn $fn_name(head: &actix_web::dev::RequestHead) -> bool { @@ -33,9 +32,8 @@ macro_rules! 
guard_content_type { } }; } - guard_content_type!(guard_json, "application/json"); -*/ +guard_content_type!(guard_csv, "application/csv"); /// This is required because Payload is not Sync nor Send fn payload_to_stream(mut payload: Payload) -> impl Stream> { @@ -48,22 +46,6 @@ fn payload_to_stream(mut payload: Payload) -> impl Stream bool { - if let Some(_content_type) = head.headers.get("Content-Type") { - // CURRENTLY AND FOR THIS RELEASE ONLY WE DECIDED TO INTERPRET ALL CONTENT-TYPES AS JSON - true - /* - content_type - .to_str() - .map(|v| v.contains("application/json")) - .unwrap_or(false) - */ - } else { - // if no content-type is specified we still accept the data as json! - true - } -} - #[derive(Deserialize)] pub struct DocumentParam { index_uid: String, @@ -74,8 +56,10 @@ pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( web::resource("") .route(web::get().to(get_all_documents)) - .route(web::post().guard(guard_json).to(add_documents)) - .route(web::put().guard(guard_json).to(update_documents)) + .route(web::post().guard(guard_json).to(add_documents_json)) + .route(web::post().guard(guard_csv).to(add_documents_csv)) + .route(web::post().guard(guard_json).to(update_documents_json)) + .route(web::post().guard(guard_csv).to(update_documents_csv)) .route(web::delete().to(clear_all_documents)), ) // this route needs to be before the /documents/{document_id} to match properly @@ -159,43 +143,57 @@ pub struct UpdateDocumentsQuery { primary_key: Option, } -/// Route used when the payload type is "application/json" -/// Used to add or replace documents -pub async fn add_documents( +pub async fn add_documents_json( meilisearch: GuardedData, path: web::Path, params: web::Query, body: Payload, ) -> Result { - debug!("called with params: {:?}", params); - let update = Update::DocumentAddition { - payload: Box::new(payload_to_stream(body)), - primary_key: params.primary_key.clone(), - method: IndexDocumentsMethod::ReplaceDocuments, - format: 
DocumentAdditionFormat::Json, - }; - let update_status = meilisearch - .register_update(path.into_inner().index_uid, update, true) - .await?; - - debug!("returns: {:?}", update_status); - Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))) + document_addition(meilisearch, path, params, body, DocumentAdditionFormat::Json, IndexDocumentsMethod::ReplaceDocuments).await } -/// Route used when the payload type is "application/json" -/// Used to add or replace documents -pub async fn update_documents( +pub async fn add_documents_csv( meilisearch: GuardedData, path: web::Path, params: web::Query, body: Payload, +) -> Result { + document_addition(meilisearch, path, params, body, DocumentAdditionFormat::Csv, IndexDocumentsMethod::ReplaceDocuments).await +} + +pub async fn update_documents_json( + meilisearch: GuardedData, + path: web::Path, + params: web::Query, + body: Payload, +) -> Result { + document_addition(meilisearch, path, params, body, DocumentAdditionFormat::Json, IndexDocumentsMethod::UpdateDocuments).await +} + +pub async fn update_documents_csv( + meilisearch: GuardedData, + path: web::Path, + params: web::Query, + body: Payload, +) -> Result { + document_addition(meilisearch, path, params, body, DocumentAdditionFormat::Csv, IndexDocumentsMethod::UpdateDocuments).await +} +/// Route used when the payload type is "application/json" +/// Used to add or replace documents +async fn document_addition( + meilisearch: GuardedData, + path: web::Path, + params: web::Query, + body: Payload, + format: DocumentAdditionFormat, + method: IndexDocumentsMethod, ) -> Result { debug!("called with params: {:?}", params); let update = Update::DocumentAddition { payload: Box::new(payload_to_stream(body)), primary_key: params.primary_key.clone(), - method: IndexDocumentsMethod::UpdateDocuments, - format: DocumentAdditionFormat::Json, + method, + format, }; let update_status = meilisearch .register_update(path.into_inner().index_uid, update, 
true) diff --git a/meilisearch-http/src/routes/mod.rs b/meilisearch-http/src/routes/mod.rs index 382147f31..9c0e0bed1 100644 --- a/meilisearch-http/src/routes/mod.rs +++ b/meilisearch-http/src/routes/mod.rs @@ -361,8 +361,10 @@ mod test { indexes::documents::clear_all_documents, indexes::documents::delete_documents, - indexes::documents::update_documents, - indexes::documents::add_documents, + indexes::documents::update_documents_json, + indexes::documents::update_documents_csv, + indexes::documents::add_documents_json, + indexes::documents::add_documents_csv, indexes::documents::delete_document, indexes::updates::get_all_updates_status, diff --git a/meilisearch-http/tests/documents/add_documents.rs b/meilisearch-http/tests/documents/add_documents.rs index 42fdc7509..13265dcfd 100644 --- a/meilisearch-http/tests/documents/add_documents.rs +++ b/meilisearch-http/tests/documents/add_documents.rs @@ -152,8 +152,8 @@ async fn document_add_create_index_bad_uid() { async fn document_update_create_index_bad_uid() { let server = Server::new().await; let index = server.index("883 fj!"); - let (_response, code) = index.update_documents(json!([]), None).await; - assert_eq!(code, 400); + let (response, code) = index.update_documents(json!([]), None).await; + assert_eq!(code, 400, "{}", response); } #[actix_rt::test] diff --git a/meilisearch-lib/src/index_controller/updates/mod.rs b/meilisearch-lib/src/index_controller/updates/mod.rs index 56ea779de..474f0e77e 100644 --- a/meilisearch-lib/src/index_controller/updates/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/mod.rs @@ -67,8 +67,9 @@ impl> + Unpin> io::Rea // TODO: optimize buf filling match self.current.take() { Some(mut bytes) => { - let copied = bytes.split_to(buf.len()); - buf.copy_from_slice(&copied); + let split_at = bytes.len().min(buf.len()); + let copied = bytes.split_to(split_at); + buf[..split_at].copy_from_slice(&copied); if !bytes.is_empty() { self.current.replace(bytes); } From 
5bac65f8b81b7ce189a358aff95580f99f84acc9 Mon Sep 17 00:00:00 2001 From: mpostma Date: Wed, 29 Sep 2021 00:19:08 +0200 Subject: [PATCH 34/37] add missing content type errors --- meilisearch-http/src/routes/indexes/documents.rs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/meilisearch-http/src/routes/indexes/documents.rs b/meilisearch-http/src/routes/indexes/documents.rs index dfa1244d1..ae66d439d 100644 --- a/meilisearch-http/src/routes/indexes/documents.rs +++ b/meilisearch-http/src/routes/indexes/documents.rs @@ -32,9 +32,14 @@ macro_rules! guard_content_type { } }; } + guard_content_type!(guard_json, "application/json"); guard_content_type!(guard_csv, "application/csv"); +fn empty_application_type(head: &actix_web::dev::RequestHead) -> bool { + head.headers.get("Content-Type").is_none() +} + /// This is required because Payload is not Sync nor Send fn payload_to_stream(mut payload: Payload) -> impl Stream> { let (snd, recv) = mpsc::channel(1); @@ -56,10 +61,16 @@ pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( web::resource("") .route(web::get().to(get_all_documents)) + + .route(web::post().guard(empty_application_type).to(|| HttpResponse::UnsupportedMediaType())) .route(web::post().guard(guard_json).to(add_documents_json)) .route(web::post().guard(guard_csv).to(add_documents_csv)) - .route(web::post().guard(guard_json).to(update_documents_json)) - .route(web::post().guard(guard_csv).to(update_documents_csv)) + .route(web::post().to(|| HttpResponse::UnsupportedMediaType())) + + .route(web::put().guard(empty_application_type).to(|| HttpResponse::UnsupportedMediaType())) + .route(web::put().guard(guard_json).to(update_documents_json)) + .route(web::put().guard(guard_csv).to(update_documents_csv)) + .route(web::put().to(|| HttpResponse::UnsupportedMediaType())) .route(web::delete().to(clear_all_documents)), ) // this route needs to be before the /documents/{document_id} to match properly From 
1f537e1b601af511a9aef79b2e68b0e670ab48ad Mon Sep 17 00:00:00 2001 From: mpostma Date: Wed, 29 Sep 2021 10:17:52 +0200 Subject: [PATCH 35/37] jsonl support --- Cargo.lock | 1 + .../src/routes/indexes/documents.rs | 97 +++++++++++++++++-- meilisearch-lib/Cargo.toml | 2 +- meilisearch-lib/src/document_formats.rs | 27 +++--- meilisearch-lib/src/index/dump.rs | 4 +- .../index_controller/dump_actor/loaders/v1.rs | 6 +- meilisearch-lib/src/index_controller/mod.rs | 1 + .../src/index_controller/update_file_store.rs | 17 ++-- .../src/index_controller/updates/error.rs | 2 - .../src/index_controller/updates/mod.rs | 5 +- 10 files changed, 121 insertions(+), 41 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d7ab9dbe4..429eff063 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1779,6 +1779,7 @@ dependencies = [ [[package]] name = "milli" version = "0.16.0" +source = "git+https://github.com/meilisearch/milli.git?rev=f65153ad6454317213680e9a9a908ec78d5645a7#f65153ad6454317213680e9a9a908ec78d5645a7" dependencies = [ "bimap", "bincode", diff --git a/meilisearch-http/src/routes/indexes/documents.rs b/meilisearch-http/src/routes/indexes/documents.rs index ae66d439d..5f465c394 100644 --- a/meilisearch-http/src/routes/indexes/documents.rs +++ b/meilisearch-http/src/routes/indexes/documents.rs @@ -35,6 +35,7 @@ macro_rules! 
guard_content_type { guard_content_type!(guard_json, "application/json"); guard_content_type!(guard_csv, "application/csv"); +guard_content_type!(guard_ndjson, "application/ndjson"); fn empty_application_type(head: &actix_web::dev::RequestHead) -> bool { head.headers.get("Content-Type").is_none() @@ -61,16 +62,26 @@ pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( web::resource("") .route(web::get().to(get_all_documents)) - - .route(web::post().guard(empty_application_type).to(|| HttpResponse::UnsupportedMediaType())) + // replace documents routes + .route( + web::post() + .guard(empty_application_type) + .to(HttpResponse::UnsupportedMediaType), + ) .route(web::post().guard(guard_json).to(add_documents_json)) + .route(web::post().guard(guard_ndjson).to(add_documents_ndjson)) .route(web::post().guard(guard_csv).to(add_documents_csv)) - .route(web::post().to(|| HttpResponse::UnsupportedMediaType())) - - .route(web::put().guard(empty_application_type).to(|| HttpResponse::UnsupportedMediaType())) + .route(web::post().to(HttpResponse::UnsupportedMediaType)) + // update documents routes + .route( + web::put() + .guard(empty_application_type) + .to(HttpResponse::UnsupportedMediaType), + ) .route(web::put().guard(guard_json).to(update_documents_json)) + .route(web::put().guard(guard_ndjson).to(update_documents_ndjson)) .route(web::put().guard(guard_csv).to(update_documents_csv)) - .route(web::put().to(|| HttpResponse::UnsupportedMediaType())) + .route(web::put().to(HttpResponse::UnsupportedMediaType)) .route(web::delete().to(clear_all_documents)), ) // this route needs to be before the /documents/{document_id} to match properly @@ -160,7 +171,32 @@ pub async fn add_documents_json( params: web::Query, body: Payload, ) -> Result { - document_addition(meilisearch, path, params, body, DocumentAdditionFormat::Json, IndexDocumentsMethod::ReplaceDocuments).await + document_addition( + meilisearch, + path, + params, + body, + DocumentAdditionFormat::Json, + 
IndexDocumentsMethod::ReplaceDocuments, + ) + .await +} + +pub async fn add_documents_ndjson( + meilisearch: GuardedData, + path: web::Path, + params: web::Query, + body: Payload, +) -> Result { + document_addition( + meilisearch, + path, + params, + body, + DocumentAdditionFormat::Ndjson, + IndexDocumentsMethod::ReplaceDocuments, + ) + .await } pub async fn add_documents_csv( @@ -169,7 +205,15 @@ pub async fn add_documents_csv( params: web::Query, body: Payload, ) -> Result { - document_addition(meilisearch, path, params, body, DocumentAdditionFormat::Csv, IndexDocumentsMethod::ReplaceDocuments).await + document_addition( + meilisearch, + path, + params, + body, + DocumentAdditionFormat::Csv, + IndexDocumentsMethod::ReplaceDocuments, + ) + .await } pub async fn update_documents_json( @@ -178,7 +222,32 @@ pub async fn update_documents_json( params: web::Query, body: Payload, ) -> Result { - document_addition(meilisearch, path, params, body, DocumentAdditionFormat::Json, IndexDocumentsMethod::UpdateDocuments).await + document_addition( + meilisearch, + path, + params, + body, + DocumentAdditionFormat::Json, + IndexDocumentsMethod::UpdateDocuments, + ) + .await +} + +pub async fn update_documents_ndjson( + meilisearch: GuardedData, + path: web::Path, + params: web::Query, + body: Payload, +) -> Result { + document_addition( + meilisearch, + path, + params, + body, + DocumentAdditionFormat::Ndjson, + IndexDocumentsMethod::UpdateDocuments, + ) + .await } pub async fn update_documents_csv( @@ -187,7 +256,15 @@ pub async fn update_documents_csv( params: web::Query, body: Payload, ) -> Result { - document_addition(meilisearch, path, params, body, DocumentAdditionFormat::Csv, IndexDocumentsMethod::UpdateDocuments).await + document_addition( + meilisearch, + path, + params, + body, + DocumentAdditionFormat::Csv, + IndexDocumentsMethod::UpdateDocuments, + ) + .await } /// Route used when the payload type is "application/json" /// Used to add or replace documents diff --git 
a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml index a52bd9e43..baf95d286 100644 --- a/meilisearch-lib/Cargo.toml +++ b/meilisearch-lib/Cargo.toml @@ -31,7 +31,7 @@ log = "0.4.14" meilisearch-error = { path = "../meilisearch-error" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" } memmap = "0.7.0" -milli = { path = "../../milli/milli" } +milli = { git = "https://github.com/meilisearch/milli.git", rev = "f65153ad6454317213680e9a9a908ec78d5645a7"} mime = "0.3.16" num_cpus = "1.13.0" once_cell = "1.8.0" diff --git a/meilisearch-lib/src/document_formats.rs b/meilisearch-lib/src/document_formats.rs index f06a509c2..334b6f601 100644 --- a/meilisearch-lib/src/document_formats.rs +++ b/meilisearch-lib/src/document_formats.rs @@ -1,5 +1,5 @@ -use std::io::{self, Read, Result as IoResult, Seek, Write}; use std::fmt; +use std::io::{self, Read, Result as IoResult, Seek, Write}; use csv::{Reader as CsvReader, StringRecordsIntoIter}; use milli::documents::DocumentBatchBuilder; @@ -9,7 +9,7 @@ type Result = std::result::Result; #[derive(Debug)] pub enum PayloadType { - Jsonl, + Ndjson, Json, Csv, } @@ -17,7 +17,7 @@ pub enum PayloadType { impl fmt::Display for PayloadType { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - PayloadType::Jsonl => write!(f, "ndjson"), + PayloadType::Ndjson => write!(f, "ndjson"), PayloadType::Json => write!(f, "json"), PayloadType::Csv => write!(f, "csv"), } @@ -56,14 +56,13 @@ pub fn read_csv(input: impl Read, writer: impl Write + Seek) -> Result<()> { Ok(()) } - /// read jsonl from input and write an obkv batch to writer. 
-pub fn read_jsonl(input: impl Read, writer: impl Write + Seek) -> Result<()> { +pub fn read_ndjson(input: impl Read, writer: impl Write + Seek) -> Result<()> { let mut builder = DocumentBatchBuilder::new(writer)?; let stream = Deserializer::from_reader(input).into_iter::>(); for value in stream { - let value = malformed!(PayloadType::Jsonl, value)?; + let value = malformed!(PayloadType::Ndjson, value)?; builder.add_documents(&value)?; } @@ -84,7 +83,6 @@ pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result<()> { Ok(()) } - enum AllowedType { String, Number, @@ -141,12 +139,12 @@ impl Iterator for CsvDocumentIter { for ((field_name, field_type), value) in self.headers.iter().zip(csv_document.into_iter()) { - let parsed_value = (|| match field_type { - AllowedType::Number => malformed!(PayloadType::Csv, value - .parse::() - .map(Value::from)), + let parsed_value = match field_type { + AllowedType::Number => { + malformed!(PayloadType::Csv, value.parse::().map(Value::from)) + } AllowedType::String => Ok(Value::String(value.to_string())), - })(); + }; match parsed_value { Ok(value) => drop(document.insert(field_name.to_string(), value)), @@ -156,7 +154,10 @@ impl Iterator for CsvDocumentIter { Some(Ok(document)) } - Err(e) => Some(Err(DocumentFormatError::MalformedPayload(Box::new(e), PayloadType::Csv))), + Err(e) => Some(Err(DocumentFormatError::MalformedPayload( + Box::new(e), + PayloadType::Csv, + ))), } } } diff --git a/meilisearch-lib/src/index/dump.rs b/meilisearch-lib/src/index/dump.rs index f6e081760..4a769f136 100644 --- a/meilisearch-lib/src/index/dump.rs +++ b/meilisearch-lib/src/index/dump.rs @@ -9,7 +9,7 @@ use milli::documents::DocumentBatchReader; use serde::{Deserialize, Serialize}; use serde_json::Value; -use crate::document_formats::read_jsonl; +use crate::document_formats::read_ndjson; use crate::index::update_handler::UpdateHandler; use crate::index::updates::apply_settings_to_builder; use 
crate::index_controller::{asc_ranking_rule, desc_ranking_rule}; @@ -142,7 +142,7 @@ impl Index { let mut tmp_doc_file = tempfile::tempfile()?; - read_jsonl(reader, &mut tmp_doc_file)?; + read_ndjson(reader, &mut tmp_doc_file)?; tmp_doc_file.seek(SeekFrom::Start(0))?; diff --git a/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs b/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs index 840fd7ccc..1ad92dd56 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs @@ -11,7 +11,7 @@ use milli::update::Setting; use serde::{Deserialize, Deserializer, Serialize}; use uuid::Uuid; -use crate::document_formats::read_jsonl; +use crate::document_formats::read_ndjson; use crate::index::apply_settings_to_builder; use crate::index::update_handler::UpdateHandler; use crate::index_controller::index_resolver::uuid_store::HeedUuidStore; @@ -124,7 +124,7 @@ fn load_index( let mut tmp_doc_file = tempfile::tempfile()?; - read_jsonl(reader, &mut tmp_doc_file)?; + read_ndjson(reader, &mut tmp_doc_file)?; tmp_doc_file.seek(SeekFrom::Start(0))?; @@ -213,7 +213,7 @@ impl From for index_controller::Settings { } } -// /// Extract Settings from `settings.json` file present at provided `dir_path` +/// Extract Settings from `settings.json` file present at provided `dir_path` fn import_settings(dir_path: impl AsRef) -> anyhow::Result { let path = dir_path.as_ref().join("settings.json"); let file = File::open(path)?; diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index 438e5af3e..4938e7c8d 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -72,6 +72,7 @@ pub struct IndexController { pub enum DocumentAdditionFormat { Json, Csv, + Ndjson, } #[derive(Serialize, Debug)] diff --git a/meilisearch-lib/src/index_controller/update_file_store.rs 
b/meilisearch-lib/src/index_controller/update_file_store.rs index f7a7e3a1a..483fa80f8 100644 --- a/meilisearch-lib/src/index_controller/update_file_store.rs +++ b/meilisearch-lib/src/index_controller/update_file_store.rs @@ -10,7 +10,7 @@ use uuid::Uuid; const UPDATE_FILES_PATH: &str = "updates/updates_files"; -use crate::document_formats::read_jsonl; +use crate::document_formats::read_ndjson; pub struct UpdateFile { path: PathBuf, @@ -86,7 +86,7 @@ impl UpdateFileStore { .ok_or_else(|| anyhow::anyhow!("invalid update file name"))?; let dst_path = dst_update_files_path.join(file_uuid); let dst_file = BufWriter::new(File::create(dst_path)?); - read_jsonl(update_file, dst_file)?; + read_ndjson(update_file, dst_file)?; } Ok(()) @@ -98,9 +98,9 @@ impl UpdateFileStore { Ok(Self { path }) } - /// Created a new temporary update file. + /// Creates a new temporary update file. /// - /// A call to persist is needed to persist in the database. + /// A call to `persist` is needed to persist the file in the database. pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> { let file = NamedTempFile::new()?; let uuid = Uuid::new_v4(); @@ -110,14 +110,14 @@ impl UpdateFileStore { Ok((uuid, update_file)) } - /// Returns a the file corresponding to the requested uuid. + /// Returns the file corresponding to the requested uuid. pub fn get_update(&self, uuid: Uuid) -> Result { let path = self.path.join(uuid.to_string()); let file = File::open(path)?; Ok(file) } - /// Copies the content of the update file poited to by uuid to dst directory. + /// Copies the content of the update file pointed to by `uuid` to the `dst` directory. pub fn snapshot(&self, uuid: Uuid, dst: impl AsRef) -> Result<()> { let src = self.path.join(uuid.to_string()); let mut dst = dst.as_ref().join(UPDATE_FILES_PATH); @@ -127,7 +127,7 @@ impl UpdateFileStore { Ok(()) } - /// Peform a dump of the given update file uuid into the provided snapshot path. 
+ /// Performs a dump of the given update file uuid into the provided dump path. pub fn dump(&self, uuid: Uuid, dump_path: impl AsRef) -> Result<()> { let uuid_string = uuid.to_string(); let update_file_path = self.path.join(&uuid_string); @@ -140,7 +140,8 @@ impl UpdateFileStore { let mut document_reader = DocumentBatchReader::from_reader(update_file)?; let mut document_buffer = Map::new(); - // TODO: we need to find a way to do this more efficiently. (create a custom serializer to + // TODO: we need to find a way to do this more efficiently. (create a custom serializer + // for // jsonl for example...) while let Some((index, document)) = document_reader.next_document_with_index()? { for (field_id, content) in document.iter() { diff --git a/meilisearch-lib/src/index_controller/updates/error.rs b/meilisearch-lib/src/index_controller/updates/error.rs index 217567569..4948ea164 100644 --- a/meilisearch-lib/src/index_controller/updates/error.rs +++ b/meilisearch-lib/src/index_controller/updates/error.rs @@ -17,8 +17,6 @@ pub enum UpdateLoopError { UnexistingUpdate(u64), #[error("Internal error: {0}")] Internal(Box), - //#[error("{0}")] - //IndexActor(#[from] IndexActorError), #[error( "update store was shut down due to a fatal error, please check your logs for more info."
)] diff --git a/meilisearch-lib/src/index_controller/updates/mod.rs b/meilisearch-lib/src/index_controller/updates/mod.rs index 474f0e77e..c3b15e5af 100644 --- a/meilisearch-lib/src/index_controller/updates/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/mod.rs @@ -21,7 +21,7 @@ use uuid::Uuid; use self::error::{Result, UpdateLoopError}; pub use self::message::UpdateMsg; use self::store::{UpdateStore, UpdateStoreInfo}; -use crate::document_formats::{read_csv, read_json}; +use crate::document_formats::{read_csv, read_json, read_ndjson}; use crate::index::{Index, Settings, Unchecked}; use crate::index_controller::update_file_store::UpdateFileStore; use status::UpdateStatus; @@ -40,7 +40,7 @@ pub fn create_update_handler( let (sender, receiver) = mpsc::channel(100); let actor = UpdateLoop::new(update_store_size, receiver, path, index_resolver)?; - tokio::task::spawn_local(actor.run()); + tokio::task::spawn(actor.run()); Ok(sender) } @@ -197,6 +197,7 @@ impl UpdateLoop { match format { DocumentAdditionFormat::Json => read_json(reader, &mut *update_file)?, DocumentAdditionFormat::Csv => read_csv(reader, &mut *update_file)?, + DocumentAdditionFormat::Ndjson => read_ndjson(reader, &mut *update_file)?, } update_file.persist()?; From 8fa6502b1615fb9564d57c3356bba0b6bceb81eb Mon Sep 17 00:00:00 2001 From: mpostma Date: Wed, 29 Sep 2021 12:02:27 +0200 Subject: [PATCH 36/37] review changes --- meilisearch-http/src/error.rs | 48 ------------------- meilisearch-http/src/lib.rs | 2 - meilisearch-lib/src/compression.rs | 22 ++++----- meilisearch-lib/src/index/updates.rs | 4 ++ .../index_controller/dump_actor/loaders/v1.rs | 3 +- .../src/index_controller/dump_actor/mod.rs | 5 +- .../index_controller/index_resolver/mod.rs | 2 +- meilisearch-lib/src/index_controller/mod.rs | 6 +-- .../src/index_controller/snapshot.rs | 3 +- .../src/index_controller/update_file_store.rs | 9 +++- .../src/index_controller/updates/store/mod.rs | 30 +++++------- meilisearch-lib/src/lib.rs | 27 
----------- 12 files changed, 43 insertions(+), 118 deletions(-) diff --git a/meilisearch-http/src/error.rs b/meilisearch-http/src/error.rs index fb44b9a49..bb18ad6f4 100644 --- a/meilisearch-http/src/error.rs +++ b/meilisearch-http/src/error.rs @@ -7,7 +7,6 @@ use actix_web::http::StatusCode; use actix_web::HttpResponseBuilder; use aweb::error::{JsonPayloadError, QueryPayloadError}; use meilisearch_error::{Code, ErrorCode}; -use meilisearch_lib::milli; use serde::{Deserialize, Serialize}; #[derive(Debug, Serialize, Deserialize, Clone)] @@ -55,53 +54,6 @@ impl aweb::error::ResponseError for ResponseError { } } -#[derive(Debug)] -pub struct MilliError<'a>(pub &'a milli::Error); - -impl Error for MilliError<'_> {} - -impl fmt::Display for MilliError<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.0.fmt(f) - } -} - -impl ErrorCode for MilliError<'_> { - fn error_code(&self) -> Code { - use milli::UserError; - - match self.0 { - milli::Error::InternalError(_) => Code::Internal, - milli::Error::IoError(_) => Code::Internal, - milli::Error::UserError(ref error) => { - match error { - // TODO: wait for spec for new error codes. - UserError::SerdeJson(_) - | UserError::MaxDatabaseSizeReached - | UserError::InvalidDocumentId { .. } - | UserError::InvalidStoreFile - | UserError::NoSpaceLeftOnDevice - | UserError::DocumentLimitReached => Code::Internal, - UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded, - UserError::InvalidFilter(_) => Code::Filter, - UserError::InvalidFilterAttribute(_) => Code::Filter, - UserError::MissingDocumentId { .. } => Code::MissingDocumentId, - UserError::MissingPrimaryKey => Code::MissingPrimaryKey, - UserError::PrimaryKeyCannotBeChanged => Code::PrimaryKeyAlreadyPresent, - UserError::PrimaryKeyCannotBeReset => Code::PrimaryKeyAlreadyPresent, - UserError::SortRankingRuleMissing => Code::Sort, - UserError::UnknownInternalDocumentId { .. 
} => Code::DocumentNotFound, - UserError::InvalidFacetsDistribution { .. } => Code::BadRequest, - UserError::InvalidGeoField { .. } => Code::InvalidGeoField, - UserError::InvalidSortableAttribute { .. } => Code::Sort, - UserError::SortError(_) => Code::Sort, - UserError::CriterionError(_) => Code::InvalidRankingRule, - } - } - } - } -} - impl fmt::Display for PayloadError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { diff --git a/meilisearch-http/src/lib.rs b/meilisearch-http/src/lib.rs index 0e479b122..219e8b1c8 100644 --- a/meilisearch-http/src/lib.rs +++ b/meilisearch-http/src/lib.rs @@ -96,8 +96,6 @@ pub fn configure_data(config: &mut web::ServiceConfig, data: MeiliSearch, opt: & let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize; config .app_data(data) - // TODO!: Why are we passing the data with two different things? - //.app_data(data) .app_data( web::JsonConfig::default() .limit(http_payload_size_limit) diff --git a/meilisearch-lib/src/compression.rs b/meilisearch-lib/src/compression.rs index a71a02a55..c4747cb21 100644 --- a/meilisearch-lib/src/compression.rs +++ b/meilisearch-lib/src/compression.rs @@ -1,9 +1,9 @@ -use std::fs::File; +use std::fs::{create_dir_all, File}; use std::io::Write; use std::path::Path; -use flate2::{write::GzEncoder, Compression}; -use tar::Builder; +use flate2::{read::GzDecoder, write::GzEncoder, Compression}; +use tar::{Archive, Builder}; pub fn to_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Result<()> { let mut f = File::create(dest)?; @@ -16,11 +16,11 @@ pub fn to_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Resul Ok(()) } -//pub fn from_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Result<()> { -//let f = File::open(&src)?; -//let gz = GzDecoder::new(f); -//let mut ar = Archive::new(gz); -//create_dir_all(&dest)?; -//ar.unpack(&dest)?; -//Ok(()) -//} +pub fn from_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Result<()> { + let f = 
File::open(&src)?; + let gz = GzDecoder::new(f); + let mut ar = Archive::new(gz); + create_dir_all(&dest)?; + ar.unpack(&dest)?; + Ok(()) +} diff --git a/meilisearch-lib/src/index/updates.rs b/meilisearch-lib/src/index/updates.rs index b5de21403..0ae0aefdb 100644 --- a/meilisearch-lib/src/index/updates.rs +++ b/meilisearch-lib/src/index/updates.rs @@ -206,6 +206,10 @@ impl Index { result })(); + if let Update::DocumentAddition { content_uuid, .. } = update.from.meta() { + let _ = self.update_file_store.delete(*content_uuid); + } + match result { Ok(result) => Ok(update.process(result)), Err(e) => Err(update.fail(e)), diff --git a/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs b/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs index 1ad92dd56..a41e18683 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs @@ -15,8 +15,7 @@ use crate::document_formats::read_ndjson; use crate::index::apply_settings_to_builder; use crate::index::update_handler::UpdateHandler; use crate::index_controller::index_resolver::uuid_store::HeedUuidStore; -use crate::index_controller::{self, IndexMetadata}; -use crate::index_controller::{asc_ranking_rule, desc_ranking_rule}; +use crate::index_controller::{self, asc_ranking_rule, desc_ranking_rule, IndexMetadata}; use crate::{index::Unchecked, options::IndexerOpts}; #[derive(Serialize, Deserialize, Debug)] diff --git a/meilisearch-lib/src/index_controller/dump_actor/mod.rs b/meilisearch-lib/src/index_controller/dump_actor/mod.rs index 72a83a505..3f9d33223 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/mod.rs @@ -16,6 +16,7 @@ pub use message::DumpMsg; use super::index_resolver::HardStateIndexResolver; use super::updates::UpdateSender; +use crate::compression::{from_tar_gz, to_tar_gz}; use crate::index_controller::dump_actor::error::DumpActorError; use 
crate::index_controller::updates::UpdateMsg; use crate::options::IndexerOpts; @@ -111,7 +112,7 @@ pub fn load_dump( let tmp_src = tempfile::tempdir()?; let tmp_src_path = tmp_src.path(); - crate::from_tar_gz(&src_path, tmp_src_path)?; + from_tar_gz(&src_path, tmp_src_path)?; let meta_path = tmp_src_path.join(META_FILE_NAME); let mut meta_file = File::open(&meta_path)?; @@ -172,7 +173,7 @@ impl DumpTask { let dump_path = tokio::task::spawn_blocking(move || -> Result { let temp_dump_file = tempfile::NamedTempFile::new()?; - crate::to_tar_gz(temp_dump_path, temp_dump_file.path()) + to_tar_gz(temp_dump_path, temp_dump_file.path()) .map_err(|e| DumpActorError::Internal(e.into()))?; let dump_path = self.path.join(self.uid).with_extension("dump"); diff --git a/meilisearch-lib/src/index_controller/index_resolver/mod.rs b/meilisearch-lib/src/index_controller/index_resolver/mod.rs index 9f86f7b08..008d0d219 100644 --- a/meilisearch-lib/src/index_controller/index_resolver/mod.rs +++ b/meilisearch-lib/src/index_controller/index_resolver/mod.rs @@ -143,7 +143,7 @@ where Some(index) => Ok(index), None => { // For some reason we got a uuid to an unexisting index, we return an error, - // and remove the uuid from th uuid store. + // and remove the uuid from the uuid store. let _ = self.index_uuid_store.delete(name.clone()).await; Err(IndexResolverError::UnexistingIndex(name)) } diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index 4938e7c8d..52b2b1d01 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -496,13 +496,9 @@ pub fn asc_ranking_rule(text: &str) -> Option<&str> { .map(|(field, _)| field) } -/// Parses the v1 version of the Desc ranking rules `asc(price)`and returns the field name. +/// Parses the v1 version of the Desc ranking rules `desc(price)`and returns the field name. 
pub fn desc_ranking_rule(text: &str) -> Option<&str> { text.split_once("desc(") .and_then(|(_, tail)| tail.rsplit_once(")")) .map(|(field, _)| field) } - -fn update_files_path(path: impl AsRef) -> PathBuf { - path.as_ref().join("updates/updates_files") -} diff --git a/meilisearch-lib/src/index_controller/snapshot.rs b/meilisearch-lib/src/index_controller/snapshot.rs index 2d83a491c..36e45547e 100644 --- a/meilisearch-lib/src/index_controller/snapshot.rs +++ b/meilisearch-lib/src/index_controller/snapshot.rs @@ -8,6 +8,7 @@ use tokio::fs; use tokio::task::spawn_blocking; use tokio::time::sleep; +use crate::compression::from_tar_gz; use crate::index_controller::updates::UpdateMsg; use super::index_resolver::HardStateIndexResolver; @@ -95,7 +96,7 @@ pub fn load_snapshot( ignore_missing_snapshot: bool, ) -> anyhow::Result<()> { if !db_path.as_ref().exists() && snapshot_path.as_ref().exists() { - match crate::from_tar_gz(snapshot_path, &db_path) { + match from_tar_gz(snapshot_path, &db_path) { Ok(()) => Ok(()), Err(e) => { //clean created db folder diff --git a/meilisearch-lib/src/index_controller/update_file_store.rs b/meilisearch-lib/src/index_controller/update_file_store.rs index 483fa80f8..09ddc1d89 100644 --- a/meilisearch-lib/src/index_controller/update_file_store.rs +++ b/meilisearch-lib/src/index_controller/update_file_store.rs @@ -141,8 +141,7 @@ impl UpdateFileStore { let mut document_buffer = Map::new(); // TODO: we need to find a way to do this more efficiently. (create a custom serializer - // for - // jsonl for example...) + // for jsonl for example...) while let Some((index, document)) = document_reader.next_document_with_index()? 
{ for (field_id, content) in document.iter() { if let Some(field_name) = index.get_by_left(&field_id) { @@ -164,4 +163,10 @@ impl UpdateFileStore { pub fn get_size(&self, uuid: Uuid) -> Result { Ok(self.get_update(uuid)?.metadata()?.len()) } + + pub fn delete(&self, uuid: Uuid) -> Result<()> { + let path = self.path.join(uuid.to_string()); + std::fs::remove_file(path)?; + Ok(()) + } } diff --git a/meilisearch-lib/src/index_controller/updates/store/mod.rs b/meilisearch-lib/src/index_controller/updates/store/mod.rs index bb77250b5..df89d6ecc 100644 --- a/meilisearch-lib/src/index_controller/updates/store/mod.rs +++ b/meilisearch-lib/src/index_controller/updates/store/mod.rs @@ -1,7 +1,7 @@ mod codec; pub mod dump; -use std::fs::{create_dir_all, remove_file}; +use std::fs::create_dir_all; use std::path::Path; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; @@ -29,7 +29,6 @@ use codec::*; use super::error::Result; use super::status::{Enqueued, Processing}; use crate::index::Index; -use crate::index_controller::update_files_path; use crate::index_controller::updates::*; use crate::EnvSizer; @@ -269,8 +268,8 @@ impl UpdateStore { Ok(meta) } - // /// Push already processed update in the UpdateStore without triggering the notification - // /// process. This is useful for the dumps. + /// Push already processed update in the UpdateStore without triggering the notification + /// process. This is useful for the dumps. pub fn register_raw_updates( &self, wtxn: &mut heed::RwTxn, @@ -436,19 +435,19 @@ impl UpdateStore { pub fn delete_all(&self, index_uuid: Uuid) -> Result<()> { let mut txn = self.env.write_txn()?; // Contains all the content file paths that we need to be removed if the deletion was successful. 
- let uuids_to_remove = Vec::new(); + let mut uuids_to_remove = Vec::new(); let mut pendings = self.pending_queue.iter_mut(&mut txn)?.lazily_decode_data(); while let Some(Ok(((_, uuid, _), pending))) = pendings.next() { if uuid == index_uuid { - let mut _pending = pending.decode()?; - //if let Some(update_uuid) = pending.content.take() { - //uuids_to_remove.push(update_uuid); - //} + let pending = pending.decode()?; + if let Update::DocumentAddition { content_uuid, .. } = pending.meta() { + uuids_to_remove.push(*content_uuid); + } - // Invariant check: we can only delete the current entry when we don't hold - // references to it anymore. This must be done after we have retrieved its content. + //Invariant check: we can only delete the current entry when we don't hold + //references to it anymore. This must be done after we have retrieved its content. unsafe { pendings.del_current()?; } @@ -485,12 +484,9 @@ impl UpdateStore { // Finally, remove any outstanding update files. This must be done after waiting for the // last update to ensure that the update files are not deleted before the update needs // them. 
- uuids_to_remove - .iter() - .map(|uuid: &Uuid| update_files_path(&self.path).join(uuid.to_string())) - .for_each(|path| { - let _ = remove_file(path); - }); + uuids_to_remove.iter().for_each(|uuid| { + let _ = self.update_file_store.delete(*uuid); + }); Ok(()) } diff --git a/meilisearch-lib/src/lib.rs b/meilisearch-lib/src/lib.rs index 6eaaf431c..364a96dcf 100644 --- a/meilisearch-lib/src/lib.rs +++ b/meilisearch-lib/src/lib.rs @@ -28,30 +28,3 @@ impl EnvSizer for heed::Env { .fold(0, |acc, m| acc + m.len()) } } - -use std::fs::{create_dir_all, File}; -use std::io::Write; -use std::path::Path; - -use flate2::{read::GzDecoder, write::GzEncoder, Compression}; -use tar::{Archive, Builder}; - -pub fn to_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Result<()> { - let mut f = File::create(dest)?; - let gz_encoder = GzEncoder::new(&mut f, Compression::default()); - let mut tar_encoder = Builder::new(gz_encoder); - tar_encoder.append_dir_all(".", src)?; - let gz_encoder = tar_encoder.into_inner()?; - gz_encoder.finish()?; - f.flush()?; - Ok(()) -} - -pub fn from_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Result<()> { - let f = File::open(&src)?; - let gz = GzDecoder::new(f); - let mut ar = Archive::new(gz); - create_dir_all(&dest)?; - ar.unpack(&dest)?; - Ok(()) -} From 311933614e6f4780388e30326deaae5ea805f1aa Mon Sep 17 00:00:00 2001 From: mpostma Date: Wed, 29 Sep 2021 15:44:54 +0200 Subject: [PATCH 37/37] bump milli to v0.17.0 --- Cargo.lock | 4 ++-- meilisearch-lib/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 429eff063..4aeb8e1c7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1778,8 +1778,8 @@ dependencies = [ [[package]] name = "milli" -version = "0.16.0" -source = "git+https://github.com/meilisearch/milli.git?rev=f65153ad6454317213680e9a9a908ec78d5645a7#f65153ad6454317213680e9a9a908ec78d5645a7" +version = "0.17.0" +source = 
"git+https://github.com/meilisearch/milli.git?tag=v0.17.0#22551d0941bee1a9cdcf7d5bfc4ca46517dd25f3" dependencies = [ "bimap", "bincode", diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml index baf95d286..5223ae7c9 100644 --- a/meilisearch-lib/Cargo.toml +++ b/meilisearch-lib/Cargo.toml @@ -31,7 +31,7 @@ log = "0.4.14" meilisearch-error = { path = "../meilisearch-error" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" } memmap = "0.7.0" -milli = { git = "https://github.com/meilisearch/milli.git", rev = "f65153ad6454317213680e9a9a908ec78d5645a7"} +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.17.0"} mime = "0.3.16" num_cpus = "1.13.0" once_cell = "1.8.0"