From b7f9c94f4a43bf18755ff185bfc21b5561a3ae97 Mon Sep 17 00:00:00 2001
From: Tamo
Date: Thu, 13 Oct 2022 15:02:59 +0200
Subject: [PATCH] write the dump export

---
 dump/Cargo.toml                                 |   1 -
 dump/src/error.rs                               |  17 ++
 dump/src/lib.rs                                 |   7 +-
 dump/src/reader/compat/v5_to_v6.rs              |   2 +-
 dump/src/reader/v6/mod.rs                       | 202 ++++++++++++++++++
 dump/src/reader/v6/tasks.rs                     |  81 +++++++
 dump/src/writer.rs                              |  13 +-
 index-scheduler/Cargo.toml                      |   1 +
 index-scheduler/src/batch.rs                    |  95 +++++++-
 index-scheduler/src/error.rs                    |   5 +-
 index-scheduler/src/lib.rs                      |  16 +-
 .../src/analytics/mock_analytics.rs             |  17 +-
 meilisearch-http/src/analytics/mod.rs           |   7 +-
 meilisearch-http/src/lib.rs                     |   1 +
 meilisearch-http/src/main.rs                    |  12 +-
 meilisearch-http/src/routes/dump.rs             |  28 ++-
 .../src/routes/indexes/documents.rs             |  15 +-
 meilisearch-http/src/routes/indexes/mod.rs      |  17 +-
 .../src/routes/indexes/settings.rs              | 146 ++-----------
 meilisearch-http/src/routes/tasks.rs            |   2 +-
 meilisearch-http/src/search.rs                  |   5 +-
 meilisearch-types/Cargo.toml                    |   1 +
 meilisearch-types/src/lib.rs                    |   2 +
 meilisearch-types/src/settings.rs               | 114 ++++++++++
 meilisearch-types/src/tasks.rs                  |  63 +++++-
 25 files changed, 686 insertions(+), 184 deletions(-)
 create mode 100644 dump/src/reader/v6/mod.rs
 create mode 100644 dump/src/reader/v6/tasks.rs

diff --git a/dump/Cargo.toml b/dump/Cargo.toml
index 01ca0d339..b2ab5ceaa 100644
--- a/dump/Cargo.toml
+++ b/dump/Cargo.toml
@@ -16,7 +16,6 @@ time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsi
 tar = "0.4.38"
 anyhow = "1.0.65"
 log = "0.4.17"
-index-scheduler = { path = "../index-scheduler" }
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
 http = "0.2.8"
diff --git a/dump/src/error.rs b/dump/src/error.rs
index 824e87c2d..7a6010269 100644
--- a/dump/src/error.rs
+++ b/dump/src/error.rs
@@ -1,3 +1,4 @@
+use meilisearch_types::error::{Code, ErrorCode};
 use thiserror::Error;
 
 #[derive(Debug, Error)]
@@ -16,3 +17,19 @@ pub enum Error {
     #[error(transparent)]
     Uuid(#[from] uuid::Error),
 }
+
+impl ErrorCode for Error {
+    fn error_code(&self) -> Code {
+        match self {
+            // Are these three really Internal errors?
+            Error::Io(_) => Code::Internal,
+            Error::Serde(_) => Code::Internal,
+            Error::Uuid(_) => Code::Internal,
+
+            // All these errors should never be raised when creating a dump, thus no error code should be associated.
+            Error::DumpV1Unsupported => Code::Internal,
+            Error::BadIndexName => Code::Internal,
+            Error::MalformedTask => Code::Internal,
+        }
+    }
+}
diff --git a/dump/src/lib.rs b/dump/src/lib.rs
index 31a7c96eb..8451f7495 100644
--- a/dump/src/lib.rs
+++ b/dump/src/lib.rs
@@ -52,12 +52,11 @@ pub(crate) mod test {
     };
 
     use big_s::S;
-    use index_scheduler::task::Details;
    use maplit::btreeset;
-    use meilisearch_auth::{Action, Key};
+    use meilisearch_types::keys::{Action, Key};
     use meilisearch_types::milli::{self, update::Setting};
     use meilisearch_types::settings::{Checked, Settings};
-    use meilisearch_types::tasks::{DetailsView, Kind, Status, TaskView};
+    use meilisearch_types::tasks::{Kind, Status};
     use meilisearch_types::{index_uid::IndexUid, star_or::StarOr};
     use serde_json::{json, Map, Value};
     use time::{macros::datetime, Duration};
@@ -116,7 +115,7 @@ pub(crate) mod test {
         settings.check()
     }
 
-    pub fn create_test_tasks() -> Vec<(TaskView, Option<Vec<Document>>)> {
+    pub fn create_test_tasks() -> Vec<(Task, Option<Vec<Document>>)> {
         vec![
             (
                 TaskView {
diff --git a/dump/src/reader/compat/v5_to_v6.rs b/dump/src/reader/compat/v5_to_v6.rs
index 9616913c8..cfcf7b545 100644
--- a/dump/src/reader/compat/v5_to_v6.rs
+++ b/dump/src/reader/compat/v5_to_v6.rs
@@ -124,7 +124,7 @@ impl CompatV5ToV6 {
                     indexed_documents,
                 } => v6::Details::DocumentAddition {
                     received_documents: received_documents as u64,
-                    indexed_documents: indexed_documents.map_or(0, |i| i as u64),
+                    indexed_documents: indexed_documents.map(|i| i as u64),
                 },
                 v5::Details::Settings { settings } => v6::Details::Settings {
                     settings: settings.into(),
diff --git a/dump/src/reader/v6/mod.rs b/dump/src/reader/v6/mod.rs
new file mode 100644
index 000000000..d69fb0542
--- /dev/null
+++ b/dump/src/reader/v6/mod.rs
@@ -0,0 +1,202 @@
+use std::{
+    fs::{self, File},
+    io::{BufRead, BufReader},
+    path::Path,
+    str::FromStr,
+};
+
+use tempfile::TempDir;
+use time::OffsetDateTime;
+use uuid::Uuid;
+
+use crate::{Error, IndexMetadata, Result, Version};
+
+mod tasks;
+
+pub use meilisearch_types::milli;
+
+use super::Document;
+
+pub type Metadata = crate::Metadata;
+
+pub type Settings<T> = meilisearch_types::settings::Settings<T>;
+pub type Checked = meilisearch_types::settings::Checked;
+pub type Unchecked = meilisearch_types::settings::Unchecked;
+
+pub type Task = tasks::TaskDump;
+pub type Key = meilisearch_types::keys::Key;
+
+// ===== Other types to clarify the code of the compat module
+// everything related to the tasks
+pub type Status = meilisearch_types::tasks::Status;
+pub type Kind = tasks::KindDump;
+pub type Details = meilisearch_types::tasks::Details;
+
+// everything related to the settings
+pub type Setting<T> = meilisearch_types::milli::update::Setting<T>;
+pub type TypoTolerance = meilisearch_types::settings::TypoSettings;
+pub type MinWordSizeForTypos = meilisearch_types::settings::MinWordSizeTyposSetting;
+pub type FacetingSettings = meilisearch_types::settings::FacetingSettings;
+pub type PaginationSettings = meilisearch_types::settings::PaginationSettings;
+
+// everything related to the api keys
+pub type Action = meilisearch_types::keys::Action;
+pub type StarOr<T> = meilisearch_types::star_or::StarOr<T>;
+pub type IndexUid = meilisearch_types::index_uid::IndexUid;
+
+// everything related to the errors
+pub type ResponseError = meilisearch_types::error::ResponseError;
+pub type Code = meilisearch_types::error::Code;
+
+pub struct V6Reader {
+    dump: TempDir,
+    instance_uid: Uuid,
+    metadata: Metadata,
+    tasks: BufReader<File>,
+    keys: BufReader<File>,
+}
+
+impl V6Reader {
+    pub fn open(dump: TempDir) -> Result<V6Reader> {
+        let meta_file = fs::read(dump.path().join("metadata.json"))?;
+        let instance_uid = fs::read_to_string(dump.path().join("instance_uid.uuid"))?;
+        let instance_uid = Uuid::from_str(&instance_uid)?;
+
+        Ok(V6Reader {
+            metadata: serde_json::from_reader(&*meta_file)?,
+            instance_uid,
+            tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?),
+            keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?),
+            dump,
+        })
+    }
+
+    pub fn version(&self) -> Version {
+        Version::V6
+    }
+
+    pub fn date(&self) -> Option<OffsetDateTime> {
+        Some(self.metadata.dump_date)
+    }
+
+    pub fn instance_uid(&self) -> Result<Option<Uuid>> {
+        Ok(Some(self.instance_uid))
+    }
+
+    pub fn indexes(&self) -> Result<Box<dyn Iterator<Item = Result<V6IndexReader>> + '_>> {
+        let entries = fs::read_dir(self.dump.path().join("indexes"))?;
+        Ok(Box::new(
+            entries
+                .map(|entry| -> Result<Option<V6IndexReader>> {
+                    let entry = entry?;
+                    if entry.file_type()?.is_dir() {
+                        let index = V6IndexReader::new(
+                            entry
+                                .file_name()
+                                .to_str()
+                                .ok_or(Error::BadIndexName)?
+                                .to_string(),
+                            &entry.path(),
+                        )?;
+                        Ok(Some(index))
+                    } else {
+                        Ok(None)
+                    }
+                })
+                .filter_map(|entry| entry.transpose()),
+        ))
+    }
+
+    pub fn tasks(
+        &mut self,
+    ) -> Box<dyn Iterator<Item = Result<(Task, Option<Box<UpdateFile>>)>> + '_> {
+        Box::new((&mut self.tasks).lines().map(|line| -> Result<_> {
+            let task: Task = serde_json::from_str(&line?)?;
+
+            let update_file_path = self
+                .dump
+                .path()
+                .join("tasks")
+                .join("update_files")
+                .join(format!("{}.jsonl", task.uid.to_string()));
+
+            if update_file_path.exists() {
+                Ok((
+                    task,
+                    Some(Box::new(UpdateFile::new(&update_file_path)?) as Box<UpdateFile>),
+                ))
+            } else {
+                Ok((task, None))
+            }
+        }))
+    }
+
+    pub fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Key>> + '_> {
+        Box::new(
+            (&mut self.keys)
+                .lines()
+                .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }),
+        )
+    }
+}
+
+pub struct UpdateFile {
+    reader: BufReader<File>,
+}
+
+impl UpdateFile {
+    fn new(path: &Path) -> Result<Self> {
+        Ok(UpdateFile {
+            reader: BufReader::new(File::open(path)?),
+        })
+    }
+}
+
+impl Iterator for UpdateFile {
+    type Item = Result<Document>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        (&mut self.reader)
+            .lines()
+            .map(|line| {
+                line.map_err(Error::from)
+                    .and_then(|line| serde_json::from_str(&line).map_err(Error::from))
+            })
+            .next()
+    }
+}
+
+pub struct V6IndexReader {
+    metadata: IndexMetadata,
+    documents: BufReader<File>,
+    settings: BufReader<File>,
+}
+
+impl V6IndexReader {
+    pub fn new(_name: String, path: &Path) -> Result<Self> {
+        let metadata = File::open(path.join("metadata.json"))?;
+
+        let ret = V6IndexReader {
+            metadata: serde_json::from_reader(metadata)?,
+            documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
+            settings: BufReader::new(File::open(path.join("settings.json"))?),
+        };
+
+        Ok(ret)
+    }
+
+    pub fn metadata(&self) -> &IndexMetadata {
+        &self.metadata
+    }
+
+    pub fn documents(&mut self) -> Result<impl Iterator<Item = Result<Document>> + '_> {
+        Ok((&mut self.documents)
+            .lines()
+            .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
+    }
+
+    pub fn settings(&mut self) -> Result<Settings<Checked>> {
+        let settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
+        Ok(settings.check())
+    }
+}
diff --git a/dump/src/reader/v6/tasks.rs b/dump/src/reader/v6/tasks.rs
new file mode 100644
index 000000000..25693e779
--- /dev/null
+++ b/dump/src/reader/v6/tasks.rs
@@ -0,0 +1,81 @@
+use meilisearch_types::{
+    error::ResponseError,
+    milli::update::IndexDocumentsMethod,
+    settings::Unchecked,
+    tasks::{Details, Status, TaskId},
+};
+use serde::{Deserialize, Serialize};
+use time::OffsetDateTime;
+
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct TaskDump {
+    pub uid: TaskId,
+    #[serde(default)]
+    pub index_uid: Option<String>,
+    pub status: Status,
+    // TODO: use our own `Kind` for the user.
+    #[serde(rename = "type")]
+    pub kind: KindDump,
+
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub details: Option<Details>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub error: Option<ResponseError>,
+
+    #[serde(with = "time::serde::rfc3339")]
+    pub enqueued_at: OffsetDateTime,
+    #[serde(
+        with = "time::serde::rfc3339::option",
+        skip_serializing_if = "Option::is_none",
+        default
+    )]
+    pub started_at: Option<OffsetDateTime>,
+    #[serde(
+        with = "time::serde::rfc3339::option",
+        skip_serializing_if = "Option::is_none",
+        default
+    )]
+    pub finished_at: Option<OffsetDateTime>,
+}
+
+// A `Kind`-specific version made for the dump. If modified, you may break the dump.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub enum KindDump {
+    DocumentImport {
+        primary_key: Option<String>,
+        method: IndexDocumentsMethod,
+        documents_count: u64,
+        allow_index_creation: bool,
+    },
+    DocumentDeletion {
+        documents_ids: Vec<String>,
+    },
+    DocumentClear,
+    Settings {
+        settings: meilisearch_types::settings::Settings<Unchecked>,
+        is_deletion: bool,
+        allow_index_creation: bool,
+    },
+    IndexDeletion,
+    IndexCreation {
+        primary_key: Option<String>,
+    },
+    IndexUpdate {
+        primary_key: Option<String>,
+    },
+    IndexSwap {
+        lhs: String,
+        rhs: String,
+    },
+    CancelTask {
+        tasks: Vec<TaskId>,
+    },
+    DeleteTasks {
+        query: String,
+        tasks: Vec<TaskId>,
+    },
+    DumpExport,
+    Snapshot,
+}
diff --git a/dump/src/writer.rs b/dump/src/writer.rs
index c018b93d4..c52399b71 100644
--- a/dump/src/writer.rs
+++ b/dump/src/writer.rs
@@ -22,13 +22,15 @@ pub struct DumpWriter {
 }
 
 impl DumpWriter {
-    pub fn new(instance_uuid: Uuid) -> Result<DumpWriter> {
+    pub fn new(instance_uuid: Option<Uuid>) -> Result<DumpWriter> {
         let dir = TempDir::new()?;
 
-        fs::write(
-            dir.path().join("instance_uid.uuid"),
-            &instance_uuid.as_hyphenated().to_string(),
-        )?;
+        if let Some(instance_uuid) = instance_uuid {
+            fs::write(
+                dir.path().join("instance_uid.uuid"),
+                &instance_uuid.as_hyphenated().to_string(),
+            )?;
+        }
 
         let metadata = Metadata {
             dump_version: CURRENT_DUMP_VERSION,
@@ -133,7 +135,6 @@ impl UpdateFile {
             writer.write_all(b"\n")?;
             writer.flush()?;
         } else {
-            dbg!(&self.path);
             let file = File::create(&self.path).unwrap();
             self.writer = Some(BufWriter::new(file));
             self.push_document(document)?;
diff --git a/index-scheduler/Cargo.toml b/index-scheduler/Cargo.toml
index 3969e08ee..95991ed43 100644
--- a/index-scheduler/Cargo.toml
+++ b/index-scheduler/Cargo.toml
@@ -13,6 +13,7 @@ file-store = { path = "../file-store" }
 log = "0.4.14"
 meilisearch-types = { path = "../meilisearch-types" }
 roaring = { version = "0.10.0", features = ["serde"] }
+dump = { path = "../dump" }
 serde = { version = "1.0.136", features = ["derive"] }
 serde_json = { version = "1.0.85", features = ["preserve_order"] }
 tempfile = "3.3.0"
diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs
index d823f278e..b383a1360 100644
--- a/index-scheduler/src/batch.rs
+++ b/index-scheduler/src/batch.rs
@@ -1,7 +1,11 @@
 use std::collections::HashSet;
+use std::fs::File;
+use std::io::BufWriter;
 
 use crate::{autobatcher::BatchKind, Error, IndexScheduler, Result, TaskId};
 
+use dump::IndexMetadata;
+use meilisearch_types::milli::documents::obkv_to_object;
 use meilisearch_types::tasks::{Details, Kind, KindWithContent, Status, Task};
 
 use log::{debug, info};
@@ -25,7 +29,7 @@ pub(crate) enum Batch {
     Cancel(Task),
     TaskDeletion(Task),
     Snapshot(Vec<Task>),
-    Dump(Vec<Task>),
+    Dump(Task),
     IndexOperation(IndexOperation),
     IndexCreation {
         index_uid: String,
@@ -100,9 +104,10 @@ impl Batch {
         match self {
             Batch::Cancel(task)
             | Batch::TaskDeletion(task)
+            | Batch::Dump(task)
             | Batch::IndexCreation { task, .. }
             | Batch::IndexUpdate { task, .. } => vec![task.uid],
-            Batch::Snapshot(tasks) | Batch::Dump(tasks) | Batch::IndexDeletion { tasks, .. } => {
+            Batch::Snapshot(tasks) | Batch::IndexDeletion { tasks, .. } => {
                 tasks.iter().map(|task| task.uid).collect()
             }
             Batch::IndexOperation(operation) => match operation {
@@ -402,8 +407,11 @@ impl IndexScheduler {
 
         // 4. we batch the dumps.
         let to_dump = self.get_kind(rtxn, Kind::DumpExport)? & enqueued;
-        if !to_dump.is_empty() {
-            return Ok(Some(Batch::Dump(self.get_existing_tasks(rtxn, to_dump)?)));
+        if let Some(to_dump) = to_dump.min() {
+            return Ok(Some(Batch::Dump(
+                self.get_task(rtxn, to_dump)?
+                    .ok_or(Error::CorruptedTaskQueue)?,
+            )));
         }
 
         // 5. We take the next task and try to batch all the tasks associated with this index.
@@ -477,7 +485,80 @@ impl IndexScheduler {
                 Ok(vec![task])
             }
             Batch::Snapshot(_) => todo!(),
-            Batch::Dump(_) => todo!(),
+            Batch::Dump(mut task) => {
+                let KindWithContent::DumpExport { keys, instance_uid, dump_uid } = &task.kind else {
+                    unreachable!();
+                };
+                let dump = dump::DumpWriter::new(instance_uid.clone())?;
+                let mut d_keys = dump.create_keys()?;
+
+                // 1. dump the keys
+                for key in keys {
+                    d_keys.push_key(key)?;
+                }
+
+                let rtxn = self.env.read_txn()?;
+
+                // 2. dump the tasks
+                let mut tasks = dump.create_tasks_queue()?;
+                for ret in self.all_tasks.iter(&rtxn)? {
+                    let (_, task) = ret?;
+                    let mut dump_content_file = tasks.push_task(&task)?;
+
+                    // 2.1. Dump the `content_file` associated with the task if there is one.
+                    if let Some(content_file) = task.content_uuid() {
+                        let content_file = self.file_store.get_update(*content_file)?;
+
+                        let reader = DocumentsBatchReader::from_reader(content_file)
+                            .map_err(milli::Error::from)?;
+
+                        let (mut cursor, documents_batch_index) =
+                            reader.into_cursor_and_fields_index();
+
+                        while let Some(doc) = cursor.next_document().map_err(milli::Error::from)? {
+                            dump_content_file
+                                .push_document(&obkv_to_object(&doc, &documents_batch_index)?)?;
+                        }
+                    }
+                }
+
+                // TODO: maybe `self.indexes` could use this rtxn instead of creating its own.
+                drop(rtxn);
+
+                // 3. Dump the indexes
+                for (uid, index) in self.indexes()? {
+                    let rtxn = index.read_txn()?;
+                    let metadata = IndexMetadata {
+                        uid: uid.clone(),
+                        primary_key: index.primary_key(&rtxn)?.map(String::from),
+                        created_at: index.created_at(&rtxn)?,
+                        updated_at: index.updated_at(&rtxn)?,
+                    };
+                    let mut index_dumper = dump.create_index(&uid, &metadata)?;
+
+                    let fields_ids_map = index.fields_ids_map(&rtxn)?;
+                    let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
+
+                    // 3.1. Dump the documents
+                    for ret in index.all_documents(&rtxn)? {
+                        let (_id, doc) = ret?;
+                        let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
+                        index_dumper.push_document(&document)?;
+                    }
+
+                    // 3.2. Dump the settings
+                    let settings = meilisearch_types::settings::settings(&index, &rtxn)?;
+                    index_dumper.settings(&settings)?;
+                }
+
+                let path = self.dumps_path.join(format!("{}.dump", dump_uid));
+                let file = File::create(path).unwrap();
+                dump.persist_to(BufWriter::new(file)).unwrap();
+
+                task.status = Status::Succeeded;
+
+                Ok(vec![task])
+            }
             Batch::IndexOperation(operation) => {
                 #[rustfmt::skip]
                 let index = match operation {
@@ -679,14 +760,14 @@ impl IndexScheduler {
                         task.status = Status::Succeeded;
                         task.details = Some(Details::DocumentAddition {
                             received_documents: number_of_documents,
-                            indexed_documents,
+                            indexed_documents: Some(indexed_documents),
                         });
                     }
                     Err(error) => {
                         task.status = Status::Failed;
                         task.details = Some(Details::DocumentAddition {
                             received_documents: count,
-                            indexed_documents: count,
+                            indexed_documents: Some(count),
                         });
                         task.error = Some(error.into())
                     }
diff --git a/index-scheduler/src/error.rs b/index-scheduler/src/error.rs
index 7ae96e0b7..49ce1d021 100644
--- a/index-scheduler/src/error.rs
+++ b/index-scheduler/src/error.rs
@@ -24,6 +24,8 @@ pub enum Error {
     #[error("`{0}` is not a type. Available types are")]
     InvalidKind(String),
 
+    #[error(transparent)]
+    Dump(#[from] dump::Error),
     #[error(transparent)]
     Heed(#[from] heed::Error),
     #[error(transparent)]
@@ -48,8 +50,9 @@ impl ErrorCode for Error {
             Error::InvalidKind(_) => Code::BadRequest,
 
             // TODO: TAMO: are all these errors really internal?
+            Error::Dump(e) => e.error_code(),
+            Error::Milli(e) => e.error_code(),
             Error::Heed(_) => Code::Internal,
-            Error::Milli(_) => Code::Internal,
             Error::FileStore(_) => Code::Internal,
             Error::IoError(_) => Code::Internal,
             Error::Anyhow(_) => Code::Internal,
diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs
index 7a46e90a5..a2d182dd9 100644
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -154,6 +154,9 @@ pub struct IndexScheduler {
     /// Weither autobatching is enabled or not.
     pub(crate) autobatching_enabled: bool,
 
+    /// The path used to create the dumps.
+    pub(crate) dumps_path: PathBuf,
+
     // ================= test
     /// The next entry is dedicated to the tests.
     /// It provide a way to break in multiple part of the scheduler.
@@ -175,6 +178,7 @@ impl IndexScheduler {
         tasks_path: PathBuf,
         update_file_path: PathBuf,
         indexes_path: PathBuf,
+        dumps_path: PathBuf,
         index_size: usize,
         indexer_config: IndexerConfig,
         autobatching_enabled: bool,
@@ -183,6 +187,7 @@ impl IndexScheduler {
         std::fs::create_dir_all(&tasks_path)?;
         std::fs::create_dir_all(&update_file_path)?;
         std::fs::create_dir_all(&indexes_path)?;
+        std::fs::create_dir_all(&dumps_path)?;
 
         let mut options = heed::EnvOpenOptions::new();
         options.max_dbs(6);
@@ -205,6 +210,7 @@ impl IndexScheduler {
             // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
             wake_up: Arc::new(SignalEvent::auto(true)),
             autobatching_enabled,
+            dumps_path,
 
             #[cfg(test)]
             test_breakpoint_sdr,
@@ -227,6 +233,7 @@ impl IndexScheduler {
             index_mapper: self.index_mapper.clone(),
             wake_up: self.wake_up.clone(),
             autobatching_enabled: self.autobatching_enabled,
+            dumps_path: self.dumps_path.clone(),
 
             #[cfg(test)]
             test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
@@ -342,7 +349,7 @@ impl IndexScheduler {
             started_at: None,
             finished_at: None,
             error: None,
-            details: task.default_details(),
+            details: (&task).into(),
             status: Status::Enqueued,
             kind: task,
         };
@@ -367,9 +374,9 @@ impl IndexScheduler {
 
         match wtxn.commit() {
             Ok(()) => (),
-            e @ Err(_) => {
+            _e @ Err(_) => {
                 todo!("remove the data associated with the task");
-                e?;
+                // _e?;
             }
         }
 
@@ -436,6 +443,7 @@ impl IndexScheduler {
                     // TODO the info field should've been set by the process_batch function
                     self.update_task(&mut wtxn, &task)?;
                 }
+                log::info!("A batch of tasks was successfully completed.");
             }
             // In case of a failure we must get back and patch all the tasks with the error.
             Err(err) => {
@@ -453,7 +461,6 @@ impl IndexScheduler {
         }
         *self.processing_tasks.write().unwrap() = (finished_at, RoaringBitmap::new());
         wtxn.commit()?;
-        log::info!("A batch of tasks was successfully completed.");
 
         #[cfg(test)]
         self.test_breakpoint_sdr
@@ -542,6 +549,7 @@ mod tests {
             tempdir.path().join("db_path"),
            tempdir.path().join("file_store"),
             tempdir.path().join("indexes"),
+            tempdir.path().join("dumps"),
             1024 * 1024,
             IndexerConfig::default(),
             autobatching, // enable autobatching
diff --git a/meilisearch-http/src/analytics/mock_analytics.rs b/meilisearch-http/src/analytics/mock_analytics.rs
index 01838f223..05ed48e65 100644
--- a/meilisearch-http/src/analytics/mock_analytics.rs
+++ b/meilisearch-http/src/analytics/mock_analytics.rs
@@ -1,16 +1,19 @@
 use std::{any::Any, sync::Arc};
 
 use actix_web::HttpRequest;
+use meilisearch_types::InstanceUid;
 use serde_json::Value;
 
 use crate::{routes::indexes::documents::UpdateDocumentsQuery, Opt};
 
 use super::{find_user_id, Analytics};
 
-pub struct MockAnalytics;
+pub struct MockAnalytics {
+    instance_uid: Option<InstanceUid>,
+}
 
 #[derive(Default)]
-pub struct SearchAggregator {}
+pub struct SearchAggregator;
 
 #[allow(dead_code)]
 impl SearchAggregator {
@@ -23,13 +26,17 @@ impl SearchAggregator {
 
 impl MockAnalytics {
     #[allow(clippy::new_ret_no_self)]
-    pub fn new(opt: &Opt) -> (Arc<dyn Analytics>, String) {
-        let user = find_user_id(&opt.db_path).unwrap_or_default();
-        (Arc::new(Self), user)
+    pub fn new(opt: &Opt) -> Arc<dyn Analytics> {
+        let instance_uid = find_user_id(&opt.db_path);
+        Arc::new(Self { instance_uid })
     }
 }
 
 impl Analytics for MockAnalytics {
+    fn instance_uid(&self) -> Option<&meilisearch_types::InstanceUid> {
+        self.instance_uid.as_ref()
+    }
+
     // These methods are noop and should be optimized out
     fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {}
     fn get_search(&self, _aggregate: super::SearchAggregator) {}
diff --git a/meilisearch-http/src/analytics/mod.rs b/meilisearch-http/src/analytics/mod.rs
index b51f306a9..17a1901a1 100644
--- a/meilisearch-http/src/analytics/mod.rs
+++ b/meilisearch-http/src/analytics/mod.rs
@@ -5,8 +5,10 @@ mod segment_analytics;
 
 use std::fs;
 use std::path::{Path, PathBuf};
+use std::str::FromStr;
 
 use actix_web::HttpRequest;
+use meilisearch_types::InstanceUid;
 use once_cell::sync::Lazy;
 use platform_dirs::AppDirs;
 use serde_json::Value;
@@ -51,13 +53,16 @@ fn config_user_id_path(db_path: &Path) -> Option<PathBuf> {
 }
 
 /// Look for the instance-uid in the `data.ms` or in `~/.config/Meilisearch/path-to-db-instance-uid`
-fn find_user_id(db_path: &Path) -> Option<String> {
+fn find_user_id(db_path: &Path) -> Option<InstanceUid> {
     fs::read_to_string(db_path.join("instance-uid"))
         .ok()
         .or_else(|| fs::read_to_string(&config_user_id_path(db_path)?).ok())
+        .and_then(|uid| InstanceUid::from_str(&uid).ok())
 }
 
 pub trait Analytics: Sync + Send {
+    fn instance_uid(&self) -> Option<&InstanceUid>;
+
     /// The method used to publish most analytics that do not need to be batched every hours
     fn publish(&self, event_name: String, send: Value, request: Option<&HttpRequest>);
diff --git a/meilisearch-http/src/lib.rs b/meilisearch-http/src/lib.rs
index 42b2e88e5..d08d457e0 100644
--- a/meilisearch-http/src/lib.rs
+++ b/meilisearch-http/src/lib.rs
@@ -37,6 +37,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<IndexScheduler> {
         opt.db_path.join("tasks"),
         opt.db_path.join("update_files"),
         opt.db_path.join("indexes"),
+        opt.dumps_dir.clone(),
         opt.max_index_size.get_bytes() as usize,
         (&opt.indexer_options).try_into()?,
         true,
diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs
index 61cac48b2..3d628f742 100644
--- a/meilisearch-http/src/main.rs
+++ b/meilisearch-http/src/main.rs
@@ -53,15 +53,15 @@ async fn main() -> anyhow::Result<()> {
     let auth_controller = AuthController::new(&opt.db_path, &opt.master_key)?;
 
     #[cfg(all(not(debug_assertions), feature = "analytics"))]
-    let (analytics, user) = if !opt.no_analytics {
+    let analytics = if !opt.no_analytics {
         analytics::SegmentAnalytics::new(&opt, &meilisearch).await
     } else {
         analytics::MockAnalytics::new(&opt)
     };
     #[cfg(any(debug_assertions, not(feature = "analytics")))]
-    let (analytics, user) = analytics::MockAnalytics::new(&opt);
+    let analytics = analytics::MockAnalytics::new(&opt);
 
-    print_launch_resume(&opt, &user, config_read_from);
+    print_launch_resume(&opt, analytics.clone(), config_read_from);
 
     run_http(index_scheduler, auth_controller, opt, analytics).await?;
 
@@ -133,7 +133,7 @@ async fn run_http(
     Ok(())
 }
 
-pub fn print_launch_resume(opt: &Opt, user: &str, config_read_from: Option<PathBuf>) {
+pub fn print_launch_resume(opt: &Opt, analytics: Arc<dyn Analytics>, config_read_from: Option<PathBuf>) {
     let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
     let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
     let protocol = if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() {
@@ -186,8 +186,8 @@ Anonymous telemetry:\t\"Enabled\""
         }
     }
 
-    if !user.is_empty() {
-        eprintln!("Instance UID:\t\t\"{}\"", user);
+    if let Some(instance_uid) = analytics.instance_uid() {
+        eprintln!("Instance UID:\t\t\"{}\"", instance_uid.to_string());
     }
 
     eprintln!();
diff --git a/meilisearch-http/src/routes/dump.rs b/meilisearch-http/src/routes/dump.rs
index c792357ea..e03112444 100644
--- a/meilisearch-http/src/routes/dump.rs
+++ b/meilisearch-http/src/routes/dump.rs
@@ -2,13 +2,17 @@ use actix_web::web::Data;
 use actix_web::{web, HttpRequest, HttpResponse};
 use index_scheduler::IndexScheduler;
 use log::debug;
+use meilisearch_auth::AuthController;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::tasks::KindWithContent;
 use serde_json::json;
+use time::macros::format_description;
+use time::OffsetDateTime;
 
 use crate::analytics::Analytics;
 use crate::extractors::authentication::{policies::*, GuardedData};
 use crate::extractors::sequential_extractor::SeqHandler;
+use crate::routes::SummarizedTaskView;
 
 pub fn configure(cfg: &mut web::ServiceConfig) {
     cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump))));
@@ -16,16 +20,28 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
 
 pub async fn create_dump(
     index_scheduler: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<IndexScheduler>>,
+    auth_controller: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, AuthController>,
     req: HttpRequest,
     analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     analytics.publish("Dump Created".to_string(), json!({}), Some(&req));
 
-    let task = KindWithContent::DumpExport {
-        output: "todo".to_string().into(),
-    };
-    let res = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+    let dump_uid = OffsetDateTime::now_utc()
+        .format(format_description!(
+            "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
+        ))
+        .unwrap();
 
-    debug!("returns: {:?}", res);
-    Ok(HttpResponse::Accepted().json(res))
+    let task = KindWithContent::DumpExport {
+        keys: auth_controller.list_keys()?,
+        instance_uid: analytics.instance_uid().cloned(),
+        dump_uid,
+    };
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();
+
+    debug!("returns: {:?}", task);
+    Ok(HttpResponse::Accepted().json(task))
 }
diff --git a/meilisearch-http/src/routes/indexes/documents.rs b/meilisearch-http/src/routes/indexes/documents.rs
index cabdae502..a4a67ea7e 100644
--- a/meilisearch-http/src/routes/indexes/documents.rs
+++ b/meilisearch-http/src/routes/indexes/documents.rs
@@ -109,7 +109,10 @@ pub async fn delete_document(
         index_uid,
         documents_ids: vec![document_id],
     };
-    let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();
     debug!("returns: {:?}", task);
     Ok(HttpResponse::Accepted().json(task))
 }
@@ -314,7 +317,10 @@ pub async fn delete_documents(
         index_uid: path.into_inner(),
         documents_ids: ids,
     };
-    let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();
 
     debug!("returns: {:?}", task);
     Ok(HttpResponse::Accepted().json(task))
@@ -327,7 +333,10 @@ pub async fn clear_all_documents(
     let task = KindWithContent::DocumentClear {
         index_uid: path.into_inner(),
     };
-    let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();
 
     debug!("returns: {:?}", task);
     Ok(HttpResponse::Accepted().json(task))
diff --git a/meilisearch-http/src/routes/indexes/mod.rs b/meilisearch-http/src/routes/indexes/mod.rs
index 7a6d4607f..97e8ca3d6 100644
--- a/meilisearch-http/src/routes/indexes/mod.rs
+++ b/meilisearch-http/src/routes/indexes/mod.rs
@@ -13,7 +13,7 @@ use crate::analytics::Analytics;
 use crate::extractors::authentication::{policies::*, AuthenticationError, GuardedData};
 use crate::extractors::sequential_extractor::SeqHandler;
 
-use super::Pagination;
+use super::{Pagination, SummarizedTaskView};
 
 pub mod documents;
 pub mod search;
@@ -108,7 +108,10 @@ pub async fn create_index(
             index_uid: uid,
             primary_key,
         };
-        let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+        let task: SummarizedTaskView =
+            tokio::task::spawn_blocking(move || index_scheduler.register(task))
+                .await??
+                .into();
 
         Ok(HttpResponse::Accepted().json(task))
     } else {
@@ -156,7 +159,10 @@ pub async fn update_index(
         primary_key: body.primary_key,
     };
 
-    let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();
 
     debug!("returns: {:?}", task);
     Ok(HttpResponse::Accepted().json(task))
@@ -169,7 +175,10 @@ pub async fn delete_index(
     let task = KindWithContent::IndexDeletion {
         index_uid: index_uid.into_inner(),
     };
-    let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();
 
     Ok(HttpResponse::Accepted().json(task))
 }
diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs
index f9eec1427..537c999f6 100644
--- a/meilisearch-http/src/routes/indexes/settings.rs
+++ b/meilisearch-http/src/routes/indexes/settings.rs
@@ -1,27 +1,16 @@
-use std::collections::BTreeSet;
-use std::marker::PhantomData;
-
 use actix_web::web::Data;
-use fst::IntoStreamer;
 use log::debug;
 
 use actix_web::{web, HttpRequest, HttpResponse};
 use index_scheduler::IndexScheduler;
 use meilisearch_types::error::ResponseError;
-use meilisearch_types::heed::RoTxn;
-use meilisearch_types::milli::update::Setting;
-use meilisearch_types::milli::{self, DEFAULT_VALUES_PER_FACET};
-use meilisearch_types::settings::{
-    Checked, FacetingSettings, MinWordSizeTyposSetting, PaginationSettings, Settings, TypoSettings,
-    Unchecked,
-};
+use meilisearch_types::settings::{settings, Settings, Unchecked};
 use meilisearch_types::tasks::KindWithContent;
-use meilisearch_types::Index;
 use serde_json::json;
 
 use crate::analytics::Analytics;
 use crate::extractors::authentication::{policies::*, GuardedData};
-use crate::search::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
+use crate::routes::SummarizedTaskView;
 
 #[macro_export]
 macro_rules! make_setting_route {
@@ -33,14 +22,14 @@ macro_rules! make_setting_route {
             use index_scheduler::IndexScheduler;
             use meilisearch_types::milli::update::Setting;
-            use meilisearch_types::settings::Settings;
+            use meilisearch_types::settings::{settings, Settings};
             use meilisearch_types::tasks::KindWithContent;
             use meilisearch_types::error::ResponseError;
             use $crate::analytics::Analytics;
             use $crate::extractors::authentication::{policies::*, GuardedData};
             use $crate::extractors::sequential_extractor::SeqHandler;
-            use $crate::routes::indexes::settings::settings;
+            use $crate::routes::SummarizedTaskView;
 
             pub async fn delete(
                 index_scheduler: GuardedData<
@@ -61,8 +50,10 @@ macro_rules! make_setting_route {
                     is_deletion: true,
                     allow_index_creation,
                 };
-                let task =
-                    tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+                let task: SummarizedTaskView =
+                    tokio::task::spawn_blocking(move || index_scheduler.register(task))
+                        .await??
+                        .into();
 
                 debug!("returns: {:?}", task);
                 Ok(HttpResponse::Accepted().json(task))
@@ -97,8 +88,10 @@ macro_rules! make_setting_route {
                     is_deletion: false,
                     allow_index_creation,
                 };
-                let task =
-                    tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+                let task: SummarizedTaskView =
+                    tokio::task::spawn_blocking(move || index_scheduler.register(task))
+                        .await??
+                        .into();
 
                 debug!("returns: {:?}", task);
                 Ok(HttpResponse::Accepted().json(task))
@@ -459,7 +452,10 @@ pub async fn update_all(
         is_deletion: false,
         allow_index_creation,
     };
-    let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();
 
     debug!("returns: {:?}", task);
     Ok(HttpResponse::Accepted().json(task))
@@ -489,113 +485,11 @@ pub async fn delete_all(
         is_deletion: true,
         allow_index_creation,
     };
-    let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();
 
     debug!("returns: {:?}", task);
     Ok(HttpResponse::Accepted().json(task))
 }
-
-pub fn settings(index: &Index, rtxn: &RoTxn) -> Result<Settings<Checked>, milli::Error> {
-    let displayed_attributes = index
-        .displayed_fields(rtxn)?
-        .map(|fields| fields.into_iter().map(String::from).collect());
-
-    let searchable_attributes = index
-        .user_defined_searchable_fields(rtxn)?
-        .map(|fields| fields.into_iter().map(String::from).collect());
-
-    let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect();
-
-    let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect();
-
-    let criteria = index
-        .criteria(rtxn)?
-        .into_iter()
-        .map(|c| c.to_string())
-        .collect();
-
-    let stop_words = index
-        .stop_words(rtxn)?
-        .map(|stop_words| -> Result<BTreeSet<String>, milli::Error> {
-            Ok(stop_words.stream().into_strs()?.into_iter().collect())
-        })
-        .transpose()?
-        .unwrap_or_default();
-    let distinct_field = index.distinct_field(rtxn)?.map(String::from);
-
-    // in milli each word in the synonyms map were split on their separator. Since we lost
-    // this information we are going to put space between words.
-    let synonyms = index
-        .synonyms(rtxn)?
-        .iter()
-        .map(|(key, values)| {
-            (
-                key.join(" "),
-                values.iter().map(|value| value.join(" ")).collect(),
-            )
-        })
-        .collect();
-
-    let min_typo_word_len = MinWordSizeTyposSetting {
-        one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?),
-        two_typos: Setting::Set(index.min_word_len_two_typos(rtxn)?),
-    };
-
-    let disabled_words = match index.exact_words(rtxn)? {
-        Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(),
-        None => BTreeSet::new(),
-    };
-
-    let disabled_attributes = index
-        .exact_attributes(rtxn)?
-        .into_iter()
-        .map(String::from)
-        .collect();
-
-    let typo_tolerance = TypoSettings {
-        enabled: Setting::Set(index.authorize_typos(rtxn)?),
-        min_word_size_for_typos: Setting::Set(min_typo_word_len),
-        disable_on_words: Setting::Set(disabled_words),
-        disable_on_attributes: Setting::Set(disabled_attributes),
-    };
-
-    let faceting = FacetingSettings {
-        max_values_per_facet: Setting::Set(
-            index
-                .max_values_per_facet(rtxn)?
-                .unwrap_or(DEFAULT_VALUES_PER_FACET),
-        ),
-    };
-
-    let pagination = PaginationSettings {
-        max_total_hits: Setting::Set(
-            index
-                .pagination_max_total_hits(rtxn)?
-                .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
-        ),
-    };
-
-    Ok(Settings {
-        displayed_attributes: match displayed_attributes {
-            Some(attrs) => Setting::Set(attrs),
-            None => Setting::Reset,
-        },
-        searchable_attributes: match searchable_attributes {
-            Some(attrs) => Setting::Set(attrs),
-            None => Setting::Reset,
-        },
-        filterable_attributes: Setting::Set(filterable_attributes),
-        sortable_attributes: Setting::Set(sortable_attributes),
-        ranking_rules: Setting::Set(criteria),
-        stop_words: Setting::Set(stop_words),
-        distinct_attribute: match distinct_field {
-            Some(field) => Setting::Set(field),
-            None => Setting::Reset,
-        },
-        synonyms: Setting::Set(synonyms),
-        typo_tolerance: Setting::Set(typo_tolerance),
-        faceting: Setting::Set(faceting),
-        pagination: Setting::Set(pagination),
-        _kind: PhantomData,
-    })
-}
diff --git a/meilisearch-http/src/routes/tasks.rs b/meilisearch-http/src/routes/tasks.rs
index f1248c8dd..869f6b370 100644
--- a/meilisearch-http/src/routes/tasks.rs
+++ b/meilisearch-http/src/routes/tasks.rs
@@ -121,7 +121,7 @@ impl From<Details> for DetailsView {
                 indexed_documents,
             } => DetailsView {
                 received_documents: Some(received_documents),
-                indexed_documents: Some(indexed_documents),
+                indexed_documents,
                 ..DetailsView::default()
             },
             Details::Settings { settings } => DetailsView {
diff --git a/meilisearch-http/src/search.rs b/meilisearch-http/src/search.rs
index f53fdb036..2f2785823 100644
--- a/meilisearch-http/src/search.rs
+++ b/meilisearch-http/src/search.rs
@@ -4,6 +4,7 @@ use std::str::FromStr;
 use std::time::Instant;
 
 use either::Either;
+use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
 use meilisearch_types::{milli, Document};
 use milli::tokenizer::TokenizerBuilder;
 use milli::{
@@ -24,10 +25,6 @@ pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
 pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
 pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
 
-/// The maximimum number of results that the engine
-/// will be able to return in one search call.
-pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000;
-
 #[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
 #[serde(rename_all = "camelCase", deny_unknown_fields)]
 pub struct SearchQuery {
diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml
index 468cbbee6..498153ad1 100644
--- a/meilisearch-types/Cargo.toml
+++ b/meilisearch-types/Cargo.toml
@@ -10,6 +10,7 @@ csv = "1.1.6"
 either = { version = "1.6.1", features = ["serde"] }
 milli = { git = "https://github.com/meilisearch/milli.git", branch = "indexation-abortion", default-features = false }
 enum-iterator = "0.7.0"
+fst = "0.4.7"
 proptest = { version = "1.0.0", optional = true }
 proptest-derive = { version = "0.3.0", optional = true }
 roaring = { version = "0.10.0", features = ["serde"] }
diff --git a/meilisearch-types/src/lib.rs b/meilisearch-types/src/lib.rs
index 44dd67af7..43a9d819a 100644
--- a/meilisearch-types/src/lib.rs
+++ b/meilisearch-types/src/lib.rs
@@ -9,5 +9,7 @@ pub mod tasks;
 pub use milli;
 pub use milli::heed;
 pub use milli::Index;
+use uuid::Uuid;
 
 pub type Document = serde_json::Map<String, serde_json::Value>;
+pub type InstanceUid = Uuid;
diff --git a/meilisearch-types/src/settings.rs b/meilisearch-types/src/settings.rs
index a6d13d99f..0bc27df3f 100644
--- a/meilisearch-types/src/settings.rs
+++ b/meilisearch-types/src/settings.rs
@@ -2,9 +2,15 @@ use std::collections::{BTreeMap, BTreeSet};
 use std::marker::PhantomData;
 use std::num::NonZeroUsize;
 
+use fst::IntoStreamer;
 use milli::update::Setting;
+use milli::{Index, DEFAULT_VALUES_PER_FACET};
 use serde::{Deserialize, Serialize, Serializer};
 
+/// The maximum number of results that the engine
+/// will be able to return in one search call.
+pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000;
+
 fn serialize_with_wildcard<S>(
     field: &Setting<Vec<String>>,
     s: S,
@@ -366,6 +372,114 @@ pub fn apply_settings_to_builder(
     }
 }
 
+pub fn settings(
+    index: &Index,
+    rtxn: &crate::heed::RoTxn,
+) -> Result<Settings<Checked>, milli::Error> {
+    let displayed_attributes = index
+        .displayed_fields(rtxn)?
+        .map(|fields| fields.into_iter().map(String::from).collect());
+
+    let searchable_attributes = index
+        .user_defined_searchable_fields(rtxn)?
+        .map(|fields| fields.into_iter().map(String::from).collect());
+
+    let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect();
+
+    let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect();
+
+    let criteria = index
+        .criteria(rtxn)?
+        .into_iter()
+        .map(|c| c.to_string())
+        .collect();
+
+    let stop_words = index
+        .stop_words(rtxn)?
+        .map(|stop_words| -> Result<BTreeSet<String>, milli::Error> {
+            Ok(stop_words.stream().into_strs()?.into_iter().collect())
+        })
+        .transpose()?
+        .unwrap_or_default();
+    let distinct_field = index.distinct_field(rtxn)?.map(String::from);
+
+    // In milli, each word in the synonyms map was split on its separator. Since we lost
+    // this information, we put a space between words.
+    let synonyms = index
+        .synonyms(rtxn)?
+        .iter()
+        .map(|(key, values)| {
+            (
+                key.join(" "),
+                values.iter().map(|value| value.join(" ")).collect(),
+            )
+        })
+        .collect();
+
+    let min_typo_word_len = MinWordSizeTyposSetting {
+        one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?),
+        two_typos: Setting::Set(index.min_word_len_two_typos(rtxn)?),
+    };
+
+    let disabled_words = match index.exact_words(rtxn)? {
+        Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(),
+        None => BTreeSet::new(),
+    };
+
+    let disabled_attributes = index
+        .exact_attributes(rtxn)?
+        .into_iter()
+        .map(String::from)
+        .collect();
+
+    let typo_tolerance = TypoSettings {
+        enabled: Setting::Set(index.authorize_typos(rtxn)?),
+        min_word_size_for_typos: Setting::Set(min_typo_word_len),
+        disable_on_words: Setting::Set(disabled_words),
+        disable_on_attributes: Setting::Set(disabled_attributes),
+    };
+
+    let faceting = FacetingSettings {
+        max_values_per_facet: Setting::Set(
+            index
+                .max_values_per_facet(rtxn)?
+                .unwrap_or(DEFAULT_VALUES_PER_FACET),
+        ),
+    };
+
+    let pagination = PaginationSettings {
+        max_total_hits: Setting::Set(
+            index
+                .pagination_max_total_hits(rtxn)?
+                .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
+        ),
+    };
+
+    Ok(Settings {
+        displayed_attributes: match displayed_attributes {
+            Some(attrs) => Setting::Set(attrs),
+            None => Setting::Reset,
+        },
+        searchable_attributes: match searchable_attributes {
+            Some(attrs) => Setting::Set(attrs),
+            None => Setting::Reset,
+        },
+        filterable_attributes: Setting::Set(filterable_attributes),
+        sortable_attributes: Setting::Set(sortable_attributes),
+        ranking_rules: Setting::Set(criteria),
+        stop_words: Setting::Set(stop_words),
+        distinct_attribute: match distinct_field {
+            Some(field) => Setting::Set(field),
+            None => Setting::Reset,
+        },
+        synonyms: Setting::Set(synonyms),
+        typo_tolerance: Setting::Set(typo_tolerance),
+        faceting: Setting::Set(faceting),
+        pagination: Setting::Set(pagination),
+        _kind: PhantomData,
+    })
+}
+
 #[cfg(test)]
 pub(crate) mod test {
     use proptest::prelude::*;
diff --git a/meilisearch-types/src/tasks.rs b/meilisearch-types/src/tasks.rs
index d7b59717a..1b408601f 100644
--- a/meilisearch-types/src/tasks.rs
+++ b/meilisearch-types/src/tasks.rs
@@ -3,7 +3,6 @@ use roaring::RoaringBitmap;
 use serde::{Deserialize, Serialize, Serializer};
 use std::{
     fmt::{Display, Write},
-    path::PathBuf,
     str::FromStr,
 };
 use time::{Duration, OffsetDateTime};
@@ -11,7 +10,9 @@ use uuid::Uuid;
 
 use crate::{
     error::{Code, ResponseError},
+    keys::Key,
     settings::{Settings, Unchecked},
+    InstanceUid,
 };
 
 pub type TaskId = u32;
@@ -71,6 +72,26 @@ impl Task {
             IndexSwap { lhs, rhs } => Some(vec![lhs, rhs]),
         }
     }
+
+    /// Return the content-uuid if there is one.
+    pub fn content_uuid(&self) -> Option<&Uuid> {
+        match self.kind {
+            KindWithContent::DocumentImport {
+                ref content_file, ..
+            } => Some(content_file),
+            KindWithContent::DocumentDeletion { .. }
+            | KindWithContent::DocumentClear { .. }
+            | KindWithContent::Settings { .. }
+            | KindWithContent::IndexDeletion { .. }
+            | KindWithContent::IndexCreation { .. }
+            | KindWithContent::IndexUpdate { .. }
+            | KindWithContent::IndexSwap { .. }
+            | KindWithContent::CancelTask { .. }
+            | KindWithContent::DeleteTasks { .. }
+            | KindWithContent::DumpExport { .. }
+            | KindWithContent::Snapshot => None,
+        }
+    }
 }
 
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
@@ -120,7 +141,9 @@ pub enum KindWithContent {
         tasks: RoaringBitmap,
     },
     DumpExport {
-        output: PathBuf,
+        dump_uid: String,
+        keys: Vec<Key>,
+        instance_uid: Option<InstanceUid>,
     },
     Snapshot,
 }
@@ -167,7 +190,7 @@ impl KindWithContent {
                 documents_count, ..
             } => Some(Details::DocumentAddition {
                 received_documents: *documents_count,
-                indexed_documents: 0,
+                indexed_documents: Some(0),
             }),
             KindWithContent::DocumentDeletion {
                 index_uid: _,
@@ -204,6 +227,38 @@ impl KindWithContent {
     }
 }
 
+impl From<&KindWithContent> for Option<Details> {
+    fn from(kind: &KindWithContent) -> Self {
+        match kind {
+            KindWithContent::DocumentImport {
+                documents_count, ..
+            } => Some(Details::DocumentAddition {
+                received_documents: *documents_count,
+                indexed_documents: None,
+            }),
+            KindWithContent::DocumentDeletion { .. } => None,
+            KindWithContent::DocumentClear { .. } => None,
+            KindWithContent::Settings { new_settings, .. } => Some(Details::Settings {
+                settings: new_settings.clone(),
+            }),
+            KindWithContent::IndexDeletion { .. } => None,
+            KindWithContent::IndexCreation { primary_key, .. } => Some(Details::IndexInfo {
+                primary_key: primary_key.clone(),
+            }),
+            KindWithContent::IndexUpdate { primary_key, .. } => Some(Details::IndexInfo {
+                primary_key: primary_key.clone(),
+            }),
+            KindWithContent::IndexSwap { .. } => None,
+            KindWithContent::CancelTask { .. } => None,
+            KindWithContent::DeleteTasks { .. } => todo!(),
+            KindWithContent::DumpExport { dump_uid, .. } => Some(Details::Dump {
+                dump_uid: dump_uid.clone(),
+            }),
+            KindWithContent::Snapshot => None,
+        }
+    }
+}
+
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
 #[serde(rename_all = "camelCase")]
 pub enum Status {
@@ -289,7 +344,7 @@ impl FromStr for Kind {
 pub enum Details {
     DocumentAddition {
         received_documents: u64,
        indexed_documents: Option<u64>,
    },
    Settings {
        settings: Settings<Unchecked>,
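Reviewer note (not part of the patch): the sketch below shows how the `DumpWriter` API introduced in `dump/src/writer.rs` is meant to be driven, mirroring the steps of the `Batch::Dump` arm in `index-scheduler/src/batch.rs`. The function name, its parameters, and the use of `anyhow` for error handling are illustrative assumptions; only the `DumpWriter` methods (`new`, `create_keys`, `push_key`, `create_tasks_queue`, `create_index`, `persist_to`) come from the patch itself.

```rust
use std::fs::File;
use std::io::BufWriter;

// Minimal sketch of the dump-writing flow added by this patch.
fn write_dump_sketch(
    keys: &[meilisearch_types::keys::Key],
    dumps_path: &std::path::Path,
    dump_uid: &str, // generated by the `create_dump` route, e.g. "20221013-150259123"
) -> anyhow::Result<()> {
    // Passing `None` simply skips writing `instance_uid.uuid`.
    let dump = dump::DumpWriter::new(None)?;

    // 1. Dump the API keys (the scheduler gets these from the `DumpExport` task).
    let mut d_keys = dump.create_keys()?;
    for key in keys {
        d_keys.push_key(key)?;
    }

    // 2. The task queue would be dumped here with `dump.create_tasks_queue()`,
    //    one `push_task` per task plus its update file when there is one.

    // 3. Each index would be dumped with `dump.create_index(uid, &metadata)`,
    //    then `push_document` for every document and `settings` once.

    // Finally, persist the tarball under `<dumps_path>/<dump_uid>.dump`.
    let file = File::create(dumps_path.join(format!("{}.dump", dump_uid)))?;
    dump.persist_to(BufWriter::new(file))?;
    Ok(())
}
```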