write the dump export

This commit is contained in:
Tamo 2022-10-13 15:02:59 +02:00 committed by Clément Renault
parent 7ce336306d
commit 9323f9f1c4
No known key found for this signature in database
GPG key ID: 92ADA4E935E71FA4
25 changed files with 686 additions and 184 deletions

View file

@ -16,7 +16,6 @@ time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsi
tar = "0.4.38"
anyhow = "1.0.65"
log = "0.4.17"
index-scheduler = { path = "../index-scheduler" }
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
http = "0.2.8"

View file

@ -1,3 +1,4 @@
use meilisearch_types::error::{Code, ErrorCode};
use thiserror::Error;
#[derive(Debug, Error)]
@ -16,3 +17,19 @@ pub enum Error {
#[error(transparent)]
Uuid(#[from] uuid::Error),
}
impl ErrorCode for Error {
fn error_code(&self) -> Code {
match self {
// Are these three really Internal errors?
Error::Io(_) => Code::Internal,
Error::Serde(_) => Code::Internal,
Error::Uuid(_) => Code::Internal,
// all these errors should never be raised when creating a dump, thus no error code should be associated.
Error::DumpV1Unsupported => Code::Internal,
Error::BadIndexName => Code::Internal,
Error::MalformedTask => Code::Internal,
}
}
}

View file

@ -52,12 +52,11 @@ pub(crate) mod test {
};
use big_s::S;
use index_scheduler::task::Details;
use maplit::btreeset;
use meilisearch_auth::{Action, Key};
use meilisearch_types::keys::{Action, Key};
use meilisearch_types::milli::{self, update::Setting};
use meilisearch_types::settings::{Checked, Settings};
use meilisearch_types::tasks::{DetailsView, Kind, Status, TaskView};
use meilisearch_types::tasks::{Kind, Status};
use meilisearch_types::{index_uid::IndexUid, star_or::StarOr};
use serde_json::{json, Map, Value};
use time::{macros::datetime, Duration};
@ -116,7 +115,7 @@ pub(crate) mod test {
settings.check()
}
pub fn create_test_tasks() -> Vec<(TaskView, Option<Vec<Document>>)> {
pub fn create_test_tasks() -> Vec<(Task, Option<Vec<Document>>)> {
vec![
(
TaskView {

View file

@ -124,7 +124,7 @@ impl CompatV5ToV6 {
indexed_documents,
} => v6::Details::DocumentAddition {
received_documents: received_documents as u64,
indexed_documents: indexed_documents.map_or(0, |i| i as u64),
indexed_documents: indexed_documents.map(|i| i as u64),
},
v5::Details::Settings { settings } => v6::Details::Settings {
settings: settings.into(),

202
dump/src/reader/v6/mod.rs Normal file
View file

@ -0,0 +1,202 @@
use std::{
fs::{self, File},
io::{BufRead, BufReader},
path::Path,
str::FromStr,
};
use tempfile::TempDir;
use time::OffsetDateTime;
use uuid::Uuid;
use crate::{Error, IndexMetadata, Result, Version};
mod tasks;
pub use meilisearch_types::milli;
use super::Document;
pub type Metadata = crate::Metadata;
pub type Settings<T> = meilisearch_types::settings::Settings<T>;
pub type Checked = meilisearch_types::settings::Checked;
pub type Unchecked = meilisearch_types::settings::Unchecked;
pub type Task = tasks::TaskDump;
pub type Key = meilisearch_types::keys::Key;
// ===== Other types to clarify the code of the compat module
// everything related to the tasks
pub type Status = meilisearch_types::tasks::Status;
pub type Kind = tasks::KindDump;
pub type Details = meilisearch_types::tasks::Details;
// everything related to the settings
pub type Setting<T> = meilisearch_types::milli::update::Setting<T>;
pub type TypoTolerance = meilisearch_types::settings::TypoSettings;
pub type MinWordSizeForTypos = meilisearch_types::settings::MinWordSizeTyposSetting;
pub type FacetingSettings = meilisearch_types::settings::FacetingSettings;
pub type PaginationSettings = meilisearch_types::settings::PaginationSettings;
// everything related to the api keys
pub type Action = meilisearch_types::keys::Action;
pub type StarOr<T> = meilisearch_types::star_or::StarOr<T>;
pub type IndexUid = meilisearch_types::index_uid::IndexUid;
// everything related to the errors
pub type ResponseError = meilisearch_types::error::ResponseError;
pub type Code = meilisearch_types::error::Code;
pub struct V6Reader {
dump: TempDir,
instance_uid: Uuid,
metadata: Metadata,
tasks: BufReader<File>,
keys: BufReader<File>,
}
impl V6Reader {
pub fn open(dump: TempDir) -> Result<Self> {
let meta_file = fs::read(dump.path().join("metadata.json"))?;
let instance_uid = fs::read_to_string(dump.path().join("instance_uid.uuid"))?;
let instance_uid = Uuid::from_str(&instance_uid)?;
Ok(V6Reader {
metadata: serde_json::from_reader(&*meta_file)?,
instance_uid,
tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?),
keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?),
dump,
})
}
pub fn version(&self) -> Version {
Version::V6
}
pub fn date(&self) -> Option<OffsetDateTime> {
Some(self.metadata.dump_date)
}
pub fn instance_uid(&self) -> Result<Option<Uuid>> {
Ok(Some(self.instance_uid))
}
pub fn indexes(&self) -> Result<Box<dyn Iterator<Item = Result<V6IndexReader>> + '_>> {
let entries = fs::read_dir(self.dump.path().join("indexes"))?;
Ok(Box::new(
entries
.map(|entry| -> Result<Option<_>> {
let entry = entry?;
if entry.file_type()?.is_dir() {
let index = V6IndexReader::new(
entry
.file_name()
.to_str()
.ok_or(Error::BadIndexName)?
.to_string(),
&entry.path(),
)?;
Ok(Some(index))
} else {
Ok(None)
}
})
.filter_map(|entry| entry.transpose()),
))
}
pub fn tasks(
&mut self,
) -> Box<dyn Iterator<Item = Result<(Task, Option<Box<super::UpdateFile>>)>> + '_> {
Box::new((&mut self.tasks).lines().map(|line| -> Result<_> {
let task: Task = serde_json::from_str(&line?)?;
let update_file_path = self
.dump
.path()
.join("tasks")
.join("update_files")
.join(format!("{}.jsonl", task.uid.to_string()));
if update_file_path.exists() {
Ok((
task,
Some(Box::new(UpdateFile::new(&update_file_path)?) as Box<super::UpdateFile>),
))
} else {
Ok((task, None))
}
}))
}
pub fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Key>> + '_> {
Box::new(
(&mut self.keys)
.lines()
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }),
)
}
}
pub struct UpdateFile {
reader: BufReader<File>,
}
impl UpdateFile {
fn new(path: &Path) -> Result<Self> {
Ok(UpdateFile {
reader: BufReader::new(File::open(path)?),
})
}
}
impl Iterator for UpdateFile {
type Item = Result<Document>;
fn next(&mut self) -> Option<Self::Item> {
(&mut self.reader)
.lines()
.map(|line| {
line.map_err(Error::from)
.and_then(|line| serde_json::from_str(&line).map_err(Error::from))
})
.next()
}
}
pub struct V6IndexReader {
metadata: IndexMetadata,
documents: BufReader<File>,
settings: BufReader<File>,
}
impl V6IndexReader {
pub fn new(_name: String, path: &Path) -> Result<Self> {
let metadata = File::open(path.join("metadata.json"))?;
let ret = V6IndexReader {
metadata: serde_json::from_reader(metadata)?,
documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
settings: BufReader::new(File::open(path.join("settings.json"))?),
};
Ok(ret)
}
pub fn metadata(&self) -> &IndexMetadata {
&self.metadata
}
pub fn documents(&mut self) -> Result<impl Iterator<Item = Result<Document>> + '_> {
Ok((&mut self.documents)
.lines()
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn settings(&mut self) -> Result<Settings<Checked>> {
let settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
Ok(settings.check())
}
}

View file

@ -0,0 +1,81 @@
use meilisearch_types::{
error::ResponseError,
milli::update::IndexDocumentsMethod,
settings::Unchecked,
tasks::{Details, Status, TaskId},
};
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct TaskDump {
pub uid: TaskId,
#[serde(default)]
pub index_uid: Option<String>,
pub status: Status,
// TODO use our own Kind for the user
#[serde(rename = "type")]
pub kind: KindDump,
#[serde(skip_serializing_if = "Option::is_none")]
pub details: Option<Details>,
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<ResponseError>,
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
#[serde(
with = "time::serde::rfc3339::option",
skip_serializing_if = "Option::is_none",
default
)]
pub started_at: Option<OffsetDateTime>,
#[serde(
with = "time::serde::rfc3339::option",
skip_serializing_if = "Option::is_none",
default
)]
pub finished_at: Option<OffsetDateTime>,
}
// A `Kind` specific version made for the dump. If modified you may break the dump.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum KindDump {
DocumentImport {
primary_key: Option<String>,
method: IndexDocumentsMethod,
documents_count: u64,
allow_index_creation: bool,
},
DocumentDeletion {
documents_ids: Vec<String>,
},
DocumentClear,
Settings {
settings: meilisearch_types::settings::Settings<Unchecked>,
is_deletion: bool,
allow_index_creation: bool,
},
IndexDeletion,
IndexCreation {
primary_key: Option<String>,
},
IndexUpdate {
primary_key: Option<String>,
},
IndexSwap {
lhs: String,
rhs: String,
},
CancelTask {
tasks: Vec<TaskId>,
},
DeleteTasks {
query: String,
tasks: Vec<TaskId>,
},
DumpExport,
Snapshot,
}

View file

@ -22,13 +22,15 @@ pub struct DumpWriter {
}
impl DumpWriter {
pub fn new(instance_uuid: Uuid) -> Result<DumpWriter> {
pub fn new(instance_uuid: Option<Uuid>) -> Result<DumpWriter> {
let dir = TempDir::new()?;
fs::write(
dir.path().join("instance_uid.uuid"),
&instance_uuid.as_hyphenated().to_string(),
)?;
if let Some(instance_uuid) = instance_uuid {
fs::write(
dir.path().join("instance_uid.uuid"),
&instance_uuid.as_hyphenated().to_string(),
)?;
}
let metadata = Metadata {
dump_version: CURRENT_DUMP_VERSION,
@ -133,7 +135,6 @@ impl UpdateFile {
writer.write_all(b"\n")?;
writer.flush()?;
} else {
dbg!(&self.path);
let file = File::create(&self.path).unwrap();
self.writer = Some(BufWriter::new(file));
self.push_document(document)?;