mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 04:17:10 +02:00
write the dump export
This commit is contained in:
parent
7ce336306d
commit
9323f9f1c4
25 changed files with 686 additions and 184 deletions
|
@ -16,7 +16,6 @@ time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsi
|
|||
tar = "0.4.38"
|
||||
anyhow = "1.0.65"
|
||||
log = "0.4.17"
|
||||
index-scheduler = { path = "../index-scheduler" }
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
http = "0.2.8"
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use meilisearch_types::error::{Code, ErrorCode};
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
|
@ -16,3 +17,19 @@ pub enum Error {
|
|||
#[error(transparent)]
|
||||
Uuid(#[from] uuid::Error),
|
||||
}
|
||||
|
||||
impl ErrorCode for Error {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
// Are these three really Internal errors?
|
||||
Error::Io(_) => Code::Internal,
|
||||
Error::Serde(_) => Code::Internal,
|
||||
Error::Uuid(_) => Code::Internal,
|
||||
|
||||
// all these errors should never be raised when creating a dump, thus no error code should be associated.
|
||||
Error::DumpV1Unsupported => Code::Internal,
|
||||
Error::BadIndexName => Code::Internal,
|
||||
Error::MalformedTask => Code::Internal,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -52,12 +52,11 @@ pub(crate) mod test {
|
|||
};
|
||||
|
||||
use big_s::S;
|
||||
use index_scheduler::task::Details;
|
||||
use maplit::btreeset;
|
||||
use meilisearch_auth::{Action, Key};
|
||||
use meilisearch_types::keys::{Action, Key};
|
||||
use meilisearch_types::milli::{self, update::Setting};
|
||||
use meilisearch_types::settings::{Checked, Settings};
|
||||
use meilisearch_types::tasks::{DetailsView, Kind, Status, TaskView};
|
||||
use meilisearch_types::tasks::{Kind, Status};
|
||||
use meilisearch_types::{index_uid::IndexUid, star_or::StarOr};
|
||||
use serde_json::{json, Map, Value};
|
||||
use time::{macros::datetime, Duration};
|
||||
|
@ -116,7 +115,7 @@ pub(crate) mod test {
|
|||
settings.check()
|
||||
}
|
||||
|
||||
pub fn create_test_tasks() -> Vec<(TaskView, Option<Vec<Document>>)> {
|
||||
pub fn create_test_tasks() -> Vec<(Task, Option<Vec<Document>>)> {
|
||||
vec![
|
||||
(
|
||||
TaskView {
|
||||
|
|
|
@ -124,7 +124,7 @@ impl CompatV5ToV6 {
|
|||
indexed_documents,
|
||||
} => v6::Details::DocumentAddition {
|
||||
received_documents: received_documents as u64,
|
||||
indexed_documents: indexed_documents.map_or(0, |i| i as u64),
|
||||
indexed_documents: indexed_documents.map(|i| i as u64),
|
||||
},
|
||||
v5::Details::Settings { settings } => v6::Details::Settings {
|
||||
settings: settings.into(),
|
||||
|
|
202
dump/src/reader/v6/mod.rs
Normal file
202
dump/src/reader/v6/mod.rs
Normal file
|
@ -0,0 +1,202 @@
|
|||
use std::{
|
||||
fs::{self, File},
|
||||
io::{BufRead, BufReader},
|
||||
path::Path,
|
||||
str::FromStr,
|
||||
};
|
||||
|
||||
use tempfile::TempDir;
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{Error, IndexMetadata, Result, Version};
|
||||
|
||||
mod tasks;
|
||||
|
||||
pub use meilisearch_types::milli;
|
||||
|
||||
use super::Document;
|
||||
|
||||
pub type Metadata = crate::Metadata;
|
||||
|
||||
pub type Settings<T> = meilisearch_types::settings::Settings<T>;
|
||||
pub type Checked = meilisearch_types::settings::Checked;
|
||||
pub type Unchecked = meilisearch_types::settings::Unchecked;
|
||||
|
||||
pub type Task = tasks::TaskDump;
|
||||
pub type Key = meilisearch_types::keys::Key;
|
||||
|
||||
// ===== Other types to clarify the code of the compat module
|
||||
// everything related to the tasks
|
||||
pub type Status = meilisearch_types::tasks::Status;
|
||||
pub type Kind = tasks::KindDump;
|
||||
pub type Details = meilisearch_types::tasks::Details;
|
||||
|
||||
// everything related to the settings
|
||||
pub type Setting<T> = meilisearch_types::milli::update::Setting<T>;
|
||||
pub type TypoTolerance = meilisearch_types::settings::TypoSettings;
|
||||
pub type MinWordSizeForTypos = meilisearch_types::settings::MinWordSizeTyposSetting;
|
||||
pub type FacetingSettings = meilisearch_types::settings::FacetingSettings;
|
||||
pub type PaginationSettings = meilisearch_types::settings::PaginationSettings;
|
||||
|
||||
// everything related to the api keys
|
||||
pub type Action = meilisearch_types::keys::Action;
|
||||
pub type StarOr<T> = meilisearch_types::star_or::StarOr<T>;
|
||||
pub type IndexUid = meilisearch_types::index_uid::IndexUid;
|
||||
|
||||
// everything related to the errors
|
||||
pub type ResponseError = meilisearch_types::error::ResponseError;
|
||||
pub type Code = meilisearch_types::error::Code;
|
||||
|
||||
pub struct V6Reader {
|
||||
dump: TempDir,
|
||||
instance_uid: Uuid,
|
||||
metadata: Metadata,
|
||||
tasks: BufReader<File>,
|
||||
keys: BufReader<File>,
|
||||
}
|
||||
|
||||
impl V6Reader {
|
||||
pub fn open(dump: TempDir) -> Result<Self> {
|
||||
let meta_file = fs::read(dump.path().join("metadata.json"))?;
|
||||
let instance_uid = fs::read_to_string(dump.path().join("instance_uid.uuid"))?;
|
||||
let instance_uid = Uuid::from_str(&instance_uid)?;
|
||||
|
||||
Ok(V6Reader {
|
||||
metadata: serde_json::from_reader(&*meta_file)?,
|
||||
instance_uid,
|
||||
tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?),
|
||||
keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?),
|
||||
dump,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn version(&self) -> Version {
|
||||
Version::V6
|
||||
}
|
||||
|
||||
pub fn date(&self) -> Option<OffsetDateTime> {
|
||||
Some(self.metadata.dump_date)
|
||||
}
|
||||
|
||||
pub fn instance_uid(&self) -> Result<Option<Uuid>> {
|
||||
Ok(Some(self.instance_uid))
|
||||
}
|
||||
|
||||
pub fn indexes(&self) -> Result<Box<dyn Iterator<Item = Result<V6IndexReader>> + '_>> {
|
||||
let entries = fs::read_dir(self.dump.path().join("indexes"))?;
|
||||
Ok(Box::new(
|
||||
entries
|
||||
.map(|entry| -> Result<Option<_>> {
|
||||
let entry = entry?;
|
||||
if entry.file_type()?.is_dir() {
|
||||
let index = V6IndexReader::new(
|
||||
entry
|
||||
.file_name()
|
||||
.to_str()
|
||||
.ok_or(Error::BadIndexName)?
|
||||
.to_string(),
|
||||
&entry.path(),
|
||||
)?;
|
||||
Ok(Some(index))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
})
|
||||
.filter_map(|entry| entry.transpose()),
|
||||
))
|
||||
}
|
||||
|
||||
pub fn tasks(
|
||||
&mut self,
|
||||
) -> Box<dyn Iterator<Item = Result<(Task, Option<Box<super::UpdateFile>>)>> + '_> {
|
||||
Box::new((&mut self.tasks).lines().map(|line| -> Result<_> {
|
||||
let task: Task = serde_json::from_str(&line?)?;
|
||||
|
||||
let update_file_path = self
|
||||
.dump
|
||||
.path()
|
||||
.join("tasks")
|
||||
.join("update_files")
|
||||
.join(format!("{}.jsonl", task.uid.to_string()));
|
||||
|
||||
if update_file_path.exists() {
|
||||
Ok((
|
||||
task,
|
||||
Some(Box::new(UpdateFile::new(&update_file_path)?) as Box<super::UpdateFile>),
|
||||
))
|
||||
} else {
|
||||
Ok((task, None))
|
||||
}
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Key>> + '_> {
|
||||
Box::new(
|
||||
(&mut self.keys)
|
||||
.lines()
|
||||
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct UpdateFile {
|
||||
reader: BufReader<File>,
|
||||
}
|
||||
|
||||
impl UpdateFile {
|
||||
fn new(path: &Path) -> Result<Self> {
|
||||
Ok(UpdateFile {
|
||||
reader: BufReader::new(File::open(path)?),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for UpdateFile {
|
||||
type Item = Result<Document>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
(&mut self.reader)
|
||||
.lines()
|
||||
.map(|line| {
|
||||
line.map_err(Error::from)
|
||||
.and_then(|line| serde_json::from_str(&line).map_err(Error::from))
|
||||
})
|
||||
.next()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct V6IndexReader {
|
||||
metadata: IndexMetadata,
|
||||
documents: BufReader<File>,
|
||||
settings: BufReader<File>,
|
||||
}
|
||||
|
||||
impl V6IndexReader {
|
||||
pub fn new(_name: String, path: &Path) -> Result<Self> {
|
||||
let metadata = File::open(path.join("metadata.json"))?;
|
||||
|
||||
let ret = V6IndexReader {
|
||||
metadata: serde_json::from_reader(metadata)?,
|
||||
documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
|
||||
settings: BufReader::new(File::open(path.join("settings.json"))?),
|
||||
};
|
||||
|
||||
Ok(ret)
|
||||
}
|
||||
|
||||
pub fn metadata(&self) -> &IndexMetadata {
|
||||
&self.metadata
|
||||
}
|
||||
|
||||
pub fn documents(&mut self) -> Result<impl Iterator<Item = Result<Document>> + '_> {
|
||||
Ok((&mut self.documents)
|
||||
.lines()
|
||||
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
|
||||
}
|
||||
|
||||
pub fn settings(&mut self) -> Result<Settings<Checked>> {
|
||||
let settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
|
||||
Ok(settings.check())
|
||||
}
|
||||
}
|
81
dump/src/reader/v6/tasks.rs
Normal file
81
dump/src/reader/v6/tasks.rs
Normal file
|
@ -0,0 +1,81 @@
|
|||
use meilisearch_types::{
|
||||
error::ResponseError,
|
||||
milli::update::IndexDocumentsMethod,
|
||||
settings::Unchecked,
|
||||
tasks::{Details, Status, TaskId},
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct TaskDump {
|
||||
pub uid: TaskId,
|
||||
#[serde(default)]
|
||||
pub index_uid: Option<String>,
|
||||
pub status: Status,
|
||||
// TODO use our own Kind for the user
|
||||
#[serde(rename = "type")]
|
||||
pub kind: KindDump,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub details: Option<Details>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub error: Option<ResponseError>,
|
||||
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub enqueued_at: OffsetDateTime,
|
||||
#[serde(
|
||||
with = "time::serde::rfc3339::option",
|
||||
skip_serializing_if = "Option::is_none",
|
||||
default
|
||||
)]
|
||||
pub started_at: Option<OffsetDateTime>,
|
||||
#[serde(
|
||||
with = "time::serde::rfc3339::option",
|
||||
skip_serializing_if = "Option::is_none",
|
||||
default
|
||||
)]
|
||||
pub finished_at: Option<OffsetDateTime>,
|
||||
}
|
||||
|
||||
// A `Kind` specific version made for the dump. If modified you may break the dump.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub enum KindDump {
|
||||
DocumentImport {
|
||||
primary_key: Option<String>,
|
||||
method: IndexDocumentsMethod,
|
||||
documents_count: u64,
|
||||
allow_index_creation: bool,
|
||||
},
|
||||
DocumentDeletion {
|
||||
documents_ids: Vec<String>,
|
||||
},
|
||||
DocumentClear,
|
||||
Settings {
|
||||
settings: meilisearch_types::settings::Settings<Unchecked>,
|
||||
is_deletion: bool,
|
||||
allow_index_creation: bool,
|
||||
},
|
||||
IndexDeletion,
|
||||
IndexCreation {
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
IndexUpdate {
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
IndexSwap {
|
||||
lhs: String,
|
||||
rhs: String,
|
||||
},
|
||||
CancelTask {
|
||||
tasks: Vec<TaskId>,
|
||||
},
|
||||
DeleteTasks {
|
||||
query: String,
|
||||
tasks: Vec<TaskId>,
|
||||
},
|
||||
DumpExport,
|
||||
Snapshot,
|
||||
}
|
|
@ -22,13 +22,15 @@ pub struct DumpWriter {
|
|||
}
|
||||
|
||||
impl DumpWriter {
|
||||
pub fn new(instance_uuid: Uuid) -> Result<DumpWriter> {
|
||||
pub fn new(instance_uuid: Option<Uuid>) -> Result<DumpWriter> {
|
||||
let dir = TempDir::new()?;
|
||||
|
||||
fs::write(
|
||||
dir.path().join("instance_uid.uuid"),
|
||||
&instance_uuid.as_hyphenated().to_string(),
|
||||
)?;
|
||||
if let Some(instance_uuid) = instance_uuid {
|
||||
fs::write(
|
||||
dir.path().join("instance_uid.uuid"),
|
||||
&instance_uuid.as_hyphenated().to_string(),
|
||||
)?;
|
||||
}
|
||||
|
||||
let metadata = Metadata {
|
||||
dump_version: CURRENT_DUMP_VERSION,
|
||||
|
@ -133,7 +135,6 @@ impl UpdateFile {
|
|||
writer.write_all(b"\n")?;
|
||||
writer.flush()?;
|
||||
} else {
|
||||
dbg!(&self.path);
|
||||
let file = File::create(&self.path).unwrap();
|
||||
self.writer = Some(BufWriter::new(file));
|
||||
self.push_document(document)?;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue