write the v5 dump import

This commit is contained in:
Tamo 2022-10-04 19:53:20 +02:00 committed by Clément Renault
parent 101f55ce8b
commit 1473a71e33
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
4 changed files with 246 additions and 34 deletions

View File

@ -0,0 +1,81 @@
use serde::Deserialize;
use time::OffsetDateTime;
use uuid::Uuid;
use super::meta::{IndexUid, StarOr};
/// Identifier of an API key; a plain alias for [`Uuid`].
pub type KeyId = Uuid;
/// An API key in the shape this module deserializes it.
///
/// The three date fields are parsed with the `time` crate's RFC 3339 serde helpers.
#[derive(Debug, Deserialize)]
pub struct Key {
/// Optional free-form description of the key.
pub description: Option<String>,
/// Optional name of the key.
pub name: Option<String>,
/// Identifier of the key (a UUID, see [`KeyId`]).
pub uid: KeyId,
/// Actions listed for this key.
pub actions: Vec<Action>,
/// Indexes listed for this key; each entry is either the `*` wildcard or an index uid.
pub indexes: Vec<StarOr<IndexUid>>,
/// Expiration date in RFC 3339 format; the `option` helper allows it to be absent (`None`).
#[serde(with = "time::serde::rfc3339::option")]
pub expires_at: Option<OffsetDateTime>,
/// Creation date in RFC 3339 format.
#[serde(with = "time::serde::rfc3339")]
pub created_at: OffsetDateTime,
/// Last update date in RFC 3339 format.
#[serde(with = "time::serde::rfc3339")]
pub updated_at: OffsetDateTime,
}
/// An action that can be listed in [`Key::actions`].
///
/// Each variant deserializes from the string given in its `#[serde(rename = "...")]`
/// attribute. The enum is `#[repr(u8)]` with `All = 0`, so the remaining variants take
/// consecutive discriminants in declaration order; reordering variants changes those values.
#[derive(Copy, Clone, Deserialize, Debug, Eq, PartialEq, Hash)]
#[repr(u8)]
pub enum Action {
/// The `*` wildcard.
#[serde(rename = "*")]
All = 0,
#[serde(rename = "search")]
Search,
#[serde(rename = "documents.*")]
DocumentsAll,
#[serde(rename = "documents.add")]
DocumentsAdd,
#[serde(rename = "documents.get")]
DocumentsGet,
#[serde(rename = "documents.delete")]
DocumentsDelete,
#[serde(rename = "indexes.*")]
IndexesAll,
// NOTE: the variant is named `IndexesAdd` but its serialized name is `indexes.create`.
#[serde(rename = "indexes.create")]
IndexesAdd,
#[serde(rename = "indexes.get")]
IndexesGet,
#[serde(rename = "indexes.update")]
IndexesUpdate,
#[serde(rename = "indexes.delete")]
IndexesDelete,
#[serde(rename = "tasks.*")]
TasksAll,
#[serde(rename = "tasks.get")]
TasksGet,
#[serde(rename = "settings.*")]
SettingsAll,
#[serde(rename = "settings.get")]
SettingsGet,
#[serde(rename = "settings.update")]
SettingsUpdate,
#[serde(rename = "stats.*")]
StatsAll,
#[serde(rename = "stats.get")]
StatsGet,
#[serde(rename = "metrics.*")]
MetricsAll,
#[serde(rename = "metrics.get")]
MetricsGet,
#[serde(rename = "dumps.*")]
DumpsAll,
#[serde(rename = "dumps.create")]
DumpsCreate,
#[serde(rename = "version")]
Version,
// NOTE: the variant is named `KeysAdd` but its serialized name is `keys.create`.
#[serde(rename = "keys.create")]
KeysAdd,
#[serde(rename = "keys.get")]
KeysGet,
#[serde(rename = "keys.update")]
KeysUpdate,
#[serde(rename = "keys.delete")]
KeysDelete,
}

View File

@ -1,14 +1,137 @@
use serde::{Deserialize, Serialize};
use std::{
fmt::{self, Display, Formatter},
marker::PhantomData,
str::FromStr,
};
use serde::{de::Visitor, Deserialize, Deserializer};
use uuid::Uuid;
#[derive(Serialize, Deserialize, Debug)]
use super::settings::{Settings, Unchecked};
/// Associates an index `uid` with its [`IndexMeta`].
#[derive(Deserialize, Debug)]
pub struct IndexUuid {
/// The uid of the index, kept as a raw string (no validation happens here).
pub uid: String,
/// Metadata attached to that index.
pub index_meta: IndexMeta,
}
#[derive(Serialize, Deserialize, Debug)]
/// Metadata stored alongside an index uid in [`IndexUuid`].
#[derive(Deserialize, Debug)]
pub struct IndexMeta {
/// UUID of the index.
pub uuid: Uuid,
/// Presumably the id of the task that created the index — TODO confirm against the dump format.
pub creation_task_id: usize,
}
/// Per-index metadata: there is one of these for each index, stored in its `meta.json` file.
#[derive(Deserialize)]
pub struct DumpMeta {
/// Settings of the index, not yet checked (see `Settings::check`).
pub settings: Settings<Unchecked>,
/// Primary key of the index, if one is set.
pub primary_key: Option<String>,
}
/// Newtype around the `String` uid of an index.
///
/// The inner field is public and `Deserialize` is derived, so a value can exist without
/// having gone through the format validation performed by `TryFrom<String>` / `FromStr`.
#[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct IndexUid(pub String);
impl TryFrom<String> for IndexUid {
    type Error = IndexUidFormatError;

    /// Validates `uid` and wraps it in an [`IndexUid`].
    ///
    /// A uid is accepted when it is non-empty, at most 400 bytes long, and made only of
    /// ASCII alphanumeric characters, hyphens (`-`) and underscores (`_`).
    ///
    /// # Errors
    ///
    /// Returns an [`IndexUidFormatError`] carrying the rejected string otherwise.
    fn try_from(uid: String) -> Result<Self, Self::Error> {
        let is_allowed = |c: char| c.is_ascii_alphanumeric() || matches!(c, '-' | '_');
        let well_formed = !uid.is_empty() && uid.len() <= 400 && uid.chars().all(is_allowed);

        if well_formed {
            Ok(IndexUid(uid))
        } else {
            Err(IndexUidFormatError { invalid_uid: uid })
        }
    }
}
impl FromStr for IndexUid {
type Err = IndexUidFormatError;
fn from_str(uid: &str) -> Result<IndexUid, IndexUidFormatError> {
uid.to_string().try_into()
}
}
impl From<IndexUid> for String {
    /// Unwraps an [`IndexUid`], returning the `String` it carries.
    fn from(uid: IndexUid) -> Self {
        // `IndexUid` is a tuple struct with a public inner field; read it directly rather
        // than going through an `into_inner` helper that this type does not define.
        uid.0
    }
}
/// Error produced when a string is not a valid index uid.
#[derive(Debug)]
pub struct IndexUidFormatError {
    /// The string that was rejected.
    pub invalid_uid: String,
}

impl Display for IndexUidFormatError {
    /// Writes a message quoting the rejected uid and describing the accepted characters.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let Self { invalid_uid } = self;
        write!(
            f,
            "invalid index uid `{}`, the uid must be an integer \
             or a string containing only alphanumeric characters \
             a-z A-Z 0-9, hyphens - and underscores _.",
            invalid_uid,
        )
    }
}

impl std::error::Error for IndexUidFormatError {}
/// A type that tries to match either a star (*) or
/// any other thing that implements `FromStr`.
#[derive(Debug)]
pub enum StarOr<T> {
/// The input was exactly `*`.
Star,
/// Any other input, parsed into a `T`.
Other(T),
}
impl<'de, T, E> Deserialize<'de> for StarOr<T>
where
T: FromStr<Err = E>,
E: Display,
{
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
/// Serde can't differentiate between `StarOr::Star` and `StarOr::Other` without a tag.
/// Simply using `#[serde(untagged)]` + `#[serde(rename="*")]` will lead to attempting to
/// deserialize everything as a `StarOr::Other`, including "*".
/// [`#[serde(other)]`](https://serde.rs/variant-attrs.html#other) might have helped but is
/// not supported on untagged enums.
struct StarOrVisitor<T>(PhantomData<T>);
impl<'de, T, FE> Visitor<'de> for StarOrVisitor<T>
where
T: FromStr<Err = FE>,
FE: Display,
{
type Value = StarOr<T>;
fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result {
formatter.write_str("a string")
}
fn visit_str<SE>(self, v: &str) -> Result<Self::Value, SE>
where
SE: serde::de::Error,
{
match v {
"*" => Ok(StarOr::Star),
v => {
let other = FromStr::from_str(v).map_err(|e: T::Err| {
SE::custom(format!("Invalid `other` value: {}", e))
})?;
Ok(StarOr::Other(other))
}
}
}
}
deserializer.deserialize_str(StarOrVisitor(PhantomData))
}
}

View File

@ -46,13 +46,15 @@ use uuid::Uuid;
use crate::{IndexMetadata, Result, Version};
use self::{
meta::IndexUuid,
keys::Key,
meta::{DumpMeta, IndexUuid},
settings::{Checked, Settings, Unchecked},
tasks::Task,
};
use super::{DumpReader, IndexReader};
mod keys;
mod meta;
mod settings;
mod tasks;
@ -75,27 +77,6 @@ pub struct V5Reader {
index_uuid: Vec<IndexUuid>,
}
/// Reader over the files of a single index.
struct V5IndexReader {
/// Index metadata, deserialized from the index's `metadata.json`.
metadata: IndexMetadata,
/// Buffered handle on the index's `documents.jsonl`.
documents: BufReader<File>,
/// Buffered handle on the index's `settings.json`.
settings: BufReader<File>,
}
impl V5IndexReader {
    /// Opens the index stored in the directory `path`.
    ///
    /// Parses `metadata.json` as JSON, then opens `documents.jsonl` and `settings.json`
    /// behind buffered readers. Any I/O or JSON error is propagated.
    ///
    /// NOTE(review): `name` is accepted but never read by this constructor.
    pub fn new(name: String, path: &Path) -> Result<Self> {
        let open = |file_name: &str| File::open(path.join(file_name));

        let metadata = serde_json::from_reader(open("metadata.json")?)?;
        let documents = BufReader::new(open("documents.jsonl")?);
        let settings = BufReader::new(open("settings.json")?);

        Ok(V5IndexReader {
            metadata,
            documents,
            settings,
        })
    }
}
impl V5Reader {
pub fn open(dump: TempDir) -> Result<Self> {
let meta_file = fs::read(dump.path().join("metadata.json"))?;
@ -124,8 +105,7 @@ impl DumpReader for V5Reader {
type Task = Task;
type UpdateFile = File;
// TODO: remove this
type Key = meilisearch_auth::Key;
type Key = Key;
fn version(&self) -> Version {
Version::V5
@ -190,7 +170,6 @@ impl DumpReader for V5Reader {
}))
}
// TODO: do it
fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Self::Key>> + '_> {
Box::new(
(&mut self.keys)
@ -200,6 +179,36 @@ impl DumpReader for V5Reader {
}
}
/// Reader over the files of a single index.
struct V5IndexReader {
/// Index metadata assembled in `V5IndexReader::new`.
metadata: IndexMetadata,
/// Settings of the index, already checked when the reader is built.
settings: Settings<Checked>,
/// Buffered handle on the index's `documents.jsonl`.
documents: BufReader<File>,
}
impl V5IndexReader {
pub fn new(name: String, path: &Path) -> Result<Self> {
let meta = File::open(path.join("meta.json"))?;
let meta: DumpMeta = serde_json::from_reader(meta)?;
let metadata = IndexMetadata {
uid: name,
primary_key: meta.primary_key,
// FIXME: Iterate over the whole task queue to find the creation and last update date.
created_at: OffsetDateTime::now_utc(),
updated_at: OffsetDateTime::now_utc(),
};
let ret = V5IndexReader {
metadata,
settings: meta.settings.check(),
documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
};
Ok(ret)
}
}
impl IndexReader for V5IndexReader {
type Document = serde_json::Map<String, serde_json::Value>;
type Settings = Settings<Checked>;
@ -215,7 +224,6 @@ impl IndexReader for V5IndexReader {
}
fn settings(&mut self) -> Result<Self::Settings> {
let settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
Ok(settings.check())
Ok(self.settings.clone())
}
}

View File

@ -2,7 +2,10 @@ use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use uuid::Uuid;
use super::settings::{Settings, Unchecked};
use super::{
meta::IndexUid,
settings::{Settings, Unchecked},
};
pub type TaskId = u32;
pub type BatchId = u32;
@ -56,9 +59,6 @@ pub enum TaskContent {
},
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct IndexUid(String);
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum IndexDocumentsMethod {
/// Replace the previous document with the new one,