introduce the index metadata

This commit is contained in:
Tamo 2022-10-04 19:13:30 +02:00 committed by Clément Renault
parent e845cc2b6f
commit 101f55ce8b
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
8 changed files with 753 additions and 53 deletions

View File

@ -21,6 +21,17 @@ struct Metadata {
pub dump_date: OffsetDateTime,
}
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct IndexMetadata {
pub uid: String,
pub primary_key: Option<String>,
#[serde(with = "time::serde::rfc3339")]
pub created_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
pub updated_at: OffsetDateTime,
}
#[derive(Debug, PartialEq, Eq, Deserialize, Serialize)]
pub enum Version {
V1,
@ -49,12 +60,21 @@ pub(crate) mod test {
use time::{macros::datetime, Duration};
use uuid::Uuid;
use crate::{reader, DumpWriter, Version};
use crate::{reader, DumpWriter, IndexMetadata, Version};
pub fn create_test_instance_uid() -> Uuid {
Uuid::parse_str("9e15e977-f2ae-4761-943f-1eaf75fd736d").unwrap()
}
pub fn create_test_index_metadata() -> IndexMetadata {
IndexMetadata {
uid: S("doggo"),
primary_key: None,
created_at: datetime!(2022-11-20 12:00 UTC),
updated_at: datetime!(2022-11-21 00:00 UTC),
}
}
pub fn create_test_documents() -> Vec<Map<String, Value>> {
vec![
json!({ "id": 1, "race": "golden retriever", "name": "paul", "age": 4 })
@ -186,7 +206,9 @@ pub(crate) mod test {
let documents = create_test_documents();
let settings = create_test_settings();
let mut index = dump.create_index("doggos").unwrap();
let mut index = dump
.create_index("doggos", &create_test_index_metadata())
.unwrap();
for document in &documents {
index.push_document(document).unwrap();
}
@ -217,7 +239,7 @@ pub(crate) mod test {
}
#[test]
fn test_creating_dump() {
fn test_creating_and_read_dump() {
let mut file = create_test_dump();
let mut dump = reader::open(&mut file).unwrap();
@ -234,12 +256,14 @@ pub(crate) mod test {
let mut index = indexes.next().unwrap().unwrap();
assert!(indexes.next().is_none()); // there was only one index in the dump
assert_eq!(index.name(), "doggos");
for (document, expected) in index.documents().unwrap().zip(create_test_documents()) {
assert_eq!(document.unwrap(), expected);
}
assert_eq!(index.settings().unwrap(), create_test_settings());
assert_eq!(index.metadata(), &create_test_index_metadata());
drop(index);
drop(indexes);
// ==== checking the task queue
for (task, expected) in dump.tasks().zip(create_test_tasks()) {

View File

@ -12,7 +12,7 @@ use tempfile::TempDir;
use time::OffsetDateTime;
use uuid::Uuid;
use crate::{Result, Version};
use crate::{IndexMetadata, Result, Version};
// use self::loaders::{v2, v3, v4, v5};
@ -20,6 +20,7 @@ use crate::{Result, Version};
// mod compat;
// mod loaders;
// mod v1;
mod v5;
mod v6;
pub fn open(
@ -97,10 +98,13 @@ pub trait DumpReader {
) -> Result<
Box<
dyn Iterator<
Item = Result<
Box<dyn IndexReader<Document = Self::Document, Settings = Self::Settings>>,
>,
>,
Item = Result<
Box<
dyn IndexReader<Document = Self::Document, Settings = Self::Settings>
+ '_,
>,
>,
> + '_,
>,
>;
@ -117,7 +121,7 @@ pub trait IndexReader {
type Document;
type Settings;
fn name(&self) -> &str;
fn metadata(&self) -> &IndexMetadata;
fn documents(&mut self) -> Result<Box<dyn Iterator<Item = Result<Self::Document>> + '_>>;
fn settings(&mut self) -> Result<Self::Settings>;
}

View File

@ -0,0 +1,14 @@
use serde::{Deserialize, Serialize};
use uuid::Uuid;
#[derive(Serialize, Deserialize, Debug)]
pub struct IndexUuid {
pub uid: String,
pub index_meta: IndexMeta,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct IndexMeta {
pub uuid: Uuid,
pub creation_task_id: usize,
}

221
dump/src/reader/v5/mod.rs Normal file
View File

@ -0,0 +1,221 @@
//! Here is what a dump v5 look like.
//!
//! ```text
//! .
//! ├── indexes
//! │   ├── 22c269d8-fbbd-4416-bd46-7c7c02849325
//! │   │   ├── documents.jsonl
//! │   │   └── meta.json
//! │   ├── 6d0471ba-2ed1-41de-8ea6-10db10fa2bb8
//! │   │   ├── documents.jsonl
//! │   │   └── meta.json
//! │   └── f7d53ec4-0748-48e6-b66f-1fca9944b0fa
//! │   ├── documents.jsonl
//! │   └── meta.json
//! ├── index_uuids
//! │   └── data.jsonl
//! ├── instance-uid
//! ├── keys
//! ├── metadata.json
//! └── updates
//! ├── data.jsonl
//! └── updates_files
//! └── c83a004a-da98-4b94-b245-3256266c7281
//! ```
//!
//! Here is what `index_uuids/data.jsonl` looks like;
//!
//! ```json
//! {"uid":"dnd_spells","index_meta":{"uuid":"22c269d8-fbbd-4416-bd46-7c7c02849325","creation_task_id":9}}
//! {"uid":"movies","index_meta":{"uuid":"6d0471ba-2ed1-41de-8ea6-10db10fa2bb8","creation_task_id":1}}
//! {"uid":"products","index_meta":{"uuid":"f7d53ec4-0748-48e6-b66f-1fca9944b0fa","creation_task_id":4}}
//! ```
//!
use std::{
fs::{self, File},
io::{BufRead, BufReader},
path::Path,
};
use serde::{Deserialize, Serialize};
use tempfile::TempDir;
use time::OffsetDateTime;
use uuid::Uuid;
use crate::{IndexMetadata, Result, Version};
use self::{
meta::IndexUuid,
settings::{Checked, Settings, Unchecked},
tasks::Task,
};
use super::{DumpReader, IndexReader};
mod meta;
mod settings;
mod tasks;
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct Metadata {
db_version: String,
index_db_size: usize,
update_db_size: usize,
#[serde(with = "time::serde::rfc3339")]
dump_date: OffsetDateTime,
}
pub struct V5Reader {
dump: TempDir,
metadata: Metadata,
tasks: BufReader<File>,
keys: BufReader<File>,
index_uuid: Vec<IndexUuid>,
}
struct V5IndexReader {
metadata: IndexMetadata,
documents: BufReader<File>,
settings: BufReader<File>,
}
impl V5IndexReader {
pub fn new(name: String, path: &Path) -> Result<Self> {
let metadata = File::open(path.join("metadata.json"))?;
let ret = V5IndexReader {
metadata: serde_json::from_reader(metadata)?,
documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
settings: BufReader::new(File::open(path.join("settings.json"))?),
};
Ok(ret)
}
}
impl V5Reader {
pub fn open(dump: TempDir) -> Result<Self> {
let meta_file = fs::read(dump.path().join("metadata.json"))?;
let metadata = serde_json::from_reader(&*meta_file)?;
let index_uuid = File::open(dump.path().join("index_uuids/data.jsonl"))?;
let index_uuid = BufReader::new(index_uuid);
let index_uuid = index_uuid
.lines()
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })
.collect::<Result<Vec<_>>>()?;
Ok(V5Reader {
metadata,
tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?),
keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?),
index_uuid,
dump,
})
}
}
impl DumpReader for V5Reader {
type Document = serde_json::Map<String, serde_json::Value>;
type Settings = Settings<Checked>;
type Task = Task;
type UpdateFile = File;
// TODO: remove this
type Key = meilisearch_auth::Key;
fn version(&self) -> Version {
Version::V5
}
fn date(&self) -> Option<OffsetDateTime> {
Some(self.metadata.dump_date)
}
fn instance_uid(&self) -> Result<Option<Uuid>> {
let uuid = fs::read_to_string(self.dump.path().join("instance-uid"))?;
Ok(Some(Uuid::parse_str(&uuid)?))
}
fn indexes(
&self,
) -> Result<
Box<
dyn Iterator<
Item = Result<
Box<
dyn super::IndexReader<
Document = Self::Document,
Settings = Self::Settings,
> + '_,
>,
>,
> + '_,
>,
> {
Ok(Box::new(self.index_uuid.iter().map(|index| -> Result<_> {
Ok(Box::new(V5IndexReader::new(
index.uid.clone(),
&self
.dump
.path()
.join("indexes")
.join(index.index_meta.uuid.to_string()),
)?)
as Box<
dyn IndexReader<Document = Self::Document, Settings = Self::Settings>,
>)
})))
}
fn tasks(
&mut self,
) -> Box<dyn Iterator<Item = Result<(Self::Task, Option<Self::UpdateFile>)>> + '_> {
Box::new((&mut self.tasks).lines().map(|line| -> Result<_> {
let task: Self::Task = serde_json::from_str(&line?)?;
if let Some(uuid) = task.get_content_uuid() {
let update_file_path = self
.dump
.path()
.join("updates")
.join("update_files")
.join(uuid.to_string());
Ok((task, Some(File::open(update_file_path)?)))
} else {
Ok((task, None))
}
}))
}
// TODO: do it
fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Self::Key>> + '_> {
Box::new(
(&mut self.keys)
.lines()
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }),
)
}
}
impl IndexReader for V5IndexReader {
type Document = serde_json::Map<String, serde_json::Value>;
type Settings = Settings<Checked>;
fn metadata(&self) -> &IndexMetadata {
&self.metadata
}
fn documents(&mut self) -> Result<Box<dyn Iterator<Item = Result<Self::Document>> + '_>> {
Ok(Box::new((&mut self.documents).lines().map(
|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) },
)))
}
fn settings(&mut self) -> Result<Self::Settings> {
let settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
Ok(settings.check())
}
}

View File

@ -0,0 +1,251 @@
use std::{
collections::{BTreeMap, BTreeSet},
marker::PhantomData,
};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
#[derive(Clone, Default, Debug, Serialize, PartialEq, Eq)]
pub struct Checked;
#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct Unchecked;
/// Holds all the settings for an index. `T` can either be `Checked` if they represents settings
/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a
/// call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`.
#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
#[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))]
pub struct Settings<T> {
#[serde(default)]
pub displayed_attributes: Setting<Vec<String>>,
#[serde(default)]
pub searchable_attributes: Setting<Vec<String>>,
#[serde(default)]
pub filterable_attributes: Setting<BTreeSet<String>>,
#[serde(default)]
pub sortable_attributes: Setting<BTreeSet<String>>,
#[serde(default)]
pub ranking_rules: Setting<Vec<String>>,
#[serde(default)]
pub stop_words: Setting<BTreeSet<String>>,
#[serde(default)]
pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
#[serde(default)]
pub distinct_attribute: Setting<String>,
#[serde(default)]
pub typo_tolerance: Setting<TypoSettings>,
#[serde(default)]
pub faceting: Setting<FacetingSettings>,
#[serde(default)]
pub pagination: Setting<PaginationSettings>,
#[serde(skip)]
pub _kind: PhantomData<T>,
}
fn serialize_with_wildcard<S>(
field: &Setting<Vec<String>>,
s: S,
) -> std::result::Result<S::Ok, S::Error>
where
S: Serializer,
{
let wildcard = vec!["*".to_string()];
match field {
Setting::Set(value) => Some(value),
Setting::Reset => Some(&wildcard),
Setting::NotSet => None,
}
.serialize(s)
}
#[derive(Debug, Clone, PartialEq, Copy)]
pub enum Setting<T> {
Set(T),
Reset,
NotSet,
}
impl<T> Default for Setting<T> {
fn default() -> Self {
Self::NotSet
}
}
impl<T> Setting<T> {
pub fn set(self) -> Option<T> {
match self {
Self::Set(value) => Some(value),
_ => None,
}
}
pub const fn as_ref(&self) -> Setting<&T> {
match *self {
Self::Set(ref value) => Setting::Set(value),
Self::Reset => Setting::Reset,
Self::NotSet => Setting::NotSet,
}
}
pub const fn is_not_set(&self) -> bool {
matches!(self, Self::NotSet)
}
/// If `Self` is `Reset`, then map self to `Set` with the provided `val`.
pub fn or_reset(self, val: T) -> Self {
match self {
Self::Reset => Self::Set(val),
otherwise => otherwise,
}
}
}
impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting<T> {
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
where
D: Deserializer<'de>,
{
Deserialize::deserialize(deserializer).map(|x| match x {
Some(x) => Self::Set(x),
None => Self::Reset, // Reset is forced by sending null value
})
}
}
#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct MinWordSizeTyposSetting {
#[serde(default)]
pub one_typo: Setting<u8>,
#[serde(default)]
pub two_typos: Setting<u8>,
}
#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct TypoSettings {
#[serde(default)]
pub enabled: Setting<bool>,
#[serde(default)]
pub min_word_size_for_typos: Setting<MinWordSizeTyposSetting>,
#[serde(default)]
pub disable_on_words: Setting<BTreeSet<String>>,
#[serde(default)]
pub disable_on_attributes: Setting<BTreeSet<String>>,
}
#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct FacetingSettings {
#[serde(default)]
pub max_values_per_facet: Setting<usize>,
}
#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct PaginationSettings {
#[serde(default)]
pub max_total_hits: Setting<usize>,
}
impl Settings<Checked> {
pub fn cleared() -> Settings<Checked> {
Settings {
displayed_attributes: Setting::Reset,
searchable_attributes: Setting::Reset,
filterable_attributes: Setting::Reset,
sortable_attributes: Setting::Reset,
ranking_rules: Setting::Reset,
stop_words: Setting::Reset,
synonyms: Setting::Reset,
distinct_attribute: Setting::Reset,
typo_tolerance: Setting::Reset,
faceting: Setting::Reset,
pagination: Setting::Reset,
_kind: PhantomData,
}
}
pub fn into_unchecked(self) -> Settings<Unchecked> {
let Self {
displayed_attributes,
searchable_attributes,
filterable_attributes,
sortable_attributes,
ranking_rules,
stop_words,
synonyms,
distinct_attribute,
typo_tolerance,
faceting,
pagination,
..
} = self;
Settings {
displayed_attributes,
searchable_attributes,
filterable_attributes,
sortable_attributes,
ranking_rules,
stop_words,
synonyms,
distinct_attribute,
typo_tolerance,
faceting,
pagination,
_kind: PhantomData,
}
}
}
impl Settings<Unchecked> {
pub fn check(self) -> Settings<Checked> {
let displayed_attributes = match self.displayed_attributes {
Setting::Set(fields) => {
if fields.iter().any(|f| f == "*") {
Setting::Reset
} else {
Setting::Set(fields)
}
}
otherwise => otherwise,
};
let searchable_attributes = match self.searchable_attributes {
Setting::Set(fields) => {
if fields.iter().any(|f| f == "*") {
Setting::Reset
} else {
Setting::Set(fields)
}
}
otherwise => otherwise,
};
Settings {
displayed_attributes,
searchable_attributes,
filterable_attributes: self.filterable_attributes,
sortable_attributes: self.sortable_attributes,
ranking_rules: self.ranking_rules,
stop_words: self.stop_words,
synonyms: self.synonyms,
distinct_attribute: self.distinct_attribute,
typo_tolerance: self.typo_tolerance,
faceting: self.faceting,
pagination: self.pagination,
_kind: PhantomData,
}
}
}

173
dump/src/reader/v5/tasks.rs Normal file
View File

@ -0,0 +1,173 @@
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use uuid::Uuid;
use super::settings::{Settings, Unchecked};
pub type TaskId = u32;
pub type BatchId = u32;
#[derive(Clone, Debug, Deserialize, PartialEq)]
pub struct Task {
pub id: TaskId,
/// The name of the index the task is targeting. If it isn't targeting any index (i.e Dump task)
/// then this is None
// TODO: when next forward breaking dumps, it would be a good idea to move this field inside of
// the TaskContent.
pub content: TaskContent,
pub events: Vec<TaskEvent>,
}
#[derive(Clone, Debug, Deserialize, PartialEq)]
#[allow(clippy::large_enum_variant)]
pub enum TaskContent {
DocumentAddition {
index_uid: IndexUid,
content_uuid: Uuid,
merge_strategy: IndexDocumentsMethod,
primary_key: Option<String>,
documents_count: usize,
allow_index_creation: bool,
},
DocumentDeletion {
index_uid: IndexUid,
deletion: DocumentDeletion,
},
SettingsUpdate {
index_uid: IndexUid,
settings: Settings<Unchecked>,
/// Indicates whether the task was a deletion
is_deletion: bool,
allow_index_creation: bool,
},
IndexDeletion {
index_uid: IndexUid,
},
IndexCreation {
index_uid: IndexUid,
primary_key: Option<String>,
},
IndexUpdate {
index_uid: IndexUid,
primary_key: Option<String>,
},
Dump {
uid: String,
},
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct IndexUid(String);
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum IndexDocumentsMethod {
/// Replace the previous document with the new one,
/// removing all the already known attributes.
ReplaceDocuments,
/// Merge the previous version of the document with the new version,
/// replacing old attributes values with the new ones and add the new attributes.
UpdateDocuments,
}
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub enum DocumentDeletion {
Clear,
Ids(Vec<String>),
}
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub enum TaskEvent {
Created(#[serde(with = "time::serde::rfc3339")] OffsetDateTime),
Batched {
#[serde(with = "time::serde::rfc3339")]
timestamp: OffsetDateTime,
batch_id: BatchId,
},
Processing(#[serde(with = "time::serde::rfc3339")] OffsetDateTime),
Succeeded {
result: TaskResult,
#[serde(with = "time::serde::rfc3339")]
timestamp: OffsetDateTime,
},
Failed {
error: ResponseError,
#[serde(with = "time::serde::rfc3339")]
timestamp: OffsetDateTime,
},
}
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub enum TaskResult {
DocumentAddition { indexed_documents: u64 },
DocumentDeletion { deleted_documents: u64 },
ClearAll { deleted_documents: u64 },
Other,
}
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct ResponseError {
message: String,
#[serde(rename = "code")]
error_code: String,
#[serde(rename = "type")]
error_type: String,
#[serde(rename = "link")]
error_link: String,
}
impl Task {
/// Return true when a task is finished.
/// A task is finished when its last state is either `Succeeded` or `Failed`.
pub fn is_finished(&self) -> bool {
self.events.last().map_or(false, |event| {
matches!(
event,
TaskEvent::Succeeded { .. } | TaskEvent::Failed { .. }
)
})
}
/// Return the content_uuid of the `Task` if there is one.
pub fn get_content_uuid(&self) -> Option<Uuid> {
match self {
Task {
content: TaskContent::DocumentAddition { content_uuid, .. },
..
} => Some(*content_uuid),
_ => None,
}
}
pub fn index_uid(&self) -> Option<&str> {
match &self.content {
TaskContent::DocumentAddition { index_uid, .. }
| TaskContent::DocumentDeletion { index_uid, .. }
| TaskContent::SettingsUpdate { index_uid, .. }
| TaskContent::IndexDeletion { index_uid }
| TaskContent::IndexCreation { index_uid, .. }
| TaskContent::IndexUpdate { index_uid, .. } => Some(index_uid.as_str()),
TaskContent::Dump { .. } => None,
}
}
}
impl IndexUid {
pub fn into_inner(self) -> String {
self.0
}
/// Return a reference over the inner str.
pub fn as_str(&self) -> &str {
&self.0
}
}
impl std::ops::Deref for IndexUid {
type Target = str;
fn deref(&self) -> &Self::Target {
&self.0
}
}

View File

@ -2,6 +2,7 @@ use std::{
fs::{self, File},
io::{BufRead, BufReader},
path::Path,
str::FromStr,
};
use index::{Checked, Unchecked};
@ -9,50 +10,35 @@ use tempfile::TempDir;
use time::OffsetDateTime;
use uuid::Uuid;
use crate::{Error, Result, Version};
use crate::{Error, IndexMetadata, Result, Version};
use super::{DumpReader, IndexReader};
type Metadata = crate::Metadata;
pub fn date(dump: &Path) -> Result<OffsetDateTime> {
let metadata = fs::read(dump.join("metadata.json"))?;
let metadata: Metadata = serde_json::from_reader(&*metadata)?;
Ok(metadata.dump_date)
}
pub struct V6Reader {
dump: TempDir,
instance_uid: Uuid,
metadata: Metadata,
tasks: BufReader<File>,
keys: BufReader<File>,
}
struct V6IndexReader {
name: String,
metadata: IndexMetadata,
documents: BufReader<File>,
settings: BufReader<File>,
}
impl V6IndexReader {
pub fn new(name: String, path: &Path) -> Result<Self> {
let ret = V6IndexReader {
name,
documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
settings: BufReader::new(File::open(path.join("settings.json"))?),
};
Ok(ret)
}
}
impl V6Reader {
pub fn open(dump: TempDir) -> Result<Self> {
let meta_file = fs::read(dump.path().join("metadata.json"))?;
let metadata = serde_json::from_reader(&*meta_file)?;
let instance_uid = fs::read_to_string(dump.path().join("instance_uid.uuid"))?;
let instance_uid = Uuid::from_str(&instance_uid)?;
Ok(V6Reader {
metadata,
metadata: serde_json::from_reader(&*meta_file)?,
instance_uid,
tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?),
keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?),
dump,
@ -60,6 +46,20 @@ impl V6Reader {
}
}
impl V6IndexReader {
pub fn new(name: String, path: &Path) -> Result<Self> {
let metadata = File::open(path.join("metadata.json"))?;
let ret = V6IndexReader {
metadata: serde_json::from_reader(metadata)?,
documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
settings: BufReader::new(File::open(path.join("settings.json"))?),
};
Ok(ret)
}
}
impl DumpReader for V6Reader {
type Document = serde_json::Map<String, serde_json::Value>;
type Settings = index::Settings<Checked>;
@ -78,8 +78,7 @@ impl DumpReader for V6Reader {
}
fn instance_uid(&self) -> Result<Option<Uuid>> {
let uuid = fs::read_to_string(self.dump.path().join("instance-uid"))?;
Ok(Some(Uuid::parse_str(&uuid)?))
Ok(Some(self.instance_uid))
}
fn indexes(
@ -87,15 +86,15 @@ impl DumpReader for V6Reader {
) -> Result<
Box<
dyn Iterator<
Item = Result<
Box<
dyn super::IndexReader<
Document = Self::Document,
Settings = Self::Settings,
Item = Result<
Box<
dyn super::IndexReader<
Document = Self::Document,
Settings = Self::Settings,
> + '_,
>,
>,
>,
>,
> + '_,
>,
> {
let entries = fs::read_dir(self.dump.path().join("indexes"))?;
@ -164,8 +163,8 @@ impl IndexReader for V6IndexReader {
type Document = serde_json::Map<String, serde_json::Value>;
type Settings = index::Settings<Checked>;
fn name(&self) -> &str {
&self.name
fn metadata(&self) -> &IndexMetadata {
&self.metadata
}
fn documents(&mut self) -> Result<Box<dyn Iterator<Item = Result<Self::Document>> + '_>> {

View File

@ -8,13 +8,12 @@ use flate2::{write::GzEncoder, Compression};
use index::{Checked, Settings};
use index_scheduler::TaskView;
use meilisearch_auth::Key;
use serde::Serialize;
use serde_json::{Map, Value};
use tempfile::TempDir;
use time::OffsetDateTime;
use uuid::Uuid;
use crate::{Metadata, Result, CURRENT_DUMP_VERSION};
use crate::{IndexMetadata, Metadata, Result, CURRENT_DUMP_VERSION};
pub struct DumpWriter {
dir: TempDir,
@ -23,8 +22,9 @@ pub struct DumpWriter {
impl DumpWriter {
pub fn new(instance_uuid: Uuid) -> Result<DumpWriter> {
let dir = TempDir::new()?;
fs::write(
dir.path().join("instance-uid"),
dir.path().join("instance_uid.uuid"),
&instance_uuid.as_hyphenated().to_string(),
)?;
@ -43,8 +43,8 @@ impl DumpWriter {
Ok(DumpWriter { dir })
}
pub fn create_index(&self, index_name: &str) -> Result<IndexWriter> {
IndexWriter::new(self.dir.path().join("indexes").join(index_name))
pub fn create_index(&self, index_name: &str, metadata: &IndexMetadata) -> Result<IndexWriter> {
IndexWriter::new(self.dir.path().join("indexes").join(index_name), metadata)
}
pub fn create_keys(&self) -> Result<KeyWriter> {
@ -126,9 +126,12 @@ pub struct IndexWriter {
}
impl IndexWriter {
pub(crate) fn new(path: PathBuf) -> Result<Self> {
pub(self) fn new(path: PathBuf, metadata: &IndexMetadata) -> Result<Self> {
std::fs::create_dir(&path)?;
let metadata_file = File::create(path.join("metadata.json"))?;
serde_json::to_writer(metadata_file, metadata)?;
let documents = File::create(path.join("documents.jsonl"))?;
let settings = File::create(path.join("settings.json"))?;
@ -243,14 +246,15 @@ pub(crate) mod test {
---- indexes/
---- doggos/
---- settings.json
---- documents.jsonl
---- documents.jsonl
---- metadata.json
---- tasks/
---- update_files/
---- 1
---- queue.jsonl
---- keys.jsonl
---- metadata.json
---- instance-uid
---- instance_uid.uuid
"###);
// ==== checking the top level infos
@ -264,7 +268,7 @@ pub(crate) mod test {
}
"###);
let instance_uid = fs::read_to_string(dump_path.join("instance-uid")).unwrap();
let instance_uid = fs::read_to_string(dump_path.join("instance_uid.uuid")).unwrap();
assert_eq!(
Uuid::from_str(&instance_uid).unwrap(),
create_test_instance_uid()
@ -284,6 +288,16 @@ pub(crate) mod test {
serde_json::from_str::<Settings<Unchecked>>(&test_settings).unwrap(),
create_test_settings().into_unchecked()
);
let metadata = fs::read_to_string(dump_path.join("indexes/doggos/metadata.json")).unwrap();
let metadata: IndexMetadata = serde_json::from_str(&metadata).unwrap();
insta::assert_json_snapshot!(metadata, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }, @r###"
{
"uid": "doggo",
"primaryKey": null,
"createdAt": "[date]",
"updatedAt": "[date]"
}
"###);
// ==== checking the task queue
let tasks_queue = fs::read_to_string(dump_path.join("tasks/queue.jsonl")).unwrap();