diff --git a/dump/src/reader/v1/mod.rs b/dump/src/reader/v1/mod.rs index f638262cc..1932b602a 100644 --- a/dump/src/reader/v1/mod.rs +++ b/dump/src/reader/v1/mod.rs @@ -1,173 +1,263 @@ use std::{ - convert::Infallible, fs::{self, File}, io::{BufRead, BufReader}, - path::Path, + path::{Path, PathBuf}, }; use tempfile::TempDir; use time::OffsetDateTime; -use self::update::UpdateStatus; - -use super::{DumpReader, IndexReader}; -use crate::{Error, Result, Version}; +use super::{compat::v1_to_v2::CompatV1ToV2, Document}; +use crate::{IndexMetadata, Result, Version}; +use serde::Deserialize; pub mod settings; pub mod update; -pub mod v1; pub struct V1Reader { - dump: TempDir, - metadata: v1::Metadata, - indexes: Vec, + pub dump: TempDir, + pub db_version: String, + pub dump_version: crate::Version, + indexes: Vec, } -struct V1IndexReader { - name: String, +pub struct IndexUuid { + pub name: String, + pub uid: String, +} +pub type Task = self::update::UpdateStatus; + +struct V1Index { + metadata: IndexMetadataV1, + path: PathBuf, +} + +impl V1Index { + pub fn new(path: PathBuf, metadata: Index) -> Self { + Self { metadata: metadata.into(), path } + } + + pub fn open(&self) -> Result { + V1IndexReader::new(&self.path, self.metadata.clone()) + } + + pub fn metadata(&self) -> &IndexMetadata { + &self.metadata.metadata + } +} + +pub struct V1IndexReader { + metadata: IndexMetadataV1, documents: BufReader, settings: BufReader, updates: BufReader, - - current_update: Option, } impl V1IndexReader { - pub fn new(name: String, path: &Path) -> Result { - let mut ret = V1IndexReader { - name, + pub fn new(path: &Path, metadata: IndexMetadataV1) -> Result { + Ok(V1IndexReader { + metadata, documents: BufReader::new(File::open(path.join("documents.jsonl"))?), settings: BufReader::new(File::open(path.join("settings.json"))?), updates: BufReader::new(File::open(path.join("updates.jsonl"))?), - current_update: None, - }; - ret.next_update(); - - Ok(ret) + }) } - pub fn next_update(&mut self) -> Result> { - let current_update = if let Some(line) = self.updates.lines().next() { - Some(serde_json::from_str(&line?)?) - } else { - None - }; + pub fn metadata(&self) -> &IndexMetadata { + &self.metadata.metadata + } - Ok(std::mem::replace(&mut self.current_update, current_update)) + pub fn documents(&mut self) -> Result> + '_> { + Ok((&mut self.documents) + .lines() + .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })) + } + + pub fn settings(&mut self) -> Result { + Ok(serde_json::from_reader(&mut self.settings)?) + } + + pub fn tasks(self) -> impl Iterator> { + self.updates.lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }) } } impl V1Reader { pub fn open(dump: TempDir) -> Result { - let mut meta_file = fs::read(dump.path().join("metadata.json"))?; - let metadata = serde_json::from_reader(&*meta_file)?; + let meta_file = fs::read(dump.path().join("metadata.json"))?; + let metadata: Metadata = serde_json::from_reader(&*meta_file)?; let mut indexes = Vec::new(); - let entries = fs::read_dir(dump.path())?; - for entry in entries { - let entry = entry?; - if entry.file_type()?.is_dir() { - indexes.push(V1IndexReader::new( - entry - .file_name() - .to_str() - .ok_or(Error::BadIndexName)? - .to_string(), - &entry.path(), - )?); - } + for index in metadata.indexes.into_iter() { + let index_path = dump.path().join(&index.uid); + indexes.push(V1Index::new(index_path, index)); } Ok(V1Reader { dump, - metadata, indexes, + db_version: metadata.db_version, + dump_version: metadata.dump_version, }) } - fn next_update(&mut self) -> Result> { - if let Some((idx, _)) = self - .indexes + pub fn to_v2(self) -> CompatV1ToV2 { + CompatV1ToV2 { from: self } + } + + pub fn index_uuid(&self) -> Vec { + self.indexes .iter() - .map(|index| index.current_update) - .enumerate() - .filter_map(|(idx, update)| update.map(|u| (idx, u))) - .min_by_key(|(_, update)| update.enqueued_at()) - { - self.indexes[idx].next_update() - } else { - Ok(None) + .map(|index| IndexUuid { + name: index.metadata.name.to_owned(), + uid: index.metadata().uid.to_owned(), + }) + .collect() + } + + pub fn version(&self) -> Version { + Version::V1 + } + + pub fn date(&self) -> Option { + None + } + + pub fn indexes(&self) -> Result> + '_> { + Ok(self.indexes.iter().map(|index| index.open())) + } +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct Index { + pub name: String, + pub uid: String, + #[serde(with = "time::serde::rfc3339")] + created_at: OffsetDateTime, + #[serde(with = "time::serde::rfc3339")] + updated_at: OffsetDateTime, + pub primary_key: Option, +} + +#[derive(Clone)] +pub struct IndexMetadataV1 { + pub name: String, + pub metadata: crate::IndexMetadata, +} + +impl From for IndexMetadataV1 { + fn from(index: Index) -> Self { + IndexMetadataV1 { + name: index.name, + metadata: crate::IndexMetadata { + uid: index.uid, + primary_key: index.primary_key, + created_at: index.created_at, + updated_at: index.updated_at, + }, } } } -impl IndexReader for &V1IndexReader { - type Document = serde_json::Map; - type Settings = settings::Settings; - - fn name(&self) -> &str { - todo!() - } - - fn documents(&self) -> Result>>> { - todo!() - } - - fn settings(&self) -> Result { - todo!() - } +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct Metadata { + pub indexes: Vec, + pub db_version: String, + pub dump_version: crate::Version, } -impl DumpReader for V1Reader { - type Document = serde_json::Map; - type Settings = settings::Settings; +#[cfg(test)] +pub(crate) mod test { + use std::fs::File; + use std::io::BufReader; - type Task = update::UpdateStatus; - type UpdateFile = Infallible; + use flate2::bufread::GzDecoder; + use meili_snap::insta; + use tempfile::TempDir; - type Key = Infallible; + use super::*; - fn date(&self) -> Option { - None - } + #[test] + fn read_dump_v1() { + let dump = File::open("tests/assets/v1.dump").unwrap(); + let dir = TempDir::new().unwrap(); + let mut dump = BufReader::new(dump); + let gz = GzDecoder::new(&mut dump); + let mut archive = tar::Archive::new(gz); + archive.unpack(dir.path()).unwrap(); - fn version(&self) -> Version { - Version::V1 - } + let dump = V1Reader::open(dir).unwrap(); - fn indexes( - &self, - ) -> Result< - Box< - dyn Iterator< - Item = Result< - Box< - dyn super::IndexReader< - Document = Self::Document, - Settings = Self::Settings, - >, - >, - >, - >, - >, - > { - Ok(Box::new(self.indexes.iter().map(|index| { - let index = Box::new(index) - as Box>; - Ok(index) - }))) - } + // top level infos + assert_eq!(dump.date(), None); - fn tasks(&self) -> Box)>>> { - Box::new(std::iter::from_fn(|| { - self.next_update() - .transpose() - .map(|result| result.map(|task| (task, None))) - })) - } + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); - fn keys(&self) -> Box>> { - Box::new(std::iter::empty()) + let mut products = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut dnd_spells = indexes.pop().unwrap(); + + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + insta::assert_json_snapshot!(products.settings().unwrap()); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca"); + + // products tasks + let tasks = products.tasks().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"91de507f206ad21964584021932ba7a7"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + insta::assert_json_snapshot!(movies.settings().unwrap()); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b63dbed5bbc059f3e32bc471ae699bf5"); + + // movies tasks + let tasks = movies.tasks().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"55eef4de2bef7e84c5ce0bee47488f56"); + + // spells + insta::assert_json_snapshot!(dnd_spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + insta::assert_json_snapshot!(dnd_spells.settings().unwrap()); + let documents = dnd_spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"aa24c0cfc733d66c396237ad44263bed"); + + // spells tasks + let tasks = dnd_spells.tasks().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"836dd7d64d5ad20ad901c44b1b161a4c"); } } diff --git a/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-10.snap b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-10.snap new file mode 100644 index 000000000..f71df0ae6 --- /dev/null +++ b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-10.snap @@ -0,0 +1,24 @@ +--- +source: dump/src/reader/v1/mod.rs +expression: dnd_spells.settings().unwrap() +--- +{ + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "wordsPosition", + "exactness" + ], + "distinctAttribute": null, + "searchableAttributes": [ + "*" + ], + "displayedAttributes": [ + "*" + ], + "stopWords": [], + "synonyms": {}, + "attributesForFaceting": [] +} diff --git a/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-12.snap b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-12.snap new file mode 100644 index 000000000..f71df0ae6 --- /dev/null +++ b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-12.snap @@ -0,0 +1,24 @@ +--- +source: dump/src/reader/v1/mod.rs +expression: dnd_spells.settings().unwrap() +--- +{ + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "wordsPosition", + "exactness" + ], + "distinctAttribute": null, + "searchableAttributes": [ + "*" + ], + "displayedAttributes": [ + "*" + ], + "stopWords": [], + "synonyms": {}, + "attributesForFaceting": [] +} diff --git a/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-2.snap b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-2.snap new file mode 100644 index 000000000..b117c5f3d --- /dev/null +++ b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-2.snap @@ -0,0 +1,38 @@ +--- +source: dump/src/reader/v1/mod.rs +expression: products.settings().unwrap() +--- +{ + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "wordsPosition", + "exactness" + ], + "distinctAttribute": null, + "searchableAttributes": [ + "*" + ], + "displayedAttributes": [ + "*" + ], + "stopWords": [], + "synonyms": { + "android": [ + "phone", + "smartphone" + ], + "iphone": [ + "phone", + "smartphone" + ], + "phone": [ + "android", + "iphone", + "smartphone" + ] + }, + "attributesForFaceting": [] +} diff --git a/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-5.snap b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-5.snap new file mode 100644 index 000000000..aa9ed082a --- /dev/null +++ b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-5.snap @@ -0,0 +1,28 @@ +--- +source: dump/src/reader/v1/mod.rs +expression: movies.settings().unwrap() +--- +{ + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "wordsPosition", + "exactness", + "asc(release_date)" + ], + "distinctAttribute": null, + "searchableAttributes": [ + "*" + ], + "displayedAttributes": [ + "*" + ], + "stopWords": [], + "synonyms": {}, + "attributesForFaceting": [ + "id", + "genres" + ] +} diff --git a/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-6.snap b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-6.snap new file mode 100644 index 000000000..aa9ed082a --- /dev/null +++ b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-6.snap @@ -0,0 +1,28 @@ +--- +source: dump/src/reader/v1/mod.rs +expression: movies.settings().unwrap() +--- +{ + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "wordsPosition", + "exactness", + "asc(release_date)" + ], + "distinctAttribute": null, + "searchableAttributes": [ + "*" + ], + "displayedAttributes": [ + "*" + ], + "stopWords": [], + "synonyms": {}, + "attributesForFaceting": [ + "id", + "genres" + ] +} diff --git a/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-7.snap b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-7.snap new file mode 100644 index 000000000..aa9ed082a --- /dev/null +++ b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-7.snap @@ -0,0 +1,28 @@ +--- +source: dump/src/reader/v1/mod.rs +expression: movies.settings().unwrap() +--- +{ + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "wordsPosition", + "exactness", + "asc(release_date)" + ], + "distinctAttribute": null, + "searchableAttributes": [ + "*" + ], + "displayedAttributes": [ + "*" + ], + "stopWords": [], + "synonyms": {}, + "attributesForFaceting": [ + "id", + "genres" + ] +} diff --git a/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-8.snap b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-8.snap new file mode 100644 index 000000000..f71df0ae6 --- /dev/null +++ b/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-8.snap @@ -0,0 +1,24 @@ +--- +source: dump/src/reader/v1/mod.rs +expression: dnd_spells.settings().unwrap() +--- +{ + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "wordsPosition", + "exactness" + ], + "distinctAttribute": null, + "searchableAttributes": [ + "*" + ], + "displayedAttributes": [ + "*" + ], + "stopWords": [], + "synonyms": {}, + "attributesForFaceting": [] +} diff --git a/dump/src/reader/v1/update.rs b/dump/src/reader/v1/update.rs index c9ccaf309..b6408f42a 100644 --- a/dump/src/reader/v1/update.rs +++ b/dump/src/reader/v1/update.rs @@ -1,54 +1,8 @@ use serde::{Deserialize, Serialize}; -use serde_json::Value; use time::OffsetDateTime; use super::settings::SettingsUpdate; -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Update { - data: UpdateData, - #[serde(with = "time::serde::rfc3339")] - enqueued_at: OffsetDateTime, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum UpdateData { - ClearAll, - Customs(Vec), - // (primary key, documents) - DocumentsAddition { - primary_key: Option, - documents: Vec>, - }, - DocumentsPartial { - primary_key: Option, - documents: Vec>, - }, - DocumentsDeletion(Vec), - Settings(Box), -} - -impl UpdateData { - pub fn update_type(&self) -> UpdateType { - match self { - UpdateData::ClearAll => UpdateType::ClearAll, - UpdateData::Customs(_) => UpdateType::Customs, - UpdateData::DocumentsAddition { documents, .. } => UpdateType::DocumentsAddition { - number: documents.len(), - }, - UpdateData::DocumentsPartial { documents, .. } => UpdateType::DocumentsPartial { - number: documents.len(), - }, - UpdateData::DocumentsDeletion(deletion) => UpdateType::DocumentsDeletion { - number: deletion.len(), - }, - UpdateData::Settings(update) => UpdateType::Settings { - settings: update.clone(), - }, - } - } -} - #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(tag = "name")] pub enum UpdateType { diff --git a/dump/src/reader/v1/v1.rs b/dump/src/reader/v1/v1.rs deleted file mode 100644 index 0f4312508..000000000 --- a/dump/src/reader/v1/v1.rs +++ /dev/null @@ -1,22 +0,0 @@ -use serde::Deserialize; -use time::OffsetDateTime; - -#[derive(Debug, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct Index { - pub name: String, - pub uid: String, - #[serde(with = "time::serde::rfc3339")] - created_at: OffsetDateTime, - #[serde(with = "time::serde::rfc3339")] - updated_at: OffsetDateTime, - pub primary_key: Option, -} - -#[derive(Debug, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct Metadata { - indexes: Vec, - db_version: String, - dump_version: crate::Version, -} diff --git a/dump/tests/assets/v1.dump b/dump/tests/assets/v1.dump new file mode 100644 index 000000000..f1e295936 Binary files /dev/null and b/dump/tests/assets/v1.dump differ