Add v1 reader

This commit is contained in:
Louis Dureuil 2022-11-30 14:54:34 +01:00
parent d44652209d
commit b8de369e33
No known key found for this signature in database
11 changed files with 398 additions and 182 deletions

View File

@ -1,173 +1,263 @@
use std::{ use std::{
convert::Infallible,
fs::{self, File}, fs::{self, File},
io::{BufRead, BufReader}, io::{BufRead, BufReader},
path::Path, path::{Path, PathBuf},
}; };
use tempfile::TempDir; use tempfile::TempDir;
use time::OffsetDateTime; use time::OffsetDateTime;
use self::update::UpdateStatus; use super::{compat::v1_to_v2::CompatV1ToV2, Document};
use crate::{IndexMetadata, Result, Version};
use super::{DumpReader, IndexReader}; use serde::Deserialize;
use crate::{Error, Result, Version};
pub mod settings; pub mod settings;
pub mod update; pub mod update;
pub mod v1;
pub struct V1Reader { pub struct V1Reader {
dump: TempDir, pub dump: TempDir,
metadata: v1::Metadata, pub db_version: String,
indexes: Vec<V1IndexReader>, pub dump_version: crate::Version,
indexes: Vec<V1Index>,
} }
struct V1IndexReader { pub struct IndexUuid {
name: String, pub name: String,
pub uid: String,
}
pub type Task = self::update::UpdateStatus;
struct V1Index {
metadata: IndexMetadataV1,
path: PathBuf,
}
impl V1Index {
pub fn new(path: PathBuf, metadata: Index) -> Self {
Self { metadata: metadata.into(), path }
}
pub fn open(&self) -> Result<V1IndexReader> {
V1IndexReader::new(&self.path, self.metadata.clone())
}
pub fn metadata(&self) -> &IndexMetadata {
&self.metadata.metadata
}
}
pub struct V1IndexReader {
metadata: IndexMetadataV1,
documents: BufReader<File>, documents: BufReader<File>,
settings: BufReader<File>, settings: BufReader<File>,
updates: BufReader<File>, updates: BufReader<File>,
current_update: Option<UpdateStatus>,
} }
impl V1IndexReader { impl V1IndexReader {
pub fn new(name: String, path: &Path) -> Result<Self> { pub fn new(path: &Path, metadata: IndexMetadataV1) -> Result<Self> {
let mut ret = V1IndexReader { Ok(V1IndexReader {
name, metadata,
documents: BufReader::new(File::open(path.join("documents.jsonl"))?), documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
settings: BufReader::new(File::open(path.join("settings.json"))?), settings: BufReader::new(File::open(path.join("settings.json"))?),
updates: BufReader::new(File::open(path.join("updates.jsonl"))?), updates: BufReader::new(File::open(path.join("updates.jsonl"))?),
current_update: None, })
};
ret.next_update();
Ok(ret)
} }
pub fn next_update(&mut self) -> Result<Option<UpdateStatus>> { pub fn metadata(&self) -> &IndexMetadata {
let current_update = if let Some(line) = self.updates.lines().next() { &self.metadata.metadata
Some(serde_json::from_str(&line?)?) }
} else {
None
};
Ok(std::mem::replace(&mut self.current_update, current_update)) pub fn documents(&mut self) -> Result<impl Iterator<Item = Result<Document>> + '_> {
Ok((&mut self.documents)
.lines()
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn settings(&mut self) -> Result<self::settings::Settings> {
Ok(serde_json::from_reader(&mut self.settings)?)
}
pub fn tasks(self) -> impl Iterator<Item = Result<Task>> {
self.updates.lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })
} }
} }
impl V1Reader { impl V1Reader {
pub fn open(dump: TempDir) -> Result<Self> { pub fn open(dump: TempDir) -> Result<Self> {
let mut meta_file = fs::read(dump.path().join("metadata.json"))?; let meta_file = fs::read(dump.path().join("metadata.json"))?;
let metadata = serde_json::from_reader(&*meta_file)?; let metadata: Metadata = serde_json::from_reader(&*meta_file)?;
let mut indexes = Vec::new(); let mut indexes = Vec::new();
let entries = fs::read_dir(dump.path())?; for index in metadata.indexes.into_iter() {
for entry in entries { let index_path = dump.path().join(&index.uid);
let entry = entry?; indexes.push(V1Index::new(index_path, index));
if entry.file_type()?.is_dir() {
indexes.push(V1IndexReader::new(
entry
.file_name()
.to_str()
.ok_or(Error::BadIndexName)?
.to_string(),
&entry.path(),
)?);
}
} }
Ok(V1Reader { Ok(V1Reader {
dump, dump,
metadata,
indexes, indexes,
db_version: metadata.db_version,
dump_version: metadata.dump_version,
}) })
} }
fn next_update(&mut self) -> Result<Option<UpdateStatus>> { pub fn to_v2(self) -> CompatV1ToV2 {
if let Some((idx, _)) = self CompatV1ToV2 { from: self }
.indexes }
pub fn index_uuid(&self) -> Vec<IndexUuid> {
self.indexes
.iter() .iter()
.map(|index| index.current_update) .map(|index| IndexUuid {
.enumerate() name: index.metadata.name.to_owned(),
.filter_map(|(idx, update)| update.map(|u| (idx, u))) uid: index.metadata().uid.to_owned(),
.min_by_key(|(_, update)| update.enqueued_at()) })
{ .collect()
self.indexes[idx].next_update()
} else {
Ok(None)
}
}
}
impl IndexReader for &V1IndexReader {
type Document = serde_json::Map<String, serde_json::Value>;
type Settings = settings::Settings;
fn name(&self) -> &str {
todo!()
} }
fn documents(&self) -> Result<Box<dyn Iterator<Item = Result<Self::Document>>>> { pub fn version(&self) -> Version {
todo!()
}
fn settings(&self) -> Result<Self::Settings> {
todo!()
}
}
impl DumpReader for V1Reader {
type Document = serde_json::Map<String, serde_json::Value>;
type Settings = settings::Settings;
type Task = update::UpdateStatus;
type UpdateFile = Infallible;
type Key = Infallible;
fn date(&self) -> Option<OffsetDateTime> {
None
}
fn version(&self) -> Version {
Version::V1 Version::V1
} }
fn indexes( pub fn date(&self) -> Option<OffsetDateTime> {
&self, None
) -> Result<
Box<
dyn Iterator<
Item = Result<
Box<
dyn super::IndexReader<
Document = Self::Document,
Settings = Self::Settings,
>,
>,
>,
>,
>,
> {
Ok(Box::new(self.indexes.iter().map(|index| {
let index = Box::new(index)
as Box<dyn IndexReader<Document = Self::Document, Settings = Self::Settings>>;
Ok(index)
})))
} }
fn tasks(&self) -> Box<dyn Iterator<Item = Result<(Self::Task, Option<Self::UpdateFile>)>>> { pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V1IndexReader>> + '_> {
Box::new(std::iter::from_fn(|| { Ok(self.indexes.iter().map(|index| index.open()))
self.next_update() }
.transpose() }
.map(|result| result.map(|task| (task, None)))
})) #[derive(Debug, Deserialize)]
} #[serde(rename_all = "camelCase")]
pub struct Index {
fn keys(&self) -> Box<dyn Iterator<Item = Result<Self::Key>>> { pub name: String,
Box::new(std::iter::empty()) pub uid: String,
#[serde(with = "time::serde::rfc3339")]
created_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
updated_at: OffsetDateTime,
pub primary_key: Option<String>,
}
#[derive(Clone)]
pub struct IndexMetadataV1 {
pub name: String,
pub metadata: crate::IndexMetadata,
}
impl From<Index> for IndexMetadataV1 {
fn from(index: Index) -> Self {
IndexMetadataV1 {
name: index.name,
metadata: crate::IndexMetadata {
uid: index.uid,
primary_key: index.primary_key,
created_at: index.created_at,
updated_at: index.updated_at,
},
}
}
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Metadata {
pub indexes: Vec<Index>,
pub db_version: String,
pub dump_version: crate::Version,
}
#[cfg(test)]
pub(crate) mod test {
use std::fs::File;
use std::io::BufReader;
use flate2::bufread::GzDecoder;
use meili_snap::insta;
use tempfile::TempDir;
use super::*;
#[test]
fn read_dump_v1() {
let dump = File::open("tests/assets/v1.dump").unwrap();
let dir = TempDir::new().unwrap();
let mut dump = BufReader::new(dump);
let gz = GzDecoder::new(&mut dump);
let mut archive = tar::Archive::new(gz);
archive.unpack(dir.path()).unwrap();
let dump = V1Reader::open(dir).unwrap();
// top level infos
assert_eq!(dump.date(), None);
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
let mut products = indexes.pop().unwrap();
let mut movies = indexes.pop().unwrap();
let mut dnd_spells = indexes.pop().unwrap();
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
insta::assert_json_snapshot!(products.settings().unwrap());
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca");
// products tasks
let tasks = products.tasks().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"91de507f206ad21964584021932ba7a7");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
insta::assert_json_snapshot!(movies.settings().unwrap());
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b63dbed5bbc059f3e32bc471ae699bf5");
// movies tasks
let tasks = movies.tasks().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"55eef4de2bef7e84c5ce0bee47488f56");
// spells
insta::assert_json_snapshot!(dnd_spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
insta::assert_json_snapshot!(dnd_spells.settings().unwrap());
let documents = dnd_spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"aa24c0cfc733d66c396237ad44263bed");
// spells tasks
let tasks = dnd_spells.tasks().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"836dd7d64d5ad20ad901c44b1b161a4c");
} }
} }

View File

@ -0,0 +1,24 @@
---
source: dump/src/reader/v1/mod.rs
expression: dnd_spells.settings().unwrap()
---
{
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"wordsPosition",
"exactness"
],
"distinctAttribute": null,
"searchableAttributes": [
"*"
],
"displayedAttributes": [
"*"
],
"stopWords": [],
"synonyms": {},
"attributesForFaceting": []
}

View File

@ -0,0 +1,24 @@
---
source: dump/src/reader/v1/mod.rs
expression: dnd_spells.settings().unwrap()
---
{
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"wordsPosition",
"exactness"
],
"distinctAttribute": null,
"searchableAttributes": [
"*"
],
"displayedAttributes": [
"*"
],
"stopWords": [],
"synonyms": {},
"attributesForFaceting": []
}

View File

@ -0,0 +1,38 @@
---
source: dump/src/reader/v1/mod.rs
expression: products.settings().unwrap()
---
{
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"wordsPosition",
"exactness"
],
"distinctAttribute": null,
"searchableAttributes": [
"*"
],
"displayedAttributes": [
"*"
],
"stopWords": [],
"synonyms": {
"android": [
"phone",
"smartphone"
],
"iphone": [
"phone",
"smartphone"
],
"phone": [
"android",
"iphone",
"smartphone"
]
},
"attributesForFaceting": []
}

View File

@ -0,0 +1,28 @@
---
source: dump/src/reader/v1/mod.rs
expression: movies.settings().unwrap()
---
{
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"wordsPosition",
"exactness",
"asc(release_date)"
],
"distinctAttribute": null,
"searchableAttributes": [
"*"
],
"displayedAttributes": [
"*"
],
"stopWords": [],
"synonyms": {},
"attributesForFaceting": [
"id",
"genres"
]
}

View File

@ -0,0 +1,28 @@
---
source: dump/src/reader/v1/mod.rs
expression: movies.settings().unwrap()
---
{
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"wordsPosition",
"exactness",
"asc(release_date)"
],
"distinctAttribute": null,
"searchableAttributes": [
"*"
],
"displayedAttributes": [
"*"
],
"stopWords": [],
"synonyms": {},
"attributesForFaceting": [
"id",
"genres"
]
}

View File

@ -0,0 +1,28 @@
---
source: dump/src/reader/v1/mod.rs
expression: movies.settings().unwrap()
---
{
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"wordsPosition",
"exactness",
"asc(release_date)"
],
"distinctAttribute": null,
"searchableAttributes": [
"*"
],
"displayedAttributes": [
"*"
],
"stopWords": [],
"synonyms": {},
"attributesForFaceting": [
"id",
"genres"
]
}

View File

@ -0,0 +1,24 @@
---
source: dump/src/reader/v1/mod.rs
expression: dnd_spells.settings().unwrap()
---
{
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"wordsPosition",
"exactness"
],
"distinctAttribute": null,
"searchableAttributes": [
"*"
],
"displayedAttributes": [
"*"
],
"stopWords": [],
"synonyms": {},
"attributesForFaceting": []
}

View File

@ -1,54 +1,8 @@
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::Value;
use time::OffsetDateTime; use time::OffsetDateTime;
use super::settings::SettingsUpdate; use super::settings::SettingsUpdate;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Update {
data: UpdateData,
#[serde(with = "time::serde::rfc3339")]
enqueued_at: OffsetDateTime,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateData {
ClearAll,
Customs(Vec<u8>),
// (primary key, documents)
DocumentsAddition {
primary_key: Option<String>,
documents: Vec<serde_json::Map<String, Value>>,
},
DocumentsPartial {
primary_key: Option<String>,
documents: Vec<serde_json::Map<String, Value>>,
},
DocumentsDeletion(Vec<String>),
Settings(Box<SettingsUpdate>),
}
impl UpdateData {
pub fn update_type(&self) -> UpdateType {
match self {
UpdateData::ClearAll => UpdateType::ClearAll,
UpdateData::Customs(_) => UpdateType::Customs,
UpdateData::DocumentsAddition { documents, .. } => UpdateType::DocumentsAddition {
number: documents.len(),
},
UpdateData::DocumentsPartial { documents, .. } => UpdateType::DocumentsPartial {
number: documents.len(),
},
UpdateData::DocumentsDeletion(deletion) => UpdateType::DocumentsDeletion {
number: deletion.len(),
},
UpdateData::Settings(update) => UpdateType::Settings {
settings: update.clone(),
},
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "name")] #[serde(tag = "name")]
pub enum UpdateType { pub enum UpdateType {

View File

@ -1,22 +0,0 @@
use serde::Deserialize;
use time::OffsetDateTime;
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Index {
pub name: String,
pub uid: String,
#[serde(with = "time::serde::rfc3339")]
created_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
updated_at: OffsetDateTime,
pub primary_key: Option<String>,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Metadata {
indexes: Vec<Index>,
db_version: String,
dump_version: crate::Version,
}

BIN
dump/tests/assets/v1.dump Normal file

Binary file not shown.