Move crates under a sub folder to clean up the code

This commit is contained in:
Clément Renault 2024-10-21 08:18:43 +02:00
parent 30f3c30389
commit 9c1e54a2c8
No known key found for this signature in database
GPG key ID: F250A4C4E3AE5F5F
1062 changed files with 19 additions and 20 deletions

View file

@ -0,0 +1,262 @@
use std::fs::{self, File};
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
use serde::Deserialize;
use tempfile::TempDir;
use time::OffsetDateTime;
use super::compat::v1_to_v2::CompatV1ToV2;
use super::Document;
use crate::{IndexMetadata, Result, Version};
pub mod settings;
pub mod update;
pub struct V1Reader {
pub dump: TempDir,
pub db_version: String,
pub dump_version: crate::Version,
indexes: Vec<V1Index>,
}
pub struct IndexUuid {
pub name: String,
pub uid: String,
}
pub type Task = self::update::UpdateStatus;
struct V1Index {
metadata: IndexMetadataV1,
path: PathBuf,
}
impl V1Index {
pub fn new(path: PathBuf, metadata: Index) -> Self {
Self { metadata: metadata.into(), path }
}
pub fn open(&self) -> Result<V1IndexReader> {
V1IndexReader::new(&self.path, self.metadata.clone())
}
pub fn metadata(&self) -> &IndexMetadata {
&self.metadata.metadata
}
}
pub struct V1IndexReader {
metadata: IndexMetadataV1,
documents: BufReader<File>,
settings: BufReader<File>,
updates: BufReader<File>,
}
impl V1IndexReader {
pub fn new(path: &Path, metadata: IndexMetadataV1) -> Result<Self> {
Ok(V1IndexReader {
metadata,
documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
settings: BufReader::new(File::open(path.join("settings.json"))?),
updates: BufReader::new(File::open(path.join("updates.jsonl"))?),
})
}
pub fn metadata(&self) -> &IndexMetadata {
&self.metadata.metadata
}
pub fn documents(&mut self) -> Result<impl Iterator<Item = Result<Document>> + '_> {
Ok((&mut self.documents)
.lines()
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn settings(&mut self) -> Result<self::settings::Settings> {
Ok(serde_json::from_reader(&mut self.settings)?)
}
pub fn tasks(self) -> impl Iterator<Item = Result<Task>> {
self.updates.lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })
}
}
impl V1Reader {
pub fn open(dump: TempDir) -> Result<Self> {
let meta_file = fs::read(dump.path().join("metadata.json"))?;
let metadata: Metadata = serde_json::from_reader(&*meta_file)?;
let mut indexes = Vec::new();
for index in metadata.indexes.into_iter() {
let index_path = dump.path().join(&index.uid);
indexes.push(V1Index::new(index_path, index));
}
Ok(V1Reader {
dump,
indexes,
db_version: metadata.db_version,
dump_version: metadata.dump_version,
})
}
pub fn to_v2(self) -> CompatV1ToV2 {
CompatV1ToV2 { from: self }
}
pub fn index_uuid(&self) -> Vec<IndexUuid> {
self.indexes
.iter()
.map(|index| IndexUuid {
name: index.metadata.name.to_owned(),
uid: index.metadata().uid.to_owned(),
})
.collect()
}
pub fn version(&self) -> Version {
Version::V1
}
pub fn date(&self) -> Option<OffsetDateTime> {
None
}
pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V1IndexReader>> + '_> {
Ok(self.indexes.iter().map(|index| index.open()))
}
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Index {
pub name: String,
pub uid: String,
#[serde(with = "time::serde::rfc3339")]
created_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
updated_at: OffsetDateTime,
pub primary_key: Option<String>,
}
#[derive(Clone)]
pub struct IndexMetadataV1 {
pub name: String,
pub metadata: crate::IndexMetadata,
}
impl From<Index> for IndexMetadataV1 {
fn from(index: Index) -> Self {
IndexMetadataV1 {
name: index.name,
metadata: crate::IndexMetadata {
uid: index.uid,
primary_key: index.primary_key,
created_at: index.created_at,
updated_at: index.updated_at,
},
}
}
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Metadata {
pub indexes: Vec<Index>,
pub db_version: String,
pub dump_version: crate::Version,
}
#[cfg(test)]
pub(crate) mod test {
use std::fs::File;
use std::io::BufReader;
use flate2::bufread::GzDecoder;
use meili_snap::insta;
use tempfile::TempDir;
use super::*;
#[test]
fn read_dump_v1() {
let dump = File::open("tests/assets/v1.dump").unwrap();
let dir = TempDir::new().unwrap();
let mut dump = BufReader::new(dump);
let gz = GzDecoder::new(&mut dump);
let mut archive = tar::Archive::new(gz);
archive.unpack(dir.path()).unwrap();
let dump = V1Reader::open(dir).unwrap();
// top level infos
assert_eq!(dump.date(), None);
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
let mut products = indexes.pop().unwrap();
let mut movies = indexes.pop().unwrap();
let mut dnd_spells = indexes.pop().unwrap();
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "2022-10-02T13:23:39.976870431Z",
"updatedAt": "2022-10-02T13:27:54.353262482Z"
}
"###);
insta::assert_json_snapshot!(products.settings().unwrap());
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca");
// products tasks
let tasks = products.tasks().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"91de507f206ad21964584021932ba7a7");
// movies
insta::assert_json_snapshot!(movies.metadata(), @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "2022-10-02T13:15:29.477512777Z",
"updatedAt": "2022-10-02T13:21:12.671204856Z"
}
"###);
insta::assert_json_snapshot!(movies.settings().unwrap());
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b63dbed5bbc059f3e32bc471ae699bf5");
// movies tasks
let tasks = movies.tasks().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"55eef4de2bef7e84c5ce0bee47488f56");
// spells
insta::assert_json_snapshot!(dnd_spells.metadata(), @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "2022-10-02T13:38:26.358882984Z",
"updatedAt": "2022-10-02T13:38:26.385609433Z"
}
"###);
insta::assert_json_snapshot!(dnd_spells.settings().unwrap());
let documents = dnd_spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"aa24c0cfc733d66c396237ad44263bed");
// spells tasks
let tasks = dnd_spells.tasks().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"836dd7d64d5ad20ad901c44b1b161a4c");
}
}

View file

@ -0,0 +1,93 @@
use std::collections::{BTreeMap, BTreeSet};
use std::result::Result as StdResult;
use std::str::FromStr;
use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Deserializer, Serialize};
#[derive(Default, Clone, Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct Settings {
#[serde(default, deserialize_with = "deserialize_some")]
pub ranking_rules: Option<Option<Vec<String>>>,
#[serde(default, deserialize_with = "deserialize_some")]
pub distinct_attribute: Option<Option<String>>,
#[serde(default, deserialize_with = "deserialize_some")]
pub searchable_attributes: Option<Option<Vec<String>>>,
#[serde(default, deserialize_with = "deserialize_some")]
pub displayed_attributes: Option<Option<BTreeSet<String>>>,
#[serde(default, deserialize_with = "deserialize_some")]
pub stop_words: Option<Option<BTreeSet<String>>>,
#[serde(default, deserialize_with = "deserialize_some")]
pub synonyms: Option<Option<BTreeMap<String, Vec<String>>>>,
#[serde(default, deserialize_with = "deserialize_some")]
pub attributes_for_faceting: Option<Option<Vec<String>>>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SettingsUpdate {
pub ranking_rules: UpdateState<Vec<RankingRule>>,
pub distinct_attribute: UpdateState<String>,
pub primary_key: UpdateState<String>,
pub searchable_attributes: UpdateState<Vec<String>>,
pub displayed_attributes: UpdateState<BTreeSet<String>>,
pub stop_words: UpdateState<BTreeSet<String>>,
pub synonyms: UpdateState<BTreeMap<String, Vec<String>>>,
pub attributes_for_faceting: UpdateState<Vec<String>>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateState<T> {
Update(T),
Clear,
Nothing,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RankingRule {
Typo,
Words,
Proximity,
Attribute,
WordsPosition,
Exactness,
Asc(String),
Desc(String),
}
static ASC_DESC_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(asc|desc)\(([\w_-]+)\)").unwrap());
impl FromStr for RankingRule {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"typo" => Self::Typo,
"words" => Self::Words,
"proximity" => Self::Proximity,
"attribute" => Self::Attribute,
"wordsPosition" => Self::WordsPosition,
"exactness" => Self::Exactness,
text => {
let caps = ASC_DESC_REGEX.captures(text).ok_or(())?;
let order = caps.get(1).unwrap().as_str();
let field_name = caps.get(2).unwrap().as_str();
match order {
"asc" => Self::Asc(field_name.to_string()),
"desc" => Self::Desc(field_name.to_string()),
_ => return Err(()),
}
}
})
}
}
// Any value that is present is considered Some value, including null.
fn deserialize_some<'de, T, D>(deserializer: D) -> StdResult<Option<T>, D::Error>
where
T: Deserialize<'de>,
D: Deserializer<'de>,
{
Deserialize::deserialize(deserializer).map(Some)
}

View file

@ -0,0 +1,24 @@
---
source: dump/src/reader/v1/mod.rs
expression: dnd_spells.settings().unwrap()
---
{
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"wordsPosition",
"exactness"
],
"distinctAttribute": null,
"searchableAttributes": [
"*"
],
"displayedAttributes": [
"*"
],
"stopWords": [],
"synonyms": {},
"attributesForFaceting": []
}

View file

@ -0,0 +1,38 @@
---
source: dump/src/reader/v1/mod.rs
expression: products.settings().unwrap()
---
{
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"wordsPosition",
"exactness"
],
"distinctAttribute": null,
"searchableAttributes": [
"*"
],
"displayedAttributes": [
"*"
],
"stopWords": [],
"synonyms": {
"android": [
"phone",
"smartphone"
],
"iphone": [
"phone",
"smartphone"
],
"phone": [
"android",
"iphone",
"smartphone"
]
},
"attributesForFaceting": []
}

View file

@ -0,0 +1,28 @@
---
source: dump/src/reader/v1/mod.rs
expression: movies.settings().unwrap()
---
{
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"wordsPosition",
"exactness",
"asc(release_date)"
],
"distinctAttribute": null,
"searchableAttributes": [
"*"
],
"displayedAttributes": [
"*"
],
"stopWords": [],
"synonyms": {},
"attributesForFaceting": [
"id",
"genres"
]
}

View file

@ -0,0 +1,74 @@
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use super::settings::SettingsUpdate;
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "name")]
pub enum UpdateType {
ClearAll,
Customs,
DocumentsAddition { number: usize },
DocumentsPartial { number: usize },
DocumentsDeletion { number: usize },
Settings { settings: Box<SettingsUpdate> },
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ProcessedUpdateResult {
pub update_id: u64,
#[serde(rename = "type")]
pub update_type: UpdateType,
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub error_type: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub error_code: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub error_link: Option<String>,
pub duration: f64, // in seconds
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
pub processed_at: OffsetDateTime,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct EnqueuedUpdateResult {
pub update_id: u64,
#[serde(rename = "type")]
pub update_type: UpdateType,
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", tag = "status")]
pub enum UpdateStatus {
Enqueued {
#[serde(flatten)]
content: EnqueuedUpdateResult,
},
Failed {
#[serde(flatten)]
content: ProcessedUpdateResult,
},
Processed {
#[serde(flatten)]
content: ProcessedUpdateResult,
},
}
impl UpdateStatus {
pub fn enqueued_at(&self) -> &OffsetDateTime {
match self {
UpdateStatus::Enqueued { content } => &content.enqueued_at,
UpdateStatus::Failed { content } | UpdateStatus::Processed { content } => {
&content.enqueued_at
}
}
}
}