mirror of https://github.com/meilisearch/MeiliSearch (synced 2024-12-26 14:40:05 +01:00)

introduce the index metadata

This commit is contained in:
parent e845cc2b6f
commit 101f55ce8b
@@ -21,6 +21,17 @@ struct Metadata {
    pub dump_date: OffsetDateTime,
}

#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct IndexMetadata {
    pub uid: String,
    pub primary_key: Option<String>,
    #[serde(with = "time::serde::rfc3339")]
    pub created_at: OffsetDateTime,
    #[serde(with = "time::serde::rfc3339")]
    pub updated_at: OffsetDateTime,
}

#[derive(Debug, PartialEq, Eq, Deserialize, Serialize)]
pub enum Version {
    V1,
@@ -49,12 +60,21 @@ pub(crate) mod test {
    use time::{macros::datetime, Duration};
    use uuid::Uuid;

    use crate::{reader, DumpWriter, Version};
    use crate::{reader, DumpWriter, IndexMetadata, Version};

    pub fn create_test_instance_uid() -> Uuid {
        Uuid::parse_str("9e15e977-f2ae-4761-943f-1eaf75fd736d").unwrap()
    }

    pub fn create_test_index_metadata() -> IndexMetadata {
        IndexMetadata {
            uid: S("doggo"),
            primary_key: None,
            created_at: datetime!(2022-11-20 12:00 UTC),
            updated_at: datetime!(2022-11-21 00:00 UTC),
        }
    }

    pub fn create_test_documents() -> Vec<Map<String, Value>> {
        vec![
            json!({ "id": 1, "race": "golden retriever", "name": "paul", "age": 4 })
@@ -186,7 +206,9 @@ pub(crate) mod test {
        let documents = create_test_documents();
        let settings = create_test_settings();

        let mut index = dump.create_index("doggos").unwrap();
        let mut index = dump
            .create_index("doggos", &create_test_index_metadata())
            .unwrap();
        for document in &documents {
            index.push_document(document).unwrap();
        }
@@ -217,7 +239,7 @@ pub(crate) mod test {
    }

    #[test]
    fn test_creating_dump() {
    fn test_creating_and_read_dump() {
        let mut file = create_test_dump();
        let mut dump = reader::open(&mut file).unwrap();

@@ -234,12 +256,14 @@ pub(crate) mod test {
        let mut index = indexes.next().unwrap().unwrap();
        assert!(indexes.next().is_none()); // there was only one index in the dump

        assert_eq!(index.name(), "doggos");

        for (document, expected) in index.documents().unwrap().zip(create_test_documents()) {
            assert_eq!(document.unwrap(), expected);
        }
        assert_eq!(index.settings().unwrap(), create_test_settings());
        assert_eq!(index.metadata(), &create_test_index_metadata());

        drop(index);
        drop(indexes);

        // ==== checking the task queue
        for (task, expected) in dump.tasks().zip(create_test_tasks()) {
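For reference, a standalone sketch (not part of the commit) of how the new `IndexMetadata` struct serializes: the `camelCase` rename and the `time::serde::rfc3339` attributes are what produce keys like `primaryKey` and RFC 3339 date strings in `metadata.json`. It assumes the `serde`, `serde_json` and `time` crates (with the `macros` and `serde-well-known` features), mirroring the dependencies used in the diff above.

```rust
// Standalone sketch, not part of the diff: how the serde attributes on
// `IndexMetadata` shape the JSON written to `metadata.json`.
// Assumes serde, serde_json, and time (features: "macros", "serde-well-known").
use serde::{Deserialize, Serialize};
use time::{macros::datetime, OffsetDateTime};

#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct IndexMetadata {
    pub uid: String,
    pub primary_key: Option<String>,
    #[serde(with = "time::serde::rfc3339")]
    pub created_at: OffsetDateTime,
    #[serde(with = "time::serde::rfc3339")]
    pub updated_at: OffsetDateTime,
}

fn main() {
    let metadata = IndexMetadata {
        uid: "doggo".to_string(),
        primary_key: None,
        created_at: datetime!(2022-11-20 12:00 UTC),
        updated_at: datetime!(2022-11-21 00:00 UTC),
    };
    // Field names come out camelCased ("primaryKey", "createdAt", "updatedAt")
    // and the dates are serialized as RFC 3339 strings.
    println!("{}", serde_json::to_string_pretty(&metadata).unwrap());
}
```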
@@ -12,7 +12,7 @@ use tempfile::TempDir;
use time::OffsetDateTime;
use uuid::Uuid;

use crate::{Result, Version};
use crate::{IndexMetadata, Result, Version};

// use self::loaders::{v2, v3, v4, v5};

@@ -20,6 +20,7 @@ use crate::{Result, Version};
// mod compat;
// mod loaders;
// mod v1;
mod v5;
mod v6;

pub fn open(
@@ -97,10 +98,13 @@ pub trait DumpReader {
    ) -> Result<
        Box<
            dyn Iterator<
                Item = Result<
                    Box<dyn IndexReader<Document = Self::Document, Settings = Self::Settings>>,
                >,
            >,
                Item = Result<
                    Box<
                        dyn IndexReader<Document = Self::Document, Settings = Self::Settings>
                            + '_,
                    >,
                >,
            > + '_,
        >,
    >;

@@ -117,7 +121,7 @@ pub trait IndexReader {
    type Document;
    type Settings;

    fn name(&self) -> &str;
    fn metadata(&self) -> &IndexMetadata;
    fn documents(&mut self) -> Result<Box<dyn Iterator<Item = Result<Self::Document>> + '_>>;
    fn settings(&mut self) -> Result<Self::Settings>;
}
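The `DumpReader::indexes` signature above leans on borrowed trait objects; here is a minimal, self-contained sketch (simplified stand-in types, not the crate's real ones) of why the `+ '_` bounds are needed on both the boxed iterator and the boxed `IndexReader` items.

```rust
// Self-contained sketch of the borrowed-trait-object pattern; the trait and
// struct names mirror the ones above but are simplified stand-ins.
trait IndexReader {
    fn name(&self) -> &str;
}

struct Index {
    name: String,
}

// Each boxed reader borrows from the owning `Reader`, hence the lifetime.
struct IndexHandle<'a> {
    name: &'a str,
}

impl<'a> IndexReader for IndexHandle<'a> {
    fn name(&self) -> &str {
        self.name
    }
}

struct Reader {
    indexes: Vec<Index>,
}

impl Reader {
    // Without the two `+ '_` bounds, the boxed iterator and its boxed items
    // would have to be 'static, which borrowing from `self` forbids.
    fn indexes(&self) -> Box<dyn Iterator<Item = Box<dyn IndexReader + '_>> + '_> {
        Box::new(
            self.indexes
                .iter()
                .map(|index| Box::new(IndexHandle { name: &index.name }) as Box<dyn IndexReader + '_>),
        )
    }
}

fn main() {
    let reader = Reader {
        indexes: vec![Index { name: "doggos".to_string() }],
    };
    for index in reader.indexes() {
        println!("{}", index.name());
    }
}
```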
dump/src/reader/v5/meta.rs (new file, 14 lines)
@@ -0,0 +1,14 @@
use serde::{Deserialize, Serialize};
use uuid::Uuid;

#[derive(Serialize, Deserialize, Debug)]
pub struct IndexUuid {
    pub uid: String,
    pub index_meta: IndexMeta,
}

#[derive(Serialize, Deserialize, Debug)]
pub struct IndexMeta {
    pub uuid: Uuid,
    pub creation_task_id: usize,
}
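A hedged sketch of how one line of `index_uuids/data.jsonl` (the format documented in `v5/mod.rs` below) maps onto these two types; it assumes `serde_json` and the `uuid` crate with its `serde` feature, as implied by the derives above.

```rust
// Sketch: parsing one line of `index_uuids/data.jsonl` with the types above.
// Assumes serde, serde_json, and uuid with its "serde" feature enabled.
use serde::{Deserialize, Serialize};
use uuid::Uuid;

#[derive(Serialize, Deserialize, Debug)]
pub struct IndexUuid {
    pub uid: String,
    pub index_meta: IndexMeta,
}

#[derive(Serialize, Deserialize, Debug)]
pub struct IndexMeta {
    pub uuid: Uuid,
    pub creation_task_id: usize,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Example entry taken from the module documentation of `v5/mod.rs` below.
    let line = r#"{"uid":"dnd_spells","index_meta":{"uuid":"22c269d8-fbbd-4416-bd46-7c7c02849325","creation_task_id":9}}"#;
    let entry: IndexUuid = serde_json::from_str(line)?;
    assert_eq!(entry.uid, "dnd_spells");
    assert_eq!(entry.index_meta.creation_task_id, 9);
    println!("index `{}` lives in indexes/{}", entry.uid, entry.index_meta.uuid);
    Ok(())
}
```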
dump/src/reader/v5/mod.rs (new file, 221 lines)
@@ -0,0 +1,221 @@
//! Here is what a dump v5 looks like.
//!
//! ```text
//! .
//! ├── indexes
//! │   ├── 22c269d8-fbbd-4416-bd46-7c7c02849325
//! │   │   ├── documents.jsonl
//! │   │   └── meta.json
//! │   ├── 6d0471ba-2ed1-41de-8ea6-10db10fa2bb8
//! │   │   ├── documents.jsonl
//! │   │   └── meta.json
//! │   └── f7d53ec4-0748-48e6-b66f-1fca9944b0fa
//! │       ├── documents.jsonl
//! │       └── meta.json
//! ├── index_uuids
//! │   └── data.jsonl
//! ├── instance-uid
//! ├── keys
//! ├── metadata.json
//! └── updates
//!     ├── data.jsonl
//!     └── updates_files
//!         └── c83a004a-da98-4b94-b245-3256266c7281
//! ```
//!
//! Here is what `index_uuids/data.jsonl` looks like:
//!
//! ```json
//! {"uid":"dnd_spells","index_meta":{"uuid":"22c269d8-fbbd-4416-bd46-7c7c02849325","creation_task_id":9}}
//! {"uid":"movies","index_meta":{"uuid":"6d0471ba-2ed1-41de-8ea6-10db10fa2bb8","creation_task_id":1}}
//! {"uid":"products","index_meta":{"uuid":"f7d53ec4-0748-48e6-b66f-1fca9944b0fa","creation_task_id":4}}
//! ```
//!

use std::{
    fs::{self, File},
    io::{BufRead, BufReader},
    path::Path,
};

use serde::{Deserialize, Serialize};
use tempfile::TempDir;
use time::OffsetDateTime;
use uuid::Uuid;

use crate::{IndexMetadata, Result, Version};

use self::{
    meta::IndexUuid,
    settings::{Checked, Settings, Unchecked},
    tasks::Task,
};

use super::{DumpReader, IndexReader};

mod meta;
mod settings;
mod tasks;

#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct Metadata {
    db_version: String,
    index_db_size: usize,
    update_db_size: usize,
    #[serde(with = "time::serde::rfc3339")]
    dump_date: OffsetDateTime,
}

pub struct V5Reader {
    dump: TempDir,
    metadata: Metadata,
    tasks: BufReader<File>,
    keys: BufReader<File>,
    index_uuid: Vec<IndexUuid>,
}

struct V5IndexReader {
    metadata: IndexMetadata,

    documents: BufReader<File>,
    settings: BufReader<File>,
}

impl V5IndexReader {
    pub fn new(name: String, path: &Path) -> Result<Self> {
        let metadata = File::open(path.join("metadata.json"))?;

        let ret = V5IndexReader {
            metadata: serde_json::from_reader(metadata)?,
            documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
            settings: BufReader::new(File::open(path.join("settings.json"))?),
        };

        Ok(ret)
    }
}

impl V5Reader {
    pub fn open(dump: TempDir) -> Result<Self> {
        let meta_file = fs::read(dump.path().join("metadata.json"))?;
        let metadata = serde_json::from_reader(&*meta_file)?;
        let index_uuid = File::open(dump.path().join("index_uuids/data.jsonl"))?;
        let index_uuid = BufReader::new(index_uuid);
        let index_uuid = index_uuid
            .lines()
            .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })
            .collect::<Result<Vec<_>>>()?;

        Ok(V5Reader {
            metadata,
            tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?),
            keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?),
            index_uuid,
            dump,
        })
    }
}

impl DumpReader for V5Reader {
    type Document = serde_json::Map<String, serde_json::Value>;
    type Settings = Settings<Checked>;

    type Task = Task;
    type UpdateFile = File;

    // TODO: remove this
    type Key = meilisearch_auth::Key;

    fn version(&self) -> Version {
        Version::V5
    }

    fn date(&self) -> Option<OffsetDateTime> {
        Some(self.metadata.dump_date)
    }

    fn instance_uid(&self) -> Result<Option<Uuid>> {
        let uuid = fs::read_to_string(self.dump.path().join("instance-uid"))?;
        Ok(Some(Uuid::parse_str(&uuid)?))
    }

    fn indexes(
        &self,
    ) -> Result<
        Box<
            dyn Iterator<
                Item = Result<
                    Box<
                        dyn super::IndexReader<
                            Document = Self::Document,
                            Settings = Self::Settings,
                        > + '_,
                    >,
                >,
            > + '_,
        >,
    > {
        Ok(Box::new(self.index_uuid.iter().map(|index| -> Result<_> {
            Ok(Box::new(V5IndexReader::new(
                index.uid.clone(),
                &self
                    .dump
                    .path()
                    .join("indexes")
                    .join(index.index_meta.uuid.to_string()),
            )?)
                as Box<
                    dyn IndexReader<Document = Self::Document, Settings = Self::Settings>,
                >)
        })))
    }

    fn tasks(
        &mut self,
    ) -> Box<dyn Iterator<Item = Result<(Self::Task, Option<Self::UpdateFile>)>> + '_> {
        Box::new((&mut self.tasks).lines().map(|line| -> Result<_> {
            let task: Self::Task = serde_json::from_str(&line?)?;
            if let Some(uuid) = task.get_content_uuid() {
                let update_file_path = self
                    .dump
                    .path()
                    .join("updates")
                    .join("update_files")
                    .join(uuid.to_string());
                Ok((task, Some(File::open(update_file_path)?)))
            } else {
                Ok((task, None))
            }
        }))
    }

    // TODO: do it
    fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Self::Key>> + '_> {
        Box::new(
            (&mut self.keys)
                .lines()
                .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }),
        )
    }
}

impl IndexReader for V5IndexReader {
    type Document = serde_json::Map<String, serde_json::Value>;
    type Settings = Settings<Checked>;

    fn metadata(&self) -> &IndexMetadata {
        &self.metadata
    }

    fn documents(&mut self) -> Result<Box<dyn Iterator<Item = Result<Self::Document>> + '_>> {
        Ok(Box::new((&mut self.documents).lines().map(
            |line| -> Result<_> { Ok(serde_json::from_str(&line?)?) },
        )))
    }

    fn settings(&mut self) -> Result<Self::Settings> {
        let settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
        Ok(settings.check())
    }
}
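`V5IndexReader::documents` and `V5Reader::tasks` both rely on the same streaming pattern: wrap the file in a `BufReader` and deserialize one JSON value per line. A reduced, self-contained sketch of that pattern follows; the `documents` helper and its error type are illustrative, not the crate's API.

```rust
// Reduced sketch of the streaming pattern; the helper name and error type are
// illustrative, not the crate's API.
use std::fs::File;
use std::io::{BufRead, BufReader};

use serde_json::{Map, Value};

type BoxError = Box<dyn std::error::Error>;

/// Lazily yields one JSON document per line of a `documents.jsonl` file.
fn documents(file: File) -> impl Iterator<Item = Result<Map<String, Value>, BoxError>> {
    BufReader::new(file).lines().map(|line| -> Result<Map<String, Value>, BoxError> {
        let line = line?;
        Ok(serde_json::from_str(&line)?)
    })
}

fn main() -> Result<(), BoxError> {
    for document in documents(File::open("documents.jsonl")?) {
        println!("{:?}", document?);
    }
    Ok(())
}
```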
dump/src/reader/v5/settings.rs (new file, 251 lines)
@@ -0,0 +1,251 @@
use std::{
    collections::{BTreeMap, BTreeSet},
    marker::PhantomData,
};

use serde::{Deserialize, Deserializer, Serialize, Serializer};

#[derive(Clone, Default, Debug, Serialize, PartialEq, Eq)]
pub struct Checked;

#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct Unchecked;

/// Holds all the settings for an index. `T` can either be `Checked` if they represent settings
/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the latter case, a
/// call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`.
#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
#[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))]
pub struct Settings<T> {
    #[serde(default)]
    pub displayed_attributes: Setting<Vec<String>>,

    #[serde(default)]
    pub searchable_attributes: Setting<Vec<String>>,

    #[serde(default)]
    pub filterable_attributes: Setting<BTreeSet<String>>,
    #[serde(default)]
    pub sortable_attributes: Setting<BTreeSet<String>>,
    #[serde(default)]
    pub ranking_rules: Setting<Vec<String>>,
    #[serde(default)]
    pub stop_words: Setting<BTreeSet<String>>,
    #[serde(default)]
    pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
    #[serde(default)]
    pub distinct_attribute: Setting<String>,
    #[serde(default)]
    pub typo_tolerance: Setting<TypoSettings>,
    #[serde(default)]
    pub faceting: Setting<FacetingSettings>,
    #[serde(default)]
    pub pagination: Setting<PaginationSettings>,

    #[serde(skip)]
    pub _kind: PhantomData<T>,
}

fn serialize_with_wildcard<S>(
    field: &Setting<Vec<String>>,
    s: S,
) -> std::result::Result<S::Ok, S::Error>
where
    S: Serializer,
{
    let wildcard = vec!["*".to_string()];
    match field {
        Setting::Set(value) => Some(value),
        Setting::Reset => Some(&wildcard),
        Setting::NotSet => None,
    }
    .serialize(s)
}

#[derive(Debug, Clone, PartialEq, Copy)]
pub enum Setting<T> {
    Set(T),
    Reset,
    NotSet,
}

impl<T> Default for Setting<T> {
    fn default() -> Self {
        Self::NotSet
    }
}

impl<T> Setting<T> {
    pub fn set(self) -> Option<T> {
        match self {
            Self::Set(value) => Some(value),
            _ => None,
        }
    }

    pub const fn as_ref(&self) -> Setting<&T> {
        match *self {
            Self::Set(ref value) => Setting::Set(value),
            Self::Reset => Setting::Reset,
            Self::NotSet => Setting::NotSet,
        }
    }

    pub const fn is_not_set(&self) -> bool {
        matches!(self, Self::NotSet)
    }

    /// If `Self` is `Reset`, then map self to `Set` with the provided `val`.
    pub fn or_reset(self, val: T) -> Self {
        match self {
            Self::Reset => Self::Set(val),
            otherwise => otherwise,
        }
    }
}

impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting<T> {
    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        Deserialize::deserialize(deserializer).map(|x| match x {
            Some(x) => Self::Set(x),
            None => Self::Reset, // Reset is forced by sending null value
        })
    }
}

#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct MinWordSizeTyposSetting {
    #[serde(default)]
    pub one_typo: Setting<u8>,
    #[serde(default)]
    pub two_typos: Setting<u8>,
}

#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct TypoSettings {
    #[serde(default)]
    pub enabled: Setting<bool>,
    #[serde(default)]
    pub min_word_size_for_typos: Setting<MinWordSizeTyposSetting>,
    #[serde(default)]
    pub disable_on_words: Setting<BTreeSet<String>>,
    #[serde(default)]
    pub disable_on_attributes: Setting<BTreeSet<String>>,
}

#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct FacetingSettings {
    #[serde(default)]
    pub max_values_per_facet: Setting<usize>,
}

#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct PaginationSettings {
    #[serde(default)]
    pub max_total_hits: Setting<usize>,
}

impl Settings<Checked> {
    pub fn cleared() -> Settings<Checked> {
        Settings {
            displayed_attributes: Setting::Reset,
            searchable_attributes: Setting::Reset,
            filterable_attributes: Setting::Reset,
            sortable_attributes: Setting::Reset,
            ranking_rules: Setting::Reset,
            stop_words: Setting::Reset,
            synonyms: Setting::Reset,
            distinct_attribute: Setting::Reset,
            typo_tolerance: Setting::Reset,
            faceting: Setting::Reset,
            pagination: Setting::Reset,
            _kind: PhantomData,
        }
    }

    pub fn into_unchecked(self) -> Settings<Unchecked> {
        let Self {
            displayed_attributes,
            searchable_attributes,
            filterable_attributes,
            sortable_attributes,
            ranking_rules,
            stop_words,
            synonyms,
            distinct_attribute,
            typo_tolerance,
            faceting,
            pagination,
            ..
        } = self;

        Settings {
            displayed_attributes,
            searchable_attributes,
            filterable_attributes,
            sortable_attributes,
            ranking_rules,
            stop_words,
            synonyms,
            distinct_attribute,
            typo_tolerance,
            faceting,
            pagination,
            _kind: PhantomData,
        }
    }
}

impl Settings<Unchecked> {
    pub fn check(self) -> Settings<Checked> {
        let displayed_attributes = match self.displayed_attributes {
            Setting::Set(fields) => {
                if fields.iter().any(|f| f == "*") {
                    Setting::Reset
                } else {
                    Setting::Set(fields)
                }
            }
            otherwise => otherwise,
        };

        let searchable_attributes = match self.searchable_attributes {
            Setting::Set(fields) => {
                if fields.iter().any(|f| f == "*") {
                    Setting::Reset
                } else {
                    Setting::Set(fields)
                }
            }
            otherwise => otherwise,
        };

        Settings {
            displayed_attributes,
            searchable_attributes,
            filterable_attributes: self.filterable_attributes,
            sortable_attributes: self.sortable_attributes,
            ranking_rules: self.ranking_rules,
            stop_words: self.stop_words,
            synonyms: self.synonyms,
            distinct_attribute: self.distinct_attribute,
            typo_tolerance: self.typo_tolerance,
            faceting: self.faceting,
            pagination: self.pagination,
            _kind: PhantomData,
        }
    }
}
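The `Checked`/`Unchecked` marker types above implement a typestate: a `Settings<Unchecked>` freshly deserialized from a dump can only become a `Settings<Checked>` through `check()`, which also collapses a `"*"` wildcard into `Reset`. A reduced sketch of the idea (only `displayed_attributes` is modelled; the names are trimmed-down stand-ins, not the real types):

```rust
// Reduced sketch of the typestate; only one settings field is modelled and the
// names are trimmed-down stand-ins for the types above.
use std::marker::PhantomData;

#[derive(Debug, PartialEq)]
enum Setting<T> {
    Set(T),
    Reset,
    NotSet,
}

struct Checked;
struct Unchecked;

struct Settings<T> {
    displayed_attributes: Setting<Vec<String>>,
    _kind: PhantomData<T>,
}

impl Settings<Unchecked> {
    // The only way to obtain a `Settings<Checked>`: "*" wildcards become `Reset`.
    fn check(self) -> Settings<Checked> {
        let displayed_attributes = match self.displayed_attributes {
            Setting::Set(fields) if fields.iter().any(|f| f == "*") => Setting::Reset,
            otherwise => otherwise,
        };
        Settings {
            displayed_attributes,
            _kind: PhantomData,
        }
    }
}

fn main() {
    let raw: Settings<Unchecked> = Settings {
        displayed_attributes: Setting::Set(vec!["*".to_string()]),
        _kind: PhantomData,
    };
    let checked = raw.check();
    assert_eq!(checked.displayed_attributes, Setting::Reset);
}
```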
dump/src/reader/v5/tasks.rs (new file, 173 lines)
@@ -0,0 +1,173 @@
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use uuid::Uuid;

use super::settings::{Settings, Unchecked};

pub type TaskId = u32;
pub type BatchId = u32;

#[derive(Clone, Debug, Deserialize, PartialEq)]
pub struct Task {
    pub id: TaskId,
    /// The name of the index the task is targeting. If it isn't targeting any index (i.e. a Dump task),
    /// then this is None.
    // TODO: when next forward breaking dumps, it would be a good idea to move this field inside of
    // the TaskContent.
    pub content: TaskContent,
    pub events: Vec<TaskEvent>,
}

#[derive(Clone, Debug, Deserialize, PartialEq)]
#[allow(clippy::large_enum_variant)]
pub enum TaskContent {
    DocumentAddition {
        index_uid: IndexUid,
        content_uuid: Uuid,
        merge_strategy: IndexDocumentsMethod,
        primary_key: Option<String>,
        documents_count: usize,
        allow_index_creation: bool,
    },
    DocumentDeletion {
        index_uid: IndexUid,
        deletion: DocumentDeletion,
    },
    SettingsUpdate {
        index_uid: IndexUid,
        settings: Settings<Unchecked>,
        /// Indicates whether the task was a deletion
        is_deletion: bool,
        allow_index_creation: bool,
    },
    IndexDeletion {
        index_uid: IndexUid,
    },
    IndexCreation {
        index_uid: IndexUid,
        primary_key: Option<String>,
    },
    IndexUpdate {
        index_uid: IndexUid,
        primary_key: Option<String>,
    },
    Dump {
        uid: String,
    },
}

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct IndexUid(String);

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum IndexDocumentsMethod {
    /// Replace the previous document with the new one,
    /// removing all the already known attributes.
    ReplaceDocuments,

    /// Merge the previous version of the document with the new version,
    /// replacing old attribute values with the new ones and adding the new attributes.
    UpdateDocuments,
}

#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub enum DocumentDeletion {
    Clear,
    Ids(Vec<String>),
}

#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub enum TaskEvent {
    Created(#[serde(with = "time::serde::rfc3339")] OffsetDateTime),
    Batched {
        #[serde(with = "time::serde::rfc3339")]
        timestamp: OffsetDateTime,
        batch_id: BatchId,
    },
    Processing(#[serde(with = "time::serde::rfc3339")] OffsetDateTime),
    Succeeded {
        result: TaskResult,
        #[serde(with = "time::serde::rfc3339")]
        timestamp: OffsetDateTime,
    },
    Failed {
        error: ResponseError,
        #[serde(with = "time::serde::rfc3339")]
        timestamp: OffsetDateTime,
    },
}

#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub enum TaskResult {
    DocumentAddition { indexed_documents: u64 },
    DocumentDeletion { deleted_documents: u64 },
    ClearAll { deleted_documents: u64 },
    Other,
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct ResponseError {
    message: String,
    #[serde(rename = "code")]
    error_code: String,
    #[serde(rename = "type")]
    error_type: String,
    #[serde(rename = "link")]
    error_link: String,
}

impl Task {
    /// Return true when a task is finished.
    /// A task is finished when its last state is either `Succeeded` or `Failed`.
    pub fn is_finished(&self) -> bool {
        self.events.last().map_or(false, |event| {
            matches!(
                event,
                TaskEvent::Succeeded { .. } | TaskEvent::Failed { .. }
            )
        })
    }

    /// Return the content_uuid of the `Task` if there is one.
    pub fn get_content_uuid(&self) -> Option<Uuid> {
        match self {
            Task {
                content: TaskContent::DocumentAddition { content_uuid, .. },
                ..
            } => Some(*content_uuid),
            _ => None,
        }
    }

    pub fn index_uid(&self) -> Option<&str> {
        match &self.content {
            TaskContent::DocumentAddition { index_uid, .. }
            | TaskContent::DocumentDeletion { index_uid, .. }
            | TaskContent::SettingsUpdate { index_uid, .. }
            | TaskContent::IndexDeletion { index_uid }
            | TaskContent::IndexCreation { index_uid, .. }
            | TaskContent::IndexUpdate { index_uid, .. } => Some(index_uid.as_str()),
            TaskContent::Dump { .. } => None,
        }
    }
}

impl IndexUid {
    pub fn into_inner(self) -> String {
        self.0
    }

    /// Return a reference over the inner str.
    pub fn as_str(&self) -> &str {
        &self.0
    }
}

impl std::ops::Deref for IndexUid {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}
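A reduced sketch of the `is_finished` rule above, with only the fields needed to show that a task counts as finished once its last event is `Succeeded` or `Failed` (types trimmed down, not the real ones):

```rust
// Trimmed-down stand-ins for `Task` and `TaskEvent`, just enough to show the
// "finished" rule; the real types above carry timestamps, results and errors.
enum TaskEvent {
    Created,
    Processing,
    Succeeded,
    Failed,
}

struct Task {
    events: Vec<TaskEvent>,
}

impl Task {
    /// A task is finished once its last event is `Succeeded` or `Failed`.
    fn is_finished(&self) -> bool {
        self.events
            .last()
            .map_or(false, |event| matches!(event, TaskEvent::Succeeded | TaskEvent::Failed))
    }
}

fn main() {
    let running = Task { events: vec![TaskEvent::Created, TaskEvent::Processing] };
    let done = Task { events: vec![TaskEvent::Created, TaskEvent::Succeeded] };
    assert!(!running.is_finished());
    assert!(done.is_finished());
}
```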
@@ -2,6 +2,7 @@ use std::{
    fs::{self, File},
    io::{BufRead, BufReader},
    path::Path,
    str::FromStr,
};

use index::{Checked, Unchecked};
@@ -9,50 +10,35 @@ use tempfile::TempDir;
use time::OffsetDateTime;
use uuid::Uuid;

use crate::{Error, Result, Version};
use crate::{Error, IndexMetadata, Result, Version};

use super::{DumpReader, IndexReader};

type Metadata = crate::Metadata;

pub fn date(dump: &Path) -> Result<OffsetDateTime> {
    let metadata = fs::read(dump.join("metadata.json"))?;
    let metadata: Metadata = serde_json::from_reader(&*metadata)?;
    Ok(metadata.dump_date)
}

pub struct V6Reader {
    dump: TempDir,
    instance_uid: Uuid,
    metadata: Metadata,
    tasks: BufReader<File>,
    keys: BufReader<File>,
}

struct V6IndexReader {
    name: String,
    metadata: IndexMetadata,
    documents: BufReader<File>,
    settings: BufReader<File>,
}

impl V6IndexReader {
    pub fn new(name: String, path: &Path) -> Result<Self> {
        let ret = V6IndexReader {
            name,
            documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
            settings: BufReader::new(File::open(path.join("settings.json"))?),
        };

        Ok(ret)
    }
}

impl V6Reader {
    pub fn open(dump: TempDir) -> Result<Self> {
        let meta_file = fs::read(dump.path().join("metadata.json"))?;
        let metadata = serde_json::from_reader(&*meta_file)?;
        let instance_uid = fs::read_to_string(dump.path().join("instance_uid.uuid"))?;
        let instance_uid = Uuid::from_str(&instance_uid)?;

        Ok(V6Reader {
            metadata,
            metadata: serde_json::from_reader(&*meta_file)?,
            instance_uid,
            tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?),
            keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?),
            dump,
@@ -60,6 +46,20 @@ impl V6Reader {
    }
}

impl V6IndexReader {
    pub fn new(name: String, path: &Path) -> Result<Self> {
        let metadata = File::open(path.join("metadata.json"))?;

        let ret = V6IndexReader {
            metadata: serde_json::from_reader(metadata)?,
            documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
            settings: BufReader::new(File::open(path.join("settings.json"))?),
        };

        Ok(ret)
    }
}

impl DumpReader for V6Reader {
    type Document = serde_json::Map<String, serde_json::Value>;
    type Settings = index::Settings<Checked>;
@@ -78,8 +78,7 @@ impl DumpReader for V6Reader {
    }

    fn instance_uid(&self) -> Result<Option<Uuid>> {
        let uuid = fs::read_to_string(self.dump.path().join("instance-uid"))?;
        Ok(Some(Uuid::parse_str(&uuid)?))
        Ok(Some(self.instance_uid))
    }

    fn indexes(
@@ -87,15 +86,15 @@ impl DumpReader for V6Reader {
    ) -> Result<
        Box<
            dyn Iterator<
                Item = Result<
                    Box<
                        dyn super::IndexReader<
                            Document = Self::Document,
                            Settings = Self::Settings,
                Item = Result<
                    Box<
                        dyn super::IndexReader<
                            Document = Self::Document,
                            Settings = Self::Settings,
                        > + '_,
                    >,
                >,
                    >,
                >,
            > + '_,
        >,
    > {
        let entries = fs::read_dir(self.dump.path().join("indexes"))?;
@@ -164,8 +163,8 @@ impl IndexReader for V6IndexReader {
    type Document = serde_json::Map<String, serde_json::Value>;
    type Settings = index::Settings<Checked>;

    fn name(&self) -> &str {
        &self.name
    fn metadata(&self) -> &IndexMetadata {
        &self.metadata
    }

    fn documents(&mut self) -> Result<Box<dyn Iterator<Item = Result<Self::Document>> + '_>> {
@@ -8,13 +8,12 @@ use flate2::{write::GzEncoder, Compression};
use index::{Checked, Settings};
use index_scheduler::TaskView;
use meilisearch_auth::Key;
use serde::Serialize;
use serde_json::{Map, Value};
use tempfile::TempDir;
use time::OffsetDateTime;
use uuid::Uuid;

use crate::{Metadata, Result, CURRENT_DUMP_VERSION};
use crate::{IndexMetadata, Metadata, Result, CURRENT_DUMP_VERSION};

pub struct DumpWriter {
    dir: TempDir,
@@ -23,8 +22,9 @@ pub struct DumpWriter {
impl DumpWriter {
    pub fn new(instance_uuid: Uuid) -> Result<DumpWriter> {
        let dir = TempDir::new()?;

        fs::write(
            dir.path().join("instance-uid"),
            dir.path().join("instance_uid.uuid"),
            &instance_uuid.as_hyphenated().to_string(),
        )?;

@@ -43,8 +43,8 @@ impl DumpWriter {
        Ok(DumpWriter { dir })
    }

    pub fn create_index(&self, index_name: &str) -> Result<IndexWriter> {
        IndexWriter::new(self.dir.path().join("indexes").join(index_name))
    pub fn create_index(&self, index_name: &str, metadata: &IndexMetadata) -> Result<IndexWriter> {
        IndexWriter::new(self.dir.path().join("indexes").join(index_name), metadata)
    }

    pub fn create_keys(&self) -> Result<KeyWriter> {
@@ -126,9 +126,12 @@ pub struct IndexWriter {
}

impl IndexWriter {
    pub(crate) fn new(path: PathBuf) -> Result<Self> {
    pub(self) fn new(path: PathBuf, metadata: &IndexMetadata) -> Result<Self> {
        std::fs::create_dir(&path)?;

        let metadata_file = File::create(path.join("metadata.json"))?;
        serde_json::to_writer(metadata_file, metadata)?;

        let documents = File::create(path.join("documents.jsonl"))?;
        let settings = File::create(path.join("settings.json"))?;

@@ -243,14 +246,15 @@ pub(crate) mod test {
        ├---- indexes/
        │    └---- doggos/
        │    │    ├---- settings.json
        │    │    └---- documents.jsonl
        │    │    ├---- documents.jsonl
        │    │    └---- metadata.json
        ├---- tasks/
        │    ├---- update_files/
        │    │    └---- 1
        │    └---- queue.jsonl
        ├---- keys.jsonl
        ├---- metadata.json
        └---- instance-uid
        └---- instance_uid.uuid
        "###);

        // ==== checking the top level infos
@@ -264,7 +268,7 @@ pub(crate) mod test {
        }
        "###);

        let instance_uid = fs::read_to_string(dump_path.join("instance-uid")).unwrap();
        let instance_uid = fs::read_to_string(dump_path.join("instance_uid.uuid")).unwrap();
        assert_eq!(
            Uuid::from_str(&instance_uid).unwrap(),
            create_test_instance_uid()
@@ -284,6 +288,16 @@ pub(crate) mod test {
            serde_json::from_str::<Settings<Unchecked>>(&test_settings).unwrap(),
            create_test_settings().into_unchecked()
        );
        let metadata = fs::read_to_string(dump_path.join("indexes/doggos/metadata.json")).unwrap();
        let metadata: IndexMetadata = serde_json::from_str(&metadata).unwrap();
        insta::assert_json_snapshot!(metadata, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }, @r###"
        {
          "uid": "doggo",
          "primaryKey": null,
          "createdAt": "[date]",
          "updatedAt": "[date]"
        }
        "###);

        // ==== checking the task queue
        let tasks_queue = fs::read_to_string(dump_path.join("tasks/queue.jsonl")).unwrap();
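Putting the writer-side change together, a hedged usage sketch based on the test module earlier in this commit: `create_index` now takes an `IndexMetadata`, which `IndexWriter::new` persists as `metadata.json` next to `documents.jsonl` and `settings.json`. Signatures and the crate name `dump` are inferred from the tests and file paths, so treat this as an assumption rather than the crate's documented API.

```rust
// Usage sketch inferred from the test module above; not the crate's documented API.
// Assumes the workspace-internal `dump` crate introduced in this commit, plus
// serde_json, uuid, and time (with the "macros" feature).
use time::macros::datetime;
use uuid::Uuid;

fn write_dump() -> dump::Result<()> {
    let instance_uid = Uuid::parse_str("9e15e977-f2ae-4761-943f-1eaf75fd736d").unwrap();
    let dump = dump::DumpWriter::new(instance_uid)?;

    // The metadata that ends up in indexes/doggos/metadata.json.
    let metadata = dump::IndexMetadata {
        uid: "doggo".to_string(),
        primary_key: None,
        created_at: datetime!(2022-11-20 12:00 UTC),
        updated_at: datetime!(2022-11-21 00:00 UTC),
    };

    // New signature: the index writer is created together with its metadata.
    let mut index = dump.create_index("doggos", &metadata)?;
    let document = serde_json::json!({ "id": 1, "race": "golden retriever", "name": "paul", "age": 4 });
    index.push_document(document.as_object().unwrap())?;
    Ok(())
}
```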