mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 12:27:13 +02:00
introduce the index metadata
This commit is contained in:
parent
b5ebab5c66
commit
22b2fa0576
8 changed files with 753 additions and 53 deletions
|
@ -12,7 +12,7 @@ use tempfile::TempDir;
|
|||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{Result, Version};
|
||||
use crate::{IndexMetadata, Result, Version};
|
||||
|
||||
// use self::loaders::{v2, v3, v4, v5};
|
||||
|
||||
|
@ -20,6 +20,7 @@ use crate::{Result, Version};
|
|||
// mod compat;
|
||||
// mod loaders;
|
||||
// mod v1;
|
||||
mod v5;
|
||||
mod v6;
|
||||
|
||||
pub fn open(
|
||||
|
@ -97,10 +98,13 @@ pub trait DumpReader {
|
|||
) -> Result<
|
||||
Box<
|
||||
dyn Iterator<
|
||||
Item = Result<
|
||||
Box<dyn IndexReader<Document = Self::Document, Settings = Self::Settings>>,
|
||||
>,
|
||||
>,
|
||||
Item = Result<
|
||||
Box<
|
||||
dyn IndexReader<Document = Self::Document, Settings = Self::Settings>
|
||||
+ '_,
|
||||
>,
|
||||
>,
|
||||
> + '_,
|
||||
>,
|
||||
>;
|
||||
|
||||
|
@ -117,7 +121,7 @@ pub trait IndexReader {
|
|||
type Document;
|
||||
type Settings;
|
||||
|
||||
fn name(&self) -> &str;
|
||||
fn metadata(&self) -> &IndexMetadata;
|
||||
fn documents(&mut self) -> Result<Box<dyn Iterator<Item = Result<Self::Document>> + '_>>;
|
||||
fn settings(&mut self) -> Result<Self::Settings>;
|
||||
}
|
||||
|
|
14
dump/src/reader/v5/meta.rs
Normal file
14
dump/src/reader/v5/meta.rs
Normal file
|
@ -0,0 +1,14 @@
|
|||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct IndexUuid {
|
||||
pub uid: String,
|
||||
pub index_meta: IndexMeta,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct IndexMeta {
|
||||
pub uuid: Uuid,
|
||||
pub creation_task_id: usize,
|
||||
}
|
221
dump/src/reader/v5/mod.rs
Normal file
221
dump/src/reader/v5/mod.rs
Normal file
|
@ -0,0 +1,221 @@
|
|||
//! Here is what a dump v5 look like.
|
||||
//!
|
||||
//! ```text
|
||||
//! .
|
||||
//! ├── indexes
|
||||
//! │ ├── 22c269d8-fbbd-4416-bd46-7c7c02849325
|
||||
//! │ │ ├── documents.jsonl
|
||||
//! │ │ └── meta.json
|
||||
//! │ ├── 6d0471ba-2ed1-41de-8ea6-10db10fa2bb8
|
||||
//! │ │ ├── documents.jsonl
|
||||
//! │ │ └── meta.json
|
||||
//! │ └── f7d53ec4-0748-48e6-b66f-1fca9944b0fa
|
||||
//! │ ├── documents.jsonl
|
||||
//! │ └── meta.json
|
||||
//! ├── index_uuids
|
||||
//! │ └── data.jsonl
|
||||
//! ├── instance-uid
|
||||
//! ├── keys
|
||||
//! ├── metadata.json
|
||||
//! └── updates
|
||||
//! ├── data.jsonl
|
||||
//! └── updates_files
|
||||
//! └── c83a004a-da98-4b94-b245-3256266c7281
|
||||
//! ```
|
||||
//!
|
||||
//! Here is what `index_uuids/data.jsonl` looks like;
|
||||
//!
|
||||
//! ```json
|
||||
//! {"uid":"dnd_spells","index_meta":{"uuid":"22c269d8-fbbd-4416-bd46-7c7c02849325","creation_task_id":9}}
|
||||
//! {"uid":"movies","index_meta":{"uuid":"6d0471ba-2ed1-41de-8ea6-10db10fa2bb8","creation_task_id":1}}
|
||||
//! {"uid":"products","index_meta":{"uuid":"f7d53ec4-0748-48e6-b66f-1fca9944b0fa","creation_task_id":4}}
|
||||
//! ```
|
||||
//!
|
||||
|
||||
use std::{
|
||||
fs::{self, File},
|
||||
io::{BufRead, BufReader},
|
||||
path::Path,
|
||||
};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tempfile::TempDir;
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{IndexMetadata, Result, Version};
|
||||
|
||||
use self::{
|
||||
meta::IndexUuid,
|
||||
settings::{Checked, Settings, Unchecked},
|
||||
tasks::Task,
|
||||
};
|
||||
|
||||
use super::{DumpReader, IndexReader};
|
||||
|
||||
mod meta;
|
||||
mod settings;
|
||||
mod tasks;
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Metadata {
|
||||
db_version: String,
|
||||
index_db_size: usize,
|
||||
update_db_size: usize,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
dump_date: OffsetDateTime,
|
||||
}
|
||||
|
||||
pub struct V5Reader {
|
||||
dump: TempDir,
|
||||
metadata: Metadata,
|
||||
tasks: BufReader<File>,
|
||||
keys: BufReader<File>,
|
||||
index_uuid: Vec<IndexUuid>,
|
||||
}
|
||||
|
||||
struct V5IndexReader {
|
||||
metadata: IndexMetadata,
|
||||
|
||||
documents: BufReader<File>,
|
||||
settings: BufReader<File>,
|
||||
}
|
||||
|
||||
impl V5IndexReader {
|
||||
pub fn new(name: String, path: &Path) -> Result<Self> {
|
||||
let metadata = File::open(path.join("metadata.json"))?;
|
||||
|
||||
let ret = V5IndexReader {
|
||||
metadata: serde_json::from_reader(metadata)?,
|
||||
documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
|
||||
settings: BufReader::new(File::open(path.join("settings.json"))?),
|
||||
};
|
||||
|
||||
Ok(ret)
|
||||
}
|
||||
}
|
||||
|
||||
impl V5Reader {
|
||||
pub fn open(dump: TempDir) -> Result<Self> {
|
||||
let meta_file = fs::read(dump.path().join("metadata.json"))?;
|
||||
let metadata = serde_json::from_reader(&*meta_file)?;
|
||||
let index_uuid = File::open(dump.path().join("index_uuids/data.jsonl"))?;
|
||||
let index_uuid = BufReader::new(index_uuid);
|
||||
let index_uuid = index_uuid
|
||||
.lines()
|
||||
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
Ok(V5Reader {
|
||||
metadata,
|
||||
tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?),
|
||||
keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?),
|
||||
index_uuid,
|
||||
dump,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl DumpReader for V5Reader {
|
||||
type Document = serde_json::Map<String, serde_json::Value>;
|
||||
type Settings = Settings<Checked>;
|
||||
|
||||
type Task = Task;
|
||||
type UpdateFile = File;
|
||||
|
||||
// TODO: remove this
|
||||
type Key = meilisearch_auth::Key;
|
||||
|
||||
fn version(&self) -> Version {
|
||||
Version::V5
|
||||
}
|
||||
|
||||
fn date(&self) -> Option<OffsetDateTime> {
|
||||
Some(self.metadata.dump_date)
|
||||
}
|
||||
|
||||
fn instance_uid(&self) -> Result<Option<Uuid>> {
|
||||
let uuid = fs::read_to_string(self.dump.path().join("instance-uid"))?;
|
||||
Ok(Some(Uuid::parse_str(&uuid)?))
|
||||
}
|
||||
|
||||
fn indexes(
|
||||
&self,
|
||||
) -> Result<
|
||||
Box<
|
||||
dyn Iterator<
|
||||
Item = Result<
|
||||
Box<
|
||||
dyn super::IndexReader<
|
||||
Document = Self::Document,
|
||||
Settings = Self::Settings,
|
||||
> + '_,
|
||||
>,
|
||||
>,
|
||||
> + '_,
|
||||
>,
|
||||
> {
|
||||
Ok(Box::new(self.index_uuid.iter().map(|index| -> Result<_> {
|
||||
Ok(Box::new(V5IndexReader::new(
|
||||
index.uid.clone(),
|
||||
&self
|
||||
.dump
|
||||
.path()
|
||||
.join("indexes")
|
||||
.join(index.index_meta.uuid.to_string()),
|
||||
)?)
|
||||
as Box<
|
||||
dyn IndexReader<Document = Self::Document, Settings = Self::Settings>,
|
||||
>)
|
||||
})))
|
||||
}
|
||||
|
||||
fn tasks(
|
||||
&mut self,
|
||||
) -> Box<dyn Iterator<Item = Result<(Self::Task, Option<Self::UpdateFile>)>> + '_> {
|
||||
Box::new((&mut self.tasks).lines().map(|line| -> Result<_> {
|
||||
let task: Self::Task = serde_json::from_str(&line?)?;
|
||||
if let Some(uuid) = task.get_content_uuid() {
|
||||
let update_file_path = self
|
||||
.dump
|
||||
.path()
|
||||
.join("updates")
|
||||
.join("update_files")
|
||||
.join(uuid.to_string());
|
||||
Ok((task, Some(File::open(update_file_path)?)))
|
||||
} else {
|
||||
Ok((task, None))
|
||||
}
|
||||
}))
|
||||
}
|
||||
|
||||
// TODO: do it
|
||||
fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Self::Key>> + '_> {
|
||||
Box::new(
|
||||
(&mut self.keys)
|
||||
.lines()
|
||||
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexReader for V5IndexReader {
|
||||
type Document = serde_json::Map<String, serde_json::Value>;
|
||||
type Settings = Settings<Checked>;
|
||||
|
||||
fn metadata(&self) -> &IndexMetadata {
|
||||
&self.metadata
|
||||
}
|
||||
|
||||
fn documents(&mut self) -> Result<Box<dyn Iterator<Item = Result<Self::Document>> + '_>> {
|
||||
Ok(Box::new((&mut self.documents).lines().map(
|
||||
|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) },
|
||||
)))
|
||||
}
|
||||
|
||||
fn settings(&mut self) -> Result<Self::Settings> {
|
||||
let settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
|
||||
Ok(settings.check())
|
||||
}
|
||||
}
|
251
dump/src/reader/v5/settings.rs
Normal file
251
dump/src/reader/v5/settings.rs
Normal file
|
@ -0,0 +1,251 @@
|
|||
use std::{
|
||||
collections::{BTreeMap, BTreeSet},
|
||||
marker::PhantomData,
|
||||
};
|
||||
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
|
||||
#[derive(Clone, Default, Debug, Serialize, PartialEq, Eq)]
|
||||
pub struct Checked;
|
||||
|
||||
#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct Unchecked;
|
||||
|
||||
/// Holds all the settings for an index. `T` can either be `Checked` if they represents settings
|
||||
/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a
|
||||
/// call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`.
|
||||
#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))]
|
||||
pub struct Settings<T> {
|
||||
#[serde(default)]
|
||||
pub displayed_attributes: Setting<Vec<String>>,
|
||||
|
||||
#[serde(default)]
|
||||
pub searchable_attributes: Setting<Vec<String>>,
|
||||
|
||||
#[serde(default)]
|
||||
pub filterable_attributes: Setting<BTreeSet<String>>,
|
||||
#[serde(default)]
|
||||
pub sortable_attributes: Setting<BTreeSet<String>>,
|
||||
#[serde(default)]
|
||||
pub ranking_rules: Setting<Vec<String>>,
|
||||
#[serde(default)]
|
||||
pub stop_words: Setting<BTreeSet<String>>,
|
||||
#[serde(default)]
|
||||
pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
|
||||
#[serde(default)]
|
||||
pub distinct_attribute: Setting<String>,
|
||||
#[serde(default)]
|
||||
pub typo_tolerance: Setting<TypoSettings>,
|
||||
#[serde(default)]
|
||||
pub faceting: Setting<FacetingSettings>,
|
||||
#[serde(default)]
|
||||
pub pagination: Setting<PaginationSettings>,
|
||||
|
||||
#[serde(skip)]
|
||||
pub _kind: PhantomData<T>,
|
||||
}
|
||||
|
||||
fn serialize_with_wildcard<S>(
|
||||
field: &Setting<Vec<String>>,
|
||||
s: S,
|
||||
) -> std::result::Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
let wildcard = vec!["*".to_string()];
|
||||
match field {
|
||||
Setting::Set(value) => Some(value),
|
||||
Setting::Reset => Some(&wildcard),
|
||||
Setting::NotSet => None,
|
||||
}
|
||||
.serialize(s)
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Copy)]
|
||||
pub enum Setting<T> {
|
||||
Set(T),
|
||||
Reset,
|
||||
NotSet,
|
||||
}
|
||||
|
||||
impl<T> Default for Setting<T> {
|
||||
fn default() -> Self {
|
||||
Self::NotSet
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Setting<T> {
|
||||
pub fn set(self) -> Option<T> {
|
||||
match self {
|
||||
Self::Set(value) => Some(value),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub const fn as_ref(&self) -> Setting<&T> {
|
||||
match *self {
|
||||
Self::Set(ref value) => Setting::Set(value),
|
||||
Self::Reset => Setting::Reset,
|
||||
Self::NotSet => Setting::NotSet,
|
||||
}
|
||||
}
|
||||
|
||||
pub const fn is_not_set(&self) -> bool {
|
||||
matches!(self, Self::NotSet)
|
||||
}
|
||||
|
||||
/// If `Self` is `Reset`, then map self to `Set` with the provided `val`.
|
||||
pub fn or_reset(self, val: T) -> Self {
|
||||
match self {
|
||||
Self::Reset => Self::Set(val),
|
||||
otherwise => otherwise,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting<T> {
|
||||
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
Deserialize::deserialize(deserializer).map(|x| match x {
|
||||
Some(x) => Self::Set(x),
|
||||
None => Self::Reset, // Reset is forced by sending null value
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct MinWordSizeTyposSetting {
|
||||
#[serde(default)]
|
||||
pub one_typo: Setting<u8>,
|
||||
#[serde(default)]
|
||||
pub two_typos: Setting<u8>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct TypoSettings {
|
||||
#[serde(default)]
|
||||
pub enabled: Setting<bool>,
|
||||
#[serde(default)]
|
||||
pub min_word_size_for_typos: Setting<MinWordSizeTyposSetting>,
|
||||
#[serde(default)]
|
||||
pub disable_on_words: Setting<BTreeSet<String>>,
|
||||
#[serde(default)]
|
||||
pub disable_on_attributes: Setting<BTreeSet<String>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct FacetingSettings {
|
||||
#[serde(default)]
|
||||
pub max_values_per_facet: Setting<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, Deserialize, PartialEq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct PaginationSettings {
|
||||
#[serde(default)]
|
||||
pub max_total_hits: Setting<usize>,
|
||||
}
|
||||
|
||||
impl Settings<Checked> {
|
||||
pub fn cleared() -> Settings<Checked> {
|
||||
Settings {
|
||||
displayed_attributes: Setting::Reset,
|
||||
searchable_attributes: Setting::Reset,
|
||||
filterable_attributes: Setting::Reset,
|
||||
sortable_attributes: Setting::Reset,
|
||||
ranking_rules: Setting::Reset,
|
||||
stop_words: Setting::Reset,
|
||||
synonyms: Setting::Reset,
|
||||
distinct_attribute: Setting::Reset,
|
||||
typo_tolerance: Setting::Reset,
|
||||
faceting: Setting::Reset,
|
||||
pagination: Setting::Reset,
|
||||
_kind: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_unchecked(self) -> Settings<Unchecked> {
|
||||
let Self {
|
||||
displayed_attributes,
|
||||
searchable_attributes,
|
||||
filterable_attributes,
|
||||
sortable_attributes,
|
||||
ranking_rules,
|
||||
stop_words,
|
||||
synonyms,
|
||||
distinct_attribute,
|
||||
typo_tolerance,
|
||||
faceting,
|
||||
pagination,
|
||||
..
|
||||
} = self;
|
||||
|
||||
Settings {
|
||||
displayed_attributes,
|
||||
searchable_attributes,
|
||||
filterable_attributes,
|
||||
sortable_attributes,
|
||||
ranking_rules,
|
||||
stop_words,
|
||||
synonyms,
|
||||
distinct_attribute,
|
||||
typo_tolerance,
|
||||
faceting,
|
||||
pagination,
|
||||
_kind: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Settings<Unchecked> {
|
||||
pub fn check(self) -> Settings<Checked> {
|
||||
let displayed_attributes = match self.displayed_attributes {
|
||||
Setting::Set(fields) => {
|
||||
if fields.iter().any(|f| f == "*") {
|
||||
Setting::Reset
|
||||
} else {
|
||||
Setting::Set(fields)
|
||||
}
|
||||
}
|
||||
otherwise => otherwise,
|
||||
};
|
||||
|
||||
let searchable_attributes = match self.searchable_attributes {
|
||||
Setting::Set(fields) => {
|
||||
if fields.iter().any(|f| f == "*") {
|
||||
Setting::Reset
|
||||
} else {
|
||||
Setting::Set(fields)
|
||||
}
|
||||
}
|
||||
otherwise => otherwise,
|
||||
};
|
||||
|
||||
Settings {
|
||||
displayed_attributes,
|
||||
searchable_attributes,
|
||||
filterable_attributes: self.filterable_attributes,
|
||||
sortable_attributes: self.sortable_attributes,
|
||||
ranking_rules: self.ranking_rules,
|
||||
stop_words: self.stop_words,
|
||||
synonyms: self.synonyms,
|
||||
distinct_attribute: self.distinct_attribute,
|
||||
typo_tolerance: self.typo_tolerance,
|
||||
faceting: self.faceting,
|
||||
pagination: self.pagination,
|
||||
_kind: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
173
dump/src/reader/v5/tasks.rs
Normal file
173
dump/src/reader/v5/tasks.rs
Normal file
|
@ -0,0 +1,173 @@
|
|||
use serde::{Deserialize, Serialize};
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::settings::{Settings, Unchecked};
|
||||
|
||||
pub type TaskId = u32;
|
||||
pub type BatchId = u32;
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, PartialEq)]
|
||||
pub struct Task {
|
||||
pub id: TaskId,
|
||||
/// The name of the index the task is targeting. If it isn't targeting any index (i.e Dump task)
|
||||
/// then this is None
|
||||
// TODO: when next forward breaking dumps, it would be a good idea to move this field inside of
|
||||
// the TaskContent.
|
||||
pub content: TaskContent,
|
||||
pub events: Vec<TaskEvent>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, PartialEq)]
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum TaskContent {
|
||||
DocumentAddition {
|
||||
index_uid: IndexUid,
|
||||
content_uuid: Uuid,
|
||||
merge_strategy: IndexDocumentsMethod,
|
||||
primary_key: Option<String>,
|
||||
documents_count: usize,
|
||||
allow_index_creation: bool,
|
||||
},
|
||||
DocumentDeletion {
|
||||
index_uid: IndexUid,
|
||||
deletion: DocumentDeletion,
|
||||
},
|
||||
SettingsUpdate {
|
||||
index_uid: IndexUid,
|
||||
settings: Settings<Unchecked>,
|
||||
/// Indicates whether the task was a deletion
|
||||
is_deletion: bool,
|
||||
allow_index_creation: bool,
|
||||
},
|
||||
IndexDeletion {
|
||||
index_uid: IndexUid,
|
||||
},
|
||||
IndexCreation {
|
||||
index_uid: IndexUid,
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
IndexUpdate {
|
||||
index_uid: IndexUid,
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
Dump {
|
||||
uid: String,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
pub struct IndexUid(String);
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
pub enum IndexDocumentsMethod {
|
||||
/// Replace the previous document with the new one,
|
||||
/// removing all the already known attributes.
|
||||
ReplaceDocuments,
|
||||
|
||||
/// Merge the previous version of the document with the new version,
|
||||
/// replacing old attributes values with the new ones and add the new attributes.
|
||||
UpdateDocuments,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub enum DocumentDeletion {
|
||||
Clear,
|
||||
Ids(Vec<String>),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub enum TaskEvent {
|
||||
Created(#[serde(with = "time::serde::rfc3339")] OffsetDateTime),
|
||||
Batched {
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
timestamp: OffsetDateTime,
|
||||
batch_id: BatchId,
|
||||
},
|
||||
Processing(#[serde(with = "time::serde::rfc3339")] OffsetDateTime),
|
||||
Succeeded {
|
||||
result: TaskResult,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
timestamp: OffsetDateTime,
|
||||
},
|
||||
Failed {
|
||||
error: ResponseError,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
timestamp: OffsetDateTime,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub enum TaskResult {
|
||||
DocumentAddition { indexed_documents: u64 },
|
||||
DocumentDeletion { deleted_documents: u64 },
|
||||
ClearAll { deleted_documents: u64 },
|
||||
Other,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ResponseError {
|
||||
message: String,
|
||||
#[serde(rename = "code")]
|
||||
error_code: String,
|
||||
#[serde(rename = "type")]
|
||||
error_type: String,
|
||||
#[serde(rename = "link")]
|
||||
error_link: String,
|
||||
}
|
||||
|
||||
impl Task {
|
||||
/// Return true when a task is finished.
|
||||
/// A task is finished when its last state is either `Succeeded` or `Failed`.
|
||||
pub fn is_finished(&self) -> bool {
|
||||
self.events.last().map_or(false, |event| {
|
||||
matches!(
|
||||
event,
|
||||
TaskEvent::Succeeded { .. } | TaskEvent::Failed { .. }
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
/// Return the content_uuid of the `Task` if there is one.
|
||||
pub fn get_content_uuid(&self) -> Option<Uuid> {
|
||||
match self {
|
||||
Task {
|
||||
content: TaskContent::DocumentAddition { content_uuid, .. },
|
||||
..
|
||||
} => Some(*content_uuid),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn index_uid(&self) -> Option<&str> {
|
||||
match &self.content {
|
||||
TaskContent::DocumentAddition { index_uid, .. }
|
||||
| TaskContent::DocumentDeletion { index_uid, .. }
|
||||
| TaskContent::SettingsUpdate { index_uid, .. }
|
||||
| TaskContent::IndexDeletion { index_uid }
|
||||
| TaskContent::IndexCreation { index_uid, .. }
|
||||
| TaskContent::IndexUpdate { index_uid, .. } => Some(index_uid.as_str()),
|
||||
TaskContent::Dump { .. } => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexUid {
|
||||
pub fn into_inner(self) -> String {
|
||||
self.0
|
||||
}
|
||||
|
||||
/// Return a reference over the inner str.
|
||||
pub fn as_str(&self) -> &str {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Deref for IndexUid {
|
||||
type Target = str;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
|
@ -2,6 +2,7 @@ use std::{
|
|||
fs::{self, File},
|
||||
io::{BufRead, BufReader},
|
||||
path::Path,
|
||||
str::FromStr,
|
||||
};
|
||||
|
||||
use index::{Checked, Unchecked};
|
||||
|
@ -9,50 +10,35 @@ use tempfile::TempDir;
|
|||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{Error, Result, Version};
|
||||
use crate::{Error, IndexMetadata, Result, Version};
|
||||
|
||||
use super::{DumpReader, IndexReader};
|
||||
|
||||
type Metadata = crate::Metadata;
|
||||
|
||||
pub fn date(dump: &Path) -> Result<OffsetDateTime> {
|
||||
let metadata = fs::read(dump.join("metadata.json"))?;
|
||||
let metadata: Metadata = serde_json::from_reader(&*metadata)?;
|
||||
Ok(metadata.dump_date)
|
||||
}
|
||||
|
||||
pub struct V6Reader {
|
||||
dump: TempDir,
|
||||
instance_uid: Uuid,
|
||||
metadata: Metadata,
|
||||
tasks: BufReader<File>,
|
||||
keys: BufReader<File>,
|
||||
}
|
||||
|
||||
struct V6IndexReader {
|
||||
name: String,
|
||||
metadata: IndexMetadata,
|
||||
documents: BufReader<File>,
|
||||
settings: BufReader<File>,
|
||||
}
|
||||
|
||||
impl V6IndexReader {
|
||||
pub fn new(name: String, path: &Path) -> Result<Self> {
|
||||
let ret = V6IndexReader {
|
||||
name,
|
||||
documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
|
||||
settings: BufReader::new(File::open(path.join("settings.json"))?),
|
||||
};
|
||||
|
||||
Ok(ret)
|
||||
}
|
||||
}
|
||||
|
||||
impl V6Reader {
|
||||
pub fn open(dump: TempDir) -> Result<Self> {
|
||||
let meta_file = fs::read(dump.path().join("metadata.json"))?;
|
||||
let metadata = serde_json::from_reader(&*meta_file)?;
|
||||
let instance_uid = fs::read_to_string(dump.path().join("instance_uid.uuid"))?;
|
||||
let instance_uid = Uuid::from_str(&instance_uid)?;
|
||||
|
||||
Ok(V6Reader {
|
||||
metadata,
|
||||
metadata: serde_json::from_reader(&*meta_file)?,
|
||||
instance_uid,
|
||||
tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?),
|
||||
keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?),
|
||||
dump,
|
||||
|
@ -60,6 +46,20 @@ impl V6Reader {
|
|||
}
|
||||
}
|
||||
|
||||
impl V6IndexReader {
|
||||
pub fn new(name: String, path: &Path) -> Result<Self> {
|
||||
let metadata = File::open(path.join("metadata.json"))?;
|
||||
|
||||
let ret = V6IndexReader {
|
||||
metadata: serde_json::from_reader(metadata)?,
|
||||
documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
|
||||
settings: BufReader::new(File::open(path.join("settings.json"))?),
|
||||
};
|
||||
|
||||
Ok(ret)
|
||||
}
|
||||
}
|
||||
|
||||
impl DumpReader for V6Reader {
|
||||
type Document = serde_json::Map<String, serde_json::Value>;
|
||||
type Settings = index::Settings<Checked>;
|
||||
|
@ -78,8 +78,7 @@ impl DumpReader for V6Reader {
|
|||
}
|
||||
|
||||
fn instance_uid(&self) -> Result<Option<Uuid>> {
|
||||
let uuid = fs::read_to_string(self.dump.path().join("instance-uid"))?;
|
||||
Ok(Some(Uuid::parse_str(&uuid)?))
|
||||
Ok(Some(self.instance_uid))
|
||||
}
|
||||
|
||||
fn indexes(
|
||||
|
@ -87,15 +86,15 @@ impl DumpReader for V6Reader {
|
|||
) -> Result<
|
||||
Box<
|
||||
dyn Iterator<
|
||||
Item = Result<
|
||||
Box<
|
||||
dyn super::IndexReader<
|
||||
Document = Self::Document,
|
||||
Settings = Self::Settings,
|
||||
Item = Result<
|
||||
Box<
|
||||
dyn super::IndexReader<
|
||||
Document = Self::Document,
|
||||
Settings = Self::Settings,
|
||||
> + '_,
|
||||
>,
|
||||
>,
|
||||
>,
|
||||
>,
|
||||
> + '_,
|
||||
>,
|
||||
> {
|
||||
let entries = fs::read_dir(self.dump.path().join("indexes"))?;
|
||||
|
@ -164,8 +163,8 @@ impl IndexReader for V6IndexReader {
|
|||
type Document = serde_json::Map<String, serde_json::Value>;
|
||||
type Settings = index::Settings<Checked>;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
&self.name
|
||||
fn metadata(&self) -> &IndexMetadata {
|
||||
&self.metadata
|
||||
}
|
||||
|
||||
fn documents(&mut self) -> Result<Box<dyn Iterator<Item = Result<Self::Document>> + '_>> {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue