mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 05:54:30 +01:00
Merge #3343
3343: Extract creation and last updated timestamp for v3 dump r=curquiza a=FrancisMurillo # Pull Request ## Related issue Fixes #2988 ## What does this PR do? Inspired by the v4 dump implementation, this extracts the first `createdAt` and last `updatedAt` fields by parsing the task queue. Questions: - Should the parsing of the tasks be cached instead of being parsed for every index since it might add a performance penalty? - I am not sure if the `created_at` and `processed_at` fields are correct - Should I assume the data is sorted in some order like with `uuid` or `updateId`? I assumed the list is unordered. - I was planning to populate my dev instance with data and dump my data. Is there a way to dump with previous versions? ## PR checklist Please check if your PR fulfills the following requirements: - [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [x] Have you read the contributing guidelines? - [x] Have you made sure that the title is accurate and descriptive of the changes? Thank you so much for contributing to Meilisearch! Co-authored-by: Francis Murillo <evacuee.overlap.vs3op@aleeas.com>
This commit is contained in:
commit
60018d0fe4
@ -112,8 +112,11 @@ impl V3Reader {
|
|||||||
pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V3IndexReader>> + '_> {
|
pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V3IndexReader>> + '_> {
|
||||||
Ok(self.index_uuid.iter().map(|index| -> Result<_> {
|
Ok(self.index_uuid.iter().map(|index| -> Result<_> {
|
||||||
V3IndexReader::new(
|
V3IndexReader::new(
|
||||||
index.uid.clone(),
|
|
||||||
&self.dump.path().join("indexes").join(index.uuid.to_string()),
|
&self.dump.path().join("indexes").join(index.uuid.to_string()),
|
||||||
|
index,
|
||||||
|
BufReader::new(
|
||||||
|
File::open(self.dump.path().join("updates").join("data.jsonl")).unwrap(),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
@ -155,16 +158,42 @@ pub struct V3IndexReader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl V3IndexReader {
|
impl V3IndexReader {
|
||||||
pub fn new(name: String, path: &Path) -> Result<Self> {
|
pub fn new(path: &Path, index_uuid: &IndexUuid, tasks: BufReader<File>) -> Result<Self> {
|
||||||
let meta = File::open(path.join("meta.json"))?;
|
let meta = File::open(path.join("meta.json"))?;
|
||||||
let meta: DumpMeta = serde_json::from_reader(meta)?;
|
let meta: DumpMeta = serde_json::from_reader(meta)?;
|
||||||
|
|
||||||
|
let mut created_at = None;
|
||||||
|
let mut updated_at = None;
|
||||||
|
|
||||||
|
for line in tasks.lines() {
|
||||||
|
let task: Task = serde_json::from_str(&line?)?;
|
||||||
|
|
||||||
|
if !(task.uuid == index_uuid.uuid && task.is_finished()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let new_created_at = match task.update.meta() {
|
||||||
|
Kind::DocumentAddition { .. } | Kind::Settings(_) => task.update.finished_at(),
|
||||||
|
_ => None,
|
||||||
|
};
|
||||||
|
let new_updated_at = task.update.finished_at();
|
||||||
|
|
||||||
|
if created_at.is_none() || created_at > new_created_at {
|
||||||
|
created_at = new_created_at;
|
||||||
|
}
|
||||||
|
|
||||||
|
if updated_at.is_none() || updated_at < new_updated_at {
|
||||||
|
updated_at = new_updated_at;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let current_time = OffsetDateTime::now_utc();
|
||||||
|
|
||||||
let metadata = IndexMetadata {
|
let metadata = IndexMetadata {
|
||||||
uid: name,
|
uid: index_uuid.uid.clone(),
|
||||||
primary_key: meta.primary_key,
|
primary_key: meta.primary_key,
|
||||||
// FIXME: Iterate over the whole task queue to find the creation and last update date.
|
created_at: created_at.unwrap_or(current_time),
|
||||||
created_at: OffsetDateTime::now_utc(),
|
updated_at: updated_at.unwrap_or(current_time),
|
||||||
updated_at: OffsetDateTime::now_utc(),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let ret = V3IndexReader {
|
let ret = V3IndexReader {
|
||||||
@ -263,12 +292,12 @@ pub(crate) mod test {
|
|||||||
assert!(indexes.is_empty());
|
assert!(indexes.is_empty());
|
||||||
|
|
||||||
// products
|
// products
|
||||||
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
|
insta::assert_json_snapshot!(products.metadata(), @r###"
|
||||||
{
|
{
|
||||||
"uid": "products",
|
"uid": "products",
|
||||||
"primaryKey": "sku",
|
"primaryKey": "sku",
|
||||||
"createdAt": "[now]",
|
"createdAt": "2022-10-07T11:38:54.74389899Z",
|
||||||
"updatedAt": "[now]"
|
"updatedAt": "2022-10-07T11:38:55.963185778Z"
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -278,12 +307,12 @@ pub(crate) mod test {
|
|||||||
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
|
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
|
||||||
|
|
||||||
// movies
|
// movies
|
||||||
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
|
insta::assert_json_snapshot!(movies.metadata(), @r###"
|
||||||
{
|
{
|
||||||
"uid": "movies",
|
"uid": "movies",
|
||||||
"primaryKey": "id",
|
"primaryKey": "id",
|
||||||
"createdAt": "[now]",
|
"createdAt": "2022-10-07T11:38:54.026649575Z",
|
||||||
"updatedAt": "[now]"
|
"updatedAt": "2022-10-07T11:39:04.188852537Z"
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -308,12 +337,12 @@ pub(crate) mod test {
|
|||||||
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
|
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
|
||||||
|
|
||||||
// spells
|
// spells
|
||||||
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
|
insta::assert_json_snapshot!(spells.metadata(), @r###"
|
||||||
{
|
{
|
||||||
"uid": "dnd_spells",
|
"uid": "dnd_spells",
|
||||||
"primaryKey": "index",
|
"primaryKey": "index",
|
||||||
"createdAt": "[now]",
|
"createdAt": "2022-10-07T11:38:56.265951133Z",
|
||||||
"updatedAt": "[now]"
|
"updatedAt": "2022-10-07T11:38:56.521004328Z"
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
|
@ -74,6 +74,26 @@ impl UpdateStatus {
|
|||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn enqueued_at(&self) -> Option<OffsetDateTime> {
|
||||||
|
match self {
|
||||||
|
UpdateStatus::Processing(u) => Some(u.from.enqueued_at),
|
||||||
|
UpdateStatus::Enqueued(u) => Some(u.enqueued_at),
|
||||||
|
UpdateStatus::Processed(u) => Some(u.from.from.enqueued_at),
|
||||||
|
UpdateStatus::Aborted(u) => Some(u.from.enqueued_at),
|
||||||
|
UpdateStatus::Failed(u) => Some(u.from.from.enqueued_at),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn finished_at(&self) -> Option<OffsetDateTime> {
|
||||||
|
match self {
|
||||||
|
UpdateStatus::Processing(_) => None,
|
||||||
|
UpdateStatus::Enqueued(_) => None,
|
||||||
|
UpdateStatus::Processed(u) => Some(u.processed_at),
|
||||||
|
UpdateStatus::Aborted(_) => None,
|
||||||
|
UpdateStatus::Failed(u) => Some(u.failed_at),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize, Clone)]
|
#[derive(Debug, Deserialize, Clone)]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user