From 61b3a29ff3e67828c82f1ab8b68d6e4d5af5de76 Mon Sep 17 00:00:00 2001 From: funilrys Date: Mon, 31 Oct 2022 18:18:35 +0100 Subject: [PATCH 01/11] Extract the dates out of the dumpv5. This patch possibly fixes #2986. This patch introduces a way to fill the IndexMetadata.created_at and IndexMetadata.updated_at keys from the tasks events. This is done by reading the creation date of the first event (created_at) and the creation date of the last event (updated_at). --- dump/src/reader/v5/mod.rs | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/dump/src/reader/v5/mod.rs b/dump/src/reader/v5/mod.rs index 98c6c26dd..77efc7a81 100644 --- a/dump/src/reader/v5/mod.rs +++ b/dump/src/reader/v5/mod.rs @@ -56,6 +56,7 @@ pub type Checked = settings::Checked; pub type Unchecked = settings::Unchecked; pub type Task = tasks::Task; +pub type TaskEvent = tasks::TaskEvent; pub type Key = keys::Key; // ===== Other types to clarify the code of the compat module @@ -141,6 +142,7 @@ impl V5Reader { V5IndexReader::new( index.uid.clone(), &self.dump.path().join("indexes").join(index.index_meta.uuid.to_string()), + BufReader::new(self.tasks.get_ref().try_clone().unwrap()), ) })) } @@ -189,16 +191,31 @@ pub struct V5IndexReader { } impl V5IndexReader { - pub fn new(name: String, path: &Path) -> Result { + pub fn new(name: String, path: &Path, tasks: BufReader) -> Result { let meta = File::open(path.join("meta.json"))?; let meta: meta::DumpMeta = serde_json::from_reader(meta)?; + let mut index_tasks: Vec = vec![]; + + for line in tasks.lines() { + let task: Task = serde_json::from_str(&line?)?; + + if task.index_uid().unwrap_or_default() == name { + index_tasks.push(task) + } + } + let metadata = IndexMetadata { uid: name, primary_key: meta.primary_key, - // FIXME: Iterate over the whole task queue to find the creation and last update date. 
- created_at: OffsetDateTime::now_utc(), - updated_at: OffsetDateTime::now_utc(), + created_at: match index_tasks.first().unwrap().events.first() { + Some(TaskEvent::Created(ts)) => *ts, + _ => OffsetDateTime::now_utc(), + }, + updated_at: match index_tasks.last().unwrap().events.last() { + Some(TaskEvent::Created(ts)) => *ts, + _ => OffsetDateTime::now_utc(), + }, }; let ret = V5IndexReader { From cf50f8598697a5ce44cc86be8e178e0c25e87fb5 Mon Sep 17 00:00:00 2001 From: funilrys Date: Sat, 3 Dec 2022 18:53:41 +0100 Subject: [PATCH 02/11] Add reader.v5.tasks.Task.processed_at. There was no way to "quickly" get the processed date. --- dump/src/reader/v5/tasks.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dump/src/reader/v5/tasks.rs b/dump/src/reader/v5/tasks.rs index 125e20559..61397dd24 100644 --- a/dump/src/reader/v5/tasks.rs +++ b/dump/src/reader/v5/tasks.rs @@ -140,6 +140,13 @@ impl Task { TaskContent::Dump { .. } => None, } } + + pub fn processed_at(&self) -> Option { + match self.events.last() { + Some(TaskEvent::Succeeded { result: _, timestamp }) => Some(*timestamp), + _ => None, + } + } } impl IndexUid { From 1be4619b91bab19920bd2ed3b25fe5ed0b933d41 Mon Sep 17 00:00:00 2001 From: funilrys Date: Sat, 3 Dec 2022 18:59:08 +0100 Subject: [PATCH 03/11] Add reader.v5.tasks.Task.created_at. There was no way to "quickly" get the creation date. 
--- dump/src/reader/v5/tasks.rs | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/dump/src/reader/v5/tasks.rs b/dump/src/reader/v5/tasks.rs index 61397dd24..528a870fc 100644 --- a/dump/src/reader/v5/tasks.rs +++ b/dump/src/reader/v5/tasks.rs @@ -147,6 +147,38 @@ impl Task { _ => None, } } + + pub fn created_at(&self) -> Option { + match &self.content { + TaskContent::IndexCreation { index_uid: _, primary_key: _ } => { + match self.events.first() { + Some(TaskEvent::Created(ts)) => Some(*ts), + _ => None, + } + } + TaskContent::DocumentAddition { + index_uid: _, + content_uuid: _, + merge_strategy: _, + primary_key: _, + documents_count: _, + allow_index_creation: _, + } => match self.events.first() { + Some(TaskEvent::Created(ts)) => Some(*ts), + _ => None, + }, + TaskContent::SettingsUpdate { + index_uid: _, + settings: _, + is_deletion: _, + allow_index_creation: _, + } => match self.events.first() { + Some(TaskEvent::Created(ts)) => Some(*ts), + _ => None, + }, + _ => None, + } + } } impl IndexUid { From a43a0712fa1481dea7c931e7f6bffa62ca97d429 Mon Sep 17 00:00:00 2001 From: funilrys Date: Sat, 3 Dec 2022 19:01:51 +0100 Subject: [PATCH 04/11] Add reader.v5.tasks.Task.updated_at. There was no way to "quickly" get the update date. --- dump/src/reader/v5/tasks.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/dump/src/reader/v5/tasks.rs b/dump/src/reader/v5/tasks.rs index 528a870fc..17c46f34d 100644 --- a/dump/src/reader/v5/tasks.rs +++ b/dump/src/reader/v5/tasks.rs @@ -179,6 +179,14 @@ impl Task { _ => None, } } + + + pub fn updated_at(&self) -> Option { + match self.events.last() { + Some(TaskEvent::Created(ts)) => Some(*ts), + _ => None, + } + } } impl IndexUid { From 13fb5ce974caddf21168d44e122fd4c5dc31c388 Mon Sep 17 00:00:00 2001 From: funilrys Date: Sat, 3 Dec 2022 19:03:01 +0100 Subject: [PATCH 05/11] Re-Open tasks list when needed. 
Indeed, before this patch we were using the reference instead of "reopening" the task list each time we needed to access it. Without this patch, all other usage of the task attribute will break. --- dump/src/reader/v5/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dump/src/reader/v5/mod.rs b/dump/src/reader/v5/mod.rs index 77efc7a81..71a3c87a7 100644 --- a/dump/src/reader/v5/mod.rs +++ b/dump/src/reader/v5/mod.rs @@ -142,7 +142,7 @@ impl V5Reader { V5IndexReader::new( index.uid.clone(), &self.dump.path().join("indexes").join(index.index_meta.uuid.to_string()), - BufReader::new(self.tasks.get_ref().try_clone().unwrap()), + BufReader::new(File::open(&self.dump.path().join("updates").join("data.jsonl")).unwrap()), ) })) } From 528a944997a9d9ed7546f52595e3fbcaea52f9a2 Mon Sep 17 00:00:00 2001 From: funilrys Date: Sat, 3 Dec 2022 19:10:38 +0100 Subject: [PATCH 06/11] Reimplement v5 date extraction. Indeed, before this patch the implementation wasn't correct. --- dump/src/reader/v5/mod.rs | 41 ++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/dump/src/reader/v5/mod.rs b/dump/src/reader/v5/mod.rs index 71a3c87a7..46209d105 100644 --- a/dump/src/reader/v5/mod.rs +++ b/dump/src/reader/v5/mod.rs @@ -142,7 +142,10 @@ impl V5Reader { V5IndexReader::new( index.uid.clone(), &self.dump.path().join("indexes").join(index.index_meta.uuid.to_string()), - BufReader::new(File::open(&self.dump.path().join("updates").join("data.jsonl")).unwrap()), + &index.index_meta, + BufReader::new( + File::open(&self.dump.path().join("updates").join("data.jsonl")).unwrap(), + ), ) })) } @@ -191,31 +194,43 @@ pub struct V5IndexReader { } impl V5IndexReader { - pub fn new(name: String, path: &Path, tasks: BufReader) -> Result { + pub fn new( + name: String, + path: &Path, + index_metadata: &meta::IndexMeta, + tasks: BufReader, + ) -> Result { let meta = File::open(path.join("meta.json"))?; let meta: meta::DumpMeta = 
serde_json::from_reader(meta)?; - let mut index_tasks: Vec = vec![]; + let mut created_at = None; + let mut updated_at = None; for line in tasks.lines() { let task: Task = serde_json::from_str(&line?)?; - if task.index_uid().unwrap_or_default() == name { - index_tasks.push(task) + if task.index_uid().unwrap_or_default().to_string() == name { + if updated_at.is_none() { + updated_at = task.updated_at() + } + + if created_at.is_none() { + created_at = task.created_at() + } + + if task.id as usize == index_metadata.creation_task_id { + created_at = task.processed_at(); + + break; + } } } let metadata = IndexMetadata { uid: name, primary_key: meta.primary_key, - created_at: match index_tasks.first().unwrap().events.first() { - Some(TaskEvent::Created(ts)) => *ts, - _ => OffsetDateTime::now_utc(), - }, - updated_at: match index_tasks.last().unwrap().events.last() { - Some(TaskEvent::Created(ts)) => *ts, - _ => OffsetDateTime::now_utc(), - }, + created_at: created_at.unwrap_or_else(OffsetDateTime::now_utc), + updated_at: updated_at.unwrap_or_else(OffsetDateTime::now_utc), }; let ret = V5IndexReader { From c07a5932cbf8ba5dbef3587fa68057484da3a262 Mon Sep 17 00:00:00 2001 From: funilrys Date: Sat, 3 Dec 2022 19:11:27 +0100 Subject: [PATCH 07/11] Apply fmt. --- dump/src/reader/v5/tasks.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/dump/src/reader/v5/tasks.rs b/dump/src/reader/v5/tasks.rs index 17c46f34d..7f64eb3f3 100644 --- a/dump/src/reader/v5/tasks.rs +++ b/dump/src/reader/v5/tasks.rs @@ -180,7 +180,6 @@ impl Task { } } - pub fn updated_at(&self) -> Option { match self.events.last() { Some(TaskEvent::Created(ts)) => Some(*ts), From e2775c6f49a511552618e0e54fd81d9c8ed068e1 Mon Sep 17 00:00:00 2001 From: funilrys Date: Sat, 3 Dec 2022 19:13:06 +0100 Subject: [PATCH 08/11] Remove unused object. 
--- dump/src/reader/v5/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/dump/src/reader/v5/mod.rs b/dump/src/reader/v5/mod.rs index 46209d105..150e4d16f 100644 --- a/dump/src/reader/v5/mod.rs +++ b/dump/src/reader/v5/mod.rs @@ -56,7 +56,6 @@ pub type Checked = settings::Checked; pub type Unchecked = settings::Unchecked; pub type Task = tasks::Task; -pub type TaskEvent = tasks::TaskEvent; pub type Key = keys::Key; // ===== Other types to clarify the code of the compat module From 4e6c663a2e1226253482042a65a78517fe998419 Mon Sep 17 00:00:00 2001 From: funilrys Date: Sat, 3 Dec 2022 19:14:24 +0100 Subject: [PATCH 09/11] Release unnecessary ownership. --- dump/src/reader/v5/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dump/src/reader/v5/mod.rs b/dump/src/reader/v5/mod.rs index 150e4d16f..d8f6be489 100644 --- a/dump/src/reader/v5/mod.rs +++ b/dump/src/reader/v5/mod.rs @@ -208,7 +208,7 @@ impl V5IndexReader { for line in tasks.lines() { let task: Task = serde_json::from_str(&line?)?; - if task.index_uid().unwrap_or_default().to_string() == name { + if *task.index_uid().unwrap_or_default().to_string() == name { if updated_at.is_none() { updated_at = task.updated_at() } From 0bc4572905a0088b61dd866d43bd63cf9f7fa2a5 Mon Sep 17 00:00:00 2001 From: funilrys Date: Thu, 22 Dec 2022 17:53:33 +0100 Subject: [PATCH 10/11] Adjust + Cleanup changes. Indeed, I missed some of the changes that were introduced by #3190. 
--- dump/src/reader/v5/mod.rs | 8 ++------ dump/src/reader/v5/tasks.rs | 7 ------- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/dump/src/reader/v5/mod.rs b/dump/src/reader/v5/mod.rs index d8f6be489..1e70b3437 100644 --- a/dump/src/reader/v5/mod.rs +++ b/dump/src/reader/v5/mod.rs @@ -210,15 +210,11 @@ impl V5IndexReader { if *task.index_uid().unwrap_or_default().to_string() == name { if updated_at.is_none() { - updated_at = task.updated_at() - } - - if created_at.is_none() { - created_at = task.created_at() + updated_at = task.processed_at() } if task.id as usize == index_metadata.creation_task_id { - created_at = task.processed_at(); + created_at = task.created_at(); break; } diff --git a/dump/src/reader/v5/tasks.rs b/dump/src/reader/v5/tasks.rs index 7f64eb3f3..528a870fc 100644 --- a/dump/src/reader/v5/tasks.rs +++ b/dump/src/reader/v5/tasks.rs @@ -179,13 +179,6 @@ impl Task { _ => None, } } - - pub fn updated_at(&self) -> Option { - match self.events.last() { - Some(TaskEvent::Created(ts)) => Some(*ts), - _ => None, - } - } } impl IndexUid { From 3e0e8164a3841b2fce55962db06f1e4d2a273703 Mon Sep 17 00:00:00 2001 From: funilrys Date: Thu, 22 Dec 2022 18:01:54 +0100 Subject: [PATCH 11/11] fixup! Adjust + Cleanup changes. --- dump/src/reader/v5/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dump/src/reader/v5/mod.rs b/dump/src/reader/v5/mod.rs index 1e70b3437..04b9ba508 100644 --- a/dump/src/reader/v5/mod.rs +++ b/dump/src/reader/v5/mod.rs @@ -143,7 +143,7 @@ impl V5Reader { &self.dump.path().join("indexes").join(index.index_meta.uuid.to_string()), &index.index_meta, BufReader::new( - File::open(&self.dump.path().join("updates").join("data.jsonl")).unwrap(), + File::open(self.dump.path().join("updates").join("data.jsonl")).unwrap(), ), ) }))