From 3fb67f94f796bfe451be20e8852cd908b066a351 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 29 Mar 2023 14:44:15 +0200 Subject: [PATCH] Reduce the time to import a dump by caching some data With this commit, for a dump containing 1M tasks we went from 1m02 to 6s --- index-scheduler/src/lib.rs | 49 ++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 3e7c85148..296029435 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -31,6 +31,7 @@ mod uuid_codec; pub type Result = std::result::Result; pub type TaskId = u32; +use std::collections::HashMap; use std::ops::{Bound, RangeBounds}; use std::path::{Path, PathBuf}; use std::sync::atomic::AtomicBool; @@ -1114,6 +1115,10 @@ impl IndexScheduler { pub struct Dump<'a> { index_scheduler: &'a IndexScheduler, wtxn: RwTxn<'a, 'a>, + + indexes: HashMap, + statuses: HashMap, + kinds: HashMap, } impl<'a> Dump<'a> { @@ -1121,7 +1126,13 @@ impl<'a> Dump<'a> { // While loading a dump no one should be able to access the scheduler thus I can block everything. let wtxn = index_scheduler.env.write_txn()?; - Ok(Dump { index_scheduler, wtxn }) + Ok(Dump { + index_scheduler, + wtxn, + indexes: HashMap::new(), + statuses: HashMap::new(), + kinds: HashMap::new(), + }) } /// Register a new task coming from a dump in the scheduler. 
@@ -1215,26 +1226,38 @@ impl<'a> Dump<'a> { self.index_scheduler.all_tasks.put(&mut self.wtxn, &BEU32::new(task.uid), &task)?; for index in task.indexes() { - self.index_scheduler.update_index(&mut self.wtxn, index, |bitmap| { - bitmap.insert(task.uid); - })?; + match self.indexes.get_mut(index) { + Some(bitmap) => { + bitmap.insert(task.uid); + } + None => { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert(task.uid); + self.indexes.insert(index.to_string(), bitmap); + } + }; } - - self.index_scheduler.update_status(&mut self.wtxn, task.status, |bitmap| { - bitmap.insert(task.uid); - })?; - - self.index_scheduler.update_kind(&mut self.wtxn, task.kind.as_kind(), |bitmap| { - (bitmap.insert(task.uid)); - })?; + self.statuses.entry(task.status).or_insert(RoaringBitmap::new()).insert(task.uid); + self.kinds.entry(task.kind.as_kind()).or_insert(RoaringBitmap::new()).insert(task.uid); Ok(task) } /// Commit all the changes and exit the importing dump state - pub fn finish(self) -> Result<()> { + pub fn finish(mut self) -> Result<()> { + for (index, bitmap) in self.indexes { + self.index_scheduler.index_tasks.put(&mut self.wtxn, &index, &bitmap)?; + } + for (status, bitmap) in self.statuses { + self.index_scheduler.put_status(&mut self.wtxn, status, &bitmap)?; + } + for (kind, bitmap) in self.kinds { + self.index_scheduler.put_kind(&mut self.wtxn, kind, &bitmap)?; + } + self.wtxn.commit()?; self.index_scheduler.wake_up.signal(); + Ok(()) } }