mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-30 23:27:36 +01:00
store the enqueued at to ease the batch deletion
This commit is contained in:
parent
508db9020d
commit
58f90b70c7
@ -1,7 +1,7 @@
|
|||||||
use std::collections::BTreeSet;
|
use std::collections::BTreeSet;
|
||||||
use std::fmt::Write;
|
use std::fmt::Write;
|
||||||
|
|
||||||
use meilisearch_types::batches::Batch;
|
use meilisearch_types::batches::{Batch, BatchEnqueuedAt};
|
||||||
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
|
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
|
||||||
use meilisearch_types::heed::{Database, RoTxn};
|
use meilisearch_types::heed::{Database, RoTxn};
|
||||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
|
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
|
||||||
@ -341,10 +341,14 @@ pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec
|
|||||||
|
|
||||||
pub fn snapshot_batch(batch: &Batch) -> String {
|
pub fn snapshot_batch(batch: &Batch) -> String {
|
||||||
let mut snap = String::new();
|
let mut snap = String::new();
|
||||||
let Batch { uid, details, stats, started_at, finished_at, progress: _ } = batch;
|
let Batch { uid, details, stats, started_at, finished_at, progress: _, enqueued_at } = batch;
|
||||||
if let Some(finished_at) = finished_at {
|
if let Some(finished_at) = finished_at {
|
||||||
assert!(finished_at > started_at);
|
assert!(finished_at > started_at);
|
||||||
}
|
}
|
||||||
|
if let Some(BatchEnqueuedAt { earliest, oldest }) = enqueued_at {
|
||||||
|
assert!(started_at > earliest);
|
||||||
|
assert!(earliest >= oldest);
|
||||||
|
}
|
||||||
snap.push('{');
|
snap.push('{');
|
||||||
snap.push_str(&format!("uid: {uid}, "));
|
snap.push_str(&format!("uid: {uid}, "));
|
||||||
snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap()));
|
snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap()));
|
||||||
|
@ -181,6 +181,7 @@ impl BatchQueue {
|
|||||||
stats: batch.stats,
|
stats: batch.stats,
|
||||||
started_at: batch.started_at,
|
started_at: batch.started_at,
|
||||||
finished_at: batch.finished_at,
|
finished_at: batch.finished_at,
|
||||||
|
enqueued_at: batch.enqueued_at,
|
||||||
},
|
},
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
@ -234,33 +235,36 @@ impl BatchQueue {
|
|||||||
// What we know, though, is that the task date is from before the enqueued_at, and max two timestamps have been written
|
// What we know, though, is that the task date is from before the enqueued_at, and max two timestamps have been written
|
||||||
// to the DB per batches.
|
// to the DB per batches.
|
||||||
if let Some(ref old_batch) = old_batch {
|
if let Some(ref old_batch) = old_batch {
|
||||||
let started_at = old_batch.started_at.unix_timestamp_nanos();
|
if let Some(enqueued_at) = old_batch.enqueued_at {
|
||||||
|
remove_task_datetime(wtxn, self.enqueued_at, enqueued_at.earliest, old_batch.uid)?;
|
||||||
|
remove_task_datetime(wtxn, self.enqueued_at, enqueued_at.oldest, old_batch.uid)?;
|
||||||
|
} else {
|
||||||
|
let started_at = old_batch.started_at.unix_timestamp_nanos();
|
||||||
|
|
||||||
// We have either one or two enqueued at to remove
|
// We have either one or two enqueued at to remove
|
||||||
let mut exit = old_batch.stats.total_nb_tasks.clamp(0, 2);
|
let mut exit = old_batch.stats.total_nb_tasks.clamp(0, 2);
|
||||||
let mut iterator = self.enqueued_at.rev_iter_mut(wtxn)?;
|
let mut iterator = self.enqueued_at.rev_iter_mut(wtxn)?;
|
||||||
while let Some(entry) = iterator.next() {
|
while let Some(entry) = iterator.next() {
|
||||||
let (key, mut value) = entry?;
|
let (key, mut value) = entry?;
|
||||||
if key > started_at {
|
if key > started_at {
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
if value.remove(old_batch.uid) {
|
|
||||||
exit = exit.saturating_sub(1);
|
|
||||||
// Safe because the key and value are owned
|
|
||||||
unsafe {
|
|
||||||
iterator.put_current(&key, &value)?;
|
|
||||||
}
|
}
|
||||||
if exit == 0 {
|
if value.remove(old_batch.uid) {
|
||||||
break;
|
exit = exit.saturating_sub(1);
|
||||||
|
// Safe because the key and value are owned
|
||||||
|
unsafe {
|
||||||
|
iterator.put_current(&key, &value)?;
|
||||||
|
}
|
||||||
|
if exit == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if let Some(enqueued_at) = batch.oldest_enqueued_at {
|
if let Some(enqueued_at) = batch.enqueued_at.as_ref() {
|
||||||
insert_task_datetime(wtxn, self.enqueued_at, enqueued_at, batch.uid)?;
|
insert_task_datetime(wtxn, self.enqueued_at, enqueued_at.earliest, batch.uid)?;
|
||||||
}
|
insert_task_datetime(wtxn, self.enqueued_at, enqueued_at.oldest, batch.uid)?;
|
||||||
if let Some(enqueued_at) = batch.earliest_enqueued_at {
|
|
||||||
insert_task_datetime(wtxn, self.enqueued_at, enqueued_at, batch.uid)?;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update the started at and finished at
|
// Update the started at and finished at
|
||||||
|
@ -102,30 +102,33 @@ fn query_batches_simple() {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(batches.len(), 1);
|
assert_eq!(batches.len(), 1);
|
||||||
batches[0].started_at = OffsetDateTime::UNIX_EPOCH;
|
batches[0].started_at = OffsetDateTime::UNIX_EPOCH;
|
||||||
|
assert!(batches[0].enqueued_at.is_some());
|
||||||
|
batches[0].enqueued_at = None;
|
||||||
// Insta cannot snapshot our batches because the batch stats contains an enum as key: https://github.com/mitsuhiko/insta/issues/689
|
// Insta cannot snapshot our batches because the batch stats contains an enum as key: https://github.com/mitsuhiko/insta/issues/689
|
||||||
let batch = serde_json::to_string_pretty(&batches[0]).unwrap();
|
let batch = serde_json::to_string_pretty(&batches[0]).unwrap();
|
||||||
snapshot!(batch, @r#"
|
snapshot!(batch, @r#"
|
||||||
{
|
{
|
||||||
"uid": 0,
|
"uid": 0,
|
||||||
"details": {
|
"details": {
|
||||||
"primaryKey": "mouse"
|
"primaryKey": "mouse"
|
||||||
},
|
},
|
||||||
"stats": {
|
"stats": {
|
||||||
"totalNbTasks": 1,
|
"totalNbTasks": 1,
|
||||||
"status": {
|
"status": {
|
||||||
"processing": 1
|
"processing": 1
|
||||||
},
|
},
|
||||||
"types": {
|
"types": {
|
||||||
"indexCreation": 1
|
"indexCreation": 1
|
||||||
},
|
},
|
||||||
"indexUids": {
|
"indexUids": {
|
||||||
"catto": 1
|
"catto": 1
|
||||||
}
|
|
||||||
},
|
|
||||||
"startedAt": "1970-01-01T00:00:00Z",
|
|
||||||
"finishedAt": null
|
|
||||||
}
|
}
|
||||||
"#);
|
},
|
||||||
|
"startedAt": "1970-01-01T00:00:00Z",
|
||||||
|
"finishedAt": null,
|
||||||
|
"enqueuedAt": null
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
|
||||||
let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
|
let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
|
||||||
let (batches, _) = index_scheduler
|
let (batches, _) = index_scheduler
|
||||||
|
@ -2,7 +2,7 @@ use std::collections::{BTreeSet, HashMap, HashSet};
|
|||||||
use std::panic::{catch_unwind, AssertUnwindSafe};
|
use std::panic::{catch_unwind, AssertUnwindSafe};
|
||||||
use std::sync::atomic::Ordering;
|
use std::sync::atomic::Ordering;
|
||||||
|
|
||||||
use meilisearch_types::batches::BatchId;
|
use meilisearch_types::batches::{BatchEnqueuedAt, BatchId};
|
||||||
use meilisearch_types::heed::{RoTxn, RwTxn};
|
use meilisearch_types::heed::{RoTxn, RwTxn};
|
||||||
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
||||||
use meilisearch_types::milli::{self};
|
use meilisearch_types::milli::{self};
|
||||||
@ -518,13 +518,30 @@ impl IndexScheduler {
|
|||||||
// We must remove the batch entirely
|
// We must remove the batch entirely
|
||||||
if tasks.is_empty() {
|
if tasks.is_empty() {
|
||||||
if let Some(batch) = self.queue.batches.get_batch(wtxn, batch_id)? {
|
if let Some(batch) = self.queue.batches.get_batch(wtxn, batch_id)? {
|
||||||
remove_n_tasks_datetime_earlier_than(
|
if let Some(BatchEnqueuedAt { earliest, oldest }) = batch.enqueued_at {
|
||||||
wtxn,
|
remove_task_datetime(
|
||||||
self.queue.batches.started_at,
|
wtxn,
|
||||||
batch.started_at,
|
self.queue.batches.enqueued_at,
|
||||||
if batch.stats.total_nb_tasks >= 2 { 2 } else { 1 },
|
earliest,
|
||||||
batch_id,
|
batch_id,
|
||||||
)?;
|
)?;
|
||||||
|
remove_task_datetime(
|
||||||
|
wtxn,
|
||||||
|
self.queue.batches.enqueued_at,
|
||||||
|
oldest,
|
||||||
|
batch_id,
|
||||||
|
)?;
|
||||||
|
} else {
|
||||||
|
// If the batch has no enqueued at, it means the database comes from v1.12
|
||||||
|
// and we still need to find the dates by scanning the database
|
||||||
|
remove_n_tasks_datetime_earlier_than(
|
||||||
|
wtxn,
|
||||||
|
self.queue.batches.enqueued_at,
|
||||||
|
batch.started_at,
|
||||||
|
if batch.stats.total_nb_tasks >= 2 { 2 } else { 1 },
|
||||||
|
batch_id,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
remove_task_datetime(
|
remove_task_datetime(
|
||||||
wtxn,
|
wtxn,
|
||||||
self.queue.batches.started_at,
|
self.queue.batches.started_at,
|
||||||
|
@ -56,7 +56,6 @@ succeeded [1,]
|
|||||||
### Batches Index Tasks:
|
### Batches Index Tasks:
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Batches Enqueued At:
|
### Batches Enqueued At:
|
||||||
[timestamp] [0,]
|
|
||||||
[timestamp] [1,]
|
[timestamp] [1,]
|
||||||
[timestamp] [1,]
|
[timestamp] [1,]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
|
@ -54,7 +54,6 @@ succeeded [1,]
|
|||||||
### Batches Index Tasks:
|
### Batches Index Tasks:
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Batches Enqueued At:
|
### Batches Enqueued At:
|
||||||
[timestamp] [0,]
|
|
||||||
[timestamp] [1,]
|
[timestamp] [1,]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Batches Started At:
|
### Batches Started At:
|
||||||
|
@ -87,7 +87,6 @@ doggo [2,3,]
|
|||||||
girafo [4,]
|
girafo [4,]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Batches Enqueued At:
|
### Batches Enqueued At:
|
||||||
[timestamp] [0,]
|
|
||||||
[timestamp] [1,]
|
[timestamp] [1,]
|
||||||
[timestamp] [2,]
|
[timestamp] [2,]
|
||||||
[timestamp] [3,]
|
[timestamp] [3,]
|
||||||
@ -95,7 +94,6 @@ girafo [4,]
|
|||||||
[timestamp] [5,]
|
[timestamp] [5,]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Batches Started At:
|
### Batches Started At:
|
||||||
[timestamp] [0,]
|
|
||||||
[timestamp] [1,]
|
[timestamp] [1,]
|
||||||
[timestamp] [2,]
|
[timestamp] [2,]
|
||||||
[timestamp] [3,]
|
[timestamp] [3,]
|
||||||
@ -103,7 +101,6 @@ girafo [4,]
|
|||||||
[timestamp] [5,]
|
[timestamp] [5,]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Batches Finished At:
|
### Batches Finished At:
|
||||||
[timestamp] [0,]
|
|
||||||
[timestamp] [1,]
|
[timestamp] [1,]
|
||||||
[timestamp] [2,]
|
[timestamp] [2,]
|
||||||
[timestamp] [3,]
|
[timestamp] [3,]
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
use std::collections::{BTreeSet, HashSet};
|
use std::collections::{BTreeSet, HashSet};
|
||||||
use std::ops::Bound;
|
use std::ops::Bound;
|
||||||
|
|
||||||
use meilisearch_types::batches::{Batch, BatchId, BatchStats};
|
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchId, BatchStats};
|
||||||
use meilisearch_types::heed::{Database, RoTxn, RwTxn};
|
use meilisearch_types::heed::{Database, RoTxn, RwTxn};
|
||||||
use meilisearch_types::milli::CboRoaringBitmapCodec;
|
use meilisearch_types::milli::CboRoaringBitmapCodec;
|
||||||
use meilisearch_types::task_view::DetailsView;
|
use meilisearch_types::task_view::DetailsView;
|
||||||
@ -30,8 +30,7 @@ pub struct ProcessingBatch {
|
|||||||
pub kinds: HashSet<Kind>,
|
pub kinds: HashSet<Kind>,
|
||||||
pub indexes: HashSet<String>,
|
pub indexes: HashSet<String>,
|
||||||
pub canceled_by: HashSet<TaskId>,
|
pub canceled_by: HashSet<TaskId>,
|
||||||
pub oldest_enqueued_at: Option<OffsetDateTime>,
|
pub enqueued_at: Option<BatchEnqueuedAt>,
|
||||||
pub earliest_enqueued_at: Option<OffsetDateTime>,
|
|
||||||
pub started_at: OffsetDateTime,
|
pub started_at: OffsetDateTime,
|
||||||
pub finished_at: Option<OffsetDateTime>,
|
pub finished_at: Option<OffsetDateTime>,
|
||||||
}
|
}
|
||||||
@ -51,8 +50,7 @@ impl ProcessingBatch {
|
|||||||
kinds: HashSet::default(),
|
kinds: HashSet::default(),
|
||||||
indexes: HashSet::default(),
|
indexes: HashSet::default(),
|
||||||
canceled_by: HashSet::default(),
|
canceled_by: HashSet::default(),
|
||||||
oldest_enqueued_at: None,
|
enqueued_at: None,
|
||||||
earliest_enqueued_at: None,
|
|
||||||
started_at: OffsetDateTime::now_utc(),
|
started_at: OffsetDateTime::now_utc(),
|
||||||
finished_at: None,
|
finished_at: None,
|
||||||
}
|
}
|
||||||
@ -80,14 +78,18 @@ impl ProcessingBatch {
|
|||||||
if let Some(canceled_by) = task.canceled_by {
|
if let Some(canceled_by) = task.canceled_by {
|
||||||
self.canceled_by.insert(canceled_by);
|
self.canceled_by.insert(canceled_by);
|
||||||
}
|
}
|
||||||
self.oldest_enqueued_at =
|
match self.enqueued_at.as_mut() {
|
||||||
Some(self.oldest_enqueued_at.map_or(task.enqueued_at, |oldest_enqueued_at| {
|
Some(BatchEnqueuedAt { earliest, oldest }) => {
|
||||||
task.enqueued_at.min(oldest_enqueued_at)
|
*oldest = task.enqueued_at.min(*oldest);
|
||||||
}));
|
*earliest = task.enqueued_at.max(*earliest);
|
||||||
self.earliest_enqueued_at =
|
}
|
||||||
Some(self.earliest_enqueued_at.map_or(task.enqueued_at, |earliest_enqueued_at| {
|
None => {
|
||||||
task.enqueued_at.max(earliest_enqueued_at)
|
self.enqueued_at = Some(BatchEnqueuedAt {
|
||||||
}));
|
earliest: task.enqueued_at,
|
||||||
|
oldest: task.enqueued_at,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -138,6 +140,7 @@ impl ProcessingBatch {
|
|||||||
stats: self.stats.clone(),
|
stats: self.stats.clone(),
|
||||||
started_at: self.started_at,
|
started_at: self.started_at,
|
||||||
finished_at: self.finished_at,
|
finished_at: self.finished_at,
|
||||||
|
enqueued_at: self.enqueued_at,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -24,6 +24,18 @@ pub struct Batch {
|
|||||||
pub started_at: OffsetDateTime,
|
pub started_at: OffsetDateTime,
|
||||||
#[serde(with = "time::serde::rfc3339::option")]
|
#[serde(with = "time::serde::rfc3339::option")]
|
||||||
pub finished_at: Option<OffsetDateTime>,
|
pub finished_at: Option<OffsetDateTime>,
|
||||||
|
|
||||||
|
// Enqueued at is never displayed and is only required when removing a batch.
|
||||||
|
// It's always `Some`, except when the batch was read from a database written before this field was stored (v1.12)
|
||||||
|
pub enqueued_at: Option<BatchEnqueuedAt>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct BatchEnqueuedAt {
|
||||||
|
#[serde(with = "time::serde::rfc3339")]
|
||||||
|
pub earliest: OffsetDateTime,
|
||||||
|
#[serde(with = "time::serde::rfc3339")]
|
||||||
|
pub oldest: OffsetDateTime,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default, Debug, Clone, Serialize, Deserialize, ToSchema)]
|
#[derive(Default, Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user