mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Merge #5351
5351: Bring back v1.13.0 changes into main r=irevoire a=Kerollmops This PR brings back the changes made in v1.13 into the main branch. Co-authored-by: ManyTheFish <many@meilisearch.com> Co-authored-by: Kerollmops <clement@meilisearch.com> Co-authored-by: Louis Dureuil <louis@meilisearch.com> Co-authored-by: Clémentine <clementine@meilisearch.com> Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com> Co-authored-by: Tamo <tamo@meilisearch.com> Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
commit
0f1aeb8eaa
101 changed files with 8351 additions and 1518 deletions
|
@ -166,13 +166,41 @@ impl IndexScheduler {
|
|||
let processing_batch = &mut processing_batch;
|
||||
let progress = progress.clone();
|
||||
std::thread::scope(|s| {
|
||||
let p = progress.clone();
|
||||
let handle = std::thread::Builder::new()
|
||||
.name(String::from("batch-operation"))
|
||||
.spawn_scoped(s, move || {
|
||||
cloned_index_scheduler.process_batch(batch, processing_batch, progress)
|
||||
cloned_index_scheduler.process_batch(batch, processing_batch, p)
|
||||
})
|
||||
.unwrap();
|
||||
handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
|
||||
|
||||
match handle.join() {
|
||||
Ok(ret) => {
|
||||
if ret.is_err() {
|
||||
if let Ok(progress_view) =
|
||||
serde_json::to_string(&progress.as_progress_view())
|
||||
{
|
||||
tracing::warn!("Batch failed while doing: {progress_view}")
|
||||
}
|
||||
}
|
||||
ret
|
||||
}
|
||||
Err(panic) => {
|
||||
if let Ok(progress_view) =
|
||||
serde_json::to_string(&progress.as_progress_view())
|
||||
{
|
||||
tracing::warn!("Batch failed while doing: {progress_view}")
|
||||
}
|
||||
let msg = match panic.downcast_ref::<&'static str>() {
|
||||
Some(s) => *s,
|
||||
None => match panic.downcast_ref::<String>() {
|
||||
Some(s) => &s[..],
|
||||
None => "Box<dyn Any>",
|
||||
},
|
||||
};
|
||||
Err(Error::ProcessBatchPanicked(msg.to_string()))
|
||||
}
|
||||
}
|
||||
})
|
||||
};
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@ use std::collections::{BTreeSet, HashMap, HashSet};
|
|||
use std::panic::{catch_unwind, AssertUnwindSafe};
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use meilisearch_types::batches::BatchId;
|
||||
use meilisearch_types::batches::{BatchEnqueuedAt, BatchId};
|
||||
use meilisearch_types::heed::{RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
||||
use meilisearch_types::milli::{self};
|
||||
|
@ -16,7 +16,10 @@ use crate::processing::{
|
|||
InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress,
|
||||
UpdateIndexProgress,
|
||||
};
|
||||
use crate::utils::{self, swap_index_uid_in_task, ProcessingBatch};
|
||||
use crate::utils::{
|
||||
self, remove_n_tasks_datetime_earlier_than, remove_task_datetime, swap_index_uid_in_task,
|
||||
ProcessingBatch,
|
||||
};
|
||||
use crate::{Error, IndexScheduler, Result, TaskId};
|
||||
|
||||
impl IndexScheduler {
|
||||
|
@ -323,8 +326,17 @@ impl IndexScheduler {
|
|||
match ret {
|
||||
Ok(Ok(())) => (),
|
||||
Ok(Err(e)) => return Err(Error::DatabaseUpgrade(Box::new(e))),
|
||||
Err(_e) => {
|
||||
return Err(Error::DatabaseUpgrade(Box::new(Error::ProcessBatchPanicked)));
|
||||
Err(e) => {
|
||||
let msg = match e.downcast_ref::<&'static str>() {
|
||||
Some(s) => *s,
|
||||
None => match e.downcast_ref::<String>() {
|
||||
Some(s) => &s[..],
|
||||
None => "Box<dyn Any>",
|
||||
},
|
||||
};
|
||||
return Err(Error::DatabaseUpgrade(Box::new(Error::ProcessBatchPanicked(
|
||||
msg.to_string(),
|
||||
))));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -418,7 +430,6 @@ impl IndexScheduler {
|
|||
to_delete_tasks -= &enqueued_tasks;
|
||||
|
||||
// 2. We now have a list of tasks to delete, delete them
|
||||
|
||||
let mut affected_indexes = HashSet::new();
|
||||
let mut affected_statuses = HashSet::new();
|
||||
let mut affected_kinds = HashSet::new();
|
||||
|
@ -515,9 +526,51 @@ impl IndexScheduler {
|
|||
tasks -= &to_delete_tasks;
|
||||
// We must remove the batch entirely
|
||||
if tasks.is_empty() {
|
||||
self.queue.batches.all_batches.delete(wtxn, &batch_id)?;
|
||||
self.queue.batch_to_tasks_mapping.delete(wtxn, &batch_id)?;
|
||||
if let Some(batch) = self.queue.batches.get_batch(wtxn, batch_id)? {
|
||||
if let Some(BatchEnqueuedAt { earliest, oldest }) = batch.enqueued_at {
|
||||
remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.batches.enqueued_at,
|
||||
earliest,
|
||||
batch_id,
|
||||
)?;
|
||||
remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.batches.enqueued_at,
|
||||
oldest,
|
||||
batch_id,
|
||||
)?;
|
||||
} else {
|
||||
// If we don't have the enqueued at in the batch it means the database comes from the v1.12
|
||||
// and we still need to find the date by scrolling the database
|
||||
remove_n_tasks_datetime_earlier_than(
|
||||
wtxn,
|
||||
self.queue.batches.enqueued_at,
|
||||
batch.started_at,
|
||||
batch.stats.total_nb_tasks.clamp(1, 2) as usize,
|
||||
batch_id,
|
||||
)?;
|
||||
}
|
||||
remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.batches.started_at,
|
||||
batch.started_at,
|
||||
batch_id,
|
||||
)?;
|
||||
if let Some(finished_at) = batch.finished_at {
|
||||
remove_task_datetime(
|
||||
wtxn,
|
||||
self.queue.batches.finished_at,
|
||||
finished_at,
|
||||
batch_id,
|
||||
)?;
|
||||
}
|
||||
|
||||
self.queue.batches.all_batches.delete(wtxn, &batch_id)?;
|
||||
self.queue.batch_to_tasks_mapping.delete(wtxn, &batch_id)?;
|
||||
}
|
||||
}
|
||||
|
||||
// Anyway, we must remove the batch from all its reverse indexes.
|
||||
// The only way to do that is to check
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use std::collections::BTreeMap;
|
||||
use std::fs::File;
|
||||
use std::io::BufWriter;
|
||||
use std::sync::atomic::Ordering;
|
||||
|
@ -11,7 +12,9 @@ use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
|
|||
use time::macros::format_description;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::processing::{AtomicDocumentStep, AtomicTaskStep, DumpCreationProgress};
|
||||
use crate::processing::{
|
||||
AtomicBatchStep, AtomicDocumentStep, AtomicTaskStep, DumpCreationProgress,
|
||||
};
|
||||
use crate::{Error, IndexScheduler, Result};
|
||||
|
||||
impl IndexScheduler {
|
||||
|
@ -102,7 +105,40 @@ impl IndexScheduler {
|
|||
}
|
||||
dump_tasks.flush()?;
|
||||
|
||||
// 3. Dump the indexes
|
||||
// 3. dump the batches
|
||||
progress.update_progress(DumpCreationProgress::DumpTheBatches);
|
||||
let mut dump_batches = dump.create_batches_queue()?;
|
||||
|
||||
let (atomic_batch_progress, update_batch_progress) =
|
||||
AtomicBatchStep::new(self.queue.batches.all_batches.len(&rtxn)? as u32);
|
||||
progress.update_progress(update_batch_progress);
|
||||
|
||||
for ret in self.queue.batches.all_batches.iter(&rtxn)? {
|
||||
if self.scheduler.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
|
||||
let (_, mut b) = ret?;
|
||||
// In the case we're dumping ourselves we want to be marked as finished
|
||||
// to not loop over ourselves indefinitely.
|
||||
if b.uid == task.uid {
|
||||
let finished_at = OffsetDateTime::now_utc();
|
||||
|
||||
// We're going to fake the date because we don't know if everything is going to go well.
|
||||
// But we need to dump the task as finished and successful.
|
||||
// If something fail everything will be set appropriately in the end.
|
||||
let mut statuses = BTreeMap::new();
|
||||
statuses.insert(Status::Succeeded, b.stats.total_nb_tasks);
|
||||
b.stats.status = statuses;
|
||||
b.finished_at = Some(finished_at);
|
||||
}
|
||||
|
||||
dump_batches.push_batch(&b)?;
|
||||
atomic_batch_progress.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
dump_batches.flush()?;
|
||||
|
||||
// 4. Dump the indexes
|
||||
progress.update_progress(DumpCreationProgress::DumpTheIndexes);
|
||||
let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32;
|
||||
let mut count = 0;
|
||||
|
@ -142,7 +178,7 @@ impl IndexScheduler {
|
|||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||
// 3.1. Dump the documents
|
||||
// 4.1. Dump the documents
|
||||
for ret in documents {
|
||||
if self.scheduler.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
|
@ -204,7 +240,7 @@ impl IndexScheduler {
|
|||
atomic.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
// 3.2. Dump the settings
|
||||
// 4.2. Dump the settings
|
||||
let settings = meilisearch_types::settings::settings(
|
||||
index,
|
||||
&rtxn,
|
||||
|
@ -215,10 +251,12 @@ impl IndexScheduler {
|
|||
Ok(())
|
||||
})?;
|
||||
|
||||
// 4. Dump experimental feature settings
|
||||
// 5. Dump experimental feature settings
|
||||
progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures);
|
||||
let features = self.features().runtime_features();
|
||||
dump.create_experimental_features(features)?;
|
||||
let network = self.network();
|
||||
dump.create_network(network)?;
|
||||
|
||||
let dump_uid = started_at.format(format_description!(
|
||||
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
|
||||
|
|
|
@ -56,16 +56,13 @@ succeeded [1,]
|
|||
### Batches Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Enqueued At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Started At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Finished At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
|
|
@ -54,15 +54,12 @@ succeeded [1,]
|
|||
### Batches Index Tasks:
|
||||
----------------------------------------------------------------------
|
||||
### Batches Enqueued At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Started At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Finished At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
|
|
@ -7,7 +7,7 @@ snapshot_kind: text
|
|||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "An unexpected crash occurred when processing the task.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "An unexpected crash occurred when processing the task: simulated panic", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
|
|
|
@ -87,7 +87,6 @@ doggo [2,3,]
|
|||
girafo [4,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Enqueued At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
[timestamp] [2,]
|
||||
[timestamp] [3,]
|
||||
|
@ -95,7 +94,6 @@ girafo [4,]
|
|||
[timestamp] [5,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Started At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
[timestamp] [2,]
|
||||
[timestamp] [3,]
|
||||
|
@ -103,7 +101,6 @@ girafo [4,]
|
|||
[timestamp] [5,]
|
||||
----------------------------------------------------------------------
|
||||
### Batches Finished At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
[timestamp] [2,]
|
||||
[timestamp] [3,]
|
||||
|
|
|
@ -903,7 +903,7 @@ fn create_and_list_index() {
|
|||
|
||||
index_scheduler.index("kefir").unwrap();
|
||||
let list = index_scheduler.get_paginated_indexes_stats(&AuthFilter::default(), 0, 20).unwrap();
|
||||
snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]", "[1][0][1].used_database_size" => "[bytes]", "[1][0][1].database_size" => "[bytes]" }), @r#"
|
||||
snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]", "[1][0][1].used_database_size" => "[bytes]", "[1][0][1].database_size" => "[bytes]" }), @r###"
|
||||
[
|
||||
1,
|
||||
[
|
||||
|
@ -912,6 +912,8 @@ fn create_and_list_index() {
|
|||
{
|
||||
"number_of_documents": 0,
|
||||
"database_size": "[bytes]",
|
||||
"number_of_embeddings": 0,
|
||||
"number_of_embedded_documents": 0,
|
||||
"used_database_size": "[bytes]",
|
||||
"primary_key": null,
|
||||
"field_distribution": {},
|
||||
|
@ -921,5 +923,5 @@ fn create_and_list_index() {
|
|||
]
|
||||
]
|
||||
]
|
||||
"#);
|
||||
"###);
|
||||
}
|
||||
|
|
|
@ -6,8 +6,7 @@ use meili_snap::snapshot;
|
|||
use meilisearch_types::milli::obkv_to_json;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod::*;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::tasks::Kind;
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent};
|
||||
|
||||
use crate::insta_snapshot::snapshot_index_scheduler;
|
||||
use crate::test_utils::Breakpoint::*;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue