2981: Move index swap error handling from meilisearch-http to index-scheduler r=irevoire a=loiclec

And make index_not_found error asynchronous, since we can't know whether the index will exist by the time the index swap task is processed.

Improve the index-swap test to verify that future tasks are not swapped and to test the new error messages that were introduced.

## Related issue
https://github.com/meilisearch/meilisearch/issues/2973


2996: Get rid of the unnecessary tasks when an index_uid is specified r=Kerollmops a=irevoire



Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Irevoire <tamo@meilisearch.com>
This commit is contained in:
bors[bot] 2022-10-27 19:11:23 +00:00 committed by GitHub
commit 20258461a8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 326 additions and 64 deletions

View File

@ -17,7 +17,7 @@ tasks individually, but should be much faster since we are only performing
one indexing operation.
*/
use std::collections::HashSet;
use std::collections::{BTreeSet, HashSet};
use std::ffi::OsStr;
use std::fs::{self, File};
use std::io::BufWriter;
@ -33,7 +33,7 @@ use meilisearch_types::milli::update::{
};
use meilisearch_types::milli::{self, BEU32};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
use meilisearch_types::tasks::{Details, Kind, KindWithContent, Status, Task};
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
use roaring::RoaringBitmap;
use time::OffsetDateTime;
@ -832,6 +832,26 @@ impl IndexScheduler {
} else {
unreachable!()
};
let mut not_found_indexes = BTreeSet::new();
for IndexSwap { indexes: (lhs, rhs) } in swaps {
for index in [lhs, rhs] {
let index_exists = self.index_mapper.index_exists(&wtxn, index)?;
if !index_exists {
not_found_indexes.insert(index);
}
}
}
if !not_found_indexes.is_empty() {
if not_found_indexes.len() == 1 {
return Err(Error::IndexNotFound(
not_found_indexes.into_iter().next().unwrap().clone(),
));
} else {
return Err(Error::IndexesNotFound(
not_found_indexes.into_iter().cloned().collect(),
));
}
}
for swap in swaps {
self.apply_index_swap(&mut wtxn, task.uid, &swap.indexes.0, &swap.indexes.1)?;
}

View File

@ -9,8 +9,22 @@ use crate::TaskId;
pub enum Error {
#[error("Index `{0}` not found.")]
IndexNotFound(String),
#[error(
"Indexes {} not found.",
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
)]
IndexesNotFound(Vec<String>),
#[error("Index `{0}` already exists.")]
IndexAlreadyExists(String),
#[error(
"Indexes must be declared only once during a swap. `{0}` was specified several times."
)]
SwapDuplicateIndexFound(String),
#[error(
"Indexes must be declared only once during a swap. {} were specified several times.",
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
)]
SwapDuplicateIndexesFound(Vec<String>),
#[error("Corrupted dump.")]
CorruptedDump,
#[error("Task `{0}` not found.")]
@ -53,11 +67,13 @@ impl ErrorCode for Error {
fn error_code(&self) -> Code {
match self {
Error::IndexNotFound(_) => Code::IndexNotFound,
Error::IndexesNotFound(_) => Code::IndexNotFound,
Error::IndexAlreadyExists(_) => Code::IndexAlreadyExists,
Error::SwapDuplicateIndexesFound(_) => Code::BadRequest,
Error::SwapDuplicateIndexFound(_) => Code::BadRequest,
Error::TaskNotFound(_) => Code::TaskNotFound,
Error::TaskDeletionWithEmptyQuery => Code::TaskDeletionWithEmptyQuery,
Error::TaskCancelationWithEmptyQuery => Code::TaskCancelationWithEmptyQuery,
Error::Dump(e) => e.error_code(),
Error::Milli(e) => e.error_code(),
Error::ProcessBatchPanicked => Code::Internal,

View File

@ -54,6 +54,7 @@ use utils::{filter_out_references_to_newer_tasks, keep_tasks_within_datetimes, m
use uuid::Uuid;
use crate::index_mapper::IndexMapper;
use crate::utils::check_index_swap_validity;
pub(crate) type BEI128 =
meilisearch_types::heed::zerocopy::I128<meilisearch_types::heed::byteorder::BE>;
@ -589,10 +590,12 @@ impl IndexScheduler {
) -> Result<RoaringBitmap> {
let mut tasks = self.get_task_ids(rtxn, query)?;
// If the query contains a list of index_uid, then we must exclude IndexSwap tasks
// from the result (because it is not publicly associated with any index)
// If the query contains a list of `index_uid`, then we must exclude all the kinds that
// aren't associated with one and only one index.
if query.index_uid.is_some() {
tasks -= self.get_kind(rtxn, Kind::IndexSwap)?
for kind in enum_iterator::all::<Kind>().filter(|kind| !kind.related_to_one_index()) {
tasks -= self.get_kind(rtxn, kind)?;
}
}
// Any task that is internally associated with a non-authorized index
@ -671,6 +674,10 @@ impl IndexScheduler {
// For deletion and cancelation tasks, we want to make extra sure that they
// don't attempt to delete/cancel tasks that are newer than themselves.
filter_out_references_to_newer_tasks(&mut task);
// If the registered task is an index swap task, verify that it is well-formed
// (that it does not contain duplicate indexes).
check_index_swap_validity(&task)?;
// Get rid of the mutability.
let task = task;
@ -1586,17 +1593,17 @@ mod tests {
})
.unwrap();
index_scheduler.assert_internally_consistent();
handle.wait_till(Breakpoint::AfterProcessing);
index_scheduler.assert_internally_consistent();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_processed");
index_scheduler
.register(KindWithContent::IndexSwap {
swaps: vec![IndexSwap { indexes: ("a".to_owned(), "c".to_owned()) }],
})
.unwrap();
index_scheduler.assert_internally_consistent();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "two_swaps_registered");
handle.wait_till(Breakpoint::AfterProcessing);
index_scheduler.assert_internally_consistent();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_processed");
handle.wait_till(Breakpoint::AfterProcessing);
index_scheduler.assert_internally_consistent();
@ -1607,6 +1614,57 @@ mod tests {
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_empty_swap_processed");
}
// Checks the two error paths of index swaps:
// 1. a swap list that names the same index more than once is rejected by `register`
//    itself, and leaves the scheduler snapshot unchanged;
// 2. a swap list that names indexes which do not exist is accepted by `register`, and
//    the failure is only snapshotted after the task has been processed.
#[test]
fn swap_indexes_errors() {
let (index_scheduler, handle) = IndexScheduler::test(true, vec![]);
// Register the creation of the four indexes `a`, `b`, `c` and `d`.
let to_enqueue = [
index_creation_task("a", "id"),
index_creation_task("b", "id"),
index_creation_task("c", "id"),
index_creation_task("d", "id"),
];
for task in to_enqueue {
let _ = index_scheduler.register(task).unwrap();
index_scheduler.assert_internally_consistent();
}
// One batch per creation task (presumably index creations are processed one by one;
// the "initial_tasks_processed" snapshot shows all four as succeeded).
handle.advance_n_batch(4);
index_scheduler.assert_internally_consistent();
// Keep the snapshot around so it can be compared after the rejected registration below.
let first_snap = snapshot_index_scheduler(&index_scheduler);
snapshot!(first_snap, name: "initial_tasks_processed");
// `a` and `b` each appear in two swaps: the registration itself must return an error.
let err = index_scheduler
.register(KindWithContent::IndexSwap {
swaps: vec![
IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) },
IndexSwap { indexes: ("b".to_owned(), "a".to_owned()) },
],
})
.unwrap_err();
snapshot!(format!("{err}"), @"Indexes must be declared only once during a swap. `a`, `b` were specified several times.");
index_scheduler.assert_internally_consistent();
// The rejected registration must not have changed anything visible in the snapshot.
let second_snap = snapshot_index_scheduler(&index_scheduler);
assert_eq!(first_snap, second_snap);
// Indexes `e` and `f` do not exist, but their existence is not checked at
// registration time, so registering the task succeeds.
index_scheduler
.register(KindWithContent::IndexSwap {
swaps: vec![
IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) },
IndexSwap { indexes: ("c".to_owned(), "e".to_owned()) },
IndexSwap { indexes: ("d".to_owned(), "f".to_owned()) },
],
})
.unwrap();
handle.advance_n_batch(1);
// Now that the swap task has been processed, it should be marked as failed with an
// error message saying that both `e` and `f` were not found.
index_scheduler.assert_internally_consistent();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_failed");
}
#[test]
fn document_addition_and_index_deletion_on_unexisting_index() {
let (index_scheduler, handle) = IndexScheduler::test(true, vec![]);

View File

@ -11,19 +11,20 @@ source: index-scheduler/src/lib.rs
2 {uid: 2, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
3 {uid: 3, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }}
4 {uid: 4, status: succeeded, details: { swaps: [IndexSwap { indexes: ("a", "b") }, IndexSwap { indexes: ("c", "d") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "b") }, IndexSwap { indexes: ("c", "d") }] }}
5 {uid: 5, status: enqueued, details: { swaps: [IndexSwap { indexes: ("a", "c") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "c") }] }}
----------------------------------------------------------------------
### Status:
enqueued []
enqueued [5,]
succeeded [0,1,2,3,4,]
----------------------------------------------------------------------
### Kind:
"indexCreation" [0,1,2,3,]
"indexSwap" [4,]
"indexSwap" [4,5,]
----------------------------------------------------------------------
### Index Tasks:
a [1,4,]
a [1,4,5,]
b [0,4,]
c [3,4,]
c [3,4,5,]
d [2,4,]
----------------------------------------------------------------------
### Index Mapper:
@ -35,6 +36,7 @@ d [2,4,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
[timestamp] [5,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]

View File

@ -0,0 +1,56 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }}
1 {uid: 1, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
2 {uid: 2, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }}
3 {uid: 3, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
4 {uid: 4, status: enqueued, details: { swaps: [IndexSwap { indexes: ("a", "b") }, IndexSwap { indexes: ("c", "d") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "b") }, IndexSwap { indexes: ("c", "d") }] }}
5 {uid: 5, status: enqueued, details: { swaps: [IndexSwap { indexes: ("a", "c") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "c") }] }}
----------------------------------------------------------------------
### Status:
enqueued [4,5,]
succeeded [0,1,2,3,]
----------------------------------------------------------------------
### Kind:
"indexCreation" [0,1,2,3,]
"indexSwap" [4,5,]
----------------------------------------------------------------------
### Index Tasks:
a [0,4,5,]
b [1,4,]
c [2,4,5,]
d [3,4,]
----------------------------------------------------------------------
### Index Mapper:
["a", "b", "c", "d"]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
[timestamp] [5,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -0,0 +1,59 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }}
1 {uid: 1, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
2 {uid: 2, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }}
3 {uid: 3, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
4 {uid: 4, status: failed, error: ResponseError { code: 200, message: "Indexes `e`, `f` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { swaps: [IndexSwap { indexes: ("a", "b") }, IndexSwap { indexes: ("c", "e") }, IndexSwap { indexes: ("d", "f") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "b") }, IndexSwap { indexes: ("c", "e") }, IndexSwap { indexes: ("d", "f") }] }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,2,3,]
failed [4,]
----------------------------------------------------------------------
### Kind:
"indexCreation" [0,1,2,3,]
"indexSwap" [4,]
----------------------------------------------------------------------
### Index Tasks:
a [0,4,]
b [1,4,]
c [2,4,]
d [3,4,]
e [4,]
f [4,]
----------------------------------------------------------------------
### Index Mapper:
["a", "b", "c", "d"]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
[timestamp] [4,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -0,0 +1,51 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }}
1 {uid: 1, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }}
2 {uid: 2, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }}
3 {uid: 3, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,2,3,]
----------------------------------------------------------------------
### Kind:
"indexCreation" [0,1,2,3,]
----------------------------------------------------------------------
### Index Tasks:
a [0,]
b [1,]
c [2,]
d [3,]
----------------------------------------------------------------------
### Index Mapper:
["a", "b", "c", "d"]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
[timestamp] [2,]
[timestamp] [3,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -1,5 +1,6 @@
//! Utility functions on the DBs. Mainly getter and setters.
use std::collections::{BTreeSet, HashSet};
use std::ops::Bound;
use meilisearch_types::heed::types::{DecodeIgnore, OwnedType};
@ -296,6 +297,33 @@ pub(crate) fn filter_out_references_to_newer_tasks(task: &mut Task) {
}
}
pub(crate) fn check_index_swap_validity(task: &Task) -> Result<()> {
let swaps =
if let KindWithContent::IndexSwap { swaps } = &task.kind { swaps } else { return Ok(()) };
let mut all_indexes = HashSet::new();
let mut duplicate_indexes = BTreeSet::new();
for IndexSwap { indexes: (lhs, rhs) } in swaps {
for name in [lhs, rhs] {
let is_new = all_indexes.insert(name);
if !is_new {
duplicate_indexes.insert(name);
}
}
}
if !duplicate_indexes.is_empty() {
if duplicate_indexes.len() == 1 {
return Err(Error::SwapDuplicateIndexFound(
duplicate_indexes.into_iter().next().unwrap().clone(),
));
} else {
return Err(Error::SwapDuplicateIndexesFound(
duplicate_indexes.into_iter().cloned().collect(),
));
}
}
Ok(())
}
#[cfg(test)]
impl IndexScheduler {
/// Asserts that the index scheduler's content is internally consistent.

View File

@ -24,20 +24,6 @@ pub enum MeilisearchHttpError {
MissingPayload(PayloadType),
#[error("The provided payload reached the size limit.")]
PayloadTooLarge,
#[error(
"Indexes {} not found.",
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
)]
IndexesNotFound(Vec<String>),
#[error(
"Indexes must be declared only once during a swap. `{0}` was specified several times."
)]
SwapDuplicateIndexFound(String),
#[error(
"Indexes must be declared only once during a swap. {} were specified several times.",
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
)]
SwapDuplicateIndexesFound(Vec<String>),
#[error("Two indexes must be given for each swap. The list `{:?}` contains {} indexes.",
.0, .0.len()
)]
@ -71,9 +57,6 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound,
MeilisearchHttpError::InvalidExpression(_, _) => Code::Filter,
MeilisearchHttpError::PayloadTooLarge => Code::PayloadTooLarge,
MeilisearchHttpError::IndexesNotFound(_) => Code::IndexNotFound,
MeilisearchHttpError::SwapDuplicateIndexFound(_) => Code::DuplicateIndexFound,
MeilisearchHttpError::SwapDuplicateIndexesFound(_) => Code::DuplicateIndexFound,
MeilisearchHttpError::SwapIndexPayloadWrongLength(_) => Code::BadRequest,
MeilisearchHttpError::IndexUid(e) => e.error_code(),
MeilisearchHttpError::SerdeJson(_) => Code::Internal,

View File

@ -1,5 +1,3 @@
use std::collections::BTreeSet;
use actix_web::web::Data;
use actix_web::{web, HttpResponse};
use index_scheduler::IndexScheduler;
@ -29,9 +27,6 @@ pub async fn swap_indexes(
let search_rules = &index_scheduler.filters().search_rules;
let mut swaps = vec![];
let mut indexes_set = BTreeSet::<String>::default();
let mut unauthorized_indexes = BTreeSet::new();
let mut duplicate_indexes = BTreeSet::new();
for SwapIndexesPayload { indexes } in params.into_inner().into_iter() {
let (lhs, rhs) = match indexes.as_slice() {
[lhs, rhs] => (lhs, rhs),
@ -39,34 +34,10 @@ pub async fn swap_indexes(
return Err(MeilisearchHttpError::SwapIndexPayloadWrongLength(indexes).into());
}
};
if !search_rules.is_index_authorized(lhs) {
unauthorized_indexes.insert(lhs.clone());
if !search_rules.is_index_authorized(lhs) || !search_rules.is_index_authorized(rhs) {
return Err(AuthenticationError::InvalidToken.into());
}
if !search_rules.is_index_authorized(rhs) {
unauthorized_indexes.insert(rhs.clone());
}
swaps.push(IndexSwap { indexes: (lhs.clone(), rhs.clone()) });
let is_unique_index_lhs = indexes_set.insert(lhs.clone());
if !is_unique_index_lhs {
duplicate_indexes.insert(lhs.clone());
}
let is_unique_index_rhs = indexes_set.insert(rhs.clone());
if !is_unique_index_rhs {
duplicate_indexes.insert(rhs.clone());
}
}
if !duplicate_indexes.is_empty() {
let duplicate_indexes: Vec<_> = duplicate_indexes.into_iter().collect();
if let [index] = duplicate_indexes.as_slice() {
return Err(MeilisearchHttpError::SwapDuplicateIndexFound(index.clone()).into());
} else {
return Err(MeilisearchHttpError::SwapDuplicateIndexesFound(duplicate_indexes).into());
}
}
if !unauthorized_indexes.is_empty() {
return Err(AuthenticationError::InvalidToken.into());
}
let task = KindWithContent::IndexSwap { swaps };

View File

@ -705,7 +705,7 @@ async fn test_summarized_index_swap() {
]
},
"error": {
"message": "Index `doggos` not found.",
"message": "Indexes `cattos`, `doggos` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"

View File

@ -381,6 +381,24 @@ pub enum Kind {
SnapshotCreation,
}
impl Kind {
    /// Tells whether this kind of task is associated with one and only one index.
    ///
    /// Returns `false` for index swaps, task cancelations, task deletions, dump creations
    /// and snapshot creations, and `true` for every other kind.
    pub fn related_to_one_index(&self) -> bool {
        // The match lists every variant explicitly (no wildcard arm), so adding a new
        // `Kind` forces a decision here at compile time.
        match *self {
            Kind::IndexSwap
            | Kind::TaskCancelation
            | Kind::TaskDeletion
            | Kind::DumpCreation
            | Kind::SnapshotCreation => false,

            Kind::DocumentAdditionOrUpdate
            | Kind::DocumentDeletion
            | Kind::SettingsUpdate
            | Kind::IndexCreation
            | Kind::IndexDeletion
            | Kind::IndexUpdate => true,
        }
    }
}
impl FromStr for Kind {
type Err = ResponseError;