2982: Adapt task queries to account for special index swap rules r=irevoire a=loiclec

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/2970 

## What does this PR do?
- Replace the `get_tasks` method with a `get_tasks_from_authorized_indexes` method, which returns the list of tasks matched by the query **from the point of view of the user**. That is, it takes into consideration the list of authorised indexes as well as the special case of `IndexSwap` tasks, which should not be returned if an `index_uid` is specified or if any of their associated indexes is not authorised.
- Adapt the code in other places following this change
- Add some tests
- The method `get_task_ids_from_authorized_indexes` now also takes a read transaction as an argument. This is because we want to make sure that the implementation of `get_tasks_from_authorized_indexes` uses only one read transaction. Otherwise, we could (1) get a list of task ids matching the query, then (2) have one of these tasks deleted by a `taskDeletion` task, and finally (3) try to get the `Task` associated with each returned task id and get a `CorruptedTaskQueue` error.



Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
This commit is contained in:
bors[bot] 2022-10-27 14:28:04 +00:00 committed by GitHub
commit d16ea755d8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 457 additions and 133 deletions

View file

@ -1,11 +1,11 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::{IndexScheduler, Query};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::{self, FieldDistribution, Index};
use meilisearch_types::tasks::{KindWithContent, Status};
use meilisearch_types::tasks::KindWithContent;
use serde::{Deserialize, Serialize};
use serde_json::json;
use time::OffsetDateTime;
@ -202,14 +202,7 @@ impl IndexStats {
index_uid: String,
) -> Result<Self, ResponseError> {
// we check if there is currently a task processing associated with this index.
let processing_task = index_scheduler.get_tasks(Query {
status: Some(vec![Status::Processing]),
index_uid: Some(vec![index_uid.clone()]),
limit: Some(1),
..Query::default()
})?;
let is_processing = !processing_task.is_empty();
let is_processing = index_scheduler.is_index_processing(&index_uid)?;
let index = index_scheduler.index(&index_uid)?;
let rtxn = index.read_txn()?;
Ok(IndexStats {

View file

@ -270,11 +270,10 @@ pub fn create_all_stats(
let mut last_task: Option<OffsetDateTime> = None;
let mut indexes = BTreeMap::new();
let mut database_size = 0;
let processing_task = index_scheduler.get_tasks(Query {
status: Some(vec![Status::Processing]),
limit: Some(1),
..Query::default()
})?;
let processing_task = index_scheduler.get_tasks_from_authorized_indexes(
Query { status: Some(vec![Status::Processing]), limit: Some(1), ..Query::default() },
search_rules.authorized_indexes(),
)?;
let processing_index = processing_task.first().and_then(|task| task.index_uid());
for (name, index) in index_scheduler.indexes()? {
if !search_rules.is_index_authorized(&name) {

View file

@ -291,8 +291,11 @@ async fn cancel_tasks(
return Err(index_scheduler::Error::TaskCancelationWithEmptyQuery.into());
}
let filtered_query = filter_out_inaccessible_indexes_from_query(&index_scheduler, &query);
let tasks = index_scheduler.get_task_ids(&filtered_query)?;
let tasks = index_scheduler.get_task_ids_from_authorized_indexes(
&index_scheduler.read_txn()?,
&query,
&index_scheduler.filters().search_rules.authorized_indexes(),
)?;
let task_cancelation =
KindWithContent::TaskCancelation { query: req.query_string().to_string(), tasks };
@ -348,8 +351,11 @@ async fn delete_tasks(
return Err(index_scheduler::Error::TaskDeletionWithEmptyQuery.into());
}
let filtered_query = filter_out_inaccessible_indexes_from_query(&index_scheduler, &query);
let tasks = index_scheduler.get_task_ids(&filtered_query)?;
let tasks = index_scheduler.get_task_ids_from_authorized_indexes(
&index_scheduler.read_txn()?,
&query,
&index_scheduler.filters().search_rules.authorized_indexes(),
)?;
let task_deletion =
KindWithContent::TaskDeletion { query: req.query_string().to_string(), tasks };
@ -425,10 +431,15 @@ async fn get_tasks(
before_finished_at,
after_finished_at,
};
let query = filter_out_inaccessible_indexes_from_query(&index_scheduler, &query);
let mut tasks_results: Vec<TaskView> =
index_scheduler.get_tasks(query)?.into_iter().map(|t| TaskView::from_task(&t)).collect();
let mut tasks_results: Vec<TaskView> = index_scheduler
.get_tasks_from_authorized_indexes(
query,
index_scheduler.filters().search_rules.authorized_indexes(),
)?
.into_iter()
.map(|t| TaskView::from_task(&t))
.collect();
// If we were able to fetch the number +1 tasks we asked
// it means that there is more to come.
@ -454,17 +465,15 @@ async fn get_task(
analytics.publish("Tasks Seen".to_string(), json!({ "per_task_uid": true }), Some(&req));
let search_rules = &index_scheduler.filters().search_rules;
let mut filters = index_scheduler::Query::default();
if !search_rules.is_index_authorized("*") {
for (index, _policy) in search_rules.clone() {
filters = filters.with_index(index);
}
}
let query = index_scheduler::Query { uid: Some(vec![task_id]), ..Query::default() };
filters.uid = Some(vec![task_id]);
if let Some(task) = index_scheduler.get_tasks(filters)?.first() {
if let Some(task) = index_scheduler
.get_tasks_from_authorized_indexes(
query,
index_scheduler.filters().search_rules.authorized_indexes(),
)?
.first()
{
let task_view = TaskView::from_task(task);
Ok(HttpResponse::Ok().json(task_view))
} else {
@ -472,39 +481,6 @@ async fn get_task(
}
}
/// Returns a copy of `query` whose index filter has been rebuilt from the
/// search rules attached to `index_scheduler`.
///
/// - When `query` names indexes explicitly, only the names accepted by
///   `is_index_authorized` are re-added through `with_index`.
/// - When `query` names no index and the `"*"` wildcard is not authorized,
///   every index listed in the search rules is added through `with_index`.
/// - When `query` names no index and `"*"` is authorized, nothing is added.
///
/// The input `query` is never modified; all changes happen on a clone.
///
/// NOTE(review): if every requested index is rejected, no `with_index` call
/// happens and `index_uid` stays `None` after the `take()` — presumably that
/// reads as "no index restriction" downstream; confirm against `Query`.
fn filter_out_inaccessible_indexes_from_query<const ACTION: u8>(
    index_scheduler: &GuardedData<ActionPolicy<ACTION>, Data<IndexScheduler>>,
    query: &Query,
) -> Query {
    let mut filtered = query.clone();

    // Detach the requested indexes from the copy; the allowed ones are added
    // back one at a time below.
    let requested = filtered.index_uid.take();
    let search_rules = &index_scheduler.filters().search_rules;

    if let Some(requested) = requested {
        // Explicit index list: drop every name the search rules reject.
        for name in requested.iter() {
            if !search_rules.is_index_authorized(name) {
                continue;
            }
            filtered = filtered.with_index(name.to_string());
        }
    } else if !search_rules.is_index_authorized("*") {
        // No explicit list and no wildcard access: restrict the query to the
        // indexes enumerated by the search rules.
        for (index, _policy) in search_rules.clone() {
            filtered = filtered.with_index(index.to_string());
        }
    }

    filtered
}
pub(crate) mod date_deserializer {
use time::format_description::well_known::Rfc3339;
use time::macros::format_description;