// MeiliSearch/meilisearch/src/routes/indexes/documents.rs

use std::io::ErrorKind;

use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::Data;
use actix_web::{web, HttpMessage, HttpRequest, HttpResponse};
use bstr::ByteSlice;
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::Deserr;
use futures::StreamExt;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::star_or::OptionStarOrList;
use meilisearch_types::tasks::KindWithContent;
use meilisearch_types::{milli, Document, Index};
use mime::Mime;
use once_cell::sync::Lazy;
use serde::Deserialize;
use serde_json::Value;
use tempfile::tempfile;
use tokio::fs::File;
use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter};

use crate::analytics::{Analytics, DocumentDeletionKind};
use crate::error::MeilisearchHttpError;
use crate::error::PayloadError::ReceivePayload;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::payload::Payload;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::{PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};

static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
    vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()]
});

/// Extracts the mime type from the content type and returns
/// a Meilisearch error if anything goes wrong.
fn extract_mime_type(req: &HttpRequest) -> Result<Option<Mime>, MeilisearchHttpError> {
    match req.mime_type() {
        Ok(Some(mime)) => Ok(Some(mime)),
        Ok(None) => Ok(None),
        Err(_) => match req.headers().get(CONTENT_TYPE) {
            Some(content_type) => Err(MeilisearchHttpError::InvalidContentType(
                content_type.as_bytes().as_bstr().to_string(),
                ACCEPTED_CONTENT_TYPE.clone(),
            )),
            None => Err(MeilisearchHttpError::MissingContentType(ACCEPTED_CONTENT_TYPE.clone())),
        },
    }
}
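
// Editorial note (not in the original source): `req.mime_type()` only fails when a
// `Content-Type` header is present but cannot be parsed as a mime type (e.g.
// `Content-Type: badtype`), which is reported as `InvalidContentType`; a missing
// header parses as `Ok(None)` and is rejected later, when the payload format is chosen.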

#[derive(Deserialize)]
pub struct DocumentParam {
    index_uid: String,
    document_id: String,
}

pub fn configure(cfg: &mut web::ServiceConfig) {
    cfg.service(
        web::resource("")
            .route(web::get().to(SeqHandler(get_all_documents)))
            .route(web::post().to(SeqHandler(replace_documents)))
            .route(web::put().to(SeqHandler(update_documents)))
            .route(web::delete().to(SeqHandler(clear_all_documents))),
    )
    // These routes must be registered before `/documents/{document_id}` so they match first.
    .service(
        web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents_batch))),
    )
    .service(web::resource("/delete").route(web::post().to(SeqHandler(delete_documents_by_filter))))
    .service(
        web::resource("/{document_id}")
            .route(web::get().to(SeqHandler(get_document)))
            .route(web::delete().to(SeqHandler(delete_document))),
    );
}

#[derive(Debug, Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct GetDocument {
    #[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)]
    fields: OptionStarOrList<String>,
}
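
// Illustrative example (assumption, not in the original source):
// `GET /indexes/movies/documents/25684?fields=title,overview` returns only those two
// attributes, while `?fields=*` (or omitting the parameter) returns the whole document.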

pub async fn get_document(
    index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
    document_param: web::Path<DocumentParam>,
    params: AwebQueryParameter<GetDocument, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
    let DocumentParam { index_uid, document_id } = document_param.into_inner();
    let index_uid = IndexUid::try_from(index_uid)?;

    let GetDocument { fields } = params.into_inner();
    let attributes_to_retrieve = fields.merge_star_and_none();

    let index = index_scheduler.index(&index_uid)?;
    let document = retrieve_document(&index, &document_id, attributes_to_retrieve)?;
    debug!("returns: {:?}", document);
    Ok(HttpResponse::Ok().json(document))
}
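
// Editorial note (assumption inferred from the name): `merge_star_and_none` collapses
// `fields=*` and a missing `fields` parameter into `None`, meaning "return every
// attribute"; only an explicit list yields `Some(..)` and triggers attribute selection.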

pub async fn delete_document(
    index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
    path: web::Path<DocumentParam>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let DocumentParam { index_uid, document_id } = path.into_inner();
    let index_uid = IndexUid::try_from(index_uid)?;

    analytics.delete_documents(DocumentDeletionKind::PerDocumentId, &req);

    let task = KindWithContent::DocumentDeletion {
        index_uid: index_uid.to_string(),
        documents_ids: vec![document_id],
    };
    let task: SummarizedTaskView =
        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
    debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
}

#[derive(Debug, Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct BrowseQuery {
    #[deserr(default, error = DeserrQueryParamError<InvalidDocumentOffset>)]
    offset: Param<usize>,
    #[deserr(default = Param(PAGINATION_DEFAULT_LIMIT), error = DeserrQueryParamError<InvalidDocumentLimit>)]
    limit: Param<usize>,
    #[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)]
    fields: OptionStarOrList<String>,
}
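
// Illustrative example (assumption): `GET /indexes/movies/documents?offset=20&limit=10&fields=title`
// returns the third page of ten documents, each reduced to its `title` attribute.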

pub async fn get_all_documents(
    index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
    params: AwebQueryParameter<BrowseQuery, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;
    debug!("called with params: {:?}", params);
    let BrowseQuery { limit, offset, fields } = params.into_inner();
    let attributes_to_retrieve = fields.merge_star_and_none();

    let index = index_scheduler.index(&index_uid)?;
    let (total, documents) = retrieve_documents(&index, offset.0, limit.0, attributes_to_retrieve)?;

    let ret = PaginationView::new(offset.0, limit.0, total as usize, documents);

    debug!("returns: {:?}", ret);
    Ok(HttpResponse::Ok().json(ret))
}

#[derive(Deserialize, Debug, Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct UpdateDocumentsQuery {
    #[deserr(default, error = DeserrQueryParamError<InvalidIndexPrimaryKey>)]
    pub primary_key: Option<String>,
    #[deserr(default, try_from(char) = from_char_csv_delimiter -> DeserrQueryParamError<InvalidDocumentCsvDelimiter>, error = DeserrQueryParamError<InvalidDocumentCsvDelimiter>)]
    pub csv_delimiter: Option<u8>,
}

fn from_char_csv_delimiter(
    c: char,
) -> Result<Option<u8>, DeserrQueryParamError<InvalidDocumentCsvDelimiter>> {
    if c.is_ascii() {
        Ok(Some(c as u8))
    } else {
        Err(DeserrQueryParamError::new(
            format!("csv delimiter must be an ascii character. Found: `{}`", c),
            Code::InvalidDocumentCsvDelimiter,
        ))
    }
}
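
// Illustrative example (assumption): `POST /indexes/movies/documents?csvDelimiter=;` lets a
// `text/csv` payload use `;` instead of the default `,`; a non-ASCII character such as `é`
// is rejected by `from_char_csv_delimiter` with `InvalidDocumentCsvDelimiter`.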

pub async fn replace_documents(
    index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
    params: AwebQueryParameter<UpdateDocumentsQuery, DeserrQueryParamError>,
    body: Payload,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;

    debug!("called with params: {:?}", params);
    let params = params.into_inner();

    analytics.add_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);

    let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
    let task = document_addition(
        extract_mime_type(&req)?,
        index_scheduler,
        index_uid,
        params.primary_key,
        params.csv_delimiter,
        body,
        IndexDocumentsMethod::ReplaceDocuments,
        allow_index_creation,
    )
    .await?;

    Ok(HttpResponse::Accepted().json(task))
}

pub async fn update_documents(
    index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
    params: AwebQueryParameter<UpdateDocumentsQuery, DeserrQueryParamError>,
    body: Payload,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;

    debug!("called with params: {:?}", params);
    let params = params.into_inner();

    analytics.update_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);

    let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
    let task = document_addition(
        extract_mime_type(&req)?,
        index_scheduler,
        index_uid,
        params.primary_key,
        params.csv_delimiter,
        body,
        IndexDocumentsMethod::UpdateDocuments,
        allow_index_creation,
    )
    .await?;

    Ok(HttpResponse::Accepted().json(task))
}
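
// Editorial note (not in the original source): both handlers above delegate to
// `document_addition` and differ only in the `IndexDocumentsMethod` they pass:
// `ReplaceDocuments` overwrites matching documents entirely, while `UpdateDocuments`
// merges the payload's fields into any existing document with the same primary key.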

#[allow(clippy::too_many_arguments)]
async fn document_addition(
    mime_type: Option<Mime>,
    index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
    index_uid: IndexUid,
    primary_key: Option<String>,
    csv_delimiter: Option<u8>,
    mut body: Payload,
    method: IndexDocumentsMethod,
    allow_index_creation: bool,
) -> Result<SummarizedTaskView, MeilisearchHttpError> {
    let format = match (
        mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())),
        csv_delimiter,
    ) {
        (Some(("application", "json")), None) => PayloadType::Json,
        (Some(("application", "x-ndjson")), None) => PayloadType::Ndjson,
        (Some(("text", "csv")), None) => PayloadType::Csv { delimiter: b',' },
        (Some(("text", "csv")), Some(delimiter)) => PayloadType::Csv { delimiter },
        (Some(("application", "json")), Some(_)) => {
            return Err(MeilisearchHttpError::CsvDelimiterWithWrongContentType(String::from(
                "application/json",
            )))
        }
        (Some(("application", "x-ndjson")), Some(_)) => {
            return Err(MeilisearchHttpError::CsvDelimiterWithWrongContentType(String::from(
                "application/x-ndjson",
            )))
        }
        (Some((type_, subtype)), _) => {
            return Err(MeilisearchHttpError::InvalidContentType(
                format!("{}/{}", type_, subtype),
                ACCEPTED_CONTENT_TYPE.clone(),
            ))
        }
        (None, _) => {
            return Err(MeilisearchHttpError::MissingContentType(ACCEPTED_CONTENT_TYPE.clone()))
        }
    };
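
    // Editorial summary (not in the original source): only JSON, NDJSON, and CSV payloads
    // are accepted, and only CSV may be combined with a `csvDelimiter`; every other
    // combination is rejected before the body is read.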

    let (uuid, mut update_file) = index_scheduler.create_update_file()?;

    let temp_file = match tempfile() {
        Ok(file) => file,
        Err(e) => return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))),
    };

    let async_file = File::from_std(temp_file);
    let mut buffer = BufWriter::new(async_file);

    // Despite its name, this counts the number of chunks written, not the number of bytes.
    let mut buffer_write_size: usize = 0;
    while let Some(result) = body.next().await {
        let byte = result?;

        if byte.is_empty() && buffer_write_size == 0 {
            return Err(MeilisearchHttpError::MissingPayload(format));
        }

        match buffer.write_all(&byte).await {
            Ok(()) => buffer_write_size += 1,
            Err(e) => return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))),
        }
    }

    if let Err(e) = buffer.flush().await {
        return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e))));
    }

    if buffer_write_size == 0 {
        return Err(MeilisearchHttpError::MissingPayload(format));
    }

    if let Err(e) = buffer.seek(std::io::SeekFrom::Start(0)).await {
        return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e))));
    }

    let read_file = buffer.into_inner().into_std().await;

    let documents_count = tokio::task::spawn_blocking(move || {
        let documents_count = match format {
            PayloadType::Json => read_json(&read_file, update_file.as_file_mut())?,
            PayloadType::Csv { delimiter } => {
                read_csv(&read_file, update_file.as_file_mut(), delimiter)?
            }
            PayloadType::Ndjson => read_ndjson(&read_file, update_file.as_file_mut())?,
        };
        // we NEED to persist the file here because we moved the `update_file` into another task.
        update_file.persist()?;
        Ok(documents_count)
    })
    .await;
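
    // Editorial note (not in the original source): `spawn_blocking` yields a nested
    // `Result`: the outer one is a `JoinError` if the blocking task panicked, the inner
    // one is the document-parsing result, hence the `Ok(Ok(..))` match below.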

    let documents_count = match documents_count {
        Ok(Ok(documents_count)) => documents_count,
        // In this case the file cannot have been persisted (the closure failed before `persist`).
        Ok(Err(e)) => return Err(e),
        Err(e) => {
            // Here the file MAY have been persisted or not.
            // We don't know, so we ignore the file-not-found error.
            match index_scheduler.delete_update_file(uuid) {
                Ok(()) => (),
                Err(index_scheduler::Error::FileStore(file_store::Error::IoError(e)))
                    if e.kind() == ErrorKind::NotFound => {}
                Err(e) => {
                    log::warn!("Unknown error happened while deleting a malformed update file with uuid {uuid}: {e}");
                }
            }
            // We still want to return the original error to the end user.
            return Err(e.into());
        }
    };

    let task = KindWithContent::DocumentAdditionOrUpdate {
        method,
        content_file: uuid,
        documents_count,
        primary_key,
        allow_index_creation,
        index_uid: index_uid.to_string(),
    };

    let scheduler = index_scheduler.clone();
    let task = match tokio::task::spawn_blocking(move || scheduler.register(task)).await? {
        Ok(task) => task,
        Err(e) => {
            index_scheduler.delete_update_file(uuid)?;
            return Err(e.into());
        }
    };

    debug!("returns: {:?}", task);
    Ok(task.into())
}

pub async fn delete_documents_batch(
    index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
    body: web::Json<Vec<Value>>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    debug!("called with params: {:?}", body);
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;

    analytics.delete_documents(DocumentDeletionKind::PerBatch, &req);

    let ids = body
        .iter()
        .map(|v| v.as_str().map(String::from).unwrap_or_else(|| v.to_string()))
        .collect();

    let task =
        KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids };
    let task: SummarizedTaskView =
        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();

    debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
}
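
// Illustrative example (assumption): a body of `[23, "42", "glass"]` is normalized above so
// string ids pass through unchanged while any other JSON value (here the number 23) is
// stringified with `to_string` before being enqueued for deletion.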

#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct DocumentDeletionByFilter {
    #[deserr(error = DeserrJsonError<InvalidDocumentDeleteFilter>)]
    filter: Value,
}
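
// Illustrative example (assumption): `POST /indexes/movies/documents/delete` with the body
// `{ "filter": "genre = horror AND release_date < 1040995200" }` enqueues a
// `DocumentDeletionByFilter` task targeting every matching document.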

pub async fn delete_documents_by_filter(
    index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
    body: AwebJson<DocumentDeletionByFilter, DeserrJsonError>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    debug!("called with params: {:?}", body);
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;
    let index_uid = index_uid.into_inner();
    let filter = body.into_inner().filter;

    analytics.delete_documents(DocumentDeletionKind::PerFilter, &req);

    // we ensure the filter is well formed before enqueuing it
    || -> Result<_, ResponseError> {
        Ok(crate::search::parse_filter(&filter)?.ok_or(MeilisearchHttpError::EmptyFilter)?)
    }()
    // and whatever the error was, the error code should always be `InvalidDocumentDeleteFilter`
    .map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentDeleteFilter))?;

    let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };

    let task: SummarizedTaskView =
        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();

    debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
}

pub async fn clear_all_documents(
    index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;
    analytics.delete_documents(DocumentDeletionKind::ClearAll, &req);

    let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() };
    let task: SummarizedTaskView =
        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();

    debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
}

fn all_documents<'a>(
    index: &Index,
    rtxn: &'a RoTxn,
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
    let fields_ids_map = index.fields_ids_map(rtxn)?;
    let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();

    Ok(index.all_documents(rtxn)?.map(move |ret| {
        ret.map_err(ResponseError::from).and_then(|(_key, document)| -> Result<_, ResponseError> {
            Ok(milli::obkv_to_json(&all_fields, &fields_ids_map, document)?)
        })
    }))
}

fn retrieve_documents<S: AsRef<str>>(
    index: &Index,
    offset: usize,
    limit: usize,
    attributes_to_retrieve: Option<Vec<S>>,
) -> Result<(u64, Vec<Document>), ResponseError> {
    let rtxn = index.read_txn()?;

    let mut documents = Vec::new();
    for document in all_documents(index, &rtxn)?.skip(offset).take(limit) {
        let document = match &attributes_to_retrieve {
            Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
                &document?,
                attributes_to_retrieve.iter().map(|s| s.as_ref()),
            ),
            None => document?,
        };
        documents.push(document);
    }

    let number_of_documents = index.number_of_documents(&rtxn)?;
    Ok((number_of_documents, documents))
}

fn retrieve_document<S: AsRef<str>>(
    index: &Index,
    doc_id: &str,
    attributes_to_retrieve: Option<Vec<S>>,
) -> Result<Document, ResponseError> {
    let txn = index.read_txn()?;

    let fields_ids_map = index.fields_ids_map(&txn)?;
    let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();

    let internal_id = index
        .external_documents_ids(&txn)?
        .get(doc_id.as_bytes())
        .ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;

    let document = index
        .documents(&txn, std::iter::once(internal_id))?
        .into_iter()
        .next()
        .map(|(_, d)| d)
        .ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;

    let document = meilisearch_types::milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
    let document = match &attributes_to_retrieve {
        Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
            &document,
            attributes_to_retrieve.iter().map(|s| s.as_ref()),
        ),
        None => document,
    };

    Ok(document)
}