MeiliSearch/meilisearch-http/src/routes/document.rs

297 lines
8.9 KiB
Rust
Raw Normal View History

2020-04-16 18:37:54 +02:00
use std::collections::HashSet;
use std::collections::BTreeSet;
2020-04-10 19:05:05 +02:00
use actix_web as aweb;
use actix_web::{delete, get, post, put, web, HttpResponse};
2019-10-31 15:00:36 +01:00
use indexmap::IndexMap;
use serde::Deserialize;
2019-10-31 15:00:36 +01:00
use serde_json::Value;
use crate::error::ResponseError;
2020-04-10 19:05:05 +02:00
use crate::routes::{IndexParam, IndexUpdateResponse};
2019-10-31 15:00:36 +01:00
use crate::Data;
type Document = IndexMap<String, Value>;
2019-10-31 15:00:36 +01:00
#[derive(Default, Deserialize)]
pub struct DocumentParam {
index_uid: String,
2020-04-10 19:05:05 +02:00
document_id: String,
}
#[get("/indexes/{index_uid}/documents/{document_id}")]
pub async fn get_document(
data: web::Data<Data>,
path: web::Path<DocumentParam>,
2020-04-16 11:09:47 +02:00
) -> aweb::Result<HttpResponse> {
2020-04-10 19:05:05 +02:00
let index = data
.db
.open_index(&path.index_uid)
.ok_or(ResponseError::IndexNotFound(path.index_uid.clone()))?;
let document_id = meilisearch_core::serde::compute_document_id(path.document_id.clone());
2019-10-31 15:00:36 +01:00
2020-04-10 19:05:05 +02:00
let reader = data
.db
.main_read_txn()
2020-04-09 10:39:34 +02:00
.map_err(|err| ResponseError::Internal(err.to_string()))?;
2019-10-31 15:00:36 +01:00
2020-04-10 19:05:05 +02:00
let response = index
2020-04-16 18:37:54 +02:00
.document::<Document>(&reader, None, document_id)
.map_err(|_| ResponseError::DocumentNotFound(path.document_id.clone()))?
.ok_or(ResponseError::DocumentNotFound(path.document_id.clone()))?;
2019-10-31 15:00:36 +01:00
2020-04-16 11:09:47 +02:00
Ok(HttpResponse::Ok().json(response))
2019-10-31 15:00:36 +01:00
}
#[delete("/indexes/{index_uid}/documents/{document_id}")]
pub async fn delete_document(
data: web::Data<Data>,
path: web::Path<DocumentParam>,
2020-04-09 10:39:34 +02:00
) -> aweb::Result<HttpResponse> {
2020-04-10 19:05:05 +02:00
let index = data
.db
.open_index(&path.index_uid)
.ok_or(ResponseError::IndexNotFound(path.index_uid.clone()))?;
let document_id = meilisearch_core::serde::compute_document_id(path.document_id.clone());
2019-10-31 15:00:36 +01:00
2020-04-10 19:05:05 +02:00
let mut update_writer = data
.db
.update_write_txn()
2020-04-09 10:39:34 +02:00
.map_err(|err| ResponseError::Internal(err.to_string()))?;
2019-10-31 15:00:36 +01:00
let mut documents_deletion = index.documents_deletion();
documents_deletion.delete_document_by_id(document_id);
2020-04-10 19:05:05 +02:00
let update_id = documents_deletion
.finalize(&mut update_writer)
2020-04-07 19:34:57 +02:00
.map_err(|err| ResponseError::Internal(err.to_string()))?;
2019-10-31 15:00:36 +01:00
2020-04-10 19:05:05 +02:00
update_writer
.commit()
2020-04-09 10:39:34 +02:00
.map_err(|err| ResponseError::Internal(err.to_string()))?;
Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)))
2019-10-31 15:00:36 +01:00
}
/// Query-string parameters accepted by `get_all_documents`.
///
/// Keys are expected in camelCase (e.g. `attributesToRetrieve`); unknown keys
/// are rejected during deserialization.
#[derive(Default, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct BrowseQuery {
    /// Number of document ids to skip; `get_all_documents` falls back to `0`.
    offset: Option<usize>,
    /// Maximum number of document ids to read; `get_all_documents` falls back to `20`.
    limit: Option<usize>,
    /// Comma-separated list of attribute names to keep in each returned document.
    attributes_to_retrieve: Option<String>,
}
#[get("/indexes/{index_uid}/documents")]
pub async fn get_all_documents(
data: web::Data<Data>,
path: web::Path<IndexParam>,
params: web::Query<BrowseQuery>,
2020-04-16 11:09:47 +02:00
) -> aweb::Result<HttpResponse> {
2020-04-10 19:05:05 +02:00
let index = data
.db
.open_index(&path.index_uid)
.ok_or(ResponseError::IndexNotFound(path.index_uid.clone()))?;
2019-10-31 15:00:36 +01:00
let offset = params.offset.unwrap_or(0);
let limit = params.limit.unwrap_or(20);
2019-10-31 15:00:36 +01:00
2020-04-10 19:05:05 +02:00
let reader = data
.db
.main_read_txn()
2020-04-09 10:39:34 +02:00
.map_err(|err| ResponseError::Internal(err.to_string()))?;
2019-10-31 15:00:36 +01:00
2020-01-23 11:30:18 +01:00
let documents_ids: Result<BTreeSet<_>, _> = index
.documents_fields_counts
.documents_ids(&reader)
.map_err(|_| ResponseError::Internal(path.index_uid.clone()))?
2020-01-23 11:30:18 +01:00
.skip(offset)
.take(limit)
.collect();
2019-10-31 15:00:36 +01:00
2020-04-10 19:05:05 +02:00
let documents_ids = documents_ids.map_err(|err| ResponseError::Internal(err.to_string()))?;
2019-10-31 15:00:36 +01:00
2020-04-16 18:37:54 +02:00
let attributes: Option<HashSet<&str>> = params
.attributes_to_retrieve.as_ref()
.map(|a| a.split(',').collect());
2019-10-31 15:00:36 +01:00
2020-04-16 11:09:47 +02:00
let mut response = Vec::<Document>::new();
for document_id in documents_ids {
2020-04-16 18:37:54 +02:00
if let Ok(Some(document)) = index.document(&reader, attributes.as_ref(), document_id) {
2020-04-16 11:09:47 +02:00
response.push(document);
2019-10-31 15:00:36 +01:00
}
}
2020-04-16 11:09:47 +02:00
Ok(HttpResponse::Ok().json(response))
2019-10-31 15:00:36 +01:00
}
/// Guesses the primary key of `document`.
///
/// Returns the first key, in the map's iteration order, whose lowercased name
/// contains the substring `"id"`, or `None` when no key matches.
fn find_primary_key(document: &IndexMap<String, Value>) -> Option<String> {
    document
        .keys()
        .find(|key| key.to_lowercase().contains("id"))
        .cloned()
}
2020-01-02 12:24:29 +01:00
/// Query-string parameters accepted by the document addition/update routes.
///
/// Keys are expected in camelCase (`primaryKey`); unknown keys are rejected
/// during deserialization.
#[derive(Default, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct UpdateDocumentsQuery {
    /// Name of the attribute to use as primary key when the schema has none yet.
    primary_key: Option<String>,
}
async fn update_multiple_documents(
data: web::Data<Data>,
path: web::Path<IndexParam>,
params: web::Query<UpdateDocumentsQuery>,
body: web::Json<Vec<Document>>,
2020-04-10 19:05:05 +02:00
is_partial: bool,
2020-04-09 10:39:34 +02:00
) -> aweb::Result<HttpResponse> {
2020-04-10 19:05:05 +02:00
let index = data
.db
.open_index(path.index_uid.clone())
.ok_or(ResponseError::IndexNotFound(path.index_uid.clone()))?;
2020-01-02 12:24:29 +01:00
2020-04-10 19:05:05 +02:00
let reader = data
.db
.main_read_txn()
2020-04-09 10:39:34 +02:00
.map_err(|err| ResponseError::Internal(err.to_string()))?;
2019-10-31 15:00:36 +01:00
2020-03-10 11:29:56 +01:00
let mut schema = index
.main
.schema(&reader)
2020-04-09 10:39:34 +02:00
.map_err(|err| ResponseError::Internal(err.to_string()))?
2020-04-10 19:05:05 +02:00
.ok_or(ResponseError::Internal(
"Impossible to retrieve the schema".to_string(),
))?;
2020-03-05 15:49:47 +01:00
if schema.primary_key().is_none() {
let id = match params.primary_key.clone() {
Some(id) => id,
2020-04-10 19:05:05 +02:00
None => body.first().and_then(|docs| find_primary_key(docs)).ok_or(
2020-04-16 11:09:47 +02:00
ResponseError::BadRequest("Could not infer a primary key".to_string()),
2020-04-10 19:05:05 +02:00
)?,
};
2020-04-10 19:05:05 +02:00
let mut writer = data
.db
.main_write_txn()
2020-04-09 10:39:34 +02:00
.map_err(|err| ResponseError::Internal(err.to_string()))?;
2020-04-10 19:05:05 +02:00
schema
.set_primary_key(&id)
.map_err(|e| ResponseError::Internal(e.to_string()))?;
2020-04-10 19:05:05 +02:00
index
.main
.put_schema(&mut writer, &schema)
.map_err(|e| ResponseError::Internal(e.to_string()))?;
2020-04-10 19:05:05 +02:00
writer
.commit()
2020-04-09 10:39:34 +02:00
.map_err(|err| ResponseError::Internal(err.to_string()))?;
2019-10-31 15:00:36 +01:00
}
let mut document_addition = if is_partial {
index.documents_partial_addition()
} else {
index.documents_addition()
};
2019-10-31 15:00:36 +01:00
for document in body.into_inner() {
2019-10-31 15:00:36 +01:00
document_addition.update_document(document);
}
2020-04-10 19:05:05 +02:00
let mut update_writer = data
.db
.update_write_txn()
2020-04-09 10:39:34 +02:00
.map_err(|err| ResponseError::Internal(err.to_string()))?;
2020-04-10 19:05:05 +02:00
let update_id = document_addition
.finalize(&mut update_writer)
.map_err(|e| ResponseError::Internal(e.to_string()))?;
2020-04-10 19:05:05 +02:00
update_writer
.commit()
2020-04-09 10:39:34 +02:00
.map_err(|err| ResponseError::Internal(err.to_string()))?;
2019-10-31 15:00:36 +01:00
Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)))
2019-10-31 15:00:36 +01:00
}
#[post("/indexes/{index_uid}/documents")]
pub async fn add_documents(
data: web::Data<Data>,
path: web::Path<IndexParam>,
params: web::Query<UpdateDocumentsQuery>,
2020-04-10 19:05:05 +02:00
body: web::Json<Vec<Document>>,
2020-04-09 10:39:34 +02:00
) -> aweb::Result<HttpResponse> {
update_multiple_documents(data, path, params, body, false).await
}
#[put("/indexes/{index_uid}/documents")]
pub async fn update_documents(
data: web::Data<Data>,
path: web::Path<IndexParam>,
params: web::Query<UpdateDocumentsQuery>,
2020-04-10 19:05:05 +02:00
body: web::Json<Vec<Document>>,
2020-04-09 10:39:34 +02:00
) -> aweb::Result<HttpResponse> {
update_multiple_documents(data, path, params, body, true).await
}
#[post("/indexes/{index_uid}/documents/delete-batch")]
pub async fn delete_documents(
data: web::Data<Data>,
path: web::Path<IndexParam>,
2020-04-10 19:05:05 +02:00
body: web::Json<Vec<Value>>,
2020-04-09 10:39:34 +02:00
) -> aweb::Result<HttpResponse> {
2020-04-10 19:05:05 +02:00
let index = data
.db
.open_index(path.index_uid.clone())
.ok_or(ResponseError::IndexNotFound(path.index_uid.clone()))?;
2019-10-31 15:00:36 +01:00
2020-04-10 19:05:05 +02:00
let mut writer = data
.db
.update_write_txn()
2020-04-09 10:39:34 +02:00
.map_err(|err| ResponseError::Internal(err.to_string()))?;
2019-10-31 15:00:36 +01:00
let mut documents_deletion = index.documents_deletion();
for document_id in body.into_inner() {
if let Some(document_id) = meilisearch_core::serde::value_to_string(&document_id) {
2019-10-31 15:00:36 +01:00
documents_deletion
.delete_document_by_id(meilisearch_core::serde::compute_document_id(document_id));
2019-10-31 15:00:36 +01:00
}
}
2020-04-10 19:05:05 +02:00
let update_id = documents_deletion
.finalize(&mut writer)
.map_err(|e| ResponseError::Internal(e.to_string()))?;
2019-10-31 15:00:36 +01:00
2020-04-10 19:05:05 +02:00
writer
.commit()
2020-04-09 10:39:34 +02:00
.map_err(|err| ResponseError::Internal(err.to_string()))?;
2019-10-31 15:00:36 +01:00
Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)))
2019-10-31 15:00:36 +01:00
}
#[delete("/indexes/{index_uid}/documents")]
pub async fn clear_all_documents(
data: web::Data<Data>,
path: web::Path<IndexParam>,
2020-04-09 10:39:34 +02:00
) -> aweb::Result<HttpResponse> {
2020-04-10 19:05:05 +02:00
let index = data
.db
.open_index(path.index_uid.clone())
.ok_or(ResponseError::IndexNotFound(path.index_uid.clone()))?;
2019-11-18 14:41:04 +01:00
2020-04-10 19:05:05 +02:00
let mut writer = data
.db
.update_write_txn()
2020-04-09 10:39:34 +02:00
.map_err(|err| ResponseError::Internal(err.to_string()))?;
2019-10-31 15:00:36 +01:00
2020-04-10 19:05:05 +02:00
let update_id = index
.clear_all(&mut writer)
.map_err(|e| ResponseError::Internal(e.to_string()))?;
2020-04-10 19:05:05 +02:00
writer
.commit()
2020-04-09 10:39:34 +02:00
.map_err(|err| ResponseError::Internal(err.to_string()))?;
2019-10-31 15:00:36 +01:00
Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)))
2019-10-31 15:00:36 +01:00
}