Merge branch 'main' into fix-3037-new

This commit is contained in:
jiangbo212 2022-12-12 21:36:10 +08:00
commit 169682d3ec
98 changed files with 133 additions and 104 deletions

View file

@ -0,0 +1,438 @@
use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::Data;
use actix_web::{web, HttpMessage, HttpRequest, HttpResponse};
use bstr::ByteSlice;
use futures::StreamExt;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::star_or::StarOr;
use meilisearch_types::tasks::KindWithContent;
use meilisearch_types::{milli, Document, Index};
use mime::Mime;
use once_cell::sync::Lazy;
use serde::Deserialize;
use serde_cs::vec::CS;
use serde_json::Value;
use std::io::ErrorKind;
use tempfile::tempfile;
use tokio::fs::File;
use tokio::io::{AsyncSeekExt, AsyncWriteExt};
use crate::analytics::{Analytics, DocumentDeletionKind};
use crate::error::MeilisearchHttpError;
use crate::error::PayloadError::ReceivePayloadErr;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::payload::Payload;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::{fold_star_or, PaginationView, SummarizedTaskView};
/// Content types accepted by the document addition/update routes.
///
/// The list is cloned into the `InvalidContentType` and `MissingContentType` errors.
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
    ["application/json", "application/x-ndjson", "text/csv"]
        .iter()
        .map(|content_type| content_type.to_string())
        .collect()
});
/// Reads the mime type of `req` through `HttpMessage::mime_type`.
///
/// A successful lookup is returned untouched (including `Ok(None)`). When the lookup
/// fails, the error is `InvalidContentType` carrying the raw `Content-Type` header value
/// if that header is present, and `MissingContentType` otherwise.
fn extract_mime_type(req: &HttpRequest) -> Result<Option<Mime>, MeilisearchHttpError> {
    req.mime_type().map_err(|_| {
        let accepted = ACCEPTED_CONTENT_TYPE.clone();
        match req.headers().get(CONTENT_TYPE) {
            Some(raw_header) => MeilisearchHttpError::InvalidContentType(
                raw_header.as_bytes().as_bstr().to_string(),
                accepted,
            ),
            None => MeilisearchHttpError::MissingContentType(accepted),
        }
    })
}
/// Path parameters of the single-document routes (`get_document`, `delete_document`),
/// deserialized by `web::Path`.
#[derive(Deserialize)]
pub struct DocumentParam {
/// Uid of the index, as found in the request path.
index_uid: String,
/// Identifier of the targeted document, as found in the request path.
document_id: String,
}
/// Registers the document routes on `cfg`: the collection routes on `""`,
/// the batch deletion on `/delete-batch` and the single-document routes on `/{document_id}`.
pub fn configure(cfg: &mut web::ServiceConfig) {
    let collection = web::resource("")
        .route(web::get().to(SeqHandler(get_all_documents)))
        .route(web::post().to(SeqHandler(add_documents)))
        .route(web::put().to(SeqHandler(update_documents)))
        .route(web::delete().to(SeqHandler(clear_all_documents)));

    let delete_batch =
        web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents)));

    let single_document = web::resource("/{document_id}")
        .route(web::get().to(SeqHandler(get_document)))
        .route(web::delete().to(SeqHandler(delete_document)));

    cfg.service(collection)
        // this route needs to be registered before `/{document_id}` to match properly
        .service(delete_batch)
        .service(single_document);
}
/// Query parameters accepted by `get_document`.
///
/// Keys are camelCase and unknown keys are rejected (`deny_unknown_fields`).
#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct GetDocument {
/// Optional list of attributes to retrieve; each entry is either a name or a star,
/// and the list is resolved with `fold_star_or` by the handler.
fields: Option<CS<StarOr<String>>>,
}
pub async fn get_document(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
path: web::Path<DocumentParam>,
params: web::Query<GetDocument>,
) -> Result<HttpResponse, ResponseError> {
let GetDocument { fields } = params.into_inner();
let attributes_to_retrieve = fields.and_then(fold_star_or);
let index = index_scheduler.index(&path.index_uid)?;
let document = retrieve_document(&index, &path.document_id, attributes_to_retrieve)?;
debug!("returns: {:?}", document);
Ok(HttpResponse::Ok().json(document))
}
/// Registers a `DocumentDeletion` task for a single document and answers
/// `202 Accepted` with the summarized task.
pub async fn delete_document(
    index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
    path: web::Path<DocumentParam>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    analytics.delete_documents(DocumentDeletionKind::PerDocumentId, &req);

    let DocumentParam { document_id, index_uid } = path.into_inner();
    let deletion =
        KindWithContent::DocumentDeletion { index_uid, documents_ids: vec![document_id] };

    // `register` is run on the blocking thread pool.
    let registered =
        tokio::task::spawn_blocking(move || index_scheduler.register(deletion)).await??;
    let summary: SummarizedTaskView = registered.into();

    debug!("returns: {:?}", summary);
    Ok(HttpResponse::Accepted().json(summary))
}
/// Query parameters accepted by `get_all_documents`.
///
/// Keys are camelCase and unknown keys are rejected (`deny_unknown_fields`).
#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct BrowseQuery {
/// Number of documents to skip; falls back to `usize::default()` when absent.
#[serde(default)]
offset: usize,
/// Maximum number of documents to return; falls back to
/// `crate::routes::PAGINATION_DEFAULT_LIMIT` when absent.
#[serde(default = "crate::routes::PAGINATION_DEFAULT_LIMIT")]
limit: usize,
/// Optional list of attributes to retrieve; each entry is either a name or a star,
/// and the list is resolved with `fold_star_or` by the handler.
fields: Option<CS<StarOr<String>>>,
}
/// Returns a page of documents of an index, wrapped in a `PaginationView`
/// together with the total number of documents.
pub async fn get_all_documents(
    index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
    params: web::Query<BrowseQuery>,
) -> Result<HttpResponse, ResponseError> {
    debug!("called with params: {:?}", params);

    let query = params.into_inner();
    let (offset, limit) = (query.offset, query.limit);
    let attributes_to_retrieve = query.fields.and_then(fold_star_or);

    let index = index_scheduler.index(&index_uid)?;
    let (total, page) = retrieve_documents(&index, offset, limit, attributes_to_retrieve)?;

    let view = PaginationView::new(offset, limit, total as usize, page);
    debug!("returns: {:?}", view);
    Ok(HttpResponse::Ok().json(view))
}
/// Query parameters accepted by `add_documents` and `update_documents`.
///
/// Keys are camelCase and unknown keys are rejected (`deny_unknown_fields`).
#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct UpdateDocumentsQuery {
/// Optional primary key, forwarded to the `DocumentAdditionOrUpdate` task.
pub primary_key: Option<String>,
}
/// Handles document additions: the payload is registered with the
/// `ReplaceDocuments` indexing method and the summarized task is returned
/// with `202 Accepted`.
pub async fn add_documents(
    index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
    params: web::Query<UpdateDocumentsQuery>,
    body: Payload,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    debug!("called with params: {:?}", params);
    let query = params.into_inner();

    // Analytics are told whether looking the index up failed.
    let index_lookup_failed = index_scheduler.index(&index_uid).is_err();
    analytics.add_documents(&query, index_lookup_failed, &req);

    let allow_index_creation = index_scheduler.filters().allow_index_creation;
    let mime_type = extract_mime_type(&req)?;
    let summary = document_addition(
        mime_type,
        index_scheduler,
        index_uid.into_inner(),
        query.primary_key,
        body,
        IndexDocumentsMethod::ReplaceDocuments,
        allow_index_creation,
    )
    .await?;

    Ok(HttpResponse::Accepted().json(summary))
}
/// Handles document updates: the payload is registered with the
/// `UpdateDocuments` indexing method and the summarized task is returned
/// with `202 Accepted`.
pub async fn update_documents(
    index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
    path: web::Path<String>,
    params: web::Query<UpdateDocumentsQuery>,
    body: Payload,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    debug!("called with params: {:?}", params);
    let index_uid = path.into_inner();

    // Analytics are told whether looking the index up failed.
    let index_lookup_failed = index_scheduler.index(&index_uid).is_err();
    analytics.update_documents(&params, index_lookup_failed, &req);

    let allow_index_creation = index_scheduler.filters().allow_index_creation;
    let mime_type = extract_mime_type(&req)?;
    let primary_key = params.into_inner().primary_key;
    let summary = document_addition(
        mime_type,
        index_scheduler,
        index_uid,
        primary_key,
        body,
        IndexDocumentsMethod::UpdateDocuments,
        allow_index_creation,
    )
    .await?;

    Ok(HttpResponse::Accepted().json(summary))
}
async fn document_addition(
mime_type: Option<Mime>,
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: String,
primary_key: Option<String>,
mut body: Payload,
method: IndexDocumentsMethod,
allow_index_creation: bool,
) -> Result<SummarizedTaskView, MeilisearchHttpError> {
let format = match mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())) {
Some(("application", "json")) => PayloadType::Json,
Some(("application", "x-ndjson")) => PayloadType::Ndjson,
Some(("text", "csv")) => PayloadType::Csv,
Some((type_, subtype)) => {
return Err(MeilisearchHttpError::InvalidContentType(
format!("{}/{}", type_, subtype),
ACCEPTED_CONTENT_TYPE.clone(),
))
}
None => {
return Err(MeilisearchHttpError::MissingContentType(ACCEPTED_CONTENT_TYPE.clone()))
}
};
// is your indexUid valid?
let index_uid = IndexUid::try_from(index_uid)?.into_inner();
let (uuid, mut update_file) = index_scheduler.create_update_file()?;
let temp_file = match tempfile() {
Ok(temp_file) => temp_file,
Err(e) => {
return Err(MeilisearchHttpError::Payload(ReceivePayloadErr(Box::new(e))));
}
};
let mut buffer = File::from_std(temp_file);
let mut buffer_write_size: usize = 0;
while let Some(bytes) = body.next().await {
let byte = &bytes?;
if byte.is_empty() && buffer_write_size == 0 {
return Err(MeilisearchHttpError::MissingPayload(format));
}
match buffer.write_all(byte).await {
Ok(()) => buffer_write_size += 1,
Err(e) => {
return Err(MeilisearchHttpError::Payload(ReceivePayloadErr(Box::new(e))));
}
};
}
if let Err(e) = buffer.flush().await {
return Err(MeilisearchHttpError::Payload(ReceivePayloadErr(Box::new(e))));
}
if buffer_write_size == 0 {
return Err(MeilisearchHttpError::MissingPayload(format));
}
if let Err(e) = buffer.seek(std::io::SeekFrom::Start(0)).await {
return Err(MeilisearchHttpError::Payload(ReceivePayloadErr(Box::new(e))));
};
let read_file = buffer.into_std().await;
let documents_count =
tokio::task::spawn_blocking(move || -> Result<_, MeilisearchHttpError> {
let documents_count = match format {
PayloadType::Json => read_json(&read_file, update_file.as_file_mut())?,
PayloadType::Csv => read_csv(&read_file, update_file.as_file_mut())?,
PayloadType::Ndjson => read_ndjson(&read_file, update_file.as_file_mut())?,
};
// we NEED to persist the file here because we moved the `udpate_file` in another task.
update_file.persist()?;
Ok(documents_count)
})
.await;
let documents_count = match documents_count {
Ok(Ok(documents_count)) => documents_count as u64,
// in this case the file has not possibly be persisted.
Ok(Err(e)) => return Err(e),
Err(e) => {
// Here the file MAY have been persisted or not.
// We don't know thus we ignore the file not found error.
match index_scheduler.delete_update_file(uuid) {
Ok(()) => (),
Err(index_scheduler::Error::FileStore(file_store::Error::IoError(e)))
if e.kind() == ErrorKind::NotFound => {}
Err(e) => {
log::warn!("Unknown error happened while deleting a malformed update file with uuid {uuid}: {e}");
}
}
// We still want to return the original error to the end user.
return Err(e.into());
}
};
let task = KindWithContent::DocumentAdditionOrUpdate {
method,
content_file: uuid,
documents_count,
primary_key,
allow_index_creation,
index_uid,
};
let scheduler = index_scheduler.clone();
let task = match tokio::task::spawn_blocking(move || scheduler.register(task)).await? {
Ok(task) => task,
Err(e) => {
index_scheduler.delete_update_file(uuid)?;
return Err(e.into());
}
};
debug!("returns: {:?}", task);
Ok(task.into())
}
pub async fn delete_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
path: web::Path<String>,
body: web::Json<Vec<Value>>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", body);
analytics.delete_documents(DocumentDeletionKind::PerBatch, &req);
let ids = body
.iter()
.map(|v| v.as_str().map(String::from).unwrap_or_else(|| v.to_string()))
.collect();
let task =
KindWithContent::DocumentDeletion { index_uid: path.into_inner(), documents_ids: ids };
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
/// Registers a `DocumentClear` task for the index found in the path and
/// answers `202 Accepted` with the summarized task.
pub async fn clear_all_documents(
    index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
    path: web::Path<String>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    analytics.delete_documents(DocumentDeletionKind::ClearAll, &req);

    let clear = KindWithContent::DocumentClear { index_uid: path.into_inner() };
    let registered =
        tokio::task::spawn_blocking(move || index_scheduler.register(clear)).await??;
    let summary: SummarizedTaskView = registered.into();

    debug!("returns: {:?}", summary);
    Ok(HttpResponse::Accepted().json(summary))
}
/// Builds an iterator over every document of `index`, each converted to JSON
/// with all the fields known to the index's fields-ids map.
///
/// Errors met while iterating or converting are yielded as items; errors met
/// while preparing the iterator are returned directly.
fn all_documents<'a>(
    index: &Index,
    rtxn: &'a RoTxn,
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
    let fields_ids_map = index.fields_ids_map(rtxn)?;
    let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
    let raw_documents = index.all_documents(rtxn)?;

    Ok(raw_documents.map(move |entry| -> Result<Document, ResponseError> {
        let (_key, obkv) = entry?;
        Ok(milli::obkv_to_json(&all_fields, &fields_ids_map, obkv)?)
    }))
}
/// Collects at most `limit` documents of `index` after skipping `offset` of them,
/// keeping only `attributes_to_retrieve` when it is provided.
///
/// Returns the index's total number of documents along with the collected page.
/// The first error met while iterating stops the collection and is returned.
fn retrieve_documents<S: AsRef<str>>(
    index: &Index,
    offset: usize,
    limit: usize,
    attributes_to_retrieve: Option<Vec<S>>,
) -> Result<(u64, Vec<Document>), ResponseError> {
    let rtxn = index.read_txn()?;

    let documents = all_documents(index, &rtxn)?
        .skip(offset)
        .take(limit)
        .map(|document| -> Result<Document, ResponseError> {
            let document = document?;
            Ok(match &attributes_to_retrieve {
                Some(attributes) => permissive_json_pointer::select_values(
                    &document,
                    attributes.iter().map(|s| s.as_ref()),
                ),
                None => document,
            })
        })
        .collect::<Result<Vec<_>, _>>()?;

    let number_of_documents = index.number_of_documents(&rtxn)?;
    Ok((number_of_documents, documents))
}
fn retrieve_document<S: AsRef<str>>(
index: &Index,
doc_id: &str,
attributes_to_retrieve: Option<Vec<S>>,
) -> Result<Document, ResponseError> {
let txn = index.read_txn()?;
let fields_ids_map = index.fields_ids_map(&txn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let internal_id = index
.external_documents_ids(&txn)?
.get(doc_id.as_bytes())
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
let document = index
.documents(&txn, std::iter::once(internal_id))?
.into_iter()
.next()
.map(|(_, d)| d)
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
let document = meilisearch_types::milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
let document = match &attributes_to_retrieve {
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
&document,
attributes_to_retrieve.iter().map(|s| s.as_ref()),
),
None => document,
};
Ok(document)
}

View file

@ -0,0 +1,214 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::{self, FieldDistribution, Index};
use meilisearch_types::tasks::KindWithContent;
use serde::{Deserialize, Serialize};
use serde_json::json;
use time::OffsetDateTime;
use super::{Pagination, SummarizedTaskView};
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
pub mod documents;
pub mod search;
pub mod settings;
/// Registers the index routes on `cfg`: the collection routes on `""` and, under
/// `/{index_uid}`, the single-index routes, `/stats` and the `documents`, `search`
/// and `settings` sub-scopes.
pub fn configure(cfg: &mut web::ServiceConfig) {
    let collection = web::resource("")
        .route(web::get().to(list_indexes))
        .route(web::post().to(SeqHandler(create_index)));

    let single_index = web::resource("")
        .route(web::get().to(SeqHandler(get_index)))
        .route(web::patch().to(SeqHandler(update_index)))
        .route(web::delete().to(SeqHandler(delete_index)));

    let stats = web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats)));

    let per_index = web::scope("/{index_uid}")
        .service(single_index)
        .service(stats)
        .service(web::scope("/documents").configure(documents::configure))
        .service(web::scope("/search").configure(search::configure))
        .service(web::scope("/settings").configure(settings::configure));

    cfg.service(collection).service(per_index);
}
/// Serializable description of an index, built by `IndexView::new`.
///
/// Keys are camelCase and both dates use the RFC 3339 format.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexView {
/// Uid of the index.
pub uid: String,
/// Value of `Index::created_at` at the time the view was built.
#[serde(with = "time::serde::rfc3339")]
pub created_at: OffsetDateTime,
/// Value of `Index::updated_at` at the time the view was built.
#[serde(with = "time::serde::rfc3339")]
pub updated_at: OffsetDateTime,
/// Primary key of the index, if it has one.
pub primary_key: Option<String>,
}
impl IndexView {
    /// Builds the view of `index` under the name `uid`, reading the dates and the
    /// primary key within a single read transaction.
    fn new(uid: String, index: &Index) -> Result<IndexView, milli::Error> {
        let rtxn = index.read_txn()?;
        let created_at = index.created_at(&rtxn)?;
        let updated_at = index.updated_at(&rtxn)?;
        let primary_key = index.primary_key(&rtxn)?.map(|key| key.to_string());

        Ok(IndexView { uid, created_at, updated_at, primary_key })
    }
}
/// Lists the indexes authorized by the caller's search rules, paginated
/// according to the `Pagination` query parameters.
pub async fn list_indexes(
    index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
    paginate: web::Query<Pagination>,
) -> Result<HttpResponse, ResponseError> {
    let search_rules = &index_scheduler.filters().search_rules;

    let mut views = Vec::new();
    for (name, index) in index_scheduler.indexes()? {
        // Indexes the caller is not authorized to see are skipped before any view is built.
        if search_rules.is_index_authorized(&name) {
            views.push(IndexView::new(name, &index)?);
        }
    }

    let page = paginate.auto_paginate_sized(views.into_iter());
    debug!("returns: {:?}", page);
    Ok(HttpResponse::Ok().json(page))
}
/// JSON body accepted by `create_index`.
///
/// Keys are camelCase and unknown keys are rejected (`deny_unknown_fields`).
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct IndexCreateRequest {
/// Uid of the index to create; validated with `IndexUid::try_from` by the handler.
uid: String,
/// Optional primary key, forwarded to the `IndexCreation` task.
primary_key: Option<String>,
}
/// Registers an `IndexCreation` task and answers `202 Accepted` with the
/// summarized task.
///
/// The uid is validated first; the request is then refused with
/// `AuthenticationError::InvalidToken` when the caller's search rules do not
/// authorize that uid.
pub async fn create_index(
    index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_CREATE }>, Data<IndexScheduler>>,
    body: web::Json<IndexCreateRequest>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let IndexCreateRequest { primary_key, uid } = body.into_inner();
    let uid = IndexUid::try_from(uid)?.into_inner();

    if !index_scheduler.filters().search_rules.is_index_authorized(&uid) {
        return Err(AuthenticationError::InvalidToken.into());
    }

    analytics.publish(
        "Index Created".to_string(),
        json!({ "primary_key": primary_key }),
        Some(&req),
    );

    let creation = KindWithContent::IndexCreation { index_uid: uid, primary_key };
    let registered =
        tokio::task::spawn_blocking(move || index_scheduler.register(creation)).await??;
    let summary: SummarizedTaskView = registered.into();

    Ok(HttpResponse::Accepted().json(summary))
}
/// JSON body accepted by `update_index`.
///
/// Keys are camelCase and unknown keys are rejected (`deny_unknown_fields`).
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
#[allow(dead_code)]
pub struct UpdateIndexRequest {
/// Accepted in the body but not read by `update_index` (hence `allow(dead_code)`).
uid: Option<String>,
/// New primary key, forwarded to the `IndexUpdate` task.
primary_key: Option<String>,
}
/// Returns the `IndexView` of the index found in the path.
pub async fn get_index(
    index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
    let index = index_scheduler.index(&index_uid)?;
    let uid = index_uid.into_inner();
    let view = IndexView::new(uid, &index)?;

    debug!("returns: {:?}", view);
    Ok(HttpResponse::Ok().json(view))
}
pub async fn update_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_UPDATE }>, Data<IndexScheduler>>,
path: web::Path<String>,
body: web::Json<UpdateIndexRequest>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", body);
let body = body.into_inner();
analytics.publish(
"Index Updated".to_string(),
json!({ "primary_key": body.primary_key}),
Some(&req),
);
let task = KindWithContent::IndexUpdate {
index_uid: path.into_inner(),
primary_key: body.primary_key,
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
/// Registers an `IndexDeletion` task for the index found in the path and
/// answers `202 Accepted` with the summarized task.
pub async fn delete_index(
    index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_DELETE }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
    let deletion = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() };
    let registered =
        tokio::task::spawn_blocking(move || index_scheduler.register(deletion)).await??;
    let summary: SummarizedTaskView = registered.into();

    Ok(HttpResponse::Accepted().json(summary))
}
/// Returns the `IndexStats` of the index found in the path, after publishing
/// a "Stats Seen" analytics event.
pub async fn get_index_stats(
    index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    analytics.publish("Stats Seen".to_string(), json!({ "per_index_uid": true }), Some(&req));

    let scheduler = (*index_scheduler).clone();
    let uid = index_uid.into_inner();
    let stats = IndexStats::new(scheduler, uid)?;

    debug!("returns: {:?}", stats);
    Ok(HttpResponse::Ok().json(stats))
}
/// Statistics of a single index, built by `IndexStats::new` and serialized
/// with camelCase keys.
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct IndexStats {
/// Value of `Index::number_of_documents`.
pub number_of_documents: u64,
/// Result of `IndexScheduler::is_index_processing` for this index.
pub is_indexing: bool,
/// Value of `Index::field_distribution`.
pub field_distribution: FieldDistribution,
}
impl IndexStats {
    /// Gathers the statistics of the index named `index_uid`.
    ///
    /// The scheduler is asked whether the index is being processed before the
    /// index itself is opened and read.
    pub fn new(
        index_scheduler: Data<IndexScheduler>,
        index_uid: String,
    ) -> Result<Self, ResponseError> {
        // we check if there is currently a task processing associated with this index.
        let is_indexing = index_scheduler.is_index_processing(&index_uid)?;

        let index = index_scheduler.index(&index_uid)?;
        let rtxn = index.read_txn()?;
        let number_of_documents = index.number_of_documents(&rtxn)?;
        let field_distribution = index.field_distribution(&rtxn)?;

        Ok(IndexStats { number_of_documents, is_indexing, field_distribution })
    }
}

View file

@ -0,0 +1,225 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_auth::IndexSearchRules;
use meilisearch_types::error::ResponseError;
use serde::Deserialize;
use serde_cs::vec::CS;
use serde_json::Value;
use crate::analytics::{Analytics, SearchAggregator};
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::search::{
perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
DEFAULT_SEARCH_OFFSET,
};
/// Registers the search routes on `cfg`: `GET` with URL query parameters and
/// `POST` with a JSON body, both on `""`.
pub fn configure(cfg: &mut web::ServiceConfig) {
    let search = web::resource("")
        .route(web::get().to(SeqHandler(search_with_url_query)))
        .route(web::post().to(SeqHandler(search_with_post)));

    cfg.service(search);
}
/// Search parameters as received in the URL query string by `search_with_url_query`.
///
/// Keys are camelCase and unknown keys are rejected (`deny_unknown_fields`).
/// The struct is converted into a `SearchQuery` through the `From` impl below;
/// list-like parameters are received as `CS<String>` and collected there.
#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct SearchQueryGet {
/// Query string, if any.
q: Option<String>,
/// Defaults to `DEFAULT_SEARCH_OFFSET` when absent.
#[serde(default = "DEFAULT_SEARCH_OFFSET")]
offset: usize,
/// Defaults to `DEFAULT_SEARCH_LIMIT` when absent.
#[serde(default = "DEFAULT_SEARCH_LIMIT")]
limit: usize,
page: Option<usize>,
hits_per_page: Option<usize>,
attributes_to_retrieve: Option<CS<String>>,
attributes_to_crop: Option<CS<String>>,
/// Defaults to `DEFAULT_CROP_LENGTH` when absent.
#[serde(default = "DEFAULT_CROP_LENGTH")]
crop_length: usize,
attributes_to_highlight: Option<CS<String>>,
/// Raw filter; the conversion tries to parse it as JSON and otherwise keeps it as a string.
filter: Option<String>,
/// Raw sort expression; rewritten by `fix_sort_query_parameters` during the conversion.
sort: Option<String>,
/// Defaults to `bool::default()` when absent.
#[serde(default = "Default::default")]
show_matches_position: bool,
facets: Option<CS<String>>,
/// Defaults to `DEFAULT_HIGHLIGHT_PRE_TAG` when absent.
#[serde(default = "DEFAULT_HIGHLIGHT_PRE_TAG")]
highlight_pre_tag: String,
/// Defaults to `DEFAULT_HIGHLIGHT_POST_TAG` when absent.
#[serde(default = "DEFAULT_HIGHLIGHT_POST_TAG")]
highlight_post_tag: String,
/// Defaults to `DEFAULT_CROP_MARKER` when absent.
#[serde(default = "DEFAULT_CROP_MARKER")]
crop_marker: String,
/// Defaults to `MatchingStrategy::default()` when absent.
#[serde(default)]
matching_strategy: MatchingStrategy,
}
impl From<SearchQueryGet> for SearchQuery {
fn from(other: SearchQueryGet) -> Self {
let filter = match other.filter {
Some(f) => match serde_json::from_str(&f) {
Ok(v) => Some(v),
_ => Some(Value::String(f)),
},
None => None,
};
Self {
q: other.q,
offset: other.offset,
limit: other.limit,
page: other.page,
hits_per_page: other.hits_per_page,
attributes_to_retrieve: other.attributes_to_retrieve.map(|o| o.into_iter().collect()),
attributes_to_crop: other.attributes_to_crop.map(|o| o.into_iter().collect()),
crop_length: other.crop_length,
attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()),
filter,
sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)),
show_matches_position: other.show_matches_position,
facets: other.facets.map(|o| o.into_iter().collect()),
highlight_pre_tag: other.highlight_pre_tag,
highlight_post_tag: other.highlight_post_tag,
crop_marker: other.crop_marker,
matching_strategy: other.matching_strategy,
}
}
}
/// Incorporate search rules in search query
fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
query.filter = match (query.filter.take(), rules.filter) {
(None, rules_filter) => rules_filter,
(filter, None) => filter,
(Some(filter), Some(rules_filter)) => {
let filter = match filter {
Value::Array(filter) => filter,
filter => vec![filter],
};
let rules_filter = match rules_filter {
Value::Array(rules_filter) => rules_filter,
rules_filter => vec![rules_filter],
};
Some(Value::Array([filter, rules_filter].concat()))
}
}
}
// TODO: TAMO: split on :asc, and :desc, instead of doing some weird things
/// Splits the comma-separated `sort` query parameter into the list expected by the
/// POST route, gluing back together the pieces of a `_geoPoint(...)` expression.
///
/// Surrounding `"` are stripped and every piece is trimmed. Once a piece starts with
/// `_geoPoint(`, the following pieces are appended to it (joined with `,`) until one
/// ends with `):asc` or `):desc`, or until another `_geoPoint(` piece starts.
fn fix_sort_query_parameters(sort_query: &str) -> Vec<String> {
    let mut fixed: Vec<String> = Vec::new();
    let mut inside_geo_point = false;

    for piece in sort_query.trim_matches('"').split(',').map(str::trim) {
        let opens_geo_point = piece.starts_with("_geoPoint(");

        if inside_geo_point && !opens_geo_point {
            if let Some(pending) = fixed.last_mut() {
                pending.push(',');
                pending.push_str(piece);
                // Keep merging until the closing `):asc` / `):desc` shows up.
                inside_geo_point = !(piece.ends_with("):desc") || piece.ends_with("):asc"));
                continue;
            }
        }

        fixed.push(piece.to_owned());
        inside_geo_point = opens_geo_point;
    }

    fixed
}
pub async fn search_with_url_query(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: web::Query<SearchQueryGet>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", params);
let mut query: SearchQuery = params.into_inner().into();
// Tenant token search_rules.
if let Some(search_rules) =
index_scheduler.filters().search_rules.get_index_search_rules(&index_uid)
{
add_search_rules(&mut query, search_rules);
}
let mut aggregate = SearchAggregator::from_query(&query, &req);
let index = index_scheduler.index(&index_uid)?;
let search_result = tokio::task::spawn_blocking(move || perform_search(&index, query)).await?;
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
}
analytics.get_search(aggregate);
let search_result = search_result?;
debug!("returns: {:?}", search_result);
Ok(HttpResponse::Ok().json(search_result))
}
/// Runs a search whose parameters come from the JSON body.
///
/// The search rules attached to the caller for this index, if any, are merged into
/// the query first. The search runs on the blocking thread pool; the aggregate is
/// handed to `analytics.post_search` whether the search succeeded or not, and a
/// search error is returned only afterwards.
pub async fn search_with_post(
    index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
    params: web::Json<SearchQuery>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let mut query = params.into_inner();
    debug!("search called with params: {:?}", query);

    // Tenant token search_rules.
    let tenant_rules = index_scheduler.filters().search_rules.get_index_search_rules(&index_uid);
    if let Some(rules) = tenant_rules {
        add_search_rules(&mut query, rules);
    }

    let mut aggregate = SearchAggregator::from_query(&query, &req);

    let index = index_scheduler.index(&index_uid)?;
    let outcome = tokio::task::spawn_blocking(move || perform_search(&index, query)).await?;
    if let Ok(ref found) = outcome {
        aggregate.succeed(found);
    }
    analytics.post_search(aggregate);

    let search_result = outcome?;
    debug!("returns: {:?}", search_result);
    Ok(HttpResponse::Ok().json(search_result))
}
#[cfg(test)]
mod test {
    use super::*;

    /// Checks `fix_sort_query_parameters` against a table of `(input, expected)` pairs.
    #[test]
    fn test_fix_sort_query_parameters() {
        let cases: Vec<(&str, Vec<&str>)> = vec![
            ("_geoPoint(12, 13):asc", vec!["_geoPoint(12,13):asc"]),
            (
                "doggo:asc,_geoPoint(12.45,13.56):desc",
                vec!["doggo:asc", "_geoPoint(12.45,13.56):desc"],
            ),
            (
                "doggo:asc , _geoPoint(12.45, 13.56, 2590352):desc , catto:desc",
                vec!["doggo:asc", "_geoPoint(12.45,13.56,2590352):desc", "catto:desc"],
            ),
            // This is ugly but eh, I don't want to write a full parser just for this unused route
            ("doggo:asc , _geoPoint(1, 2), catto:desc", vec!["doggo:asc", "_geoPoint(1,2),catto:desc"]),
        ];

        for (input, expected) in cases.iter() {
            let expected: Vec<String> = expected.iter().map(|s| s.to_string()).collect();
            assert_eq!(fix_sort_query_parameters(input), expected);
        }
    }
}

View file

@ -0,0 +1,565 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::settings::{settings, Settings, Unchecked};
use meilisearch_types::tasks::KindWithContent;
use serde_json::json;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::routes::SummarizedTaskView;
/// Generates a module named `$attr` exposing the `get`, `update` and `delete`
/// handlers of a single setting, plus a `resources()` function wiring them.
///
/// Arguments, in order:
/// - `$route`: path of the generated `Resource`;
/// - `$update_verb`: `web` method constructor used for the `update` route (e.g. `put`, `patch`);
/// - `$type`: type of the setting value; the `update` body is a JSON `Option<$type>`;
/// - `$attr`: name of the `Settings` field, also used as the module name;
/// - `$camelcase_attr`: key read from the serialized settings by `get`;
/// - `$analytics_var`: name given to the `web::Data<dyn Analytics>` parameter of `update`,
///   so that the `$analytics` expression can refer to it;
/// - `$analytics`: expression called as `$analytics(&body, &req)` at the start of `update`.
#[macro_export]
macro_rules! make_setting_route {
($route:literal, $update_verb:ident, $type:ty, $attr:ident, $camelcase_attr:literal, $analytics_var:ident, $analytics:expr) => {
pub mod $attr {
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse, Resource};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::settings::{settings, Settings};
use meilisearch_types::tasks::KindWithContent;
use $crate::analytics::Analytics;
use $crate::extractors::authentication::policies::*;
use $crate::extractors::authentication::GuardedData;
use $crate::extractors::sequential_extractor::SeqHandler;
use $crate::routes::SummarizedTaskView;
// Registers a `SettingsUpdate` task that resets `$attr` (flagged `is_deletion: true`).
pub async fn delete(
index_scheduler: GuardedData<
ActionPolicy<{ actions::SETTINGS_UPDATE }>,
Data<IndexScheduler>,
>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
// Only `$attr` is touched; every other setting keeps its `Default` value.
let new_settings = Settings { $attr: Setting::Reset, ..Default::default() };
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
let task = KindWithContent::SettingsUpdate {
index_uid,
new_settings: Box::new(new_settings),
is_deletion: true,
allow_index_creation,
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task))
.await??
.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
// Registers a `SettingsUpdate` task that sets `$attr` to the body value,
// or resets it when the body is `null` (flagged `is_deletion: false`).
pub async fn update(
index_scheduler: GuardedData<
ActionPolicy<{ actions::SETTINGS_UPDATE }>,
Data<IndexScheduler>,
>,
index_uid: actix_web::web::Path<String>,
body: actix_web::web::Json<Option<$type>>,
req: HttpRequest,
$analytics_var: web::Data<dyn Analytics>,
) -> std::result::Result<HttpResponse, ResponseError> {
let body = body.into_inner();
// The caller-provided analytics expression runs before the uid is validated.
$analytics(&body, &req);
let new_settings = Settings {
$attr: match body {
Some(inner_body) => Setting::Set(inner_body),
None => Setting::Reset,
},
..Default::default()
};
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
let task = KindWithContent::SettingsUpdate {
index_uid,
new_settings: Box::new(new_settings),
is_deletion: false,
allow_index_creation,
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task))
.await??
.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
// Returns the current value of the setting, extracted from the serialized settings.
pub async fn get(
index_scheduler: GuardedData<
ActionPolicy<{ actions::SETTINGS_GET }>,
Data<IndexScheduler>,
>,
index_uid: actix_web::web::Path<String>,
) -> std::result::Result<HttpResponse, ResponseError> {
let index = index_scheduler.index(&index_uid)?;
let rtxn = index.read_txn()?;
let settings = settings(&index, &rtxn)?;
debug!("returns: {:?}", settings);
// Serialize all the settings, then take out the entry stored under `$camelcase_attr`.
let mut json = serde_json::json!(&settings);
let val = json[$camelcase_attr].take();
Ok(HttpResponse::Ok().json(val))
}
// Builds the `Resource` for `$route` with the get / update / delete routes.
pub fn resources() -> Resource {
Resource::new($route)
.route(web::get().to(SeqHandler(get)))
.route(web::$update_verb().to(SeqHandler(update)))
.route(web::delete().to(SeqHandler(delete)))
}
}
};
}
// Routes of the `filterableAttributes` setting (updated with `PUT`).
make_setting_route!(
    "/filterable-attributes",
    put,
    std::collections::BTreeSet<String>,
    filterable_attributes,
    "filterableAttributes",
    analytics,
    |setting: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
        use serde_json::json;

        // A missing value is reported as zero attributes and no `_geo`.
        let (total, has_geo) = match setting {
            Some(attributes) => (attributes.len(), attributes.contains("_geo")),
            None => (0, false),
        };

        analytics.publish(
            "FilterableAttributes Updated".to_string(),
            json!({
                "filterable_attributes": {
                    "total": total,
                    "has_geo": has_geo,
                }
            }),
            Some(req),
        );
    }
);
// Routes of the `sortableAttributes` setting (updated with `PUT`).
make_setting_route!(
    "/sortable-attributes",
    put,
    std::collections::BTreeSet<String>,
    sortable_attributes,
    "sortableAttributes",
    analytics,
    |setting: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
        use serde_json::json;

        // Both values stay `None` (serialized as `null`) when the setting is absent.
        let attributes = setting.as_ref();
        let total = attributes.map(|attrs| attrs.len());
        let has_geo = attributes.map(|attrs| attrs.contains("_geo"));

        analytics.publish(
            "SortableAttributes Updated".to_string(),
            json!({
                "sortable_attributes": {
                    "total": total,
                    "has_geo": has_geo,
                },
            }),
            Some(req),
        );
    }
);
// Routes for the `displayedAttributes` setting, mounted on `/displayed-attributes`
// with `PUT` as the update verb. The trailing closure publishes the
// "DisplayedAttributes Updated" analytics event.
make_setting_route!(
    "/displayed-attributes",
    put,
    Vec<String>,
    displayed_attributes,
    "displayedAttributes",
    analytics,
    |displayed: &Option<Vec<String>>, req: &HttpRequest| {
        use serde_json::json;

        // Both fields stay `Option`s: an absent value is serialized as `null`.
        let attributes = displayed.as_ref();
        let total = attributes.map(|attributes| attributes.len());
        let with_wildcard =
            attributes.map(|attributes| attributes.iter().any(|attribute| attribute == "*"));

        analytics.publish(
            "DisplayedAttributes Updated".to_string(),
            json!({
                "displayed_attributes": {
                    "total": total,
                    "with_wildcard": with_wildcard,
                },
            }),
            Some(req),
        );
    }
);
// Routes for the `typoTolerance` setting, mounted on `/typo-tolerance` with `PATCH`
// as the update verb. The trailing closure publishes the "TypoTolerance Updated"
// analytics event.
make_setting_route!(
    "/typo-tolerance",
    patch,
    meilisearch_types::settings::TypoSettings,
    typo_tolerance,
    "typoTolerance",
    analytics,
    |setting: &Option<meilisearch_types::settings::TypoSettings>, req: &HttpRequest| {
        use serde_json::json;

        let typo = setting.as_ref();

        // Anything other than an explicit `Set(false)` is reported as enabled.
        let enabled = typo.map(|typo| !matches!(typo.enabled, Setting::Set(false)));

        // `true` when a non-empty list was explicitly set, `null` when it was not set.
        let disable_on_attributes = typo
            .and_then(|typo| typo.disable_on_attributes.as_ref().set())
            .map(|attributes| !attributes.is_empty());
        let disable_on_words = typo
            .and_then(|typo| typo.disable_on_words.as_ref().set())
            .map(|words| !words.is_empty());

        // The word-size thresholds are nested one level deeper; unwrap that level once.
        let min_word_sizes = typo.and_then(|typo| typo.min_word_size_for_typos.as_ref().set());
        let one_typo = min_word_sizes.and_then(|sizes| sizes.one_typo.set());
        let two_typos = min_word_sizes.and_then(|sizes| sizes.two_typos.set());

        analytics.publish(
            "TypoTolerance Updated".to_string(),
            json!({
                "typo_tolerance": {
                    "enabled": enabled,
                    "disable_on_attributes": disable_on_attributes,
                    "disable_on_words": disable_on_words,
                    "min_word_size_for_one_typo": one_typo,
                    "min_word_size_for_two_typos": two_typos,
                },
            }),
            Some(req),
        );
    }
);
// Routes for the `searchableAttributes` setting, mounted on `/searchable-attributes`
// with `PUT` as the update verb. The trailing closure publishes the
// "SearchableAttributes Updated" analytics event.
make_setting_route!(
    "/searchable-attributes",
    put,
    Vec<String>,
    searchable_attributes,
    "searchableAttributes",
    analytics,
    |setting: &Option<Vec<String>>, req: &HttpRequest| {
        use serde_json::json;

        // Both fields stay `Option`s: an absent value is serialized as `null`.
        let attributes = setting.as_ref();
        let total = attributes.map(|attributes| attributes.len());
        let with_wildcard =
            attributes.map(|attributes| attributes.iter().any(|attribute| attribute == "*"));

        analytics.publish(
            "SearchableAttributes Updated".to_string(),
            json!({
                "searchable_attributes": {
                    "total": total,
                    "with_wildcard": with_wildcard,
                },
            }),
            Some(req),
        );
    }
);
// Routes for the `stopWords` setting, mounted on `/stop-words` with `PUT` as the
// update verb. The trailing closure publishes the "StopWords Updated" analytics event.
make_setting_route!(
    "/stop-words",
    put,
    std::collections::BTreeSet<String>,
    stop_words,
    "stopWords",
    analytics,
    |stop_words: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
        use serde_json::json;

        // Number of stop words in the payload, `null` when no value was provided.
        let total = stop_words.as_ref().map(|words| words.len());

        analytics.publish(
            "StopWords Updated".to_string(),
            json!({
                "stop_words": {
                    "total": total,
                },
            }),
            Some(req),
        );
    }
);
// Routes for the `synonyms` setting, mounted on `/synonyms` with `PUT` as the
// update verb. The trailing closure publishes the "Synonyms Updated" analytics event.
make_setting_route!(
    "/synonyms",
    put,
    std::collections::BTreeMap<String, Vec<String>>,
    synonyms,
    "synonyms",
    analytics,
    |synonyms: &Option<std::collections::BTreeMap<String, Vec<String>>>, req: &HttpRequest| {
        use serde_json::json;

        // Number of entries in the synonyms map, `null` when no value was provided.
        let total = synonyms.as_ref().map(|entries| entries.len());

        analytics.publish(
            "Synonyms Updated".to_string(),
            json!({
                "synonyms": {
                    "total": total,
                },
            }),
            Some(req),
        );
    }
);
// Routes for the `distinctAttribute` setting, mounted on `/distinct-attribute` with
// `PUT` as the update verb. The trailing closure publishes the
// "DistinctAttribute Updated" analytics event.
make_setting_route!(
    "/distinct-attribute",
    put,
    String,
    distinct_attribute,
    "distinctAttribute",
    analytics,
    |distinct: &Option<String>, req: &HttpRequest| {
        use serde_json::json;

        // Only whether a value was provided is reported, never the attribute name.
        let is_set = distinct.is_some();

        analytics.publish(
            "DistinctAttribute Updated".to_string(),
            json!({
                "distinct_attribute": {
                    "set": is_set,
                }
            }),
            Some(req),
        );
    }
);
// Routes for the `rankingRules` setting, mounted on `/ranking-rules` with `PUT` as
// the update verb. The trailing closure publishes the "RankingRules Updated"
// analytics event.
make_setting_route!(
    "/ranking-rules",
    put,
    Vec<String>,
    ranking_rules,
    "rankingRules",
    analytics,
    |setting: &Option<Vec<String>>, req: &HttpRequest| {
        use serde_json::json;

        let rules = setting.as_ref();

        // Index of a rule in the submitted list; `null` when the rule is missing or
        // when no list was provided at all.
        let position_of = |rule: &str| {
            rules.map(|rules| rules.iter().position(|candidate| candidate == rule))
        };

        // Comma-separated list of the rules that do not contain a `:`.
        let values = rules.map(|rules| {
            rules
                .iter()
                .filter(|rule| !rule.contains(':'))
                .cloned()
                .collect::<Vec<_>>()
                .join(", ")
        });

        analytics.publish(
            "RankingRules Updated".to_string(),
            json!({
                "ranking_rules": {
                    "words_position": position_of("words"),
                    "typo_position": position_of("typo"),
                    "proximity_position": position_of("proximity"),
                    "attribute_position": position_of("attribute"),
                    "sort_position": position_of("sort"),
                    "exactness_position": position_of("exactness"),
                    "values": values,
                }
            }),
            Some(req),
        );
    }
);
// Routes for the `faceting` setting, mounted on `/faceting` with `PATCH` as the
// update verb. The trailing closure publishes the "Faceting Updated" analytics event.
make_setting_route!(
    "/faceting",
    patch,
    meilisearch_types::settings::FacetingSettings,
    faceting,
    "faceting",
    analytics,
    |setting: &Option<meilisearch_types::settings::FacetingSettings>, req: &HttpRequest| {
        use serde_json::json;

        // `null` unless the payload explicitly sets `max_values_per_facet`.
        let max_values_per_facet =
            setting.as_ref().and_then(|faceting| faceting.max_values_per_facet.set());

        analytics.publish(
            "Faceting Updated".to_string(),
            json!({
                "faceting": {
                    "max_values_per_facet": max_values_per_facet,
                },
            }),
            Some(req),
        );
    }
);
// Routes for the `pagination` setting, mounted on `/pagination` with `PATCH` as the
// update verb. The trailing closure publishes the "Pagination Updated" analytics event.
make_setting_route!(
    "/pagination",
    patch,
    meilisearch_types::settings::PaginationSettings,
    pagination,
    "pagination",
    analytics,
    |setting: &Option<meilisearch_types::settings::PaginationSettings>, req: &HttpRequest| {
        use serde_json::json;

        // `null` unless the payload explicitly sets `max_total_hits`.
        let max_total_hits =
            setting.as_ref().and_then(|pagination| pagination.max_total_hits.set());

        analytics.publish(
            "Pagination Updated".to_string(),
            json!({
                "pagination": {
                    "max_total_hits": max_total_hits,
                },
            }),
            Some(req),
        );
    }
);
/// Generates the `configure` function of the settings routes.
///
/// The generated function first registers the root resource (`""`) handling
/// `PATCH`, `GET` and `DELETE` on the whole settings object, then registers the
/// `resources()` of every module name given to the macro, in the order given.
macro_rules! generate_configure {
    ($($setting:ident),*) => {
        pub fn configure(cfg: &mut web::ServiceConfig) {
            use crate::extractors::sequential_extractor::SeqHandler;

            // Resource operating on all the settings at once.
            let all_settings = web::resource("")
                .route(web::patch().to(SeqHandler(update_all)))
                .route(web::get().to(SeqHandler(get_all)))
                .route(web::delete().to(SeqHandler(delete_all)));

            // One additional service per sub-setting module.
            cfg.service(all_settings)
                $(.service($setting::resources()))*;
        }
    };
}
// Expands to `configure`, which mounts the whole-settings resource followed by the
// `resources()` of each sub-setting module listed below, in this order.
generate_configure!(
filterable_attributes,
sortable_attributes,
displayed_attributes,
searchable_attributes,
distinct_attribute,
stop_words,
synonyms,
ranking_rules,
typo_tolerance,
pagination,
faceting
);
/// Updates several settings of an index in a single request.
///
/// A "Settings Updated" analytics event summarising the payload is published first.
/// Fields that are not explicitly set in the payload are reported as `null`. A
/// `SettingsUpdate` task is then registered on the scheduler from a blocking thread,
/// and its summary is returned with a `202 Accepted` status.
///
/// # Errors
///
/// Returns an error when the index uid is rejected by `IndexUid::try_from`, when the
/// blocking registration task cannot be joined, or when the registration itself fails.
pub async fn update_all(
    index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
    body: web::Json<Settings<Unchecked>>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let new_settings = body.into_inner();

    // Borrowed views of the settings that were explicitly set (`None` otherwise).
    let ranking_rules = new_settings.ranking_rules.as_ref().set();
    let searchable = new_settings.searchable_attributes.as_ref().set();
    let displayed = new_settings.displayed_attributes.as_ref().set();
    let sortable = new_settings.sortable_attributes.as_ref().set();
    let filterable = new_settings.filterable_attributes.as_ref().set();
    let typo_tolerance = new_settings.typo_tolerance.as_ref().set();
    let faceting = new_settings.faceting.as_ref().set();
    let pagination = new_settings.pagination.as_ref().set();

    // The word-size thresholds are nested one level deeper; unwrap that level once.
    let min_word_sizes =
        typo_tolerance.and_then(|typo| typo.min_word_size_for_typos.as_ref().set());

    // Index of a rule in the submitted ranking rules; `null` when the rule is missing
    // or when the ranking rules were not set.
    let rule_position = |rule: &str| {
        ranking_rules.map(|rules| rules.iter().position(|candidate| candidate == rule))
    };

    analytics.publish(
        "Settings Updated".to_string(),
        json!({
            "ranking_rules": {
                "words_position": rule_position("words"),
                "typo_position": rule_position("typo"),
                "proximity_position": rule_position("proximity"),
                "attribute_position": rule_position("attribute"),
                "sort_position": rule_position("sort"),
                "exactness_position": rule_position("exactness"),
                "values": ranking_rules.map(|rules| rules.iter().filter(|rule| !rule.contains(':')).cloned().collect::<Vec<_>>().join(", ")),
            },
            "searchable_attributes": {
                "total": searchable.map(|attributes| attributes.len()),
                "with_wildcard": searchable.map(|attributes| attributes.iter().any(|attribute| attribute == "*")),
            },
            "displayed_attributes": {
                "total": displayed.map(|attributes| attributes.len()),
                "with_wildcard": displayed.map(|attributes| attributes.iter().any(|attribute| attribute == "*")),
            },
            "sortable_attributes": {
                "total": sortable.map(|attributes| attributes.len()),
                "has_geo": sortable.map(|attributes| attributes.contains("_geo")),
            },
            "filterable_attributes": {
                "total": filterable.map(|attributes| attributes.len()),
                "has_geo": filterable.map(|attributes| attributes.contains("_geo")),
            },
            "distinct_attribute": {
                "set": new_settings.distinct_attribute.as_ref().set().is_some()
            },
            "typo_tolerance": {
                "enabled": typo_tolerance
                    .and_then(|typo| typo.enabled.as_ref().set())
                    .copied(),
                "disable_on_attributes": typo_tolerance
                    .and_then(|typo| typo.disable_on_attributes.as_ref().set())
                    .map(|attributes| !attributes.is_empty()),
                "disable_on_words": typo_tolerance
                    .and_then(|typo| typo.disable_on_words.as_ref().set())
                    .map(|words| !words.is_empty()),
                "min_word_size_for_one_typo": min_word_sizes
                    .and_then(|sizes| sizes.one_typo.set()),
                "min_word_size_for_two_typos": min_word_sizes
                    .and_then(|sizes| sizes.two_typos.set()),
            },
            "faceting": {
                "max_values_per_facet": faceting
                    .and_then(|faceting| faceting.max_values_per_facet.as_ref().set()),
            },
            "pagination": {
                "max_total_hits": pagination
                    .and_then(|pagination| pagination.max_total_hits.as_ref().set()),
            },
            "stop_words": {
                "total": new_settings.stop_words.as_ref().set().map(|words| words.len()),
            },
            "synonyms": {
                "total": new_settings.synonyms.as_ref().set().map(|entries| entries.len()),
            },
        }),
        Some(&req),
    );

    let allow_index_creation = index_scheduler.filters().allow_index_creation;
    let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
    let task = KindWithContent::SettingsUpdate {
        index_uid,
        new_settings: Box::new(new_settings),
        is_deletion: false,
        allow_index_creation,
    };

    // Registration runs on a blocking thread; the first `?` is for the join error,
    // the second one for the registration result.
    let registration = tokio::task::spawn_blocking(move || index_scheduler.register(task));
    let task: SummarizedTaskView = registration.await??.into();

    debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
}
pub async fn get_all(
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_GET }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let index = index_scheduler.index(&index_uid)?;
let rtxn = index.read_txn()?;
let new_settings = settings(&index, &rtxn)?;
debug!("returns: {:?}", new_settings);
Ok(HttpResponse::Ok().json(new_settings))
}
/// Registers a task that replaces the settings of an index with `Settings::cleared()`.
///
/// The `SettingsUpdate` task is flagged with `is_deletion: true`, registered on the
/// scheduler from a blocking thread, and its summary is returned with a
/// `202 Accepted` status.
///
/// # Errors
///
/// Returns an error when the index uid is rejected by `IndexUid::try_from`, when the
/// blocking registration task cannot be joined, or when the registration itself fails.
pub async fn delete_all(
    index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
    let allow_index_creation = index_scheduler.filters().allow_index_creation;
    let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();

    let task = KindWithContent::SettingsUpdate {
        index_uid,
        new_settings: Box::new(Settings::cleared().into_unchecked()),
        is_deletion: true,
        allow_index_creation,
    };

    // Registration runs on a blocking thread; the first `?` is for the join error,
    // the second one for the registration result.
    let registration = tokio::task::spawn_blocking(move || index_scheduler.register(task));
    let task: SummarizedTaskView = registration.await??.into();

    debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
}