mirror of https://github.com/meilisearch/MeiliSearch (synced 2025-07-03 11:57:07 +02:00)

Move crates under a sub folder to clean up the code

parent 30f3c30389
commit 9c1e54a2c8
1062 changed files with 19 additions and 20 deletions
109
crates/meilisearch/src/analytics/mock_analytics.rs
Normal file
@@ -0,0 +1,109 @@
use std::any::Any;
use std::sync::Arc;

use actix_web::HttpRequest;
use meilisearch_types::InstanceUid;
use serde_json::Value;

use super::{find_user_id, Analytics, DocumentDeletionKind, DocumentFetchKind};
use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery};
use crate::Opt;

pub struct MockAnalytics {
    instance_uid: Option<InstanceUid>,
}

#[derive(Default)]
pub struct SearchAggregator;

#[allow(dead_code)]
impl SearchAggregator {
    pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
        Self
    }

    pub fn succeed(&mut self, _: &dyn Any) {}
}

#[derive(Default)]
pub struct SimilarAggregator;

#[allow(dead_code)]
impl SimilarAggregator {
    pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
        Self
    }

    pub fn succeed(&mut self, _: &dyn Any) {}
}

#[derive(Default)]
pub struct MultiSearchAggregator;

#[allow(dead_code)]
impl MultiSearchAggregator {
    pub fn from_federated_search(_: &dyn Any, _: &dyn Any) -> Self {
        Self
    }

    pub fn succeed(&mut self) {}
}

#[derive(Default)]
pub struct FacetSearchAggregator;

#[allow(dead_code)]
impl FacetSearchAggregator {
    pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
        Self
    }

    pub fn succeed(&mut self, _: &dyn Any) {}
}

impl MockAnalytics {
    #[allow(clippy::new_ret_no_self)]
    pub fn new(opt: &Opt) -> Arc<dyn Analytics> {
        let instance_uid = find_user_id(&opt.db_path);
        Arc::new(Self { instance_uid })
    }
}

impl Analytics for MockAnalytics {
    fn instance_uid(&self) -> Option<&meilisearch_types::InstanceUid> {
        self.instance_uid.as_ref()
    }

    // These methods are noop and should be optimized out
    fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {}
    fn get_search(&self, _aggregate: super::SearchAggregator) {}
    fn post_search(&self, _aggregate: super::SearchAggregator) {}
    fn get_similar(&self, _aggregate: super::SimilarAggregator) {}
    fn post_similar(&self, _aggregate: super::SimilarAggregator) {}
    fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {}
    fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {}
    fn add_documents(
        &self,
        _documents_query: &UpdateDocumentsQuery,
        _index_creation: bool,
        _request: &HttpRequest,
    ) {
    }
    fn delete_documents(&self, _kind: DocumentDeletionKind, _request: &HttpRequest) {}
    fn update_documents(
        &self,
        _documents_query: &UpdateDocumentsQuery,
        _index_creation: bool,
        _request: &HttpRequest,
    ) {
    }
    fn update_documents_by_function(
        &self,
        _documents_query: &DocumentEditionByFunction,
        _index_creation: bool,
        _request: &HttpRequest,
    ) {
    }
    fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
    fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
}
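This no-op implementation stands in for the Segment-backed analytics whenever the `analytics` feature is disabled. A minimal usage sketch, assuming it runs inside this crate with an `Opt` already in hand (the helper name is illustrative, not part of the commit):

use std::sync::Arc;

fn analytics_for_tests(opt: &Opt) -> Arc<dyn Analytics> {
    // `MockAnalytics::new` already hands back an `Arc<dyn Analytics>`, so call sites
    // stay generic over the real Segment implementation and this no-op one.
    let analytics = MockAnalytics::new(opt);
    // Every trait method has an empty body, so this publish call does nothing.
    analytics.publish("Launched".to_string(), serde_json::json!({}), None);
    analytics
}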
137
crates/meilisearch/src/analytics/mod.rs
Normal file
@@ -0,0 +1,137 @@
mod mock_analytics;
#[cfg(feature = "analytics")]
mod segment_analytics;

use std::fs;
use std::path::{Path, PathBuf};
use std::str::FromStr;

use actix_web::HttpRequest;
use meilisearch_types::InstanceUid;
pub use mock_analytics::MockAnalytics;
use once_cell::sync::Lazy;
use platform_dirs::AppDirs;
use serde_json::Value;

use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery};

// If the analytics feature is disabled,
// `SegmentAnalytics` points to the mock instead of the real analytics.
#[cfg(not(feature = "analytics"))]
pub type SegmentAnalytics = mock_analytics::MockAnalytics;
#[cfg(not(feature = "analytics"))]
pub type SearchAggregator = mock_analytics::SearchAggregator;
#[cfg(not(feature = "analytics"))]
pub type SimilarAggregator = mock_analytics::SimilarAggregator;
#[cfg(not(feature = "analytics"))]
pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;
#[cfg(not(feature = "analytics"))]
pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator;

// If the analytics feature is enabled, we use the real analytics.
#[cfg(feature = "analytics")]
pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
#[cfg(feature = "analytics")]
pub type SearchAggregator = segment_analytics::SearchAggregator;
#[cfg(feature = "analytics")]
pub type SimilarAggregator = segment_analytics::SimilarAggregator;
#[cfg(feature = "analytics")]
pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator;
#[cfg(feature = "analytics")]
pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator;

/// The Meilisearch config dir:
/// `~/.config/Meilisearch` on *NIX or *BSD.
/// `~/Library/ApplicationSupport` on macOS.
/// `%APPDATA%` (= `C:\Users\%USERNAME%\AppData\Roaming`) on Windows.
static MEILISEARCH_CONFIG_PATH: Lazy<Option<PathBuf>> =
    Lazy::new(|| AppDirs::new(Some("Meilisearch"), false).map(|appdir| appdir.config_dir));

fn config_user_id_path(db_path: &Path) -> Option<PathBuf> {
    db_path
        .canonicalize()
        .ok()
        .map(|path| path.join("instance-uid").display().to_string().replace('/', "-"))
        .zip(MEILISEARCH_CONFIG_PATH.as_ref())
        .map(|(filename, config_path)| config_path.join(filename.trim_start_matches('-')))
}

/// Look for the instance-uid in the `data.ms` or in `~/.config/Meilisearch/path-to-db-instance-uid`.
fn find_user_id(db_path: &Path) -> Option<InstanceUid> {
    fs::read_to_string(db_path.join("instance-uid"))
        .ok()
        .or_else(|| fs::read_to_string(config_user_id_path(db_path)?).ok())
        .and_then(|uid| InstanceUid::from_str(&uid).ok())
}

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocumentDeletionKind {
    PerDocumentId,
    ClearAll,
    PerBatch,
    PerFilter,
}

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocumentFetchKind {
    PerDocumentId { retrieve_vectors: bool },
    Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
}

pub trait Analytics: Sync + Send {
    fn instance_uid(&self) -> Option<&InstanceUid>;

    /// The method used to publish most analytics that do not need to be batched every hour.
    fn publish(&self, event_name: String, send: Value, request: Option<&HttpRequest>);

    /// This method should be called to aggregate a get search request.
    fn get_search(&self, aggregate: SearchAggregator);

    /// This method should be called to aggregate a post search request.
    fn post_search(&self, aggregate: SearchAggregator);

    /// This method should be called to aggregate a get similar request.
    fn get_similar(&self, aggregate: SimilarAggregator);

    /// This method should be called to aggregate a post similar request.
    fn post_similar(&self, aggregate: SimilarAggregator);

    /// This method should be called to aggregate a post array of searches.
    fn post_multi_search(&self, aggregate: MultiSearchAggregator);

    /// This method should be called to aggregate post facet values searches.
    fn post_facet_search(&self, aggregate: FacetSearchAggregator);

    // This method should be called to aggregate an add documents request.
    fn add_documents(
        &self,
        documents_query: &UpdateDocumentsQuery,
        index_creation: bool,
        request: &HttpRequest,
    );

    // This method should be called to aggregate a fetch documents request.
    fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest);

    // This method should be called to aggregate a fetch documents request.
    fn post_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest);

    // This method should be called to aggregate a delete documents request.
    fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest);

    // This method should be called to batch an update documents request.
    fn update_documents(
        &self,
        documents_query: &UpdateDocumentsQuery,
        index_creation: bool,
        request: &HttpRequest,
    );

    // This method should be called to batch an update documents by function request.
    fn update_documents_by_function(
        &self,
        documents_query: &DocumentEditionByFunction,
        index_creation: bool,
        request: &HttpRequest,
    );
}
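The `#[cfg]` aliases above let the rest of the crate refer to one set of type names whether or not the `analytics` feature is compiled in. A self-contained sketch of the same pattern (module and type names here are illustrative, not from this commit):

// Illustrative reduction of the feature-gated aliasing used above.
mod mock {
    pub struct Collector;
    impl Collector {
        pub fn record(&self, _event: &str) {}
    }
}

#[cfg(feature = "analytics")]
mod real {
    pub struct Collector;
    impl Collector {
        pub fn record(&self, event: &str) {
            println!("recording {event}");
        }
    }
}

// Callers always name `Collector`; the feature flag decides which module backs it.
#[cfg(feature = "analytics")]
pub type Collector = real::Collector;
#[cfg(not(feature = "analytics"))]
pub type Collector = mock::Collector;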
1994
crates/meilisearch/src/analytics/segment_analytics.rs
Normal file
File diff suppressed because it is too large
227
crates/meilisearch/src/error.rs
Normal file
@@ -0,0 +1,227 @@
use actix_web as aweb;
use aweb::error::{JsonPayloadError, QueryPayloadError};
use byte_unit::{Byte, UnitType};
use meilisearch_types::document_formats::{DocumentFormatError, PayloadType};
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError};
use meilisearch_types::milli::OrderBy;
use serde_json::Value;
use tokio::task::JoinError;

#[derive(Debug, thiserror::Error)]
pub enum MeilisearchHttpError {
    #[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
            .0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))]
    MissingContentType(Vec<String>),
    #[error("The `/logs/stream` route is currently in use by someone else.")]
    AlreadyUsedLogRoute,
    #[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")]
    CsvDelimiterWithWrongContentType(String),
    #[error(
        "The Content-Type `{0}` is invalid. Accepted values for the Content-Type header are: {}",
        .1.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
    )]
    InvalidContentType(String, Vec<String>),
    #[error("Document `{0}` not found.")]
    DocumentNotFound(String),
    #[error("Sending an empty filter is forbidden.")]
    EmptyFilter,
    #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
    InvalidExpression(&'static [&'static str], Value),
    #[error("Using `federationOptions` is not allowed in a non-federated search.\n - Hint: remove `federationOptions` from query #{0} or add `federation` to the request.")]
    FederationOptionsInNonFederatedRequest(usize),
    #[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `{1}` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search")]
    PaginationInFederatedQuery(usize, &'static str),
    #[error("Inside `.queries[{0}]`: Using facet options is not allowed in federated queries.\n - Hint: remove `facets` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.facetsByIndex.{1}: {2:?}` for facets in federated search")]
    FacetsInFederatedQuery(usize, String, Vec<String>),
    #[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders {index_facet_order}.\n - Hint: Remove `federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.")]
    InconsistentFacetOrder {
        facet: String,
        previous_facet_order: OrderBy,
        previous_uid: String,
        index_facet_order: OrderBy,
        current_uid: String,
    },
    #[error("A {0} payload is missing.")]
    MissingPayload(PayloadType),
    #[error("Too many search requests running at the same time: {0}. Retry after 10s.")]
    TooManySearchRequests(usize),
    #[error("Internal error: Search limiter is down.")]
    SearchLimiterIsDown,
    #[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_u64(*.0 as u64).get_appropriate_unit(UnitType::Binary))]
    PayloadTooLarge(usize),
    #[error("Two indexes must be given for each swap. The list `[{}]` contains {} indexes.",
            .0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len()
    )]
    SwapIndexPayloadWrongLength(Vec<IndexUid>),
    #[error(transparent)]
    IndexUid(#[from] IndexUidFormatError),
    #[error(transparent)]
    SerdeJson(#[from] serde_json::Error),
    #[error(transparent)]
    HeedError(#[from] meilisearch_types::heed::Error),
    #[error(transparent)]
    IndexScheduler(#[from] index_scheduler::Error),
    #[error(transparent)]
    Milli(#[from] meilisearch_types::milli::Error),
    #[error(transparent)]
    Payload(#[from] PayloadError),
    #[error(transparent)]
    FileStore(#[from] file_store::Error),
    #[error(transparent)]
    DocumentFormat(#[from] DocumentFormatError),
    #[error(transparent)]
    Join(#[from] JoinError),
    #[error("Invalid request: missing `hybrid` parameter when `vector` is present.")]
    MissingSearchHybrid,
}

impl ErrorCode for MeilisearchHttpError {
    fn error_code(&self) -> Code {
        match self {
            MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType,
            MeilisearchHttpError::AlreadyUsedLogRoute => Code::BadRequest,
            MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType,
            MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
            MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
            MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound,
            MeilisearchHttpError::EmptyFilter => Code::InvalidDocumentFilter,
            MeilisearchHttpError::InvalidExpression(_, _) => Code::InvalidSearchFilter,
            MeilisearchHttpError::PayloadTooLarge(_) => Code::PayloadTooLarge,
            MeilisearchHttpError::TooManySearchRequests(_) => Code::TooManySearchRequests,
            MeilisearchHttpError::SearchLimiterIsDown => Code::Internal,
            MeilisearchHttpError::SwapIndexPayloadWrongLength(_) => Code::InvalidSwapIndexes,
            MeilisearchHttpError::IndexUid(e) => e.error_code(),
            MeilisearchHttpError::SerdeJson(_) => Code::Internal,
            MeilisearchHttpError::HeedError(_) => Code::Internal,
            MeilisearchHttpError::IndexScheduler(e) => e.error_code(),
            MeilisearchHttpError::Milli(e) => e.error_code(),
            MeilisearchHttpError::Payload(e) => e.error_code(),
            MeilisearchHttpError::FileStore(_) => Code::Internal,
            MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
            MeilisearchHttpError::Join(_) => Code::Internal,
            MeilisearchHttpError::MissingSearchHybrid => Code::MissingSearchHybrid,
            MeilisearchHttpError::FederationOptionsInNonFederatedRequest(_) => {
                Code::InvalidMultiSearchFederationOptions
            }
            MeilisearchHttpError::PaginationInFederatedQuery(_, _) => {
                Code::InvalidMultiSearchQueryPagination
            }
            MeilisearchHttpError::FacetsInFederatedQuery(..) => Code::InvalidMultiSearchQueryFacets,
            MeilisearchHttpError::InconsistentFacetOrder { .. } => {
                Code::InvalidMultiSearchFacetOrder
            }
        }
    }
}

impl From<MeilisearchHttpError> for aweb::Error {
    fn from(other: MeilisearchHttpError) -> Self {
        aweb::Error::from(ResponseError::from(other))
    }
}

impl From<aweb::error::PayloadError> for MeilisearchHttpError {
    fn from(error: aweb::error::PayloadError) -> Self {
        match error {
            aweb::error::PayloadError::Incomplete(_) => MeilisearchHttpError::Payload(
                PayloadError::Payload(ActixPayloadError::IncompleteError),
            ),
            _ => MeilisearchHttpError::Payload(PayloadError::Payload(
                ActixPayloadError::OtherError(error),
            )),
        }
    }
}

#[derive(Debug, thiserror::Error)]
pub enum ActixPayloadError {
    #[error("The provided payload is incomplete and cannot be parsed")]
    IncompleteError,
    #[error(transparent)]
    OtherError(aweb::error::PayloadError),
}

#[derive(Debug, thiserror::Error)]
pub enum PayloadError {
    #[error(transparent)]
    Payload(ActixPayloadError),
    #[error(transparent)]
    Json(JsonPayloadError),
    #[error(transparent)]
    Query(QueryPayloadError),
    #[error("The json payload provided is malformed. `{0}`.")]
    MalformedPayload(serde_json::error::Error),
    #[error("A json payload is missing.")]
    MissingPayload,
    #[error("Error while receiving the payload. `{0}`.")]
    ReceivePayload(Box<dyn std::error::Error + Send + Sync + 'static>),
}

impl ErrorCode for PayloadError {
    fn error_code(&self) -> Code {
        match self {
            PayloadError::Payload(e) => match e {
                ActixPayloadError::IncompleteError => Code::BadRequest,
                ActixPayloadError::OtherError(error) => match error {
                    aweb::error::PayloadError::EncodingCorrupted => Code::Internal,
                    aweb::error::PayloadError::Overflow => Code::PayloadTooLarge,
                    aweb::error::PayloadError::UnknownLength => Code::Internal,
                    aweb::error::PayloadError::Http2Payload(_) => Code::Internal,
                    aweb::error::PayloadError::Io(_) => Code::Internal,
                    _ => todo!(),
                },
            },
            PayloadError::Json(err) => match err {
                JsonPayloadError::Overflow { .. } => Code::PayloadTooLarge,
                JsonPayloadError::ContentType => Code::UnsupportedMediaType,
                JsonPayloadError::Payload(aweb::error::PayloadError::Overflow) => {
                    Code::PayloadTooLarge
                }
                JsonPayloadError::Payload(_) => Code::BadRequest,
                JsonPayloadError::Deserialize(_) => Code::BadRequest,
                JsonPayloadError::Serialize(_) => Code::Internal,
                _ => Code::Internal,
            },
            PayloadError::Query(err) => match err {
                QueryPayloadError::Deserialize(_) => Code::BadRequest,
                _ => Code::Internal,
            },
            PayloadError::MissingPayload => Code::MissingPayload,
            PayloadError::MalformedPayload(_) => Code::MalformedPayload,
            PayloadError::ReceivePayload(_) => Code::Internal,
        }
    }
}

impl From<JsonPayloadError> for PayloadError {
    fn from(other: JsonPayloadError) -> Self {
        match other {
            JsonPayloadError::Deserialize(e)
                if e.classify() == serde_json::error::Category::Eof
                    && e.line() == 1
                    && e.column() == 0 =>
            {
                Self::MissingPayload
            }
            JsonPayloadError::Deserialize(e)
                if e.classify() != serde_json::error::Category::Data =>
            {
                Self::MalformedPayload(e)
            }
            _ => Self::Json(other),
        }
    }
}

impl From<QueryPayloadError> for PayloadError {
    fn from(other: QueryPayloadError) -> Self {
        Self::Query(other)
    }
}

impl From<PayloadError> for aweb::Error {
    fn from(other: PayloadError) -> Self {
        aweb::Error::from(ResponseError::from(other))
    }
}
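Every variant above carries both a display message and a `Code`, so a route can bubble a `MeilisearchHttpError` straight up and let actix-web render the response. A minimal sketch, assuming it lives in this crate where `MeilisearchHttpError` and `ResponseError` are in scope (the handler name is illustrative):

async fn get_document_sketch(id: String) -> Result<actix_web::HttpResponse, ResponseError> {
    // The `error_code` mapping above decides the HTTP status and the error code
    // of the JSON body that `ResponseError` serializes for the client.
    Err(MeilisearchHttpError::DocumentNotFound(id).into())
}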
25
crates/meilisearch/src/extractors/authentication/error.rs
Normal file
@@ -0,0 +1,25 @@
use meilisearch_types::error::{Code, ErrorCode};

#[derive(Debug, thiserror::Error)]
pub enum AuthenticationError {
    #[error("The Authorization header is missing. It must use the bearer authorization method.")]
    MissingAuthorizationHeader,
    #[error("The provided API key is invalid.")]
    InvalidToken,
    // Triggered on configuration error.
    #[error("An internal error has occurred. `Irretrievable state`.")]
    IrretrievableState,
    #[error("Meilisearch is running without a master key. To access this API endpoint, you must have set a master key at launch.")]
    MissingMasterKey,
}

impl ErrorCode for AuthenticationError {
    fn error_code(&self) -> Code {
        match self {
            AuthenticationError::MissingAuthorizationHeader => Code::MissingAuthorizationHeader,
            AuthenticationError::InvalidToken => Code::InvalidApiKey,
            AuthenticationError::IrretrievableState => Code::Internal,
            AuthenticationError::MissingMasterKey => Code::MissingMasterKey,
        }
    }
}
341
crates/meilisearch/src/extractors/authentication/mod.rs
Normal file
@@ -0,0 +1,341 @@
mod error;

use std::marker::PhantomData;
use std::ops::Deref;
use std::pin::Pin;

use actix_web::web::Data;
use actix_web::FromRequest;
pub use error::AuthenticationError;
use futures::future::err;
use futures::Future;
use meilisearch_auth::{AuthController, AuthFilter};
use meilisearch_types::error::{Code, ResponseError};

use self::policies::AuthError;

pub struct GuardedData<P, D> {
    data: D,
    filters: AuthFilter,
    _marker: PhantomData<P>,
}

impl<P, D> GuardedData<P, D> {
    pub fn filters(&self) -> &AuthFilter {
        &self.filters
    }

    async fn auth_bearer(
        auth: Data<AuthController>,
        token: String,
        index: Option<String>,
        data: Option<D>,
    ) -> Result<Self, ResponseError>
    where
        P: Policy + 'static,
    {
        let missing_master_key = auth.get_master_key().is_none();

        match Self::authenticate(auth, token, index).await? {
            Ok(filters) => match data {
                Some(data) => Ok(Self { data, filters, _marker: PhantomData }),
                None => Err(AuthenticationError::IrretrievableState.into()),
            },
            Err(_) if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
            Err(e) => Err(ResponseError::from_msg(e.to_string(), Code::InvalidApiKey)),
        }
    }

    async fn auth_token(auth: Data<AuthController>, data: Option<D>) -> Result<Self, ResponseError>
    where
        P: Policy + 'static,
    {
        let missing_master_key = auth.get_master_key().is_none();

        match Self::authenticate(auth, String::new(), None).await? {
            Ok(filters) => match data {
                Some(data) => Ok(Self { data, filters, _marker: PhantomData }),
                None => Err(AuthenticationError::IrretrievableState.into()),
            },
            Err(_) if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
            Err(_) => Err(AuthenticationError::MissingAuthorizationHeader.into()),
        }
    }

    async fn authenticate(
        auth: Data<AuthController>,
        token: String,
        index: Option<String>,
    ) -> Result<Result<AuthFilter, AuthError>, ResponseError>
    where
        P: Policy + 'static,
    {
        tokio::task::spawn_blocking(move || P::authenticate(auth, token.as_ref(), index.as_deref()))
            .await
            .map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))
    }
}

impl<P, D> Deref for GuardedData<P, D> {
    type Target = D;

    fn deref(&self) -> &Self::Target {
        &self.data
    }
}

impl<P: Policy + 'static, D: 'static + Clone> FromRequest for GuardedData<P, D> {
    type Error = ResponseError;

    type Future = Pin<Box<dyn Future<Output = Result<Self, Self::Error>>>>;

    fn from_request(
        req: &actix_web::HttpRequest,
        _payload: &mut actix_web::dev::Payload,
    ) -> Self::Future {
        match req.app_data::<Data<AuthController>>().cloned() {
            Some(auth) => match req
                .headers()
                .get("Authorization")
                .map(|type_token| type_token.to_str().unwrap_or_default().splitn(2, ' '))
            {
                Some(mut type_token) => match type_token.next() {
                    Some("Bearer") => {
                        // TODO: find a less hardcoded way?
                        let index = req.match_info().get("index_uid");
                        match type_token.next() {
                            Some(token) => Box::pin(Self::auth_bearer(
                                auth,
                                token.to_string(),
                                index.map(String::from),
                                req.app_data::<D>().cloned(),
                            )),
                            None => Box::pin(err(AuthenticationError::InvalidToken.into())),
                        }
                    }
                    _otherwise => {
                        Box::pin(err(AuthenticationError::MissingAuthorizationHeader.into()))
                    }
                },
                None => Box::pin(Self::auth_token(auth, req.app_data::<D>().cloned())),
            },
            None => Box::pin(err(AuthenticationError::IrretrievableState.into())),
        }
    }
}

pub trait Policy {
    fn authenticate(
        auth: Data<AuthController>,
        token: &str,
        index: Option<&str>,
    ) -> Result<AuthFilter, policies::AuthError>;
}

pub mod policies {
    use actix_web::web::Data;
    use jsonwebtoken::{decode, Algorithm, DecodingKey, Validation};
    use meilisearch_auth::{AuthController, AuthFilter, SearchRules};
    use meilisearch_types::error::{Code, ErrorCode};
    // reexport actions in policies in order to be used in routes configuration.
    pub use meilisearch_types::keys::{actions, Action};
    use serde::{Deserialize, Serialize};
    use time::OffsetDateTime;
    use uuid::Uuid;

    use crate::extractors::authentication::Policy;

    enum TenantTokenOutcome {
        NotATenantToken,
        Valid(Uuid, SearchRules),
    }

    #[derive(thiserror::Error, Debug)]
    pub enum AuthError {
        #[error("Tenant token expired. Was valid up to `{exp}` and we're now `{now}`.")]
        ExpiredTenantToken { exp: i64, now: i64 },
        #[error("The provided API key is invalid.")]
        InvalidApiKey,
        #[error("The provided tenant token cannot access the index `{index}`, allowed indexes are {allowed:?}.")]
        TenantTokenAccessingnUnauthorizedIndex { index: String, allowed: Vec<String> },
        #[error(
            "The API key used to generate this tenant token cannot access the index `{index}`."
        )]
        TenantTokenApiKeyAccessingnUnauthorizedIndex { index: String },
        #[error(
            "The API key cannot access the index `{index}`, authorized indexes are {allowed:?}."
        )]
        ApiKeyAccessingnUnauthorizedIndex { index: String, allowed: Vec<String> },
        #[error("The provided tenant token is invalid.")]
        InvalidTenantToken,
        #[error("Could not decode tenant token, {0}.")]
        CouldNotDecodeTenantToken(jsonwebtoken::errors::Error),
        #[error("Invalid action `{0}`.")]
        InternalInvalidAction(u8),
    }

    impl From<jsonwebtoken::errors::Error> for AuthError {
        fn from(error: jsonwebtoken::errors::Error) -> Self {
            use jsonwebtoken::errors::ErrorKind;

            match error.kind() {
                ErrorKind::InvalidToken => AuthError::InvalidTenantToken,
                _ => AuthError::CouldNotDecodeTenantToken(error),
            }
        }
    }

    impl ErrorCode for AuthError {
        fn error_code(&self) -> Code {
            match self {
                AuthError::InternalInvalidAction(_) => Code::Internal,
                _ => Code::InvalidApiKey,
            }
        }
    }

    fn tenant_token_validation() -> Validation {
        let mut validation = Validation::default();
        validation.validate_exp = false;
        validation.required_spec_claims.remove("exp");
        validation.algorithms = vec![Algorithm::HS256, Algorithm::HS384, Algorithm::HS512];
        validation
    }

    /// Extracts the key id used to sign the payload, without performing any validation.
    fn extract_key_id(token: &str) -> Result<Uuid, AuthError> {
        let mut validation = tenant_token_validation();
        validation.insecure_disable_signature_validation();
        let dummy_key = DecodingKey::from_secret(b"secret");
        let token_data = decode::<Claims>(token, &dummy_key, &validation)?;

        // get token fields without validating it.
        let Claims { api_key_uid, .. } = token_data.claims;
        Ok(api_key_uid)
    }

    fn is_keys_action(action: u8) -> bool {
        use actions::*;
        matches!(action, KEYS_GET | KEYS_CREATE | KEYS_UPDATE | KEYS_DELETE)
    }

    pub struct ActionPolicy<const A: u8>;

    impl<const A: u8> Policy for ActionPolicy<A> {
        /// Attempts to grant authentication from a bearer token (that can be a tenant token or an API key), the requested Action,
        /// and a list of requested indexes.
        ///
        /// If the bearer token is not allowed for the specified indexes and action, returns an error.
        /// Otherwise, returns an object containing the generated permissions: the search filters to add to a search, and the list of allowed indexes
        /// (that may contain more indexes than requested).
        fn authenticate(
            auth: Data<AuthController>,
            token: &str,
            index: Option<&str>,
        ) -> Result<AuthFilter, AuthError> {
            // authenticate if token is the master key.
            // Without a master key, all routes are accessible except the key-related routes.
            if auth.get_master_key().map_or_else(|| !is_keys_action(A), |mk| mk == token) {
                return Ok(AuthFilter::default());
            }

            let (key_uuid, search_rules) =
                match ActionPolicy::<A>::authenticate_tenant_token(&auth, token) {
                    Ok(TenantTokenOutcome::Valid(key_uuid, search_rules)) => {
                        (key_uuid, Some(search_rules))
                    }
                    Ok(TenantTokenOutcome::NotATenantToken)
                    | Err(AuthError::InvalidTenantToken) => (
                        auth.get_optional_uid_from_encoded_key(token.as_bytes())
                            .map_err(|_e| AuthError::InvalidApiKey)?
                            .ok_or(AuthError::InvalidApiKey)?,
                        None,
                    ),
                    Err(e) => return Err(e),
                };

            // check that the indexes are allowed
            let action = Action::from_repr(A).ok_or(AuthError::InternalInvalidAction(A))?;
            let auth_filter = auth
                .get_key_filters(key_uuid, search_rules)
                .map_err(|_e| AuthError::InvalidApiKey)?;

            // First check if the index is authorized in the tenant token; this is public
            // information, so we can return a nice error message.
            if let Some(index) = index {
                if !auth_filter.tenant_token_is_index_authorized(index) {
                    return Err(AuthError::TenantTokenAccessingnUnauthorizedIndex {
                        index: index.to_string(),
                        allowed: auth_filter.tenant_token_list_index_authorized(),
                    });
                }
                if !auth_filter.api_key_is_index_authorized(index) {
                    if auth_filter.is_tenant_token() {
                        // If the error comes from a tenant token we cannot share the list
                        // of authorized indexes in the API key. This is not public information.
                        return Err(AuthError::TenantTokenApiKeyAccessingnUnauthorizedIndex {
                            index: index.to_string(),
                        });
                    } else {
                        // Otherwise we can share the list
                        // of authorized indexes in the API key.
                        return Err(AuthError::ApiKeyAccessingnUnauthorizedIndex {
                            index: index.to_string(),
                            allowed: auth_filter.api_key_list_index_authorized(),
                        });
                    }
                }
            }
            if auth.is_key_authorized(key_uuid, action, index).unwrap_or(false) {
                return Ok(auth_filter);
            }

            Err(AuthError::InvalidApiKey)
        }
    }

    impl<const A: u8> ActionPolicy<A> {
        fn authenticate_tenant_token(
            auth: &AuthController,
            token: &str,
        ) -> Result<TenantTokenOutcome, AuthError> {
            // Only search action can be accessed by a tenant token.
            if A != actions::SEARCH {
                return Ok(TenantTokenOutcome::NotATenantToken);
            }

            let uid = extract_key_id(token)?;

            // Check if tenant token is valid.
            let key = if let Some(key) = auth.generate_key(uid) {
                key
            } else {
                return Err(AuthError::InvalidTenantToken);
            };

            let data = decode::<Claims>(
                token,
                &DecodingKey::from_secret(key.as_bytes()),
                &tenant_token_validation(),
            )?;

            // Check if token is expired.
            if let Some(exp) = data.claims.exp {
                let now = OffsetDateTime::now_utc().unix_timestamp();
                if now > exp {
                    return Err(AuthError::ExpiredTenantToken { exp, now });
                }
            }

            Ok(TenantTokenOutcome::Valid(uid, data.claims.search_rules))
        }
    }

    #[derive(Debug, Serialize, Deserialize)]
    #[serde(rename_all = "camelCase")]
    struct Claims {
        search_rules: SearchRules,
        exp: Option<i64>,
        api_key_uid: Uuid,
    }
}
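Route handlers consume this machinery through the `GuardedData` extractor parameterized by an `ActionPolicy`; a hedged sketch of what a guarded handler could look like (the route body is illustrative, not from this commit):

use actix_web::web::Data;
use actix_web::HttpResponse;
use index_scheduler::IndexScheduler;
use meilisearch_types::error::ResponseError;

use crate::extractors::authentication::policies::{actions, ActionPolicy};
use crate::extractors::authentication::GuardedData;

// The extractor runs the policy first: the request only reaches the body below
// when the bearer token is authorized for the SEARCH action.
async fn search_sketch(
    index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
) -> Result<HttpResponse, ResponseError> {
    // Deref gives access to the wrapped Data<IndexScheduler>.
    let _scheduler: &Data<IndexScheduler> = &index_scheduler;
    Ok(HttpResponse::Ok().finish())
}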
4
crates/meilisearch/src/extractors/mod.rs
Normal file
@@ -0,0 +1,4 @@
pub mod payload;
#[macro_use]
pub mod authentication;
pub mod sequential_extractor;
110
crates/meilisearch/src/extractors/payload.rs
Normal file
@@ -0,0 +1,110 @@
use std::pin::Pin;
use std::task::{Context, Poll};

use actix_http::encoding::Decoder as Decompress;
use actix_web::{dev, web, FromRequest, HttpRequest};
use futures::future::{ready, Ready};
use futures::Stream;

use crate::error::MeilisearchHttpError;

pub struct Payload {
    payload: Decompress<dev::Payload>,
    limit: usize,
    remaining: usize,
}

pub struct PayloadConfig {
    limit: usize,
}

impl PayloadConfig {
    pub fn new(limit: usize) -> Self {
        Self { limit }
    }
}

impl Default for PayloadConfig {
    fn default() -> Self {
        Self { limit: 256 * 1024 }
    }
}

impl FromRequest for Payload {
    type Error = MeilisearchHttpError;

    type Future = Ready<Result<Payload, Self::Error>>;

    #[inline]
    fn from_request(req: &HttpRequest, payload: &mut dev::Payload) -> Self::Future {
        let limit = req
            .app_data::<PayloadConfig>()
            .map(|c| c.limit)
            .unwrap_or(PayloadConfig::default().limit);
        ready(Ok(Payload {
            payload: Decompress::from_headers(payload.take(), req.headers()),
            limit,
            remaining: limit,
        }))
    }
}

impl Stream for Payload {
    type Item = Result<web::Bytes, MeilisearchHttpError>;

    #[inline]
    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        match Pin::new(&mut self.payload).poll_next(cx) {
            Poll::Ready(Some(result)) => match result {
                Ok(bytes) => match self.remaining.checked_sub(bytes.len()) {
                    Some(new_limit) => {
                        self.remaining = new_limit;
                        Poll::Ready(Some(Ok(bytes)))
                    }
                    None => {
                        Poll::Ready(Some(Err(MeilisearchHttpError::PayloadTooLarge(self.limit))))
                    }
                },
                x => Poll::Ready(Some(x.map_err(MeilisearchHttpError::from))),
            },
            otherwise => otherwise.map(|o| o.map(|o| o.map_err(MeilisearchHttpError::from))),
        }
    }
}

#[cfg(test)]
mod tests {
    use actix_http::encoding::Decoder as Decompress;
    use actix_http::BoxedPayloadStream;
    use bytes::Bytes;
    use futures_util::StreamExt;
    use meili_snap::snapshot;

    use super::*;

    #[actix_rt::test]
    async fn payload_to_large() {
        let stream = futures::stream::iter(vec![
            Ok(Bytes::from("1")),
            Ok(Bytes::from("2")),
            Ok(Bytes::from("3")),
            Ok(Bytes::from("4")),
        ]);
        let boxed_stream: BoxedPayloadStream = Box::pin(stream);
        let actix_payload = dev::Payload::from(boxed_stream);

        let payload = Payload {
            limit: 3,
            remaining: 3,
            payload: Decompress::new(actix_payload, actix_http::ContentEncoding::Identity),
        };

        let mut enumerated_payload_stream = payload.enumerate();

        while let Some((idx, chunk)) = enumerated_payload_stream.next().await {
            if idx == 3 {
                snapshot!(chunk.unwrap_err(), @"The provided payload reached the size limit. The maximum accepted payload size is 3 B.");
            }
        }
    }
}
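The `limit` comes from an application-level `PayloadConfig`, so the cap is configured once when the app is built. A minimal wiring sketch, assuming this module's `Payload` and `PayloadConfig` are in scope (the port, route, and closure handler are illustrative, not from this commit):

use actix_web::{web, App, HttpResponse, HttpServer};

// Registering PayloadConfig as app data caps decompressed request bodies at 10 MiB;
// handlers that take `Payload` then stream at most that many bytes before erroring.
async fn serve_sketch() -> std::io::Result<()> {
    HttpServer::new(|| {
        App::new()
            .app_data(PayloadConfig::new(10 * 1024 * 1024))
            .route(
                "/documents",
                web::post().to(|_body: Payload| async { HttpResponse::Ok().finish() }),
            )
    })
    .bind(("127.0.0.1", 7700))?
    .run()
    .await
}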
152
crates/meilisearch/src/extractors/sequential_extractor.rs
Normal file
@@ -0,0 +1,152 @@
#![allow(non_snake_case)]
use std::future::Future;
use std::pin::Pin;
use std::task::Poll;

use actix_web::dev::Payload;
use actix_web::{FromRequest, Handler, HttpRequest};
use pin_project_lite::pin_project;

/// `SeqHandler` is an actix `Handler` that enforces that extractors errors are returned in the
/// same order as they are defined in the wrapped handler. This is needed because, by default, actix
/// resolves the extractors concurrently, whereas we always need the authentication extractor to
/// throw first.
#[derive(Clone)]
pub struct SeqHandler<H>(pub H);

pub struct SeqFromRequest<T>(T);

/// This macro implements `FromRequest` for arbitrary arity handler, except for one, which is
/// useless anyway.
macro_rules! gen_seq {
    ($ty:ident; $($T:ident)+) => {
        pin_project! {
            pub struct $ty<$($T: FromRequest), +> {
                $(
                    #[pin]
                    $T: ExtractFuture<$T::Future, $T, $T::Error>,
                )+
            }
        }

        impl<$($T: FromRequest), +> Future for $ty<$($T),+> {
            type Output = Result<SeqFromRequest<($($T),+)>, actix_web::Error>;

            fn poll(self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll<Self::Output> {
                let mut this = self.project();

                let mut count_fut = 0;
                let mut count_finished = 0;

                $(
                    count_fut += 1;
                    match this.$T.as_mut().project() {
                        ExtractProj::Future { fut } => match fut.poll(cx) {
                            Poll::Ready(Ok(output)) => {
                                count_finished += 1;
                                let _ = this
                                    .$T
                                    .as_mut()
                                    .project_replace(ExtractFuture::Done { output });
                            }
                            Poll::Ready(Err(error)) => {
                                count_finished += 1;
                                let _ = this
                                    .$T
                                    .as_mut()
                                    .project_replace(ExtractFuture::Error { error });
                            }
                            Poll::Pending => (),
                        },
                        ExtractProj::Done { .. } => count_finished += 1,
                        ExtractProj::Error { .. } => {
                            // short circuit if all previous are finished and we had an error.
                            if count_finished == count_fut {
                                match this.$T.project_replace(ExtractFuture::Empty) {
                                    ExtractReplaceProj::Error { error } => {
                                        return Poll::Ready(Err(error.into()))
                                    }
                                    _ => unreachable!("Invalid future state"),
                                }
                            } else {
                                count_finished += 1;
                            }
                        }
                        ExtractProj::Empty => unreachable!("From request polled after being finished. {}", stringify!($T)),
                    }
                )+

                if count_fut == count_finished {
                    let result = (
                        $(
                            match this.$T.project_replace(ExtractFuture::Empty) {
                                ExtractReplaceProj::Done { output } => output,
                                ExtractReplaceProj::Error { error } => return Poll::Ready(Err(error.into())),
                                _ => unreachable!("Invalid future state"),
                            },
                        )+
                    );

                    Poll::Ready(Ok(SeqFromRequest(result)))
                } else {
                    Poll::Pending
                }
            }
        }

        impl<$($T: FromRequest,)+> FromRequest for SeqFromRequest<($($T,)+)> {
            type Error = actix_web::Error;

            type Future = $ty<$($T),+>;

            fn from_request(req: &HttpRequest, payload: &mut Payload) -> Self::Future {
                $ty {
                    $(
                        $T: ExtractFuture::Future {
                            fut: $T::from_request(req, payload),
                        },
                    )+
                }
            }
        }

        impl<Han, $($T: FromRequest),+> Handler<SeqFromRequest<($($T),+)>> for SeqHandler<Han>
        where
            Han: Handler<($($T),+)>,
        {
            type Output = Han::Output;
            type Future = Han::Future;

            fn call(&self, args: SeqFromRequest<($($T),+)>) -> Self::Future {
                self.0.call(args.0)
            }
        }
    };
}

// Not working for a single argument, but then, it is not really necessary.
// gen_seq! { SeqFromRequestFut1; A }
gen_seq! { SeqFromRequestFut2; A B }
gen_seq! { SeqFromRequestFut3; A B C }
gen_seq! { SeqFromRequestFut4; A B C D }
gen_seq! { SeqFromRequestFut5; A B C D E }
gen_seq! { SeqFromRequestFut6; A B C D E F }
gen_seq! { SeqFromRequestFut7; A B C D E F G }

pin_project! {
    #[project = ExtractProj]
    #[project_replace = ExtractReplaceProj]
    enum ExtractFuture<Fut, Res, Err> {
        Future {
            #[pin]
            fut: Fut,
        },
        Done {
            output: Res,
        },
        Error {
            error: Err,
        },
        Empty,
    }
}
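Route registration is expected to wrap handlers in `SeqHandler` so the authentication extractor is polled (and can fail) before any other extractor. A hedged sketch of such a registration (path and handler are illustrative, not from this commit):

use actix_web::{web, HttpResponse};

async fn get_index_sketch(
    path: web::Path<String>,
    req: actix_web::HttpRequest,
) -> HttpResponse {
    let _ = (path.into_inner(), req);
    HttpResponse::Ok().finish()
}

// Wrapping the handler in SeqHandler forces its two extractors to resolve left to
// right instead of concurrently, as guaranteed by the generated Handler impls above.
fn configure_sketch(cfg: &mut web::ServiceConfig) {
    cfg.route("/indexes/{index_uid}", web::get().to(SeqHandler(get_index_sketch)));
}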
545
crates/meilisearch/src/lib.rs
Normal file
|
@ -0,0 +1,545 @@
|
|||
#![allow(rustdoc::private_intra_doc_links)]
|
||||
#[macro_use]
|
||||
pub mod error;
|
||||
pub mod analytics;
|
||||
#[macro_use]
|
||||
pub mod extractors;
|
||||
pub mod metrics;
|
||||
pub mod middleware;
|
||||
pub mod option;
|
||||
pub mod routes;
|
||||
pub mod search;
|
||||
pub mod search_queue;
|
||||
|
||||
use std::fs::File;
|
||||
use std::io::{BufReader, BufWriter};
|
||||
use std::path::Path;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
|
||||
use actix_cors::Cors;
|
||||
use actix_http::body::MessageBody;
|
||||
use actix_web::dev::{ServiceFactory, ServiceResponse};
|
||||
use actix_web::error::JsonPayloadError;
|
||||
use actix_web::http::header::{CONTENT_TYPE, USER_AGENT};
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest};
|
||||
use analytics::Analytics;
|
||||
use anyhow::bail;
|
||||
use error::PayloadError;
|
||||
use extractors::payload::PayloadConfig;
|
||||
use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
|
||||
use meilisearch_auth::AuthController;
|
||||
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
|
||||
use meilisearch_types::settings::apply_settings_to_builder;
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use meilisearch_types::versioning::{check_version_file, create_current_version_file};
|
||||
use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
|
||||
pub use option::Opt;
|
||||
use option::ScheduleSnapshot;
|
||||
use search_queue::SearchQueue;
|
||||
use tracing::{error, info_span};
|
||||
use tracing_subscriber::filter::Targets;
|
||||
|
||||
use crate::error::MeilisearchHttpError;
|
||||
|
||||
/// Default number of simultaneously opened indexes.
|
||||
///
|
||||
/// This value is used when dynamic computation of how many indexes can be opened at once was skipped (e.g., in tests).
|
||||
///
|
||||
/// Lower for Windows that dedicates a smaller virtual address space to processes.
|
||||
///
|
||||
/// The value was chosen this way:
|
||||
///
|
||||
/// - Windows provides a small virtual address space of about 10TiB to processes.
|
||||
/// - The chosen value allows for indexes to use the default map size of 2TiB safely.
|
||||
#[cfg(windows)]
|
||||
const DEFAULT_INDEX_COUNT: usize = 4;
|
||||
|
||||
/// Default number of simultaneously opened indexes.
|
||||
///
|
||||
/// This value is used when dynamic computation of how many indexes can be opened at once was skipped (e.g., in tests).
|
||||
///
|
||||
/// The higher, the better for avoiding reopening indexes.
|
||||
///
|
||||
/// The value was chosen this way:
|
||||
///
|
||||
/// - Opening an index consumes a file descriptor.
|
||||
/// - The default on many unices is about 256 file descriptors for a process.
|
||||
/// - 100 is a little bit less than half this value.
|
||||
/// - The chosen value allows for indexes to use the default map size of 2TiB safely.
|
||||
#[cfg(not(windows))]
|
||||
const DEFAULT_INDEX_COUNT: usize = 20;
|
||||
|
||||
/// Check if a db is empty. It does not provide any information on the
|
||||
/// validity of the data in it.
|
||||
/// We consider a database as non empty when it's a non empty directory.
|
||||
fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
|
||||
let db_path = db_path.as_ref();
|
||||
|
||||
if !db_path.exists() {
|
||||
true
|
||||
// if we encounter an error or if the db is a file we consider the db non empty
|
||||
} else if let Ok(dir) = db_path.read_dir() {
|
||||
dir.count() == 0
|
||||
} else {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
/// The handle used to update the logs at runtime. Must be accessible from the `main.rs` and the `route/logs.rs`.
|
||||
pub type LogRouteHandle =
|
||||
tracing_subscriber::reload::Handle<LogRouteType, tracing_subscriber::Registry>;
|
||||
|
||||
pub type LogRouteType = tracing_subscriber::filter::Filtered<
|
||||
Option<Box<dyn tracing_subscriber::Layer<tracing_subscriber::Registry> + Send + Sync>>,
|
||||
Targets,
|
||||
tracing_subscriber::Registry,
|
||||
>;
|
||||
|
||||
pub type SubscriberForSecondLayer = tracing_subscriber::layer::Layered<
|
||||
tracing_subscriber::reload::Layer<LogRouteType, tracing_subscriber::Registry>,
|
||||
tracing_subscriber::Registry,
|
||||
>;
|
||||
|
||||
pub type LogStderrHandle =
|
||||
tracing_subscriber::reload::Handle<LogStderrType, SubscriberForSecondLayer>;
|
||||
|
||||
pub type LogStderrType = tracing_subscriber::filter::Filtered<
|
||||
Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>,
|
||||
Targets,
|
||||
SubscriberForSecondLayer,
|
||||
>;
|
||||
|
||||
pub fn create_app(
|
||||
index_scheduler: Data<IndexScheduler>,
|
||||
auth_controller: Data<AuthController>,
|
||||
search_queue: Data<SearchQueue>,
|
||||
opt: Opt,
|
||||
logs: (LogRouteHandle, LogStderrHandle),
|
||||
analytics: Arc<dyn Analytics>,
|
||||
enable_dashboard: bool,
|
||||
) -> actix_web::App<
|
||||
impl ServiceFactory<
|
||||
actix_web::dev::ServiceRequest,
|
||||
Config = (),
|
||||
Response = ServiceResponse<impl MessageBody>,
|
||||
Error = actix_web::Error,
|
||||
InitError = (),
|
||||
>,
|
||||
> {
|
||||
let app = actix_web::App::new()
|
||||
.configure(|s| {
|
||||
configure_data(
|
||||
s,
|
||||
index_scheduler.clone(),
|
||||
auth_controller.clone(),
|
||||
search_queue.clone(),
|
||||
&opt,
|
||||
logs,
|
||||
analytics.clone(),
|
||||
)
|
||||
})
|
||||
.configure(routes::configure)
|
||||
.configure(|s| dashboard(s, enable_dashboard));
|
||||
|
||||
let app = app.wrap(middleware::RouteMetrics);
|
||||
app.wrap(
|
||||
Cors::default()
|
||||
.send_wildcard()
|
||||
.allow_any_header()
|
||||
.allow_any_origin()
|
||||
.allow_any_method()
|
||||
.max_age(86_400), // 24h
|
||||
)
|
||||
.wrap(tracing_actix_web::TracingLogger::<AwebTracingLogger>::new())
|
||||
.wrap(actix_web::middleware::Compress::default())
|
||||
.wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim))
|
||||
}
|
||||
|
||||
struct AwebTracingLogger;
|
||||
|
||||
impl tracing_actix_web::RootSpanBuilder for AwebTracingLogger {
|
||||
fn on_request_start(request: &actix_web::dev::ServiceRequest) -> tracing::Span {
|
||||
use tracing::field::Empty;
|
||||
|
||||
let conn_info = request.connection_info();
|
||||
let headers = request.headers();
|
||||
let user_agent = headers
|
||||
.get(USER_AGENT)
|
||||
.map(|value| String::from_utf8_lossy(value.as_bytes()).into_owned())
|
||||
.unwrap_or_default();
|
||||
info_span!("HTTP request", method = %request.method(), host = conn_info.host(), route = %request.path(), query_parameters = %request.query_string(), %user_agent, status_code = Empty, error = Empty)
|
||||
}
|
||||
|
||||
fn on_request_end<B: MessageBody>(
|
||||
span: tracing::Span,
|
||||
outcome: &Result<ServiceResponse<B>, actix_web::Error>,
|
||||
) {
|
||||
match &outcome {
|
||||
Ok(response) => {
|
||||
let code: i32 = response.response().status().as_u16().into();
|
||||
span.record("status_code", code);
|
||||
|
||||
if let Some(error) = response.response().error() {
|
||||
// use the status code already constructed for the outgoing HTTP response
|
||||
span.record("error", &tracing::field::display(error.as_response_error()));
|
||||
}
|
||||
}
|
||||
Err(error) => {
|
||||
let code: i32 = error.error_response().status().as_u16().into();
|
||||
span.record("status_code", code);
|
||||
span.record("error", &tracing::field::display(error.as_response_error()));
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
enum OnFailure {
|
||||
RemoveDb,
|
||||
KeepDb,
|
||||
}
|
||||
|
||||
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
|
||||
let empty_db = is_empty_db(&opt.db_path);
|
||||
let (index_scheduler, auth_controller) = if let Some(ref snapshot_path) = opt.import_snapshot {
|
||||
let snapshot_path_exists = snapshot_path.exists();
|
||||
// the db is empty and the snapshot exists, import it
|
||||
if empty_db && snapshot_path_exists {
|
||||
match compression::from_tar_gz(snapshot_path, &opt.db_path) {
|
||||
Ok(()) => open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?,
|
||||
Err(e) => {
|
||||
std::fs::remove_dir_all(&opt.db_path)?;
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
// the db already exists and we should not ignore the snapshot => throw an error
|
||||
} else if !empty_db && !opt.ignore_snapshot_if_db_exists {
|
||||
bail!(
|
||||
"database already exists at {:?}, try to delete it or rename it",
|
||||
opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned())
|
||||
)
|
||||
// the snapshot doesn't exist and we can't ignore it => throw an error
|
||||
} else if !snapshot_path_exists && !opt.ignore_missing_snapshot {
|
||||
bail!("snapshot doesn't exist at {}", snapshot_path.display())
|
||||
// the snapshot and the db exist, and we can ignore the snapshot because of the ignore_snapshot_if_db_exists flag
|
||||
} else {
|
||||
open_or_create_database(opt, empty_db)?
|
||||
}
|
||||
} else if let Some(ref path) = opt.import_dump {
|
||||
let src_path_exists = path.exists();
|
||||
// the db is empty and the dump exists, import it
|
||||
if empty_db && src_path_exists {
|
||||
let (mut index_scheduler, mut auth_controller) =
|
||||
open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?;
|
||||
match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
|
||||
Ok(()) => (index_scheduler, auth_controller),
|
||||
Err(e) => {
|
||||
std::fs::remove_dir_all(&opt.db_path)?;
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
// the db already exists and we should not ignore the dump option => throw an error
|
||||
} else if !empty_db && !opt.ignore_dump_if_db_exists {
|
||||
bail!(
|
||||
"database already exists at {:?}, try to delete it or rename it",
|
||||
opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned())
|
||||
)
|
||||
// the dump doesn't exist and we can't ignore it => throw an error
|
||||
} else if !src_path_exists && !opt.ignore_missing_dump {
|
||||
bail!("dump doesn't exist at {:?}", path)
|
||||
// the dump and the db exist and we can ignore the dump because of the ignore_dump_if_db_exists flag
|
||||
// or, the dump is missing but we can ignore that because of the ignore_missing_dump flag
|
||||
} else {
|
||||
open_or_create_database(opt, empty_db)?
|
||||
}
|
||||
} else {
|
||||
open_or_create_database(opt, empty_db)?
|
||||
};
|
||||
|
||||
// We create a loop in a thread that registers snapshotCreation tasks
|
||||
let index_scheduler = Arc::new(index_scheduler);
|
||||
let auth_controller = Arc::new(auth_controller);
|
||||
if let ScheduleSnapshot::Enabled(snapshot_delay) = opt.schedule_snapshot {
|
||||
let snapshot_delay = Duration::from_secs(snapshot_delay);
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
thread::Builder::new()
|
||||
.name(String::from("register-snapshot-tasks"))
|
||||
.spawn(move || loop {
|
||||
thread::sleep(snapshot_delay);
|
||||
if let Err(e) =
|
||||
index_scheduler.register(KindWithContent::SnapshotCreation, None, false)
|
||||
{
|
||||
error!("Error while registering snapshot: {}", e);
|
||||
}
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
Ok((index_scheduler, auth_controller))
|
||||
}
|
||||
|
||||
/// Try to start the IndexScheduler and AuthController without checking the VERSION file or anything.
|
||||
fn open_or_create_database_unchecked(
|
||||
opt: &Opt,
|
||||
on_failure: OnFailure,
|
||||
) -> anyhow::Result<(IndexScheduler, AuthController)> {
|
||||
// we don't want to create anything in the data.ms yet, thus we
|
||||
// wrap our two builders in a closure that'll be executed later.
|
||||
let auth_controller = AuthController::new(&opt.db_path, &opt.master_key);
|
||||
let instance_features = opt.to_instance_features();
|
||||
let index_scheduler_builder = || -> anyhow::Result<_> {
|
||||
Ok(IndexScheduler::new(IndexSchedulerOptions {
|
||||
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
|
||||
auth_path: opt.db_path.join("auth"),
|
||||
tasks_path: opt.db_path.join("tasks"),
|
||||
update_file_path: opt.db_path.join("update_files"),
|
||||
indexes_path: opt.db_path.join("indexes"),
|
||||
snapshots_path: opt.snapshot_dir.clone(),
|
||||
dumps_path: opt.dump_dir.clone(),
|
||||
webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
|
||||
webhook_authorization_header: opt.task_webhook_authorization_header.clone(),
|
||||
task_db_size: opt.max_task_db_size.as_u64() as usize,
|
||||
index_base_map_size: opt.max_index_size.as_u64() as usize,
|
||||
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
|
||||
indexer_config: (&opt.indexer_options).try_into()?,
|
||||
autobatching_enabled: true,
|
||||
cleanup_enabled: !opt.experimental_replication_parameters,
|
||||
max_number_of_tasks: 1_000_000,
|
||||
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
|
||||
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize,
|
||||
index_count: DEFAULT_INDEX_COUNT,
|
||||
instance_features,
|
||||
})?)
|
||||
};
|
||||
|
||||
match (
|
||||
index_scheduler_builder(),
|
||||
auth_controller.map_err(anyhow::Error::from),
|
||||
create_current_version_file(&opt.db_path).map_err(anyhow::Error::from),
|
||||
) {
|
||||
(Ok(i), Ok(a), Ok(())) => Ok((i, a)),
|
||||
(Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => {
|
||||
if matches!(on_failure, OnFailure::RemoveDb) {
|
||||
std::fs::remove_dir_all(&opt.db_path)?;
|
||||
}
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Ensure you're in a valid state and open the IndexScheduler + AuthController for you.
|
||||
fn open_or_create_database(
|
||||
opt: &Opt,
|
||||
empty_db: bool,
|
||||
) -> anyhow::Result<(IndexScheduler, AuthController)> {
|
||||
if !empty_db {
|
||||
check_version_file(&opt.db_path)?;
|
||||
}
|
||||
|
||||
open_or_create_database_unchecked(opt, OnFailure::KeepDb)
|
||||
}
|
||||
|
||||
fn import_dump(
|
||||
db_path: &Path,
|
||||
dump_path: &Path,
|
||||
index_scheduler: &mut IndexScheduler,
|
||||
auth: &mut AuthController,
|
||||
) -> Result<(), anyhow::Error> {
|
||||
let reader = File::open(dump_path)?;
|
||||
let mut dump_reader = dump::DumpReader::open(reader)?;
|
||||
|
||||
if let Some(date) = dump_reader.date() {
|
||||
tracing::info!(
|
||||
version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
|
||||
%date,
|
||||
"Importing a dump of meilisearch"
|
||||
);
|
||||
} else {
|
||||
tracing::info!(
|
||||
version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
|
||||
"Importing a dump of meilisearch",
|
||||
);
|
||||
}
|
||||
|
||||
let instance_uid = dump_reader.instance_uid()?;
|
||||
|
||||
// 1. Import the instance-uid.
|
||||
if let Some(ref instance_uid) = instance_uid {
|
||||
// we don't want to panic if there is an error with the instance-uid.
|
||||
let _ = std::fs::write(db_path.join("instance-uid"), instance_uid.to_string().as_bytes());
|
||||
};
|
||||
|
||||
// 2. Import the `Key`s.
|
||||
let mut keys = Vec::new();
|
||||
auth.raw_delete_all_keys()?;
|
||||
for key in dump_reader.keys()? {
|
||||
let key = key?;
|
||||
auth.raw_insert_key(key.clone())?;
|
||||
keys.push(key);
|
||||
}
|
||||
|
||||
// 3. Import the runtime features.
|
||||
let features = dump_reader.features()?.unwrap_or_default();
|
||||
index_scheduler.put_runtime_features(features)?;
|
||||
|
||||
let indexer_config = index_scheduler.indexer_config();
|
||||
|
||||
// /!\ The tasks must be imported AFTER importing the indexes or else the scheduler might
|
||||
// try to process tasks while we're trying to import the indexes.
|
||||
|
||||
// 4. Import the indexes.
|
||||
for index_reader in dump_reader.indexes()? {
|
||||
let mut index_reader = index_reader?;
|
||||
let metadata = index_reader.metadata();
|
||||
tracing::info!("Importing index `{}`.", metadata.uid);
|
||||
|
||||
let date = Some((metadata.created_at, metadata.updated_at));
|
||||
let index = index_scheduler.create_raw_index(&metadata.uid, date)?;
|
||||
|
||||
let mut wtxn = index.write_txn()?;
|
||||
|
||||
let mut builder = milli::update::Settings::new(&mut wtxn, &index, indexer_config);
|
||||
// 4.1 Import the primary key if there is one.
|
||||
if let Some(ref primary_key) = metadata.primary_key {
|
||||
builder.set_primary_key(primary_key.to_string());
|
||||
}
|
||||
|
||||
// 4.2 Import the settings.
|
||||
tracing::info!("Importing the settings.");
|
||||
let settings = index_reader.settings()?;
|
||||
apply_settings_to_builder(&settings, &mut builder);
|
||||
builder
|
||||
.execute(|indexing_step| tracing::debug!("update: {:?}", indexing_step), || false)?;
|
||||
|
||||
// 4.3 Import the documents.
|
||||
// 4.3.1 We need to recreate the grenad+obkv format accepted by the index.
|
||||
tracing::info!("Importing the documents.");
|
||||
let file = tempfile::tempfile()?;
|
||||
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
|
||||
for document in index_reader.documents()? {
|
||||
builder.append_json_object(&document?)?;
|
||||
}
|
||||
|
||||
// This flushes the content of the batch builder.
|
||||
let file = builder.into_inner()?.into_inner()?;
|
||||
|
||||
// 4.3.2 We feed it to the milli index.
|
||||
let reader = BufReader::new(file);
|
||||
let reader = DocumentsBatchReader::from_reader(reader)?;
|
||||
|
||||
let embedder_configs = index.embedding_configs(&wtxn)?;
|
||||
let embedders = index_scheduler.embedders(embedder_configs)?;
|
||||
|
||||
let builder = milli::update::IndexDocuments::new(
|
||||
&mut wtxn,
|
||||
&index,
|
||||
indexer_config,
|
||||
IndexDocumentsConfig {
|
||||
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||
..Default::default()
|
||||
},
|
||||
|indexing_step| tracing::trace!("update: {:?}", indexing_step),
|
||||
|| false,
|
||||
)?;
|
||||
|
||||
let builder = builder.with_embedders(embedders);
|
||||
|
||||
let (builder, user_result) = builder.add_documents(reader)?;
|
||||
let user_result = user_result?;
|
||||
tracing::info!(documents_found = user_result, "{} documents found.", user_result);
|
||||
builder.execute()?;
|
||||
wtxn.commit()?;
|
||||
tracing::info!("All documents successfully imported.");
|
||||
}
|
||||
|
||||
let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
|
||||
|
||||
// 5. Import the tasks.
|
||||
for ret in dump_reader.tasks()? {
|
||||
let (task, file) = ret?;
|
||||
index_scheduler_dump.register_dumped_task(task, file)?;
|
||||
}
|
||||
Ok(index_scheduler_dump.finish()?)
|
||||
}
|
||||
|
||||
pub fn configure_data(
|
||||
config: &mut web::ServiceConfig,
|
||||
index_scheduler: Data<IndexScheduler>,
|
||||
auth: Data<AuthController>,
|
||||
search_queue: Data<SearchQueue>,
|
||||
opt: &Opt,
|
||||
(logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle),
|
||||
analytics: Arc<dyn Analytics>,
|
||||
) {
|
||||
let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize;
|
||||
config
|
||||
.app_data(index_scheduler)
|
||||
.app_data(auth)
|
||||
.app_data(search_queue)
|
||||
.app_data(web::Data::from(analytics))
|
||||
.app_data(web::Data::new(logs_route))
|
||||
.app_data(web::Data::new(logs_stderr))
|
||||
.app_data(web::Data::new(opt.clone()))
|
||||
.app_data(
|
||||
web::JsonConfig::default()
|
||||
.limit(http_payload_size_limit)
|
||||
.content_type(|mime| mime == mime::APPLICATION_JSON)
|
||||
.error_handler(|err, req: &HttpRequest| match err {
|
||||
JsonPayloadError::ContentType => match req.headers().get(CONTENT_TYPE) {
|
||||
Some(content_type) => MeilisearchHttpError::InvalidContentType(
|
||||
content_type.to_str().unwrap_or("unknown").to_string(),
|
||||
vec![mime::APPLICATION_JSON.to_string()],
|
||||
)
|
||||
.into(),
|
||||
None => MeilisearchHttpError::MissingContentType(vec![
|
||||
mime::APPLICATION_JSON.to_string(),
|
||||
])
|
||||
.into(),
|
||||
},
|
||||
err => PayloadError::from(err).into(),
|
||||
}),
|
||||
)
|
||||
.app_data(PayloadConfig::new(http_payload_size_limit))
|
||||
.app_data(
|
||||
web::QueryConfig::default().error_handler(|err, _req| PayloadError::from(err).into()),
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(feature = "mini-dashboard")]
|
||||
pub fn dashboard(config: &mut web::ServiceConfig, enable_frontend: bool) {
|
||||
use actix_web::HttpResponse;
|
||||
use static_files::Resource;
|
||||
|
||||
mod generated {
|
||||
include!(concat!(env!("OUT_DIR"), "/generated.rs"));
|
||||
}
|
||||
|
||||
if enable_frontend {
|
||||
let generated = generated::generate();
|
||||
// Generate routes for mini-dashboard assets
|
||||
for (path, resource) in generated.into_iter() {
|
||||
let Resource { mime_type, data, .. } = resource;
|
||||
// Redirect index.html to /
|
||||
if path == "index.html" {
|
||||
config.service(web::resource("/").route(web::get().to(move || async move {
|
||||
HttpResponse::Ok().content_type(mime_type).body(data)
|
||||
})));
|
||||
} else {
|
||||
config.service(web::resource(path).route(web::get().to(move || async move {
|
||||
HttpResponse::Ok().content_type(mime_type).body(data)
|
||||
})));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
config.service(web::resource("/").route(web::get().to(routes::running)));
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "mini-dashboard"))]
|
||||
pub fn dashboard(config: &mut web::ServiceConfig, _enable_frontend: bool) {
|
||||
config.service(web::resource("/").route(web::get().to(routes::running)));
|
||||
}
|
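The snapshot scheduling near the top of this hunk spawns a named thread that sleeps for the configured delay and registers a SnapshotCreation task on each iteration, forever. A stripped-down, self-contained sketch of that loop shape, using a plain atomic counter as a stand-in for the real IndexScheduler and a bounded loop so the demo terminates:

use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use std::thread;
use std::time::Duration;

fn main() {
    // Stand-in for the scheduler: we only count how many tasks were "registered".
    let registered = Arc::new(AtomicU64::new(0));

    let handle = {
        let registered = Arc::clone(&registered);
        thread::Builder::new()
            .name(String::from("register-snapshot-tasks"))
            .spawn(move || {
                // The real loop never exits; this one is bounded so the example finishes.
                for _ in 0..3 {
                    thread::sleep(Duration::from_millis(10));
                    registered.fetch_add(1, Ordering::Relaxed);
                }
            })
            .expect("failed to spawn the snapshot thread")
    };

    handle.join().expect("the snapshot thread panicked");
    assert_eq!(registered.load(Ordering::Relaxed), 3);
}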
335
crates/meilisearch/src/main.rs
Normal file
|
@@ -0,0 +1,335 @@
|
|||
use std::env;
|
||||
use std::io::{stderr, LineWriter, Write};
|
||||
use std::num::NonZeroUsize;
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use std::thread::available_parallelism;
|
||||
|
||||
use actix_web::http::KeepAlive;
|
||||
use actix_web::web::Data;
|
||||
use actix_web::HttpServer;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use is_terminal::IsTerminal;
|
||||
use meilisearch::analytics::Analytics;
|
||||
use meilisearch::option::LogMode;
|
||||
use meilisearch::search_queue::SearchQueue;
|
||||
use meilisearch::{
|
||||
analytics, create_app, setup_meilisearch, LogRouteHandle, LogRouteType, LogStderrHandle,
|
||||
LogStderrType, Opt, SubscriberForSecondLayer,
|
||||
};
|
||||
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
|
||||
use mimalloc::MiMalloc;
|
||||
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
|
||||
use tracing::level_filters::LevelFilter;
|
||||
use tracing_subscriber::layer::SubscriberExt as _;
|
||||
use tracing_subscriber::Layer;
|
||||
|
||||
#[global_allocator]
|
||||
static ALLOC: MiMalloc = MiMalloc;
|
||||
|
||||
fn default_log_route_layer() -> LogRouteType {
|
||||
None.with_filter(tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF))
|
||||
}
|
||||
|
||||
fn default_log_stderr_layer(opt: &Opt) -> LogStderrType {
|
||||
let layer = tracing_subscriber::fmt::layer()
|
||||
.with_writer(|| LineWriter::new(std::io::stderr()))
|
||||
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE);
|
||||
|
||||
let layer = match opt.experimental_logs_mode {
|
||||
LogMode::Human => Box::new(layer)
|
||||
as Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>,
|
||||
LogMode::Json => Box::new(layer.json())
|
||||
as Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>,
|
||||
};
|
||||
|
||||
layer.with_filter(
|
||||
tracing_subscriber::filter::Targets::new()
|
||||
.with_target("", LevelFilter::from_str(&opt.log_level.to_string()).unwrap()),
|
||||
)
|
||||
}
|
||||
|
||||
/// Does all the setup before Meilisearch is launched
|
||||
fn setup(opt: &Opt) -> anyhow::Result<(LogRouteHandle, LogStderrHandle)> {
|
||||
let (route_layer, route_layer_handle) =
|
||||
tracing_subscriber::reload::Layer::new(default_log_route_layer());
|
||||
let route_layer: tracing_subscriber::reload::Layer<_, _> = route_layer;
|
||||
|
||||
let (stderr_layer, stderr_layer_handle) =
|
||||
tracing_subscriber::reload::Layer::new(default_log_stderr_layer(opt));
|
||||
let route_layer: tracing_subscriber::reload::Layer<_, _> = route_layer;
|
||||
|
||||
let subscriber = tracing_subscriber::registry().with(route_layer).with(stderr_layer);
|
||||
|
||||
// set the subscriber as the default for the application
|
||||
tracing::subscriber::set_global_default(subscriber).unwrap();
|
||||
|
||||
Ok((route_layer_handle, stderr_layer_handle))
|
||||
}
|
||||
|
||||
fn on_panic(info: &std::panic::PanicInfo) {
|
||||
let info = info.to_string().replace('\n', " ");
|
||||
tracing::error!(%info);
|
||||
}
|
||||
|
||||
#[actix_web::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
try_main().await.inspect_err(|error| {
|
||||
tracing::error!(%error);
|
||||
let mut current = error.source();
|
||||
let mut depth = 0;
|
||||
while let Some(source) = current {
|
||||
tracing::info!(%source, depth, "Error caused by");
|
||||
current = source.source();
|
||||
depth += 1;
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
async fn try_main() -> anyhow::Result<()> {
|
||||
let (opt, config_read_from) = Opt::try_build()?;
|
||||
|
||||
std::panic::set_hook(Box::new(on_panic));
|
||||
|
||||
anyhow::ensure!(
|
||||
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
|
||||
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
|
||||
);
|
||||
|
||||
let log_handle = setup(&opt)?;
|
||||
|
||||
match (opt.env.as_ref(), &opt.master_key) {
|
||||
("production", Some(master_key)) if master_key.len() < MASTER_KEY_MIN_SIZE => {
|
||||
anyhow::bail!(
|
||||
"The master key must be at least {MASTER_KEY_MIN_SIZE} bytes in a production environment. The provided key is only {} bytes.
|
||||
|
||||
{}",
|
||||
master_key.len(),
|
||||
generated_master_key_message(),
|
||||
)
|
||||
}
|
||||
("production", None) => {
|
||||
anyhow::bail!(
|
||||
"You must provide a master key to secure your instance in a production environment. It can be specified via the MEILI_MASTER_KEY environment variable or the --master-key launch option.
|
||||
|
||||
{}",
|
||||
generated_master_key_message()
|
||||
)
|
||||
}
|
||||
// No error; continue
|
||||
_ => (),
|
||||
}
|
||||
|
||||
let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?;
|
||||
|
||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
let analytics = if !opt.no_analytics {
|
||||
analytics::SegmentAnalytics::new(&opt, index_scheduler.clone(), auth_controller.clone())
|
||||
.await
|
||||
} else {
|
||||
analytics::MockAnalytics::new(&opt)
|
||||
};
|
||||
#[cfg(any(debug_assertions, not(feature = "analytics")))]
|
||||
let analytics = analytics::MockAnalytics::new(&opt);
|
||||
|
||||
print_launch_resume(&opt, analytics.clone(), config_read_from);
|
||||
|
||||
run_http(index_scheduler, auth_controller, opt, log_handle, analytics).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn run_http(
|
||||
index_scheduler: Arc<IndexScheduler>,
|
||||
auth_controller: Arc<AuthController>,
|
||||
opt: Opt,
|
||||
logs: (LogRouteHandle, LogStderrHandle),
|
||||
analytics: Arc<dyn Analytics>,
|
||||
) -> anyhow::Result<()> {
|
||||
let enable_dashboard = &opt.env == "development";
|
||||
let opt_clone = opt.clone();
|
||||
let index_scheduler = Data::from(index_scheduler);
|
||||
let auth_controller = Data::from(auth_controller);
|
||||
let search_queue = SearchQueue::new(
|
||||
opt.experimental_search_queue_size,
|
||||
available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap()),
|
||||
);
|
||||
let search_queue = Data::new(search_queue);
|
||||
|
||||
let http_server = HttpServer::new(move || {
|
||||
create_app(
|
||||
index_scheduler.clone(),
|
||||
auth_controller.clone(),
|
||||
search_queue.clone(),
|
||||
opt.clone(),
|
||||
logs.clone(),
|
||||
analytics.clone(),
|
||||
enable_dashboard,
|
||||
)
|
||||
})
|
||||
// Disabling signals allows the server to terminate immediately when a user enters CTRL-C
|
||||
.disable_signals()
|
||||
.keep_alive(KeepAlive::Os);
|
||||
|
||||
if let Some(config) = opt_clone.get_ssl_config()? {
|
||||
http_server.bind_rustls_0_23(opt_clone.http_addr, config)?.run().await?;
|
||||
} else {
|
||||
http_server.bind(&opt_clone.http_addr)?.run().await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn print_launch_resume(
|
||||
opt: &Opt,
|
||||
analytics: Arc<dyn Analytics>,
|
||||
config_read_from: Option<PathBuf>,
|
||||
) {
|
||||
let build_info = build_info::BuildInfo::from_build();
|
||||
|
||||
let protocol =
|
||||
if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() { "https" } else { "http" };
|
||||
let ascii_name = r#"
|
||||
888b d888 d8b 888 d8b 888
|
||||
8888b d8888 Y8P 888 Y8P 888
|
||||
88888b.d88888 888 888
|
||||
888Y88888P888 .d88b. 888 888 888 .d8888b .d88b. 8888b. 888d888 .d8888b 88888b.
|
||||
888 Y888P 888 d8P Y8b 888 888 888 88K d8P Y8b "88b 888P" d88P" 888 "88b
|
||||
888 Y8P 888 88888888 888 888 888 "Y8888b. 88888888 .d888888 888 888 888 888
|
||||
888 " 888 Y8b. 888 888 888 X88 Y8b. 888 888 888 Y88b. 888 888
|
||||
888 888 "Y8888 888 888 888 88888P' "Y8888 "Y888888 888 "Y8888P 888 888
|
||||
"#;
|
||||
|
||||
eprintln!("{}", ascii_name);
|
||||
|
||||
eprintln!(
|
||||
"Config file path:\t{:?}",
|
||||
config_read_from
|
||||
.map(|config_file_path| config_file_path.display().to_string())
|
||||
.unwrap_or_else(|| "none".to_string())
|
||||
);
|
||||
eprintln!("Database path:\t\t{:?}", opt.db_path);
|
||||
eprintln!("Server listening on:\t\"{}://{}\"", protocol, opt.http_addr);
|
||||
eprintln!("Environment:\t\t{:?}", opt.env);
|
||||
eprintln!("Commit SHA:\t\t{:?}", build_info.commit_sha1.unwrap_or("unknown"));
|
||||
eprintln!(
|
||||
"Commit date:\t\t{:?}",
|
||||
build_info
|
||||
.commit_timestamp
|
||||
.and_then(|commit_timestamp| commit_timestamp
|
||||
.format(&time::format_description::well_known::Rfc3339)
|
||||
.ok())
|
||||
.unwrap_or("unknown".into())
|
||||
);
|
||||
eprintln!("Package version:\t{:?}", env!("CARGO_PKG_VERSION").to_string());
|
||||
if let Some(prototype) = build_info.describe.and_then(|describe| describe.as_prototype()) {
|
||||
eprintln!("Prototype:\t\t{:?}", prototype);
|
||||
}
|
||||
|
||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
{
|
||||
if !opt.no_analytics {
|
||||
eprintln!(
|
||||
"
|
||||
Thank you for using Meilisearch!
|
||||
|
||||
\nWe collect anonymized analytics to improve our product and your experience. To learn more, including how to turn off analytics, visit our dedicated documentation page: https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry
|
||||
|
||||
Anonymous telemetry:\t\"Enabled\""
|
||||
);
|
||||
} else {
|
||||
eprintln!("Anonymous telemetry:\t\"Disabled\"");
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(instance_uid) = analytics.instance_uid() {
|
||||
eprintln!("Instance UID:\t\t\"{}\"", instance_uid);
|
||||
}
|
||||
|
||||
eprintln!();
|
||||
|
||||
match (opt.env.as_ref(), &opt.master_key) {
|
||||
("production", Some(_)) => {
|
||||
eprintln!("A master key has been set. Requests to Meilisearch won't be authorized unless you provide an authentication key.");
|
||||
}
|
||||
("development", Some(master_key)) => {
|
||||
eprintln!("A master key has been set. Requests to Meilisearch won't be authorized unless you provide an authentication key.");
|
||||
|
||||
if master_key.len() < MASTER_KEY_MIN_SIZE {
|
||||
print_master_key_too_short_warning()
|
||||
}
|
||||
}
|
||||
("development", None) => print_missing_master_key_warning(),
|
||||
// unreachable because Opt::try_build above would have failed already if any other value had been produced
|
||||
_ => unreachable!(),
|
||||
}
|
||||
|
||||
eprintln!();
|
||||
eprintln!("Check out Meilisearch Cloud!\thttps://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=engine&utm_medium=cli");
|
||||
eprintln!("Documentation:\t\t\thttps://www.meilisearch.com/docs");
|
||||
eprintln!("Source code:\t\t\thttps://github.com/meilisearch/meilisearch");
|
||||
eprintln!("Discord:\t\t\thttps://discord.meilisearch.com");
|
||||
eprintln!();
|
||||
}
|
||||
|
||||
const WARNING_BG_COLOR: Option<Color> = Some(Color::Ansi256(178));
|
||||
const WARNING_FG_COLOR: Option<Color> = Some(Color::Ansi256(0));
|
||||
|
||||
fn print_master_key_too_short_warning() {
|
||||
let choice = if stderr().is_terminal() { ColorChoice::Auto } else { ColorChoice::Never };
|
||||
let mut stderr = StandardStream::stderr(choice);
|
||||
stderr
|
||||
.set_color(
|
||||
ColorSpec::new().set_bg(WARNING_BG_COLOR).set_fg(WARNING_FG_COLOR).set_bold(true),
|
||||
)
|
||||
.unwrap();
|
||||
writeln!(stderr, "\n").unwrap();
|
||||
writeln!(
|
||||
stderr,
|
||||
" Meilisearch started with a master key considered unsafe for use in a production environment.
|
||||
|
||||
A master key of at least {MASTER_KEY_MIN_SIZE} bytes will be required when switching to a production environment."
|
||||
)
|
||||
.unwrap();
|
||||
stderr.reset().unwrap();
|
||||
writeln!(stderr).unwrap();
|
||||
|
||||
eprintln!("\n{}", generated_master_key_message());
|
||||
eprintln!(
|
||||
"\nRestart Meilisearch with the argument above to use this new and secure master key."
|
||||
)
|
||||
}
|
||||
|
||||
fn print_missing_master_key_warning() {
|
||||
let choice = if stderr().is_terminal() { ColorChoice::Auto } else { ColorChoice::Never };
|
||||
let mut stderr = StandardStream::stderr(choice);
|
||||
stderr
|
||||
.set_color(
|
||||
ColorSpec::new().set_bg(WARNING_BG_COLOR).set_fg(WARNING_FG_COLOR).set_bold(true),
|
||||
)
|
||||
.unwrap();
|
||||
writeln!(stderr, "\n").unwrap();
|
||||
writeln!(
|
||||
stderr,
|
||||
" No master key was found. The server will accept unidentified requests.
|
||||
|
||||
A master key of at least {MASTER_KEY_MIN_SIZE} bytes will be required when switching to a production environment."
|
||||
)
|
||||
.unwrap();
|
||||
stderr.reset().unwrap();
|
||||
writeln!(stderr).unwrap();
|
||||
|
||||
eprintln!("\n{}", generated_master_key_message());
|
||||
eprintln!(
|
||||
"\nRestart Meilisearch with the argument above to use this new and secure master key."
|
||||
)
|
||||
}
|
||||
|
||||
fn generated_master_key_message() -> String {
|
||||
format!(
|
||||
"We generated a new secure master key for you (you can safely use this token):
|
||||
|
||||
>> --master-key {} <<",
|
||||
generate_master_key()
|
||||
)
|
||||
}
|
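The error reporting in main() above walks the std::error::Error::source() chain so that every underlying cause is logged together with its depth. A self-contained sketch of the same traversal, where Wrapper is a hypothetical error type introduced only to produce a two-level chain:

use std::error::Error;
use std::fmt;

// Hypothetical error type used only to demonstrate a source() chain.
#[derive(Debug)]
struct Wrapper(std::io::Error);

impl fmt::Display for Wrapper {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "failed to open the database")
    }
}

impl Error for Wrapper {
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        Some(&self.0)
    }
}

// Same traversal as in main(): print the top-level error, then every cause with its depth.
fn report(error: &dyn Error) {
    eprintln!("error: {error}");
    let mut current = error.source();
    let mut depth = 0;
    while let Some(source) = current {
        eprintln!("caused by (depth {depth}): {source}");
        current = source.source();
        depth += 1;
    }
}

fn main() {
    let error = Wrapper(std::io::Error::new(std::io::ErrorKind::NotFound, "data.ms is missing"));
    report(&error);
}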
52
crates/meilisearch/src/metrics.rs
Normal file
|
@@ -0,0 +1,52 @@
|
|||
use lazy_static::lazy_static;
|
||||
use prometheus::{
|
||||
opts, register_histogram_vec, register_int_counter_vec, register_int_gauge,
|
||||
register_int_gauge_vec, HistogramVec, IntCounterVec, IntGauge, IntGaugeVec,
|
||||
};
|
||||
|
||||
lazy_static! {
|
||||
pub static ref MEILISEARCH_HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
|
||||
opts!("meilisearch_http_requests_total", "Meilisearch HTTP requests total"),
|
||||
&["method", "path", "status"]
|
||||
)
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_DEGRADED_SEARCH_REQUESTS: IntGauge = register_int_gauge!(opts!(
|
||||
"meilisearch_degraded_search_requests",
|
||||
"Meilisearch number of degraded search requests"
|
||||
))
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge =
|
||||
register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch DB Size In Bytes"))
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_USED_DB_SIZE_BYTES: IntGauge = register_int_gauge!(opts!(
|
||||
"meilisearch_used_db_size_bytes",
|
||||
"Meilisearch Used DB Size In Bytes"
|
||||
))
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_INDEX_COUNT: IntGauge =
|
||||
register_int_gauge!(opts!("meilisearch_index_count", "Meilisearch Index Count"))
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_INDEX_DOCS_COUNT: IntGaugeVec = register_int_gauge_vec!(
|
||||
opts!("meilisearch_index_docs_count", "Meilisearch Index Docs Count"),
|
||||
&["index"]
|
||||
)
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS: HistogramVec = register_histogram_vec!(
|
||||
"meilisearch_http_response_time_seconds",
|
||||
"Meilisearch HTTP response times",
|
||||
&["method", "path"],
|
||||
vec![0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0]
|
||||
)
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_NB_TASKS: IntGaugeVec = register_int_gauge_vec!(
|
||||
opts!("meilisearch_nb_tasks", "Meilisearch Number of tasks"),
|
||||
&["kind", "value"]
|
||||
)
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_LAST_UPDATE: IntGauge =
|
||||
register_int_gauge!(opts!("meilisearch_last_update", "Meilisearch Last Update"))
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_IS_INDEXING: IntGauge =
|
||||
register_int_gauge!(opts!("meilisearch_is_indexing", "Meilisearch Is Indexing"))
|
||||
.expect("Can't create a metric");
|
||||
}
|
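All of the metrics above are lazily initialised and registered on the default Prometheus registry by the register_* macros. The route that actually exposes them is not part of this hunk; the following is only a sketch of how such gauges end up in the text exposition format, with render_metrics being a made-up helper name and assuming the code sits next to the statics above:

use prometheus::{Encoder, TextEncoder};

fn render_metrics() -> String {
    // The statics are lazily initialised; touching one makes sure it is registered
    // before the default registry is gathered.
    MEILISEARCH_INDEX_COUNT.set(0);

    let metric_families = prometheus::gather();
    let mut buffer = Vec::new();
    TextEncoder::new()
        .encode(&metric_families, &mut buffer)
        .expect("encoding to an in-memory buffer does not fail");
    String::from_utf8(buffer).expect("the Prometheus text format is valid UTF-8")
}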
89
crates/meilisearch/src/middleware.rs
Normal file
|
@@ -0,0 +1,89 @@
|
|||
//! Contains all the custom middleware used in meilisearch
|
||||
|
||||
use std::future::{ready, Ready};
|
||||
|
||||
use actix_web::dev::{self, Service, ServiceRequest, ServiceResponse, Transform};
|
||||
use actix_web::web::Data;
|
||||
use actix_web::Error;
|
||||
use futures_util::future::LocalBoxFuture;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use prometheus::HistogramTimer;
|
||||
|
||||
pub struct RouteMetrics;
|
||||
|
||||
// The middleware factory is the `Transform` trait from the actix-service crate
|
||||
// `S` - type of the next service
|
||||
// `B` - type of response's body
|
||||
impl<S, B> Transform<S, ServiceRequest> for RouteMetrics
|
||||
where
|
||||
S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = Error>,
|
||||
S::Future: 'static,
|
||||
B: 'static,
|
||||
{
|
||||
type Response = ServiceResponse<B>;
|
||||
type Error = Error;
|
||||
type InitError = ();
|
||||
type Transform = RouteMetricsMiddleware<S>;
|
||||
type Future = Ready<Result<Self::Transform, Self::InitError>>;
|
||||
|
||||
fn new_transform(&self, service: S) -> Self::Future {
|
||||
ready(Ok(RouteMetricsMiddleware { service }))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct RouteMetricsMiddleware<S> {
|
||||
service: S,
|
||||
}
|
||||
|
||||
impl<S, B> Service<ServiceRequest> for RouteMetricsMiddleware<S>
|
||||
where
|
||||
S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = Error>,
|
||||
S::Future: 'static,
|
||||
B: 'static,
|
||||
{
|
||||
type Response = ServiceResponse<B>;
|
||||
type Error = Error;
|
||||
type Future = LocalBoxFuture<'static, Result<Self::Response, Self::Error>>;
|
||||
|
||||
dev::forward_ready!(service);
|
||||
|
||||
fn call(&self, req: ServiceRequest) -> Self::Future {
|
||||
let mut histogram_timer: Option<HistogramTimer> = None;
|
||||
|
||||
// calling unwrap here is safe because the index scheduler is added to app data while creating the actix app.
|
||||
// also, the tests will fail if this is not present.
|
||||
let index_scheduler = req.app_data::<Data<IndexScheduler>>().unwrap();
|
||||
let features = index_scheduler.features();
|
||||
|
||||
let request_path = req.path();
|
||||
let request_pattern = req.match_pattern();
|
||||
let metric_path = request_pattern.as_ref().map_or(request_path, String::as_str).to_string();
|
||||
let request_method = req.method().to_string();
|
||||
|
||||
if features.check_metrics().is_ok() {
|
||||
let is_registered_resource = req.resource_map().has_resource(request_path);
|
||||
if is_registered_resource {
|
||||
histogram_timer = Some(
|
||||
crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
|
||||
.with_label_values(&[&request_method, &metric_path])
|
||||
.start_timer(),
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
let fut = self.service.call(req);
|
||||
|
||||
Box::pin(async move {
|
||||
let res = fut.await?;
|
||||
|
||||
crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
|
||||
.with_label_values(&[&request_method, &metric_path, res.status().as_str()])
|
||||
.inc();
|
||||
|
||||
if let Some(histogram_timer) = histogram_timer {
|
||||
histogram_timer.observe_duration();
|
||||
};
|
||||
Ok(res)
|
||||
})
|
||||
}
|
||||
}
|
1027
crates/meilisearch/src/option.rs
Normal file
File diff suppressed because it is too large
180
crates/meilisearch/src/routes/api_key.rs
Normal file
|
@@ -0,0 +1,180 @@
|
|||
use std::str;
|
||||
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use deserr::Deserr;
|
||||
use meilisearch_auth::error::AuthControllerError;
|
||||
use meilisearch_auth::AuthController;
|
||||
use meilisearch_types::deserr::query_params::Param;
|
||||
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::{Code, ResponseError};
|
||||
use meilisearch_types::keys::{CreateApiKey, Key, PatchApiKey};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::PAGINATION_DEFAULT_LIMIT;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::Pagination;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(
|
||||
web::resource("")
|
||||
.route(web::post().to(SeqHandler(create_api_key)))
|
||||
.route(web::get().to(SeqHandler(list_api_keys))),
|
||||
)
|
||||
.service(
|
||||
web::resource("/{key}")
|
||||
.route(web::get().to(SeqHandler(get_api_key)))
|
||||
.route(web::patch().to(SeqHandler(patch_api_key)))
|
||||
.route(web::delete().to(SeqHandler(delete_api_key))),
|
||||
);
|
||||
}
|
||||
|
||||
pub async fn create_api_key(
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_CREATE }>, Data<AuthController>>,
|
||||
body: AwebJson<CreateApiKey, DeserrJsonError>,
|
||||
_req: HttpRequest,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let v = body.into_inner();
|
||||
let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
|
||||
let key = auth_controller.create_key(v)?;
|
||||
Ok(KeyView::from_key(key, &auth_controller))
|
||||
})
|
||||
.await
|
||||
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
|
||||
|
||||
Ok(HttpResponse::Created().json(res))
|
||||
}
|
||||
|
||||
#[derive(Deserr, Debug, Clone, Copy)]
|
||||
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct ListApiKeys {
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidApiKeyOffset>)]
|
||||
pub offset: Param<usize>,
|
||||
#[deserr(default = Param(PAGINATION_DEFAULT_LIMIT), error = DeserrQueryParamError<InvalidApiKeyLimit>)]
|
||||
pub limit: Param<usize>,
|
||||
}
|
||||
|
||||
impl ListApiKeys {
|
||||
fn as_pagination(self) -> Pagination {
|
||||
Pagination { offset: self.offset.0, limit: self.limit.0 }
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn list_api_keys(
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, Data<AuthController>>,
|
||||
list_api_keys: AwebQueryParameter<ListApiKeys, DeserrQueryParamError>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let paginate = list_api_keys.into_inner().as_pagination();
|
||||
let page_view = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
|
||||
let keys = auth_controller.list_keys()?;
|
||||
let page_view = paginate
|
||||
.auto_paginate_sized(keys.into_iter().map(|k| KeyView::from_key(k, &auth_controller)));
|
||||
|
||||
Ok(page_view)
|
||||
})
|
||||
.await
|
||||
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
|
||||
|
||||
Ok(HttpResponse::Ok().json(page_view))
|
||||
}
|
||||
|
||||
pub async fn get_api_key(
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, Data<AuthController>>,
|
||||
path: web::Path<AuthParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let key = path.into_inner().key;
|
||||
|
||||
let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
|
||||
let uid =
|
||||
Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
|
||||
let key = auth_controller.get_key(uid)?;
|
||||
|
||||
Ok(KeyView::from_key(key, &auth_controller))
|
||||
})
|
||||
.await
|
||||
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
|
||||
|
||||
Ok(HttpResponse::Ok().json(res))
|
||||
}
|
||||
|
||||
pub async fn patch_api_key(
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_UPDATE }>, Data<AuthController>>,
|
||||
body: AwebJson<PatchApiKey, DeserrJsonError>,
|
||||
path: web::Path<AuthParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let key = path.into_inner().key;
|
||||
let patch_api_key = body.into_inner();
|
||||
let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
|
||||
let uid =
|
||||
Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
|
||||
let key = auth_controller.update_key(uid, patch_api_key)?;
|
||||
|
||||
Ok(KeyView::from_key(key, &auth_controller))
|
||||
})
|
||||
.await
|
||||
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
|
||||
|
||||
Ok(HttpResponse::Ok().json(res))
|
||||
}
|
||||
|
||||
pub async fn delete_api_key(
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_DELETE }>, Data<AuthController>>,
|
||||
path: web::Path<AuthParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let key = path.into_inner().key;
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let uid =
|
||||
Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
|
||||
auth_controller.delete_key(uid)
|
||||
})
|
||||
.await
|
||||
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
|
||||
|
||||
Ok(HttpResponse::NoContent().finish())
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct AuthParam {
|
||||
key: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct KeyView {
|
||||
name: Option<String>,
|
||||
description: Option<String>,
|
||||
key: String,
|
||||
uid: Uuid,
|
||||
actions: Vec<Action>,
|
||||
indexes: Vec<String>,
|
||||
#[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
|
||||
expires_at: Option<OffsetDateTime>,
|
||||
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
|
||||
created_at: OffsetDateTime,
|
||||
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
|
||||
updated_at: OffsetDateTime,
|
||||
}
|
||||
|
||||
impl KeyView {
|
||||
fn from_key(key: Key, auth: &AuthController) -> Self {
|
||||
let generated_key = auth.generate_key(key.uid).unwrap_or_default();
|
||||
|
||||
KeyView {
|
||||
name: key.name,
|
||||
description: key.description,
|
||||
key: generated_key,
|
||||
uid: key.uid,
|
||||
actions: key.actions,
|
||||
indexes: key.indexes.into_iter().map(|x| x.to_string()).collect(),
|
||||
expires_at: key.expires_at,
|
||||
created_at: key.created_at,
|
||||
updated_at: key.updated_at,
|
||||
}
|
||||
}
|
||||
}
|
43
crates/meilisearch/src/routes/dump.rs
Normal file
|
@@ -0,0 +1,43 @@
|
|||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_auth::AuthController;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use serde_json::json;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
|
||||
use crate::Opt;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump))));
|
||||
}
|
||||
|
||||
pub async fn create_dump(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<IndexScheduler>>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<AuthController>>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
analytics.publish("Dump Created".to_string(), json!({}), Some(&req));
|
||||
|
||||
let task = KindWithContent::DumpCreation {
|
||||
keys: auth_controller.list_keys()?,
|
||||
instance_uid: analytics.instance_uid().cloned(),
|
||||
};
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!(returns = ?task, "Create dump");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
105
crates/meilisearch/src/routes/features.rs
Normal file
|
@@ -0,0 +1,105 @@
|
|||
use actix_web::web::{self, Data};
|
||||
use actix_web::{HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::AwebJson;
|
||||
use deserr::Deserr;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::keys::actions;
|
||||
use serde_json::json;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::policies::ActionPolicy;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(
|
||||
web::resource("")
|
||||
.route(web::get().to(SeqHandler(get_features)))
|
||||
.route(web::patch().to(SeqHandler(patch_features))),
|
||||
);
|
||||
}
|
||||
|
||||
async fn get_features(
|
||||
index_scheduler: GuardedData<
|
||||
ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_GET }>,
|
||||
Data<IndexScheduler>,
|
||||
>,
|
||||
req: HttpRequest,
|
||||
analytics: Data<dyn Analytics>,
|
||||
) -> HttpResponse {
|
||||
let features = index_scheduler.features();
|
||||
|
||||
analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req));
|
||||
let features = features.runtime_features();
|
||||
debug!(returns = ?features, "Get features");
|
||||
HttpResponse::Ok().json(features)
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct RuntimeTogglableFeatures {
|
||||
#[deserr(default)]
|
||||
pub vector_store: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub metrics: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub logs_route: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub edit_documents_by_function: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub contains_filter: Option<bool>,
|
||||
}
|
||||
|
||||
async fn patch_features(
|
||||
index_scheduler: GuardedData<
|
||||
ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_UPDATE }>,
|
||||
Data<IndexScheduler>,
|
||||
>,
|
||||
new_features: AwebJson<RuntimeTogglableFeatures, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
analytics: Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let features = index_scheduler.features();
|
||||
debug!(parameters = ?new_features, "Patch features");
|
||||
|
||||
let old_features = features.runtime_features();
|
||||
let new_features = meilisearch_types::features::RuntimeTogglableFeatures {
|
||||
vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
|
||||
metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
|
||||
logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route),
|
||||
edit_documents_by_function: new_features
|
||||
.0
|
||||
.edit_documents_by_function
|
||||
.unwrap_or(old_features.edit_documents_by_function),
|
||||
contains_filter: new_features.0.contains_filter.unwrap_or(old_features.contains_filter),
|
||||
};
|
||||
|
||||
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
|
||||
// it renames to camelCase, which we don't want for analytics.
|
||||
// **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
|
||||
let meilisearch_types::features::RuntimeTogglableFeatures {
|
||||
vector_store,
|
||||
metrics,
|
||||
logs_route,
|
||||
edit_documents_by_function,
|
||||
contains_filter,
|
||||
} = new_features;
|
||||
|
||||
analytics.publish(
|
||||
"Experimental features Updated".to_string(),
|
||||
json!({
|
||||
"vector_store": vector_store,
|
||||
"metrics": metrics,
|
||||
"logs_route": logs_route,
|
||||
"edit_documents_by_function": edit_documents_by_function,
|
||||
"contains_filter": contains_filter,
|
||||
}),
|
||||
Some(&req),
|
||||
);
|
||||
index_scheduler.put_runtime_features(new_features)?;
|
||||
debug!(returns = ?new_features, "Patch features");
|
||||
Ok(HttpResponse::Ok().json(new_features))
|
||||
}
|
825
crates/meilisearch/src/routes/indexes/documents.rs
Normal file
|
@@ -0,0 +1,825 @@
|
|||
use std::io::ErrorKind;
|
||||
|
||||
use actix_web::http::header::CONTENT_TYPE;
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpMessage, HttpRequest, HttpResponse};
|
||||
use bstr::ByteSlice as _;
|
||||
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use deserr::Deserr;
|
||||
use futures::StreamExt;
|
||||
use index_scheduler::{IndexScheduler, RoFeatures, TaskId};
|
||||
use meilisearch_types::deserr::query_params::Param;
|
||||
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
|
||||
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::{Code, ResponseError};
|
||||
use meilisearch_types::heed::RoTxn;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod;
|
||||
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
|
||||
use meilisearch_types::milli::DocumentId;
|
||||
use meilisearch_types::star_or::OptionStarOrList;
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use meilisearch_types::{milli, Document, Index};
|
||||
use mime::Mime;
|
||||
use once_cell::sync::Lazy;
|
||||
use serde::Deserialize;
|
||||
use serde_json::Value;
|
||||
use tempfile::tempfile;
|
||||
use tokio::fs::File;
|
||||
use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter};
|
||||
use tracing::debug;
|
||||
|
||||
use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind};
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::error::PayloadError::ReceivePayload;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::payload::Payload;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::{
|
||||
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
|
||||
};
|
||||
use crate::search::{parse_filter, RetrieveVectors};
|
||||
use crate::Opt;
|
||||
|
||||
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
|
||||
vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()]
|
||||
});
|
||||
|
||||
/// Extracts the mime type from the content type and returns
|
||||
/// a Meilisearch error if anything bad happens.
|
||||
fn extract_mime_type(req: &HttpRequest) -> Result<Option<Mime>, MeilisearchHttpError> {
|
||||
match req.mime_type() {
|
||||
Ok(Some(mime)) => Ok(Some(mime)),
|
||||
Ok(None) => Ok(None),
|
||||
Err(_) => match req.headers().get(CONTENT_TYPE) {
|
||||
Some(content_type) => Err(MeilisearchHttpError::InvalidContentType(
|
||||
content_type.as_bytes().as_bstr().to_string(),
|
||||
ACCEPTED_CONTENT_TYPE.clone(),
|
||||
)),
|
||||
None => Err(MeilisearchHttpError::MissingContentType(ACCEPTED_CONTENT_TYPE.clone())),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct DocumentParam {
|
||||
index_uid: String,
|
||||
document_id: String,
|
||||
}
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(
|
||||
web::resource("")
|
||||
.route(web::get().to(SeqHandler(get_documents)))
|
||||
.route(web::post().to(SeqHandler(replace_documents)))
|
||||
.route(web::put().to(SeqHandler(update_documents)))
|
||||
.route(web::delete().to(SeqHandler(clear_all_documents))),
|
||||
)
|
||||
// these routes need to be before the /documents/{document_id} to match properly
|
||||
.service(
|
||||
web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents_batch))),
|
||||
)
|
||||
.service(web::resource("/delete").route(web::post().to(SeqHandler(delete_documents_by_filter))))
|
||||
.service(web::resource("/edit").route(web::post().to(SeqHandler(edit_documents_by_function))))
|
||||
.service(web::resource("/fetch").route(web::post().to(SeqHandler(documents_by_query_post))))
|
||||
.service(
|
||||
web::resource("/{document_id}")
|
||||
.route(web::get().to(SeqHandler(get_document)))
|
||||
.route(web::delete().to(SeqHandler(delete_document))),
|
||||
);
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr)]
|
||||
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct GetDocument {
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)]
|
||||
fields: OptionStarOrList<String>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentRetrieveVectors>)]
|
||||
retrieve_vectors: Param<bool>,
|
||||
}
|
||||
|
||||
pub async fn get_document(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
|
||||
document_param: web::Path<DocumentParam>,
|
||||
params: AwebQueryParameter<GetDocument, DeserrQueryParamError>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let DocumentParam { index_uid, document_id } = document_param.into_inner();
|
||||
debug!(parameters = ?params, "Get document");
|
||||
let index_uid = IndexUid::try_from(index_uid)?;
|
||||
|
||||
let GetDocument { fields, retrieve_vectors: param_retrieve_vectors } = params.into_inner();
|
||||
let attributes_to_retrieve = fields.merge_star_and_none();
|
||||
|
||||
let features = index_scheduler.features();
|
||||
let retrieve_vectors = RetrieveVectors::new(param_retrieve_vectors.0, features)?;
|
||||
|
||||
analytics.get_fetch_documents(
|
||||
&DocumentFetchKind::PerDocumentId { retrieve_vectors: param_retrieve_vectors.0 },
|
||||
&req,
|
||||
);
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let document =
|
||||
retrieve_document(&index, &document_id, attributes_to_retrieve, retrieve_vectors)?;
|
||||
debug!(returns = ?document, "Get document");
|
||||
Ok(HttpResponse::Ok().json(document))
|
||||
}
|
||||
|
||||
pub async fn delete_document(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
|
||||
path: web::Path<DocumentParam>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let DocumentParam { index_uid, document_id } = path.into_inner();
|
||||
let index_uid = IndexUid::try_from(index_uid)?;
|
||||
|
||||
analytics.delete_documents(DocumentDeletionKind::PerDocumentId, &req);
|
||||
|
||||
let task = KindWithContent::DocumentDeletion {
|
||||
index_uid: index_uid.to_string(),
|
||||
documents_ids: vec![document_id],
|
||||
};
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr)]
|
||||
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct BrowseQueryGet {
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentOffset>)]
|
||||
offset: Param<usize>,
|
||||
#[deserr(default = Param(PAGINATION_DEFAULT_LIMIT), error = DeserrQueryParamError<InvalidDocumentLimit>)]
|
||||
limit: Param<usize>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)]
|
||||
fields: OptionStarOrList<String>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentRetrieveVectors>)]
|
||||
retrieve_vectors: Param<bool>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)]
|
||||
filter: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct BrowseQuery {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidDocumentOffset>)]
|
||||
offset: usize,
|
||||
#[deserr(default = PAGINATION_DEFAULT_LIMIT, error = DeserrJsonError<InvalidDocumentLimit>)]
|
||||
limit: usize,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidDocumentFields>)]
|
||||
fields: Option<Vec<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidDocumentRetrieveVectors>)]
|
||||
retrieve_vectors: bool,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
|
||||
filter: Option<Value>,
|
||||
}
|
||||
|
||||
pub async fn documents_by_query_post(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
body: AwebJson<BrowseQuery, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let body = body.into_inner();
|
||||
debug!(parameters = ?body, "Get documents POST");
|
||||
|
||||
analytics.post_fetch_documents(
|
||||
&DocumentFetchKind::Normal {
|
||||
with_filter: body.filter.is_some(),
|
||||
limit: body.limit,
|
||||
offset: body.offset,
|
||||
retrieve_vectors: body.retrieve_vectors,
|
||||
},
|
||||
&req,
|
||||
);
|
||||
|
||||
documents_by_query(&index_scheduler, index_uid, body)
|
||||
}
|
||||
|
||||
pub async fn get_documents(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebQueryParameter<BrowseQueryGet, DeserrQueryParamError>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?params, "Get documents GET");
|
||||
|
||||
let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter } = params.into_inner();
|
||||
|
||||
let filter = match filter {
|
||||
Some(f) => match serde_json::from_str(&f) {
|
||||
Ok(v) => Some(v),
|
||||
_ => Some(Value::String(f)),
|
||||
},
|
||||
None => None,
|
||||
};
|
||||
|
||||
let query = BrowseQuery {
|
||||
offset: offset.0,
|
||||
limit: limit.0,
|
||||
fields: fields.merge_star_and_none(),
|
||||
retrieve_vectors: retrieve_vectors.0,
|
||||
filter,
|
||||
};
|
||||
|
||||
analytics.get_fetch_documents(
|
||||
&DocumentFetchKind::Normal {
|
||||
with_filter: query.filter.is_some(),
|
||||
limit: query.limit,
|
||||
offset: query.offset,
|
||||
retrieve_vectors: query.retrieve_vectors,
|
||||
},
|
||||
&req,
|
||||
);
|
||||
|
||||
documents_by_query(&index_scheduler, index_uid, query)
|
||||
}
|
||||
|
||||
fn documents_by_query(
|
||||
index_scheduler: &IndexScheduler,
|
||||
index_uid: web::Path<String>,
|
||||
query: BrowseQuery,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let BrowseQuery { offset, limit, fields, retrieve_vectors, filter } = query;
|
||||
|
||||
let features = index_scheduler.features();
|
||||
let retrieve_vectors = RetrieveVectors::new(retrieve_vectors, features)?;
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let (total, documents) = retrieve_documents(
|
||||
&index,
|
||||
offset,
|
||||
limit,
|
||||
filter,
|
||||
fields,
|
||||
retrieve_vectors,
|
||||
index_scheduler.features(),
|
||||
)?;
|
||||
|
||||
let ret = PaginationView::new(offset, limit, total as usize, documents);
|
||||
|
||||
debug!(returns = ?ret, "Get documents");
|
||||
Ok(HttpResponse::Ok().json(ret))
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Deserr)]
|
||||
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct UpdateDocumentsQuery {
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidIndexPrimaryKey>)]
|
||||
pub primary_key: Option<String>,
|
||||
#[deserr(default, try_from(char) = from_char_csv_delimiter -> DeserrQueryParamError<InvalidDocumentCsvDelimiter>, error = DeserrQueryParamError<InvalidDocumentCsvDelimiter>)]
|
||||
pub csv_delimiter: Option<u8>,
|
||||
}
|
||||
|
||||
fn from_char_csv_delimiter(
|
||||
c: char,
|
||||
) -> Result<Option<u8>, DeserrQueryParamError<InvalidDocumentCsvDelimiter>> {
|
||||
if c.is_ascii() {
|
||||
Ok(Some(c as u8))
|
||||
} else {
|
||||
Err(DeserrQueryParamError::new(
|
||||
format!("csv delimiter must be an ascii character. Found: `{}`", c),
|
||||
Code::InvalidDocumentCsvDelimiter,
|
||||
))
|
||||
}
|
||||
}
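// Editorial sketch: what the delimiter check above accepts and rejects. `csv_delimiter`
// is a made-up free function mirroring from_char_csv_delimiter, without the deserr error
// type, so it can run on its own.
fn csv_delimiter(c: char) -> Result<u8, String> {
    if c.is_ascii() {
        Ok(c as u8)
    } else {
        Err(format!("csv delimiter must be an ascii character. Found: `{c}`"))
    }
}

fn demo_csv_delimiter() {
    assert_eq!(csv_delimiter(';'), Ok(b';'));
    assert!(csv_delimiter('€').is_err());
}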
|
||||
|
||||
pub async fn replace_documents(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrQueryParamError>,
|
||||
body: Payload,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
debug!(parameters = ?params, "Replace documents");
|
||||
let params = params.into_inner();
|
||||
|
||||
analytics.add_documents(
|
||||
¶ms,
|
||||
index_scheduler.index_exists(&index_uid).map_or(true, |x| !x),
|
||||
&req,
|
||||
);
|
||||
|
||||
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task = document_addition(
|
||||
extract_mime_type(&req)?,
|
||||
index_scheduler,
|
||||
index_uid,
|
||||
params.primary_key,
|
||||
params.csv_delimiter,
|
||||
body,
|
||||
IndexDocumentsMethod::ReplaceDocuments,
|
||||
uid,
|
||||
dry_run,
|
||||
allow_index_creation,
|
||||
)
|
||||
.await?;
|
||||
debug!(returns = ?task, "Replace documents");
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
pub async fn update_documents(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrQueryParamError>,
|
||||
body: Payload,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let params = params.into_inner();
|
||||
debug!(parameters = ?params, "Update documents");
|
||||
|
||||
analytics.add_documents(
|
||||
¶ms,
|
||||
index_scheduler.index_exists(&index_uid).map_or(true, |x| !x),
|
||||
&req,
|
||||
);
|
||||
|
||||
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task = document_addition(
|
||||
extract_mime_type(&req)?,
|
||||
index_scheduler,
|
||||
index_uid,
|
||||
params.primary_key,
|
||||
params.csv_delimiter,
|
||||
body,
|
||||
IndexDocumentsMethod::UpdateDocuments,
|
||||
uid,
|
||||
dry_run,
|
||||
allow_index_creation,
|
||||
)
|
||||
.await?;
|
||||
debug!(returns = ?task, "Update documents");
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn document_addition(
|
||||
mime_type: Option<Mime>,
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
|
||||
index_uid: IndexUid,
|
||||
primary_key: Option<String>,
|
||||
csv_delimiter: Option<u8>,
|
||||
mut body: Payload,
|
||||
method: IndexDocumentsMethod,
|
||||
task_id: Option<TaskId>,
|
||||
dry_run: bool,
|
||||
allow_index_creation: bool,
|
||||
) -> Result<SummarizedTaskView, MeilisearchHttpError> {
|
||||
let format = match (
|
||||
mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())),
|
||||
csv_delimiter,
|
||||
) {
|
||||
(Some(("application", "json")), None) => PayloadType::Json,
|
||||
(Some(("application", "x-ndjson")), None) => PayloadType::Ndjson,
|
||||
(Some(("text", "csv")), None) => PayloadType::Csv { delimiter: b',' },
|
||||
(Some(("text", "csv")), Some(delimiter)) => PayloadType::Csv { delimiter },
|
||||
|
||||
(Some(("application", "json")), Some(_)) => {
|
||||
return Err(MeilisearchHttpError::CsvDelimiterWithWrongContentType(String::from(
|
||||
"application/json",
|
||||
)))
|
||||
}
|
||||
(Some(("application", "x-ndjson")), Some(_)) => {
|
||||
return Err(MeilisearchHttpError::CsvDelimiterWithWrongContentType(String::from(
|
||||
"application/x-ndjson",
|
||||
)))
|
||||
}
|
||||
(Some((type_, subtype)), _) => {
|
||||
return Err(MeilisearchHttpError::InvalidContentType(
|
||||
format!("{}/{}", type_, subtype),
|
||||
ACCEPTED_CONTENT_TYPE.clone(),
|
||||
))
|
||||
}
|
||||
(None, _) => {
|
||||
return Err(MeilisearchHttpError::MissingContentType(ACCEPTED_CONTENT_TYPE.clone()))
|
||||
}
|
||||
};
|
||||
|
||||
let (uuid, mut update_file) = index_scheduler.create_update_file(dry_run)?;
|
||||
|
||||
let temp_file = match tempfile() {
|
||||
Ok(file) => file,
|
||||
Err(e) => return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))),
|
||||
};
|
||||
|
||||
let async_file = File::from_std(temp_file);
|
||||
let mut buffer = BufWriter::new(async_file);
|
||||
|
||||
let mut buffer_write_size: usize = 0;
|
||||
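// Stream the request body chunk by chunk into the buffered temporary file;
// a payload with no data at all is rejected with `MissingPayload`.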
while let Some(result) = body.next().await {
|
||||
let byte = result?;
|
||||
|
||||
if byte.is_empty() && buffer_write_size == 0 {
|
||||
return Err(MeilisearchHttpError::MissingPayload(format));
|
||||
}
|
||||
|
||||
match buffer.write_all(&byte).await {
|
||||
Ok(()) => buffer_write_size += 1,
|
||||
Err(e) => return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))),
|
||||
}
|
||||
}
|
||||
|
||||
if let Err(e) = buffer.flush().await {
|
||||
return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e))));
|
||||
}
|
||||
|
||||
if buffer_write_size == 0 {
|
||||
return Err(MeilisearchHttpError::MissingPayload(format));
|
||||
}
|
||||
|
||||
if let Err(e) = buffer.seek(std::io::SeekFrom::Start(0)).await {
|
||||
return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e))));
|
||||
}
|
||||
|
||||
let read_file = buffer.into_inner().into_std().await;
|
||||
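// Parsing the payload is CPU-bound, so it is offloaded to the blocking thread
// pool; the parsed documents are written into the scheduler's update file.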
let documents_count = tokio::task::spawn_blocking(move || {
|
||||
let documents_count = match format {
|
||||
PayloadType::Json => read_json(&read_file, &mut update_file)?,
|
||||
PayloadType::Csv { delimiter } => read_csv(&read_file, &mut update_file, delimiter)?,
|
||||
PayloadType::Ndjson => read_ndjson(&read_file, &mut update_file)?,
|
||||
};
|
||||
// we NEED to persist the file here because we moved the `update_file` into another task.
|
||||
update_file.persist()?;
|
||||
Ok(documents_count)
|
||||
})
|
||||
.await;
|
||||
|
||||
let documents_count = match documents_count {
|
||||
Ok(Ok(documents_count)) => documents_count,
|
||||
// in this case the file could not have been persisted.
|
||||
Ok(Err(e)) => return Err(e),
|
||||
Err(e) => {
|
||||
// Here the file MAY or MAY NOT have been persisted.
// We don't know, so we ignore a file-not-found error.
|
||||
match index_scheduler.delete_update_file(uuid) {
|
||||
Ok(()) => (),
|
||||
Err(index_scheduler::Error::FileStore(file_store::Error::IoError(e)))
|
||||
if e.kind() == ErrorKind::NotFound => {}
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
index_uuid = %uuid,
|
||||
"Unknown error happened while deleting a malformed update file: {e}"
|
||||
);
|
||||
}
|
||||
}
|
||||
// We still want to return the original error to the end user.
|
||||
return Err(e.into());
|
||||
}
|
||||
};
|
||||
|
||||
let task = KindWithContent::DocumentAdditionOrUpdate {
|
||||
method,
|
||||
content_file: uuid,
|
||||
documents_count,
|
||||
primary_key,
|
||||
allow_index_creation,
|
||||
index_uid: index_uid.to_string(),
|
||||
};
|
||||
|
||||
let scheduler = index_scheduler.clone();
|
||||
let task = match tokio::task::spawn_blocking(move || scheduler.register(task, task_id, dry_run))
|
||||
.await?
|
||||
{
|
||||
Ok(task) => task,
|
||||
Err(e) => {
|
||||
index_scheduler.delete_update_file(uuid)?;
|
||||
return Err(e.into());
|
||||
}
|
||||
};
|
||||
|
||||
Ok(task.into())
|
||||
}
|
||||
|
||||
pub async fn delete_documents_batch(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
body: web::Json<Vec<Value>>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?body, "Delete documents by batch");
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
analytics.delete_documents(DocumentDeletionKind::PerBatch, &req);
|
||||
|
||||
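// Ids may be sent as JSON strings or as any other JSON value (e.g. numbers);
// non-string values are converted to their string representation.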
let ids = body
|
||||
.iter()
|
||||
.map(|v| v.as_str().map(String::from).unwrap_or_else(|| v.to_string()))
|
||||
.collect();
|
||||
|
||||
let task =
|
||||
KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids };
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!(returns = ?task, "Delete documents by batch");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
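/// Body accepted by the delete-by-filter route. A minimal payload sketch, with
/// an illustrative attribute that is assumed to be filterable:
///
/// ```json
/// { "filter": "genres = horror" }
/// ```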
#[derive(Debug, Deserr)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct DocumentDeletionByFilter {
|
||||
#[deserr(error = DeserrJsonError<InvalidDocumentFilter>, missing_field_error = DeserrJsonError::missing_document_filter)]
|
||||
filter: Value,
|
||||
}
|
||||
|
||||
pub async fn delete_documents_by_filter(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
body: AwebJson<DocumentDeletionByFilter, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?body, "Delete documents by filter");
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let index_uid = index_uid.into_inner();
|
||||
let filter = body.into_inner().filter;
|
||||
|
||||
analytics.delete_documents(DocumentDeletionKind::PerFilter, &req);
|
||||
|
||||
// we ensure the filter is well formed before enqueuing it
|
||||
crate::search::parse_filter(&filter, Code::InvalidDocumentFilter, index_scheduler.features())?
|
||||
.ok_or(MeilisearchHttpError::EmptyFilter)?;
|
||||
|
||||
let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
|
||||
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!(returns = ?task, "Delete documents by filter");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
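/// Body accepted by the edit-by-function route. A minimal payload sketch; the
/// filter and the rhai snippet are purely illustrative:
///
/// ```json
/// { "filter": "doggo = 'bernese'", "function": "doc.price = doc.price * 2" }
/// ```
///
/// `context`, when provided, must be a JSON object (see the check below).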
#[derive(Debug, Deserr)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct DocumentEditionByFunction {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
|
||||
pub filter: Option<Value>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidDocumentEditionContext>)]
|
||||
pub context: Option<Value>,
|
||||
#[deserr(error = DeserrJsonError<InvalidDocumentEditionFunctionFilter>, missing_field_error = DeserrJsonError::missing_document_edition_function)]
|
||||
pub function: String,
|
||||
}
|
||||
|
||||
pub async fn edit_documents_by_function(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ALL }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebJson<DocumentEditionByFunction, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?params, "Edit documents by function");
|
||||
|
||||
index_scheduler
|
||||
.features()
|
||||
.check_edit_documents_by_function("Using the documents edit route")?;
|
||||
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let index_uid = index_uid.into_inner();
|
||||
let params = params.into_inner();
|
||||
|
||||
analytics.update_documents_by_function(
|
||||
&params,
|
||||
index_scheduler.index(&index_uid).is_err(),
|
||||
&req,
|
||||
);
|
||||
|
||||
let DocumentEditionByFunction { filter, context, function } = params;
|
||||
let engine = milli::rhai::Engine::new();
|
||||
if let Err(e) = engine.compile(&function) {
|
||||
return Err(ResponseError::from_msg(e.to_string(), Code::BadRequest));
|
||||
}
|
||||
|
||||
if let Some(ref filter) = filter {
|
||||
// we ensure the filter is well formed before enqueuing it
|
||||
crate::search::parse_filter(
|
||||
filter,
|
||||
Code::InvalidDocumentFilter,
|
||||
index_scheduler.features(),
|
||||
)?
|
||||
.ok_or(MeilisearchHttpError::EmptyFilter)?;
|
||||
}
|
||||
let task = KindWithContent::DocumentEdition {
|
||||
index_uid,
|
||||
filter_expr: filter,
|
||||
context: match context {
|
||||
Some(Value::Object(m)) => Some(m),
|
||||
None => None,
|
||||
_ => {
|
||||
return Err(ResponseError::from_msg(
|
||||
"The context must be an object".to_string(),
|
||||
Code::InvalidDocumentEditionContext,
|
||||
))
|
||||
}
|
||||
},
|
||||
function,
|
||||
};
|
||||
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!(returns = ?task, "Edit documents by function");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
pub async fn clear_all_documents(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
analytics.delete_documents(DocumentDeletionKind::ClearAll, &req);
|
||||
|
||||
let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() };
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!(returns = ?task, "Delete all documents");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
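/// Iterates over the requested documents, leaving, hiding or rebuilding their
/// `_vectors` field according to `retrieve_vectors`.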
fn some_documents<'a, 't: 'a>(
|
||||
index: &'a Index,
|
||||
rtxn: &'t RoTxn,
|
||||
doc_ids: impl IntoIterator<Item = DocumentId> + 'a,
|
||||
retrieve_vectors: RetrieveVectors,
|
||||
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
let embedding_configs = index.embedding_configs(rtxn)?;
|
||||
|
||||
Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| {
|
||||
ret.map_err(ResponseError::from).and_then(|(key, document)| -> Result<_, ResponseError> {
|
||||
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
|
||||
match retrieve_vectors {
|
||||
RetrieveVectors::Ignore => {}
|
||||
RetrieveVectors::Hide => {
|
||||
document.remove("_vectors");
|
||||
}
|
||||
RetrieveVectors::Retrieve => {
|
||||
// Clippy is simply wrong
|
||||
#[allow(clippy::manual_unwrap_or_default)]
|
||||
let mut vectors = match document.remove("_vectors") {
|
||||
Some(Value::Object(map)) => map,
|
||||
_ => Default::default(),
|
||||
};
|
||||
for (name, vector) in index.embeddings(rtxn, key)? {
|
||||
let user_provided = embedding_configs
|
||||
.iter()
|
||||
.find(|conf| conf.name == name)
|
||||
.is_some_and(|conf| conf.user_provided.contains(key));
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: Some(vector.into()),
|
||||
regenerate: !user_provided,
|
||||
};
|
||||
vectors.insert(
|
||||
name,
|
||||
serde_json::to_value(embeddings).map_err(MeilisearchHttpError::from)?,
|
||||
);
|
||||
}
|
||||
document.insert("_vectors".into(), vectors.into());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(document)
|
||||
})
|
||||
}))
|
||||
}
|
||||
|
||||
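/// Returns the total number of documents matching the optional filter together
/// with the requested page (offset/limit) of documents.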
fn retrieve_documents<S: AsRef<str>>(
|
||||
index: &Index,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
filter: Option<Value>,
|
||||
attributes_to_retrieve: Option<Vec<S>>,
|
||||
retrieve_vectors: RetrieveVectors,
|
||||
features: RoFeatures,
|
||||
) -> Result<(u64, Vec<Document>), ResponseError> {
|
||||
let rtxn = index.read_txn()?;
|
||||
let filter = &filter;
|
||||
let filter = if let Some(filter) = filter {
|
||||
parse_filter(filter, Code::InvalidDocumentFilter, features)?
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let candidates = if let Some(filter) = filter {
|
||||
filter.evaluate(&rtxn, index).map_err(|err| match err {
|
||||
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
|
||||
ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter)
|
||||
}
|
||||
e => e.into(),
|
||||
})?
|
||||
} else {
|
||||
index.documents_ids(&rtxn)?
|
||||
};
|
||||
|
||||
let (it, number_of_documents) = {
|
||||
let number_of_documents = candidates.len();
|
||||
(
|
||||
some_documents(
|
||||
index,
|
||||
&rtxn,
|
||||
candidates.into_iter().skip(offset).take(limit),
|
||||
retrieve_vectors,
|
||||
)?,
|
||||
number_of_documents,
|
||||
)
|
||||
};
|
||||
|
||||
let documents: Vec<_> = it
|
||||
.map(|document| {
|
||||
Ok(match &attributes_to_retrieve {
|
||||
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
|
||||
&document?,
|
||||
attributes_to_retrieve.iter().map(|s| s.as_ref()).chain(
|
||||
(retrieve_vectors == RetrieveVectors::Retrieve).then_some("_vectors"),
|
||||
),
|
||||
),
|
||||
None => document?,
|
||||
})
|
||||
})
|
||||
.collect::<Result<_, ResponseError>>()?;
|
||||
|
||||
Ok((number_of_documents, documents))
|
||||
}
|
||||
|
||||
fn retrieve_document<S: AsRef<str>>(
|
||||
index: &Index,
|
||||
doc_id: &str,
|
||||
attributes_to_retrieve: Option<Vec<S>>,
|
||||
retrieve_vectors: RetrieveVectors,
|
||||
) -> Result<Document, ResponseError> {
|
||||
let txn = index.read_txn()?;
|
||||
|
||||
let internal_id = index
|
||||
.external_documents_ids()
|
||||
.get(&txn, doc_id)?
|
||||
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
|
||||
|
||||
let document = some_documents(index, &txn, Some(internal_id), retrieve_vectors)?
|
||||
.next()
|
||||
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))??;
|
||||
|
||||
let document = match &attributes_to_retrieve {
|
||||
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
|
||||
&document,
|
||||
attributes_to_retrieve
|
||||
.iter()
|
||||
.map(|s| s.as_ref())
|
||||
.chain((retrieve_vectors == RetrieveVectors::Retrieve).then_some("_vectors")),
|
||||
),
|
||||
None => document,
|
||||
};
|
||||
|
||||
Ok(document)
|
||||
}
|
155
crates/meilisearch/src/routes/indexes/facet_search.rs
Normal file
@@ -0,0 +1,155 @@
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::AwebJson;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::locales::Locale;
|
||||
use serde_json::Value;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::analytics::{Analytics, FacetSearchAggregator};
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::routes::indexes::search::search_kind;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
|
||||
SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
|
||||
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
|
||||
};
|
||||
use crate::search_queue::SearchQueue;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::resource("").route(web::post().to(search)));
|
||||
}
|
||||
|
||||
/// # Important
|
||||
///
|
||||
/// Intentionally don't use `deny_unknown_fields`, so that extra search parameters sent by the user are silently ignored.
|
||||
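/// A minimal payload sketch (the facet name and query value are illustrative):
///
/// ```json
/// { "facetName": "genres", "facetQuery": "fic" }
/// ```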
#[derive(Debug, Clone, Default, PartialEq, deserr::Deserr)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase)]
|
||||
pub struct FacetSearchQuery {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidFacetSearchQuery>)]
|
||||
pub facet_query: Option<String>,
|
||||
#[deserr(error = DeserrJsonError<InvalidFacetSearchFacetName>, missing_field_error = DeserrJsonError::missing_facet_search_facet_name)]
|
||||
pub facet_name: String,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
|
||||
pub q: Option<String>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
|
||||
pub vector: Option<Vec<f32>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
|
||||
pub hybrid: Option<HybridQuery>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
|
||||
pub filter: Option<Value>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
|
||||
pub matching_strategy: MatchingStrategy,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
|
||||
pub attributes_to_search_on: Option<Vec<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
|
||||
pub ranking_score_threshold: Option<RankingScoreThreshold>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)]
|
||||
pub locales: Option<Vec<Locale>>,
|
||||
}
|
||||
|
||||
pub async fn search(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
||||
search_queue: Data<SearchQueue>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebJson<FacetSearchQuery, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let query = params.into_inner();
|
||||
debug!(parameters = ?query, "Facet search");
|
||||
|
||||
let mut aggregate = FacetSearchAggregator::from_query(&query, &req);
|
||||
|
||||
let facet_query = query.facet_query.clone();
|
||||
let facet_name = query.facet_name.clone();
|
||||
let locales = query.locales.clone().map(|l| l.into_iter().map(Into::into).collect());
|
||||
let mut search_query = SearchQuery::from(query);
|
||||
|
||||
// Tenant token search_rules.
|
||||
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
|
||||
add_search_rules(&mut search_query.filter, search_rules);
|
||||
}
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let features = index_scheduler.features();
|
||||
let search_kind = search_kind(&search_query, &index_scheduler, &index, features)?;
|
||||
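// Wait for a search permit so concurrent searches stay bounded, run the facet
// search on the blocking thread pool, then release the permit before looking
// at the result.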
let permit = search_queue.try_get_search_permit().await?;
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_facet_search(
|
||||
&index,
|
||||
search_query,
|
||||
facet_query,
|
||||
facet_name,
|
||||
search_kind,
|
||||
index_scheduler.features(),
|
||||
locales,
|
||||
)
|
||||
})
|
||||
.await;
|
||||
permit.drop().await;
|
||||
let search_result = search_result?;
|
||||
|
||||
if let Ok(ref search_result) = search_result {
|
||||
aggregate.succeed(search_result);
|
||||
}
|
||||
analytics.post_facet_search(aggregate);
|
||||
|
||||
let search_result = search_result?;
|
||||
|
||||
debug!(returns = ?search_result, "Facet search");
|
||||
Ok(HttpResponse::Ok().json(search_result))
|
||||
}
|
||||
|
||||
impl From<FacetSearchQuery> for SearchQuery {
|
||||
fn from(value: FacetSearchQuery) -> Self {
|
||||
let FacetSearchQuery {
|
||||
facet_query: _,
|
||||
facet_name: _,
|
||||
q,
|
||||
vector,
|
||||
filter,
|
||||
matching_strategy,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
} = value;
|
||||
|
||||
SearchQuery {
|
||||
q,
|
||||
offset: DEFAULT_SEARCH_OFFSET(),
|
||||
limit: DEFAULT_SEARCH_LIMIT(),
|
||||
page: None,
|
||||
hits_per_page: None,
|
||||
attributes_to_retrieve: None,
|
||||
retrieve_vectors: false,
|
||||
attributes_to_crop: None,
|
||||
crop_length: DEFAULT_CROP_LENGTH(),
|
||||
attributes_to_highlight: None,
|
||||
show_matches_position: false,
|
||||
show_ranking_score: false,
|
||||
show_ranking_score_details: false,
|
||||
filter,
|
||||
sort: None,
|
||||
distinct: None,
|
||||
facets: None,
|
||||
highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(),
|
||||
highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
|
||||
crop_marker: DEFAULT_CROP_MARKER(),
|
||||
matching_strategy,
|
||||
vector,
|
||||
attributes_to_search_on,
|
||||
hybrid,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
}
|
||||
}
|
||||
}
|
280
crates/meilisearch/src/routes/indexes/mod.rs
Normal file
@@ -0,0 +1,280 @@
use std::convert::Infallible;
|
||||
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use deserr::{DeserializeError, Deserr, ValuePointerRef};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::deserr::query_params::Param;
|
||||
use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError};
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::{Code, ResponseError};
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::{self, FieldDistribution, Index};
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use serde::Serialize;
|
||||
use serde_json::json;
|
||||
use time::OffsetDateTime;
|
||||
use tracing::debug;
|
||||
|
||||
use super::{get_task_id, Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::{AuthenticationError, GuardedData};
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::is_dry_run;
|
||||
use crate::Opt;
|
||||
|
||||
pub mod documents;
|
||||
pub mod facet_search;
|
||||
pub mod search;
|
||||
pub mod settings;
|
||||
pub mod similar;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(
|
||||
web::resource("")
|
||||
.route(web::get().to(list_indexes))
|
||||
.route(web::post().to(SeqHandler(create_index))),
|
||||
)
|
||||
.service(
|
||||
web::scope("/{index_uid}")
|
||||
.service(
|
||||
web::resource("")
|
||||
.route(web::get().to(SeqHandler(get_index)))
|
||||
.route(web::patch().to(SeqHandler(update_index)))
|
||||
.route(web::delete().to(SeqHandler(delete_index))),
|
||||
)
|
||||
.service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats))))
|
||||
.service(web::scope("/documents").configure(documents::configure))
|
||||
.service(web::scope("/search").configure(search::configure))
|
||||
.service(web::scope("/facet-search").configure(facet_search::configure))
|
||||
.service(web::scope("/similar").configure(similar::configure))
|
||||
.service(web::scope("/settings").configure(settings::configure)),
|
||||
);
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct IndexView {
|
||||
pub uid: String,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub created_at: OffsetDateTime,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub updated_at: OffsetDateTime,
|
||||
pub primary_key: Option<String>,
|
||||
}
|
||||
|
||||
impl IndexView {
|
||||
fn new(uid: String, index: &Index) -> Result<IndexView, milli::Error> {
|
||||
// It is important that this function does not keep the Index handle or a clone of it, because
|
||||
// `list_indexes` relies on this property to avoid opening all indexes at once.
|
||||
let rtxn = index.read_txn()?;
|
||||
Ok(IndexView {
|
||||
uid,
|
||||
created_at: index.created_at(&rtxn)?,
|
||||
updated_at: index.updated_at(&rtxn)?,
|
||||
primary_key: index.primary_key(&rtxn)?.map(String::from),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserr, Debug, Clone, Copy)]
|
||||
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct ListIndexes {
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidIndexOffset>)]
|
||||
pub offset: Param<usize>,
|
||||
#[deserr(default = Param(PAGINATION_DEFAULT_LIMIT), error = DeserrQueryParamError<InvalidIndexLimit>)]
|
||||
pub limit: Param<usize>,
|
||||
}
|
||||
impl ListIndexes {
|
||||
fn as_pagination(self) -> Pagination {
|
||||
Pagination { offset: self.offset.0, limit: self.limit.0 }
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn list_indexes(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
|
||||
paginate: AwebQueryParameter<ListIndexes, DeserrQueryParamError>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?paginate, "List indexes");
|
||||
let filters = index_scheduler.filters();
|
||||
let indexes: Vec<Option<IndexView>> =
|
||||
index_scheduler.try_for_each_index(|uid, index| -> Result<Option<IndexView>, _> {
|
||||
if !filters.is_index_authorized(uid) {
|
||||
return Ok(None);
|
||||
}
|
||||
Ok(Some(IndexView::new(uid.to_string(), index)?))
|
||||
})?;
|
||||
// This won't open all the indexes at once because IndexView doesn't keep the `Index` open.
|
||||
let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect();
|
||||
let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter());
|
||||
|
||||
debug!(returns = ?ret, "List indexes");
|
||||
Ok(HttpResponse::Ok().json(ret))
|
||||
}
|
||||
|
||||
#[derive(Deserr, Debug)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct IndexCreateRequest {
|
||||
#[deserr(error = DeserrJsonError<InvalidIndexUid>, missing_field_error = DeserrJsonError::missing_index_uid)]
|
||||
uid: IndexUid,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidIndexPrimaryKey>)]
|
||||
primary_key: Option<String>,
|
||||
}
|
||||
|
||||
pub async fn create_index(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_CREATE }>, Data<IndexScheduler>>,
|
||||
body: AwebJson<IndexCreateRequest, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?body, "Create index");
|
||||
let IndexCreateRequest { primary_key, uid } = body.into_inner();
|
||||
|
||||
let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid);
|
||||
if allow_index_creation {
|
||||
analytics.publish(
|
||||
"Index Created".to_string(),
|
||||
json!({ "primary_key": primary_key }),
|
||||
Some(&req),
|
||||
);
|
||||
|
||||
let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key };
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
debug!(returns = ?task, "Create index");
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
} else {
|
||||
Err(AuthenticationError::InvalidToken.into())
|
||||
}
|
||||
}
|
||||
|
||||
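/// Custom `deny_unknown_fields` callback: maps attempts to set the immutable
/// `uid`, `createdAt` or `updatedAt` fields to their dedicated error codes, and
/// falls back to a generic unknown-key error otherwise.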
fn deny_immutable_fields_index(
|
||||
field: &str,
|
||||
accepted: &[&str],
|
||||
location: ValuePointerRef,
|
||||
) -> DeserrJsonError {
|
||||
match field {
|
||||
"uid" => immutable_field_error(field, accepted, Code::ImmutableIndexUid),
|
||||
"createdAt" => immutable_field_error(field, accepted, Code::ImmutableIndexCreatedAt),
|
||||
"updatedAt" => immutable_field_error(field, accepted, Code::ImmutableIndexUpdatedAt),
|
||||
_ => deserr::take_cf_content(DeserrJsonError::<BadRequest>::error::<Infallible>(
|
||||
None,
|
||||
deserr::ErrorKind::UnknownKey { key: field, accepted },
|
||||
location,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserr, Debug)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields = deny_immutable_fields_index)]
|
||||
pub struct UpdateIndexRequest {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidIndexPrimaryKey>)]
|
||||
primary_key: Option<String>,
|
||||
}
|
||||
|
||||
pub async fn get_index(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let index_view = IndexView::new(index_uid.into_inner(), &index)?;
|
||||
|
||||
debug!(returns = ?index_view, "Get index");
|
||||
|
||||
Ok(HttpResponse::Ok().json(index_view))
|
||||
}
|
||||
|
||||
pub async fn update_index(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_UPDATE }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
body: AwebJson<UpdateIndexRequest, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?body, "Update index");
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let body = body.into_inner();
|
||||
analytics.publish(
|
||||
"Index Updated".to_string(),
|
||||
json!({ "primary_key": body.primary_key }),
|
||||
Some(&req),
|
||||
);
|
||||
|
||||
let task = KindWithContent::IndexUpdate {
|
||||
index_uid: index_uid.into_inner(),
|
||||
primary_key: body.primary_key,
|
||||
};
|
||||
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!(returns = ?task, "Update index");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
pub async fn delete_index(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_DELETE }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() };
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
debug!(returns = ?task, "Delete index");
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
/// Stats of an `Index`, as known to the `stats` route.
|
||||
#[derive(Serialize, Debug)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct IndexStats {
|
||||
/// Number of documents in the index
|
||||
pub number_of_documents: u64,
|
||||
/// Whether the index is currently performing indexation, according to the scheduler.
|
||||
pub is_indexing: bool,
|
||||
/// Association of every field name with the number of times it occurs in the documents.
|
||||
pub field_distribution: FieldDistribution,
|
||||
}
|
||||
|
||||
impl From<index_scheduler::IndexStats> for IndexStats {
|
||||
fn from(stats: index_scheduler::IndexStats) -> Self {
|
||||
IndexStats {
|
||||
number_of_documents: stats.inner_stats.number_of_documents,
|
||||
is_indexing: stats.is_indexing,
|
||||
field_distribution: stats.inner_stats.field_distribution,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_index_stats(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let stats = IndexStats::from(index_scheduler.index_stats(&index_uid)?);
|
||||
|
||||
debug!(returns = ?stats, "Get index stats");
|
||||
Ok(HttpResponse::Ok().json(stats))
|
||||
}
|
384
crates/meilisearch/src/routes/indexes/search.rs
Normal file
@@ -0,0 +1,384 @@
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use index_scheduler::{IndexScheduler, RoFeatures};
|
||||
use meilisearch_types::deserr::query_params::Param;
|
||||
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::locales::Locale;
|
||||
use meilisearch_types::milli;
|
||||
use meilisearch_types::serde_cs::vec::CS;
|
||||
use serde_json::Value;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::analytics::{Analytics, SearchAggregator};
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
|
||||
RetrieveVectors, SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH,
|
||||
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
|
||||
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
|
||||
};
|
||||
use crate::search_queue::SearchQueue;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(
|
||||
web::resource("")
|
||||
.route(web::get().to(SeqHandler(search_with_url_query)))
|
||||
.route(web::post().to(SeqHandler(search_with_post))),
|
||||
);
|
||||
}
|
||||
|
||||
#[derive(Debug, deserr::Deserr)]
|
||||
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct SearchQueryGet {
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchQ>)]
|
||||
q: Option<String>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchVector>)]
|
||||
vector: Option<CS<f32>>,
|
||||
#[deserr(default = Param(DEFAULT_SEARCH_OFFSET()), error = DeserrQueryParamError<InvalidSearchOffset>)]
|
||||
offset: Param<usize>,
|
||||
#[deserr(default = Param(DEFAULT_SEARCH_LIMIT()), error = DeserrQueryParamError<InvalidSearchLimit>)]
|
||||
limit: Param<usize>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchPage>)]
|
||||
page: Option<Param<usize>>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchHitsPerPage>)]
|
||||
hits_per_page: Option<Param<usize>>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToRetrieve>)]
|
||||
attributes_to_retrieve: Option<CS<String>>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchRetrieveVectors>)]
|
||||
retrieve_vectors: Param<bool>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToCrop>)]
|
||||
attributes_to_crop: Option<CS<String>>,
|
||||
#[deserr(default = Param(DEFAULT_CROP_LENGTH()), error = DeserrQueryParamError<InvalidSearchCropLength>)]
|
||||
crop_length: Param<usize>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToHighlight>)]
|
||||
attributes_to_highlight: Option<CS<String>>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchFilter>)]
|
||||
filter: Option<String>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSort>)]
|
||||
sort: Option<String>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchDistinct>)]
|
||||
distinct: Option<String>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)]
|
||||
show_matches_position: Param<bool>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScore>)]
|
||||
show_ranking_score: Param<bool>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScoreDetails>)]
|
||||
show_ranking_score_details: Param<bool>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchFacets>)]
|
||||
facets: Option<CS<String>>,
|
||||
#[deserr( default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError<InvalidSearchHighlightPreTag>)]
|
||||
highlight_pre_tag: String,
|
||||
#[deserr( default = DEFAULT_HIGHLIGHT_POST_TAG(), error = DeserrQueryParamError<InvalidSearchHighlightPostTag>)]
|
||||
highlight_post_tag: String,
|
||||
#[deserr(default = DEFAULT_CROP_MARKER(), error = DeserrQueryParamError<InvalidSearchCropMarker>)]
|
||||
crop_marker: String,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchMatchingStrategy>)]
|
||||
matching_strategy: MatchingStrategy,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToSearchOn>)]
|
||||
pub attributes_to_search_on: Option<CS<String>>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)]
|
||||
pub hybrid_embedder: Option<String>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSemanticRatio>)]
|
||||
pub hybrid_semantic_ratio: Option<SemanticRatioGet>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchRankingScoreThreshold>)]
|
||||
pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchLocales>)]
|
||||
pub locales: Option<CS<Locale>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
|
||||
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
|
||||
pub struct RankingScoreThresholdGet(RankingScoreThreshold);
|
||||
|
||||
impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
|
||||
type Error = InvalidSearchRankingScoreThreshold;
|
||||
|
||||
fn try_from(s: String) -> Result<Self, Self::Error> {
|
||||
let f: f64 = s.parse().map_err(|_| InvalidSearchRankingScoreThreshold)?;
|
||||
Ok(RankingScoreThresholdGet(RankingScoreThreshold::try_from(f)?))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Default, PartialEq, deserr::Deserr)]
|
||||
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSearchSemanticRatio)]
|
||||
pub struct SemanticRatioGet(SemanticRatio);
|
||||
|
||||
impl std::convert::TryFrom<String> for SemanticRatioGet {
|
||||
type Error = InvalidSearchSemanticRatio;
|
||||
|
||||
fn try_from(s: String) -> Result<Self, Self::Error> {
|
||||
let f: f32 = s.parse().map_err(|_| InvalidSearchSemanticRatio)?;
|
||||
Ok(SemanticRatioGet(SemanticRatio::try_from(f)?))
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Deref for SemanticRatioGet {
|
||||
type Target = SemanticRatio;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<SearchQueryGet> for SearchQuery {
|
||||
type Error = ResponseError;
|
||||
|
||||
fn try_from(other: SearchQueryGet) -> Result<Self, Self::Error> {
|
||||
let filter = match other.filter {
|
||||
Some(f) => match serde_json::from_str(&f) {
|
||||
Ok(v) => Some(v),
|
||||
_ => Some(Value::String(f)),
|
||||
},
|
||||
None => None,
|
||||
};
|
||||
|
||||
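// An embedder given without a semantic ratio falls back to the default ratio,
// while a semantic ratio without an embedder is rejected.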
let hybrid = match (other.hybrid_embedder, other.hybrid_semantic_ratio) {
|
||||
(None, None) => None,
|
||||
(None, Some(_)) => {
|
||||
return Err(ResponseError::from_msg(
|
||||
"`hybridEmbedder` is mandatory when `hybridSemanticRatio` is present".into(),
|
||||
meilisearch_types::error::Code::InvalidHybridQuery,
|
||||
));
|
||||
}
|
||||
(Some(embedder), None) => {
|
||||
Some(HybridQuery { semantic_ratio: DEFAULT_SEMANTIC_RATIO(), embedder })
|
||||
}
|
||||
(Some(embedder), Some(semantic_ratio)) => {
|
||||
Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder })
|
||||
}
|
||||
};
|
||||
|
||||
if other.vector.is_some() && hybrid.is_none() {
|
||||
return Err(ResponseError::from_msg(
|
||||
"`hybridEmbedder` is mandatory when `vector` is present".into(),
|
||||
meilisearch_types::error::Code::MissingSearchHybrid,
|
||||
));
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
q: other.q,
|
||||
vector: other.vector.map(CS::into_inner),
|
||||
offset: other.offset.0,
|
||||
limit: other.limit.0,
|
||||
page: other.page.as_deref().copied(),
|
||||
hits_per_page: other.hits_per_page.as_deref().copied(),
|
||||
attributes_to_retrieve: other.attributes_to_retrieve.map(|o| o.into_iter().collect()),
|
||||
retrieve_vectors: other.retrieve_vectors.0,
|
||||
attributes_to_crop: other.attributes_to_crop.map(|o| o.into_iter().collect()),
|
||||
crop_length: other.crop_length.0,
|
||||
attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()),
|
||||
filter,
|
||||
sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)),
|
||||
distinct: other.distinct,
|
||||
show_matches_position: other.show_matches_position.0,
|
||||
show_ranking_score: other.show_ranking_score.0,
|
||||
show_ranking_score_details: other.show_ranking_score_details.0,
|
||||
facets: other.facets.map(|o| o.into_iter().collect()),
|
||||
highlight_pre_tag: other.highlight_pre_tag,
|
||||
highlight_post_tag: other.highlight_post_tag,
|
||||
crop_marker: other.crop_marker,
|
||||
matching_strategy: other.matching_strategy,
|
||||
attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
|
||||
hybrid,
|
||||
ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
|
||||
locales: other.locales.map(|o| o.into_iter().collect()),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: TAMO: split on :asc, and :desc, instead of doing some weird things
|
||||
|
||||
/// Transform the sort query parameter into the format expected by the POST search route.
|
||||
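///
/// For example, `"price:asc,_geoPoint(12, 13):desc"` becomes
/// `["price:asc", "_geoPoint(12,13):desc"]`.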
fn fix_sort_query_parameters(sort_query: &str) -> Vec<String> {
|
||||
let mut sort_parameters = Vec::new();
|
||||
let mut merge = false;
|
||||
for current_sort in sort_query.trim_matches('"').split(',').map(|s| s.trim()) {
|
||||
if current_sort.starts_with("_geoPoint(") {
|
||||
sort_parameters.push(current_sort.to_string());
|
||||
merge = true;
|
||||
} else if merge && !sort_parameters.is_empty() {
|
||||
let s = sort_parameters.last_mut().unwrap();
|
||||
s.push(',');
|
||||
s.push_str(current_sort);
|
||||
if current_sort.ends_with("):desc") || current_sort.ends_with("):asc") {
|
||||
merge = false;
|
||||
}
|
||||
} else {
|
||||
sort_parameters.push(current_sort.to_string());
|
||||
merge = false;
|
||||
}
|
||||
}
|
||||
sort_parameters
|
||||
}
|
||||
|
||||
pub async fn search_with_url_query(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
||||
search_queue: web::Data<SearchQueue>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebQueryParameter<SearchQueryGet, DeserrQueryParamError>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?params, "Search get");
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let mut query: SearchQuery = params.into_inner().try_into()?;
|
||||
|
||||
// Tenant token search_rules.
|
||||
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
|
||||
add_search_rules(&mut query.filter, search_rules);
|
||||
}
|
||||
|
||||
let mut aggregate = SearchAggregator::from_query(&query, &req);
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let features = index_scheduler.features();
|
||||
|
||||
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
|
||||
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?;
|
||||
let permit = search_queue.try_get_search_permit().await?;
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(&index, query, search_kind, retrieve_vector, index_scheduler.features())
|
||||
})
|
||||
.await;
|
||||
permit.drop().await;
|
||||
let search_result = search_result?;
|
||||
if let Ok(ref search_result) = search_result {
|
||||
aggregate.succeed(search_result);
|
||||
}
|
||||
analytics.get_search(aggregate);
|
||||
|
||||
let search_result = search_result?;
|
||||
|
||||
debug!(returns = ?search_result, "Search get");
|
||||
Ok(HttpResponse::Ok().json(search_result))
|
||||
}
|
||||
|
||||
pub async fn search_with_post(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
||||
search_queue: web::Data<SearchQueue>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebJson<SearchQuery, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let mut query = params.into_inner();
|
||||
debug!(parameters = ?query, "Search post");
|
||||
|
||||
// Tenant token search_rules.
|
||||
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
|
||||
add_search_rules(&mut query.filter, search_rules);
|
||||
}
|
||||
|
||||
let mut aggregate = SearchAggregator::from_query(&query, &req);
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
|
||||
let features = index_scheduler.features();
|
||||
|
||||
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
|
||||
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;
|
||||
|
||||
let permit = search_queue.try_get_search_permit().await?;
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(&index, query, search_kind, retrieve_vectors, index_scheduler.features())
|
||||
})
|
||||
.await;
|
||||
permit.drop().await;
|
||||
let search_result = search_result?;
|
||||
if let Ok(ref search_result) = search_result {
|
||||
aggregate.succeed(search_result);
|
||||
if search_result.degraded {
|
||||
MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc();
|
||||
}
|
||||
}
|
||||
analytics.post_search(aggregate);
|
||||
|
||||
let search_result = search_result?;
|
||||
|
||||
debug!(returns = ?search_result, "Search post");
|
||||
Ok(HttpResponse::Ok().json(search_result))
|
||||
}
|
||||
|
||||
pub fn search_kind(
|
||||
query: &SearchQuery,
|
||||
index_scheduler: &IndexScheduler,
|
||||
index: &milli::Index,
|
||||
features: RoFeatures,
|
||||
) -> Result<SearchKind, ResponseError> {
|
||||
if query.vector.is_some() {
|
||||
features.check_vector("Passing `vector` as a parameter")?;
|
||||
}
|
||||
if query.hybrid.is_some() {
|
||||
features.check_vector("Passing `hybrid` as a parameter")?;
|
||||
}
|
||||
|
||||
// Handle with care: the order of the cases matters and the semantics are subtle.
|
||||
match (query.q.as_deref(), &query.hybrid, query.vector.as_deref()) {
|
||||
// empty query, no vector => placeholder search
|
||||
(Some(q), _, None) if q.trim().is_empty() => Ok(SearchKind::KeywordOnly),
|
||||
// no query, no vector => placeholder search
|
||||
(None, _, None) => Ok(SearchKind::KeywordOnly),
|
||||
// hybrid.semantic_ratio == 1.0 => vector
|
||||
(_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
|
||||
SearchKind::semantic(index_scheduler, index, embedder, v.map(|v| v.len()))
|
||||
}
|
||||
// hybrid.semantic_ratio == 0.0 => keyword
|
||||
(_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
|
||||
Ok(SearchKind::KeywordOnly)
|
||||
}
|
||||
// no query, hybrid, vector => semantic
|
||||
(None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => {
|
||||
SearchKind::semantic(index_scheduler, index, embedder, Some(v.len()))
|
||||
}
|
||||
// query, no hybrid, no vector => keyword
|
||||
(Some(_), None, None) => Ok(SearchKind::KeywordOnly),
|
||||
// query, hybrid, maybe vector => hybrid
|
||||
(Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
|
||||
index_scheduler,
|
||||
index,
|
||||
embedder,
|
||||
**semantic_ratio,
|
||||
v.map(|v| v.len()),
|
||||
),
|
||||
|
||||
(_, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_fix_sort_query_parameters() {
|
||||
let sort = fix_sort_query_parameters("_geoPoint(12, 13):asc");
|
||||
assert_eq!(sort, vec!["_geoPoint(12,13):asc".to_string()]);
|
||||
let sort = fix_sort_query_parameters("doggo:asc,_geoPoint(12.45,13.56):desc");
|
||||
assert_eq!(sort, vec!["doggo:asc".to_string(), "_geoPoint(12.45,13.56):desc".to_string(),]);
|
||||
let sort = fix_sort_query_parameters(
|
||||
"doggo:asc , _geoPoint(12.45, 13.56, 2590352):desc , catto:desc",
|
||||
);
|
||||
assert_eq!(
|
||||
sort,
|
||||
vec![
|
||||
"doggo:asc".to_string(),
|
||||
"_geoPoint(12.45,13.56,2590352):desc".to_string(),
|
||||
"catto:desc".to_string(),
|
||||
]
|
||||
);
|
||||
let sort = fix_sort_query_parameters("doggo:asc , _geoPoint(1, 2), catto:desc");
|
||||
// This is ugly but eh, I don't want to write a full parser just for this unused route
|
||||
assert_eq!(sort, vec!["doggo:asc".to_string(), "_geoPoint(1,2),catto:desc".to_string(),]);
|
||||
}
|
||||
}
|
901
crates/meilisearch/src/routes/indexes/settings.rs
Normal file
@@ -0,0 +1,901 @@
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::AwebJson;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::facet_values_sort::FacetValuesSort;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::settings::{settings, RankingRuleView, SecretPolicy, Settings, Unchecked};
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use serde_json::json;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
|
||||
use crate::Opt;
|
||||
|
||||
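/// Generates, for a single index setting, a `$attr` module exposing `get`,
/// `update` and `delete` handlers plus a `resources()` helper that wires them
/// to `$route`; the invocations below (e.g. `/filterable-attributes`) show it
/// in use.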
#[macro_export]
|
||||
macro_rules! make_setting_route {
|
||||
($route:literal, $update_verb:ident, $type:ty, $err_ty:ty, $attr:ident, $camelcase_attr:literal, $analytics_var:ident, $analytics:expr) => {
|
||||
pub mod $attr {
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse, Resource};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::settings::{settings, Settings};
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use tracing::debug;
|
||||
use $crate::analytics::Analytics;
|
||||
use $crate::extractors::authentication::policies::*;
|
||||
use $crate::extractors::authentication::GuardedData;
|
||||
use $crate::extractors::sequential_extractor::SeqHandler;
|
||||
use $crate::Opt;
|
||||
use $crate::routes::{is_dry_run, get_task_id, SummarizedTaskView};
|
||||
|
||||
pub async fn delete(
|
||||
index_scheduler: GuardedData<
|
||||
ActionPolicy<{ actions::SETTINGS_UPDATE }>,
|
||||
Data<IndexScheduler>,
|
||||
>,
|
||||
index_uid: web::Path<String>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let new_settings = Settings { $attr: Setting::Reset.into(), ..Default::default() };
|
||||
|
||||
let allow_index_creation =
|
||||
index_scheduler.filters().allow_index_creation(&index_uid);
|
||||
|
||||
let task = KindWithContent::SettingsUpdate {
|
||||
index_uid: index_uid.to_string(),
|
||||
new_settings: Box::new(new_settings),
|
||||
is_deletion: true,
|
||||
allow_index_creation,
|
||||
};
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!(returns = ?task, "Delete settings");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
pub async fn update(
|
||||
index_scheduler: GuardedData<
|
||||
ActionPolicy<{ actions::SETTINGS_UPDATE }>,
|
||||
Data<IndexScheduler>,
|
||||
>,
|
||||
index_uid: actix_web::web::Path<String>,
|
||||
body: deserr::actix_web::AwebJson<Option<$type>, $err_ty>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
$analytics_var: web::Data<dyn Analytics>,
|
||||
) -> std::result::Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let body = body.into_inner();
|
||||
debug!(parameters = ?body, "Update settings");
|
||||
|
||||
#[allow(clippy::redundant_closure_call)]
|
||||
$analytics(&body, &req);
|
||||
|
||||
let new_settings = Settings {
|
||||
$attr: match body {
|
||||
Some(inner_body) => Setting::Set(inner_body).into(),
|
||||
None => Setting::Reset.into(),
|
||||
},
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let new_settings = $crate::routes::indexes::settings::validate_settings(
|
||||
new_settings,
|
||||
&index_scheduler,
|
||||
)?;
|
||||
|
||||
let allow_index_creation =
|
||||
index_scheduler.filters().allow_index_creation(&index_uid);
|
||||
|
||||
let task = KindWithContent::SettingsUpdate {
|
||||
index_uid: index_uid.to_string(),
|
||||
new_settings: Box::new(new_settings),
|
||||
is_deletion: false,
|
||||
allow_index_creation,
|
||||
};
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!(returns = ?task, "Update settings");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
pub async fn get(
|
||||
index_scheduler: GuardedData<
|
||||
ActionPolicy<{ actions::SETTINGS_GET }>,
|
||||
Data<IndexScheduler>,
|
||||
>,
|
||||
index_uid: actix_web::web::Path<String>,
|
||||
) -> std::result::Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let rtxn = index.read_txn()?;
|
||||
let settings = settings(&index, &rtxn, meilisearch_types::settings::SecretPolicy::HideSecrets)?;
|
||||
|
||||
debug!(returns = ?settings, "Get settings");
|
||||
|
||||
Ok(HttpResponse::Ok().json(settings.$attr))
|
||||
}
|
||||
|
||||
pub fn resources() -> Resource {
|
||||
Resource::new($route)
|
||||
.route(web::get().to(SeqHandler(get)))
|
||||
.route(web::$update_verb().to(SeqHandler(update)))
|
||||
.route(web::delete().to(SeqHandler(delete)))
|
||||
}
|
||||
}
|
||||
};
|
||||
}

make_setting_route!(
"/filterable-attributes",
put,
std::collections::BTreeSet<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsFilterableAttributes,
>,
filterable_attributes,
"filterableAttributes",
analytics,
|setting: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
use serde_json::json;

analytics.publish(
"FilterableAttributes Updated".to_string(),
json!({
"filterable_attributes": {
"total": setting.as_ref().map(|filter| filter.len()).unwrap_or(0),
"has_geo": setting.as_ref().map(|filter| filter.contains("_geo")).unwrap_or(false),
}
}),
Some(req),
);
}
);
|
||||
|
||||
make_setting_route!(
|
||||
"/sortable-attributes",
|
||||
put,
|
||||
std::collections::BTreeSet<String>,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsSortableAttributes,
|
||||
>,
|
||||
sortable_attributes,
|
||||
"sortableAttributes",
|
||||
analytics,
|
||||
|setting: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
|
||||
analytics.publish(
|
||||
"SortableAttributes Updated".to_string(),
|
||||
json!({
|
||||
"sortable_attributes": {
|
||||
"total": setting.as_ref().map(|sort| sort.len()),
|
||||
"has_geo": setting.as_ref().map(|sort| sort.contains("_geo")),
|
||||
},
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/displayed-attributes",
|
||||
put,
|
||||
Vec<String>,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsDisplayedAttributes,
|
||||
>,
|
||||
displayed_attributes,
|
||||
"displayedAttributes",
|
||||
analytics,
|
||||
|displayed: &Option<Vec<String>>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
|
||||
analytics.publish(
|
||||
"DisplayedAttributes Updated".to_string(),
|
||||
json!({
|
||||
"displayed_attributes": {
|
||||
"total": displayed.as_ref().map(|displayed| displayed.len()),
|
||||
"with_wildcard": displayed.as_ref().map(|displayed| displayed.iter().any(|displayed| displayed == "*")),
|
||||
},
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/typo-tolerance",
|
||||
patch,
|
||||
meilisearch_types::settings::TypoSettings,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsTypoTolerance,
|
||||
>,
|
||||
typo_tolerance,
|
||||
"typoTolerance",
|
||||
analytics,
|
||||
|setting: &Option<meilisearch_types::settings::TypoSettings>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
|
||||
analytics.publish(
|
||||
"TypoTolerance Updated".to_string(),
|
||||
json!({
|
||||
"typo_tolerance": {
|
||||
"enabled": setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))),
|
||||
"disable_on_attributes": setting
|
||||
.as_ref()
|
||||
.and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())),
|
||||
"disable_on_words": setting
|
||||
.as_ref()
|
||||
.and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())),
|
||||
"min_word_size_for_one_typo": setting
|
||||
.as_ref()
|
||||
.and_then(|s| s.min_word_size_for_typos
|
||||
.as_ref()
|
||||
.set()
|
||||
.map(|s| s.one_typo.set()))
|
||||
.flatten(),
|
||||
"min_word_size_for_two_typos": setting
|
||||
.as_ref()
|
||||
.and_then(|s| s.min_word_size_for_typos
|
||||
.as_ref()
|
||||
.set()
|
||||
.map(|s| s.two_typos.set()))
|
||||
.flatten(),
|
||||
},
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/searchable-attributes",
|
||||
put,
|
||||
Vec<String>,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsSearchableAttributes,
|
||||
>,
|
||||
searchable_attributes,
|
||||
"searchableAttributes",
|
||||
analytics,
|
||||
|setting: &Option<Vec<String>>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
|
||||
analytics.publish(
|
||||
"SearchableAttributes Updated".to_string(),
|
||||
json!({
|
||||
"searchable_attributes": {
|
||||
"total": setting.as_ref().map(|searchable| searchable.len()),
|
||||
"with_wildcard": setting.as_ref().map(|searchable| searchable.iter().any(|searchable| searchable == "*")),
|
||||
},
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/stop-words",
|
||||
put,
|
||||
std::collections::BTreeSet<String>,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsStopWords,
|
||||
>,
|
||||
stop_words,
|
||||
"stopWords",
|
||||
analytics,
|
||||
|stop_words: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
|
||||
analytics.publish(
|
||||
"StopWords Updated".to_string(),
|
||||
json!({
|
||||
"stop_words": {
|
||||
"total": stop_words.as_ref().map(|stop_words| stop_words.len()),
|
||||
},
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/non-separator-tokens",
|
||||
put,
|
||||
std::collections::BTreeSet<String>,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsNonSeparatorTokens,
|
||||
>,
|
||||
non_separator_tokens,
|
||||
"nonSeparatorTokens",
|
||||
analytics,
|
||||
|non_separator_tokens: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
|
||||
analytics.publish(
|
||||
"nonSeparatorTokens Updated".to_string(),
|
||||
json!({
|
||||
"non_separator_tokens": {
|
||||
"total": non_separator_tokens.as_ref().map(|non_separator_tokens| non_separator_tokens.len()),
|
||||
},
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/separator-tokens",
|
||||
put,
|
||||
std::collections::BTreeSet<String>,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsSeparatorTokens,
|
||||
>,
|
||||
separator_tokens,
|
||||
"separatorTokens",
|
||||
analytics,
|
||||
|separator_tokens: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
|
||||
analytics.publish(
|
||||
"separatorTokens Updated".to_string(),
|
||||
json!({
|
||||
"separator_tokens": {
|
||||
"total": separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()),
|
||||
},
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/dictionary",
|
||||
put,
|
||||
std::collections::BTreeSet<String>,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsDictionary,
|
||||
>,
|
||||
dictionary,
|
||||
"dictionary",
|
||||
analytics,
|
||||
|dictionary: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
|
||||
analytics.publish(
|
||||
"dictionary Updated".to_string(),
|
||||
json!({
|
||||
"dictionary": {
|
||||
"total": dictionary.as_ref().map(|dictionary| dictionary.len()),
|
||||
},
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/synonyms",
|
||||
put,
|
||||
std::collections::BTreeMap<String, Vec<String>>,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsSynonyms,
|
||||
>,
|
||||
synonyms,
|
||||
"synonyms",
|
||||
analytics,
|
||||
|synonyms: &Option<std::collections::BTreeMap<String, Vec<String>>>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
|
||||
analytics.publish(
|
||||
"Synonyms Updated".to_string(),
|
||||
json!({
|
||||
"synonyms": {
|
||||
"total": synonyms.as_ref().map(|synonyms| synonyms.len()),
|
||||
},
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/distinct-attribute",
|
||||
put,
|
||||
String,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsDistinctAttribute,
|
||||
>,
|
||||
distinct_attribute,
|
||||
"distinctAttribute",
|
||||
analytics,
|
||||
|distinct: &Option<String>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
analytics.publish(
|
||||
"DistinctAttribute Updated".to_string(),
|
||||
json!({
|
||||
"distinct_attribute": {
|
||||
"set": distinct.is_some(),
|
||||
}
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/proximity-precision",
|
||||
put,
|
||||
meilisearch_types::settings::ProximityPrecisionView,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsProximityPrecision,
|
||||
>,
|
||||
proximity_precision,
|
||||
"proximityPrecision",
|
||||
analytics,
|
||||
|precision: &Option<meilisearch_types::settings::ProximityPrecisionView>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
analytics.publish(
|
||||
"ProximityPrecision Updated".to_string(),
|
||||
json!({
|
||||
"proximity_precision": {
|
||||
"set": precision.is_some(),
|
||||
"value": precision.unwrap_or_default(),
|
||||
}
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/localized-attributes",
|
||||
put,
|
||||
Vec<meilisearch_types::locales::LocalizedAttributesRuleView>,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsLocalizedAttributes,
|
||||
>,
|
||||
localized_attributes,
|
||||
"localizedAttributes",
|
||||
analytics,
|
||||
|rules: &Option<Vec<meilisearch_types::locales::LocalizedAttributesRuleView>>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
analytics.publish(
|
||||
"LocalizedAttributesRules Updated".to_string(),
|
||||
json!({
|
||||
"locales": rules.as_ref().map(|rules| rules.iter().flat_map(|rule| rule.locales.iter().cloned()).collect::<std::collections::BTreeSet<_>>())
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/ranking-rules",
|
||||
put,
|
||||
Vec<meilisearch_types::settings::RankingRuleView>,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsRankingRules,
|
||||
>,
|
||||
ranking_rules,
|
||||
"rankingRules",
|
||||
analytics,
|
||||
|setting: &Option<Vec<meilisearch_types::settings::RankingRuleView>>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
|
||||
analytics.publish(
|
||||
"RankingRules Updated".to_string(),
|
||||
json!({
|
||||
"ranking_rules": {
|
||||
"words_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Words))),
|
||||
"typo_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo))),
|
||||
"proximity_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Proximity))),
|
||||
"attribute_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Attribute))),
|
||||
"sort_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort))),
|
||||
"exactness_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Exactness))),
|
||||
"values": setting.as_ref().map(|rr| rr.iter().filter(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Asc(_) | meilisearch_types::settings::RankingRuleView::Desc(_)) ).map(|x| x.to_string()).collect::<Vec<_>>().join(", ")),
|
||||
}
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/faceting",
|
||||
patch,
|
||||
meilisearch_types::settings::FacetingSettings,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsFaceting,
|
||||
>,
|
||||
faceting,
|
||||
"faceting",
|
||||
analytics,
|
||||
|setting: &Option<meilisearch_types::settings::FacetingSettings>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
use meilisearch_types::facet_values_sort::FacetValuesSort;
|
||||
|
||||
analytics.publish(
|
||||
"Faceting Updated".to_string(),
|
||||
json!({
|
||||
"faceting": {
|
||||
"max_values_per_facet": setting.as_ref().and_then(|s| s.max_values_per_facet.set()),
|
||||
"sort_facet_values_by_star_count": setting.as_ref().and_then(|s| {
|
||||
s.sort_facet_values_by.as_ref().set().map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count))
|
||||
}),
|
||||
"sort_facet_values_by_total": setting.as_ref().and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())),
|
||||
},
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/pagination",
|
||||
patch,
|
||||
meilisearch_types::settings::PaginationSettings,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsPagination,
|
||||
>,
|
||||
pagination,
|
||||
"pagination",
|
||||
analytics,
|
||||
|setting: &Option<meilisearch_types::settings::PaginationSettings>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
|
||||
analytics.publish(
|
||||
"Pagination Updated".to_string(),
|
||||
json!({
|
||||
"pagination": {
|
||||
"max_total_hits": setting.as_ref().and_then(|s| s.max_total_hits.set()),
|
||||
},
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/embedders",
|
||||
patch,
|
||||
std::collections::BTreeMap<String, Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>>,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsEmbedders,
|
||||
>,
|
||||
embedders,
|
||||
"embedders",
|
||||
analytics,
|
||||
|setting: &Option<std::collections::BTreeMap<String, Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>>>, req: &HttpRequest| {
|
||||
|
||||
|
||||
analytics.publish(
|
||||
"Embedders Updated".to_string(),
|
||||
serde_json::json!({"embedders": crate::routes::indexes::settings::embedder_analytics(setting.as_ref())}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
fn embedder_analytics(
|
||||
setting: Option<
|
||||
&std::collections::BTreeMap<
|
||||
String,
|
||||
Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>,
|
||||
>,
|
||||
>,
|
||||
) -> serde_json::Value {
|
||||
let mut sources = std::collections::HashSet::new();
|
||||
|
||||
if let Some(s) = &setting {
|
||||
for source in s
|
||||
.values()
|
||||
.filter_map(|config| config.clone().set())
|
||||
.filter_map(|config| config.source.set())
|
||||
{
|
||||
use meilisearch_types::milli::vector::settings::EmbedderSource;
|
||||
match source {
|
||||
EmbedderSource::OpenAi => sources.insert("openAi"),
|
||||
EmbedderSource::HuggingFace => sources.insert("huggingFace"),
|
||||
EmbedderSource::UserProvided => sources.insert("userProvided"),
|
||||
EmbedderSource::Ollama => sources.insert("ollama"),
|
||||
EmbedderSource::Rest => sources.insert("rest"),
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
let document_template_used = setting.as_ref().map(|map| {
|
||||
map.values()
|
||||
.filter_map(|config| config.clone().set())
|
||||
.any(|config| config.document_template.set().is_some())
|
||||
});
|
||||
|
||||
let document_template_max_bytes = setting.as_ref().and_then(|map| {
|
||||
map.values()
|
||||
.filter_map(|config| config.clone().set())
|
||||
.filter_map(|config| config.document_template_max_bytes.set())
|
||||
.max()
|
||||
});
|
||||
|
||||
let binary_quantization_used = setting.as_ref().map(|map| {
|
||||
map.values()
|
||||
.filter_map(|config| config.clone().set())
|
||||
.any(|config| config.binary_quantized.set().is_some())
|
||||
});
|
||||
|
||||
json!(
|
||||
{
|
||||
"total": setting.as_ref().map(|s| s.len()),
|
||||
"sources": sources,
|
||||
"document_template_used": document_template_used,
|
||||
"document_template_max_bytes": document_template_max_bytes,
|
||||
"binary_quantization_used": binary_quantization_used,
|
||||
}
|
||||
)
|
||||
}
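// Purely illustrative (assumed output shape, not taken from the diff): with two
// embedders configured, an `openAi` one that sets a document template and a
// `userProvided` one, the value built above would look roughly like
// {"total": 2, "sources": ["openAi", "userProvided"], "document_template_used": true,
//  "document_template_max_bytes": null, "binary_quantization_used": false}.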
|
||||
|
||||
make_setting_route!(
|
||||
"/search-cutoff-ms",
|
||||
put,
|
||||
u64,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsSearchCutoffMs,
|
||||
>,
|
||||
search_cutoff_ms,
|
||||
"searchCutoffMs",
|
||||
analytics,
|
||||
|setting: &Option<u64>, req: &HttpRequest| {
|
||||
analytics.publish(
|
||||
"Search Cutoff Updated".to_string(),
|
||||
serde_json::json!({"search_cutoff_ms": setting }),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
macro_rules! generate_configure {
|
||||
($($mod:ident),*) => {
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
cfg.service(
|
||||
web::resource("")
|
||||
.route(web::patch().to(SeqHandler(update_all)))
|
||||
.route(web::get().to(SeqHandler(get_all)))
|
||||
.route(web::delete().to(SeqHandler(delete_all))))
|
||||
$(.service($mod::resources()))*;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
generate_configure!(
|
||||
filterable_attributes,
|
||||
sortable_attributes,
|
||||
displayed_attributes,
|
||||
localized_attributes,
|
||||
searchable_attributes,
|
||||
distinct_attribute,
|
||||
proximity_precision,
|
||||
stop_words,
|
||||
separator_tokens,
|
||||
non_separator_tokens,
|
||||
dictionary,
|
||||
synonyms,
|
||||
ranking_rules,
|
||||
typo_tolerance,
|
||||
pagination,
|
||||
faceting,
|
||||
embedders,
|
||||
search_cutoff_ms
|
||||
);
|
||||
|
||||
pub async fn update_all(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
body: AwebJson<Settings<Unchecked>, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let new_settings = body.into_inner();
|
||||
debug!(parameters = ?new_settings, "Update all settings");
|
||||
let new_settings = validate_settings(new_settings, &index_scheduler)?;
|
||||
|
||||
analytics.publish(
|
||||
"Settings Updated".to_string(),
|
||||
json!({
|
||||
"ranking_rules": {
|
||||
"words_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Words))),
|
||||
"typo_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Typo))),
|
||||
"proximity_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Proximity))),
|
||||
"attribute_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Attribute))),
|
||||
"sort_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Sort))),
|
||||
"exactness_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Exactness))),
|
||||
"values": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().filter(|s| !matches!(s, RankingRuleView::Asc(_) | RankingRuleView::Desc(_)) ).map(|x| x.to_string()).collect::<Vec<_>>().join(", ")),
|
||||
},
|
||||
"searchable_attributes": {
|
||||
"total": new_settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()),
|
||||
"with_wildcard": new_settings.searchable_attributes.as_ref().set().map(|searchable| searchable.iter().any(|searchable| searchable == "*")),
|
||||
},
|
||||
"displayed_attributes": {
|
||||
"total": new_settings.displayed_attributes.as_ref().set().map(|displayed| displayed.len()),
|
||||
"with_wildcard": new_settings.displayed_attributes.as_ref().set().map(|displayed| displayed.iter().any(|displayed| displayed == "*")),
|
||||
},
|
||||
"sortable_attributes": {
|
||||
"total": new_settings.sortable_attributes.as_ref().set().map(|sort| sort.len()),
|
||||
"has_geo": new_settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")),
|
||||
},
|
||||
"filterable_attributes": {
|
||||
"total": new_settings.filterable_attributes.as_ref().set().map(|filter| filter.len()),
|
||||
"has_geo": new_settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")),
|
||||
},
|
||||
"distinct_attribute": {
|
||||
"set": new_settings.distinct_attribute.as_ref().set().is_some()
|
||||
},
|
||||
"proximity_precision": {
|
||||
"set": new_settings.proximity_precision.as_ref().set().is_some(),
|
||||
"value": new_settings.proximity_precision.as_ref().set().copied().unwrap_or_default()
|
||||
},
|
||||
"typo_tolerance": {
|
||||
"enabled": new_settings.typo_tolerance
|
||||
.as_ref()
|
||||
.set()
|
||||
.and_then(|s| s.enabled.as_ref().set())
|
||||
.copied(),
|
||||
"disable_on_attributes": new_settings.typo_tolerance
|
||||
.as_ref()
|
||||
.set()
|
||||
.and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())),
|
||||
"disable_on_words": new_settings.typo_tolerance
|
||||
.as_ref()
|
||||
.set()
|
||||
.and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())),
|
||||
"min_word_size_for_one_typo": new_settings.typo_tolerance
|
||||
.as_ref()
|
||||
.set()
|
||||
.and_then(|s| s.min_word_size_for_typos
|
||||
.as_ref()
|
||||
.set()
|
||||
.map(|s| s.one_typo.set()))
|
||||
.flatten(),
|
||||
"min_word_size_for_two_typos": new_settings.typo_tolerance
|
||||
.as_ref()
|
||||
.set()
|
||||
.and_then(|s| s.min_word_size_for_typos
|
||||
.as_ref()
|
||||
.set()
|
||||
.map(|s| s.two_typos.set()))
|
||||
.flatten(),
|
||||
},
|
||||
"faceting": {
|
||||
"max_values_per_facet": new_settings.faceting
|
||||
.as_ref()
|
||||
.set()
|
||||
.and_then(|s| s.max_values_per_facet.as_ref().set()),
|
||||
"sort_facet_values_by_star_count": new_settings.faceting
|
||||
.as_ref()
|
||||
.set()
|
||||
.and_then(|s| {
|
||||
s.sort_facet_values_by.as_ref().set().map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count))
|
||||
}),
|
||||
"sort_facet_values_by_total": new_settings.faceting
|
||||
.as_ref()
|
||||
.set()
|
||||
.and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())),
|
||||
},
|
||||
"pagination": {
|
||||
"max_total_hits": new_settings.pagination
|
||||
.as_ref()
|
||||
.set()
|
||||
.and_then(|s| s.max_total_hits.as_ref().set()),
|
||||
},
|
||||
"stop_words": {
|
||||
"total": new_settings.stop_words.as_ref().set().map(|stop_words| stop_words.len()),
|
||||
},
|
||||
"synonyms": {
|
||||
"total": new_settings.synonyms.as_ref().set().map(|synonyms| synonyms.len()),
|
||||
},
|
||||
"embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set()),
|
||||
"search_cutoff_ms": new_settings.search_cutoff_ms.as_ref().set(),
|
||||
"locales": new_settings.localized_attributes.as_ref().set().map(|rules| rules.iter().flat_map(|rule| rule.locales.iter().cloned()).collect::<std::collections::BTreeSet<_>>()),
|
||||
}),
|
||||
Some(&req),
|
||||
);
|
||||
|
||||
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
|
||||
let task = KindWithContent::SettingsUpdate {
|
||||
index_uid,
|
||||
new_settings: Box::new(new_settings),
|
||||
is_deletion: false,
|
||||
allow_index_creation,
|
||||
};
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!(returns = ?task, "Update all settings");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
pub async fn get_all(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_GET }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let rtxn = index.read_txn()?;
|
||||
let new_settings = settings(&index, &rtxn, SecretPolicy::HideSecrets)?;
|
||||
debug!(returns = ?new_settings, "Get all settings");
|
||||
Ok(HttpResponse::Ok().json(new_settings))
|
||||
}
|
||||
|
||||
pub async fn delete_all(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
req: HttpRequest,
|
||||
opt: web::Data<Opt>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let new_settings = Settings::cleared().into_unchecked();
|
||||
|
||||
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
|
||||
let task = KindWithContent::SettingsUpdate {
|
||||
index_uid,
|
||||
new_settings: Box::new(new_settings),
|
||||
is_deletion: true,
|
||||
allow_index_creation,
|
||||
};
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!(returns = ?task, "Delete all settings");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}

fn validate_settings(
settings: Settings<Unchecked>,
index_scheduler: &IndexScheduler,
) -> Result<Settings<Unchecked>, ResponseError> {
if matches!(settings.embedders, Setting::Set(_)) {
index_scheduler.features().check_vector("Passing `embedders` in settings")?
}
Ok(settings.validate()?)
}
200
crates/meilisearch/src/routes/indexes/similar.rs
Normal file

@@ -0,0 +1,200 @@
use actix_web::web::{self, Data};
|
||||
use actix_web::{HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::deserr::query_params::Param;
|
||||
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::{ErrorCode as _, ResponseError};
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::keys::actions;
|
||||
use meilisearch_types::serde_cs::vec::CS;
|
||||
use serde_json::Value;
|
||||
use tracing::debug;
|
||||
|
||||
use super::ActionPolicy;
|
||||
use crate::analytics::{Analytics, SimilarAggregator};
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_similar, RankingScoreThresholdSimilar, RetrieveVectors, SearchKind,
|
||||
SimilarQuery, SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
|
||||
};
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(
|
||||
web::resource("")
|
||||
.route(web::get().to(SeqHandler(similar_get)))
|
||||
.route(web::post().to(SeqHandler(similar_post))),
|
||||
);
|
||||
}
|
||||
|
||||
pub async fn similar_get(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebQueryParameter<SimilarQueryGet, DeserrQueryParamError>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let query = params.0.try_into()?;
|
||||
|
||||
let mut aggregate = SimilarAggregator::from_query(&query, &req);
|
||||
|
||||
debug!(parameters = ?query, "Similar get");
|
||||
|
||||
let similar = similar(index_scheduler, index_uid, query).await;
|
||||
|
||||
if let Ok(similar) = &similar {
|
||||
aggregate.succeed(similar);
|
||||
}
|
||||
analytics.get_similar(aggregate);
|
||||
|
||||
let similar = similar?;
|
||||
|
||||
debug!(returns = ?similar, "Similar get");
|
||||
Ok(HttpResponse::Ok().json(similar))
|
||||
}
|
||||
|
||||
pub async fn similar_post(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebJson<SimilarQuery, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let query = params.into_inner();
|
||||
debug!(parameters = ?query, "Similar post");
|
||||
|
||||
let mut aggregate = SimilarAggregator::from_query(&query, &req);
|
||||
|
||||
let similar = similar(index_scheduler, index_uid, query).await;
|
||||
|
||||
if let Ok(similar) = &similar {
|
||||
aggregate.succeed(similar);
|
||||
}
|
||||
analytics.post_similar(aggregate);
|
||||
|
||||
let similar = similar?;
|
||||
|
||||
debug!(returns = ?similar, "Similar post");
|
||||
Ok(HttpResponse::Ok().json(similar))
|
||||
}
|
||||
|
||||
async fn similar(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
||||
index_uid: IndexUid,
|
||||
mut query: SimilarQuery,
|
||||
) -> Result<SimilarResult, ResponseError> {
|
||||
let features = index_scheduler.features();
|
||||
|
||||
features.check_vector("Using the similar API")?;
|
||||
|
||||
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;
|
||||
|
||||
// Tenant token search_rules.
|
||||
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
|
||||
add_search_rules(&mut query.filter, search_rules);
|
||||
}
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
|
||||
let (embedder_name, embedder, quantized) =
|
||||
SearchKind::embedder(&index_scheduler, &index, &query.embedder, None)?;
|
||||
|
||||
tokio::task::spawn_blocking(move || {
|
||||
perform_similar(
|
||||
&index,
|
||||
query,
|
||||
embedder_name,
|
||||
embedder,
|
||||
quantized,
|
||||
retrieve_vectors,
|
||||
index_scheduler.features(),
|
||||
)
|
||||
})
|
||||
.await?
|
||||
}
|
||||
|
||||
#[derive(Debug, deserr::Deserr)]
|
||||
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct SimilarQueryGet {
|
||||
#[deserr(error = DeserrQueryParamError<InvalidSimilarId>)]
|
||||
id: Param<String>,
|
||||
#[deserr(default = Param(DEFAULT_SEARCH_OFFSET()), error = DeserrQueryParamError<InvalidSimilarOffset>)]
|
||||
offset: Param<usize>,
|
||||
#[deserr(default = Param(DEFAULT_SEARCH_LIMIT()), error = DeserrQueryParamError<InvalidSimilarLimit>)]
|
||||
limit: Param<usize>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarAttributesToRetrieve>)]
|
||||
attributes_to_retrieve: Option<CS<String>>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarRetrieveVectors>)]
|
||||
retrieve_vectors: Param<bool>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarFilter>)]
|
||||
filter: Option<String>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScore>)]
|
||||
show_ranking_score: Param<bool>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScoreDetails>)]
|
||||
show_ranking_score_details: Param<bool>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarRankingScoreThreshold>, default)]
|
||||
pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
|
||||
#[deserr(error = DeserrQueryParamError<InvalidEmbedder>)]
|
||||
pub embedder: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
|
||||
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSimilarRankingScoreThreshold)]
|
||||
pub struct RankingScoreThresholdGet(RankingScoreThresholdSimilar);
|
||||
|
||||
impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
|
||||
type Error = InvalidSimilarRankingScoreThreshold;
|
||||
|
||||
fn try_from(s: String) -> Result<Self, Self::Error> {
|
||||
let f: f64 = s.parse().map_err(|_| InvalidSimilarRankingScoreThreshold)?;
|
||||
Ok(RankingScoreThresholdGet(RankingScoreThresholdSimilar::try_from(f)?))
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<SimilarQueryGet> for SimilarQuery {
|
||||
type Error = ResponseError;
|
||||
|
||||
fn try_from(
|
||||
SimilarQueryGet {
|
||||
id,
|
||||
offset,
|
||||
limit,
|
||||
attributes_to_retrieve,
|
||||
retrieve_vectors,
|
||||
filter,
|
||||
show_ranking_score,
|
||||
show_ranking_score_details,
|
||||
embedder,
|
||||
ranking_score_threshold,
|
||||
}: SimilarQueryGet,
|
||||
) -> Result<Self, Self::Error> {
|
||||
let filter = match filter {
|
||||
Some(f) => match serde_json::from_str(&f) {
|
||||
Ok(v) => Some(v),
|
||||
_ => Some(Value::String(f)),
|
||||
},
|
||||
None => None,
|
||||
};
|
||||
|
||||
Ok(SimilarQuery {
|
||||
id: id.0.try_into().map_err(|code: InvalidSimilarId| {
|
||||
ResponseError::from_msg(code.to_string(), code.error_code())
|
||||
})?,
|
||||
offset: offset.0,
|
||||
limit: limit.0,
|
||||
filter,
|
||||
embedder,
|
||||
attributes_to_retrieve: attributes_to_retrieve.map(|o| o.into_iter().collect()),
|
||||
retrieve_vectors: retrieve_vectors.0,
|
||||
show_ranking_score: show_ranking_score.0,
|
||||
show_ranking_score_details: show_ranking_score_details.0,
|
||||
ranking_score_threshold: ranking_score_threshold.map(|x| x.0),
|
||||
})
|
||||
}
|
||||
}
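// A small sketch of how the conversion above treats the `filter` query
// parameter (behaviour inferred from the code; the values are made up):
//
//     filter=["genres = horror"]  -> parses as JSON, kept structured
//     filter=genres = horror      -> not valid JSON, wrapped as Value::String("genres = horror")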
318
crates/meilisearch/src/routes/logs.rs
Normal file

@@ -0,0 +1,318 @@
use std::convert::Infallible;
|
||||
use std::io::Write;
|
||||
use std::ops::ControlFlow;
|
||||
use std::pin::Pin;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
|
||||
use actix_web::web::{Bytes, Data};
|
||||
use actix_web::{web, HttpResponse};
|
||||
use deserr::actix_web::AwebJson;
|
||||
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
|
||||
use futures_util::Stream;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::{Code, ResponseError};
|
||||
use tokio::sync::mpsc;
|
||||
use tracing_subscriber::filter::Targets;
|
||||
use tracing_subscriber::Layer;
|
||||
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::{LogRouteHandle, LogStderrHandle};
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(
|
||||
web::resource("stream")
|
||||
.route(web::post().to(SeqHandler(get_logs)))
|
||||
.route(web::delete().to(SeqHandler(cancel_logs))),
|
||||
)
|
||||
.service(web::resource("stderr").route(web::post().to(SeqHandler(update_stderr_target))));
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Clone, Copy, Deserr, PartialEq, Eq)]
|
||||
#[deserr(rename_all = camelCase)]
|
||||
pub enum LogMode {
|
||||
#[default]
|
||||
Human,
|
||||
Json,
|
||||
Profile,
|
||||
}
|
||||
|
||||
/// Simple wrapper around the `Targets` from `tracing_subscriber` to implement `MergeWithError` on it.
|
||||
#[derive(Clone, Debug)]
|
||||
struct MyTargets(Targets);
|
||||
|
||||
/// Simple wrapper around the `ParseError` from `tracing_subscriber` to implement `MergeWithError` on it.
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
enum MyParseError {
|
||||
#[error(transparent)]
|
||||
ParseError(#[from] tracing_subscriber::filter::ParseError),
|
||||
#[error(
|
||||
"Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `meilisearch=info`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`"
|
||||
)]
|
||||
Example,
|
||||
}
|
||||
|
||||
impl FromStr for MyTargets {
|
||||
type Err = MyParseError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
if s.is_empty() {
|
||||
Err(MyParseError::Example)
|
||||
} else {
|
||||
Ok(MyTargets(Targets::from_str(s).map_err(MyParseError::ParseError)?))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl MergeWithError<MyParseError> for DeserrJsonError<BadRequest> {
|
||||
fn merge(
|
||||
_self_: Option<Self>,
|
||||
other: MyParseError,
|
||||
merge_location: ValuePointerRef,
|
||||
) -> ControlFlow<Self, Self> {
|
||||
Self::error::<Infallible>(
|
||||
None,
|
||||
ErrorKind::Unexpected { msg: other.to_string() },
|
||||
merge_location,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields, validate = validate_get_logs -> DeserrJsonError<InvalidSettingsTypoTolerance>)]
|
||||
pub struct GetLogs {
|
||||
#[deserr(default = "info".parse().unwrap(), try_from(&String) = MyTargets::from_str -> DeserrJsonError<BadRequest>)]
|
||||
target: MyTargets,
|
||||
|
||||
#[deserr(default, error = DeserrJsonError<BadRequest>)]
|
||||
mode: LogMode,
|
||||
|
||||
#[deserr(default = false, error = DeserrJsonError<BadRequest>)]
|
||||
profile_memory: bool,
|
||||
}
|
||||
|
||||
fn validate_get_logs<E: DeserializeError>(
|
||||
logs: GetLogs,
|
||||
location: ValuePointerRef,
|
||||
) -> Result<GetLogs, E> {
|
||||
if logs.profile_memory && logs.mode != LogMode::Profile {
|
||||
Err(deserr::take_cf_content(E::error::<Infallible>(
|
||||
None,
|
||||
ErrorKind::Unexpected {
|
||||
msg: format!("`profile_memory` can only be used while profiling code and is not compatible with the {:?} mode.", logs.mode),
|
||||
},
|
||||
location,
|
||||
)))
|
||||
} else {
|
||||
Ok(logs)
|
||||
}
|
||||
}
|
||||
|
||||
struct LogWriter {
|
||||
sender: mpsc::UnboundedSender<Vec<u8>>,
|
||||
}
|
||||
|
||||
impl Write for LogWriter {
|
||||
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
|
||||
self.sender.send(buf.to_vec()).map_err(std::io::Error::other)?;
|
||||
Ok(buf.len())
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> std::io::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
struct HandleGuard {
|
||||
/// We need to keep an handle on the logs to make it available again when the streamer is dropped
|
||||
logs: Arc<LogRouteHandle>,
|
||||
}
|
||||
|
||||
impl Drop for HandleGuard {
|
||||
fn drop(&mut self) {
|
||||
if let Err(e) = self.logs.modify(|layer| *layer.inner_mut() = None) {
|
||||
tracing::error!("Could not free the logs route: {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn byte_stream(
|
||||
receiver: mpsc::UnboundedReceiver<Vec<u8>>,
|
||||
guard: HandleGuard,
|
||||
) -> impl futures_util::Stream<Item = Result<Bytes, ResponseError>> {
|
||||
futures_util::stream::unfold((receiver, guard), move |(mut receiver, guard)| async move {
|
||||
let vec = receiver.recv().await;
|
||||
|
||||
vec.map(From::from).map(Ok).map(|a| (a, (receiver, guard)))
|
||||
})
|
||||
}
|
||||
|
||||
type PinnedByteStream = Pin<Box<dyn Stream<Item = Result<Bytes, ResponseError>>>>;
|
||||
|
||||
fn make_layer<
|
||||
S: tracing::Subscriber + for<'span> tracing_subscriber::registry::LookupSpan<'span>,
|
||||
>(
|
||||
opt: &GetLogs,
|
||||
logs: Data<LogRouteHandle>,
|
||||
) -> (Box<dyn Layer<S> + Send + Sync>, PinnedByteStream) {
|
||||
let guard = HandleGuard { logs: logs.into_inner() };
|
||||
match opt.mode {
|
||||
LogMode::Human => {
|
||||
let (sender, receiver) = tokio::sync::mpsc::unbounded_channel();
|
||||
|
||||
let fmt_layer = tracing_subscriber::fmt::layer()
|
||||
.with_writer(move || LogWriter { sender: sender.clone() })
|
||||
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE);
|
||||
|
||||
let stream = byte_stream(receiver, guard);
|
||||
(Box::new(fmt_layer) as Box<dyn Layer<S> + Send + Sync>, Box::pin(stream))
|
||||
}
|
||||
LogMode::Json => {
|
||||
let (sender, receiver) = tokio::sync::mpsc::unbounded_channel();
|
||||
|
||||
let fmt_layer = tracing_subscriber::fmt::layer()
|
||||
.with_writer(move || LogWriter { sender: sender.clone() })
|
||||
.json()
|
||||
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE);
|
||||
|
||||
let stream = byte_stream(receiver, guard);
|
||||
(Box::new(fmt_layer) as Box<dyn Layer<S> + Send + Sync>, Box::pin(stream))
|
||||
}
|
||||
LogMode::Profile => {
|
||||
let (trace, layer) = tracing_trace::Trace::new(opt.profile_memory);
|
||||
|
||||
let stream = entry_stream(trace, guard);
|
||||
|
||||
(Box::new(layer) as Box<dyn Layer<S> + Send + Sync>, Box::pin(stream))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn entry_stream(
|
||||
trace: tracing_trace::Trace,
|
||||
guard: HandleGuard,
|
||||
) -> impl Stream<Item = Result<Bytes, ResponseError>> {
|
||||
let receiver = trace.into_receiver();
|
||||
let entry_buf = Vec::new();
|
||||
|
||||
futures_util::stream::unfold(
|
||||
(receiver, entry_buf, guard),
|
||||
move |(mut receiver, mut entry_buf, guard)| async move {
|
||||
let mut bytes = Vec::new();
|
||||
|
||||
while bytes.len() < 8192 {
|
||||
entry_buf.clear();
|
||||
|
||||
let Ok(count) = tokio::time::timeout(
|
||||
std::time::Duration::from_secs(1),
|
||||
receiver.recv_many(&mut entry_buf, 100),
|
||||
)
|
||||
.await
|
||||
else {
|
||||
break;
|
||||
};
|
||||
|
||||
if count == 0 {
|
||||
if !bytes.is_empty() {
|
||||
break;
|
||||
}
|
||||
|
||||
// channel closed, exit
|
||||
return None;
|
||||
}
|
||||
|
||||
for entry in &entry_buf {
|
||||
if let Err(error) = serde_json::to_writer(&mut bytes, entry) {
|
||||
tracing::error!(
|
||||
error = &error as &dyn std::error::Error,
|
||||
"deserializing entry"
|
||||
);
|
||||
return Some((
|
||||
Err(ResponseError::from_msg(
|
||||
format!("error deserializing entry: {error}"),
|
||||
Code::Internal,
|
||||
)),
|
||||
(receiver, entry_buf, guard),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Some((Ok(bytes.into()), (receiver, entry_buf, guard)))
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
pub async fn get_logs(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
|
||||
logs: Data<LogRouteHandle>,
|
||||
body: AwebJson<GetLogs, DeserrJsonError>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
index_scheduler.features().check_logs_route()?;
|
||||
|
||||
let opt = body.into_inner();
|
||||
let mut stream = None;
|
||||
|
||||
logs.modify(|layer| match layer.inner_mut() {
|
||||
None => {
|
||||
// there is no one getting logs
|
||||
*layer.filter_mut() = opt.target.0.clone();
|
||||
let (new_layer, new_stream) = make_layer(&opt, logs.clone());
|
||||
|
||||
*layer.inner_mut() = Some(new_layer);
|
||||
stream = Some(new_stream);
|
||||
}
|
||||
Some(_) => {
|
||||
// there is already someone getting logs
|
||||
}
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
if let Some(stream) = stream {
|
||||
Ok(HttpResponse::Ok().streaming(stream))
|
||||
} else {
|
||||
Err(MeilisearchHttpError::AlreadyUsedLogRoute.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn cancel_logs(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
|
||||
logs: Data<LogRouteHandle>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
index_scheduler.features().check_logs_route()?;
|
||||
|
||||
if let Err(e) = logs.modify(|layer| *layer.inner_mut() = None) {
|
||||
tracing::error!("Could not free the logs route: {e}");
|
||||
}
|
||||
|
||||
Ok(HttpResponse::NoContent().finish())
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct UpdateStderrLogs {
|
||||
#[deserr(default = "info".parse().unwrap(), try_from(&String) = MyTargets::from_str -> DeserrJsonError<BadRequest>)]
|
||||
target: MyTargets,
|
||||
}
|
||||
|
||||
pub async fn update_stderr_target(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
|
||||
logs: Data<LogStderrHandle>,
|
||||
body: AwebJson<UpdateStderrLogs, DeserrJsonError>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
index_scheduler.features().check_logs_route()?;
|
||||
|
||||
let opt = body.into_inner();
|
||||
|
||||
logs.modify(|layer| {
|
||||
*layer.filter_mut() = opt.target.0.clone();
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
Ok(HttpResponse::NoContent().finish())
|
||||
}
64
crates/meilisearch/src/routes/metrics.rs
Normal file

@@ -0,0 +1,64 @@
use actix_web::http::header;
|
||||
use actix_web::web::{self, Data};
|
||||
use actix_web::HttpResponse;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_auth::AuthController;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::keys::actions;
|
||||
use prometheus::{Encoder, TextEncoder};
|
||||
|
||||
use crate::extractors::authentication::policies::ActionPolicy;
|
||||
use crate::extractors::authentication::{AuthenticationError, GuardedData};
|
||||
use crate::routes::create_all_stats;
|
||||
|
||||
pub fn configure(config: &mut web::ServiceConfig) {
|
||||
config.service(web::resource("").route(web::get().to(get_metrics)));
|
||||
}
|
||||
|
||||
pub async fn get_metrics(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
|
||||
auth_controller: Data<AuthController>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
index_scheduler.features().check_metrics()?;
|
||||
let auth_filters = index_scheduler.filters();
|
||||
if !auth_filters.all_indexes_authorized() {
|
||||
let mut error = ResponseError::from(AuthenticationError::InvalidToken);
|
||||
error
|
||||
.message
|
||||
.push_str(" The API key for the `/metrics` route must allow access to all indexes.");
|
||||
return Err(error);
|
||||
}
|
||||
|
||||
let response = create_all_stats((*index_scheduler).clone(), auth_controller, auth_filters)?;
|
||||
|
||||
crate::metrics::MEILISEARCH_DB_SIZE_BYTES.set(response.database_size as i64);
|
||||
crate::metrics::MEILISEARCH_USED_DB_SIZE_BYTES.set(response.used_database_size as i64);
|
||||
crate::metrics::MEILISEARCH_INDEX_COUNT.set(response.indexes.len() as i64);
|
||||
|
||||
for (index, value) in response.indexes.iter() {
|
||||
crate::metrics::MEILISEARCH_INDEX_DOCS_COUNT
|
||||
.with_label_values(&[index])
|
||||
.set(value.number_of_documents as i64);
|
||||
}
|
||||
|
||||
for (kind, value) in index_scheduler.get_stats()? {
|
||||
for (value, count) in value {
|
||||
crate::metrics::MEILISEARCH_NB_TASKS
|
||||
.with_label_values(&[&kind, &value])
|
||||
.set(count as i64);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(last_update) = response.last_update {
|
||||
crate::metrics::MEILISEARCH_LAST_UPDATE.set(last_update.unix_timestamp());
|
||||
}
|
||||
crate::metrics::MEILISEARCH_IS_INDEXING.set(index_scheduler.is_task_processing()? as i64);
|
||||
|
||||
let encoder = TextEncoder::new();
|
||||
let mut buffer = vec![];
|
||||
encoder.encode(&prometheus::gather(), &mut buffer).expect("Failed to encode metrics");
|
||||
|
||||
let response = String::from_utf8(buffer).expect("Failed to convert bytes to string");
|
||||
|
||||
Ok(HttpResponse::Ok().insert_header(header::ContentType(mime::TEXT_PLAIN)).body(response))
|
||||
}
380
crates/meilisearch/src/routes/mod.rs
Normal file

@@ -0,0 +1,380 @@
use std::collections::BTreeMap;
|
||||
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_auth::AuthController;
|
||||
use meilisearch_types::error::{Code, ResponseError};
|
||||
use meilisearch_types::settings::{Settings, Unchecked};
|
||||
use meilisearch_types::tasks::{Kind, Status, Task, TaskId};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use time::OffsetDateTime;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::search_queue::SearchQueue;
|
||||
use crate::Opt;
|
||||
|
||||
const PAGINATION_DEFAULT_LIMIT: usize = 20;
|
||||
|
||||
mod api_key;
|
||||
mod dump;
|
||||
pub mod features;
|
||||
pub mod indexes;
|
||||
mod logs;
|
||||
mod metrics;
|
||||
mod multi_search;
|
||||
mod snapshot;
|
||||
mod swap_indexes;
|
||||
pub mod tasks;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::scope("/tasks").configure(tasks::configure))
|
||||
.service(web::resource("/health").route(web::get().to(get_health)))
|
||||
.service(web::scope("/logs").configure(logs::configure))
|
||||
.service(web::scope("/keys").configure(api_key::configure))
|
||||
.service(web::scope("/dumps").configure(dump::configure))
|
||||
.service(web::scope("/snapshots").configure(snapshot::configure))
|
||||
.service(web::resource("/stats").route(web::get().to(get_stats)))
|
||||
.service(web::resource("/version").route(web::get().to(get_version)))
|
||||
.service(web::scope("/indexes").configure(indexes::configure))
|
||||
.service(web::scope("/multi-search").configure(multi_search::configure))
|
||||
.service(web::scope("/swap-indexes").configure(swap_indexes::configure))
|
||||
.service(web::scope("/metrics").configure(metrics::configure))
|
||||
.service(web::scope("/experimental-features").configure(features::configure));
|
||||
}
|
||||
|
||||
pub fn get_task_id(req: &HttpRequest, opt: &Opt) -> Result<Option<TaskId>, ResponseError> {
|
||||
if !opt.experimental_replication_parameters {
|
||||
return Ok(None);
|
||||
}
|
||||
let task_id = req
|
||||
.headers()
|
||||
.get("TaskId")
|
||||
.map(|header| {
|
||||
header.to_str().map_err(|e| {
|
||||
ResponseError::from_msg(
|
||||
format!("TaskId is not a valid utf-8 string: {e}"),
|
||||
Code::BadRequest,
|
||||
)
|
||||
})
|
||||
})
|
||||
.transpose()?
|
||||
.map(|s| {
|
||||
s.parse::<TaskId>().map_err(|e| {
|
||||
ResponseError::from_msg(
|
||||
format!(
|
||||
"Could not parse the TaskId as a {}: {e}",
|
||||
std::any::type_name::<TaskId>(),
|
||||
),
|
||||
Code::BadRequest,
|
||||
)
|
||||
})
|
||||
})
|
||||
.transpose()?;
|
||||
Ok(task_id)
|
||||
}
|
||||
|
||||
pub fn is_dry_run(req: &HttpRequest, opt: &Opt) -> Result<bool, ResponseError> {
|
||||
if !opt.experimental_replication_parameters {
|
||||
return Ok(false);
|
||||
}
|
||||
Ok(req
|
||||
.headers()
|
||||
.get("DryRun")
|
||||
.map(|header| {
|
||||
header.to_str().map_err(|e| {
|
||||
ResponseError::from_msg(
|
||||
format!("DryRun is not a valid utf-8 string: {e}"),
|
||||
Code::BadRequest,
|
||||
)
|
||||
})
|
||||
})
|
||||
.transpose()?
|
||||
.map_or(false, |s| s.to_lowercase() == "true"))
|
||||
}
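// Sketch of the expected behaviour of the two helpers above, assuming the
// `experimental_replication_parameters` option is enabled (when it is not,
// they simply return `Ok(None)` and `Ok(false)`):
//
//     request header `TaskId: 42`   -> get_task_id(&req, &opt) == Ok(Some(42))
//     request header `DryRun: true` -> is_dry_run(&req, &opt)  == Ok(true)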
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct SummarizedTaskView {
|
||||
task_uid: TaskId,
|
||||
index_uid: Option<String>,
|
||||
status: Status,
|
||||
#[serde(rename = "type")]
|
||||
kind: Kind,
|
||||
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
|
||||
enqueued_at: OffsetDateTime,
|
||||
}
|
||||
|
||||
impl From<Task> for SummarizedTaskView {
|
||||
fn from(task: Task) -> Self {
|
||||
SummarizedTaskView {
|
||||
task_uid: task.uid,
|
||||
index_uid: task.index_uid().map(|s| s.to_string()),
|
||||
status: task.status,
|
||||
kind: task.kind.as_kind(),
|
||||
enqueued_at: task.enqueued_at,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Pagination {
|
||||
pub offset: usize,
|
||||
pub limit: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct PaginationView<T> {
|
||||
pub results: Vec<T>,
|
||||
pub offset: usize,
|
||||
pub limit: usize,
|
||||
pub total: usize,
|
||||
}
|
||||
|
||||
impl Pagination {
|
||||
/// Given the full data to paginate, returns the selected section.
|
||||
pub fn auto_paginate_sized<T>(
|
||||
self,
|
||||
content: impl IntoIterator<Item = T> + ExactSizeIterator,
|
||||
) -> PaginationView<T>
|
||||
where
|
||||
T: Serialize,
|
||||
{
|
||||
let total = content.len();
|
||||
let content: Vec<_> = content.into_iter().skip(self.offset).take(self.limit).collect();
|
||||
self.format_with(total, content)
|
||||
}
|
||||
|
||||
/// Given an iterator and the total number of elements, returns the selected section.
|
||||
pub fn auto_paginate_unsized<T>(
|
||||
self,
|
||||
total: usize,
|
||||
content: impl IntoIterator<Item = T>,
|
||||
) -> PaginationView<T>
|
||||
where
|
||||
T: Serialize,
|
||||
{
|
||||
let content: Vec<_> = content.into_iter().skip(self.offset).take(self.limit).collect();
|
||||
self.format_with(total, content)
|
||||
}
|
||||
|
||||
/// Given the data already paginated + the total number of elements, it stores
|
||||
/// everything in a [PaginationResult].
|
||||
pub fn format_with<T>(self, total: usize, results: Vec<T>) -> PaginationView<T>
|
||||
where
|
||||
T: Serialize,
|
||||
{
|
||||
PaginationView { results, offset: self.offset, limit: self.limit, total }
|
||||
}
|
||||
}
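// A minimal usage sketch of the pagination helper above (illustrative only):
// paginating ten items with offset 2 and limit 3 keeps items 2, 3 and 4.
//
//     let page = Pagination { offset: 2, limit: 3 }.auto_paginate_sized(0..10usize);
//     assert_eq!((page.results, page.total), (vec![2, 3, 4], 10));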
|
||||
|
||||
impl<T> PaginationView<T> {
|
||||
pub fn new(offset: usize, limit: usize, total: usize, results: Vec<T>) -> Self {
|
||||
        Self { offset, limit, results, total }
    }
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[allow(clippy::large_enum_variant)]
#[serde(tag = "name")]
pub enum UpdateType {
    ClearAll,
    Customs,
    DocumentsAddition {
        #[serde(skip_serializing_if = "Option::is_none")]
        number: Option<usize>,
    },
    DocumentsPartial {
        #[serde(skip_serializing_if = "Option::is_none")]
        number: Option<usize>,
    },
    DocumentsDeletion {
        #[serde(skip_serializing_if = "Option::is_none")]
        number: Option<usize>,
    },
    Settings {
        settings: Settings<Unchecked>,
    },
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ProcessedUpdateResult {
    pub update_id: u64,
    #[serde(rename = "type")]
    pub update_type: UpdateType,
    pub duration: f64, // in seconds
    #[serde(with = "time::serde::rfc3339")]
    pub enqueued_at: OffsetDateTime,
    #[serde(with = "time::serde::rfc3339")]
    pub processed_at: OffsetDateTime,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct FailedUpdateResult {
    pub update_id: u64,
    #[serde(rename = "type")]
    pub update_type: UpdateType,
    pub error: ResponseError,
    pub duration: f64, // in seconds
    #[serde(with = "time::serde::rfc3339")]
    pub enqueued_at: OffsetDateTime,
    #[serde(with = "time::serde::rfc3339")]
    pub processed_at: OffsetDateTime,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct EnqueuedUpdateResult {
    pub update_id: u64,
    #[serde(rename = "type")]
    pub update_type: UpdateType,
    #[serde(with = "time::serde::rfc3339")]
    pub enqueued_at: OffsetDateTime,
    #[serde(skip_serializing_if = "Option::is_none", with = "time::serde::rfc3339::option")]
    pub started_processing_at: Option<OffsetDateTime>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", tag = "status")]
pub enum UpdateStatusResponse {
    Enqueued {
        #[serde(flatten)]
        content: EnqueuedUpdateResult,
    },
    Processing {
        #[serde(flatten)]
        content: EnqueuedUpdateResult,
    },
    Failed {
        #[serde(flatten)]
        content: FailedUpdateResult,
    },
    Processed {
        #[serde(flatten)]
        content: ProcessedUpdateResult,
    },
}

#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
pub struct IndexUpdateResponse {
    pub update_id: u64,
}

impl IndexUpdateResponse {
    pub fn with_id(update_id: u64) -> Self {
        Self { update_id }
    }
}

/// Always return a 200 with:
/// ```json
/// {
///     "status": "Meilisearch is running"
/// }
/// ```
pub async fn running() -> HttpResponse {
    HttpResponse::Ok().json(serde_json::json!({ "status": "Meilisearch is running" }))
}

#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct Stats {
    pub database_size: u64,
    #[serde(skip)]
    pub used_database_size: u64,
    #[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
    pub last_update: Option<OffsetDateTime>,
    pub indexes: BTreeMap<String, indexes::IndexStats>,
}

async fn get_stats(
    index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
    auth_controller: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<AuthController>>,
) -> Result<HttpResponse, ResponseError> {
    let filters = index_scheduler.filters();

    let stats = create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), filters)?;

    debug!(returns = ?stats, "Get stats");
    Ok(HttpResponse::Ok().json(stats))
}

pub fn create_all_stats(
    index_scheduler: Data<IndexScheduler>,
    auth_controller: Data<AuthController>,
    filters: &meilisearch_auth::AuthFilter,
) -> Result<Stats, ResponseError> {
    let mut last_task: Option<OffsetDateTime> = None;
    let mut indexes = BTreeMap::new();
    let mut database_size = 0;
    let mut used_database_size = 0;

    for index_uid in index_scheduler.index_names()? {
        // Accumulate the size of all indexes, even unauthorized ones, so
        // as to return a database_size representative of the correct database size on disk.
        // See <https://github.com/meilisearch/meilisearch/pull/3541#discussion_r1126747643> for context.
        let stats = index_scheduler.index_stats(&index_uid)?;
        database_size += stats.inner_stats.database_size;
        used_database_size += stats.inner_stats.used_database_size;

        if !filters.is_index_authorized(&index_uid) {
            continue;
        }

        last_task = last_task.map_or(Some(stats.inner_stats.updated_at), |last| {
            Some(last.max(stats.inner_stats.updated_at))
        });
        indexes.insert(index_uid.to_string(), stats.into());
    }

    database_size += index_scheduler.size()?;
    used_database_size += index_scheduler.used_size()?;
    database_size += auth_controller.size()?;
    used_database_size += auth_controller.used_size()?;

    let stats = Stats { database_size, used_database_size, last_update: last_task, indexes };
    Ok(stats)
}

#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct VersionResponse {
    commit_sha: String,
    commit_date: String,
    pkg_version: String,
}

async fn get_version(
    _index_scheduler: GuardedData<ActionPolicy<{ actions::VERSION }>, Data<IndexScheduler>>,
) -> HttpResponse {
    let build_info = build_info::BuildInfo::from_build();

    HttpResponse::Ok().json(VersionResponse {
        commit_sha: build_info.commit_sha1.unwrap_or("unknown").to_string(),
        commit_date: build_info
            .commit_timestamp
            .and_then(|commit_timestamp| {
                commit_timestamp
                    .format(&time::format_description::well_known::Iso8601::DEFAULT)
                    .ok()
            })
            .unwrap_or("unknown".into()),
        pkg_version: env!("CARGO_PKG_VERSION").to_string(),
    })
}

pub async fn get_health(
    index_scheduler: Data<IndexScheduler>,
    auth_controller: Data<AuthController>,
    search_queue: Data<SearchQueue>,
) -> Result<HttpResponse, ResponseError> {
    search_queue.health().unwrap();
    index_scheduler.health().unwrap();
    auth_controller.health().unwrap();

    Ok(HttpResponse::Ok().json(serde_json::json!({ "status": "available" })))
}
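
As an aside, the `last_update` bookkeeping in `create_all_stats` above is easy to misread: it folds the per-index `updated_at` values into a single optional maximum, starting from `None` until the first authorized index is seen. A minimal, self-contained sketch of that fold (using plain `u64` stand-ins for the timestamps, not the real `OffsetDateTime` values):

// Illustrative sketch only, not part of the diff.
fn main() {
    let updated_ats = [3u64, 9, 7]; // stand-ins for per-index `updated_at` timestamps
    let mut last_task: Option<u64> = None;
    for updated_at in updated_ats {
        // keep the maximum seen so far, or start with the first value
        last_task = last_task.map_or(Some(updated_at), |last| Some(last.max(updated_at)));
    }
    assert_eq!(last_task, Some(9));
}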
183  crates/meilisearch/src/routes/multi_search.rs  Normal file
@@ -0,0 +1,183 @@
use actix_http::StatusCode;
use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use serde::Serialize;
use tracing::debug;

use crate::analytics::{Analytics, MultiSearchAggregator};
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::search::search_kind;
use crate::search::{
    add_search_rules, perform_federated_search, perform_search, FederatedSearch, RetrieveVectors,
    SearchQueryWithIndex, SearchResultWithIndex,
};
use crate::search_queue::SearchQueue;

pub fn configure(cfg: &mut web::ServiceConfig) {
    cfg.service(web::resource("").route(web::post().to(SeqHandler(multi_search_with_post))));
}

#[derive(Serialize)]
struct SearchResults {
    results: Vec<SearchResultWithIndex>,
}

pub async fn multi_search_with_post(
    index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
    search_queue: Data<SearchQueue>,
    params: AwebJson<FederatedSearch, DeserrJsonError>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    // Since we don't want to process half of the search requests and then get a permit refused
    // we're going to get one permit for the whole duration of the multi-search request.
    let permit = search_queue.try_get_search_permit().await?;

    let federated_search = params.into_inner();

    let mut multi_aggregate = MultiSearchAggregator::from_federated_search(&federated_search, &req);

    let FederatedSearch { mut queries, federation } = federated_search;

    let features = index_scheduler.features();

    // regardless of federation, check authorization and apply search rules
    let auth = 'check_authorization: {
        for (query_index, federated_query) in queries.iter_mut().enumerate() {
            let index_uid = federated_query.index_uid.as_str();
            // Check index from API key
            if !index_scheduler.filters().is_index_authorized(index_uid) {
                break 'check_authorization Err(AuthenticationError::InvalidToken)
                    .with_index(query_index);
            }
            // Apply search rules from tenant token
            if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(index_uid)
            {
                add_search_rules(&mut federated_query.filter, search_rules);
            }
        }
        Ok(())
    };

    auth.map_err(|(mut err, query_index)| {
        // Add the query index that failed as context for the error message.
        // We're doing it only here and not directly in the `WithIndex` trait so that the
        // `with_index` function returns a different type of result and we can benefit from
        // static typing.
        err.message = format!("Inside `.queries[{query_index}]`: {}", err.message);
        err
    })?;

    let response = match federation {
        Some(federation) => {
            let search_result = tokio::task::spawn_blocking(move || {
                perform_federated_search(&index_scheduler, queries, federation, features)
            })
            .await;
            permit.drop().await;

            if let Ok(Ok(_)) = search_result {
                multi_aggregate.succeed();
            }

            analytics.post_multi_search(multi_aggregate);
            HttpResponse::Ok().json(search_result??)
        }
        None => {
            // Explicitly expect a `(ResponseError, usize)` for the error type rather than
            // `ResponseError` only, so that `?` doesn't work if it doesn't use `with_index`,
            // ensuring that it is not forgotten in case of code changes.
            let search_results: Result<_, (ResponseError, usize)> = async {
                let mut search_results = Vec::with_capacity(queries.len());
                for (query_index, (index_uid, query, federation_options)) in queries
                    .into_iter()
                    .map(SearchQueryWithIndex::into_index_query_federation)
                    .enumerate()
                {
                    debug!(on_index = query_index, parameters = ?query, "Multi-search");

                    if federation_options.is_some() {
                        return Err((
                            MeilisearchHttpError::FederationOptionsInNonFederatedRequest(
                                query_index,
                            )
                            .into(),
                            query_index,
                        ));
                    }

                    let index = index_scheduler
                        .index(&index_uid)
                        .map_err(|err| {
                            let mut err = ResponseError::from(err);
                            // Patch the HTTP status code to 400 as it defaults to 404 for
                            // `index_not_found`, but here the resource not found is not part
                            // of the URL.
                            err.code = StatusCode::BAD_REQUEST;
                            err
                        })
                        .with_index(query_index)?;

                    let search_kind =
                        search_kind(&query, index_scheduler.get_ref(), &index, features)
                            .with_index(query_index)?;
                    let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)
                        .with_index(query_index)?;

                    let search_result = tokio::task::spawn_blocking(move || {
                        perform_search(&index, query, search_kind, retrieve_vector, features)
                    })
                    .await
                    .with_index(query_index)?;

                    search_results.push(SearchResultWithIndex {
                        index_uid: index_uid.into_inner(),
                        result: search_result.with_index(query_index)?,
                    });
                }
                Ok(search_results)
            }
            .await;
            permit.drop().await;

            if search_results.is_ok() {
                multi_aggregate.succeed();
            }
            analytics.post_multi_search(multi_aggregate);

            let search_results = search_results.map_err(|(mut err, query_index)| {
                // Add the query index that failed as context for the error message.
                // We're doing it only here and not directly in the `WithIndex` trait so that the
                // `with_index` function returns a different type of result and we can benefit
                // from static typing.
                err.message = format!("Inside `.queries[{query_index}]`: {}", err.message);
                err
            })?;

            debug!(returns = ?search_results, "Multi-search");

            HttpResponse::Ok().json(SearchResults { results: search_results })
        }
    };

    Ok(response)
}

/// Local `Result` extension trait to avoid `map_err` boilerplate.
trait WithIndex {
    type T;
    /// convert the error type inside of the `Result` to a `ResponseError`, and return a couple of it + the usize.
    fn with_index(self, index: usize) -> Result<Self::T, (ResponseError, usize)>;
}

impl<T, E: Into<ResponseError>> WithIndex for Result<T, E> {
    type T = T;
    fn with_index(self, index: usize) -> Result<T, (ResponseError, usize)> {
        self.map_err(|err| (err.into(), index))
    }
}
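
The `WithIndex` trait above exists purely so that every fallible step inside the multi-search loop is forced to record which query it belongs to. A minimal standalone sketch of the same pattern, with a plain `String` error standing in for `ResponseError` (illustrative only, not part of the diff):

// Tag every error with the index of the query that produced it, so the caller can
// report "Inside `.queries[i]`" without repeating `map_err` at every call site.
trait WithIndex {
    type T;
    fn with_index(self, index: usize) -> Result<Self::T, (String, usize)>;
}

impl<T, E: ToString> WithIndex for Result<T, E> {
    type T = T;
    fn with_index(self, index: usize) -> Result<T, (String, usize)> {
        self.map_err(|err| (err.to_string(), index))
    }
}

fn main() {
    let raw_queries = ["12", "oops", "7"];
    for (i, raw) in raw_queries.iter().enumerate() {
        match raw.parse::<u32>().with_index(i) {
            Ok(n) => println!("queries[{i}] parsed to {n}"),
            Err((msg, i)) => println!("Inside `.queries[{i}]`: {msg}"),
        }
    }
}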
38  crates/meilisearch/src/routes/snapshot.rs  Normal file
@@ -0,0 +1,38 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::KindWithContent;
use serde_json::json;
use tracing::debug;

use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::Opt;

pub fn configure(cfg: &mut web::ServiceConfig) {
    cfg.service(web::resource("").route(web::post().to(SeqHandler(create_snapshot))));
}

pub async fn create_snapshot(
    index_scheduler: GuardedData<ActionPolicy<{ actions::SNAPSHOTS_CREATE }>, Data<IndexScheduler>>,
    req: HttpRequest,
    opt: web::Data<Opt>,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    analytics.publish("Snapshot Created".to_string(), json!({}), Some(&req));

    let task = KindWithContent::SnapshotCreation;
    let uid = get_task_id(&req, &opt)?;
    let dry_run = is_dry_run(&req, &opt)?;
    let task: SummarizedTaskView =
        tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
            .await??
            .into();

    debug!(returns = ?task, "Create snapshot");
    Ok(HttpResponse::Accepted().json(task))
}
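
The `.await??` at the end of the `spawn_blocking` call above stacks two error layers: the first `?` unwraps tokio's `JoinError` (the blocking task panicked or was cancelled), the second unwraps the `Result` returned by the closure itself. A small illustrative sketch, assuming tokio with the `rt-multi-thread` and `macros` features and a hypothetical string parse in place of `index_scheduler.register`:

// Illustrative sketch only, not part of the diff.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // outer `?` handles the JoinError, inner `?` handles the closure's own Result
    let n: u32 = tokio::task::spawn_blocking(|| "42".parse::<u32>()).await??;
    assert_eq!(n, 42);
    Ok(())
}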
72  crates/meilisearch/src/routes/swap_indexes.rs  Normal file
@@ -0,0 +1,72 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use deserr::Deserr;
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::InvalidSwapIndexes;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::tasks::{IndexSwap, KindWithContent};
use serde_json::json;

use super::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::analytics::Analytics;
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::Opt;

pub fn configure(cfg: &mut web::ServiceConfig) {
    cfg.service(web::resource("").route(web::post().to(SeqHandler(swap_indexes))));
}

#[derive(Deserr, Debug, Clone, PartialEq, Eq)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct SwapIndexesPayload {
    #[deserr(error = DeserrJsonError<InvalidSwapIndexes>, missing_field_error = DeserrJsonError::missing_swap_indexes)]
    indexes: Vec<IndexUid>,
}

pub async fn swap_indexes(
    index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_SWAP }>, Data<IndexScheduler>>,
    params: AwebJson<Vec<SwapIndexesPayload>, DeserrJsonError>,
    req: HttpRequest,
    opt: web::Data<Opt>,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let params = params.into_inner();
    analytics.publish(
        "Indexes Swapped".to_string(),
        json!({
            "swap_operation_number": params.len(),
        }),
        Some(&req),
    );
    let filters = index_scheduler.filters();

    let mut swaps = vec![];
    for SwapIndexesPayload { indexes } in params.into_iter() {
        // TODO: switch to deserr
        let (lhs, rhs) = match indexes.as_slice() {
            [lhs, rhs] => (lhs, rhs),
            _ => {
                return Err(MeilisearchHttpError::SwapIndexPayloadWrongLength(indexes).into());
            }
        };
        if !filters.is_index_authorized(lhs) || !filters.is_index_authorized(rhs) {
            return Err(AuthenticationError::InvalidToken.into());
        }
        swaps.push(IndexSwap { indexes: (lhs.to_string(), rhs.to_string()) });
    }

    let task = KindWithContent::IndexSwap { swaps };
    let uid = get_task_id(&req, &opt)?;
    let dry_run = is_dry_run(&req, &opt)?;
    let task: SummarizedTaskView =
        tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
            .await??
            .into();
    Ok(HttpResponse::Accepted().json(task))
}
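
Each swap entry above is validated with a slice pattern that accepts exactly two index uids and rejects anything else. A minimal sketch of that check, using plain `&str` names instead of `IndexUid` (illustrative only, not part of the diff):

// Accept exactly two indexes per swap operation; any other length is an error.
fn parse_swap(indexes: &[&str]) -> Result<(String, String), String> {
    match indexes {
        [lhs, rhs] => Ok((lhs.to_string(), rhs.to_string())),
        _ => Err(format!("expected exactly 2 indexes, got {}", indexes.len())),
    }
}

fn main() {
    assert!(parse_swap(&["movies", "movies_new"]).is_ok());
    assert!(parse_swap(&["movies"]).is_err());
}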
715  crates/meilisearch/src/routes/tasks.rs  Normal file
@@ -0,0 +1,715 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebQueryParameter;
use deserr::Deserr;
use index_scheduler::{IndexScheduler, Query, TaskId};
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::DeserrQueryParamError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{InvalidTaskDateError, ResponseError};
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::star_or::{OptionStarOr, OptionStarOrList};
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{Kind, KindWithContent, Status};
use serde::Serialize;
use serde_json::json;
use time::format_description::well_known::Rfc3339;
use time::macros::format_description;
use time::{Date, Duration, OffsetDateTime, Time};
use tokio::task;

use super::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::Opt;

const DEFAULT_LIMIT: u32 = 20;

pub fn configure(cfg: &mut web::ServiceConfig) {
    cfg.service(
        web::resource("")
            .route(web::get().to(SeqHandler(get_tasks)))
            .route(web::delete().to(SeqHandler(delete_tasks))),
    )
    .service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks))))
    .service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task))));
}

#[derive(Debug, Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct TasksFilterQuery {
    #[deserr(default = Param(DEFAULT_LIMIT), error = DeserrQueryParamError<InvalidTaskLimit>)]
    pub limit: Param<u32>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskFrom>)]
    pub from: Option<Param<TaskId>>,

    #[deserr(default, error = DeserrQueryParamError<InvalidTaskUids>)]
    pub uids: OptionStarOrList<u32>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskCanceledBy>)]
    pub canceled_by: OptionStarOrList<u32>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskTypes>)]
    pub types: OptionStarOrList<Kind>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskStatuses>)]
    pub statuses: OptionStarOrList<Status>,
    #[deserr(default, error = DeserrQueryParamError<InvalidIndexUid>)]
    pub index_uids: OptionStarOrList<IndexUid>,

    #[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterEnqueuedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
    pub after_enqueued_at: OptionStarOr<OffsetDateTime>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeEnqueuedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
    pub before_enqueued_at: OptionStarOr<OffsetDateTime>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterStartedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
    pub after_started_at: OptionStarOr<OffsetDateTime>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeStartedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
    pub before_started_at: OptionStarOr<OffsetDateTime>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterFinishedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
    pub after_finished_at: OptionStarOr<OffsetDateTime>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeFinishedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
    pub before_finished_at: OptionStarOr<OffsetDateTime>,
}

impl TasksFilterQuery {
    fn into_query(self) -> Query {
        Query {
            limit: Some(self.limit.0),
            from: self.from.as_deref().copied(),
            statuses: self.statuses.merge_star_and_none(),
            types: self.types.merge_star_and_none(),
            index_uids: self.index_uids.map(|x| x.to_string()).merge_star_and_none(),
            uids: self.uids.merge_star_and_none(),
            canceled_by: self.canceled_by.merge_star_and_none(),
            before_enqueued_at: self.before_enqueued_at.merge_star_and_none(),
            after_enqueued_at: self.after_enqueued_at.merge_star_and_none(),
            before_started_at: self.before_started_at.merge_star_and_none(),
            after_started_at: self.after_started_at.merge_star_and_none(),
            before_finished_at: self.before_finished_at.merge_star_and_none(),
            after_finished_at: self.after_finished_at.merge_star_and_none(),
        }
    }
}

impl TaskDeletionOrCancelationQuery {
    fn is_empty(&self) -> bool {
        matches!(
            self,
            TaskDeletionOrCancelationQuery {
                uids: OptionStarOrList::None,
                canceled_by: OptionStarOrList::None,
                types: OptionStarOrList::None,
                statuses: OptionStarOrList::None,
                index_uids: OptionStarOrList::None,
                after_enqueued_at: OptionStarOr::None,
                before_enqueued_at: OptionStarOr::None,
                after_started_at: OptionStarOr::None,
                before_started_at: OptionStarOr::None,
                after_finished_at: OptionStarOr::None,
                before_finished_at: OptionStarOr::None
            }
        )
    }
}

#[derive(Debug, Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct TaskDeletionOrCancelationQuery {
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskUids>)]
    pub uids: OptionStarOrList<u32>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskCanceledBy>)]
    pub canceled_by: OptionStarOrList<u32>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskTypes>)]
    pub types: OptionStarOrList<Kind>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskStatuses>)]
    pub statuses: OptionStarOrList<Status>,
    #[deserr(default, error = DeserrQueryParamError<InvalidIndexUid>)]
    pub index_uids: OptionStarOrList<IndexUid>,

    #[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterEnqueuedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
    pub after_enqueued_at: OptionStarOr<OffsetDateTime>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeEnqueuedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
    pub before_enqueued_at: OptionStarOr<OffsetDateTime>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterStartedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
    pub after_started_at: OptionStarOr<OffsetDateTime>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeStartedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
    pub before_started_at: OptionStarOr<OffsetDateTime>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterFinishedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
    pub after_finished_at: OptionStarOr<OffsetDateTime>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeFinishedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
    pub before_finished_at: OptionStarOr<OffsetDateTime>,
}

impl TaskDeletionOrCancelationQuery {
    fn into_query(self) -> Query {
        Query {
            limit: None,
            from: None,
            statuses: self.statuses.merge_star_and_none(),
            types: self.types.merge_star_and_none(),
            index_uids: self.index_uids.map(|x| x.to_string()).merge_star_and_none(),
            uids: self.uids.merge_star_and_none(),
            canceled_by: self.canceled_by.merge_star_and_none(),
            before_enqueued_at: self.before_enqueued_at.merge_star_and_none(),
            after_enqueued_at: self.after_enqueued_at.merge_star_and_none(),
            before_started_at: self.before_started_at.merge_star_and_none(),
            after_started_at: self.after_started_at.merge_star_and_none(),
            before_finished_at: self.before_finished_at.merge_star_and_none(),
            after_finished_at: self.after_finished_at.merge_star_and_none(),
        }
    }
}

async fn cancel_tasks(
    index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_CANCEL }>, Data<IndexScheduler>>,
    params: AwebQueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>,
    req: HttpRequest,
    opt: web::Data<Opt>,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let params = params.into_inner();

    if params.is_empty() {
        return Err(index_scheduler::Error::TaskCancelationWithEmptyQuery.into());
    }

    analytics.publish(
        "Tasks Canceled".to_string(),
        json!({
            "filtered_by_uid": params.uids.is_some(),
            "filtered_by_index_uid": params.index_uids.is_some(),
            "filtered_by_type": params.types.is_some(),
            "filtered_by_status": params.statuses.is_some(),
            "filtered_by_canceled_by": params.canceled_by.is_some(),
            "filtered_by_before_enqueued_at": params.before_enqueued_at.is_some(),
            "filtered_by_after_enqueued_at": params.after_enqueued_at.is_some(),
            "filtered_by_before_started_at": params.before_started_at.is_some(),
            "filtered_by_after_started_at": params.after_started_at.is_some(),
            "filtered_by_before_finished_at": params.before_finished_at.is_some(),
            "filtered_by_after_finished_at": params.after_finished_at.is_some(),
        }),
        Some(&req),
    );

    let query = params.into_query();

    let (tasks, _) = index_scheduler.get_task_ids_from_authorized_indexes(
        &index_scheduler.read_txn()?,
        &query,
        index_scheduler.filters(),
    )?;
    let task_cancelation =
        KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks };

    let uid = get_task_id(&req, &opt)?;
    let dry_run = is_dry_run(&req, &opt)?;
    let task =
        task::spawn_blocking(move || index_scheduler.register(task_cancelation, uid, dry_run))
            .await??;
    let task: SummarizedTaskView = task.into();

    Ok(HttpResponse::Ok().json(task))
}

async fn delete_tasks(
    index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_DELETE }>, Data<IndexScheduler>>,
    params: AwebQueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>,
    req: HttpRequest,
    opt: web::Data<Opt>,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let params = params.into_inner();

    if params.is_empty() {
        return Err(index_scheduler::Error::TaskDeletionWithEmptyQuery.into());
    }

    analytics.publish(
        "Tasks Deleted".to_string(),
        json!({
            "filtered_by_uid": params.uids.is_some(),
            "filtered_by_index_uid": params.index_uids.is_some(),
            "filtered_by_type": params.types.is_some(),
            "filtered_by_status": params.statuses.is_some(),
            "filtered_by_canceled_by": params.canceled_by.is_some(),
            "filtered_by_before_enqueued_at": params.before_enqueued_at.is_some(),
            "filtered_by_after_enqueued_at": params.after_enqueued_at.is_some(),
            "filtered_by_before_started_at": params.before_started_at.is_some(),
            "filtered_by_after_started_at": params.after_started_at.is_some(),
            "filtered_by_before_finished_at": params.before_finished_at.is_some(),
            "filtered_by_after_finished_at": params.after_finished_at.is_some(),
        }),
        Some(&req),
    );
    let query = params.into_query();

    let (tasks, _) = index_scheduler.get_task_ids_from_authorized_indexes(
        &index_scheduler.read_txn()?,
        &query,
        index_scheduler.filters(),
    )?;
    let task_deletion =
        KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks };

    let uid = get_task_id(&req, &opt)?;
    let dry_run = is_dry_run(&req, &opt)?;
    let task = task::spawn_blocking(move || index_scheduler.register(task_deletion, uid, dry_run))
        .await??;
    let task: SummarizedTaskView = task.into();

    Ok(HttpResponse::Ok().json(task))
}

#[derive(Debug, Serialize)]
pub struct AllTasks {
    results: Vec<TaskView>,
    total: u64,
    limit: u32,
    from: Option<u32>,
    next: Option<u32>,
}

async fn get_tasks(
    index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_GET }>, Data<IndexScheduler>>,
    params: AwebQueryParameter<TasksFilterQuery, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
    let mut params = params.into_inner();
    // We +1 just to know if there is more after this "page" or not.
    params.limit.0 = params.limit.0.saturating_add(1);
    let limit = params.limit.0;
    let query = params.into_query();

    let filters = index_scheduler.filters();
    let (tasks, total) = index_scheduler.get_tasks_from_authorized_indexes(query, filters)?;
    let mut results: Vec<_> = tasks.iter().map(TaskView::from_task).collect();

    // If we were able to fetch the number +1 tasks we asked
    // it means that there is more to come.
    let next = if results.len() == limit as usize { results.pop().map(|t| t.uid) } else { None };

    let from = results.first().map(|t| t.uid);
    let tasks = AllTasks { results, limit: limit.saturating_sub(1), total, from, next };

    Ok(HttpResponse::Ok().json(tasks))
}

async fn get_task(
    index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_GET }>, Data<IndexScheduler>>,
    task_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
    let task_uid_string = task_uid.into_inner();

    let task_uid: TaskId = match task_uid_string.parse() {
        Ok(id) => id,
        Err(_e) => {
            return Err(index_scheduler::Error::InvalidTaskUids { task_uid: task_uid_string }.into())
        }
    };

    let query = index_scheduler::Query { uids: Some(vec![task_uid]), ..Query::default() };
    let filters = index_scheduler.filters();
    let (tasks, _) = index_scheduler.get_tasks_from_authorized_indexes(query, filters)?;

    if let Some(task) = tasks.first() {
        let task_view = TaskView::from_task(task);
        Ok(HttpResponse::Ok().json(task_view))
    } else {
        Err(index_scheduler::Error::TaskNotFound(task_uid).into())
    }
}

pub enum DeserializeDateOption {
    Before,
    After,
}

pub fn deserialize_date(
    value: &str,
    option: DeserializeDateOption,
) -> std::result::Result<OffsetDateTime, InvalidTaskDateError> {
    // We can't parse using time's rfc3339 format, since then we won't know what part of the
    // datetime was not explicitly specified, and thus we won't be able to increment it to the
    // next step.
    if let Ok(datetime) = OffsetDateTime::parse(value, &Rfc3339) {
        // fully specified up to the second
        // we assume that the subseconds are 0 if not specified, and we don't increment to the next second
        Ok(datetime)
    } else if let Ok(datetime) = Date::parse(
        value,
        format_description!("[year repr:full base:calendar]-[month repr:numerical]-[day]"),
    ) {
        let datetime = datetime.with_time(Time::MIDNIGHT).assume_utc();
        // add one day since the time was not specified
        match option {
            DeserializeDateOption::Before => Ok(datetime),
            DeserializeDateOption::After => {
                let datetime = datetime.checked_add(Duration::days(1)).unwrap_or(datetime);
                Ok(datetime)
            }
        }
    } else {
        Err(InvalidTaskDateError(value.to_owned()))
    }
}

pub fn deserialize_date_after(
    value: OptionStarOr<String>,
) -> std::result::Result<OptionStarOr<OffsetDateTime>, InvalidTaskDateError> {
    value.try_map(|x| deserialize_date(&x, DeserializeDateOption::After))
}
pub fn deserialize_date_before(
    value: OptionStarOr<String>,
) -> std::result::Result<OptionStarOr<OffsetDateTime>, InvalidTaskDateError> {
    value.try_map(|x| deserialize_date(&x, DeserializeDateOption::Before))
}

#[cfg(test)]
mod tests {
    use deserr::Deserr;
    use meili_snap::snapshot;
    use meilisearch_types::deserr::DeserrQueryParamError;
    use meilisearch_types::error::{Code, ResponseError};

    use crate::routes::tasks::{TaskDeletionOrCancelationQuery, TasksFilterQuery};

    fn deserr_query_params<T>(j: &str) -> Result<T, ResponseError>
    where
        T: Deserr<DeserrQueryParamError>,
    {
        let value = serde_urlencoded::from_str::<serde_json::Value>(j)
            .map_err(|e| ResponseError::from_msg(e.to_string(), Code::BadRequest))?;

        match deserr::deserialize::<_, _, DeserrQueryParamError>(value) {
            Ok(data) => Ok(data),
            Err(e) => Err(ResponseError::from(e)),
        }
    }

    #[test]
    fn deserialize_task_filter_dates() {
        {
            let params = "afterEnqueuedAt=2021-12-03&beforeEnqueuedAt=2021-12-03&afterStartedAt=2021-12-03&beforeStartedAt=2021-12-03&afterFinishedAt=2021-12-03&beforeFinishedAt=2021-12-03";
            let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();

            snapshot!(format!("{:?}", query.after_enqueued_at), @"Other(2021-12-04 0:00:00.0 +00:00:00)");
            snapshot!(format!("{:?}", query.before_enqueued_at), @"Other(2021-12-03 0:00:00.0 +00:00:00)");
            snapshot!(format!("{:?}", query.after_started_at), @"Other(2021-12-04 0:00:00.0 +00:00:00)");
            snapshot!(format!("{:?}", query.before_started_at), @"Other(2021-12-03 0:00:00.0 +00:00:00)");
            snapshot!(format!("{:?}", query.after_finished_at), @"Other(2021-12-04 0:00:00.0 +00:00:00)");
            snapshot!(format!("{:?}", query.before_finished_at), @"Other(2021-12-03 0:00:00.0 +00:00:00)");
        }
        {
            let params =
                "afterEnqueuedAt=2021-12-03T23:45:23Z&beforeEnqueuedAt=2021-12-03T23:45:23Z";
            let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
            snapshot!(format!("{:?}", query.after_enqueued_at), @"Other(2021-12-03 23:45:23.0 +00:00:00)");
            snapshot!(format!("{:?}", query.before_enqueued_at), @"Other(2021-12-03 23:45:23.0 +00:00:00)");
        }
        {
            let params = "afterEnqueuedAt=1997-11-12T09:55:06-06:20";
            let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
            snapshot!(format!("{:?}", query.after_enqueued_at), @"Other(1997-11-12 9:55:06.0 -06:20:00)");
        }
        {
            let params = "afterEnqueuedAt=1997-11-12T09:55:06%2B00:00";
            let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
            snapshot!(format!("{:?}", query.after_enqueued_at), @"Other(1997-11-12 9:55:06.0 +00:00:00)");
        }
        {
            let params = "afterEnqueuedAt=1997-11-12T09:55:06.200000300Z";
            let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
            snapshot!(format!("{:?}", query.after_enqueued_at), @"Other(1997-11-12 9:55:06.2000003 +00:00:00)");
        }
        {
            // Stars are allowed in date fields as well
            let params = "afterEnqueuedAt=*&beforeStartedAt=*&afterFinishedAt=*&beforeFinishedAt=*&afterStartedAt=*&beforeEnqueuedAt=*";
            let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
            snapshot!(format!("{:?}", query), @"TaskDeletionOrCancelationQuery { uids: None, canceled_by: None, types: None, statuses: None, index_uids: None, after_enqueued_at: Star, before_enqueued_at: Star, after_started_at: Star, before_started_at: Star, after_finished_at: Star, before_finished_at: Star }");
        }
        {
            let params = "afterFinishedAt=2021";
            let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
            snapshot!(meili_snap::json_string!(err), @r###"
            {
              "message": "Invalid value in parameter `afterFinishedAt`: `2021` is an invalid date-time. It should follow the YYYY-MM-DD or RFC 3339 date-time format.",
              "code": "invalid_task_after_finished_at",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_task_after_finished_at"
            }
            "###);
        }
        {
            let params = "beforeFinishedAt=2021";
            let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
            snapshot!(meili_snap::json_string!(err), @r###"
            {
              "message": "Invalid value in parameter `beforeFinishedAt`: `2021` is an invalid date-time. It should follow the YYYY-MM-DD or RFC 3339 date-time format.",
              "code": "invalid_task_before_finished_at",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_task_before_finished_at"
            }
            "###);
        }
        {
            let params = "afterEnqueuedAt=2021-12";
            let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
            snapshot!(meili_snap::json_string!(err), @r###"
            {
              "message": "Invalid value in parameter `afterEnqueuedAt`: `2021-12` is an invalid date-time. It should follow the YYYY-MM-DD or RFC 3339 date-time format.",
              "code": "invalid_task_after_enqueued_at",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_task_after_enqueued_at"
            }
            "###);
        }

        {
            let params = "beforeEnqueuedAt=2021-12-03T23";
            let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
            snapshot!(meili_snap::json_string!(err), @r###"
            {
              "message": "Invalid value in parameter `beforeEnqueuedAt`: `2021-12-03T23` is an invalid date-time. It should follow the YYYY-MM-DD or RFC 3339 date-time format.",
              "code": "invalid_task_before_enqueued_at",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_task_before_enqueued_at"
            }
            "###);
        }
        {
            let params = "afterStartedAt=2021-12-03T23:45";
            let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
            snapshot!(meili_snap::json_string!(err), @r###"
            {
              "message": "Invalid value in parameter `afterStartedAt`: `2021-12-03T23:45` is an invalid date-time. It should follow the YYYY-MM-DD or RFC 3339 date-time format.",
              "code": "invalid_task_after_started_at",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_task_after_started_at"
            }
            "###);
        }
        {
            let params = "beforeStartedAt=2021-12-03T23:45";
            let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
            snapshot!(meili_snap::json_string!(err), @r###"
            {
              "message": "Invalid value in parameter `beforeStartedAt`: `2021-12-03T23:45` is an invalid date-time. It should follow the YYYY-MM-DD or RFC 3339 date-time format.",
              "code": "invalid_task_before_started_at",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_task_before_started_at"
            }
            "###);
        }
    }

    #[test]
    fn deserialize_task_filter_uids() {
        {
            let params = "uids=78,1,12,73";
            let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
            snapshot!(format!("{:?}", query.uids), @"List([78, 1, 12, 73])");
        }
        {
            let params = "uids=1";
            let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
            snapshot!(format!("{:?}", query.uids), @"List([1])");
        }
        {
            let params = "uids=cat,*,dog";
            let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
            snapshot!(meili_snap::json_string!(err), @r###"
            {
              "message": "Invalid value in parameter `uids[0]`: could not parse `cat` as a positive integer",
              "code": "invalid_task_uids",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_task_uids"
            }
            "###);
        }
        {
            let params = "uids=78,hello,world";
            let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
            snapshot!(meili_snap::json_string!(err), @r###"
            {
              "message": "Invalid value in parameter `uids[1]`: could not parse `hello` as a positive integer",
              "code": "invalid_task_uids",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_task_uids"
            }
            "###);
        }
        {
            let params = "uids=cat";
            let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
            snapshot!(meili_snap::json_string!(err), @r###"
            {
              "message": "Invalid value in parameter `uids`: could not parse `cat` as a positive integer",
              "code": "invalid_task_uids",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_task_uids"
            }
            "###);
        }
    }

    #[test]
    fn deserialize_task_filter_status() {
        {
            let params = "statuses=succeeded,failed,enqueued,processing,canceled";
            let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
            snapshot!(format!("{:?}", query.statuses), @"List([Succeeded, Failed, Enqueued, Processing, Canceled])");
        }
        {
            let params = "statuses=enqueued";
            let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
            snapshot!(format!("{:?}", query.statuses), @"List([Enqueued])");
        }
        {
            let params = "statuses=finished";
            let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
            snapshot!(meili_snap::json_string!(err), @r###"
            {
              "message": "Invalid value in parameter `statuses`: `finished` is not a valid task status. Available statuses are `enqueued`, `processing`, `succeeded`, `failed`, `canceled`.",
              "code": "invalid_task_statuses",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_task_statuses"
            }
            "###);
        }
    }
    #[test]
    fn deserialize_task_filter_types() {
        {
            let params = "types=documentAdditionOrUpdate,documentDeletion,settingsUpdate,indexCreation,indexDeletion,indexUpdate,indexSwap,taskCancelation,taskDeletion,dumpCreation,snapshotCreation";
            let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
            snapshot!(format!("{:?}", query.types), @"List([DocumentAdditionOrUpdate, DocumentDeletion, SettingsUpdate, IndexCreation, IndexDeletion, IndexUpdate, IndexSwap, TaskCancelation, TaskDeletion, DumpCreation, SnapshotCreation])");
        }
        {
            let params = "types=settingsUpdate";
            let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
            snapshot!(format!("{:?}", query.types), @"List([SettingsUpdate])");
        }
        {
            let params = "types=createIndex";
            let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
            snapshot!(meili_snap::json_string!(err), @r###"
            {
              "message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
              "code": "invalid_task_types",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_task_types"
            }
            "###);
        }
    }
    #[test]
    fn deserialize_task_filter_index_uids() {
        {
            let params = "indexUids=toto,tata-78";
            let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
            snapshot!(format!("{:?}", query.index_uids), @r###"List([IndexUid("toto"), IndexUid("tata-78")])"###);
        }
        {
            let params = "indexUids=index_a";
            let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
            snapshot!(format!("{:?}", query.index_uids), @r###"List([IndexUid("index_a")])"###);
        }
        {
            let params = "indexUids=1,hé";
            let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
            snapshot!(meili_snap::json_string!(err), @r###"
            {
              "message": "Invalid value in parameter `indexUids[1]`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
              "code": "invalid_index_uid",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_index_uid"
            }
            "###);
        }
        {
            let params = "indexUids=hé";
            let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
            snapshot!(meili_snap::json_string!(err), @r###"
            {
              "message": "Invalid value in parameter `indexUids`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
              "code": "invalid_index_uid",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_index_uid"
            }
            "###);
        }
    }

    #[test]
    fn deserialize_task_filter_general() {
        {
            let params = "from=12&limit=15&indexUids=toto,tata-78&statuses=succeeded,enqueued&afterEnqueuedAt=2012-04-23&uids=1,2,3";
            let query = deserr_query_params::<TasksFilterQuery>(params).unwrap();
            snapshot!(format!("{:?}", query), @r###"TasksFilterQuery { limit: Param(15), from: Some(Param(12)), uids: List([1, 2, 3]), canceled_by: None, types: None, statuses: List([Succeeded, Enqueued]), index_uids: List([IndexUid("toto"), IndexUid("tata-78")]), after_enqueued_at: Other(2012-04-24 0:00:00.0 +00:00:00), before_enqueued_at: None, after_started_at: None, before_started_at: None, after_finished_at: None, before_finished_at: None }"###);
        }
        {
            // Stars should translate to `None` in the query
            // Verify value of the default limit
            let params = "indexUids=*&statuses=succeeded,*&afterEnqueuedAt=2012-04-23&uids=1,2,3";
            let query = deserr_query_params::<TasksFilterQuery>(params).unwrap();
            snapshot!(format!("{:?}", query), @"TasksFilterQuery { limit: Param(20), from: None, uids: List([1, 2, 3]), canceled_by: None, types: None, statuses: Star, index_uids: Star, after_enqueued_at: Other(2012-04-24 0:00:00.0 +00:00:00), before_enqueued_at: None, after_started_at: None, before_started_at: None, after_finished_at: None, before_finished_at: None }");
        }
        {
            // Stars should also translate to `None` in task deletion/cancelation queries
            let params = "indexUids=*&statuses=succeeded,*&afterEnqueuedAt=2012-04-23&uids=1,2,3";
            let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
            snapshot!(format!("{:?}", query), @"TaskDeletionOrCancelationQuery { uids: List([1, 2, 3]), canceled_by: None, types: None, statuses: Star, index_uids: Star, after_enqueued_at: Other(2012-04-24 0:00:00.0 +00:00:00), before_enqueued_at: None, after_started_at: None, before_started_at: None, after_finished_at: None, before_finished_at: None }");
        }
        {
            // Star in from not allowed
            let params = "uids=*&from=*";
            let err = deserr_query_params::<TasksFilterQuery>(params).unwrap_err();
            snapshot!(meili_snap::json_string!(err), @r###"
            {
              "message": "Invalid value in parameter `from`: could not parse `*` as a positive integer",
              "code": "invalid_task_from",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_task_from"
            }
            "###);
        }
        {
            // From not allowed in task deletion/cancelation queries
            let params = "from=12";
            let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
            snapshot!(meili_snap::json_string!(err), @r###"
            {
              "message": "Unknown parameter `from`: expected one of `uids`, `canceledBy`, `types`, `statuses`, `indexUids`, `afterEnqueuedAt`, `beforeEnqueuedAt`, `afterStartedAt`, `beforeStartedAt`, `afterFinishedAt`, `beforeFinishedAt`",
              "code": "bad_request",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#bad_request"
            }
            "###);
        }
        {
            // Limit not allowed in task deletion/cancelation queries
            let params = "limit=12";
            let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
            snapshot!(meili_snap::json_string!(err), @r###"
            {
              "message": "Unknown parameter `limit`: expected one of `uids`, `canceledBy`, `types`, `statuses`, `indexUids`, `afterEnqueuedAt`, `beforeEnqueuedAt`, `afterStartedAt`, `beforeStartedAt`, `afterFinishedAt`, `beforeFinishedAt`",
              "code": "bad_request",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#bad_request"
            }
            "###);
        }
    }

    #[test]
    fn deserialize_task_delete_or_cancel_empty() {
        {
            let params = "";
            let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
            assert!(query.is_empty());
        }
        {
            let params = "statuses=*";
            let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
            assert!(!query.is_empty());
            snapshot!(format!("{query:?}"), @"TaskDeletionOrCancelationQuery { uids: None, canceled_by: None, types: None, statuses: Star, index_uids: None, after_enqueued_at: None, before_enqueued_at: None, after_started_at: None, before_started_at: None, after_finished_at: None, before_finished_at: None }");
        }
    }
}
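
`get_tasks` above relies on the common "fetch limit + 1" trick to compute the `next` cursor: it asks the scheduler for one extra task and, if that task comes back, pops it from the returned page and uses its uid as the cursor. A self-contained sketch of the same idea, with hypothetical `u32` task uids (illustrative only, not part of the diff):

// `results` is assumed to have been fetched with `limit + 1`.
fn paginate(mut results: Vec<u32>, limit: usize) -> (Vec<u32>, Option<u32>) {
    let next = if results.len() == limit + 1 { results.pop() } else { None };
    (results, next)
}

fn main() {
    assert_eq!(paginate(vec![10, 9, 8], 2), (vec![10, 9], Some(8)));
    assert_eq!(paginate(vec![10, 9], 2), (vec![10, 9], None));
}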
910  crates/meilisearch/src/search/federated.rs  Normal file
@@ -0,0 +1,910 @@
use std::cmp::Ordering;
|
||||
use std::collections::BTreeMap;
|
||||
use std::fmt;
|
||||
use std::iter::Zip;
|
||||
use std::rc::Rc;
|
||||
use std::str::FromStr as _;
|
||||
use std::time::Duration;
|
||||
use std::vec::{IntoIter, Vec};
|
||||
|
||||
use actix_http::StatusCode;
|
||||
use index_scheduler::{IndexScheduler, RoFeatures};
|
||||
use indexmap::IndexMap;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::deserr_codes::{
|
||||
InvalidMultiSearchFacetsByIndex, InvalidMultiSearchMaxValuesPerFacet,
|
||||
InvalidMultiSearchMergeFacets, InvalidMultiSearchWeight, InvalidSearchLimit,
|
||||
InvalidSearchOffset,
|
||||
};
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue};
|
||||
use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget};
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::Serialize;
|
||||
|
||||
use super::ranking_rules::{self, RankingRules};
|
||||
use super::{
|
||||
compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, FacetStats,
|
||||
HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex,
|
||||
};
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::routes::indexes::search::search_kind;
|
||||
|
||||
pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0;
|
||||
|
||||
#[derive(Debug, Default, Clone, Copy, PartialEq, deserr::Deserr)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct FederationOptions {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchWeight>)]
|
||||
pub weight: Weight,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
|
||||
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidMultiSearchWeight)]
|
||||
pub struct Weight(f64);
|
||||
|
||||
impl Default for Weight {
|
||||
fn default() -> Self {
|
||||
Weight(DEFAULT_FEDERATED_WEIGHT)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::convert::TryFrom<f64> for Weight {
|
||||
type Error = InvalidMultiSearchWeight;
|
||||
|
||||
fn try_from(f: f64) -> Result<Self, Self::Error> {
|
||||
if f < 0.0 {
|
||||
Err(InvalidMultiSearchWeight)
|
||||
} else {
|
||||
Ok(Weight(f))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Deref for Weight {
|
||||
type Target = f64;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, deserr::Deserr)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct Federation {
|
||||
#[deserr(default = super::DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
|
||||
pub limit: usize,
|
||||
#[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
|
||||
pub offset: usize,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchFacetsByIndex>)]
|
||||
pub facets_by_index: BTreeMap<IndexUid, Option<Vec<String>>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchMergeFacets>)]
|
||||
pub merge_facets: Option<MergeFacets>,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, deserr::Deserr, Default)]
|
||||
#[deserr(error = DeserrJsonError<InvalidMultiSearchMergeFacets>, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct MergeFacets {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchMaxValuesPerFacet>)]
|
||||
pub max_values_per_facet: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug, deserr::Deserr)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct FederatedSearch {
|
||||
pub queries: Vec<SearchQueryWithIndex>,
|
||||
#[deserr(default)]
|
||||
pub federation: Option<Federation>,
|
||||
}
|
||||
#[derive(Serialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct FederatedSearchResult {
|
||||
pub hits: Vec<SearchHit>,
|
||||
pub processing_time_ms: u128,
|
||||
#[serde(flatten)]
|
||||
pub hits_info: HitsInfo,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub semantic_hit_count: Option<u32>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
|
||||
#[serde(skip_serializing_if = "FederatedFacets::is_empty")]
|
||||
pub facets_by_index: FederatedFacets,
|
||||
|
||||
// These fields are only used for analytics purposes
|
||||
#[serde(skip)]
|
||||
pub degraded: bool,
|
||||
#[serde(skip)]
|
||||
pub used_negative_operator: bool,
|
||||
}
|
||||
|
||||
impl fmt::Debug for FederatedSearchResult {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let FederatedSearchResult {
|
||||
hits,
|
||||
processing_time_ms,
|
||||
hits_info,
|
||||
semantic_hit_count,
|
||||
degraded,
|
||||
used_negative_operator,
|
||||
facet_distribution,
|
||||
facet_stats,
|
||||
facets_by_index,
|
||||
} = self;
|
||||
|
||||
let mut debug = f.debug_struct("SearchResult");
|
||||
// The most important thing when looking at a search result is the time it took to process
|
||||
debug.field("processing_time_ms", &processing_time_ms);
|
||||
debug.field("hits", &format!("[{} hits returned]", hits.len()));
|
||||
debug.field("hits_info", &hits_info);
|
||||
if *used_negative_operator {
|
||||
debug.field("used_negative_operator", used_negative_operator);
|
||||
}
|
||||
if *degraded {
|
||||
debug.field("degraded", degraded);
|
||||
}
|
||||
if let Some(facet_distribution) = facet_distribution {
|
||||
debug.field("facet_distribution", &facet_distribution);
|
||||
}
|
||||
if let Some(facet_stats) = facet_stats {
|
||||
debug.field("facet_stats", &facet_stats);
|
||||
}
|
||||
if let Some(semantic_hit_count) = semantic_hit_count {
|
||||
debug.field("semantic_hit_count", &semantic_hit_count);
|
||||
}
|
||||
if !facets_by_index.is_empty() {
|
||||
debug.field("facets_by_index", &facets_by_index);
|
||||
}
|
||||
|
||||
debug.finish()
|
||||
}
|
||||
}
|
||||
|
||||
struct WeightedScore<'a> {
|
||||
details: &'a [ScoreDetails],
|
||||
weight: f64,
|
||||
}
|
||||
|
||||
impl<'a> WeightedScore<'a> {
|
||||
pub fn new(details: &'a [ScoreDetails], weight: f64) -> Self {
|
||||
Self { details, weight }
|
||||
}
|
||||
|
||||
pub fn weighted_global_score(&self) -> f64 {
|
||||
ScoreDetails::global_score(self.details.iter()) * self.weight
|
||||
}
|
||||
|
||||
pub fn compare_weighted_global_scores(&self, other: &Self) -> Ordering {
|
||||
self.weighted_global_score()
|
||||
.partial_cmp(&other.weighted_global_score())
|
||||
// both are numbers, possibly infinite
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub fn compare(&self, other: &Self) -> Ordering {
|
||||
let mut left_it = ScoreDetails::score_values(self.details.iter());
|
||||
let mut right_it = ScoreDetails::score_values(other.details.iter());
|
||||
|
||||
loop {
|
||||
let left = left_it.next();
|
||||
let right = right_it.next();
|
||||
|
||||
match (left, right) {
|
||||
(None, None) => return Ordering::Equal,
|
||||
(None, Some(_)) => return Ordering::Less,
|
||||
(Some(_), None) => return Ordering::Greater,
|
||||
(Some(ScoreValue::Score(left)), Some(ScoreValue::Score(right))) => {
|
||||
let left = left * self.weight;
|
||||
let right = right * other.weight;
|
||||
if (left - right).abs() <= f64::EPSILON {
|
||||
continue;
|
||||
}
|
||||
return left.partial_cmp(&right).unwrap();
|
||||
}
|
||||
(Some(ScoreValue::Sort(left)), Some(ScoreValue::Sort(right))) => {
|
||||
match left.partial_cmp(right) {
|
||||
Some(Ordering::Equal) => continue,
|
||||
Some(order) => return order,
|
||||
None => return self.compare_weighted_global_scores(other),
|
||||
}
|
||||
}
|
||||
(Some(ScoreValue::GeoSort(left)), Some(ScoreValue::GeoSort(right))) => {
|
||||
match left.partial_cmp(right) {
|
||||
Some(Ordering::Equal) => continue,
|
||||
Some(order) => return order,
|
||||
None => {
|
||||
return self.compare_weighted_global_scores(other);
|
||||
}
|
||||
}
|
||||
}
|
||||
// not comparable details, use global
|
||||
(Some(ScoreValue::Score(_)), Some(_))
|
||||
| (Some(_), Some(ScoreValue::Score(_)))
|
||||
| (Some(ScoreValue::GeoSort(_)), Some(ScoreValue::Sort(_)))
|
||||
| (Some(ScoreValue::Sort(_)), Some(ScoreValue::GeoSort(_))) => {
|
||||
let left_count = left_it.count();
|
||||
let right_count = right_it.count();
|
||||
// compare how many remaining groups of rules each side has;
// the side with the most remaining groups wins.
|
||||
return left_count
|
||||
.cmp(&right_count)
|
||||
// breaks ties with the global ranking score
|
||||
.then_with(|| self.compare_weighted_global_scores(other));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
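// In short: `compare` walks both score lists in lockstep. Plain numeric scores are
// multiplied by each side's weight and considered tied within `f64::EPSILON`; sort and
// geo-sort values are compared directly. When the two sides stop being comparable
// (e.g. a score against a sort value), the side with more remaining rule groups wins,
// with the weighted global score as the final tie-breaker. For example, with weights
// 1.0 and 2.0, raw scores of 0.8 and 0.5 compare as 0.8 < 1.0, so the second hit ranks higher.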
|
||||
|
||||
struct QueryByIndex {
|
||||
query: SearchQuery,
|
||||
federation_options: FederationOptions,
|
||||
query_index: usize,
|
||||
}
|
||||
|
||||
struct SearchResultByQuery<'a> {
|
||||
documents_ids: Vec<DocumentId>,
|
||||
document_scores: Vec<Vec<ScoreDetails>>,
|
||||
federation_options: FederationOptions,
|
||||
hit_maker: HitMaker<'a>,
|
||||
query_index: usize,
|
||||
}
|
||||
|
||||
struct SearchResultByQueryIter<'a> {
|
||||
it: Zip<IntoIter<DocumentId>, IntoIter<Vec<ScoreDetails>>>,
|
||||
federation_options: FederationOptions,
|
||||
hit_maker: Rc<HitMaker<'a>>,
|
||||
query_index: usize,
|
||||
}
|
||||
|
||||
impl<'a> SearchResultByQueryIter<'a> {
|
||||
fn new(
|
||||
SearchResultByQuery {
|
||||
documents_ids,
|
||||
document_scores,
|
||||
federation_options,
|
||||
hit_maker,
|
||||
query_index,
|
||||
}: SearchResultByQuery<'a>,
|
||||
) -> Self {
|
||||
let it = documents_ids.into_iter().zip(document_scores);
|
||||
Self { it, federation_options, hit_maker: Rc::new(hit_maker), query_index }
|
||||
}
|
||||
}
|
||||
|
||||
struct SearchResultByQueryIterItem<'a> {
|
||||
docid: DocumentId,
|
||||
score: Vec<ScoreDetails>,
|
||||
federation_options: FederationOptions,
|
||||
hit_maker: Rc<HitMaker<'a>>,
|
||||
query_index: usize,
|
||||
}
|
||||
|
||||
fn merge_index_local_results(
    results_by_query: Vec<SearchResultByQuery<'_>>,
) -> impl Iterator<Item = SearchResultByQueryIterItem> + '_ {
    itertools::kmerge_by(
        results_by_query.into_iter().map(SearchResultByQueryIter::new),
        |left: &SearchResultByQueryIterItem, right: &SearchResultByQueryIterItem| {
            let left_score = WeightedScore::new(&left.score, *left.federation_options.weight);
            let right_score = WeightedScore::new(&right.score, *right.federation_options.weight);

            match left_score.compare(&right_score) {
                // the biggest score goes first
                Ordering::Greater => true,
                // break ties using query index
                Ordering::Equal => left.query_index < right.query_index,
                Ordering::Less => false,
            }
        },
    )
}
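// Note: `kmerge_by` only produces a correctly ordered stream because each per-query
// iterator is already sorted by decreasing score, as each query's results come back
// ranked from the search; the closure merely decides which head element goes first.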
|
||||
|
||||
fn merge_index_global_results(
|
||||
results_by_index: Vec<SearchResultByIndex>,
|
||||
) -> impl Iterator<Item = SearchHitByIndex> {
|
||||
itertools::kmerge_by(
|
||||
results_by_index.into_iter().map(|result_by_index| result_by_index.hits.into_iter()),
|
||||
|left: &SearchHitByIndex, right: &SearchHitByIndex| {
|
||||
let left_score = WeightedScore::new(&left.score, *left.federation_options.weight);
|
||||
let right_score = WeightedScore::new(&right.score, *right.federation_options.weight);
|
||||
|
||||
match left_score.compare(&right_score) {
|
||||
// the biggest score goes first
|
||||
Ordering::Greater => true,
|
||||
// break ties using query index
|
||||
Ordering::Equal => left.query_index < right.query_index,
|
||||
Ordering::Less => false,
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
impl<'a> Iterator for SearchResultByQueryIter<'a> {
|
||||
type Item = SearchResultByQueryIterItem<'a>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let (docid, score) = self.it.next()?;
|
||||
Some(SearchResultByQueryIterItem {
|
||||
docid,
|
||||
score,
|
||||
federation_options: self.federation_options,
|
||||
hit_maker: Rc::clone(&self.hit_maker),
|
||||
query_index: self.query_index,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
struct SearchHitByIndex {
|
||||
hit: SearchHit,
|
||||
score: Vec<ScoreDetails>,
|
||||
federation_options: FederationOptions,
|
||||
query_index: usize,
|
||||
}
|
||||
|
||||
struct SearchResultByIndex {
|
||||
index: String,
|
||||
hits: Vec<SearchHitByIndex>,
|
||||
estimated_total_hits: usize,
|
||||
degraded: bool,
|
||||
used_negative_operator: bool,
|
||||
facets: Option<ComputedFacets>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, Serialize)]
|
||||
pub struct FederatedFacets(pub BTreeMap<String, ComputedFacets>);
|
||||
|
||||
impl FederatedFacets {
|
||||
pub fn insert(&mut self, index: String, facets: Option<ComputedFacets>) {
|
||||
if let Some(facets) = facets {
|
||||
self.0.insert(index, facets);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.0.is_empty()
|
||||
}
|
||||
|
||||
pub fn merge(
|
||||
self,
|
||||
MergeFacets { max_values_per_facet }: MergeFacets,
|
||||
facet_order: BTreeMap<String, (String, OrderBy)>,
|
||||
) -> Option<ComputedFacets> {
|
||||
if self.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut distribution: BTreeMap<String, _> = Default::default();
|
||||
let mut stats: BTreeMap<String, FacetStats> = Default::default();
|
||||
|
||||
for facets_by_index in self.0.into_values() {
|
||||
for (facet, index_distribution) in facets_by_index.distribution {
|
||||
match distribution.entry(facet) {
|
||||
std::collections::btree_map::Entry::Vacant(entry) => {
|
||||
entry.insert(index_distribution);
|
||||
}
|
||||
std::collections::btree_map::Entry::Occupied(mut entry) => {
|
||||
let distribution = entry.get_mut();
|
||||
|
||||
for (value, index_count) in index_distribution {
|
||||
distribution
|
||||
.entry(value)
|
||||
.and_modify(|count| *count += index_count)
|
||||
.or_insert(index_count);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (facet, index_stats) in facets_by_index.stats {
|
||||
match stats.entry(facet) {
|
||||
std::collections::btree_map::Entry::Vacant(entry) => {
|
||||
entry.insert(index_stats);
|
||||
}
|
||||
std::collections::btree_map::Entry::Occupied(mut entry) => {
|
||||
let stats = entry.get_mut();
|
||||
|
||||
stats.min = f64::min(stats.min, index_stats.min);
|
||||
stats.max = f64::max(stats.max, index_stats.max);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// fixup order
|
||||
for (facet, values) in &mut distribution {
|
||||
let order_by = facet_order.get(facet).map(|(_, order)| *order).unwrap_or_default();
|
||||
|
||||
match order_by {
|
||||
OrderBy::Lexicographic => {
|
||||
values.sort_unstable_by(|left, _, right, _| left.cmp(right))
|
||||
}
|
||||
OrderBy::Count => {
|
||||
values.sort_unstable_by(|_, left, _, right| {
|
||||
left.cmp(right)
|
||||
// biggest first
|
||||
.reverse()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(max_values_per_facet) = max_values_per_facet {
|
||||
values.truncate(max_values_per_facet)
|
||||
};
|
||||
}
|
||||
|
||||
Some(ComputedFacets { distribution, stats })
|
||||
}
|
||||
}
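// When merging, distributions for the same facet are summed value by value across
// indexes, stats keep the overall min/max, and each facet is then re-sorted using the
// order remembered from the first index that declared it (`facet_order`), truncated to
// `maxValuesPerFacet` when requested.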
|
||||
|
||||
pub fn perform_federated_search(
|
||||
index_scheduler: &IndexScheduler,
|
||||
queries: Vec<SearchQueryWithIndex>,
|
||||
mut federation: Federation,
|
||||
features: RoFeatures,
|
||||
) -> Result<FederatedSearchResult, ResponseError> {
|
||||
let before_search = std::time::Instant::now();
|
||||
|
||||
// This implementation partitions the queries by index to guarantee an important property:
// - all the queries to a particular index use the same read transaction.
// Without this, we could not guarantee the self-consistency of the results.
|
||||
|
||||
// 1. partition queries by index
|
||||
let mut queries_by_index: BTreeMap<String, Vec<QueryByIndex>> = Default::default();
|
||||
for (query_index, federated_query) in queries.into_iter().enumerate() {
|
||||
if let Some(pagination_field) = federated_query.has_pagination() {
|
||||
return Err(MeilisearchHttpError::PaginationInFederatedQuery(
|
||||
query_index,
|
||||
pagination_field,
|
||||
)
|
||||
.into());
|
||||
}
|
||||
|
||||
if let Some(facets) = federated_query.has_facets() {
|
||||
let facets = facets.to_owned();
|
||||
return Err(MeilisearchHttpError::FacetsInFederatedQuery(
|
||||
query_index,
|
||||
federated_query.index_uid.into_inner(),
|
||||
facets,
|
||||
)
|
||||
.into());
|
||||
}
|
||||
|
||||
let (index_uid, query, federation_options) = federated_query.into_index_query_federation();
|
||||
|
||||
queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex {
|
||||
query,
|
||||
federation_options: federation_options.unwrap_or_default(),
|
||||
query_index,
|
||||
})
|
||||
}
|
||||
|
||||
// 2. perform queries, merge and make hits index by index
|
||||
let required_hit_count = federation.limit + federation.offset;
|
||||
|
||||
// In step (2), semantic_hit_count is set to Some(0) as soon as any query uses a semantic search kind.
// Then, in step (3), it is incremented for every merged hit that carries a vector score.
|
||||
let mut semantic_hit_count = None;
|
||||
let mut results_by_index = Vec::with_capacity(queries_by_index.len());
|
||||
let mut previous_query_data: Option<(RankingRules, usize, String)> = None;
|
||||
|
||||
// Remember the sort order and the name of the first index encountered for each facet when merging with index settings,
// so we can detect when that order is inconsistent for a facet across indexes.
|
||||
let mut facet_order: Option<BTreeMap<String, (String, OrderBy)>> = match federation.merge_facets
|
||||
{
|
||||
Some(MergeFacets { .. }) => Some(Default::default()),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
for (index_uid, queries) in queries_by_index {
|
||||
let first_query_index = queries.first().map(|query| query.query_index);
|
||||
|
||||
let index = match index_scheduler.index(&index_uid) {
|
||||
Ok(index) => index,
|
||||
Err(err) => {
|
||||
let mut err = ResponseError::from(err);
|
||||
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
|
||||
// here the resource not found is not part of the URL.
|
||||
err.code = StatusCode::BAD_REQUEST;
|
||||
if let Some(query_index) = first_query_index {
|
||||
err.message = format!("Inside `.queries[{}]`: {}", query_index, err.message);
|
||||
}
|
||||
return Err(err);
|
||||
}
|
||||
};
|
||||
|
||||
// Important: this is the only transaction we'll use for this index during this federated search
|
||||
let rtxn = index.read_txn()?;
|
||||
|
||||
let criteria = index.criteria(&rtxn)?;
|
||||
|
||||
let dictionary = index.dictionary(&rtxn)?;
|
||||
let dictionary: Option<Vec<_>> =
|
||||
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||
let separators = index.allowed_separators(&rtxn)?;
|
||||
let separators: Option<Vec<_>> =
|
||||
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||
|
||||
// each query gets its individual cutoff
|
||||
let cutoff = index.search_cutoff(&rtxn)?;
|
||||
|
||||
let mut degraded = false;
|
||||
let mut used_negative_operator = false;
|
||||
let mut candidates = RoaringBitmap::new();
|
||||
|
||||
let facets_by_index = federation.facets_by_index.remove(&index_uid).flatten();
|
||||
|
||||
// TODO: recover the max size + facets_by_index as return value of this function so as not to ask it for all queries
|
||||
if let Err(mut error) =
|
||||
check_facet_order(&mut facet_order, &index_uid, &facets_by_index, &index, &rtxn)
|
||||
{
|
||||
error.message = format!(
|
||||
"Inside `.federation.facetsByIndex.{index_uid}`: {error}{}",
|
||||
if let Some(query_index) = first_query_index {
|
||||
format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`")
|
||||
} else {
|
||||
Default::default()
|
||||
}
|
||||
);
|
||||
return Err(error);
|
||||
}
|
||||
|
||||
// 2.1. Compute all candidates for each query in the index
|
||||
let mut results_by_query = Vec::with_capacity(queries.len());
|
||||
|
||||
for QueryByIndex { query, federation_options, query_index } in queries {
|
||||
// use an immediately invoked lambda to capture the result without returning from the function
|
||||
|
||||
let res: Result<(), ResponseError> = (|| {
|
||||
let search_kind = search_kind(&query, index_scheduler, &index, features)?;
|
||||
|
||||
let canonicalization_kind = match (&search_kind, &query.q) {
|
||||
(SearchKind::SemanticOnly { .. }, _) => {
|
||||
ranking_rules::CanonicalizationKind::Vector
|
||||
}
|
||||
(_, Some(q)) if !q.is_empty() => ranking_rules::CanonicalizationKind::Keyword,
|
||||
_ => ranking_rules::CanonicalizationKind::Placeholder,
|
||||
};
|
||||
|
||||
let sort = if let Some(sort) = &query.sort {
|
||||
let sorts: Vec<_> =
|
||||
match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() {
|
||||
Ok(sorts) => sorts,
|
||||
Err(asc_desc_error) => {
|
||||
return Err(milli::Error::from(milli::SortError::from(
|
||||
asc_desc_error,
|
||||
))
|
||||
.into())
|
||||
}
|
||||
};
|
||||
Some(sorts)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let ranking_rules = ranking_rules::RankingRules::new(
|
||||
criteria.clone(),
|
||||
sort,
|
||||
query.matching_strategy.into(),
|
||||
canonicalization_kind,
|
||||
);
|
||||
|
||||
if let Some((previous_ranking_rules, previous_query_index, previous_index_uid)) =
|
||||
previous_query_data.take()
|
||||
{
|
||||
if let Err(error) = ranking_rules.is_compatible_with(&previous_ranking_rules) {
|
||||
return Err(error.to_response_error(
|
||||
&ranking_rules,
|
||||
&previous_ranking_rules,
|
||||
query_index,
|
||||
previous_query_index,
|
||||
&index_uid,
|
||||
&previous_index_uid,
|
||||
));
|
||||
}
|
||||
previous_query_data = if previous_ranking_rules.constraint_count()
|
||||
> ranking_rules.constraint_count()
|
||||
{
|
||||
Some((previous_ranking_rules, previous_query_index, previous_index_uid))
|
||||
} else {
|
||||
Some((ranking_rules, query_index, index_uid.clone()))
|
||||
};
|
||||
} else {
|
||||
previous_query_data = Some((ranking_rules, query_index, index_uid.clone()));
|
||||
}
|
||||
|
||||
match search_kind {
|
||||
SearchKind::KeywordOnly => {}
|
||||
_ => semantic_hit_count = Some(0),
|
||||
}
|
||||
|
||||
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;
|
||||
|
||||
let time_budget = match cutoff {
|
||||
Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
|
||||
None => TimeBudget::default(),
|
||||
};
|
||||
|
||||
let (mut search, _is_finite_pagination, _max_total_hits, _offset) =
|
||||
prepare_search(&index, &rtxn, &query, &search_kind, time_budget, features)?;
|
||||
|
||||
search.scoring_strategy(milli::score_details::ScoringStrategy::Detailed);
|
||||
search.offset(0);
|
||||
search.limit(required_hit_count);
|
||||
|
||||
let (result, _semantic_hit_count) = super::search_from_kind(search_kind, search)?;
|
||||
let format = AttributesFormat {
|
||||
attributes_to_retrieve: query.attributes_to_retrieve,
|
||||
retrieve_vectors,
|
||||
attributes_to_highlight: query.attributes_to_highlight,
|
||||
attributes_to_crop: query.attributes_to_crop,
|
||||
crop_length: query.crop_length,
|
||||
crop_marker: query.crop_marker,
|
||||
highlight_pre_tag: query.highlight_pre_tag,
|
||||
highlight_post_tag: query.highlight_post_tag,
|
||||
show_matches_position: query.show_matches_position,
|
||||
sort: query.sort,
|
||||
show_ranking_score: query.show_ranking_score,
|
||||
show_ranking_score_details: query.show_ranking_score_details,
|
||||
locales: query.locales.map(|l| l.iter().copied().map(Into::into).collect()),
|
||||
};
|
||||
|
||||
let milli::SearchResult {
|
||||
matching_words,
|
||||
candidates: query_candidates,
|
||||
documents_ids,
|
||||
document_scores,
|
||||
degraded: query_degraded,
|
||||
used_negative_operator: query_used_negative_operator,
|
||||
} = result;
|
||||
|
||||
candidates |= query_candidates;
|
||||
degraded |= query_degraded;
|
||||
used_negative_operator |= query_used_negative_operator;
|
||||
|
||||
let tokenizer = HitMaker::tokenizer(dictionary.as_deref(), separators.as_deref());
|
||||
|
||||
let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);
|
||||
|
||||
let hit_maker = HitMaker::new(&index, &rtxn, format, formatter_builder)?;
|
||||
|
||||
results_by_query.push(SearchResultByQuery {
|
||||
federation_options,
|
||||
hit_maker,
|
||||
query_index,
|
||||
documents_ids,
|
||||
document_scores,
|
||||
});
|
||||
Ok(())
|
||||
})();
|
||||
|
||||
if let Err(mut error) = res {
|
||||
error.message = format!("Inside `.queries[{query_index}]`: {}", error.message);
|
||||
return Err(error);
|
||||
}
|
||||
}
|
||||
// 2.2. merge inside index
|
||||
let mut documents_seen = RoaringBitmap::new();
|
||||
let merged_result: Result<Vec<_>, ResponseError> =
|
||||
merge_index_local_results(results_by_query)
|
||||
// skip documents we've already seen & mark that we saw the current document
|
||||
.filter(|SearchResultByQueryIterItem { docid, .. }| documents_seen.insert(*docid))
|
||||
.take(required_hit_count)
|
||||
// 2.3 make hits
|
||||
.map(
|
||||
|SearchResultByQueryIterItem {
|
||||
docid,
|
||||
score,
|
||||
federation_options,
|
||||
hit_maker,
|
||||
query_index,
|
||||
}| {
|
||||
let mut hit = hit_maker.make_hit(docid, &score)?;
|
||||
let weighted_score =
|
||||
ScoreDetails::global_score(score.iter()) * (*federation_options.weight);
|
||||
|
||||
let _federation = serde_json::json!(
|
||||
{
|
||||
"indexUid": index_uid,
|
||||
"queriesPosition": query_index,
|
||||
"weightedRankingScore": weighted_score,
|
||||
}
|
||||
);
|
||||
hit.document.insert("_federation".to_string(), _federation);
|
||||
Ok(SearchHitByIndex { hit, score, federation_options, query_index })
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
|
||||
let merged_result = merged_result?;
|
||||
|
||||
let estimated_total_hits = candidates.len() as usize;
|
||||
|
||||
let facets = facets_by_index
|
||||
.map(|facets_by_index| {
|
||||
compute_facet_distribution_stats(
|
||||
&facets_by_index,
|
||||
&index,
|
||||
&rtxn,
|
||||
candidates,
|
||||
super::Route::MultiSearch,
|
||||
)
|
||||
})
|
||||
.transpose()
|
||||
.map_err(|mut error| {
|
||||
error.message = format!(
|
||||
"Inside `.federation.facetsByIndex.{index_uid}`: {}{}",
|
||||
error.message,
|
||||
if let Some(query_index) = first_query_index {
|
||||
format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`")
|
||||
} else {
|
||||
Default::default()
|
||||
}
|
||||
);
|
||||
error
|
||||
})?;
|
||||
|
||||
results_by_index.push(SearchResultByIndex {
|
||||
index: index_uid,
|
||||
hits: merged_result,
|
||||
estimated_total_hits,
|
||||
degraded,
|
||||
used_negative_operator,
|
||||
facets,
|
||||
});
|
||||
}
|
||||
|
||||
// bonus step, make sure to return an error if an index wants a non-faceted field, even if no query actually uses that index.
|
||||
for (index_uid, facets) in federation.facets_by_index {
|
||||
let index = match index_scheduler.index(&index_uid) {
|
||||
Ok(index) => index,
|
||||
Err(err) => {
|
||||
let mut err = ResponseError::from(err);
|
||||
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
|
||||
// here the resource not found is not part of the URL.
|
||||
err.code = StatusCode::BAD_REQUEST;
|
||||
err.message = format!(
|
||||
"Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries",
|
||||
err.message
|
||||
);
|
||||
return Err(err);
|
||||
}
|
||||
};
|
||||
|
||||
// Important: this is the only transaction we'll use for this index during this federated search
|
||||
let rtxn = index.read_txn()?;
|
||||
|
||||
if let Err(mut error) =
|
||||
check_facet_order(&mut facet_order, &index_uid, &facets, &index, &rtxn)
|
||||
{
|
||||
error.message = format!(
|
||||
"Inside `.federation.facetsByIndex.{index_uid}`: {error}\n - Note: index `{index_uid}` is not used in queries",
|
||||
);
|
||||
return Err(error);
|
||||
}
|
||||
|
||||
if let Some(facets) = facets {
|
||||
if let Err(mut error) = compute_facet_distribution_stats(
|
||||
&facets,
|
||||
&index,
|
||||
&rtxn,
|
||||
Default::default(),
|
||||
super::Route::MultiSearch,
|
||||
) {
|
||||
error.message =
|
||||
format!("Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", error.message);
|
||||
return Err(error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3. merge hits and metadata across indexes
|
||||
// 3.1 merge metadata
|
||||
let (estimated_total_hits, degraded, used_negative_operator, facets) = {
|
||||
let mut estimated_total_hits = 0;
|
||||
let mut degraded = false;
|
||||
let mut used_negative_operator = false;
|
||||
|
||||
let mut facets: FederatedFacets = FederatedFacets::default();
|
||||
|
||||
for SearchResultByIndex {
|
||||
index,
|
||||
hits: _,
|
||||
estimated_total_hits: estimated_total_hits_by_index,
|
||||
facets: facets_by_index,
|
||||
degraded: degraded_by_index,
|
||||
used_negative_operator: used_negative_operator_by_index,
|
||||
} in &mut results_by_index
|
||||
{
|
||||
estimated_total_hits += *estimated_total_hits_by_index;
|
||||
degraded |= *degraded_by_index;
|
||||
used_negative_operator |= *used_negative_operator_by_index;
|
||||
|
||||
let facets_by_index = std::mem::take(facets_by_index);
|
||||
let index = std::mem::take(index);
|
||||
|
||||
facets.insert(index, facets_by_index);
|
||||
}
|
||||
|
||||
(estimated_total_hits, degraded, used_negative_operator, facets)
|
||||
};
|
||||
|
||||
// 3.2 merge hits
|
||||
let merged_hits: Vec<_> = merge_index_global_results(results_by_index)
|
||||
.skip(federation.offset)
|
||||
.take(federation.limit)
|
||||
.inspect(|hit| {
|
||||
if let Some(semantic_hit_count) = &mut semantic_hit_count {
|
||||
if hit.score.iter().any(|score| matches!(&score, ScoreDetails::Vector(_))) {
|
||||
*semantic_hit_count += 1;
|
||||
}
|
||||
}
|
||||
})
|
||||
.map(|hit| hit.hit)
|
||||
.collect();
|
||||
|
||||
let (facet_distribution, facet_stats, facets_by_index) =
|
||||
match federation.merge_facets.zip(facet_order) {
|
||||
Some((merge_facets, facet_order)) => {
|
||||
let facets = facets.merge(merge_facets, facet_order);
|
||||
|
||||
let (facet_distribution, facet_stats) = facets
|
||||
.map(|ComputedFacets { distribution, stats }| (distribution, stats))
|
||||
.unzip();
|
||||
|
||||
(facet_distribution, facet_stats, FederatedFacets::default())
|
||||
}
|
||||
None => (None, None, facets),
|
||||
};
|
||||
|
||||
let search_result = FederatedSearchResult {
|
||||
hits: merged_hits,
|
||||
processing_time_ms: before_search.elapsed().as_millis(),
|
||||
hits_info: HitsInfo::OffsetLimit {
|
||||
limit: federation.limit,
|
||||
offset: federation.offset,
|
||||
estimated_total_hits,
|
||||
},
|
||||
semantic_hit_count,
|
||||
degraded,
|
||||
used_negative_operator,
|
||||
facet_distribution,
|
||||
facet_stats,
|
||||
facets_by_index,
|
||||
};
|
||||
|
||||
Ok(search_result)
|
||||
}
|
||||
|
||||
fn check_facet_order(
    facet_order: &mut Option<BTreeMap<String, (String, OrderBy)>>,
    current_index: &str,
    facets_by_index: &Option<Vec<String>>,
    index: &milli::Index,
    rtxn: &milli::heed::RoTxn<'_>,
) -> Result<(), ResponseError> {
    if let (Some(facet_order), Some(facets_by_index)) = (facet_order, facets_by_index) {
        let index_facet_order = index.sort_facet_values_by(rtxn)?;
        for facet in facets_by_index {
            let index_facet_order = index_facet_order.get(facet);
            let (previous_index, previous_facet_order) = facet_order
                .entry(facet.to_owned())
                .or_insert_with(|| (current_index.to_owned(), index_facet_order));
            if previous_facet_order != &index_facet_order {
                return Err(MeilisearchHttpError::InconsistentFacetOrder {
                    facet: facet.clone(),
                    previous_facet_order: *previous_facet_order,
                    previous_uid: previous_index.clone(),
                    current_uid: current_index.to_owned(),
                    index_facet_order,
                }
                .into());
            }
        }
    };
    Ok(())
}
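// A hedged example of what this guards against: if one index sorts the `genre` facet
// by count while another keeps the default lexicographic order (for instance through a
// `sortFacetValuesBy` faceting setting), merging their `genre` distributions would be
// ambiguous, so the whole federated request is rejected with `InconsistentFacetOrder`.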
|
2041
crates/meilisearch/src/search/mod.rs
Normal file
File diff suppressed because it is too large
823
crates/meilisearch/src/search/ranking_rules.rs
Normal file
@@ -0,0 +1,823 @@
use std::collections::HashMap;
|
||||
use std::fmt::Write;
|
||||
|
||||
use itertools::Itertools as _;
|
||||
use meilisearch_types::error::{Code, ResponseError};
|
||||
use meilisearch_types::milli::{AscDesc, Criterion, Member, TermsMatchingStrategy};
|
||||
|
||||
pub struct RankingRules {
|
||||
canonical_criteria: Vec<Criterion>,
|
||||
canonical_sort: Option<Vec<AscDesc>>,
|
||||
canonicalization_actions: Vec<CanonicalizationAction>,
|
||||
source_criteria: Vec<Criterion>,
|
||||
source_sort: Option<Vec<AscDesc>>,
|
||||
}
|
||||
|
||||
pub enum CanonicalizationAction {
|
||||
PrependedWords {
|
||||
prepended_index: RankingRuleSource,
|
||||
},
|
||||
RemovedDuplicate {
|
||||
earlier_occurrence: RankingRuleSource,
|
||||
removed_occurrence: RankingRuleSource,
|
||||
},
|
||||
RemovedWords {
|
||||
reason: RemoveWords,
|
||||
removed_occurrence: RankingRuleSource,
|
||||
},
|
||||
RemovedPlaceholder {
|
||||
removed_occurrence: RankingRuleSource,
|
||||
},
|
||||
TruncatedVector {
|
||||
vector_rule: RankingRuleSource,
|
||||
truncated_from: RankingRuleSource,
|
||||
},
|
||||
RemovedVector {
|
||||
vector_rule: RankingRuleSource,
|
||||
removed_occurrence: RankingRuleSource,
|
||||
},
|
||||
RemovedSort {
|
||||
removed_occurrence: RankingRuleSource,
|
||||
},
|
||||
}
|
||||
|
||||
pub enum RemoveWords {
|
||||
WasPrepended,
|
||||
MatchingStrategyAll,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for RemoveWords {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let reason = match self {
|
||||
RemoveWords::WasPrepended => "it was previously prepended",
|
||||
RemoveWords::MatchingStrategyAll => "`query.matchingStrategy` is set to `all`",
|
||||
};
|
||||
f.write_str(reason)
|
||||
}
|
||||
}
|
||||
|
||||
pub enum CanonicalizationKind {
|
||||
Placeholder,
|
||||
Keyword,
|
||||
Vector,
|
||||
}
|
||||
|
||||
pub struct CompatibilityError {
|
||||
previous: RankingRule,
|
||||
current: RankingRule,
|
||||
}
|
||||
impl CompatibilityError {
|
||||
pub(crate) fn to_response_error(
|
||||
&self,
|
||||
ranking_rules: &RankingRules,
|
||||
previous_ranking_rules: &RankingRules,
|
||||
query_index: usize,
|
||||
previous_query_index: usize,
|
||||
index_uid: &str,
|
||||
previous_index_uid: &str,
|
||||
) -> meilisearch_types::error::ResponseError {
|
||||
let rule = self.current.as_string(
|
||||
&ranking_rules.canonical_criteria,
|
||||
&ranking_rules.canonical_sort,
|
||||
query_index,
|
||||
index_uid,
|
||||
);
|
||||
let previous_rule = self.previous.as_string(
|
||||
&previous_ranking_rules.canonical_criteria,
|
||||
&previous_ranking_rules.canonical_sort,
|
||||
previous_query_index,
|
||||
previous_index_uid,
|
||||
);
|
||||
|
||||
let canonicalization_actions = ranking_rules.canonicalization_notes();
|
||||
let previous_canonicalization_actions = previous_ranking_rules.canonicalization_notes();
|
||||
|
||||
let mut msg = String::new();
|
||||
let reason = self.reason();
|
||||
let _ = writeln!(
|
||||
&mut msg,
|
||||
"The results of queries #{previous_query_index} and #{query_index} are incompatible: "
|
||||
);
|
||||
let _ = writeln!(&mut msg, " 1. {previous_rule}");
|
||||
let _ = writeln!(&mut msg, " 2. {rule}");
|
||||
let _ = writeln!(&mut msg, " - {reason}");
|
||||
|
||||
if !previous_canonicalization_actions.is_empty() {
|
||||
let _ = write!(&mut msg, " - note: The ranking rules of query #{previous_query_index} were modified during canonicalization:\n{previous_canonicalization_actions}");
|
||||
}
|
||||
|
||||
if !canonicalization_actions.is_empty() {
|
||||
let _ = write!(&mut msg, " - note: The ranking rules of query #{query_index} were modified during canonicalization:\n{canonicalization_actions}");
|
||||
}
|
||||
|
||||
ResponseError::from_msg(msg, Code::InvalidMultiSearchQueryRankingRules)
|
||||
}
|
||||
pub fn reason(&self) -> &'static str {
|
||||
match (self.previous.kind, self.current.kind) {
|
||||
(RankingRuleKind::Relevancy, RankingRuleKind::AscendingSort)
|
||||
| (RankingRuleKind::Relevancy, RankingRuleKind::DescendingSort)
|
||||
| (RankingRuleKind::AscendingSort, RankingRuleKind::Relevancy)
|
||||
| (RankingRuleKind::DescendingSort, RankingRuleKind::Relevancy) => {
|
||||
"cannot compare a relevancy rule with a sort rule"
|
||||
}
|
||||
|
||||
(RankingRuleKind::Relevancy, RankingRuleKind::AscendingGeoSort)
|
||||
| (RankingRuleKind::Relevancy, RankingRuleKind::DescendingGeoSort)
|
||||
| (RankingRuleKind::AscendingGeoSort, RankingRuleKind::Relevancy)
|
||||
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::Relevancy) => {
|
||||
"cannot compare a relevancy rule with a geosort rule"
|
||||
}
|
||||
|
||||
(RankingRuleKind::AscendingSort, RankingRuleKind::DescendingSort)
|
||||
| (RankingRuleKind::DescendingSort, RankingRuleKind::AscendingSort) => {
|
||||
"cannot compare two sort rules in opposite directions"
|
||||
}
|
||||
|
||||
(RankingRuleKind::AscendingSort, RankingRuleKind::AscendingGeoSort)
|
||||
| (RankingRuleKind::AscendingSort, RankingRuleKind::DescendingGeoSort)
|
||||
| (RankingRuleKind::DescendingSort, RankingRuleKind::AscendingGeoSort)
|
||||
| (RankingRuleKind::DescendingSort, RankingRuleKind::DescendingGeoSort)
|
||||
| (RankingRuleKind::AscendingGeoSort, RankingRuleKind::AscendingSort)
|
||||
| (RankingRuleKind::AscendingGeoSort, RankingRuleKind::DescendingSort)
|
||||
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::AscendingSort)
|
||||
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::DescendingSort) => {
|
||||
"cannot compare a sort rule with a geosort rule"
|
||||
}
|
||||
|
||||
(RankingRuleKind::AscendingGeoSort, RankingRuleKind::DescendingGeoSort)
|
||||
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::AscendingGeoSort) => {
|
||||
"cannot compare two geosort rules in opposite directions"
|
||||
}
|
||||
(RankingRuleKind::Relevancy, RankingRuleKind::Relevancy)
|
||||
| (RankingRuleKind::AscendingSort, RankingRuleKind::AscendingSort)
|
||||
| (RankingRuleKind::DescendingSort, RankingRuleKind::DescendingSort)
|
||||
| (RankingRuleKind::AscendingGeoSort, RankingRuleKind::AscendingGeoSort)
|
||||
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::DescendingGeoSort) => {
|
||||
"internal error, comparison should be possible"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RankingRules {
|
||||
pub fn new(
|
||||
criteria: Vec<Criterion>,
|
||||
sort: Option<Vec<AscDesc>>,
|
||||
terms_matching_strategy: TermsMatchingStrategy,
|
||||
canonicalization_kind: CanonicalizationKind,
|
||||
) -> Self {
|
||||
let (canonical_criteria, canonical_sort, canonicalization_actions) =
|
||||
Self::canonicalize(&criteria, &sort, terms_matching_strategy, canonicalization_kind);
|
||||
Self {
|
||||
canonical_criteria,
|
||||
canonical_sort,
|
||||
canonicalization_actions,
|
||||
source_criteria: criteria,
|
||||
source_sort: sort,
|
||||
}
|
||||
}
|
||||
|
||||
fn canonicalize(
|
||||
criteria: &[Criterion],
|
||||
sort: &Option<Vec<AscDesc>>,
|
||||
terms_matching_strategy: TermsMatchingStrategy,
|
||||
canonicalization_kind: CanonicalizationKind,
|
||||
) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
|
||||
match canonicalization_kind {
|
||||
CanonicalizationKind::Placeholder => Self::canonicalize_placeholder(criteria, sort),
|
||||
CanonicalizationKind::Keyword => {
|
||||
Self::canonicalize_keyword(criteria, sort, terms_matching_strategy)
|
||||
}
|
||||
CanonicalizationKind::Vector => Self::canonicalize_vector(criteria, sort),
|
||||
}
|
||||
}
|
||||
|
||||
fn canonicalize_placeholder(
|
||||
criteria: &[Criterion],
|
||||
sort_query: &Option<Vec<AscDesc>>,
|
||||
) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
|
||||
let mut sort = None;
|
||||
|
||||
let mut sorted_fields = HashMap::new();
|
||||
let mut canonicalization_actions = Vec::new();
|
||||
let mut canonical_criteria = Vec::new();
|
||||
let mut canonical_sort = None;
|
||||
|
||||
for (criterion_index, criterion) in criteria.iter().enumerate() {
|
||||
match criterion.clone() {
|
||||
Criterion::Words
|
||||
| Criterion::Typo
|
||||
| Criterion::Proximity
|
||||
| Criterion::Attribute
|
||||
| Criterion::Exactness => {
|
||||
canonicalization_actions.push(CanonicalizationAction::RemovedPlaceholder {
|
||||
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||
})
|
||||
}
|
||||
|
||||
Criterion::Sort => {
|
||||
if let Some(previous_index) = sort {
|
||||
canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
|
||||
earlier_occurrence: RankingRuleSource::Criterion(previous_index),
|
||||
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||
});
|
||||
} else if let Some(sort_query) = sort_query {
|
||||
sort = Some(criterion_index);
|
||||
canonical_criteria.push(criterion.clone());
|
||||
canonical_sort = Some(canonicalize_sort(
|
||||
&mut sorted_fields,
|
||||
sort_query.as_slice(),
|
||||
criterion_index,
|
||||
&mut canonicalization_actions,
|
||||
));
|
||||
} else {
|
||||
canonicalization_actions.push(CanonicalizationAction::RemovedSort {
|
||||
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||
})
|
||||
}
|
||||
}
|
||||
Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) {
|
||||
std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
|
||||
.push(CanonicalizationAction::RemovedDuplicate {
|
||||
earlier_occurrence: *entry.get(),
|
||||
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||
}),
|
||||
std::collections::hash_map::Entry::Vacant(entry) => {
|
||||
entry.insert(RankingRuleSource::Criterion(criterion_index));
|
||||
canonical_criteria.push(criterion.clone())
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
(canonical_criteria, canonical_sort, canonicalization_actions)
|
||||
}
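// Illustration (assuming a query with a non-empty `sort`): source ranking rules
// `[words, typo, sort, release_date:desc]` canonicalize to `[sort, release_date:desc]`
// for a placeholder search, and one `RemovedPlaceholder` action is recorded per dropped
// relevancy rule (`words`, `typo`).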
|
||||
|
||||
fn canonicalize_vector(
|
||||
criteria: &[Criterion],
|
||||
sort_query: &Option<Vec<AscDesc>>,
|
||||
) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
|
||||
let mut sort = None;
|
||||
|
||||
let mut sorted_fields = HashMap::new();
|
||||
let mut canonicalization_actions = Vec::new();
|
||||
let mut canonical_criteria = Vec::new();
|
||||
let mut canonical_sort = None;
|
||||
|
||||
let mut vector = None;
|
||||
|
||||
'criteria: for (criterion_index, criterion) in criteria.iter().enumerate() {
|
||||
match criterion.clone() {
|
||||
Criterion::Words
|
||||
| Criterion::Typo
|
||||
| Criterion::Proximity
|
||||
| Criterion::Attribute
|
||||
| Criterion::Exactness => match vector {
|
||||
Some(previous_occurrence) => {
|
||||
if sorted_fields.is_empty() {
|
||||
canonicalization_actions.push(CanonicalizationAction::RemovedVector {
|
||||
vector_rule: RankingRuleSource::Criterion(previous_occurrence),
|
||||
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||
});
|
||||
} else {
|
||||
canonicalization_actions.push(
|
||||
CanonicalizationAction::TruncatedVector {
|
||||
vector_rule: RankingRuleSource::Criterion(previous_occurrence),
|
||||
truncated_from: RankingRuleSource::Criterion(criterion_index),
|
||||
},
|
||||
);
|
||||
break 'criteria;
|
||||
}
|
||||
}
|
||||
None => {
|
||||
canonical_criteria.push(criterion.clone());
|
||||
vector = Some(criterion_index);
|
||||
}
|
||||
},
|
||||
|
||||
Criterion::Sort => {
|
||||
if let Some(previous_index) = sort {
|
||||
canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
|
||||
earlier_occurrence: RankingRuleSource::Criterion(previous_index),
|
||||
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||
});
|
||||
} else if let Some(sort_query) = sort_query {
|
||||
sort = Some(criterion_index);
|
||||
canonical_criteria.push(criterion.clone());
|
||||
canonical_sort = Some(canonicalize_sort(
|
||||
&mut sorted_fields,
|
||||
sort_query.as_slice(),
|
||||
criterion_index,
|
||||
&mut canonicalization_actions,
|
||||
));
|
||||
} else {
|
||||
canonicalization_actions.push(CanonicalizationAction::RemovedSort {
|
||||
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||
})
|
||||
}
|
||||
}
|
||||
Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) {
|
||||
std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
|
||||
.push(CanonicalizationAction::RemovedDuplicate {
|
||||
earlier_occurrence: *entry.get(),
|
||||
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||
}),
|
||||
std::collections::hash_map::Entry::Vacant(entry) => {
|
||||
entry.insert(RankingRuleSource::Criterion(criterion_index));
|
||||
canonical_criteria.push(criterion.clone())
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
(canonical_criteria, canonical_sort, canonicalization_actions)
|
||||
}
|
||||
|
||||
fn canonicalize_keyword(
|
||||
criteria: &[Criterion],
|
||||
sort_query: &Option<Vec<AscDesc>>,
|
||||
terms_matching_strategy: TermsMatchingStrategy,
|
||||
) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
|
||||
let mut words = None;
|
||||
let mut typo = None;
|
||||
let mut proximity = None;
|
||||
let mut sort = None;
|
||||
let mut attribute = None;
|
||||
let mut exactness = None;
|
||||
let mut sorted_fields = HashMap::new();
|
||||
|
||||
let mut canonical_criteria = Vec::new();
|
||||
let mut canonical_sort = None;
|
||||
|
||||
let mut canonicalization_actions = Vec::new();
|
||||
|
||||
for (criterion_index, criterion) in criteria.iter().enumerate() {
|
||||
let criterion = criterion.clone();
|
||||
match criterion.clone() {
|
||||
Criterion::Words => {
|
||||
if let TermsMatchingStrategy::All = terms_matching_strategy {
|
||||
canonicalization_actions.push(CanonicalizationAction::RemovedWords {
|
||||
reason: RemoveWords::MatchingStrategyAll,
|
||||
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||
});
|
||||
continue;
|
||||
}
|
||||
if let Some(maybe_previous_index) = words {
|
||||
if let Some(previous_index) = maybe_previous_index {
|
||||
canonicalization_actions.push(
|
||||
CanonicalizationAction::RemovedDuplicate {
|
||||
earlier_occurrence: RankingRuleSource::Criterion(
|
||||
previous_index,
|
||||
),
|
||||
removed_occurrence: RankingRuleSource::Criterion(
|
||||
criterion_index,
|
||||
),
|
||||
},
|
||||
);
|
||||
continue;
|
||||
}
|
||||
canonicalization_actions.push(CanonicalizationAction::RemovedWords {
|
||||
reason: RemoveWords::WasPrepended,
|
||||
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||
})
|
||||
}
|
||||
words = Some(Some(criterion_index));
|
||||
canonical_criteria.push(criterion);
|
||||
}
|
||||
Criterion::Typo => {
|
||||
canonicalize_criterion(
|
||||
criterion,
|
||||
criterion_index,
|
||||
terms_matching_strategy,
|
||||
&mut words,
|
||||
&mut canonicalization_actions,
|
||||
&mut canonical_criteria,
|
||||
&mut typo,
|
||||
);
|
||||
}
|
||||
Criterion::Proximity => {
|
||||
canonicalize_criterion(
|
||||
criterion,
|
||||
criterion_index,
|
||||
terms_matching_strategy,
|
||||
&mut words,
|
||||
&mut canonicalization_actions,
|
||||
&mut canonical_criteria,
|
||||
&mut proximity,
|
||||
);
|
||||
}
|
||||
Criterion::Attribute => {
|
||||
canonicalize_criterion(
|
||||
criterion,
|
||||
criterion_index,
|
||||
terms_matching_strategy,
|
||||
&mut words,
|
||||
&mut canonicalization_actions,
|
||||
&mut canonical_criteria,
|
||||
&mut attribute,
|
||||
);
|
||||
}
|
||||
Criterion::Exactness => {
|
||||
canonicalize_criterion(
|
||||
criterion,
|
||||
criterion_index,
|
||||
terms_matching_strategy,
|
||||
&mut words,
|
||||
&mut canonicalization_actions,
|
||||
&mut canonical_criteria,
|
||||
&mut exactness,
|
||||
);
|
||||
}
|
||||
|
||||
Criterion::Sort => {
|
||||
if let Some(previous_index) = sort {
|
||||
canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
|
||||
earlier_occurrence: RankingRuleSource::Criterion(previous_index),
|
||||
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||
});
|
||||
} else if let Some(sort_query) = sort_query {
|
||||
sort = Some(criterion_index);
|
||||
canonical_criteria.push(criterion);
|
||||
canonical_sort = Some(canonicalize_sort(
|
||||
&mut sorted_fields,
|
||||
sort_query.as_slice(),
|
||||
criterion_index,
|
||||
&mut canonicalization_actions,
|
||||
));
|
||||
} else {
|
||||
canonicalization_actions.push(CanonicalizationAction::RemovedSort {
|
||||
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||
})
|
||||
}
|
||||
}
|
||||
Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) {
|
||||
std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
|
||||
.push(CanonicalizationAction::RemovedDuplicate {
|
||||
earlier_occurrence: *entry.get(),
|
||||
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||
}),
|
||||
std::collections::hash_map::Entry::Vacant(entry) => {
|
||||
entry.insert(RankingRuleSource::Criterion(criterion_index));
|
||||
canonical_criteria.push(criterion)
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
(canonical_criteria, canonical_sort, canonicalization_actions)
|
||||
}
|
||||
|
||||
pub fn is_compatible_with(&self, previous: &Self) -> Result<(), CompatibilityError> {
|
||||
for (current, previous) in self.coalesce_iterator().zip(previous.coalesce_iterator()) {
|
||||
if current.kind != previous.kind {
|
||||
return Err(CompatibilityError { current, previous });
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
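// Illustration: ranking rules `[words, typo, proximity, price:asc]` and
// `[exactness, price:asc]` both coalesce to `[Relevancy, AscendingSort]`, so the two
// queries are compatible; `[price:asc]` against `[price:desc]` is not, and yields a
// `CompatibilityError` pointing at the two conflicting rules.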
|
||||
|
||||
pub fn constraint_count(&self) -> usize {
|
||||
self.coalesce_iterator().count()
|
||||
}
|
||||
|
||||
fn coalesce_iterator(&self) -> impl Iterator<Item = RankingRule> + '_ {
|
||||
self.canonical_criteria
|
||||
.iter()
|
||||
.enumerate()
|
||||
.flat_map(|(criterion_index, criterion)| {
|
||||
RankingRule::from_criterion(criterion_index, criterion, &self.canonical_sort)
|
||||
})
|
||||
.coalesce(
|
||||
|previous @ RankingRule { source: previous_source, kind: previous_kind },
|
||||
current @ RankingRule { source, kind }| {
|
||||
match (previous_kind, kind) {
|
||||
(RankingRuleKind::Relevancy, RankingRuleKind::Relevancy) => {
|
||||
let merged_source = match (previous_source, source) {
|
||||
(
|
||||
RankingRuleSource::Criterion(previous),
|
||||
RankingRuleSource::Criterion(current),
|
||||
) => RankingRuleSource::CoalescedCriteria(previous, current),
|
||||
(
|
||||
RankingRuleSource::CoalescedCriteria(begin, _end),
|
||||
RankingRuleSource::Criterion(current),
|
||||
) => RankingRuleSource::CoalescedCriteria(begin, current),
|
||||
(_previous, current) => current,
|
||||
};
|
||||
Ok(RankingRule { source: merged_source, kind })
|
||||
}
|
||||
_ => Err((previous, current)),
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
fn canonicalization_notes(&self) -> String {
|
||||
use CanonicalizationAction::*;
|
||||
let mut notes = String::new();
|
||||
for (index, action) in self.canonicalization_actions.iter().enumerate() {
|
||||
let index = index + 1;
|
||||
let _ = match action {
|
||||
PrependedWords { prepended_index } => writeln!(
|
||||
&mut notes,
|
||||
" {index}. Prepended rule `words` before first relevancy rule `{}` at position {}",
|
||||
prepended_index.rule_name(&self.source_criteria, &self.source_sort),
|
||||
prepended_index.rule_position()
|
||||
),
|
||||
RemovedDuplicate { earlier_occurrence, removed_occurrence } => writeln!(
|
||||
&mut notes,
|
||||
" {index}. Removed duplicate rule `{}` at position {} as it already appears at position {}",
|
||||
earlier_occurrence.rule_name(&self.source_criteria, &self.source_sort),
|
||||
removed_occurrence.rule_position(),
|
||||
earlier_occurrence.rule_position(),
|
||||
),
|
||||
RemovedWords { reason, removed_occurrence } => writeln!(
|
||||
&mut notes,
|
||||
" {index}. Removed rule `words` at position {} because {reason}",
|
||||
removed_occurrence.rule_position()
|
||||
),
|
||||
RemovedPlaceholder { removed_occurrence } => writeln!(
|
||||
&mut notes,
|
||||
" {index}. Removed relevancy rule `{}` at position {} because the query is a placeholder search (`q`: \"\")",
|
||||
removed_occurrence.rule_name(&self.source_criteria, &self.source_sort),
|
||||
removed_occurrence.rule_position()
|
||||
),
|
||||
TruncatedVector { vector_rule, truncated_from } => writeln!(
|
||||
&mut notes,
|
||||
" {index}. Truncated relevancy rule `{}` at position {} and later rules because the query is a vector search and `vector` was inserted at position {}",
|
||||
truncated_from.rule_name(&self.source_criteria, &self.source_sort),
|
||||
truncated_from.rule_position(),
|
||||
vector_rule.rule_position(),
|
||||
),
|
||||
RemovedVector { vector_rule, removed_occurrence } => writeln!(
|
||||
&mut notes,
|
||||
" {index}. Removed relevancy rule `{}` at position {} because the query is a vector search and `vector` was already inserted at position {}",
|
||||
removed_occurrence.rule_name(&self.source_criteria, &self.source_sort),
|
||||
removed_occurrence.rule_position(),
|
||||
vector_rule.rule_position(),
|
||||
),
|
||||
RemovedSort { removed_occurrence } => writeln!(
|
||||
&mut notes,
|
||||
" {index}. Removed rule `sort` at position {} because `query.sort` is empty",
|
||||
removed_occurrence.rule_position()
|
||||
),
|
||||
};
|
||||
}
|
||||
notes
|
||||
}
|
||||
}
|
||||
|
||||
fn canonicalize_sort(
|
||||
sorted_fields: &mut HashMap<String, RankingRuleSource>,
|
||||
sort_query: &[AscDesc],
|
||||
criterion_index: usize,
|
||||
canonicalization_actions: &mut Vec<CanonicalizationAction>,
|
||||
) -> Vec<AscDesc> {
|
||||
let mut geo_sorted = None;
|
||||
let mut canonical_sort = Vec::new();
|
||||
for (sort_index, asc_desc) in sort_query.iter().enumerate() {
|
||||
let source = RankingRuleSource::Sort { criterion_index, sort_index };
|
||||
let asc_desc = asc_desc.clone();
|
||||
match asc_desc.clone() {
|
||||
AscDesc::Asc(Member::Field(s)) | AscDesc::Desc(Member::Field(s)) => {
|
||||
match sorted_fields.entry(s) {
|
||||
std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
|
||||
.push(CanonicalizationAction::RemovedDuplicate {
|
||||
earlier_occurrence: *entry.get(),
|
||||
removed_occurrence: source,
|
||||
}),
|
||||
std::collections::hash_map::Entry::Vacant(entry) => {
|
||||
entry.insert(source);
|
||||
canonical_sort.push(asc_desc);
|
||||
}
|
||||
}
|
||||
}
|
||||
AscDesc::Asc(Member::Geo(_)) | AscDesc::Desc(Member::Geo(_)) => match geo_sorted {
|
||||
Some(earlier_sort_index) => {
|
||||
canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
|
||||
earlier_occurrence: RankingRuleSource::Sort {
|
||||
criterion_index,
|
||||
sort_index: earlier_sort_index,
|
||||
},
|
||||
removed_occurrence: source,
|
||||
})
|
||||
}
|
||||
None => {
|
||||
geo_sorted = Some(sort_index);
|
||||
canonical_sort.push(asc_desc);
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
canonical_sort
|
||||
}
|
||||
|
||||
fn canonicalize_criterion(
|
||||
criterion: Criterion,
|
||||
criterion_index: usize,
|
||||
terms_matching_strategy: TermsMatchingStrategy,
|
||||
words: &mut Option<Option<usize>>,
|
||||
canonicalization_actions: &mut Vec<CanonicalizationAction>,
|
||||
canonical_criteria: &mut Vec<Criterion>,
|
||||
rule: &mut Option<usize>,
|
||||
) {
|
||||
*words = match (terms_matching_strategy, words.take()) {
|
||||
(TermsMatchingStrategy::All, words) => words,
|
||||
(_, None) => {
|
||||
// inject words
|
||||
canonicalization_actions.push(CanonicalizationAction::PrependedWords {
|
||||
prepended_index: RankingRuleSource::Criterion(criterion_index),
|
||||
});
|
||||
canonical_criteria.push(Criterion::Words);
|
||||
Some(None)
|
||||
}
|
||||
(_, words) => words,
|
||||
};
|
||||
if let Some(previous_index) = *rule {
|
||||
canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
|
||||
earlier_occurrence: RankingRuleSource::Criterion(previous_index),
|
||||
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
|
||||
});
|
||||
} else {
|
||||
*rule = Some(criterion_index);
|
||||
canonical_criteria.push(criterion)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
enum RankingRuleKind {
|
||||
Relevancy,
|
||||
AscendingSort,
|
||||
DescendingSort,
|
||||
AscendingGeoSort,
|
||||
DescendingGeoSort,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct RankingRule {
|
||||
source: RankingRuleSource,
|
||||
kind: RankingRuleKind,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum RankingRuleSource {
|
||||
Criterion(usize),
|
||||
CoalescedCriteria(usize, usize),
|
||||
Sort { criterion_index: usize, sort_index: usize },
|
||||
}
|
||||
|
||||
impl RankingRuleSource {
|
||||
fn rule_name(&self, criteria: &[Criterion], sort: &Option<Vec<AscDesc>>) -> String {
|
||||
match self {
|
||||
RankingRuleSource::Criterion(criterion_index) => criteria
|
||||
.get(*criterion_index)
|
||||
.map(|c| c.to_string())
|
||||
.unwrap_or_else(|| "unknown".into()),
|
||||
RankingRuleSource::CoalescedCriteria(begin, end) => {
|
||||
let rules: Vec<_> = criteria
|
||||
.get(*begin..=*end)
|
||||
.iter()
|
||||
.flat_map(|c| c.iter())
|
||||
.map(|c| c.to_string())
|
||||
.collect();
|
||||
rules.join(", ")
|
||||
}
|
||||
RankingRuleSource::Sort { criterion_index: _, sort_index } => {
|
||||
match sort.as_deref().and_then(|sort| sort.get(*sort_index)) {
|
||||
Some(sort) => match sort {
|
||||
AscDesc::Asc(Member::Field(field_name)) => format!("{field_name}:asc"),
|
||||
AscDesc::Desc(Member::Field(field_name)) => {
|
||||
format!("{field_name}:desc")
|
||||
}
|
||||
AscDesc::Asc(Member::Geo(_)) => "_geo(..):asc".to_string(),
|
||||
AscDesc::Desc(Member::Geo(_)) => "_geo(..):desc".to_string(),
|
||||
},
|
||||
None => "unknown".into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn rule_position(&self) -> String {
|
||||
match self {
|
||||
RankingRuleSource::Criterion(criterion_index) => {
|
||||
format!("#{criterion_index} in ranking rules")
|
||||
}
|
||||
RankingRuleSource::CoalescedCriteria(begin, end) => {
|
||||
format!("#{begin} to #{end} in ranking rules")
|
||||
}
|
            RankingRuleSource::Sort { criterion_index, sort_index } => format!(
                "#{sort_index} in `query.sort` (as `sort` is #{criterion_index} in ranking rules)"
            ),
        }
    }
}

impl RankingRule {
    fn from_criterion<'a>(
        criterion_index: usize,
        criterion: &'a Criterion,
        sort: &'a Option<Vec<AscDesc>>,
    ) -> impl Iterator<Item = Self> + 'a {
        let kind = match criterion {
            Criterion::Words
            | Criterion::Typo
            | Criterion::Proximity
            | Criterion::Attribute
            | Criterion::Exactness => RankingRuleKind::Relevancy,
            Criterion::Asc(s) if s == "_geo" => RankingRuleKind::AscendingGeoSort,

            Criterion::Asc(_) => RankingRuleKind::AscendingSort,
            Criterion::Desc(s) if s == "_geo" => RankingRuleKind::DescendingGeoSort,

            Criterion::Desc(_) => RankingRuleKind::DescendingSort,
            Criterion::Sort => {
                return either::Right(sort.iter().flatten().enumerate().map(
                    move |(rule_index, asc_desc)| {
                        Self::from_asc_desc(asc_desc, criterion_index, rule_index)
                    },
                ))
            }
        };

        either::Left(std::iter::once(Self {
            source: RankingRuleSource::Criterion(criterion_index),
            kind,
        }))
    }

    fn from_asc_desc(asc_desc: &AscDesc, sort_index: usize, rule_index_in_sort: usize) -> Self {
        let kind = match asc_desc {
            AscDesc::Asc(Member::Field(_)) => RankingRuleKind::AscendingSort,
            AscDesc::Desc(Member::Field(_)) => RankingRuleKind::DescendingSort,
            AscDesc::Asc(Member::Geo(_)) => RankingRuleKind::AscendingGeoSort,
            AscDesc::Desc(Member::Geo(_)) => RankingRuleKind::DescendingGeoSort,
        };
        Self {
            source: RankingRuleSource::Sort {
                criterion_index: sort_index,
                sort_index: rule_index_in_sort,
            },
            kind,
        }
    }

    fn as_string(
        &self,
        canonical_criteria: &[Criterion],
        canonical_sort: &Option<Vec<AscDesc>>,
        query_index: usize,
        index_uid: &str,
    ) -> String {
        let kind = match self.kind {
            RankingRuleKind::Relevancy => "relevancy",
            RankingRuleKind::AscendingSort => "ascending sort",
            RankingRuleKind::DescendingSort => "descending sort",
            RankingRuleKind::AscendingGeoSort => "ascending geo sort",
            RankingRuleKind::DescendingGeoSort => "descending geo sort",
        };
        let rules = self.fetch_from_source(canonical_criteria, canonical_sort);

        let source = match self.source {
            RankingRuleSource::Criterion(criterion_index) => format!("`queries[{query_index}]`, `{index_uid}.rankingRules[{criterion_index}]`"),
            RankingRuleSource::CoalescedCriteria(begin, end) => format!("`queries[{query_index}]`, `{index_uid}.rankingRules[{begin}..={end}]`"),
            RankingRuleSource::Sort { criterion_index, sort_index } => format!("`queries[{query_index}].sort[{sort_index}]`, `{index_uid}.rankingRules[{criterion_index}]`"),
        };

        format!("{source}: {kind} {rules}")
    }

    fn fetch_from_source(
        &self,
        canonical_criteria: &[Criterion],
        canonical_sort: &Option<Vec<AscDesc>>,
    ) -> String {
        let rule_name = match self.source {
            RankingRuleSource::Criterion(index) => {
                canonical_criteria.get(index).map(|criterion| criterion.to_string())
            }
            RankingRuleSource::CoalescedCriteria(begin, end) => {
                let rules: Vec<String> = canonical_criteria
                    .get(begin..=end)
                    .into_iter()
                    .flat_map(|criteria| criteria.iter())
                    .map(|criterion| criterion.to_string())
                    .collect();

                (!rules.is_empty()).then_some(rules.join(", "))
            }
            RankingRuleSource::Sort { criterion_index: _, sort_index } => canonical_sort
                .as_deref()
                .and_then(|canonical_sort| canonical_sort.get(sort_index))
                .and_then(|asc_desc: &AscDesc| match asc_desc {
                    AscDesc::Asc(Member::Field(s)) | AscDesc::Desc(Member::Field(s)) => {
                        Some(format!("on field `{s}`"))
                    }
                    _ => None,
                }),
        };

        let rule_name = rule_name.unwrap_or_else(|| "default".into());

        format!("rule(s) {rule_name}")
    }
}
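
For context (not part of the diff): `as_string` above glues together a source location, a rule kind, and the rule names fetched from the index settings. A minimal sketch of the resulting messages, using only the format strings visible in the code; the query index, index name `movies`, rule positions, and rule names shown are invented for illustration:

// Illustrative only: reproduces the message layout assembled by `RankingRule::as_string`
// with hypothetical values (query 0, index `movies`).
fn main() {
    let (query_index, index_uid) = (0, "movies");

    // A rule that comes straight from the index's ranking rules (criterion #0).
    let criterion_index = 0;
    let source = format!("`queries[{query_index}]`, `{index_uid}.rankingRules[{criterion_index}]`");
    println!("{source}: relevancy rule(s) words");

    // A rule that comes from entry #1 of `query.sort`, backed by the `sort` criterion at position 3.
    let (criterion_index, sort_index) = (3, 1);
    let source = format!(
        "`queries[{query_index}].sort[{sort_index}]`, `{index_uid}.rankingRules[{criterion_index}]`"
    );
    println!("{source}: ascending sort rule(s) on field `price`");
}
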
170
crates/meilisearch/src/search_queue.rs
Normal file

@@ -0,0 +1,170 @@
//! This file implements a queue of searches to process and the ability to control how many searches can be run in parallel.
//! We need this because we don't want to process more search requests than we have cores.
//! That slows down everything and consumes RAM for no reason.
//! The steps to do a search are to get the `SearchQueue` data structure and try to get a search permit.
//! This can fail if the queue is full and we need to drop your search request to register a new one.
//!
//! ### How to do a search request
//!
//! In order to do a search request you should try to get a search permit.
//! Retrieve the `SearchQueue` structure from actix-web (`search_queue: Data<SearchQueue>`)
//! and right before processing the search, call the `SearchQueue::try_get_search_permit` method: `search_queue.try_get_search_permit().await?;`
//!
//! What is going to happen at this point is that you're going to send a oneshot::Sender over an async mpsc channel.
//! Then, the queue/scheduler is going to either:
//! - Drop your oneshot channel => that means there are too many searches going on, and yours won't be executed.
//!   You should exit and free all the RAM you use ASAP.
//! - Send you a Permit => that will unlock the method, and you will be able to process your search.
//!   You should drop the Permit only once you have freed all the RAM consumed by the method.

use std::num::NonZeroUsize;
use std::time::Duration;

use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use tokio::sync::{mpsc, oneshot};

use crate::error::MeilisearchHttpError;

#[derive(Debug)]
pub struct SearchQueue {
    sender: mpsc::Sender<oneshot::Sender<Permit>>,
    capacity: usize,
    /// If we have waited longer than this to get a permit, we should abort the search request entirely.
    /// The client probably already closed the connection, but we have no way to find out.
    time_to_abort: Duration,
}

/// You should only run search requests while holding this permit.
/// Once it's dropped, a new search request can be processed.
/// You should always try to drop the permit yourself by calling the `drop` async method on it.
#[derive(Debug)]
pub struct Permit {
    sender: mpsc::Sender<()>,
}

impl Permit {
    /// Drop the permit, giving back one permit to the search queue.
    pub async fn drop(self) {
        // if the channel is closed then the whole instance is down
        let _ = self.sender.send(()).await;
    }
}

impl Drop for Permit {
    /// The implicit drop implementation can still be called in multiple cases:
    /// - We forgot to call the explicit one somewhere => this should be fixed on our side asap
    /// - The future is cancelled while running and the permit dropped with it
    fn drop(&mut self) {
        let sender = self.sender.clone();
        // if the channel is closed then the whole instance is down
        std::mem::drop(tokio::spawn(async move { sender.send(()).await }));
    }
}

impl SearchQueue {
    pub fn new(capacity: usize, parallelism: NonZeroUsize) -> Self {
        // Search requests are going to wait until we're available anyway,
        // so let's not allocate any RAM and keep a capacity of 1.
        let (sender, receiver) = mpsc::channel(1);

        tokio::task::spawn(Self::run(capacity, parallelism, receiver));
        Self { sender, capacity, time_to_abort: Duration::from_secs(60) }
    }

    pub fn with_time_to_abort(self, time_to_abort: Duration) -> Self {
        Self { time_to_abort, ..self }
    }

    /// This function is the main loop; it's in charge of scheduling which search request should execute first and
    /// how many should execute at the same time.
    ///
    /// It **must never** panic or exit.
    async fn run(
        capacity: usize,
        parallelism: NonZeroUsize,
        mut receive_new_searches: mpsc::Receiver<oneshot::Sender<Permit>>,
    ) {
        let mut queue: Vec<oneshot::Sender<Permit>> = Default::default();
        let mut rng: StdRng = StdRng::from_entropy();
        let mut searches_running: usize = 0;
        // By having a capacity of `parallelism` we ensure that every time a search finishes it can release its RAM asap
        let (sender, mut search_finished) = mpsc::channel(parallelism.into());

        loop {
            tokio::select! {
                // biased select because we want to free up space before trying to register new tasks
                biased;
                _ = search_finished.recv() => {
                    searches_running = searches_running.saturating_sub(1);
                    if !queue.is_empty() {
                        // Can't panic: the queue wasn't empty thus the range isn't empty.
                        let remove = rng.gen_range(0..queue.len());
                        let channel = queue.swap_remove(remove);
                        let _ = channel.send(Permit { sender: sender.clone() });
                    }
                },

                search_request = receive_new_searches.recv() => {
                    let search_request = match search_request {
                        Some(search_request) => search_request,
                        // This should never happen while actix-web is running, but it's not a reason to crash
                        // and it can generate a lot of noise in the tests.
                        None => continue,
                    };

                    if searches_running < usize::from(parallelism) && queue.is_empty() {
                        searches_running += 1;
                        // if the search request dies, it's not a hard error on our side
                        let _ = search_request.send(Permit { sender: sender.clone() });
                        continue;
                    } else if capacity == 0 {
                        // in the very specific case where we have a capacity of zero
                        // we must refuse the request straight away without going through
                        // the queue stuff.
                        drop(search_request);
                        continue;

                    } else if queue.len() >= capacity {
                        let remove = rng.gen_range(0..queue.len());
                        let thing = queue.swap_remove(remove); // this will drop the channel and notify the search that it won't be processed
                        drop(thing);
                    }
                    queue.push(search_request);
                },
            }
        }
    }

    /// Returns a search `Permit`.
    /// It should be dropped as soon as you've freed all the RAM associated with the search request being processed.
    pub async fn try_get_search_permit(&self) -> Result<Permit, MeilisearchHttpError> {
        let now = std::time::Instant::now();
        let (sender, receiver) = oneshot::channel();
        self.sender.send(sender).await.map_err(|_| MeilisearchHttpError::SearchLimiterIsDown)?;
        let permit = receiver
            .await
            .map_err(|_| MeilisearchHttpError::TooManySearchRequests(self.capacity))?;

        // If we've been waiting for more than one minute to get a search permit, it's better to simply
        // abort the search request than to spend time processing something where the client
        // most certainly exited or got a timeout a long time ago.
        // We may find a better solution in https://github.com/actix/actix-web/issues/3462.
        if now.elapsed() > self.time_to_abort {
            permit.drop().await;
            Err(MeilisearchHttpError::TooManySearchRequests(self.capacity))
        } else {
            Ok(permit)
        }
    }

    /// Returns `Ok(())` if everything seems normal.
    /// Returns `Err(MeilisearchHttpError::SearchLimiterIsDown)` if the search limiter seems down.
    pub fn health(&self) -> Result<(), MeilisearchHttpError> {
        if self.sender.is_closed() {
            Err(MeilisearchHttpError::SearchLimiterIsDown)
        } else {
            Ok(())
        }
    }
}
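
As a rough usage sketch (not part of this diff): build the queue once at startup, share it through actix-web's `Data`, and take a permit right before running a search, as the module documentation above describes. The function and handler names below are hypothetical, and the snippet assumes it lives next to this module so `SearchQueue` and `MeilisearchHttpError` are in scope; the capacity and timeout values are placeholders, not the server's real defaults.

use std::num::NonZeroUsize;
use std::time::Duration;

use actix_web::web::Data;

// Hypothetical wiring; the real server sets this up in its own startup code.
fn build_search_queue() -> Data<SearchQueue> {
    // Allow as many concurrent searches as there are cores, queue up to 1000 waiting
    // requests, and abort requests that waited longer than 60 seconds for a permit.
    let parallelism =
        std::thread::available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap());
    let queue = SearchQueue::new(1000, parallelism).with_time_to_abort(Duration::from_secs(60));
    Data::new(queue)
}

// Hypothetical handler skeleton following the module docs above.
async fn search_handler(search_queue: Data<SearchQueue>) -> Result<(), MeilisearchHttpError> {
    // Wait for a slot; this errors if the queue is full or the limiter is down.
    let permit = search_queue.try_get_search_permit().await?;

    // ... run the actual search here ...

    // Release the slot explicitly once the search's RAM has been freed.
    permit.drop().await;
    Ok(())
}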