Merge branch 'main' into tmp-release-v1.11.0

This commit is contained in:
Tamo 2024-11-04 16:14:44 +01:00
commit cf6ad1ae5e
1071 changed files with 263 additions and 106 deletions

View file

@ -0,0 +1,109 @@
use std::any::Any;
use std::sync::Arc;
use actix_web::HttpRequest;
use meilisearch_types::InstanceUid;
use serde_json::Value;
use super::{find_user_id, Analytics, DocumentDeletionKind, DocumentFetchKind};
use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery};
use crate::Opt;
pub struct MockAnalytics {
instance_uid: Option<InstanceUid>,
}
#[derive(Default)]
pub struct SearchAggregator;
#[allow(dead_code)]
impl SearchAggregator {
pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
Self
}
pub fn succeed(&mut self, _: &dyn Any) {}
}
#[derive(Default)]
pub struct SimilarAggregator;
#[allow(dead_code)]
impl SimilarAggregator {
pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
Self
}
pub fn succeed(&mut self, _: &dyn Any) {}
}
#[derive(Default)]
pub struct MultiSearchAggregator;
#[allow(dead_code)]
impl MultiSearchAggregator {
pub fn from_federated_search(_: &dyn Any, _: &dyn Any) -> Self {
Self
}
pub fn succeed(&mut self) {}
}
#[derive(Default)]
pub struct FacetSearchAggregator;
#[allow(dead_code)]
impl FacetSearchAggregator {
pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
Self
}
pub fn succeed(&mut self, _: &dyn Any) {}
}
impl MockAnalytics {
#[allow(clippy::new_ret_no_self)]
pub fn new(opt: &Opt) -> Arc<dyn Analytics> {
let instance_uid = find_user_id(&opt.db_path);
Arc::new(Self { instance_uid })
}
}
impl Analytics for MockAnalytics {
fn instance_uid(&self) -> Option<&meilisearch_types::InstanceUid> {
self.instance_uid.as_ref()
}
// These methods are noop and should be optimized out
fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {}
fn get_search(&self, _aggregate: super::SearchAggregator) {}
fn post_search(&self, _aggregate: super::SearchAggregator) {}
fn get_similar(&self, _aggregate: super::SimilarAggregator) {}
fn post_similar(&self, _aggregate: super::SimilarAggregator) {}
fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {}
fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {}
fn add_documents(
&self,
_documents_query: &UpdateDocumentsQuery,
_index_creation: bool,
_request: &HttpRequest,
) {
}
fn delete_documents(&self, _kind: DocumentDeletionKind, _request: &HttpRequest) {}
fn update_documents(
&self,
_documents_query: &UpdateDocumentsQuery,
_index_creation: bool,
_request: &HttpRequest,
) {
}
fn update_documents_by_function(
&self,
_documents_query: &DocumentEditionByFunction,
_index_creation: bool,
_request: &HttpRequest,
) {
}
fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
}

View file

@ -0,0 +1,166 @@
pub mod segment_analytics;
use std::fs;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
use actix_web::HttpRequest;
use index_scheduler::IndexScheduler;
use meilisearch_auth::AuthController;
use meilisearch_types::InstanceUid;
use mopa::mopafy;
use once_cell::sync::Lazy;
use platform_dirs::AppDirs;
// if the feature analytics is enabled we use the real analytics
pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
use crate::Opt;
/// A macro used to quickly define events that don't aggregate or send anything besides an empty event with its name.
#[macro_export]
macro_rules! empty_analytics {
($struct_name:ident, $event_name:literal) => {
#[derive(Default)]
struct $struct_name {}
impl $crate::analytics::Aggregate for $struct_name {
fn event_name(&self) -> &'static str {
$event_name
}
fn aggregate(self: Box<Self>, _other: Box<Self>) -> Box<Self> {
self
}
fn into_event(self: Box<Self>) -> serde_json::Value {
serde_json::json!({})
}
}
};
}
/// The Meilisearch config dir:
/// `~/.config/Meilisearch` on *NIX or *BSD.
/// `~/Library/ApplicationSupport` on macOS.
/// `%APPDATA` (= `C:\Users%USERNAME%\AppData\Roaming`) on windows.
static MEILISEARCH_CONFIG_PATH: Lazy<Option<PathBuf>> =
Lazy::new(|| AppDirs::new(Some("Meilisearch"), false).map(|appdir| appdir.config_dir));
fn config_user_id_path(db_path: &Path) -> Option<PathBuf> {
db_path
.canonicalize()
.ok()
.map(|path| path.join("instance-uid").display().to_string().replace('/', "-"))
.zip(MEILISEARCH_CONFIG_PATH.as_ref())
.map(|(filename, config_path)| config_path.join(filename.trim_start_matches('-')))
}
/// Look for the instance-uid in the `data.ms` or in `~/.config/Meilisearch/path-to-db-instance-uid`
fn find_user_id(db_path: &Path) -> Option<InstanceUid> {
fs::read_to_string(db_path.join("instance-uid"))
.ok()
.or_else(|| fs::read_to_string(config_user_id_path(db_path)?).ok())
.and_then(|uid| InstanceUid::from_str(&uid).ok())
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocumentDeletionKind {
PerDocumentId,
ClearAll,
PerBatch,
PerFilter,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocumentFetchKind {
PerDocumentId { retrieve_vectors: bool },
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
}
/// To send an event to segment, your event must be able to aggregate itself with another event of the same type.
pub trait Aggregate: 'static + mopa::Any + Send {
/// The name of the event that will be sent to segment.
fn event_name(&self) -> &'static str;
/// Will be called every time an event has been used twice before segment flushed its buffer.
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self>
where
Self: Sized;
/// Converts your structure to the final event that'll be sent to segment.
fn into_event(self: Box<Self>) -> serde_json::Value;
}
mopafy!(Aggregate);
/// Helper trait to define multiple aggregates with the same content but a different name.
/// Commonly used when you must aggregate a search with POST or with GET, for example.
pub trait AggregateMethod: 'static + Default + Send {
fn event_name() -> &'static str;
}
/// A macro used to quickly define multiple aggregate method with their name
/// Usage:
/// ```rust
/// use meilisearch::aggregate_methods;
///
/// aggregate_methods!(
/// SearchGET => "Documents Searched GET",
/// SearchPOST => "Documents Searched POST",
/// );
/// ```
#[macro_export]
macro_rules! aggregate_methods {
($method:ident => $event_name:literal) => {
#[derive(Default)]
pub struct $method {}
impl $crate::analytics::AggregateMethod for $method {
fn event_name() -> &'static str {
$event_name
}
}
};
($($method:ident => $event_name:literal,)+) => {
$(
aggregate_methods!($method => $event_name);
)+
};
}
#[derive(Clone)]
pub struct Analytics {
segment: Option<Arc<SegmentAnalytics>>,
}
impl Analytics {
pub async fn new(
opt: &Opt,
index_scheduler: Arc<IndexScheduler>,
auth_controller: Arc<AuthController>,
) -> Self {
if opt.no_analytics {
Self { segment: None }
} else {
Self { segment: SegmentAnalytics::new(opt, index_scheduler, auth_controller).await }
}
}
pub fn no_analytics() -> Self {
Self { segment: None }
}
pub fn instance_uid(&self) -> Option<&InstanceUid> {
self.segment.as_ref().map(|segment| segment.instance_uid.as_ref())
}
/// The method used to publish most analytics that do not need to be batched every hours
pub fn publish<T: Aggregate>(&self, event: T, request: &HttpRequest) {
if let Some(ref segment) = self.segment {
let _ = segment.sender.try_send(segment_analytics::Message::new(event, request));
}
}
}

View file

@ -0,0 +1,484 @@
use std::any::TypeId;
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{Duration, Instant};
use actix_web::http::header::USER_AGENT;
use actix_web::HttpRequest;
use byte_unit::Byte;
use index_scheduler::IndexScheduler;
use meilisearch_auth::{AuthController, AuthFilter};
use meilisearch_types::features::RuntimeTogglableFeatures;
use meilisearch_types::InstanceUid;
use once_cell::sync::Lazy;
use segment::message::{Identify, Track, User};
use segment::{AutoBatcher, Batcher, HttpClient};
use serde::Serialize;
use serde_json::{json, Value};
use sysinfo::{Disks, System};
use time::OffsetDateTime;
use tokio::select;
use tokio::sync::mpsc::{self, Receiver, Sender};
use uuid::Uuid;
use super::{config_user_id_path, Aggregate, MEILISEARCH_CONFIG_PATH};
use crate::option::{
default_http_addr, IndexerOpts, LogMode, MaxMemory, MaxThreads, ScheduleSnapshot,
};
use crate::routes::{create_all_stats, Stats};
use crate::Opt;
const ANALYTICS_HEADER: &str = "X-Meilisearch-Client";
/// Write the instance-uid in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-instance-uid`. Ignore the errors.
fn write_user_id(db_path: &Path, user_id: &InstanceUid) {
let _ = fs::write(db_path.join("instance-uid"), user_id.to_string());
if let Some((meilisearch_config_path, user_id_path)) =
MEILISEARCH_CONFIG_PATH.as_ref().zip(config_user_id_path(db_path))
{
let _ = fs::create_dir_all(meilisearch_config_path);
let _ = fs::write(user_id_path, user_id.to_string());
}
}
const SEGMENT_API_KEY: &str = "P3FWhhEsJiEDCuEHpmcN9DHcK4hVfBvb";
pub fn extract_user_agents(request: &HttpRequest) -> HashSet<String> {
request
.headers()
.get(ANALYTICS_HEADER)
.or_else(|| request.headers().get(USER_AGENT))
.and_then(|header| header.to_str().ok())
.unwrap_or("unknown")
.split(';')
.map(str::trim)
.map(ToString::to_string)
.collect()
}
pub struct Message {
// Since the type_id is solved statically we cannot retrieve it from the Box.
// Thus we have to send it in the message directly.
type_id: TypeId,
// Same for the aggregate function.
#[allow(clippy::type_complexity)]
aggregator_function: fn(Box<dyn Aggregate>, Box<dyn Aggregate>) -> Option<Box<dyn Aggregate>>,
event: Event,
}
pub struct Event {
original: Box<dyn Aggregate>,
timestamp: OffsetDateTime,
user_agents: HashSet<String>,
total: usize,
}
/// This function should always be called on the same type. If `this` and `other`
/// aren't the same type the function will do nothing and return `None`.
fn downcast_aggregate<ConcreteType: Aggregate>(
old: Box<dyn Aggregate>,
new: Box<dyn Aggregate>,
) -> Option<Box<dyn Aggregate>> {
if old.is::<ConcreteType>() && new.is::<ConcreteType>() {
// Both the two following lines cannot fail, but just to be sure we don't crash, we're still avoiding unwrapping
let this = old.downcast::<ConcreteType>().ok()?;
let other = new.downcast::<ConcreteType>().ok()?;
Some(ConcreteType::aggregate(this, other))
} else {
None
}
}
impl Message {
pub fn new<T: Aggregate>(event: T, request: &HttpRequest) -> Self {
Self {
type_id: TypeId::of::<T>(),
event: Event {
original: Box::new(event),
timestamp: OffsetDateTime::now_utc(),
user_agents: extract_user_agents(request),
total: 1,
},
aggregator_function: downcast_aggregate::<T>,
}
}
}
pub struct SegmentAnalytics {
pub instance_uid: InstanceUid,
pub user: User,
pub sender: Sender<Message>,
}
impl SegmentAnalytics {
#[allow(clippy::new_ret_no_self)]
pub async fn new(
opt: &Opt,
index_scheduler: Arc<IndexScheduler>,
auth_controller: Arc<AuthController>,
) -> Option<Arc<Self>> {
let instance_uid = super::find_user_id(&opt.db_path);
let first_time_run = instance_uid.is_none();
let instance_uid = instance_uid.unwrap_or_else(Uuid::new_v4);
write_user_id(&opt.db_path, &instance_uid);
let client = reqwest::Client::builder().connect_timeout(Duration::from_secs(10)).build();
// if reqwest throws an error we won't be able to send analytics
if client.is_err() {
return None;
}
let client =
HttpClient::new(client.unwrap(), "https://telemetry.meilisearch.com".to_string());
let user = User::UserId { user_id: instance_uid.to_string() };
let mut batcher = AutoBatcher::new(client, Batcher::new(None), SEGMENT_API_KEY.to_string());
// If Meilisearch is Launched for the first time:
// 1. Send an event Launched associated to the user `total_launch`.
// 2. Batch an event Launched with the real instance-id and send it in one hour.
if first_time_run {
let _ = batcher
.push(Track {
user: User::UserId { user_id: "total_launch".to_string() },
event: "Launched".to_string(),
..Default::default()
})
.await;
let _ = batcher.flush().await;
let _ = batcher
.push(Track {
user: user.clone(),
event: "Launched".to_string(),
..Default::default()
})
.await;
}
let (sender, inbox) = mpsc::channel(100); // How many analytics can we bufferize
let segment = Box::new(Segment {
inbox,
user: user.clone(),
opt: opt.clone(),
batcher,
events: HashMap::new(),
});
tokio::spawn(segment.run(index_scheduler.clone(), auth_controller.clone()));
let this = Self { instance_uid, sender, user: user.clone() };
Some(Arc::new(this))
}
}
/// This structure represent the `infos` field we send in the analytics.
/// It's quite close to the `Opt` structure except all sensitive informations
/// have been simplified to a boolean.
/// It's send as-is in amplitude thus you should never update a name of the
/// struct without the approval of the PM.
#[derive(Debug, Clone, Serialize)]
struct Infos {
env: String,
experimental_contains_filter: bool,
experimental_vector_store: bool,
experimental_enable_metrics: bool,
experimental_edit_documents_by_function: bool,
experimental_search_queue_size: usize,
experimental_drop_search_after: usize,
experimental_nb_searches_per_core: usize,
experimental_logs_mode: LogMode,
experimental_replication_parameters: bool,
experimental_enable_logs_route: bool,
experimental_reduce_indexing_memory_usage: bool,
experimental_max_number_of_batched_tasks: usize,
gpu_enabled: bool,
db_path: bool,
import_dump: bool,
dump_dir: bool,
ignore_missing_dump: bool,
ignore_dump_if_db_exists: bool,
import_snapshot: bool,
schedule_snapshot: Option<u64>,
snapshot_dir: bool,
ignore_missing_snapshot: bool,
ignore_snapshot_if_db_exists: bool,
http_addr: bool,
http_payload_size_limit: Byte,
task_queue_webhook: bool,
task_webhook_authorization_header: bool,
log_level: String,
max_indexing_memory: MaxMemory,
max_indexing_threads: MaxThreads,
with_configuration_file: bool,
ssl_auth_path: bool,
ssl_cert_path: bool,
ssl_key_path: bool,
ssl_ocsp_path: bool,
ssl_require_auth: bool,
ssl_resumption: bool,
ssl_tickets: bool,
}
impl Infos {
pub fn new(options: Opt, features: RuntimeTogglableFeatures) -> Self {
// We wants to decompose this whole struct by hand to be sure we don't forget
// to add analytics when we add a field in the Opt.
// Thus we must not insert `..` at the end.
let Opt {
db_path,
experimental_contains_filter,
experimental_enable_metrics,
experimental_search_queue_size,
experimental_drop_search_after,
experimental_nb_searches_per_core,
experimental_logs_mode,
experimental_replication_parameters,
experimental_enable_logs_route,
experimental_reduce_indexing_memory_usage,
experimental_max_number_of_batched_tasks,
http_addr,
master_key: _,
env,
task_webhook_url,
task_webhook_authorization_header,
max_index_size: _,
max_task_db_size: _,
http_payload_size_limit,
ssl_cert_path,
ssl_key_path,
ssl_auth_path,
ssl_ocsp_path,
ssl_require_auth,
ssl_resumption,
ssl_tickets,
import_snapshot,
ignore_missing_snapshot,
ignore_snapshot_if_db_exists,
snapshot_dir,
schedule_snapshot,
import_dump,
ignore_missing_dump,
ignore_dump_if_db_exists,
dump_dir,
log_level,
indexer_options,
config_file_path,
no_analytics: _,
} = options;
let schedule_snapshot = match schedule_snapshot {
ScheduleSnapshot::Disabled => None,
ScheduleSnapshot::Enabled(interval) => Some(interval),
};
let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } =
indexer_options;
let RuntimeTogglableFeatures {
vector_store,
metrics,
logs_route,
edit_documents_by_function,
contains_filter,
} = features;
// We're going to override every sensible information.
// We consider information sensible if it contains a path, an address, or a key.
Self {
env,
experimental_contains_filter: experimental_contains_filter | contains_filter,
experimental_vector_store: vector_store,
experimental_edit_documents_by_function: edit_documents_by_function,
experimental_enable_metrics: experimental_enable_metrics | metrics,
experimental_search_queue_size,
experimental_drop_search_after: experimental_drop_search_after.into(),
experimental_nb_searches_per_core: experimental_nb_searches_per_core.into(),
experimental_logs_mode,
experimental_replication_parameters,
experimental_enable_logs_route: experimental_enable_logs_route | logs_route,
experimental_reduce_indexing_memory_usage,
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(),
dump_dir: dump_dir != PathBuf::from("dumps/"),
ignore_missing_dump,
ignore_dump_if_db_exists,
import_snapshot: import_snapshot.is_some(),
schedule_snapshot,
snapshot_dir: snapshot_dir != PathBuf::from("snapshots/"),
ignore_missing_snapshot,
ignore_snapshot_if_db_exists,
http_addr: http_addr != default_http_addr(),
http_payload_size_limit,
experimental_max_number_of_batched_tasks,
task_queue_webhook: task_webhook_url.is_some(),
task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
log_level: log_level.to_string(),
max_indexing_memory,
max_indexing_threads,
with_configuration_file: config_file_path.is_some(),
ssl_auth_path: ssl_auth_path.is_some(),
ssl_cert_path: ssl_cert_path.is_some(),
ssl_key_path: ssl_key_path.is_some(),
ssl_ocsp_path: ssl_ocsp_path.is_some(),
ssl_require_auth,
ssl_resumption,
ssl_tickets,
}
}
}
pub struct Segment {
inbox: Receiver<Message>,
user: User,
opt: Opt,
batcher: AutoBatcher,
events: HashMap<TypeId, Event>,
}
impl Segment {
fn compute_traits(opt: &Opt, stats: Stats, features: RuntimeTogglableFeatures) -> Value {
static FIRST_START_TIMESTAMP: Lazy<Instant> = Lazy::new(Instant::now);
static SYSTEM: Lazy<Value> = Lazy::new(|| {
let disks = Disks::new_with_refreshed_list();
let mut sys = System::new_all();
sys.refresh_all();
let kernel_version = System::kernel_version()
.and_then(|k| k.split_once('-').map(|(k, _)| k.to_string()));
json!({
"distribution": System::name(),
"kernel_version": kernel_version,
"cores": sys.cpus().len(),
"ram_size": sys.total_memory(),
"disk_size": disks.iter().map(|disk| disk.total_space()).max(),
"server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(),
})
});
let number_of_documents =
stats.indexes.values().map(|index| index.number_of_documents).collect::<Vec<u64>>();
json!({
"start_since_days": FIRST_START_TIMESTAMP.elapsed().as_secs() / (60 * 60 * 24), // one day
"system": *SYSTEM,
"stats": {
"database_size": stats.database_size,
"indexes_number": stats.indexes.len(),
"documents_number": number_of_documents,
},
"infos": Infos::new(opt.clone(), features),
})
}
async fn run(
mut self,
index_scheduler: Arc<IndexScheduler>,
auth_controller: Arc<AuthController>,
) {
const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour
// The first batch must be sent after one hour.
let mut interval =
tokio::time::interval_at(tokio::time::Instant::now() + INTERVAL, INTERVAL);
loop {
select! {
_ = interval.tick() => {
self.tick(index_scheduler.clone(), auth_controller.clone()).await;
},
Some(msg) = self.inbox.recv() => {
self.handle_msg(msg);
}
}
}
}
fn handle_msg(&mut self, Message { type_id, aggregator_function, event }: Message) {
let new_event = match self.events.remove(&type_id) {
Some(old) => {
// The function should never fail since we retrieved the corresponding TypeId in the map. But in the unfortunate
// case it could happens we're going to silently ignore the error
let Some(original) = (aggregator_function)(old.original, event.original) else {
return;
};
Event {
original,
// We always want to return the FIRST timestamp ever encountered
timestamp: old.timestamp,
user_agents: old.user_agents.union(&event.user_agents).cloned().collect(),
total: old.total.saturating_add(event.total),
}
}
None => event,
};
self.events.insert(type_id, new_event);
}
async fn tick(
&mut self,
index_scheduler: Arc<IndexScheduler>,
auth_controller: Arc<AuthController>,
) {
if let Ok(stats) = create_all_stats(
index_scheduler.clone().into(),
auth_controller.into(),
&AuthFilter::default(),
) {
// Replace the version number with the prototype name if any.
let version = if let Some(prototype) = build_info::DescribeResult::from_build()
.and_then(|describe| describe.as_prototype())
{
prototype
} else {
env!("CARGO_PKG_VERSION")
};
let _ = self
.batcher
.push(Identify {
context: Some(json!({
"app": {
"version": version.to_string(),
},
})),
user: self.user.clone(),
traits: Self::compute_traits(
&self.opt,
stats,
index_scheduler.features().runtime_features(),
),
..Default::default()
})
.await;
}
// We empty the list of events
let events = std::mem::take(&mut self.events);
for (_, event) in events {
let Event { original, timestamp, user_agents, total } = event;
let name = original.event_name();
let mut properties = original.into_event();
if properties["user-agent"].is_null() {
properties["user-agent"] = json!(user_agents);
};
if properties["requests"]["total_received"].is_null() {
properties["requests"]["total_received"] = total.into();
};
let _ = self
.batcher
.push(Track {
user: self.user.clone(),
event: name.to_string(),
properties,
timestamp: Some(timestamp),
..Default::default()
})
.await;
}
let _ = self.batcher.flush().await;
}
}

View file

@ -0,0 +1,227 @@
use actix_web as aweb;
use aweb::error::{JsonPayloadError, QueryPayloadError};
use byte_unit::{Byte, UnitType};
use meilisearch_types::document_formats::{DocumentFormatError, PayloadType};
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError};
use meilisearch_types::milli::OrderBy;
use serde_json::Value;
use tokio::task::JoinError;
#[derive(Debug, thiserror::Error)]
pub enum MeilisearchHttpError {
#[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))]
MissingContentType(Vec<String>),
#[error("The `/logs/stream` route is currently in use by someone else.")]
AlreadyUsedLogRoute,
#[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")]
CsvDelimiterWithWrongContentType(String),
#[error(
"The Content-Type `{0}` is invalid. Accepted values for the Content-Type header are: {}",
.1.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
)]
InvalidContentType(String, Vec<String>),
#[error("Document `{0}` not found.")]
DocumentNotFound(String),
#[error("Sending an empty filter is forbidden.")]
EmptyFilter,
#[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
InvalidExpression(&'static [&'static str], Value),
#[error("Using `federationOptions` is not allowed in a non-federated search.\n - Hint: remove `federationOptions` from query #{0} or add `federation` to the request.")]
FederationOptionsInNonFederatedRequest(usize),
#[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `{1}` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search")]
PaginationInFederatedQuery(usize, &'static str),
#[error("Inside `.queries[{0}]`: Using facet options is not allowed in federated queries.\n - Hint: remove `facets` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.facetsByIndex.{1}: {2:?}` for facets in federated search")]
FacetsInFederatedQuery(usize, String, Vec<String>),
#[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders {index_facet_order}.\n - Hint: Remove `federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.")]
InconsistentFacetOrder {
facet: String,
previous_facet_order: OrderBy,
previous_uid: String,
index_facet_order: OrderBy,
current_uid: String,
},
#[error("A {0} payload is missing.")]
MissingPayload(PayloadType),
#[error("Too many search requests running at the same time: {0}. Retry after 10s.")]
TooManySearchRequests(usize),
#[error("Internal error: Search limiter is down.")]
SearchLimiterIsDown,
#[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_u64(*.0 as u64).get_appropriate_unit(UnitType::Binary))]
PayloadTooLarge(usize),
#[error("Two indexes must be given for each swap. The list `[{}]` contains {} indexes.",
.0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len()
)]
SwapIndexPayloadWrongLength(Vec<IndexUid>),
#[error(transparent)]
IndexUid(#[from] IndexUidFormatError),
#[error(transparent)]
SerdeJson(#[from] serde_json::Error),
#[error(transparent)]
HeedError(#[from] meilisearch_types::heed::Error),
#[error(transparent)]
IndexScheduler(#[from] index_scheduler::Error),
#[error(transparent)]
Milli(#[from] meilisearch_types::milli::Error),
#[error(transparent)]
Payload(#[from] PayloadError),
#[error(transparent)]
FileStore(#[from] file_store::Error),
#[error(transparent)]
DocumentFormat(#[from] DocumentFormatError),
#[error(transparent)]
Join(#[from] JoinError),
#[error("Invalid request: missing `hybrid` parameter when `vector` is present.")]
MissingSearchHybrid,
}
impl ErrorCode for MeilisearchHttpError {
fn error_code(&self) -> Code {
match self {
MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType,
MeilisearchHttpError::AlreadyUsedLogRoute => Code::BadRequest,
MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType,
MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound,
MeilisearchHttpError::EmptyFilter => Code::InvalidDocumentFilter,
MeilisearchHttpError::InvalidExpression(_, _) => Code::InvalidSearchFilter,
MeilisearchHttpError::PayloadTooLarge(_) => Code::PayloadTooLarge,
MeilisearchHttpError::TooManySearchRequests(_) => Code::TooManySearchRequests,
MeilisearchHttpError::SearchLimiterIsDown => Code::Internal,
MeilisearchHttpError::SwapIndexPayloadWrongLength(_) => Code::InvalidSwapIndexes,
MeilisearchHttpError::IndexUid(e) => e.error_code(),
MeilisearchHttpError::SerdeJson(_) => Code::Internal,
MeilisearchHttpError::HeedError(_) => Code::Internal,
MeilisearchHttpError::IndexScheduler(e) => e.error_code(),
MeilisearchHttpError::Milli(e) => e.error_code(),
MeilisearchHttpError::Payload(e) => e.error_code(),
MeilisearchHttpError::FileStore(_) => Code::Internal,
MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
MeilisearchHttpError::Join(_) => Code::Internal,
MeilisearchHttpError::MissingSearchHybrid => Code::MissingSearchHybrid,
MeilisearchHttpError::FederationOptionsInNonFederatedRequest(_) => {
Code::InvalidMultiSearchFederationOptions
}
MeilisearchHttpError::PaginationInFederatedQuery(_, _) => {
Code::InvalidMultiSearchQueryPagination
}
MeilisearchHttpError::FacetsInFederatedQuery(..) => Code::InvalidMultiSearchQueryFacets,
MeilisearchHttpError::InconsistentFacetOrder { .. } => {
Code::InvalidMultiSearchFacetOrder
}
}
}
}
impl From<MeilisearchHttpError> for aweb::Error {
fn from(other: MeilisearchHttpError) -> Self {
aweb::Error::from(ResponseError::from(other))
}
}
impl From<aweb::error::PayloadError> for MeilisearchHttpError {
fn from(error: aweb::error::PayloadError) -> Self {
match error {
aweb::error::PayloadError::Incomplete(_) => MeilisearchHttpError::Payload(
PayloadError::Payload(ActixPayloadError::IncompleteError),
),
_ => MeilisearchHttpError::Payload(PayloadError::Payload(
ActixPayloadError::OtherError(error),
)),
}
}
}
#[derive(Debug, thiserror::Error)]
pub enum ActixPayloadError {
#[error("The provided payload is incomplete and cannot be parsed")]
IncompleteError,
#[error(transparent)]
OtherError(aweb::error::PayloadError),
}
#[derive(Debug, thiserror::Error)]
pub enum PayloadError {
#[error(transparent)]
Payload(ActixPayloadError),
#[error(transparent)]
Json(JsonPayloadError),
#[error(transparent)]
Query(QueryPayloadError),
#[error("The json payload provided is malformed. `{0}`.")]
MalformedPayload(serde_json::error::Error),
#[error("A json payload is missing.")]
MissingPayload,
#[error("Error while receiving the playload. `{0}`.")]
ReceivePayload(Box<dyn std::error::Error + Send + Sync + 'static>),
}
impl ErrorCode for PayloadError {
fn error_code(&self) -> Code {
match self {
PayloadError::Payload(e) => match e {
ActixPayloadError::IncompleteError => Code::BadRequest,
ActixPayloadError::OtherError(error) => match error {
aweb::error::PayloadError::EncodingCorrupted => Code::Internal,
aweb::error::PayloadError::Overflow => Code::PayloadTooLarge,
aweb::error::PayloadError::UnknownLength => Code::Internal,
aweb::error::PayloadError::Http2Payload(_) => Code::Internal,
aweb::error::PayloadError::Io(_) => Code::Internal,
_ => todo!(),
},
},
PayloadError::Json(err) => match err {
JsonPayloadError::Overflow { .. } => Code::PayloadTooLarge,
JsonPayloadError::ContentType => Code::UnsupportedMediaType,
JsonPayloadError::Payload(aweb::error::PayloadError::Overflow) => {
Code::PayloadTooLarge
}
JsonPayloadError::Payload(_) => Code::BadRequest,
JsonPayloadError::Deserialize(_) => Code::BadRequest,
JsonPayloadError::Serialize(_) => Code::Internal,
_ => Code::Internal,
},
PayloadError::Query(err) => match err {
QueryPayloadError::Deserialize(_) => Code::BadRequest,
_ => Code::Internal,
},
PayloadError::MissingPayload => Code::MissingPayload,
PayloadError::MalformedPayload(_) => Code::MalformedPayload,
PayloadError::ReceivePayload(_) => Code::Internal,
}
}
}
impl From<JsonPayloadError> for PayloadError {
fn from(other: JsonPayloadError) -> Self {
match other {
JsonPayloadError::Deserialize(e)
if e.classify() == serde_json::error::Category::Eof
&& e.line() == 1
&& e.column() == 0 =>
{
Self::MissingPayload
}
JsonPayloadError::Deserialize(e)
if e.classify() != serde_json::error::Category::Data =>
{
Self::MalformedPayload(e)
}
_ => Self::Json(other),
}
}
}
impl From<QueryPayloadError> for PayloadError {
fn from(other: QueryPayloadError) -> Self {
Self::Query(other)
}
}
impl From<PayloadError> for aweb::Error {
fn from(other: PayloadError) -> Self {
aweb::Error::from(ResponseError::from(other))
}
}

View file

@ -0,0 +1,25 @@
use meilisearch_types::error::{Code, ErrorCode};
#[derive(Debug, thiserror::Error)]
pub enum AuthenticationError {
#[error("The Authorization header is missing. It must use the bearer authorization method.")]
MissingAuthorizationHeader,
#[error("The provided API key is invalid.")]
InvalidToken,
// Triggered on configuration error.
#[error("An internal error has occurred. `Irretrievable state`.")]
IrretrievableState,
#[error("Meilisearch is running without a master key. To access this API endpoint, you must have set a master key at launch.")]
MissingMasterKey,
}
impl ErrorCode for AuthenticationError {
fn error_code(&self) -> Code {
match self {
AuthenticationError::MissingAuthorizationHeader => Code::MissingAuthorizationHeader,
AuthenticationError::InvalidToken => Code::InvalidApiKey,
AuthenticationError::IrretrievableState => Code::Internal,
AuthenticationError::MissingMasterKey => Code::MissingMasterKey,
}
}
}

View file

@ -0,0 +1,341 @@
mod error;
use std::marker::PhantomData;
use std::ops::Deref;
use std::pin::Pin;
use actix_web::web::Data;
use actix_web::FromRequest;
pub use error::AuthenticationError;
use futures::future::err;
use futures::Future;
use meilisearch_auth::{AuthController, AuthFilter};
use meilisearch_types::error::{Code, ResponseError};
use self::policies::AuthError;
pub struct GuardedData<P, D> {
data: D,
filters: AuthFilter,
_marker: PhantomData<P>,
}
impl<P, D> GuardedData<P, D> {
pub fn filters(&self) -> &AuthFilter {
&self.filters
}
async fn auth_bearer(
auth: Data<AuthController>,
token: String,
index: Option<String>,
data: Option<D>,
) -> Result<Self, ResponseError>
where
P: Policy + 'static,
{
let missing_master_key = auth.get_master_key().is_none();
match Self::authenticate(auth, token, index).await? {
Ok(filters) => match data {
Some(data) => Ok(Self { data, filters, _marker: PhantomData }),
None => Err(AuthenticationError::IrretrievableState.into()),
},
Err(_) if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
Err(e) => Err(ResponseError::from_msg(e.to_string(), Code::InvalidApiKey)),
}
}
async fn auth_token(auth: Data<AuthController>, data: Option<D>) -> Result<Self, ResponseError>
where
P: Policy + 'static,
{
let missing_master_key = auth.get_master_key().is_none();
match Self::authenticate(auth, String::new(), None).await? {
Ok(filters) => match data {
Some(data) => Ok(Self { data, filters, _marker: PhantomData }),
None => Err(AuthenticationError::IrretrievableState.into()),
},
Err(_) if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
Err(_) => Err(AuthenticationError::MissingAuthorizationHeader.into()),
}
}
async fn authenticate(
auth: Data<AuthController>,
token: String,
index: Option<String>,
) -> Result<Result<AuthFilter, AuthError>, ResponseError>
where
P: Policy + 'static,
{
tokio::task::spawn_blocking(move || P::authenticate(auth, token.as_ref(), index.as_deref()))
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))
}
}
impl<P, D> Deref for GuardedData<P, D> {
type Target = D;
fn deref(&self) -> &Self::Target {
&self.data
}
}
impl<P: Policy + 'static, D: 'static + Clone> FromRequest for GuardedData<P, D> {
type Error = ResponseError;
type Future = Pin<Box<dyn Future<Output = Result<Self, Self::Error>>>>;
fn from_request(
req: &actix_web::HttpRequest,
_payload: &mut actix_web::dev::Payload,
) -> Self::Future {
match req.app_data::<Data<AuthController>>().cloned() {
Some(auth) => match req
.headers()
.get("Authorization")
.map(|type_token| type_token.to_str().unwrap_or_default().splitn(2, ' '))
{
Some(mut type_token) => match type_token.next() {
Some("Bearer") => {
// TODO: find a less hardcoded way?
let index = req.match_info().get("index_uid");
match type_token.next() {
Some(token) => Box::pin(Self::auth_bearer(
auth,
token.to_string(),
index.map(String::from),
req.app_data::<D>().cloned(),
)),
None => Box::pin(err(AuthenticationError::InvalidToken.into())),
}
}
_otherwise => {
Box::pin(err(AuthenticationError::MissingAuthorizationHeader.into()))
}
},
None => Box::pin(Self::auth_token(auth, req.app_data::<D>().cloned())),
},
None => Box::pin(err(AuthenticationError::IrretrievableState.into())),
}
}
}
pub trait Policy {
fn authenticate(
auth: Data<AuthController>,
token: &str,
index: Option<&str>,
) -> Result<AuthFilter, policies::AuthError>;
}
pub mod policies {
use actix_web::web::Data;
use jsonwebtoken::{decode, Algorithm, DecodingKey, Validation};
use meilisearch_auth::{AuthController, AuthFilter, SearchRules};
use meilisearch_types::error::{Code, ErrorCode};
// reexport actions in policies in order to be used in routes configuration.
pub use meilisearch_types::keys::{actions, Action};
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use uuid::Uuid;
use crate::extractors::authentication::Policy;
enum TenantTokenOutcome {
NotATenantToken,
Valid(Uuid, SearchRules),
}
#[derive(thiserror::Error, Debug)]
pub enum AuthError {
#[error("Tenant token expired. Was valid up to `{exp}` and we're now `{now}`.")]
ExpiredTenantToken { exp: i64, now: i64 },
#[error("The provided API key is invalid.")]
InvalidApiKey,
#[error("The provided tenant token cannot acces the index `{index}`, allowed indexes are {allowed:?}.")]
TenantTokenAccessingnUnauthorizedIndex { index: String, allowed: Vec<String> },
#[error(
"The API key used to generate this tenant token cannot acces the index `{index}`."
)]
TenantTokenApiKeyAccessingnUnauthorizedIndex { index: String },
#[error(
"The API key cannot acces the index `{index}`, authorized indexes are {allowed:?}."
)]
ApiKeyAccessingnUnauthorizedIndex { index: String, allowed: Vec<String> },
#[error("The provided tenant token is invalid.")]
InvalidTenantToken,
#[error("Could not decode tenant token, {0}.")]
CouldNotDecodeTenantToken(jsonwebtoken::errors::Error),
#[error("Invalid action `{0}`.")]
InternalInvalidAction(u8),
}
impl From<jsonwebtoken::errors::Error> for AuthError {
fn from(error: jsonwebtoken::errors::Error) -> Self {
use jsonwebtoken::errors::ErrorKind;
match error.kind() {
ErrorKind::InvalidToken => AuthError::InvalidTenantToken,
_ => AuthError::CouldNotDecodeTenantToken(error),
}
}
}
impl ErrorCode for AuthError {
fn error_code(&self) -> Code {
match self {
AuthError::InternalInvalidAction(_) => Code::Internal,
_ => Code::InvalidApiKey,
}
}
}
fn tenant_token_validation() -> Validation {
let mut validation = Validation::default();
validation.validate_exp = false;
validation.required_spec_claims.remove("exp");
validation.algorithms = vec![Algorithm::HS256, Algorithm::HS384, Algorithm::HS512];
validation
}
/// Extracts the key id used to sign the payload, without performing any validation.
fn extract_key_id(token: &str) -> Result<Uuid, AuthError> {
let mut validation = tenant_token_validation();
validation.insecure_disable_signature_validation();
let dummy_key = DecodingKey::from_secret(b"secret");
let token_data = decode::<Claims>(token, &dummy_key, &validation)?;
// get token fields without validating it.
let Claims { api_key_uid, .. } = token_data.claims;
Ok(api_key_uid)
}
fn is_keys_action(action: u8) -> bool {
use actions::*;
matches!(action, KEYS_GET | KEYS_CREATE | KEYS_UPDATE | KEYS_DELETE)
}
pub struct ActionPolicy<const A: u8>;
impl<const A: u8> Policy for ActionPolicy<A> {
/// Attempts to grant authentication from a bearer token (that can be a tenant token or an API key), the requested Action,
/// and a list of requested indexes.
///
/// If the bearer token is not allowed for the specified indexes and action, returns `None`.
/// Otherwise, returns an object containing the generated permissions: the search filters to add to a search, and the list of allowed indexes
/// (that may contain more indexes than requested).
fn authenticate(
auth: Data<AuthController>,
token: &str,
index: Option<&str>,
) -> Result<AuthFilter, AuthError> {
// authenticate if token is the master key.
// Without a master key, all routes are accessible except the key-related routes.
if auth.get_master_key().map_or_else(|| !is_keys_action(A), |mk| mk == token) {
return Ok(AuthFilter::default());
}
let (key_uuid, search_rules) =
match ActionPolicy::<A>::authenticate_tenant_token(&auth, token) {
Ok(TenantTokenOutcome::Valid(key_uuid, search_rules)) => {
(key_uuid, Some(search_rules))
}
Ok(TenantTokenOutcome::NotATenantToken)
| Err(AuthError::InvalidTenantToken) => (
auth.get_optional_uid_from_encoded_key(token.as_bytes())
.map_err(|_e| AuthError::InvalidApiKey)?
.ok_or(AuthError::InvalidApiKey)?,
None,
),
Err(e) => return Err(e),
};
// check that the indexes are allowed
let action = Action::from_repr(A).ok_or(AuthError::InternalInvalidAction(A))?;
let auth_filter = auth
.get_key_filters(key_uuid, search_rules)
.map_err(|_e| AuthError::InvalidApiKey)?;
// First check if the index is authorized in the tenant token, this is a public
// information, we can return a nice error message.
if let Some(index) = index {
if !auth_filter.tenant_token_is_index_authorized(index) {
return Err(AuthError::TenantTokenAccessingnUnauthorizedIndex {
index: index.to_string(),
allowed: auth_filter.tenant_token_list_index_authorized(),
});
}
if !auth_filter.api_key_is_index_authorized(index) {
if auth_filter.is_tenant_token() {
// If the error comes from a tenant token we cannot share the list
// of authorized indexes in the API key. This is not public information.
return Err(AuthError::TenantTokenApiKeyAccessingnUnauthorizedIndex {
index: index.to_string(),
});
} else {
// Otherwise we can share the list
// of authorized indexes in the API key.
return Err(AuthError::ApiKeyAccessingnUnauthorizedIndex {
index: index.to_string(),
allowed: auth_filter.api_key_list_index_authorized(),
});
}
}
}
if auth.is_key_authorized(key_uuid, action, index).unwrap_or(false) {
return Ok(auth_filter);
}
Err(AuthError::InvalidApiKey)
}
}
impl<const A: u8> ActionPolicy<A> {
fn authenticate_tenant_token(
auth: &AuthController,
token: &str,
) -> Result<TenantTokenOutcome, AuthError> {
// Only search action can be accessed by a tenant token.
if A != actions::SEARCH {
return Ok(TenantTokenOutcome::NotATenantToken);
}
let uid = extract_key_id(token)?;
// Check if tenant token is valid.
let key = if let Some(key) = auth.generate_key(uid) {
key
} else {
return Err(AuthError::InvalidTenantToken);
};
let data = decode::<Claims>(
token,
&DecodingKey::from_secret(key.as_bytes()),
&tenant_token_validation(),
)?;
// Check if token is expired.
if let Some(exp) = data.claims.exp {
let now = OffsetDateTime::now_utc().unix_timestamp();
if now > exp {
return Err(AuthError::ExpiredTenantToken { exp, now });
}
}
Ok(TenantTokenOutcome::Valid(uid, data.claims.search_rules))
}
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
struct Claims {
search_rules: SearchRules,
exp: Option<i64>,
api_key_uid: Uuid,
}
}

View file

@ -0,0 +1,4 @@
pub mod payload;
#[macro_use]
pub mod authentication;
pub mod sequential_extractor;

View file

@ -0,0 +1,110 @@
use std::pin::Pin;
use std::task::{Context, Poll};
use actix_http::encoding::Decoder as Decompress;
use actix_web::{dev, web, FromRequest, HttpRequest};
use futures::future::{ready, Ready};
use futures::Stream;
use crate::error::MeilisearchHttpError;
pub struct Payload {
payload: Decompress<dev::Payload>,
limit: usize,
remaining: usize,
}
pub struct PayloadConfig {
limit: usize,
}
impl PayloadConfig {
pub fn new(limit: usize) -> Self {
Self { limit }
}
}
impl Default for PayloadConfig {
fn default() -> Self {
Self { limit: 256 * 1024 }
}
}
impl FromRequest for Payload {
type Error = MeilisearchHttpError;
type Future = Ready<Result<Payload, Self::Error>>;
#[inline]
fn from_request(req: &HttpRequest, payload: &mut dev::Payload) -> Self::Future {
let limit = req
.app_data::<PayloadConfig>()
.map(|c| c.limit)
.unwrap_or(PayloadConfig::default().limit);
ready(Ok(Payload {
payload: Decompress::from_headers(payload.take(), req.headers()),
limit,
remaining: limit,
}))
}
}
impl Stream for Payload {
type Item = Result<web::Bytes, MeilisearchHttpError>;
#[inline]
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
match Pin::new(&mut self.payload).poll_next(cx) {
Poll::Ready(Some(result)) => match result {
Ok(bytes) => match self.remaining.checked_sub(bytes.len()) {
Some(new_limit) => {
self.remaining = new_limit;
Poll::Ready(Some(Ok(bytes)))
}
None => {
Poll::Ready(Some(Err(MeilisearchHttpError::PayloadTooLarge(self.limit))))
}
},
x => Poll::Ready(Some(x.map_err(MeilisearchHttpError::from))),
},
otherwise => otherwise.map(|o| o.map(|o| o.map_err(MeilisearchHttpError::from))),
}
}
}
#[cfg(test)]
mod tests {
use actix_http::encoding::Decoder as Decompress;
use actix_http::BoxedPayloadStream;
use bytes::Bytes;
use futures_util::StreamExt;
use meili_snap::snapshot;
use super::*;
#[actix_rt::test]
async fn payload_to_large() {
let stream = futures::stream::iter(vec![
Ok(Bytes::from("1")),
Ok(Bytes::from("2")),
Ok(Bytes::from("3")),
Ok(Bytes::from("4")),
]);
let boxed_stream: BoxedPayloadStream = Box::pin(stream);
let actix_payload = dev::Payload::from(boxed_stream);
let payload = Payload {
limit: 3,
remaining: 3,
payload: Decompress::new(actix_payload, actix_http::ContentEncoding::Identity),
};
let mut enumerated_payload_stream = payload.enumerate();
while let Some((idx, chunk)) = enumerated_payload_stream.next().await {
if idx == 3 {
snapshot!(chunk.unwrap_err(), @"The provided payload reached the size limit. The maximum accepted payload size is 3 B.");
}
}
}
}

View file

@ -0,0 +1,152 @@
#![allow(non_snake_case)]
use std::future::Future;
use std::pin::Pin;
use std::task::Poll;
use actix_web::dev::Payload;
use actix_web::{FromRequest, Handler, HttpRequest};
use pin_project_lite::pin_project;
/// `SeqHandler` is an actix `Handler` that enforces that extractors errors are returned in the
/// same order as they are defined in the wrapped handler. This is needed because, by default, actix
/// resolves the extractors concurrently, whereas we always need the authentication extractor to
/// throw first.
#[derive(Clone)]
pub struct SeqHandler<H>(pub H);
pub struct SeqFromRequest<T>(T);
/// This macro implements `FromRequest` for arbitrary arity handler, except for one, which is
/// useless anyway.
macro_rules! gen_seq {
($ty:ident; $($T:ident)+) => {
pin_project! {
pub struct $ty<$($T: FromRequest), +> {
$(
#[pin]
$T: ExtractFuture<$T::Future, $T, $T::Error>,
)+
}
}
impl<$($T: FromRequest), +> Future for $ty<$($T),+> {
type Output = Result<SeqFromRequest<($($T),+)>, actix_web::Error>;
fn poll(self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll<Self::Output> {
let mut this = self.project();
let mut count_fut = 0;
let mut count_finished = 0;
$(
count_fut += 1;
match this.$T.as_mut().project() {
ExtractProj::Future { fut } => match fut.poll(cx) {
Poll::Ready(Ok(output)) => {
count_finished += 1;
let _ = this
.$T
.as_mut()
.project_replace(ExtractFuture::Done { output });
}
Poll::Ready(Err(error)) => {
count_finished += 1;
let _ = this
.$T
.as_mut()
.project_replace(ExtractFuture::Error { error });
}
Poll::Pending => (),
},
ExtractProj::Done { .. } => count_finished += 1,
ExtractProj::Error { .. } => {
// short circuit if all previous are finished and we had an error.
if count_finished == count_fut {
match this.$T.project_replace(ExtractFuture::Empty) {
ExtractReplaceProj::Error { error } => {
return Poll::Ready(Err(error.into()))
}
_ => unreachable!("Invalid future state"),
}
} else {
count_finished += 1;
}
}
ExtractProj::Empty => unreachable!("From request polled after being finished. {}", stringify!($T)),
}
)+
if count_fut == count_finished {
let result = (
$(
match this.$T.project_replace(ExtractFuture::Empty) {
ExtractReplaceProj::Done { output } => output,
ExtractReplaceProj::Error { error } => return Poll::Ready(Err(error.into())),
_ => unreachable!("Invalid future state"),
},
)+
);
Poll::Ready(Ok(SeqFromRequest(result)))
} else {
Poll::Pending
}
}
}
impl<$($T: FromRequest,)+> FromRequest for SeqFromRequest<($($T,)+)> {
type Error = actix_web::Error;
type Future = $ty<$($T),+>;
fn from_request(req: &HttpRequest, payload: &mut Payload) -> Self::Future {
$ty {
$(
$T: ExtractFuture::Future {
fut: $T::from_request(req, payload),
},
)+
}
}
}
impl<Han, $($T: FromRequest),+> Handler<SeqFromRequest<($($T),+)>> for SeqHandler<Han>
where
Han: Handler<($($T),+)>,
{
type Output = Han::Output;
type Future = Han::Future;
fn call(&self, args: SeqFromRequest<($($T),+)>) -> Self::Future {
self.0.call(args.0)
}
}
};
}
// Not working for a single argument, but then, it is not really necessary.
// gen_seq! { SeqFromRequestFut1; A }
gen_seq! { SeqFromRequestFut2; A B }
gen_seq! { SeqFromRequestFut3; A B C }
gen_seq! { SeqFromRequestFut4; A B C D }
gen_seq! { SeqFromRequestFut5; A B C D E }
gen_seq! { SeqFromRequestFut6; A B C D E F }
gen_seq! { SeqFromRequestFut7; A B C D E F G }
pin_project! {
#[project = ExtractProj]
#[project_replace = ExtractReplaceProj]
enum ExtractFuture<Fut, Res, Err> {
Future {
#[pin]
fut: Fut,
},
Done {
output: Res,
},
Error {
error: Err,
},
Empty,
}
}

View file

@ -0,0 +1,545 @@
#![allow(rustdoc::private_intra_doc_links)]
#[macro_use]
pub mod error;
pub mod analytics;
#[macro_use]
pub mod extractors;
pub mod metrics;
pub mod middleware;
pub mod option;
pub mod routes;
pub mod search;
pub mod search_queue;
use std::fs::File;
use std::io::{BufReader, BufWriter};
use std::path::Path;
use std::str::FromStr;
use std::sync::Arc;
use std::thread;
use std::time::Duration;
use actix_cors::Cors;
use actix_http::body::MessageBody;
use actix_web::dev::{ServiceFactory, ServiceResponse};
use actix_web::error::JsonPayloadError;
use actix_web::http::header::{CONTENT_TYPE, USER_AGENT};
use actix_web::web::Data;
use actix_web::{web, HttpRequest};
use analytics::Analytics;
use anyhow::bail;
use error::PayloadError;
use extractors::payload::PayloadConfig;
use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
use meilisearch_auth::AuthController;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
use meilisearch_types::settings::apply_settings_to_builder;
use meilisearch_types::tasks::KindWithContent;
use meilisearch_types::versioning::{check_version_file, create_current_version_file};
use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
pub use option::Opt;
use option::ScheduleSnapshot;
use search_queue::SearchQueue;
use tracing::{error, info_span};
use tracing_subscriber::filter::Targets;
use crate::error::MeilisearchHttpError;
/// Default number of simultaneously opened indexes.
///
/// This value is used when dynamic computation of how many indexes can be opened at once was skipped (e.g., in tests).
///
/// Lower for Windows that dedicates a smaller virtual address space to processes.
///
/// The value was chosen this way:
///
/// - Windows provides a small virtual address space of about 10TiB to processes.
/// - The chosen value allows for indexes to use the default map size of 2TiB safely.
#[cfg(windows)]
const DEFAULT_INDEX_COUNT: usize = 4;
/// Default number of simultaneously opened indexes.
///
/// This value is used when dynamic computation of how many indexes can be opened at once was skipped (e.g., in tests).
///
/// The higher, the better for avoiding reopening indexes.
///
/// The value was chosen this way:
///
/// - Opening an index consumes a file descriptor.
/// - The default on many unices is about 256 file descriptors for a process.
/// - 100 is a little bit less than half this value.
/// - The chosen value allows for indexes to use the default map size of 2TiB safely.
#[cfg(not(windows))]
const DEFAULT_INDEX_COUNT: usize = 20;
/// Check if a db is empty. It does not provide any information on the
/// validity of the data in it.
/// We consider a database as non empty when it's a non empty directory.
fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
let db_path = db_path.as_ref();
if !db_path.exists() {
true
// if we encounter an error or if the db is a file we consider the db non empty
} else if let Ok(dir) = db_path.read_dir() {
dir.count() == 0
} else {
true
}
}
/// The handle used to update the logs at runtime. Must be accessible from the `main.rs` and the `route/logs.rs`.
pub type LogRouteHandle =
tracing_subscriber::reload::Handle<LogRouteType, tracing_subscriber::Registry>;
pub type LogRouteType = tracing_subscriber::filter::Filtered<
Option<Box<dyn tracing_subscriber::Layer<tracing_subscriber::Registry> + Send + Sync>>,
Targets,
tracing_subscriber::Registry,
>;
pub type SubscriberForSecondLayer = tracing_subscriber::layer::Layered<
tracing_subscriber::reload::Layer<LogRouteType, tracing_subscriber::Registry>,
tracing_subscriber::Registry,
>;
pub type LogStderrHandle =
tracing_subscriber::reload::Handle<LogStderrType, SubscriberForSecondLayer>;
pub type LogStderrType = tracing_subscriber::filter::Filtered<
Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>,
Targets,
SubscriberForSecondLayer,
>;
pub fn create_app(
index_scheduler: Data<IndexScheduler>,
auth_controller: Data<AuthController>,
search_queue: Data<SearchQueue>,
opt: Opt,
logs: (LogRouteHandle, LogStderrHandle),
analytics: Data<Analytics>,
enable_dashboard: bool,
) -> actix_web::App<
impl ServiceFactory<
actix_web::dev::ServiceRequest,
Config = (),
Response = ServiceResponse<impl MessageBody>,
Error = actix_web::Error,
InitError = (),
>,
> {
let app = actix_web::App::new()
.configure(|s| {
configure_data(
s,
index_scheduler.clone(),
auth_controller.clone(),
search_queue.clone(),
&opt,
logs,
analytics.clone(),
)
})
.configure(routes::configure)
.configure(|s| dashboard(s, enable_dashboard));
let app = app.wrap(middleware::RouteMetrics);
app.wrap(
Cors::default()
.send_wildcard()
.allow_any_header()
.allow_any_origin()
.allow_any_method()
.max_age(86_400), // 24h
)
.wrap(tracing_actix_web::TracingLogger::<AwebTracingLogger>::new())
.wrap(actix_web::middleware::Compress::default())
.wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim))
}
struct AwebTracingLogger;
impl tracing_actix_web::RootSpanBuilder for AwebTracingLogger {
fn on_request_start(request: &actix_web::dev::ServiceRequest) -> tracing::Span {
use tracing::field::Empty;
let conn_info = request.connection_info();
let headers = request.headers();
let user_agent = headers
.get(USER_AGENT)
.map(|value| String::from_utf8_lossy(value.as_bytes()).into_owned())
.unwrap_or_default();
info_span!("HTTP request", method = %request.method(), host = conn_info.host(), route = %request.path(), query_parameters = %request.query_string(), %user_agent, status_code = Empty, error = Empty)
}
fn on_request_end<B: MessageBody>(
span: tracing::Span,
outcome: &Result<ServiceResponse<B>, actix_web::Error>,
) {
match &outcome {
Ok(response) => {
let code: i32 = response.response().status().as_u16().into();
span.record("status_code", code);
if let Some(error) = response.response().error() {
// use the status code already constructed for the outgoing HTTP response
span.record("error", &tracing::field::display(error.as_response_error()));
}
}
Err(error) => {
let code: i32 = error.error_response().status().as_u16().into();
span.record("status_code", code);
span.record("error", &tracing::field::display(error.as_response_error()));
}
};
}
}
enum OnFailure {
RemoveDb,
KeepDb,
}
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
let empty_db = is_empty_db(&opt.db_path);
let (index_scheduler, auth_controller) = if let Some(ref snapshot_path) = opt.import_snapshot {
let snapshot_path_exists = snapshot_path.exists();
// the db is empty and the snapshot exists, import it
if empty_db && snapshot_path_exists {
match compression::from_tar_gz(snapshot_path, &opt.db_path) {
Ok(()) => open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?,
Err(e) => {
std::fs::remove_dir_all(&opt.db_path)?;
return Err(e);
}
}
// the db already exists and we should not ignore the snapshot => throw an error
} else if !empty_db && !opt.ignore_snapshot_if_db_exists {
bail!(
"database already exists at {:?}, try to delete it or rename it",
opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned())
)
// the snapshot doesn't exist and we can't ignore it => throw an error
} else if !snapshot_path_exists && !opt.ignore_missing_snapshot {
bail!("snapshot doesn't exist at {}", snapshot_path.display())
// the snapshot and the db exist, and we can ignore the snapshot because of the ignore_snapshot_if_db_exists flag
} else {
open_or_create_database(opt, empty_db)?
}
} else if let Some(ref path) = opt.import_dump {
let src_path_exists = path.exists();
// the db is empty and the dump exists, import it
if empty_db && src_path_exists {
let (mut index_scheduler, mut auth_controller) =
open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?;
match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
Ok(()) => (index_scheduler, auth_controller),
Err(e) => {
std::fs::remove_dir_all(&opt.db_path)?;
return Err(e);
}
}
// the db already exists and we should not ignore the dump option => throw an error
} else if !empty_db && !opt.ignore_dump_if_db_exists {
bail!(
"database already exists at {:?}, try to delete it or rename it",
opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned())
)
// the dump doesn't exist and we can't ignore it => throw an error
} else if !src_path_exists && !opt.ignore_missing_dump {
bail!("dump doesn't exist at {:?}", path)
// the dump and the db exist and we can ignore the dump because of the ignore_dump_if_db_exists flag
// or, the dump is missing but we can ignore that because of the ignore_missing_dump flag
} else {
open_or_create_database(opt, empty_db)?
}
} else {
open_or_create_database(opt, empty_db)?
};
// We create a loop in a thread that registers snapshotCreation tasks
let index_scheduler = Arc::new(index_scheduler);
let auth_controller = Arc::new(auth_controller);
if let ScheduleSnapshot::Enabled(snapshot_delay) = opt.schedule_snapshot {
let snapshot_delay = Duration::from_secs(snapshot_delay);
let index_scheduler = index_scheduler.clone();
thread::Builder::new()
.name(String::from("register-snapshot-tasks"))
.spawn(move || loop {
thread::sleep(snapshot_delay);
if let Err(e) =
index_scheduler.register(KindWithContent::SnapshotCreation, None, false)
{
error!("Error while registering snapshot: {}", e);
}
})
.unwrap();
}
Ok((index_scheduler, auth_controller))
}
/// Try to start the IndexScheduler and AuthController without checking the VERSION file or anything.
fn open_or_create_database_unchecked(
opt: &Opt,
on_failure: OnFailure,
) -> anyhow::Result<(IndexScheduler, AuthController)> {
// we don't want to create anything in the data.ms yet, thus we
// wrap our two builders in a closure that'll be executed later.
let auth_controller = AuthController::new(&opt.db_path, &opt.master_key);
let instance_features = opt.to_instance_features();
let index_scheduler_builder = || -> anyhow::Result<_> {
Ok(IndexScheduler::new(IndexSchedulerOptions {
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
auth_path: opt.db_path.join("auth"),
tasks_path: opt.db_path.join("tasks"),
update_file_path: opt.db_path.join("update_files"),
indexes_path: opt.db_path.join("indexes"),
snapshots_path: opt.snapshot_dir.clone(),
dumps_path: opt.dump_dir.clone(),
webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
webhook_authorization_header: opt.task_webhook_authorization_header.clone(),
task_db_size: opt.max_task_db_size.as_u64() as usize,
index_base_map_size: opt.max_index_size.as_u64() as usize,
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: true,
cleanup_enabled: !opt.experimental_replication_parameters,
max_number_of_tasks: 1_000_000,
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize,
index_count: DEFAULT_INDEX_COUNT,
instance_features,
})?)
};
match (
index_scheduler_builder(),
auth_controller.map_err(anyhow::Error::from),
create_current_version_file(&opt.db_path).map_err(anyhow::Error::from),
) {
(Ok(i), Ok(a), Ok(())) => Ok((i, a)),
(Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => {
if matches!(on_failure, OnFailure::RemoveDb) {
std::fs::remove_dir_all(&opt.db_path)?;
}
Err(e)
}
}
}
/// Ensure you're in a valid state and open the IndexScheduler + AuthController for you.
fn open_or_create_database(
opt: &Opt,
empty_db: bool,
) -> anyhow::Result<(IndexScheduler, AuthController)> {
if !empty_db {
check_version_file(&opt.db_path)?;
}
open_or_create_database_unchecked(opt, OnFailure::KeepDb)
}
fn import_dump(
db_path: &Path,
dump_path: &Path,
index_scheduler: &mut IndexScheduler,
auth: &mut AuthController,
) -> Result<(), anyhow::Error> {
let reader = File::open(dump_path)?;
let mut dump_reader = dump::DumpReader::open(reader)?;
if let Some(date) = dump_reader.date() {
tracing::info!(
version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
%date,
"Importing a dump of meilisearch"
);
} else {
tracing::info!(
version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
"Importing a dump of meilisearch",
);
}
let instance_uid = dump_reader.instance_uid()?;
// 1. Import the instance-uid.
if let Some(ref instance_uid) = instance_uid {
// we don't want to panic if there is an error with the instance-uid.
let _ = std::fs::write(db_path.join("instance-uid"), instance_uid.to_string().as_bytes());
};
// 2. Import the `Key`s.
let mut keys = Vec::new();
auth.raw_delete_all_keys()?;
for key in dump_reader.keys()? {
let key = key?;
auth.raw_insert_key(key.clone())?;
keys.push(key);
}
// 3. Import the runtime features.
let features = dump_reader.features()?.unwrap_or_default();
index_scheduler.put_runtime_features(features)?;
let indexer_config = index_scheduler.indexer_config();
// /!\ The tasks must be imported AFTER importing the indexes or else the scheduler might
// try to process tasks while we're trying to import the indexes.
// 4. Import the indexes.
for index_reader in dump_reader.indexes()? {
let mut index_reader = index_reader?;
let metadata = index_reader.metadata();
tracing::info!("Importing index `{}`.", metadata.uid);
let date = Some((metadata.created_at, metadata.updated_at));
let index = index_scheduler.create_raw_index(&metadata.uid, date)?;
let mut wtxn = index.write_txn()?;
let mut builder = milli::update::Settings::new(&mut wtxn, &index, indexer_config);
// 4.1 Import the primary key if there is one.
if let Some(ref primary_key) = metadata.primary_key {
builder.set_primary_key(primary_key.to_string());
}
// 4.2 Import the settings.
tracing::info!("Importing the settings.");
let settings = index_reader.settings()?;
apply_settings_to_builder(&settings, &mut builder);
builder
.execute(|indexing_step| tracing::debug!("update: {:?}", indexing_step), || false)?;
// 4.3 Import the documents.
// 4.3.1 We need to recreate the grenad+obkv format accepted by the index.
tracing::info!("Importing the documents.");
let file = tempfile::tempfile()?;
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
for document in index_reader.documents()? {
builder.append_json_object(&document?)?;
}
// This flush the content of the batch builder.
let file = builder.into_inner()?.into_inner()?;
// 4.3.2 We feed it to the milli index.
let reader = BufReader::new(file);
let reader = DocumentsBatchReader::from_reader(reader)?;
let embedder_configs = index.embedding_configs(&wtxn)?;
let embedders = index_scheduler.embedders(embedder_configs)?;
let builder = milli::update::IndexDocuments::new(
&mut wtxn,
&index,
indexer_config,
IndexDocumentsConfig {
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
},
|indexing_step| tracing::trace!("update: {:?}", indexing_step),
|| false,
)?;
let builder = builder.with_embedders(embedders);
let (builder, user_result) = builder.add_documents(reader)?;
let user_result = user_result?;
tracing::info!(documents_found = user_result, "{} documents found.", user_result);
builder.execute()?;
wtxn.commit()?;
tracing::info!("All documents successfully imported.");
}
let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
// 5. Import the tasks.
for ret in dump_reader.tasks()? {
let (task, file) = ret?;
index_scheduler_dump.register_dumped_task(task, file)?;
}
Ok(index_scheduler_dump.finish()?)
}
pub fn configure_data(
config: &mut web::ServiceConfig,
index_scheduler: Data<IndexScheduler>,
auth: Data<AuthController>,
search_queue: Data<SearchQueue>,
opt: &Opt,
(logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle),
analytics: Data<Analytics>,
) {
let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize;
config
.app_data(index_scheduler)
.app_data(auth)
.app_data(search_queue)
.app_data(analytics)
.app_data(web::Data::new(logs_route))
.app_data(web::Data::new(logs_stderr))
.app_data(web::Data::new(opt.clone()))
.app_data(
web::JsonConfig::default()
.limit(http_payload_size_limit)
.content_type(|mime| mime == mime::APPLICATION_JSON)
.error_handler(|err, req: &HttpRequest| match err {
JsonPayloadError::ContentType => match req.headers().get(CONTENT_TYPE) {
Some(content_type) => MeilisearchHttpError::InvalidContentType(
content_type.to_str().unwrap_or("unknown").to_string(),
vec![mime::APPLICATION_JSON.to_string()],
)
.into(),
None => MeilisearchHttpError::MissingContentType(vec![
mime::APPLICATION_JSON.to_string(),
])
.into(),
},
err => PayloadError::from(err).into(),
}),
)
.app_data(PayloadConfig::new(http_payload_size_limit))
.app_data(
web::QueryConfig::default().error_handler(|err, _req| PayloadError::from(err).into()),
);
}
#[cfg(feature = "mini-dashboard")]
pub fn dashboard(config: &mut web::ServiceConfig, enable_frontend: bool) {
use actix_web::HttpResponse;
use static_files::Resource;
mod generated {
include!(concat!(env!("OUT_DIR"), "/generated.rs"));
}
if enable_frontend {
let generated = generated::generate();
// Generate routes for mini-dashboard assets
for (path, resource) in generated.into_iter() {
let Resource { mime_type, data, .. } = resource;
// Redirect index.html to /
if path == "index.html" {
config.service(web::resource("/").route(web::get().to(move || async move {
HttpResponse::Ok().content_type(mime_type).body(data)
})));
} else {
config.service(web::resource(path).route(web::get().to(move || async move {
HttpResponse::Ok().content_type(mime_type).body(data)
})));
}
}
} else {
config.service(web::resource("/").route(web::get().to(routes::running)));
}
}
#[cfg(not(feature = "mini-dashboard"))]
pub fn dashboard(config: &mut web::ServiceConfig, _enable_frontend: bool) {
config.service(web::resource("/").route(web::get().to(routes::running)));
}

View file

@ -0,0 +1,331 @@
use std::env;
use std::io::{stderr, LineWriter, Write};
use std::num::NonZeroUsize;
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Arc;
use std::thread::available_parallelism;
use std::time::Duration;
use actix_web::http::KeepAlive;
use actix_web::web::Data;
use actix_web::HttpServer;
use index_scheduler::IndexScheduler;
use is_terminal::IsTerminal;
use meilisearch::analytics::Analytics;
use meilisearch::option::LogMode;
use meilisearch::search_queue::SearchQueue;
use meilisearch::{
analytics, create_app, setup_meilisearch, LogRouteHandle, LogRouteType, LogStderrHandle,
LogStderrType, Opt, SubscriberForSecondLayer,
};
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
use mimalloc::MiMalloc;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt as _;
use tracing_subscriber::Layer;
#[global_allocator]
static ALLOC: MiMalloc = MiMalloc;
fn default_log_route_layer() -> LogRouteType {
None.with_filter(tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF))
}
fn default_log_stderr_layer(opt: &Opt) -> LogStderrType {
let layer = tracing_subscriber::fmt::layer()
.with_writer(|| LineWriter::new(std::io::stderr()))
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE);
let layer = match opt.experimental_logs_mode {
LogMode::Human => Box::new(layer)
as Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>,
LogMode::Json => Box::new(layer.json())
as Box<dyn tracing_subscriber::Layer<SubscriberForSecondLayer> + Send + Sync>,
};
layer.with_filter(
tracing_subscriber::filter::Targets::new()
.with_target("", LevelFilter::from_str(&opt.log_level.to_string()).unwrap()),
)
}
/// does all the setup before meilisearch is launched
fn setup(opt: &Opt) -> anyhow::Result<(LogRouteHandle, LogStderrHandle)> {
let (route_layer, route_layer_handle) =
tracing_subscriber::reload::Layer::new(default_log_route_layer());
let route_layer: tracing_subscriber::reload::Layer<_, _> = route_layer;
let (stderr_layer, stderr_layer_handle) =
tracing_subscriber::reload::Layer::new(default_log_stderr_layer(opt));
let route_layer: tracing_subscriber::reload::Layer<_, _> = route_layer;
let subscriber = tracing_subscriber::registry().with(route_layer).with(stderr_layer);
// set the subscriber as the default for the application
tracing::subscriber::set_global_default(subscriber).unwrap();
Ok((route_layer_handle, stderr_layer_handle))
}
fn on_panic(info: &std::panic::PanicInfo) {
let info = info.to_string().replace('\n', " ");
tracing::error!(%info);
}
#[actix_web::main]
async fn main() -> anyhow::Result<()> {
try_main().await.inspect_err(|error| {
tracing::error!(%error);
let mut current = error.source();
let mut depth = 0;
while let Some(source) = current {
tracing::info!(%source, depth, "Error caused by");
current = source.source();
depth += 1;
}
})
}
async fn try_main() -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?;
std::panic::set_hook(Box::new(on_panic));
anyhow::ensure!(
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
);
let log_handle = setup(&opt)?;
match (opt.env.as_ref(), &opt.master_key) {
("production", Some(master_key)) if master_key.len() < MASTER_KEY_MIN_SIZE => {
anyhow::bail!(
"The master key must be at least {MASTER_KEY_MIN_SIZE} bytes in a production environment. The provided key is only {} bytes.
{}",
master_key.len(),
generated_master_key_message(),
)
}
("production", None) => {
anyhow::bail!(
"You must provide a master key to secure your instance in a production environment. It can be specified via the MEILI_MASTER_KEY environment variable or the --master-key launch option.
{}",
generated_master_key_message()
)
}
// No error; continue
_ => (),
}
let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?;
let analytics =
analytics::Analytics::new(&opt, index_scheduler.clone(), auth_controller.clone()).await;
print_launch_resume(&opt, analytics.clone(), config_read_from);
run_http(index_scheduler, auth_controller, opt, log_handle, Arc::new(analytics)).await?;
Ok(())
}
async fn run_http(
index_scheduler: Arc<IndexScheduler>,
auth_controller: Arc<AuthController>,
opt: Opt,
logs: (LogRouteHandle, LogStderrHandle),
analytics: Arc<Analytics>,
) -> anyhow::Result<()> {
let enable_dashboard = &opt.env == "development";
let opt_clone = opt.clone();
let index_scheduler = Data::from(index_scheduler);
let auth_controller = Data::from(auth_controller);
let analytics = Data::from(analytics);
let search_queue = SearchQueue::new(
opt.experimental_search_queue_size,
available_parallelism()
.unwrap_or(NonZeroUsize::new(2).unwrap())
.checked_mul(opt.experimental_nb_searches_per_core)
.unwrap_or(NonZeroUsize::MAX),
)
.with_time_to_abort(Duration::from_secs(
usize::from(opt.experimental_drop_search_after) as u64
));
let search_queue = Data::new(search_queue);
let http_server = HttpServer::new(move || {
create_app(
index_scheduler.clone(),
auth_controller.clone(),
search_queue.clone(),
opt.clone(),
logs.clone(),
analytics.clone(),
enable_dashboard,
)
})
// Disable signals allows the server to terminate immediately when a user enter CTRL-C
.disable_signals()
.keep_alive(KeepAlive::Os);
if let Some(config) = opt_clone.get_ssl_config()? {
http_server.bind_rustls_0_23(opt_clone.http_addr, config)?.run().await?;
} else {
http_server.bind(&opt_clone.http_addr)?.run().await?;
}
Ok(())
}
pub fn print_launch_resume(opt: &Opt, analytics: Analytics, config_read_from: Option<PathBuf>) {
let build_info = build_info::BuildInfo::from_build();
let protocol =
if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() { "https" } else { "http" };
let ascii_name = r#"
888b d888 d8b 888 d8b 888
8888b d8888 Y8P 888 Y8P 888
88888b.d88888 888 888
888Y88888P888 .d88b. 888 888 888 .d8888b .d88b. 8888b. 888d888 .d8888b 88888b.
888 Y888P 888 d8P Y8b 888 888 888 88K d8P Y8b "88b 888P" d88P" 888 "88b
888 Y8P 888 88888888 888 888 888 "Y8888b. 88888888 .d888888 888 888 888 888
888 " 888 Y8b. 888 888 888 X88 Y8b. 888 888 888 Y88b. 888 888
888 888 "Y8888 888 888 888 88888P' "Y8888 "Y888888 888 "Y8888P 888 888
"#;
eprintln!("{}", ascii_name);
eprintln!(
"Config file path:\t{:?}",
config_read_from
.map(|config_file_path| config_file_path.display().to_string())
.unwrap_or_else(|| "none".to_string())
);
eprintln!("Database path:\t\t{:?}", opt.db_path);
eprintln!("Server listening on:\t\"{}://{}\"", protocol, opt.http_addr);
eprintln!("Environment:\t\t{:?}", opt.env);
eprintln!("Commit SHA:\t\t{:?}", build_info.commit_sha1.unwrap_or("unknown"));
eprintln!(
"Commit date:\t\t{:?}",
build_info
.commit_timestamp
.and_then(|commit_timestamp| commit_timestamp
.format(&time::format_description::well_known::Rfc3339)
.ok())
.unwrap_or("unknown".into())
);
eprintln!("Package version:\t{:?}", env!("CARGO_PKG_VERSION").to_string());
if let Some(prototype) = build_info.describe.and_then(|describe| describe.as_prototype()) {
eprintln!("Prototype:\t\t{:?}", prototype);
}
{
if !opt.no_analytics {
eprintln!(
"
Thank you for using Meilisearch!
\nWe collect anonymized analytics to improve our product and your experience. To learn more, including how to turn off analytics, visit our dedicated documentation page: https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry
Anonymous telemetry:\t\"Enabled\""
);
} else {
eprintln!("Anonymous telemetry:\t\"Disabled\"");
}
}
if let Some(instance_uid) = analytics.instance_uid() {
eprintln!("Instance UID:\t\t\"{}\"", instance_uid);
}
eprintln!();
match (opt.env.as_ref(), &opt.master_key) {
("production", Some(_)) => {
eprintln!("A master key has been set. Requests to Meilisearch won't be authorized unless you provide an authentication key.");
}
("development", Some(master_key)) => {
eprintln!("A master key has been set. Requests to Meilisearch won't be authorized unless you provide an authentication key.");
if master_key.len() < MASTER_KEY_MIN_SIZE {
print_master_key_too_short_warning()
}
}
("development", None) => print_missing_master_key_warning(),
// unreachable because Opt::try_build above would have failed already if any other value had been produced
_ => unreachable!(),
}
eprintln!();
eprintln!("Check out Meilisearch Cloud!\thttps://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=engine&utm_medium=cli");
eprintln!("Documentation:\t\t\thttps://www.meilisearch.com/docs");
eprintln!("Source code:\t\t\thttps://github.com/meilisearch/meilisearch");
eprintln!("Discord:\t\t\thttps://discord.meilisearch.com");
eprintln!();
}
const WARNING_BG_COLOR: Option<Color> = Some(Color::Ansi256(178));
const WARNING_FG_COLOR: Option<Color> = Some(Color::Ansi256(0));
fn print_master_key_too_short_warning() {
let choice = if stderr().is_terminal() { ColorChoice::Auto } else { ColorChoice::Never };
let mut stderr = StandardStream::stderr(choice);
stderr
.set_color(
ColorSpec::new().set_bg(WARNING_BG_COLOR).set_fg(WARNING_FG_COLOR).set_bold(true),
)
.unwrap();
writeln!(stderr, "\n").unwrap();
writeln!(
stderr,
" Meilisearch started with a master key considered unsafe for use in a production environment.
A master key of at least {MASTER_KEY_MIN_SIZE} bytes will be required when switching to a production environment."
)
.unwrap();
stderr.reset().unwrap();
writeln!(stderr).unwrap();
eprintln!("\n{}", generated_master_key_message());
eprintln!(
"\nRestart Meilisearch with the argument above to use this new and secure master key."
)
}
fn print_missing_master_key_warning() {
let choice = if stderr().is_terminal() { ColorChoice::Auto } else { ColorChoice::Never };
let mut stderr = StandardStream::stderr(choice);
stderr
.set_color(
ColorSpec::new().set_bg(WARNING_BG_COLOR).set_fg(WARNING_FG_COLOR).set_bold(true),
)
.unwrap();
writeln!(stderr, "\n").unwrap();
writeln!(
stderr,
" No master key was found. The server will accept unidentified requests.
A master key of at least {MASTER_KEY_MIN_SIZE} bytes will be required when switching to a production environment."
)
.unwrap();
stderr.reset().unwrap();
writeln!(stderr).unwrap();
eprintln!("\n{}", generated_master_key_message());
eprintln!(
"\nRestart Meilisearch with the argument above to use this new and secure master key."
)
}
fn generated_master_key_message() -> String {
format!(
"We generated a new secure master key for you (you can safely use this token):
>> --master-key {} <<",
generate_master_key()
)
}

View file

@ -0,0 +1,52 @@
use lazy_static::lazy_static;
use prometheus::{
opts, register_histogram_vec, register_int_counter_vec, register_int_gauge,
register_int_gauge_vec, HistogramVec, IntCounterVec, IntGauge, IntGaugeVec,
};
lazy_static! {
pub static ref MEILISEARCH_HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
opts!("meilisearch_http_requests_total", "Meilisearch HTTP requests total"),
&["method", "path", "status"]
)
.expect("Can't create a metric");
pub static ref MEILISEARCH_DEGRADED_SEARCH_REQUESTS: IntGauge = register_int_gauge!(opts!(
"meilisearch_degraded_search_requests",
"Meilisearch number of degraded search requests"
))
.expect("Can't create a metric");
pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge =
register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch DB Size In Bytes"))
.expect("Can't create a metric");
pub static ref MEILISEARCH_USED_DB_SIZE_BYTES: IntGauge = register_int_gauge!(opts!(
"meilisearch_used_db_size_bytes",
"Meilisearch Used DB Size In Bytes"
))
.expect("Can't create a metric");
pub static ref MEILISEARCH_INDEX_COUNT: IntGauge =
register_int_gauge!(opts!("meilisearch_index_count", "Meilisearch Index Count"))
.expect("Can't create a metric");
pub static ref MEILISEARCH_INDEX_DOCS_COUNT: IntGaugeVec = register_int_gauge_vec!(
opts!("meilisearch_index_docs_count", "Meilisearch Index Docs Count"),
&["index"]
)
.expect("Can't create a metric");
pub static ref MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS: HistogramVec = register_histogram_vec!(
"meilisearch_http_response_time_seconds",
"Meilisearch HTTP response times",
&["method", "path"],
vec![0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0]
)
.expect("Can't create a metric");
pub static ref MEILISEARCH_NB_TASKS: IntGaugeVec = register_int_gauge_vec!(
opts!("meilisearch_nb_tasks", "Meilisearch Number of tasks"),
&["kind", "value"]
)
.expect("Can't create a metric");
pub static ref MEILISEARCH_LAST_UPDATE: IntGauge =
register_int_gauge!(opts!("meilisearch_last_update", "Meilisearch Last Update"))
.expect("Can't create a metric");
pub static ref MEILISEARCH_IS_INDEXING: IntGauge =
register_int_gauge!(opts!("meilisearch_is_indexing", "Meilisearch Is Indexing"))
.expect("Can't create a metric");
}

View file

@ -0,0 +1,89 @@
//! Contains all the custom middleware used in meilisearch
use std::future::{ready, Ready};
use actix_web::dev::{self, Service, ServiceRequest, ServiceResponse, Transform};
use actix_web::web::Data;
use actix_web::Error;
use futures_util::future::LocalBoxFuture;
use index_scheduler::IndexScheduler;
use prometheus::HistogramTimer;
pub struct RouteMetrics;
// Middleware factory is `Transform` trait from actix-service crate
// `S` - type of the next service
// `B` - type of response's body
impl<S, B> Transform<S, ServiceRequest> for RouteMetrics
where
S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = Error>,
S::Future: 'static,
B: 'static,
{
type Response = ServiceResponse<B>;
type Error = Error;
type InitError = ();
type Transform = RouteMetricsMiddleware<S>;
type Future = Ready<Result<Self::Transform, Self::InitError>>;
fn new_transform(&self, service: S) -> Self::Future {
ready(Ok(RouteMetricsMiddleware { service }))
}
}
pub struct RouteMetricsMiddleware<S> {
service: S,
}
impl<S, B> Service<ServiceRequest> for RouteMetricsMiddleware<S>
where
S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = Error>,
S::Future: 'static,
B: 'static,
{
type Response = ServiceResponse<B>;
type Error = Error;
type Future = LocalBoxFuture<'static, Result<Self::Response, Self::Error>>;
dev::forward_ready!(service);
fn call(&self, req: ServiceRequest) -> Self::Future {
let mut histogram_timer: Option<HistogramTimer> = None;
// calling unwrap here is safe because index scheduler is added to app data while creating actix app.
// also, the tests will fail if this is not present.
let index_scheduler = req.app_data::<Data<IndexScheduler>>().unwrap();
let features = index_scheduler.features();
let request_path = req.path();
let request_pattern = req.match_pattern();
let metric_path = request_pattern.as_ref().map_or(request_path, String::as_str).to_string();
let request_method = req.method().to_string();
if features.check_metrics().is_ok() {
let is_registered_resource = req.resource_map().has_resource(request_path);
if is_registered_resource {
histogram_timer = Some(
crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
.with_label_values(&[&request_method, &metric_path])
.start_timer(),
);
}
};
let fut = self.service.call(req);
Box::pin(async move {
let res = fut.await?;
crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
.with_label_values(&[&request_method, &metric_path, res.status().as_str()])
.inc();
if let Some(histogram_timer) = histogram_timer {
histogram_timer.observe_duration();
};
Ok(res)
})
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,180 @@
use std::str;
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::Deserr;
use meilisearch_auth::error::AuthControllerError;
use meilisearch_auth::AuthController;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::keys::{CreateApiKey, Key, PatchApiKey};
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use uuid::Uuid;
use super::PAGINATION_DEFAULT_LIMIT;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::Pagination;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::post().to(SeqHandler(create_api_key)))
.route(web::get().to(SeqHandler(list_api_keys))),
)
.service(
web::resource("/{key}")
.route(web::get().to(SeqHandler(get_api_key)))
.route(web::patch().to(SeqHandler(patch_api_key)))
.route(web::delete().to(SeqHandler(delete_api_key))),
);
}
pub async fn create_api_key(
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_CREATE }>, Data<AuthController>>,
body: AwebJson<CreateApiKey, DeserrJsonError>,
_req: HttpRequest,
) -> Result<HttpResponse, ResponseError> {
let v = body.into_inner();
let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
let key = auth_controller.create_key(v)?;
Ok(KeyView::from_key(key, &auth_controller))
})
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
Ok(HttpResponse::Created().json(res))
}
#[derive(Deserr, Debug, Clone, Copy)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct ListApiKeys {
#[deserr(default, error = DeserrQueryParamError<InvalidApiKeyOffset>)]
pub offset: Param<usize>,
#[deserr(default = Param(PAGINATION_DEFAULT_LIMIT), error = DeserrQueryParamError<InvalidApiKeyLimit>)]
pub limit: Param<usize>,
}
impl ListApiKeys {
fn as_pagination(self) -> Pagination {
Pagination { offset: self.offset.0, limit: self.limit.0 }
}
}
pub async fn list_api_keys(
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, Data<AuthController>>,
list_api_keys: AwebQueryParameter<ListApiKeys, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
let paginate = list_api_keys.into_inner().as_pagination();
let page_view = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
let keys = auth_controller.list_keys()?;
let page_view = paginate
.auto_paginate_sized(keys.into_iter().map(|k| KeyView::from_key(k, &auth_controller)));
Ok(page_view)
})
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
Ok(HttpResponse::Ok().json(page_view))
}
pub async fn get_api_key(
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, Data<AuthController>>,
path: web::Path<AuthParam>,
) -> Result<HttpResponse, ResponseError> {
let key = path.into_inner().key;
let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
let uid =
Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
let key = auth_controller.get_key(uid)?;
Ok(KeyView::from_key(key, &auth_controller))
})
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
Ok(HttpResponse::Ok().json(res))
}
pub async fn patch_api_key(
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_UPDATE }>, Data<AuthController>>,
body: AwebJson<PatchApiKey, DeserrJsonError>,
path: web::Path<AuthParam>,
) -> Result<HttpResponse, ResponseError> {
let key = path.into_inner().key;
let patch_api_key = body.into_inner();
let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
let uid =
Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
let key = auth_controller.update_key(uid, patch_api_key)?;
Ok(KeyView::from_key(key, &auth_controller))
})
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
Ok(HttpResponse::Ok().json(res))
}
pub async fn delete_api_key(
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_DELETE }>, Data<AuthController>>,
path: web::Path<AuthParam>,
) -> Result<HttpResponse, ResponseError> {
let key = path.into_inner().key;
tokio::task::spawn_blocking(move || {
let uid =
Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
auth_controller.delete_key(uid)
})
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
Ok(HttpResponse::NoContent().finish())
}
#[derive(Deserialize)]
pub struct AuthParam {
key: String,
}
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
struct KeyView {
name: Option<String>,
description: Option<String>,
key: String,
uid: Uuid,
actions: Vec<Action>,
indexes: Vec<String>,
#[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
expires_at: Option<OffsetDateTime>,
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
created_at: OffsetDateTime,
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
updated_at: OffsetDateTime,
}
impl KeyView {
fn from_key(key: Key, auth: &AuthController) -> Self {
let generated_key = auth.generate_key(key.uid).unwrap_or_default();
KeyView {
name: key.name,
description: key.description,
key: generated_key,
uid: key.uid,
actions: key.actions,
indexes: key.indexes.into_iter().map(|x| x.to_string()).collect(),
expires_at: key.expires_at,
created_at: key.created_at,
updated_at: key.updated_at,
}
}
}

View file

@ -0,0 +1,44 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::KindWithContent;
use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::Opt;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump))));
}
crate::empty_analytics!(DumpAnalytics, "Dump Created");
pub async fn create_dump(
index_scheduler: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<IndexScheduler>>,
auth_controller: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<AuthController>>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
analytics.publish(DumpAnalytics::default(), &req);
let task = KindWithContent::DumpCreation {
keys: auth_controller.list_keys()?,
instance_uid: analytics.instance_uid().cloned(),
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Create dump");
Ok(HttpResponse::Accepted().json(task))
}

View file

@ -0,0 +1,130 @@
use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use deserr::Deserr;
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use serde::Serialize;
use tracing::debug;
use crate::analytics::{Aggregate, Analytics};
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::get().to(get_features))
.route(web::patch().to(SeqHandler(patch_features))),
);
}
async fn get_features(
index_scheduler: GuardedData<
ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_GET }>,
Data<IndexScheduler>,
>,
) -> HttpResponse {
let features = index_scheduler.features();
let features = features.runtime_features();
debug!(returns = ?features, "Get features");
HttpResponse::Ok().json(features)
}
#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct RuntimeTogglableFeatures {
#[deserr(default)]
pub vector_store: Option<bool>,
#[deserr(default)]
pub metrics: Option<bool>,
#[deserr(default)]
pub logs_route: Option<bool>,
#[deserr(default)]
pub edit_documents_by_function: Option<bool>,
#[deserr(default)]
pub contains_filter: Option<bool>,
}
#[derive(Serialize)]
pub struct PatchExperimentalFeatureAnalytics {
vector_store: bool,
metrics: bool,
logs_route: bool,
edit_documents_by_function: bool,
contains_filter: bool,
}
impl Aggregate for PatchExperimentalFeatureAnalytics {
fn event_name(&self) -> &'static str {
"Experimental features Updated"
}
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
Box::new(Self {
vector_store: new.vector_store,
metrics: new.metrics,
logs_route: new.logs_route,
edit_documents_by_function: new.edit_documents_by_function,
contains_filter: new.contains_filter,
})
}
fn into_event(self: Box<Self>) -> serde_json::Value {
serde_json::to_value(*self).unwrap_or_default()
}
}
async fn patch_features(
index_scheduler: GuardedData<
ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_UPDATE }>,
Data<IndexScheduler>,
>,
new_features: AwebJson<RuntimeTogglableFeatures, DeserrJsonError>,
req: HttpRequest,
analytics: Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let features = index_scheduler.features();
debug!(parameters = ?new_features, "Patch features");
let old_features = features.runtime_features();
let new_features = meilisearch_types::features::RuntimeTogglableFeatures {
vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route),
edit_documents_by_function: new_features
.0
.edit_documents_by_function
.unwrap_or(old_features.edit_documents_by_function),
contains_filter: new_features.0.contains_filter.unwrap_or(old_features.contains_filter),
};
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
// the it renames to camelCase, which we don't want for analytics.
// **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
let meilisearch_types::features::RuntimeTogglableFeatures {
vector_store,
metrics,
logs_route,
edit_documents_by_function,
contains_filter,
} = new_features;
analytics.publish(
PatchExperimentalFeatureAnalytics {
vector_store,
metrics,
logs_route,
edit_documents_by_function,
contains_filter,
},
&req,
);
index_scheduler.put_runtime_features(new_features)?;
debug!(returns = ?new_features, "Patch features");
Ok(HttpResponse::Ok().json(new_features))
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,260 @@
use std::collections::{BinaryHeap, HashSet};
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::locales::Locale;
use serde_json::Value;
use tracing::debug;
use crate::analytics::{Aggregate, Analytics};
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::routes::indexes::search::search_kind;
use crate::search::{
add_search_rules, perform_facet_search, FacetSearchResult, HybridQuery, MatchingStrategy,
RankingScoreThreshold, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
DEFAULT_SEARCH_OFFSET,
};
use crate::search_queue::SearchQueue;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(search)));
}
/// # Important
///
/// Intentionally don't use `deny_unknown_fields` to ignore search parameters sent by user
#[derive(Debug, Clone, Default, PartialEq, deserr::Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase)]
pub struct FacetSearchQuery {
#[deserr(default, error = DeserrJsonError<InvalidFacetSearchQuery>)]
pub facet_query: Option<String>,
#[deserr(error = DeserrJsonError<InvalidFacetSearchFacetName>, missing_field_error = DeserrJsonError::missing_facet_search_facet_name)]
pub facet_name: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
pub q: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
pub vector: Option<Vec<f32>>,
#[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
pub hybrid: Option<HybridQuery>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
pub matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
pub attributes_to_search_on: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThreshold>,
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)]
pub locales: Option<Vec<Locale>>,
}
#[derive(Default)]
pub struct FacetSearchAggregator {
// requests
total_received: usize,
total_succeeded: usize,
time_spent: BinaryHeap<usize>,
// The set of all facetNames that were used
facet_names: HashSet<String>,
// As there been any other parameter than the facetName or facetQuery ones?
additional_search_parameters_provided: bool,
}
impl FacetSearchAggregator {
#[allow(clippy::field_reassign_with_default)]
pub fn from_query(query: &FacetSearchQuery) -> Self {
let FacetSearchQuery {
facet_query: _,
facet_name,
vector,
q,
filter,
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
locales,
} = query;
Self {
total_received: 1,
facet_names: Some(facet_name.clone()).into_iter().collect(),
additional_search_parameters_provided: q.is_some()
|| vector.is_some()
|| filter.is_some()
|| *matching_strategy != MatchingStrategy::default()
|| attributes_to_search_on.is_some()
|| hybrid.is_some()
|| ranking_score_threshold.is_some()
|| locales.is_some(),
..Default::default()
}
}
pub fn succeed(&mut self, result: &FacetSearchResult) {
let FacetSearchResult { facet_hits: _, facet_query: _, processing_time_ms } = result;
self.total_succeeded = 1;
self.time_spent.push(*processing_time_ms as usize);
}
}
impl Aggregate for FacetSearchAggregator {
fn event_name(&self) -> &'static str {
"Facet Searched POST"
}
fn aggregate(mut self: Box<Self>, new: Box<Self>) -> Box<Self> {
for time in new.time_spent {
self.time_spent.push(time);
}
Box::new(Self {
total_received: self.total_received.saturating_add(new.total_received),
total_succeeded: self.total_succeeded.saturating_add(new.total_succeeded),
time_spent: self.time_spent,
facet_names: self.facet_names.union(&new.facet_names).cloned().collect(),
additional_search_parameters_provided: self.additional_search_parameters_provided
| new.additional_search_parameters_provided,
})
}
fn into_event(self: Box<Self>) -> serde_json::Value {
let Self {
total_received,
total_succeeded,
time_spent,
facet_names,
additional_search_parameters_provided,
} = *self;
// the index of the 99th percentage of value
let percentile_99th = 0.99 * (total_succeeded as f64 - 1.) + 1.;
// we get all the values in a sorted manner
let time_spent = time_spent.into_sorted_vec();
// We are only interested by the slowest value of the 99th fastest results
let time_spent = time_spent.get(percentile_99th as usize);
serde_json::json!({
"requests": {
"99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
"total_succeeded": total_succeeded,
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
"total_received": total_received,
},
"facets": {
"total_distinct_facet_count": facet_names.len(),
"additional_search_parameters_provided": additional_search_parameters_provided,
},
})
}
}
pub async fn search(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
search_queue: Data<SearchQueue>,
index_uid: web::Path<String>,
params: AwebJson<FacetSearchQuery, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let query = params.into_inner();
debug!(parameters = ?query, "Facet search");
let mut aggregate = FacetSearchAggregator::from_query(&query);
let facet_query = query.facet_query.clone();
let facet_name = query.facet_name.clone();
let locales = query.locales.clone().map(|l| l.into_iter().map(Into::into).collect());
let mut search_query = SearchQuery::from(query);
// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
add_search_rules(&mut search_query.filter, search_rules);
}
let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features();
let search_kind = search_kind(&search_query, &index_scheduler, &index, features)?;
let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_facet_search(
&index,
search_query,
facet_query,
facet_name,
search_kind,
index_scheduler.features(),
locales,
)
})
.await;
permit.drop().await;
let search_result = search_result?;
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
}
analytics.publish(aggregate, &req);
let search_result = search_result?;
debug!(returns = ?search_result, "Facet search");
Ok(HttpResponse::Ok().json(search_result))
}
impl From<FacetSearchQuery> for SearchQuery {
fn from(value: FacetSearchQuery) -> Self {
let FacetSearchQuery {
facet_query: _,
facet_name: _,
q,
vector,
filter,
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
locales,
} = value;
SearchQuery {
q,
offset: DEFAULT_SEARCH_OFFSET(),
limit: DEFAULT_SEARCH_LIMIT(),
page: None,
hits_per_page: None,
attributes_to_retrieve: None,
retrieve_vectors: false,
attributes_to_crop: None,
crop_length: DEFAULT_CROP_LENGTH(),
attributes_to_highlight: None,
show_matches_position: false,
show_ranking_score: false,
show_ranking_score_details: false,
filter,
sort: None,
distinct: None,
facets: None,
highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(),
highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
crop_marker: DEFAULT_CROP_MARKER(),
matching_strategy,
vector,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
locales,
}
}
}

View file

@ -0,0 +1,318 @@
use std::collections::BTreeSet;
use std::convert::Infallible;
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::{DeserializeError, Deserr, ValuePointerRef};
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::{self, FieldDistribution, Index};
use meilisearch_types::tasks::KindWithContent;
use serde::Serialize;
use time::OffsetDateTime;
use tracing::debug;
use super::{get_task_id, Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use crate::analytics::{Aggregate, Analytics};
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::is_dry_run;
use crate::Opt;
pub mod documents;
pub mod facet_search;
pub mod search;
mod search_analytics;
pub mod settings;
mod settings_analytics;
pub mod similar;
mod similar_analytics;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::get().to(list_indexes))
.route(web::post().to(SeqHandler(create_index))),
)
.service(
web::scope("/{index_uid}")
.service(
web::resource("")
.route(web::get().to(SeqHandler(get_index)))
.route(web::patch().to(SeqHandler(update_index)))
.route(web::delete().to(SeqHandler(delete_index))),
)
.service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats))))
.service(web::scope("/documents").configure(documents::configure))
.service(web::scope("/search").configure(search::configure))
.service(web::scope("/facet-search").configure(facet_search::configure))
.service(web::scope("/similar").configure(similar::configure))
.service(web::scope("/settings").configure(settings::configure)),
);
}
#[derive(Debug, Serialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexView {
pub uid: String,
#[serde(with = "time::serde::rfc3339")]
pub created_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
pub updated_at: OffsetDateTime,
pub primary_key: Option<String>,
}
impl IndexView {
fn new(uid: String, index: &Index) -> Result<IndexView, milli::Error> {
// It is important that this function does not keep the Index handle or a clone of it, because
// `list_indexes` relies on this property to avoid opening all indexes at once.
let rtxn = index.read_txn()?;
Ok(IndexView {
uid,
created_at: index.created_at(&rtxn)?,
updated_at: index.updated_at(&rtxn)?,
primary_key: index.primary_key(&rtxn)?.map(String::from),
})
}
}
#[derive(Deserr, Debug, Clone, Copy)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct ListIndexes {
#[deserr(default, error = DeserrQueryParamError<InvalidIndexOffset>)]
pub offset: Param<usize>,
#[deserr(default = Param(PAGINATION_DEFAULT_LIMIT), error = DeserrQueryParamError<InvalidIndexLimit>)]
pub limit: Param<usize>,
}
impl ListIndexes {
fn as_pagination(self) -> Pagination {
Pagination { offset: self.offset.0, limit: self.limit.0 }
}
}
pub async fn list_indexes(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
paginate: AwebQueryParameter<ListIndexes, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?paginate, "List indexes");
let filters = index_scheduler.filters();
let indexes: Vec<Option<IndexView>> =
index_scheduler.try_for_each_index(|uid, index| -> Result<Option<IndexView>, _> {
if !filters.is_index_authorized(uid) {
return Ok(None);
}
Ok(Some(IndexView::new(uid.to_string(), index)?))
})?;
// Won't cause to open all indexes because IndexView doesn't keep the `Index` opened.
let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect();
let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter());
debug!(returns = ?ret, "List indexes");
Ok(HttpResponse::Ok().json(ret))
}
#[derive(Deserr, Debug)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct IndexCreateRequest {
#[deserr(error = DeserrJsonError<InvalidIndexUid>, missing_field_error = DeserrJsonError::missing_index_uid)]
uid: IndexUid,
#[deserr(default, error = DeserrJsonError<InvalidIndexPrimaryKey>)]
primary_key: Option<String>,
}
#[derive(Serialize)]
struct IndexCreatedAggregate {
primary_key: BTreeSet<String>,
}
impl Aggregate for IndexCreatedAggregate {
fn event_name(&self) -> &'static str {
"Index Created"
}
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
Box::new(Self { primary_key: self.primary_key.union(&new.primary_key).cloned().collect() })
}
fn into_event(self: Box<Self>) -> serde_json::Value {
serde_json::to_value(*self).unwrap_or_default()
}
}
pub async fn create_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_CREATE }>, Data<IndexScheduler>>,
body: AwebJson<IndexCreateRequest, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Create index");
let IndexCreateRequest { primary_key, uid } = body.into_inner();
let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid);
if allow_index_creation {
analytics.publish(
IndexCreatedAggregate { primary_key: primary_key.iter().cloned().collect() },
&req,
);
let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Create index");
Ok(HttpResponse::Accepted().json(task))
} else {
Err(AuthenticationError::InvalidToken.into())
}
}
fn deny_immutable_fields_index(
field: &str,
accepted: &[&str],
location: ValuePointerRef,
) -> DeserrJsonError {
match field {
"uid" => immutable_field_error(field, accepted, Code::ImmutableIndexUid),
"createdAt" => immutable_field_error(field, accepted, Code::ImmutableIndexCreatedAt),
"updatedAt" => immutable_field_error(field, accepted, Code::ImmutableIndexUpdatedAt),
_ => deserr::take_cf_content(DeserrJsonError::<BadRequest>::error::<Infallible>(
None,
deserr::ErrorKind::UnknownKey { key: field, accepted },
location,
)),
}
}
#[derive(Deserr, Debug)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields = deny_immutable_fields_index)]
pub struct UpdateIndexRequest {
#[deserr(default, error = DeserrJsonError<InvalidIndexPrimaryKey>)]
primary_key: Option<String>,
}
pub async fn get_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let index = index_scheduler.index(&index_uid)?;
let index_view = IndexView::new(index_uid.into_inner(), &index)?;
debug!(returns = ?index_view, "Get index");
Ok(HttpResponse::Ok().json(index_view))
}
#[derive(Serialize)]
struct IndexUpdatedAggregate {
primary_key: BTreeSet<String>,
}
impl Aggregate for IndexUpdatedAggregate {
fn event_name(&self) -> &'static str {
"Index Updated"
}
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
Box::new(Self { primary_key: self.primary_key.union(&new.primary_key).cloned().collect() })
}
fn into_event(self: Box<Self>) -> serde_json::Value {
serde_json::to_value(*self).unwrap_or_default()
}
}
pub async fn update_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_UPDATE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
body: AwebJson<UpdateIndexRequest, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Update index");
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let body = body.into_inner();
analytics.publish(
IndexUpdatedAggregate { primary_key: body.primary_key.iter().cloned().collect() },
&req,
);
let task = KindWithContent::IndexUpdate {
index_uid: index_uid.into_inner(),
primary_key: body.primary_key,
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Update index");
Ok(HttpResponse::Accepted().json(task))
}
pub async fn delete_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_DELETE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
req: HttpRequest,
opt: web::Data<Opt>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Delete index");
Ok(HttpResponse::Accepted().json(task))
}
/// Stats of an `Index`, as known to the `stats` route.
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct IndexStats {
/// Number of documents in the index
pub number_of_documents: u64,
/// Whether the index is currently performing indexation, according to the scheduler.
pub is_indexing: bool,
/// Association of every field name with the number of times it occurs in the documents.
pub field_distribution: FieldDistribution,
}
impl From<index_scheduler::IndexStats> for IndexStats {
fn from(stats: index_scheduler::IndexStats) -> Self {
IndexStats {
number_of_documents: stats.inner_stats.number_of_documents,
is_indexing: stats.is_indexing,
field_distribution: stats.inner_stats.field_distribution,
}
}
}
pub async fn get_index_stats(
index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let stats = IndexStats::from(index_scheduler.index_stats(&index_uid)?);
debug!(returns = ?stats, "Get index stats");
Ok(HttpResponse::Ok().json(stats))
}

View file

@ -0,0 +1,385 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use index_scheduler::{IndexScheduler, RoFeatures};
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::locales::Locale;
use meilisearch_types::milli;
use meilisearch_types::serde_cs::vec::CS;
use serde_json::Value;
use tracing::debug;
use crate::analytics::Analytics;
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST};
use crate::search::{
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
RetrieveVectors, SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH,
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
};
use crate::search_queue::SearchQueue;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::get().to(SeqHandler(search_with_url_query)))
.route(web::post().to(SeqHandler(search_with_post))),
);
}
#[derive(Debug, deserr::Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct SearchQueryGet {
#[deserr(default, error = DeserrQueryParamError<InvalidSearchQ>)]
q: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchVector>)]
vector: Option<CS<f32>>,
#[deserr(default = Param(DEFAULT_SEARCH_OFFSET()), error = DeserrQueryParamError<InvalidSearchOffset>)]
offset: Param<usize>,
#[deserr(default = Param(DEFAULT_SEARCH_LIMIT()), error = DeserrQueryParamError<InvalidSearchLimit>)]
limit: Param<usize>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchPage>)]
page: Option<Param<usize>>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchHitsPerPage>)]
hits_per_page: Option<Param<usize>>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToRetrieve>)]
attributes_to_retrieve: Option<CS<String>>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchRetrieveVectors>)]
retrieve_vectors: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToCrop>)]
attributes_to_crop: Option<CS<String>>,
#[deserr(default = Param(DEFAULT_CROP_LENGTH()), error = DeserrQueryParamError<InvalidSearchCropLength>)]
crop_length: Param<usize>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToHighlight>)]
attributes_to_highlight: Option<CS<String>>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchFilter>)]
filter: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSort>)]
sort: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchDistinct>)]
distinct: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)]
show_matches_position: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScore>)]
show_ranking_score: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScoreDetails>)]
show_ranking_score_details: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchFacets>)]
facets: Option<CS<String>>,
#[deserr( default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError<InvalidSearchHighlightPreTag>)]
highlight_pre_tag: String,
#[deserr( default = DEFAULT_HIGHLIGHT_POST_TAG(), error = DeserrQueryParamError<InvalidSearchHighlightPostTag>)]
highlight_post_tag: String,
#[deserr(default = DEFAULT_CROP_MARKER(), error = DeserrQueryParamError<InvalidSearchCropMarker>)]
crop_marker: String,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchMatchingStrategy>)]
matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToSearchOn>)]
pub attributes_to_search_on: Option<CS<String>>,
#[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)]
pub hybrid_embedder: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSemanticRatio>)]
pub hybrid_semantic_ratio: Option<SemanticRatioGet>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchRankingScoreThreshold>)]
pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchLocales>)]
pub locales: Option<CS<Locale>>,
}
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
pub struct RankingScoreThresholdGet(RankingScoreThreshold);
impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
type Error = InvalidSearchRankingScoreThreshold;
fn try_from(s: String) -> Result<Self, Self::Error> {
let f: f64 = s.parse().map_err(|_| InvalidSearchRankingScoreThreshold)?;
Ok(RankingScoreThresholdGet(RankingScoreThreshold::try_from(f)?))
}
}
#[derive(Debug, Clone, Copy, Default, PartialEq, deserr::Deserr)]
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSearchSemanticRatio)]
pub struct SemanticRatioGet(SemanticRatio);
impl std::convert::TryFrom<String> for SemanticRatioGet {
type Error = InvalidSearchSemanticRatio;
fn try_from(s: String) -> Result<Self, Self::Error> {
let f: f32 = s.parse().map_err(|_| InvalidSearchSemanticRatio)?;
Ok(SemanticRatioGet(SemanticRatio::try_from(f)?))
}
}
impl std::ops::Deref for SemanticRatioGet {
type Target = SemanticRatio;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl TryFrom<SearchQueryGet> for SearchQuery {
type Error = ResponseError;
fn try_from(other: SearchQueryGet) -> Result<Self, Self::Error> {
let filter = match other.filter {
Some(f) => match serde_json::from_str(&f) {
Ok(v) => Some(v),
_ => Some(Value::String(f)),
},
None => None,
};
let hybrid = match (other.hybrid_embedder, other.hybrid_semantic_ratio) {
(None, None) => None,
(None, Some(_)) => {
return Err(ResponseError::from_msg(
"`hybridEmbedder` is mandatory when `hybridSemanticRatio` is present".into(),
meilisearch_types::error::Code::InvalidHybridQuery,
));
}
(Some(embedder), None) => {
Some(HybridQuery { semantic_ratio: DEFAULT_SEMANTIC_RATIO(), embedder })
}
(Some(embedder), Some(semantic_ratio)) => {
Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder })
}
};
if other.vector.is_some() && hybrid.is_none() {
return Err(ResponseError::from_msg(
"`hybridEmbedder` is mandatory when `vector` is present".into(),
meilisearch_types::error::Code::MissingSearchHybrid,
));
}
Ok(Self {
q: other.q,
vector: other.vector.map(CS::into_inner),
offset: other.offset.0,
limit: other.limit.0,
page: other.page.as_deref().copied(),
hits_per_page: other.hits_per_page.as_deref().copied(),
attributes_to_retrieve: other.attributes_to_retrieve.map(|o| o.into_iter().collect()),
retrieve_vectors: other.retrieve_vectors.0,
attributes_to_crop: other.attributes_to_crop.map(|o| o.into_iter().collect()),
crop_length: other.crop_length.0,
attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()),
filter,
sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)),
distinct: other.distinct,
show_matches_position: other.show_matches_position.0,
show_ranking_score: other.show_ranking_score.0,
show_ranking_score_details: other.show_ranking_score_details.0,
facets: other.facets.map(|o| o.into_iter().collect()),
highlight_pre_tag: other.highlight_pre_tag,
highlight_post_tag: other.highlight_post_tag,
crop_marker: other.crop_marker,
matching_strategy: other.matching_strategy,
attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
hybrid,
ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
locales: other.locales.map(|o| o.into_iter().collect()),
})
}
}
// TODO: TAMO: split on :asc, and :desc, instead of doing some weird things
/// Transform the sort query parameter into something that matches the post expected format.
fn fix_sort_query_parameters(sort_query: &str) -> Vec<String> {
let mut sort_parameters = Vec::new();
let mut merge = false;
for current_sort in sort_query.trim_matches('"').split(',').map(|s| s.trim()) {
if current_sort.starts_with("_geoPoint(") {
sort_parameters.push(current_sort.to_string());
merge = true;
} else if merge && !sort_parameters.is_empty() {
let s = sort_parameters.last_mut().unwrap();
s.push(',');
s.push_str(current_sort);
if current_sort.ends_with("):desc") || current_sort.ends_with("):asc") {
merge = false;
}
} else {
sort_parameters.push(current_sort.to_string());
merge = false;
}
}
sort_parameters
}
pub async fn search_with_url_query(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
search_queue: web::Data<SearchQueue>,
index_uid: web::Path<String>,
params: AwebQueryParameter<SearchQueryGet, DeserrQueryParamError>,
req: HttpRequest,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Search get");
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let mut query: SearchQuery = params.into_inner().try_into()?;
// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
add_search_rules(&mut query.filter, search_rules);
}
let mut aggregate = SearchAggregator::<SearchGET>::from_query(&query);
let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features();
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?;
let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(&index, query, search_kind, retrieve_vector, index_scheduler.features())
})
.await;
permit.drop().await;
let search_result = search_result?;
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
}
analytics.publish(aggregate, &req);
let search_result = search_result?;
debug!(returns = ?search_result, "Search get");
Ok(HttpResponse::Ok().json(search_result))
}
pub async fn search_with_post(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
search_queue: web::Data<SearchQueue>,
index_uid: web::Path<String>,
params: AwebJson<SearchQuery, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let mut query = params.into_inner();
debug!(parameters = ?query, "Search post");
// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
add_search_rules(&mut query.filter, search_rules);
}
let mut aggregate = SearchAggregator::<SearchPOST>::from_query(&query);
let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features();
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;
let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(&index, query, search_kind, retrieve_vectors, index_scheduler.features())
})
.await;
permit.drop().await;
let search_result = search_result?;
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
if search_result.degraded {
MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc();
}
}
analytics.publish(aggregate, &req);
let search_result = search_result?;
debug!(returns = ?search_result, "Search post");
Ok(HttpResponse::Ok().json(search_result))
}
pub fn search_kind(
query: &SearchQuery,
index_scheduler: &IndexScheduler,
index: &milli::Index,
features: RoFeatures,
) -> Result<SearchKind, ResponseError> {
if query.vector.is_some() {
features.check_vector("Passing `vector` as a parameter")?;
}
if query.hybrid.is_some() {
features.check_vector("Passing `hybrid` as a parameter")?;
}
// handle with care, the order of cases matters, the semantics is subtle
match (query.q.as_deref(), &query.hybrid, query.vector.as_deref()) {
// empty query, no vector => placeholder search
(Some(q), _, None) if q.trim().is_empty() => Ok(SearchKind::KeywordOnly),
// no query, no vector => placeholder search
(None, _, None) => Ok(SearchKind::KeywordOnly),
// hybrid.semantic_ratio == 1.0 => vector
(_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
SearchKind::semantic(index_scheduler, index, embedder, v.map(|v| v.len()))
}
// hybrid.semantic_ratio == 0.0 => keyword
(_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
Ok(SearchKind::KeywordOnly)
}
// no query, hybrid, vector => semantic
(None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => {
SearchKind::semantic(index_scheduler, index, embedder, Some(v.len()))
}
// query, no hybrid, no vector => keyword
(Some(_), None, None) => Ok(SearchKind::KeywordOnly),
// query, hybrid, maybe vector => hybrid
(Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
index_scheduler,
index,
embedder,
**semantic_ratio,
v.map(|v| v.len()),
),
(_, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_fix_sort_query_parameters() {
let sort = fix_sort_query_parameters("_geoPoint(12, 13):asc");
assert_eq!(sort, vec!["_geoPoint(12,13):asc".to_string()]);
let sort = fix_sort_query_parameters("doggo:asc,_geoPoint(12.45,13.56):desc");
assert_eq!(sort, vec!["doggo:asc".to_string(), "_geoPoint(12.45,13.56):desc".to_string(),]);
let sort = fix_sort_query_parameters(
"doggo:asc , _geoPoint(12.45, 13.56, 2590352):desc , catto:desc",
);
assert_eq!(
sort,
vec![
"doggo:asc".to_string(),
"_geoPoint(12.45,13.56,2590352):desc".to_string(),
"catto:desc".to_string(),
]
);
let sort = fix_sort_query_parameters("doggo:asc , _geoPoint(1, 2), catto:desc");
// This is ugly but eh, I don't want to write a full parser just for this unused route
assert_eq!(sort, vec!["doggo:asc".to_string(), "_geoPoint(1,2),catto:desc".to_string(),]);
}
}

View file

@ -0,0 +1,485 @@
use once_cell::sync::Lazy;
use regex::Regex;
use serde_json::{json, Value};
use std::collections::{BTreeSet, BinaryHeap, HashMap};
use meilisearch_types::locales::Locale;
use crate::{
aggregate_methods,
analytics::{Aggregate, AggregateMethod},
search::{
SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
DEFAULT_SEMANTIC_RATIO,
},
};
aggregate_methods!(
SearchGET => "Documents Searched GET",
SearchPOST => "Documents Searched POST",
);
#[derive(Default)]
pub struct SearchAggregator<Method: AggregateMethod> {
// requests
total_received: usize,
total_succeeded: usize,
total_degraded: usize,
total_used_negative_operator: usize,
time_spent: BinaryHeap<usize>,
// sort
sort_with_geo_point: bool,
// every time a request has a filter, this field must be incremented by the number of terms it contains
sort_sum_of_criteria_terms: usize,
// every time a request has a filter, this field must be incremented by one
sort_total_number_of_criteria: usize,
// distinct
distinct: bool,
// filter
filter_with_geo_radius: bool,
filter_with_geo_bounding_box: bool,
// every time a request has a filter, this field must be incremented by the number of terms it contains
filter_sum_of_criteria_terms: usize,
// every time a request has a filter, this field must be incremented by one
filter_total_number_of_criteria: usize,
used_syntax: HashMap<String, usize>,
// attributes_to_search_on
// every time a search is done using attributes_to_search_on
attributes_to_search_on_total_number_of_uses: usize,
// q
// The maximum number of terms in a q request
max_terms_number: usize,
// vector
// The maximum number of floats in a vector request
max_vector_size: usize,
// Whether the semantic ratio passed to a hybrid search equals the default ratio.
semantic_ratio: bool,
hybrid: bool,
retrieve_vectors: bool,
// every time a search is done, we increment the counter linked to the used settings
matching_strategy: HashMap<String, usize>,
// List of the unique Locales passed as parameter
locales: BTreeSet<Locale>,
// pagination
max_limit: usize,
max_offset: usize,
finite_pagination: usize,
// formatting
max_attributes_to_retrieve: usize,
max_attributes_to_highlight: usize,
highlight_pre_tag: bool,
highlight_post_tag: bool,
max_attributes_to_crop: usize,
crop_marker: bool,
show_matches_position: bool,
crop_length: bool,
// facets
facets_sum_of_terms: usize,
facets_total_number_of_facets: usize,
// scoring
show_ranking_score: bool,
show_ranking_score_details: bool,
ranking_score_threshold: bool,
marker: std::marker::PhantomData<Method>,
}
impl<Method: AggregateMethod> SearchAggregator<Method> {
#[allow(clippy::field_reassign_with_default)]
pub fn from_query(query: &SearchQuery) -> Self {
let SearchQuery {
q,
vector,
offset,
limit,
page,
hits_per_page,
attributes_to_retrieve: _,
retrieve_vectors,
attributes_to_crop: _,
crop_length,
attributes_to_highlight: _,
show_matches_position,
show_ranking_score,
show_ranking_score_details,
filter,
sort,
distinct,
facets: _,
highlight_pre_tag,
highlight_post_tag,
crop_marker,
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
locales,
} = query;
let mut ret = Self::default();
ret.total_received = 1;
if let Some(ref sort) = sort {
ret.sort_total_number_of_criteria = 1;
ret.sort_with_geo_point = sort.iter().any(|s| s.contains("_geoPoint("));
ret.sort_sum_of_criteria_terms = sort.len();
}
ret.distinct = distinct.is_some();
if let Some(ref filter) = filter {
static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
ret.filter_total_number_of_criteria = 1;
let syntax = match filter {
Value::String(_) => "string".to_string(),
Value::Array(values) => {
if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) {
"mixed".to_string()
} else {
"array".to_string()
}
}
_ => "none".to_string(),
};
// convert the string to a HashMap
ret.used_syntax.insert(syntax, 1);
let stringified_filters = filter.to_string();
ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox(");
ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
}
// attributes_to_search_on
if attributes_to_search_on.is_some() {
ret.attributes_to_search_on_total_number_of_uses = 1;
}
if let Some(ref q) = q {
ret.max_terms_number = q.split_whitespace().count();
}
if let Some(ref vector) = vector {
ret.max_vector_size = vector.len();
}
ret.retrieve_vectors |= retrieve_vectors;
if query.is_finite_pagination() {
let limit = hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
ret.max_limit = limit;
ret.max_offset = page.unwrap_or(1).saturating_sub(1) * limit;
ret.finite_pagination = 1;
} else {
ret.max_limit = *limit;
ret.max_offset = *offset;
ret.finite_pagination = 0;
}
ret.matching_strategy.insert(format!("{:?}", matching_strategy), 1);
if let Some(locales) = locales {
ret.locales = locales.iter().copied().collect();
}
ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER();
ret.crop_length = *crop_length != DEFAULT_CROP_LENGTH();
ret.show_matches_position = *show_matches_position;
ret.show_ranking_score = *show_ranking_score;
ret.show_ranking_score_details = *show_ranking_score_details;
ret.ranking_score_threshold = ranking_score_threshold.is_some();
if let Some(hybrid) = hybrid {
ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
ret.hybrid = true;
}
ret
}
pub fn succeed(&mut self, result: &SearchResult) {
let SearchResult {
hits: _,
query: _,
processing_time_ms,
hits_info: _,
semantic_hit_count: _,
facet_distribution: _,
facet_stats: _,
degraded,
used_negative_operator,
} = result;
self.total_succeeded = self.total_succeeded.saturating_add(1);
if *degraded {
self.total_degraded = self.total_degraded.saturating_add(1);
}
if *used_negative_operator {
self.total_used_negative_operator = self.total_used_negative_operator.saturating_add(1);
}
self.time_spent.push(*processing_time_ms as usize);
}
}
impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
fn event_name(&self) -> &'static str {
Method::event_name()
}
fn aggregate(mut self: Box<Self>, new: Box<Self>) -> Box<Self> {
let Self {
total_received,
total_succeeded,
mut time_spent,
sort_with_geo_point,
sort_sum_of_criteria_terms,
sort_total_number_of_criteria,
distinct,
filter_with_geo_radius,
filter_with_geo_bounding_box,
filter_sum_of_criteria_terms,
filter_total_number_of_criteria,
used_syntax,
attributes_to_search_on_total_number_of_uses,
max_terms_number,
max_vector_size,
retrieve_vectors,
matching_strategy,
max_limit,
max_offset,
finite_pagination,
max_attributes_to_retrieve,
max_attributes_to_highlight,
highlight_pre_tag,
highlight_post_tag,
max_attributes_to_crop,
crop_marker,
show_matches_position,
crop_length,
facets_sum_of_terms,
facets_total_number_of_facets,
show_ranking_score,
show_ranking_score_details,
semantic_ratio,
hybrid,
total_degraded,
total_used_negative_operator,
ranking_score_threshold,
mut locales,
marker: _,
} = *new;
// request
self.total_received = self.total_received.saturating_add(total_received);
self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
self.total_degraded = self.total_degraded.saturating_add(total_degraded);
self.total_used_negative_operator =
self.total_used_negative_operator.saturating_add(total_used_negative_operator);
self.time_spent.append(&mut time_spent);
// sort
self.sort_with_geo_point |= sort_with_geo_point;
self.sort_sum_of_criteria_terms =
self.sort_sum_of_criteria_terms.saturating_add(sort_sum_of_criteria_terms);
self.sort_total_number_of_criteria =
self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria);
// distinct
self.distinct |= distinct;
// filter
self.filter_with_geo_radius |= filter_with_geo_radius;
self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
self.filter_sum_of_criteria_terms =
self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms);
self.filter_total_number_of_criteria =
self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria);
for (key, value) in used_syntax.into_iter() {
let used_syntax = self.used_syntax.entry(key).or_insert(0);
*used_syntax = used_syntax.saturating_add(value);
}
// attributes_to_search_on
self.attributes_to_search_on_total_number_of_uses = self
.attributes_to_search_on_total_number_of_uses
.saturating_add(attributes_to_search_on_total_number_of_uses);
// q
self.max_terms_number = self.max_terms_number.max(max_terms_number);
// vector
self.max_vector_size = self.max_vector_size.max(max_vector_size);
self.retrieve_vectors |= retrieve_vectors;
self.semantic_ratio |= semantic_ratio;
self.hybrid |= hybrid;
// pagination
self.max_limit = self.max_limit.max(max_limit);
self.max_offset = self.max_offset.max(max_offset);
self.finite_pagination += finite_pagination;
// formatting
self.max_attributes_to_retrieve =
self.max_attributes_to_retrieve.max(max_attributes_to_retrieve);
self.max_attributes_to_highlight =
self.max_attributes_to_highlight.max(max_attributes_to_highlight);
self.highlight_pre_tag |= highlight_pre_tag;
self.highlight_post_tag |= highlight_post_tag;
self.max_attributes_to_crop = self.max_attributes_to_crop.max(max_attributes_to_crop);
self.crop_marker |= crop_marker;
self.show_matches_position |= show_matches_position;
self.crop_length |= crop_length;
// facets
self.facets_sum_of_terms = self.facets_sum_of_terms.saturating_add(facets_sum_of_terms);
self.facets_total_number_of_facets =
self.facets_total_number_of_facets.saturating_add(facets_total_number_of_facets);
// matching strategy
for (key, value) in matching_strategy.into_iter() {
let matching_strategy = self.matching_strategy.entry(key).or_insert(0);
*matching_strategy = matching_strategy.saturating_add(value);
}
// scoring
self.show_ranking_score |= show_ranking_score;
self.show_ranking_score_details |= show_ranking_score_details;
self.ranking_score_threshold |= ranking_score_threshold;
// locales
self.locales.append(&mut locales);
self
}
fn into_event(self: Box<Self>) -> serde_json::Value {
let Self {
total_received,
total_succeeded,
time_spent,
sort_with_geo_point,
sort_sum_of_criteria_terms,
sort_total_number_of_criteria,
distinct,
filter_with_geo_radius,
filter_with_geo_bounding_box,
filter_sum_of_criteria_terms,
filter_total_number_of_criteria,
used_syntax,
attributes_to_search_on_total_number_of_uses,
max_terms_number,
max_vector_size,
retrieve_vectors,
matching_strategy,
max_limit,
max_offset,
finite_pagination,
max_attributes_to_retrieve,
max_attributes_to_highlight,
highlight_pre_tag,
highlight_post_tag,
max_attributes_to_crop,
crop_marker,
show_matches_position,
crop_length,
facets_sum_of_terms,
facets_total_number_of_facets,
show_ranking_score,
show_ranking_score_details,
semantic_ratio,
hybrid,
total_degraded,
total_used_negative_operator,
ranking_score_threshold,
locales,
marker: _,
} = *self;
// we get all the values in a sorted manner
let time_spent = time_spent.into_sorted_vec();
// the index of the 99th percentage of value
let percentile_99th = time_spent.len() * 99 / 100;
// We are only interested by the slowest value of the 99th fastest results
let time_spent = time_spent.get(percentile_99th);
json!({
"requests": {
"99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
"total_succeeded": total_succeeded,
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
"total_received": total_received,
"total_degraded": total_degraded,
"total_used_negative_operator": total_used_negative_operator,
},
"sort": {
"with_geoPoint": sort_with_geo_point,
"avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64),
},
"distinct": distinct,
"filter": {
"with_geoRadius": filter_with_geo_radius,
"with_geoBoundingBox": filter_with_geo_bounding_box,
"avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
"most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
},
"attributes_to_search_on": {
"total_number_of_uses": attributes_to_search_on_total_number_of_uses,
},
"q": {
"max_terms_number": max_terms_number,
},
"vector": {
"max_vector_size": max_vector_size,
"retrieve_vectors": retrieve_vectors,
},
"hybrid": {
"enabled": hybrid,
"semantic_ratio": semantic_ratio,
},
"pagination": {
"max_limit": max_limit,
"max_offset": max_offset,
"most_used_navigation": if finite_pagination > (total_received / 2) { "exhaustive" } else { "estimated" },
},
"formatting": {
"max_attributes_to_retrieve": max_attributes_to_retrieve,
"max_attributes_to_highlight": max_attributes_to_highlight,
"highlight_pre_tag": highlight_pre_tag,
"highlight_post_tag": highlight_post_tag,
"max_attributes_to_crop": max_attributes_to_crop,
"crop_marker": crop_marker,
"show_matches_position": show_matches_position,
"crop_length": crop_length,
},
"facets": {
"avg_facets_number": format!("{:.2}", facets_sum_of_terms as f64 / facets_total_number_of_facets as f64),
},
"matching_strategy": {
"most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
},
"locales": locales,
"scoring": {
"show_ranking_score": show_ranking_score,
"show_ranking_score_details": show_ranking_score_details,
"ranking_score_threshold": ranking_score_threshold,
},
})
}
}

View file

@ -0,0 +1,532 @@
use super::settings_analytics::*;
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::settings::{settings, SecretPolicy, Settings, Unchecked};
use meilisearch_types::tasks::KindWithContent;
use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::Opt;
#[macro_export]
macro_rules! make_setting_route {
($route:literal, $update_verb:ident, $type:ty, $err_ty:ty, $attr:ident, $camelcase_attr:literal, $analytics:ident) => {
pub mod $attr {
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse, Resource};
use index_scheduler::IndexScheduler;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::settings::{settings, Settings};
use meilisearch_types::tasks::KindWithContent;
use tracing::debug;
use $crate::analytics::Analytics;
use $crate::extractors::authentication::policies::*;
use $crate::extractors::authentication::GuardedData;
use $crate::extractors::sequential_extractor::SeqHandler;
use $crate::Opt;
use $crate::routes::{is_dry_run, get_task_id, SummarizedTaskView};
pub async fn delete(
index_scheduler: GuardedData<
ActionPolicy<{ actions::SETTINGS_UPDATE }>,
Data<IndexScheduler>,
>,
index_uid: web::Path<String>,
req: HttpRequest,
opt: web::Data<Opt>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let new_settings = Settings { $attr: Setting::Reset.into(), ..Default::default() };
let allow_index_creation =
index_scheduler.filters().allow_index_creation(&index_uid);
let task = KindWithContent::SettingsUpdate {
index_uid: index_uid.to_string(),
new_settings: Box::new(new_settings),
is_deletion: true,
allow_index_creation,
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Delete settings");
Ok(HttpResponse::Accepted().json(task))
}
pub async fn update(
index_scheduler: GuardedData<
ActionPolicy<{ actions::SETTINGS_UPDATE }>,
Data<IndexScheduler>,
>,
index_uid: actix_web::web::Path<String>,
body: deserr::actix_web::AwebJson<Option<$type>, $err_ty>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<Analytics>,
) -> std::result::Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let body = body.into_inner();
debug!(parameters = ?body, "Update settings");
#[allow(clippy::redundant_closure_call)]
analytics.publish(
$crate::routes::indexes::settings_analytics::$analytics::new(body.as_ref()).into_settings(),
&req,
);
let new_settings = Settings {
$attr: match body {
Some(inner_body) => Setting::Set(inner_body).into(),
None => Setting::Reset.into(),
},
..Default::default()
};
let new_settings = $crate::routes::indexes::settings::validate_settings(
new_settings,
&index_scheduler,
)?;
let allow_index_creation =
index_scheduler.filters().allow_index_creation(&index_uid);
let task = KindWithContent::SettingsUpdate {
index_uid: index_uid.to_string(),
new_settings: Box::new(new_settings),
is_deletion: false,
allow_index_creation,
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Update settings");
Ok(HttpResponse::Accepted().json(task))
}
pub async fn get(
index_scheduler: GuardedData<
ActionPolicy<{ actions::SETTINGS_GET }>,
Data<IndexScheduler>,
>,
index_uid: actix_web::web::Path<String>,
) -> std::result::Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let index = index_scheduler.index(&index_uid)?;
let rtxn = index.read_txn()?;
let settings = settings(&index, &rtxn, meilisearch_types::settings::SecretPolicy::HideSecrets)?;
debug!(returns = ?settings, "Update settings");
Ok(HttpResponse::Ok().json(settings.$attr))
}
pub fn resources() -> Resource {
Resource::new($route)
.route(web::get().to(SeqHandler(get)))
.route(web::$update_verb().to(SeqHandler(update)))
.route(web::delete().to(SeqHandler(delete)))
}
}
};
}
make_setting_route!(
"/filterable-attributes",
put,
std::collections::BTreeSet<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsFilterableAttributes,
>,
filterable_attributes,
"filterableAttributes",
FilterableAttributesAnalytics
);
make_setting_route!(
"/sortable-attributes",
put,
std::collections::BTreeSet<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSortableAttributes,
>,
sortable_attributes,
"sortableAttributes",
SortableAttributesAnalytics
);
make_setting_route!(
"/displayed-attributes",
put,
Vec<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsDisplayedAttributes,
>,
displayed_attributes,
"displayedAttributes",
DisplayedAttributesAnalytics
);
make_setting_route!(
"/typo-tolerance",
patch,
meilisearch_types::settings::TypoSettings,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsTypoTolerance,
>,
typo_tolerance,
"typoTolerance",
TypoToleranceAnalytics
);
make_setting_route!(
"/searchable-attributes",
put,
Vec<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSearchableAttributes,
>,
searchable_attributes,
"searchableAttributes",
SearchableAttributesAnalytics
);
make_setting_route!(
"/stop-words",
put,
std::collections::BTreeSet<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsStopWords,
>,
stop_words,
"stopWords",
StopWordsAnalytics
);
make_setting_route!(
"/non-separator-tokens",
put,
std::collections::BTreeSet<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsNonSeparatorTokens,
>,
non_separator_tokens,
"nonSeparatorTokens",
NonSeparatorTokensAnalytics
);
make_setting_route!(
"/separator-tokens",
put,
std::collections::BTreeSet<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSeparatorTokens,
>,
separator_tokens,
"separatorTokens",
SeparatorTokensAnalytics
);
make_setting_route!(
"/dictionary",
put,
std::collections::BTreeSet<String>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsDictionary,
>,
dictionary,
"dictionary",
DictionaryAnalytics
);
make_setting_route!(
"/synonyms",
put,
std::collections::BTreeMap<String, Vec<String>>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSynonyms,
>,
synonyms,
"synonyms",
SynonymsAnalytics
);
make_setting_route!(
"/distinct-attribute",
put,
String,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsDistinctAttribute,
>,
distinct_attribute,
"distinctAttribute",
DistinctAttributeAnalytics
);
make_setting_route!(
"/proximity-precision",
put,
meilisearch_types::settings::ProximityPrecisionView,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsProximityPrecision,
>,
proximity_precision,
"proximityPrecision",
ProximityPrecisionAnalytics
);
make_setting_route!(
"/localized-attributes",
put,
Vec<meilisearch_types::locales::LocalizedAttributesRuleView>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsLocalizedAttributes,
>,
localized_attributes,
"localizedAttributes",
LocalesAnalytics
);
make_setting_route!(
"/ranking-rules",
put,
Vec<meilisearch_types::settings::RankingRuleView>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsRankingRules,
>,
ranking_rules,
"rankingRules",
RankingRulesAnalytics
);
make_setting_route!(
"/faceting",
patch,
meilisearch_types::settings::FacetingSettings,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsFaceting,
>,
faceting,
"faceting",
FacetingAnalytics
);
make_setting_route!(
"/pagination",
patch,
meilisearch_types::settings::PaginationSettings,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsPagination,
>,
pagination,
"pagination",
PaginationAnalytics
);
make_setting_route!(
"/embedders",
patch,
std::collections::BTreeMap<String, Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>>,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsEmbedders,
>,
embedders,
"embedders",
EmbeddersAnalytics
);
make_setting_route!(
"/search-cutoff-ms",
put,
u64,
meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsSearchCutoffMs,
>,
search_cutoff_ms,
"searchCutoffMs",
SearchCutoffMsAnalytics
);
macro_rules! generate_configure {
($($mod:ident),*) => {
pub fn configure(cfg: &mut web::ServiceConfig) {
use crate::extractors::sequential_extractor::SeqHandler;
cfg.service(
web::resource("")
.route(web::patch().to(SeqHandler(update_all)))
.route(web::get().to(SeqHandler(get_all)))
.route(web::delete().to(SeqHandler(delete_all))))
$(.service($mod::resources()))*;
}
};
}
generate_configure!(
filterable_attributes,
sortable_attributes,
displayed_attributes,
localized_attributes,
searchable_attributes,
distinct_attribute,
proximity_precision,
stop_words,
separator_tokens,
non_separator_tokens,
dictionary,
synonyms,
ranking_rules,
typo_tolerance,
pagination,
faceting,
embedders,
search_cutoff_ms
);
pub async fn update_all(
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
body: AwebJson<Settings<Unchecked>, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let new_settings = body.into_inner();
debug!(parameters = ?new_settings, "Update all settings");
let new_settings = validate_settings(new_settings, &index_scheduler)?;
analytics.publish(
SettingsAnalytics {
ranking_rules: RankingRulesAnalytics::new(new_settings.ranking_rules.as_ref().set()),
searchable_attributes: SearchableAttributesAnalytics::new(
new_settings.searchable_attributes.as_ref().set(),
),
displayed_attributes: DisplayedAttributesAnalytics::new(
new_settings.displayed_attributes.as_ref().set(),
),
sortable_attributes: SortableAttributesAnalytics::new(
new_settings.sortable_attributes.as_ref().set(),
),
filterable_attributes: FilterableAttributesAnalytics::new(
new_settings.filterable_attributes.as_ref().set(),
),
distinct_attribute: DistinctAttributeAnalytics::new(
new_settings.distinct_attribute.as_ref().set(),
),
proximity_precision: ProximityPrecisionAnalytics::new(
new_settings.proximity_precision.as_ref().set(),
),
typo_tolerance: TypoToleranceAnalytics::new(new_settings.typo_tolerance.as_ref().set()),
faceting: FacetingAnalytics::new(new_settings.faceting.as_ref().set()),
pagination: PaginationAnalytics::new(new_settings.pagination.as_ref().set()),
stop_words: StopWordsAnalytics::new(new_settings.stop_words.as_ref().set()),
synonyms: SynonymsAnalytics::new(new_settings.synonyms.as_ref().set()),
embedders: EmbeddersAnalytics::new(new_settings.embedders.as_ref().set()),
search_cutoff_ms: SearchCutoffMsAnalytics::new(
new_settings.search_cutoff_ms.as_ref().set(),
),
locales: LocalesAnalytics::new(new_settings.localized_attributes.as_ref().set()),
dictionary: DictionaryAnalytics::new(new_settings.dictionary.as_ref().set()),
separator_tokens: SeparatorTokensAnalytics::new(
new_settings.separator_tokens.as_ref().set(),
),
non_separator_tokens: NonSeparatorTokensAnalytics::new(
new_settings.non_separator_tokens.as_ref().set(),
),
},
&req,
);
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
let task = KindWithContent::SettingsUpdate {
index_uid,
new_settings: Box::new(new_settings),
is_deletion: false,
allow_index_creation,
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Update all settings");
Ok(HttpResponse::Accepted().json(task))
}
pub async fn get_all(
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_GET }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let index = index_scheduler.index(&index_uid)?;
let rtxn = index.read_txn()?;
let new_settings = settings(&index, &rtxn, SecretPolicy::HideSecrets)?;
debug!(returns = ?new_settings, "Get all settings");
Ok(HttpResponse::Ok().json(new_settings))
}
pub async fn delete_all(
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
req: HttpRequest,
opt: web::Data<Opt>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let new_settings = Settings::cleared().into_unchecked();
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
let task = KindWithContent::SettingsUpdate {
index_uid,
new_settings: Box::new(new_settings),
is_deletion: true,
allow_index_creation,
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Delete all settings");
Ok(HttpResponse::Accepted().json(task))
}
fn validate_settings(
settings: Settings<Unchecked>,
index_scheduler: &IndexScheduler,
) -> Result<Settings<Unchecked>, ResponseError> {
if matches!(settings.embedders, Setting::Set(_)) {
index_scheduler.features().check_vector("Passing `embedders` in settings")?
}
Ok(settings.validate()?)
}

View file

@ -0,0 +1,621 @@
//! All the structures used to make the analytics on the settings works.
//! The signatures of the `new` functions are not very rust idiomatic because they must match the types received
//! through the sub-settings route directly without any manipulation.
//! This is why we often use a `Option<&Vec<_>>` instead of a `Option<&[_]>`.
use meilisearch_types::locales::{Locale, LocalizedAttributesRuleView};
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::vector::settings::EmbeddingSettings;
use meilisearch_types::settings::{
FacetingSettings, PaginationSettings, ProximityPrecisionView, TypoSettings,
};
use meilisearch_types::{facet_values_sort::FacetValuesSort, settings::RankingRuleView};
use serde::Serialize;
use std::collections::{BTreeMap, BTreeSet, HashSet};
use crate::analytics::Aggregate;
#[derive(Serialize, Default)]
pub struct SettingsAnalytics {
pub ranking_rules: RankingRulesAnalytics,
pub searchable_attributes: SearchableAttributesAnalytics,
pub displayed_attributes: DisplayedAttributesAnalytics,
pub sortable_attributes: SortableAttributesAnalytics,
pub filterable_attributes: FilterableAttributesAnalytics,
pub distinct_attribute: DistinctAttributeAnalytics,
pub proximity_precision: ProximityPrecisionAnalytics,
pub typo_tolerance: TypoToleranceAnalytics,
pub faceting: FacetingAnalytics,
pub pagination: PaginationAnalytics,
pub stop_words: StopWordsAnalytics,
pub synonyms: SynonymsAnalytics,
pub embedders: EmbeddersAnalytics,
pub search_cutoff_ms: SearchCutoffMsAnalytics,
pub locales: LocalesAnalytics,
pub dictionary: DictionaryAnalytics,
pub separator_tokens: SeparatorTokensAnalytics,
pub non_separator_tokens: NonSeparatorTokensAnalytics,
}
impl Aggregate for SettingsAnalytics {
fn event_name(&self) -> &'static str {
"Settings Updated"
}
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
Box::new(Self {
ranking_rules: RankingRulesAnalytics {
words_position: new
.ranking_rules
.words_position
.or(self.ranking_rules.words_position),
typo_position: new.ranking_rules.typo_position.or(self.ranking_rules.typo_position),
proximity_position: new
.ranking_rules
.proximity_position
.or(self.ranking_rules.proximity_position),
attribute_position: new
.ranking_rules
.attribute_position
.or(self.ranking_rules.attribute_position),
sort_position: new.ranking_rules.sort_position.or(self.ranking_rules.sort_position),
exactness_position: new
.ranking_rules
.exactness_position
.or(self.ranking_rules.exactness_position),
values: new.ranking_rules.values.or(self.ranking_rules.values),
},
searchable_attributes: SearchableAttributesAnalytics {
total: new.searchable_attributes.total.or(self.searchable_attributes.total),
with_wildcard: new
.searchable_attributes
.with_wildcard
.or(self.searchable_attributes.with_wildcard),
},
displayed_attributes: DisplayedAttributesAnalytics {
total: new.displayed_attributes.total.or(self.displayed_attributes.total),
with_wildcard: new
.displayed_attributes
.with_wildcard
.or(self.displayed_attributes.with_wildcard),
},
sortable_attributes: SortableAttributesAnalytics {
total: new.sortable_attributes.total.or(self.sortable_attributes.total),
has_geo: new.sortable_attributes.has_geo.or(self.sortable_attributes.has_geo),
},
filterable_attributes: FilterableAttributesAnalytics {
total: new.filterable_attributes.total.or(self.filterable_attributes.total),
has_geo: new.filterable_attributes.has_geo.or(self.filterable_attributes.has_geo),
},
distinct_attribute: DistinctAttributeAnalytics {
set: self.distinct_attribute.set | new.distinct_attribute.set,
},
proximity_precision: ProximityPrecisionAnalytics {
set: self.proximity_precision.set | new.proximity_precision.set,
value: new.proximity_precision.value.or(self.proximity_precision.value),
},
typo_tolerance: TypoToleranceAnalytics {
enabled: new.typo_tolerance.enabled.or(self.typo_tolerance.enabled),
disable_on_attributes: new
.typo_tolerance
.disable_on_attributes
.or(self.typo_tolerance.disable_on_attributes),
disable_on_words: new
.typo_tolerance
.disable_on_words
.or(self.typo_tolerance.disable_on_words),
min_word_size_for_one_typo: new
.typo_tolerance
.min_word_size_for_one_typo
.or(self.typo_tolerance.min_word_size_for_one_typo),
min_word_size_for_two_typos: new
.typo_tolerance
.min_word_size_for_two_typos
.or(self.typo_tolerance.min_word_size_for_two_typos),
},
faceting: FacetingAnalytics {
max_values_per_facet: new
.faceting
.max_values_per_facet
.or(self.faceting.max_values_per_facet),
sort_facet_values_by_star_count: new
.faceting
.sort_facet_values_by_star_count
.or(self.faceting.sort_facet_values_by_star_count),
sort_facet_values_by_total: new
.faceting
.sort_facet_values_by_total
.or(self.faceting.sort_facet_values_by_total),
},
pagination: PaginationAnalytics {
max_total_hits: new.pagination.max_total_hits.or(self.pagination.max_total_hits),
},
stop_words: StopWordsAnalytics {
total: new.stop_words.total.or(self.stop_words.total),
},
synonyms: SynonymsAnalytics { total: new.synonyms.total.or(self.synonyms.total) },
embedders: EmbeddersAnalytics {
total: new.embedders.total.or(self.embedders.total),
sources: match (self.embedders.sources, new.embedders.sources) {
(None, None) => None,
(Some(sources), None) | (None, Some(sources)) => Some(sources),
(Some(this), Some(other)) => Some(this.union(&other).cloned().collect()),
},
document_template_used: match (
self.embedders.document_template_used,
new.embedders.document_template_used,
) {
(None, None) => None,
(Some(used), None) | (None, Some(used)) => Some(used),
(Some(this), Some(other)) => Some(this | other),
},
document_template_max_bytes: match (
self.embedders.document_template_max_bytes,
new.embedders.document_template_max_bytes,
) {
(None, None) => None,
(Some(bytes), None) | (None, Some(bytes)) => Some(bytes),
(Some(this), Some(other)) => Some(this.max(other)),
},
binary_quantization_used: match (
self.embedders.binary_quantization_used,
new.embedders.binary_quantization_used,
) {
(None, None) => None,
(Some(bq), None) | (None, Some(bq)) => Some(bq),
(Some(this), Some(other)) => Some(this | other),
},
},
search_cutoff_ms: SearchCutoffMsAnalytics {
search_cutoff_ms: new
.search_cutoff_ms
.search_cutoff_ms
.or(self.search_cutoff_ms.search_cutoff_ms),
},
locales: LocalesAnalytics { locales: new.locales.locales.or(self.locales.locales) },
dictionary: DictionaryAnalytics {
total: new.dictionary.total.or(self.dictionary.total),
},
separator_tokens: SeparatorTokensAnalytics {
total: new.non_separator_tokens.total.or(self.separator_tokens.total),
},
non_separator_tokens: NonSeparatorTokensAnalytics {
total: new.non_separator_tokens.total.or(self.non_separator_tokens.total),
},
})
}
fn into_event(self: Box<Self>) -> serde_json::Value {
serde_json::to_value(*self).unwrap_or_default()
}
}
#[derive(Serialize, Default)]
pub struct RankingRulesAnalytics {
pub words_position: Option<usize>,
pub typo_position: Option<usize>,
pub proximity_position: Option<usize>,
pub attribute_position: Option<usize>,
pub sort_position: Option<usize>,
pub exactness_position: Option<usize>,
pub values: Option<String>,
}
impl RankingRulesAnalytics {
pub fn new(rr: Option<&Vec<RankingRuleView>>) -> Self {
RankingRulesAnalytics {
words_position: rr.as_ref().and_then(|rr| {
rr.iter()
.position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Words))
}),
typo_position: rr.as_ref().and_then(|rr| {
rr.iter()
.position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo))
}),
proximity_position: rr.as_ref().and_then(|rr| {
rr.iter().position(|s| {
matches!(s, meilisearch_types::settings::RankingRuleView::Proximity)
})
}),
attribute_position: rr.as_ref().and_then(|rr| {
rr.iter().position(|s| {
matches!(s, meilisearch_types::settings::RankingRuleView::Attribute)
})
}),
sort_position: rr.as_ref().and_then(|rr| {
rr.iter()
.position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort))
}),
exactness_position: rr.as_ref().and_then(|rr| {
rr.iter().position(|s| {
matches!(s, meilisearch_types::settings::RankingRuleView::Exactness)
})
}),
values: rr.as_ref().map(|rr| {
rr.iter()
.filter(|s| {
matches!(
s,
meilisearch_types::settings::RankingRuleView::Asc(_)
| meilisearch_types::settings::RankingRuleView::Desc(_)
)
})
.map(|x| x.to_string())
.collect::<Vec<_>>()
.join(", ")
}),
}
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { ranking_rules: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct SearchableAttributesAnalytics {
pub total: Option<usize>,
pub with_wildcard: Option<bool>,
}
impl SearchableAttributesAnalytics {
pub fn new(setting: Option<&Vec<String>>) -> Self {
Self {
total: setting.as_ref().map(|searchable| searchable.len()),
with_wildcard: setting
.as_ref()
.map(|searchable| searchable.iter().any(|searchable| searchable == "*")),
}
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { searchable_attributes: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct DisplayedAttributesAnalytics {
pub total: Option<usize>,
pub with_wildcard: Option<bool>,
}
impl DisplayedAttributesAnalytics {
pub fn new(displayed: Option<&Vec<String>>) -> Self {
Self {
total: displayed.as_ref().map(|displayed| displayed.len()),
with_wildcard: displayed
.as_ref()
.map(|displayed| displayed.iter().any(|displayed| displayed == "*")),
}
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { displayed_attributes: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct SortableAttributesAnalytics {
pub total: Option<usize>,
pub has_geo: Option<bool>,
}
impl SortableAttributesAnalytics {
pub fn new(setting: Option<&BTreeSet<String>>) -> Self {
Self {
total: setting.as_ref().map(|sort| sort.len()),
has_geo: setting.as_ref().map(|sort| sort.contains("_geo")),
}
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { sortable_attributes: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct FilterableAttributesAnalytics {
pub total: Option<usize>,
pub has_geo: Option<bool>,
}
impl FilterableAttributesAnalytics {
pub fn new(setting: Option<&BTreeSet<String>>) -> Self {
Self {
total: setting.as_ref().map(|filter| filter.len()),
has_geo: setting.as_ref().map(|filter| filter.contains("_geo")),
}
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { filterable_attributes: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct DistinctAttributeAnalytics {
pub set: bool,
}
impl DistinctAttributeAnalytics {
pub fn new(distinct: Option<&String>) -> Self {
Self { set: distinct.is_some() }
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { distinct_attribute: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct ProximityPrecisionAnalytics {
pub set: bool,
pub value: Option<ProximityPrecisionView>,
}
impl ProximityPrecisionAnalytics {
pub fn new(precision: Option<&ProximityPrecisionView>) -> Self {
Self { set: precision.is_some(), value: precision.cloned() }
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { proximity_precision: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct TypoToleranceAnalytics {
pub enabled: Option<bool>,
pub disable_on_attributes: Option<bool>,
pub disable_on_words: Option<bool>,
pub min_word_size_for_one_typo: Option<u8>,
pub min_word_size_for_two_typos: Option<u8>,
}
impl TypoToleranceAnalytics {
pub fn new(setting: Option<&TypoSettings>) -> Self {
Self {
enabled: setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))),
disable_on_attributes: setting
.as_ref()
.and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())),
disable_on_words: setting
.as_ref()
.and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())),
min_word_size_for_one_typo: setting
.as_ref()
.and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.one_typo.set()))
.flatten(),
min_word_size_for_two_typos: setting
.as_ref()
.and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.two_typos.set()))
.flatten(),
}
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { typo_tolerance: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct FacetingAnalytics {
pub max_values_per_facet: Option<usize>,
pub sort_facet_values_by_star_count: Option<bool>,
pub sort_facet_values_by_total: Option<usize>,
}
impl FacetingAnalytics {
pub fn new(setting: Option<&FacetingSettings>) -> Self {
Self {
max_values_per_facet: setting.as_ref().and_then(|s| s.max_values_per_facet.set()),
sort_facet_values_by_star_count: setting.as_ref().and_then(|s| {
s.sort_facet_values_by
.as_ref()
.set()
.map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count))
}),
sort_facet_values_by_total: setting
.as_ref()
.and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())),
}
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { faceting: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct PaginationAnalytics {
pub max_total_hits: Option<usize>,
}
impl PaginationAnalytics {
pub fn new(setting: Option<&PaginationSettings>) -> Self {
Self { max_total_hits: setting.as_ref().and_then(|s| s.max_total_hits.set()) }
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { pagination: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct StopWordsAnalytics {
pub total: Option<usize>,
}
impl StopWordsAnalytics {
pub fn new(stop_words: Option<&BTreeSet<String>>) -> Self {
Self { total: stop_words.as_ref().map(|stop_words| stop_words.len()) }
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { stop_words: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct SynonymsAnalytics {
pub total: Option<usize>,
}
impl SynonymsAnalytics {
pub fn new(synonyms: Option<&BTreeMap<String, Vec<String>>>) -> Self {
Self { total: synonyms.as_ref().map(|synonyms| synonyms.len()) }
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { synonyms: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct EmbeddersAnalytics {
// last
pub total: Option<usize>,
// Merge the sources
pub sources: Option<HashSet<String>>,
// |=
pub document_template_used: Option<bool>,
// max
pub document_template_max_bytes: Option<usize>,
// |=
pub binary_quantization_used: Option<bool>,
}
impl EmbeddersAnalytics {
pub fn new(setting: Option<&BTreeMap<String, Setting<EmbeddingSettings>>>) -> Self {
let mut sources = std::collections::HashSet::new();
if let Some(s) = &setting {
for source in s
.values()
.filter_map(|config| config.clone().set())
.filter_map(|config| config.source.set())
{
use meilisearch_types::milli::vector::settings::EmbedderSource;
match source {
EmbedderSource::OpenAi => sources.insert("openAi".to_string()),
EmbedderSource::HuggingFace => sources.insert("huggingFace".to_string()),
EmbedderSource::UserProvided => sources.insert("userProvided".to_string()),
EmbedderSource::Ollama => sources.insert("ollama".to_string()),
EmbedderSource::Rest => sources.insert("rest".to_string()),
};
}
};
Self {
total: setting.as_ref().map(|s| s.len()),
sources: Some(sources),
document_template_used: setting.as_ref().map(|map| {
map.values()
.filter_map(|config| config.clone().set())
.any(|config| config.document_template.set().is_some())
}),
document_template_max_bytes: setting.as_ref().and_then(|map| {
map.values()
.filter_map(|config| config.clone().set())
.filter_map(|config| config.document_template_max_bytes.set())
.max()
}),
binary_quantization_used: setting.as_ref().map(|map| {
map.values()
.filter_map(|config| config.clone().set())
.any(|config| config.binary_quantized.set().is_some())
}),
}
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { embedders: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
#[serde(transparent)]
pub struct SearchCutoffMsAnalytics {
pub search_cutoff_ms: Option<u64>,
}
impl SearchCutoffMsAnalytics {
pub fn new(setting: Option<&u64>) -> Self {
Self { search_cutoff_ms: setting.copied() }
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { search_cutoff_ms: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
#[serde(transparent)]
pub struct LocalesAnalytics {
pub locales: Option<BTreeSet<Locale>>,
}
impl LocalesAnalytics {
pub fn new(rules: Option<&Vec<LocalizedAttributesRuleView>>) -> Self {
LocalesAnalytics {
locales: rules.as_ref().map(|rules| {
rules
.iter()
.flat_map(|rule| rule.locales.iter().cloned())
.collect::<std::collections::BTreeSet<_>>()
}),
}
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { locales: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct DictionaryAnalytics {
pub total: Option<usize>,
}
impl DictionaryAnalytics {
pub fn new(dictionary: Option<&BTreeSet<String>>) -> Self {
Self { total: dictionary.as_ref().map(|dictionary| dictionary.len()) }
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { dictionary: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct SeparatorTokensAnalytics {
pub total: Option<usize>,
}
impl SeparatorTokensAnalytics {
pub fn new(separator_tokens: Option<&BTreeSet<String>>) -> Self {
Self { total: separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()) }
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { separator_tokens: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct NonSeparatorTokensAnalytics {
pub total: Option<usize>,
}
impl NonSeparatorTokensAnalytics {
pub fn new(non_separator_tokens: Option<&BTreeSet<String>>) -> Self {
Self {
total: non_separator_tokens
.as_ref()
.map(|non_separator_tokens| non_separator_tokens.len()),
}
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { non_separator_tokens: self, ..Default::default() }
}
}

View file

@ -0,0 +1,201 @@
use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{ErrorCode as _, ResponseError};
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::keys::actions;
use meilisearch_types::serde_cs::vec::CS;
use serde_json::Value;
use tracing::debug;
use super::ActionPolicy;
use crate::analytics::Analytics;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::similar_analytics::{SimilarAggregator, SimilarGET, SimilarPOST};
use crate::search::{
add_search_rules, perform_similar, RankingScoreThresholdSimilar, RetrieveVectors, SearchKind,
SimilarQuery, SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::get().to(SeqHandler(similar_get)))
.route(web::post().to(SeqHandler(similar_post))),
);
}
pub async fn similar_get(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebQueryParameter<SimilarQueryGet, DeserrQueryParamError>,
req: HttpRequest,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let query = params.0.try_into()?;
let mut aggregate = SimilarAggregator::<SimilarGET>::from_query(&query);
debug!(parameters = ?query, "Similar get");
let similar = similar(index_scheduler, index_uid, query).await;
if let Ok(similar) = &similar {
aggregate.succeed(similar);
}
analytics.publish(aggregate, &req);
let similar = similar?;
debug!(returns = ?similar, "Similar get");
Ok(HttpResponse::Ok().json(similar))
}
pub async fn similar_post(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebJson<SimilarQuery, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let query = params.into_inner();
debug!(parameters = ?query, "Similar post");
let mut aggregate = SimilarAggregator::<SimilarPOST>::from_query(&query);
let similar = similar(index_scheduler, index_uid, query).await;
if let Ok(similar) = &similar {
aggregate.succeed(similar);
}
analytics.publish(aggregate, &req);
let similar = similar?;
debug!(returns = ?similar, "Similar post");
Ok(HttpResponse::Ok().json(similar))
}
async fn similar(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
index_uid: IndexUid,
mut query: SimilarQuery,
) -> Result<SimilarResult, ResponseError> {
let features = index_scheduler.features();
features.check_vector("Using the similar API")?;
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;
// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
add_search_rules(&mut query.filter, search_rules);
}
let index = index_scheduler.index(&index_uid)?;
let (embedder_name, embedder, quantized) =
SearchKind::embedder(&index_scheduler, &index, &query.embedder, None)?;
tokio::task::spawn_blocking(move || {
perform_similar(
&index,
query,
embedder_name,
embedder,
quantized,
retrieve_vectors,
index_scheduler.features(),
)
})
.await?
}
#[derive(Debug, deserr::Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct SimilarQueryGet {
#[deserr(error = DeserrQueryParamError<InvalidSimilarId>)]
id: Param<String>,
#[deserr(default = Param(DEFAULT_SEARCH_OFFSET()), error = DeserrQueryParamError<InvalidSimilarOffset>)]
offset: Param<usize>,
#[deserr(default = Param(DEFAULT_SEARCH_LIMIT()), error = DeserrQueryParamError<InvalidSimilarLimit>)]
limit: Param<usize>,
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarAttributesToRetrieve>)]
attributes_to_retrieve: Option<CS<String>>,
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarRetrieveVectors>)]
retrieve_vectors: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarFilter>)]
filter: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScore>)]
show_ranking_score: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScoreDetails>)]
show_ranking_score_details: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
#[deserr(error = DeserrQueryParamError<InvalidEmbedder>)]
pub embedder: String,
}
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSimilarRankingScoreThreshold)]
pub struct RankingScoreThresholdGet(RankingScoreThresholdSimilar);
impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
type Error = InvalidSimilarRankingScoreThreshold;
fn try_from(s: String) -> Result<Self, Self::Error> {
let f: f64 = s.parse().map_err(|_| InvalidSimilarRankingScoreThreshold)?;
Ok(RankingScoreThresholdGet(RankingScoreThresholdSimilar::try_from(f)?))
}
}
impl TryFrom<SimilarQueryGet> for SimilarQuery {
type Error = ResponseError;
fn try_from(
SimilarQueryGet {
id,
offset,
limit,
attributes_to_retrieve,
retrieve_vectors,
filter,
show_ranking_score,
show_ranking_score_details,
embedder,
ranking_score_threshold,
}: SimilarQueryGet,
) -> Result<Self, Self::Error> {
let filter = match filter {
Some(f) => match serde_json::from_str(&f) {
Ok(v) => Some(v),
_ => Some(Value::String(f)),
},
None => None,
};
Ok(SimilarQuery {
id: id.0.try_into().map_err(|code: InvalidSimilarId| {
ResponseError::from_msg(code.to_string(), code.error_code())
})?,
offset: offset.0,
limit: limit.0,
filter,
embedder,
attributes_to_retrieve: attributes_to_retrieve.map(|o| o.into_iter().collect()),
retrieve_vectors: retrieve_vectors.0,
show_ranking_score: show_ranking_score.0,
show_ranking_score_details: show_ranking_score_details.0,
ranking_score_threshold: ranking_score_threshold.map(|x| x.0),
})
}
}

View file

@ -0,0 +1,235 @@
use std::collections::{BinaryHeap, HashMap};
use once_cell::sync::Lazy;
use regex::Regex;
use serde_json::{json, Value};
use crate::{
aggregate_methods,
analytics::{Aggregate, AggregateMethod},
search::{SimilarQuery, SimilarResult},
};
aggregate_methods!(
SimilarPOST => "Similar POST",
SimilarGET => "Similar GET",
);
#[derive(Default)]
pub struct SimilarAggregator<Method: AggregateMethod> {
// requests
total_received: usize,
total_succeeded: usize,
time_spent: BinaryHeap<usize>,
// filter
filter_with_geo_radius: bool,
filter_with_geo_bounding_box: bool,
// every time a request has a filter, this field must be incremented by the number of terms it contains
filter_sum_of_criteria_terms: usize,
// every time a request has a filter, this field must be incremented by one
filter_total_number_of_criteria: usize,
used_syntax: HashMap<String, usize>,
// Whether a non-default embedder was specified
retrieve_vectors: bool,
// pagination
max_limit: usize,
max_offset: usize,
// formatting
max_attributes_to_retrieve: usize,
// scoring
show_ranking_score: bool,
show_ranking_score_details: bool,
ranking_score_threshold: bool,
marker: std::marker::PhantomData<Method>,
}
impl<Method: AggregateMethod> SimilarAggregator<Method> {
#[allow(clippy::field_reassign_with_default)]
pub fn from_query(query: &SimilarQuery) -> Self {
let SimilarQuery {
id: _,
embedder: _,
offset,
limit,
attributes_to_retrieve: _,
retrieve_vectors,
show_ranking_score,
show_ranking_score_details,
filter,
ranking_score_threshold,
} = query;
let mut ret = Self::default();
ret.total_received = 1;
if let Some(ref filter) = filter {
static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
ret.filter_total_number_of_criteria = 1;
let syntax = match filter {
Value::String(_) => "string".to_string(),
Value::Array(values) => {
if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) {
"mixed".to_string()
} else {
"array".to_string()
}
}
_ => "none".to_string(),
};
// convert the string to a HashMap
ret.used_syntax.insert(syntax, 1);
let stringified_filters = filter.to_string();
ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox(");
ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
}
ret.max_limit = *limit;
ret.max_offset = *offset;
ret.show_ranking_score = *show_ranking_score;
ret.show_ranking_score_details = *show_ranking_score_details;
ret.ranking_score_threshold = ranking_score_threshold.is_some();
ret.retrieve_vectors = *retrieve_vectors;
ret
}
pub fn succeed(&mut self, result: &SimilarResult) {
let SimilarResult { id: _, hits: _, processing_time_ms, hits_info: _ } = result;
self.total_succeeded = self.total_succeeded.saturating_add(1);
self.time_spent.push(*processing_time_ms as usize);
}
}
impl<Method: AggregateMethod> Aggregate for SimilarAggregator<Method> {
fn event_name(&self) -> &'static str {
Method::event_name()
}
/// Aggregate one [SimilarAggregator] into another.
fn aggregate(mut self: Box<Self>, new: Box<Self>) -> Box<Self> {
let Self {
total_received,
total_succeeded,
mut time_spent,
filter_with_geo_radius,
filter_with_geo_bounding_box,
filter_sum_of_criteria_terms,
filter_total_number_of_criteria,
used_syntax,
max_limit,
max_offset,
max_attributes_to_retrieve,
show_ranking_score,
show_ranking_score_details,
ranking_score_threshold,
retrieve_vectors,
marker: _,
} = *new;
// request
self.total_received = self.total_received.saturating_add(total_received);
self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
self.time_spent.append(&mut time_spent);
// filter
self.filter_with_geo_radius |= filter_with_geo_radius;
self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
self.filter_sum_of_criteria_terms =
self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms);
self.filter_total_number_of_criteria =
self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria);
for (key, value) in used_syntax.into_iter() {
let used_syntax = self.used_syntax.entry(key).or_insert(0);
*used_syntax = used_syntax.saturating_add(value);
}
self.retrieve_vectors |= retrieve_vectors;
// pagination
self.max_limit = self.max_limit.max(max_limit);
self.max_offset = self.max_offset.max(max_offset);
// formatting
self.max_attributes_to_retrieve =
self.max_attributes_to_retrieve.max(max_attributes_to_retrieve);
// scoring
self.show_ranking_score |= show_ranking_score;
self.show_ranking_score_details |= show_ranking_score_details;
self.ranking_score_threshold |= ranking_score_threshold;
self
}
fn into_event(self: Box<Self>) -> serde_json::Value {
let Self {
total_received,
total_succeeded,
time_spent,
filter_with_geo_radius,
filter_with_geo_bounding_box,
filter_sum_of_criteria_terms,
filter_total_number_of_criteria,
used_syntax,
max_limit,
max_offset,
max_attributes_to_retrieve,
show_ranking_score,
show_ranking_score_details,
ranking_score_threshold,
retrieve_vectors,
marker: _,
} = *self;
// we get all the values in a sorted manner
let time_spent = time_spent.into_sorted_vec();
// the index of the 99th percentage of value
let percentile_99th = time_spent.len() * 99 / 100;
// We are only interested by the slowest value of the 99th fastest results
let time_spent = time_spent.get(percentile_99th);
json!({
"requests": {
"99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
"total_succeeded": total_succeeded,
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
"total_received": total_received,
},
"filter": {
"with_geoRadius": filter_with_geo_radius,
"with_geoBoundingBox": filter_with_geo_bounding_box,
"avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
"most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
},
"vector": {
"retrieve_vectors": retrieve_vectors,
},
"pagination": {
"max_limit": max_limit,
"max_offset": max_offset,
},
"formatting": {
"max_attributes_to_retrieve": max_attributes_to_retrieve,
},
"scoring": {
"show_ranking_score": show_ranking_score,
"show_ranking_score_details": show_ranking_score_details,
"ranking_score_threshold": ranking_score_threshold,
}
})
}
}

View file

@ -0,0 +1,318 @@
use std::convert::Infallible;
use std::io::Write;
use std::ops::ControlFlow;
use std::pin::Pin;
use std::str::FromStr;
use std::sync::Arc;
use actix_web::web::{Bytes, Data};
use actix_web::{web, HttpResponse};
use deserr::actix_web::AwebJson;
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
use futures_util::Stream;
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use tokio::sync::mpsc;
use tracing_subscriber::filter::Targets;
use tracing_subscriber::Layer;
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::{LogRouteHandle, LogStderrHandle};
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("stream")
.route(web::post().to(SeqHandler(get_logs)))
.route(web::delete().to(SeqHandler(cancel_logs))),
)
.service(web::resource("stderr").route(web::post().to(SeqHandler(update_stderr_target))));
}
#[derive(Debug, Default, Clone, Copy, Deserr, PartialEq, Eq)]
#[deserr(rename_all = camelCase)]
pub enum LogMode {
#[default]
Human,
Json,
Profile,
}
/// Simple wrapper around the `Targets` from `tracing_subscriber` to implement `MergeWithError` on it.
#[derive(Clone, Debug)]
struct MyTargets(Targets);
/// Simple wrapper around the `ParseError` from `tracing_subscriber` to implement `MergeWithError` on it.
#[derive(Debug, thiserror::Error)]
enum MyParseError {
#[error(transparent)]
ParseError(#[from] tracing_subscriber::filter::ParseError),
#[error(
"Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `meilisearch=info`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`"
)]
Example,
}
impl FromStr for MyTargets {
type Err = MyParseError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
if s.is_empty() {
Err(MyParseError::Example)
} else {
Ok(MyTargets(Targets::from_str(s).map_err(MyParseError::ParseError)?))
}
}
}
impl MergeWithError<MyParseError> for DeserrJsonError<BadRequest> {
fn merge(
_self_: Option<Self>,
other: MyParseError,
merge_location: ValuePointerRef,
) -> ControlFlow<Self, Self> {
Self::error::<Infallible>(
None,
ErrorKind::Unexpected { msg: other.to_string() },
merge_location,
)
}
}
#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields, validate = validate_get_logs -> DeserrJsonError<InvalidSettingsTypoTolerance>)]
pub struct GetLogs {
#[deserr(default = "info".parse().unwrap(), try_from(&String) = MyTargets::from_str -> DeserrJsonError<BadRequest>)]
target: MyTargets,
#[deserr(default, error = DeserrJsonError<BadRequest>)]
mode: LogMode,
#[deserr(default = false, error = DeserrJsonError<BadRequest>)]
profile_memory: bool,
}
fn validate_get_logs<E: DeserializeError>(
logs: GetLogs,
location: ValuePointerRef,
) -> Result<GetLogs, E> {
if logs.profile_memory && logs.mode != LogMode::Profile {
Err(deserr::take_cf_content(E::error::<Infallible>(
None,
ErrorKind::Unexpected {
msg: format!("`profile_memory` can only be used while profiling code and is not compatible with the {:?} mode.", logs.mode),
},
location,
)))
} else {
Ok(logs)
}
}
struct LogWriter {
sender: mpsc::UnboundedSender<Vec<u8>>,
}
impl Write for LogWriter {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
self.sender.send(buf.to_vec()).map_err(std::io::Error::other)?;
Ok(buf.len())
}
fn flush(&mut self) -> std::io::Result<()> {
Ok(())
}
}
struct HandleGuard {
/// We need to keep an handle on the logs to make it available again when the streamer is dropped
logs: Arc<LogRouteHandle>,
}
impl Drop for HandleGuard {
fn drop(&mut self) {
if let Err(e) = self.logs.modify(|layer| *layer.inner_mut() = None) {
tracing::error!("Could not free the logs route: {e}");
}
}
}
fn byte_stream(
receiver: mpsc::UnboundedReceiver<Vec<u8>>,
guard: HandleGuard,
) -> impl futures_util::Stream<Item = Result<Bytes, ResponseError>> {
futures_util::stream::unfold((receiver, guard), move |(mut receiver, guard)| async move {
let vec = receiver.recv().await;
vec.map(From::from).map(Ok).map(|a| (a, (receiver, guard)))
})
}
type PinnedByteStream = Pin<Box<dyn Stream<Item = Result<Bytes, ResponseError>>>>;
fn make_layer<
S: tracing::Subscriber + for<'span> tracing_subscriber::registry::LookupSpan<'span>,
>(
opt: &GetLogs,
logs: Data<LogRouteHandle>,
) -> (Box<dyn Layer<S> + Send + Sync>, PinnedByteStream) {
let guard = HandleGuard { logs: logs.into_inner() };
match opt.mode {
LogMode::Human => {
let (sender, receiver) = tokio::sync::mpsc::unbounded_channel();
let fmt_layer = tracing_subscriber::fmt::layer()
.with_writer(move || LogWriter { sender: sender.clone() })
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE);
let stream = byte_stream(receiver, guard);
(Box::new(fmt_layer) as Box<dyn Layer<S> + Send + Sync>, Box::pin(stream))
}
LogMode::Json => {
let (sender, receiver) = tokio::sync::mpsc::unbounded_channel();
let fmt_layer = tracing_subscriber::fmt::layer()
.with_writer(move || LogWriter { sender: sender.clone() })
.json()
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE);
let stream = byte_stream(receiver, guard);
(Box::new(fmt_layer) as Box<dyn Layer<S> + Send + Sync>, Box::pin(stream))
}
LogMode::Profile => {
let (trace, layer) = tracing_trace::Trace::new(opt.profile_memory);
let stream = entry_stream(trace, guard);
(Box::new(layer) as Box<dyn Layer<S> + Send + Sync>, Box::pin(stream))
}
}
}
fn entry_stream(
trace: tracing_trace::Trace,
guard: HandleGuard,
) -> impl Stream<Item = Result<Bytes, ResponseError>> {
let receiver = trace.into_receiver();
let entry_buf = Vec::new();
futures_util::stream::unfold(
(receiver, entry_buf, guard),
move |(mut receiver, mut entry_buf, guard)| async move {
let mut bytes = Vec::new();
while bytes.len() < 8192 {
entry_buf.clear();
let Ok(count) = tokio::time::timeout(
std::time::Duration::from_secs(1),
receiver.recv_many(&mut entry_buf, 100),
)
.await
else {
break;
};
if count == 0 {
if !bytes.is_empty() {
break;
}
// channel closed, exit
return None;
}
for entry in &entry_buf {
if let Err(error) = serde_json::to_writer(&mut bytes, entry) {
tracing::error!(
error = &error as &dyn std::error::Error,
"deserializing entry"
);
return Some((
Err(ResponseError::from_msg(
format!("error deserializing entry: {error}"),
Code::Internal,
)),
(receiver, entry_buf, guard),
));
}
}
}
Some((Ok(bytes.into()), (receiver, entry_buf, guard)))
},
)
}
pub async fn get_logs(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
logs: Data<LogRouteHandle>,
body: AwebJson<GetLogs, DeserrJsonError>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_logs_route()?;
let opt = body.into_inner();
let mut stream = None;
logs.modify(|layer| match layer.inner_mut() {
None => {
// there is no one getting logs
*layer.filter_mut() = opt.target.0.clone();
let (new_layer, new_stream) = make_layer(&opt, logs.clone());
*layer.inner_mut() = Some(new_layer);
stream = Some(new_stream);
}
Some(_) => {
// there is already someone getting logs
}
})
.unwrap();
if let Some(stream) = stream {
Ok(HttpResponse::Ok().streaming(stream))
} else {
Err(MeilisearchHttpError::AlreadyUsedLogRoute.into())
}
}
pub async fn cancel_logs(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
logs: Data<LogRouteHandle>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_logs_route()?;
if let Err(e) = logs.modify(|layer| *layer.inner_mut() = None) {
tracing::error!("Could not free the logs route: {e}");
}
Ok(HttpResponse::NoContent().finish())
}
#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct UpdateStderrLogs {
#[deserr(default = "info".parse().unwrap(), try_from(&String) = MyTargets::from_str -> DeserrJsonError<BadRequest>)]
target: MyTargets,
}
pub async fn update_stderr_target(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
logs: Data<LogStderrHandle>,
body: AwebJson<UpdateStderrLogs, DeserrJsonError>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_logs_route()?;
let opt = body.into_inner();
logs.modify(|layer| {
*layer.filter_mut() = opt.target.0.clone();
})
.unwrap();
Ok(HttpResponse::NoContent().finish())
}

View file

@ -0,0 +1,64 @@
use actix_web::http::header;
use actix_web::web::{self, Data};
use actix_web::HttpResponse;
use index_scheduler::IndexScheduler;
use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use prometheus::{Encoder, TextEncoder};
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::routes::create_all_stats;
pub fn configure(config: &mut web::ServiceConfig) {
config.service(web::resource("").route(web::get().to(get_metrics)));
}
pub async fn get_metrics(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
auth_controller: Data<AuthController>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_metrics()?;
let auth_filters = index_scheduler.filters();
if !auth_filters.all_indexes_authorized() {
let mut error = ResponseError::from(AuthenticationError::InvalidToken);
error
.message
.push_str(" The API key for the `/metrics` route must allow access to all indexes.");
return Err(error);
}
let response = create_all_stats((*index_scheduler).clone(), auth_controller, auth_filters)?;
crate::metrics::MEILISEARCH_DB_SIZE_BYTES.set(response.database_size as i64);
crate::metrics::MEILISEARCH_USED_DB_SIZE_BYTES.set(response.used_database_size as i64);
crate::metrics::MEILISEARCH_INDEX_COUNT.set(response.indexes.len() as i64);
for (index, value) in response.indexes.iter() {
crate::metrics::MEILISEARCH_INDEX_DOCS_COUNT
.with_label_values(&[index])
.set(value.number_of_documents as i64);
}
for (kind, value) in index_scheduler.get_stats()? {
for (value, count) in value {
crate::metrics::MEILISEARCH_NB_TASKS
.with_label_values(&[&kind, &value])
.set(count as i64);
}
}
if let Some(last_update) = response.last_update {
crate::metrics::MEILISEARCH_LAST_UPDATE.set(last_update.unix_timestamp());
}
crate::metrics::MEILISEARCH_IS_INDEXING.set(index_scheduler.is_task_processing()? as i64);
let encoder = TextEncoder::new();
let mut buffer = vec![];
encoder.encode(&prometheus::gather(), &mut buffer).expect("Failed to encode metrics");
let response = String::from_utf8(buffer).expect("Failed to convert bytes to string");
Ok(HttpResponse::Ok().insert_header(header::ContentType(mime::TEXT_PLAIN)).body(response))
}

View file

@ -0,0 +1,381 @@
use std::collections::BTreeMap;
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use meilisearch_auth::AuthController;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::settings::{Settings, Unchecked};
use meilisearch_types::tasks::{Kind, Status, Task, TaskId};
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use tracing::debug;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::search_queue::SearchQueue;
use crate::Opt;
const PAGINATION_DEFAULT_LIMIT: usize = 20;
mod api_key;
mod dump;
pub mod features;
pub mod indexes;
mod logs;
mod metrics;
mod multi_search;
mod multi_search_analytics;
mod snapshot;
mod swap_indexes;
pub mod tasks;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::scope("/tasks").configure(tasks::configure))
.service(web::resource("/health").route(web::get().to(get_health)))
.service(web::scope("/logs").configure(logs::configure))
.service(web::scope("/keys").configure(api_key::configure))
.service(web::scope("/dumps").configure(dump::configure))
.service(web::scope("/snapshots").configure(snapshot::configure))
.service(web::resource("/stats").route(web::get().to(get_stats)))
.service(web::resource("/version").route(web::get().to(get_version)))
.service(web::scope("/indexes").configure(indexes::configure))
.service(web::scope("/multi-search").configure(multi_search::configure))
.service(web::scope("/swap-indexes").configure(swap_indexes::configure))
.service(web::scope("/metrics").configure(metrics::configure))
.service(web::scope("/experimental-features").configure(features::configure));
}
pub fn get_task_id(req: &HttpRequest, opt: &Opt) -> Result<Option<TaskId>, ResponseError> {
if !opt.experimental_replication_parameters {
return Ok(None);
}
let task_id = req
.headers()
.get("TaskId")
.map(|header| {
header.to_str().map_err(|e| {
ResponseError::from_msg(
format!("TaskId is not a valid utf-8 string: {e}"),
Code::BadRequest,
)
})
})
.transpose()?
.map(|s| {
s.parse::<TaskId>().map_err(|e| {
ResponseError::from_msg(
format!(
"Could not parse the TaskId as a {}: {e}",
std::any::type_name::<TaskId>(),
),
Code::BadRequest,
)
})
})
.transpose()?;
Ok(task_id)
}
pub fn is_dry_run(req: &HttpRequest, opt: &Opt) -> Result<bool, ResponseError> {
if !opt.experimental_replication_parameters {
return Ok(false);
}
Ok(req
.headers()
.get("DryRun")
.map(|header| {
header.to_str().map_err(|e| {
ResponseError::from_msg(
format!("DryRun is not a valid utf-8 string: {e}"),
Code::BadRequest,
)
})
})
.transpose()?
.map_or(false, |s| s.to_lowercase() == "true"))
}
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct SummarizedTaskView {
task_uid: TaskId,
index_uid: Option<String>,
status: Status,
#[serde(rename = "type")]
kind: Kind,
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
enqueued_at: OffsetDateTime,
}
impl From<Task> for SummarizedTaskView {
fn from(task: Task) -> Self {
SummarizedTaskView {
task_uid: task.uid,
index_uid: task.index_uid().map(|s| s.to_string()),
status: task.status,
kind: task.kind.as_kind(),
enqueued_at: task.enqueued_at,
}
}
}
pub struct Pagination {
pub offset: usize,
pub limit: usize,
}
#[derive(Debug, Clone, Serialize)]
pub struct PaginationView<T> {
pub results: Vec<T>,
pub offset: usize,
pub limit: usize,
pub total: usize,
}
impl Pagination {
/// Given the full data to paginate, returns the selected section.
pub fn auto_paginate_sized<T>(
self,
content: impl IntoIterator<Item = T> + ExactSizeIterator,
) -> PaginationView<T>
where
T: Serialize,
{
let total = content.len();
let content: Vec<_> = content.into_iter().skip(self.offset).take(self.limit).collect();
self.format_with(total, content)
}
/// Given an iterator and the total number of elements, returns the selected section.
pub fn auto_paginate_unsized<T>(
self,
total: usize,
content: impl IntoIterator<Item = T>,
) -> PaginationView<T>
where
T: Serialize,
{
let content: Vec<_> = content.into_iter().skip(self.offset).take(self.limit).collect();
self.format_with(total, content)
}
/// Given the data already paginated + the total number of elements, it stores
/// everything in a [PaginationResult].
pub fn format_with<T>(self, total: usize, results: Vec<T>) -> PaginationView<T>
where
T: Serialize,
{
PaginationView { results, offset: self.offset, limit: self.limit, total }
}
}
impl<T> PaginationView<T> {
pub fn new(offset: usize, limit: usize, total: usize, results: Vec<T>) -> Self {
Self { offset, limit, results, total }
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[allow(clippy::large_enum_variant)]
#[serde(tag = "name")]
pub enum UpdateType {
ClearAll,
Customs,
DocumentsAddition {
#[serde(skip_serializing_if = "Option::is_none")]
number: Option<usize>,
},
DocumentsPartial {
#[serde(skip_serializing_if = "Option::is_none")]
number: Option<usize>,
},
DocumentsDeletion {
#[serde(skip_serializing_if = "Option::is_none")]
number: Option<usize>,
},
Settings {
settings: Settings<Unchecked>,
},
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ProcessedUpdateResult {
pub update_id: u64,
#[serde(rename = "type")]
pub update_type: UpdateType,
pub duration: f64, // in seconds
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
pub processed_at: OffsetDateTime,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct FailedUpdateResult {
pub update_id: u64,
#[serde(rename = "type")]
pub update_type: UpdateType,
pub error: ResponseError,
pub duration: f64, // in seconds
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
pub processed_at: OffsetDateTime,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct EnqueuedUpdateResult {
pub update_id: u64,
#[serde(rename = "type")]
pub update_type: UpdateType,
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
#[serde(skip_serializing_if = "Option::is_none", with = "time::serde::rfc3339::option")]
pub started_processing_at: Option<OffsetDateTime>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", tag = "status")]
pub enum UpdateStatusResponse {
Enqueued {
#[serde(flatten)]
content: EnqueuedUpdateResult,
},
Processing {
#[serde(flatten)]
content: EnqueuedUpdateResult,
},
Failed {
#[serde(flatten)]
content: FailedUpdateResult,
},
Processed {
#[serde(flatten)]
content: ProcessedUpdateResult,
},
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
pub struct IndexUpdateResponse {
pub update_id: u64,
}
impl IndexUpdateResponse {
pub fn with_id(update_id: u64) -> Self {
Self { update_id }
}
}
/// Always return a 200 with:
/// ```json
/// {
/// "status": "Meilisearch is running"
/// }
/// ```
pub async fn running() -> HttpResponse {
HttpResponse::Ok().json(serde_json::json!({ "status": "Meilisearch is running" }))
}
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct Stats {
pub database_size: u64,
#[serde(skip)]
pub used_database_size: u64,
#[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
pub last_update: Option<OffsetDateTime>,
pub indexes: BTreeMap<String, indexes::IndexStats>,
}
async fn get_stats(
index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
auth_controller: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<AuthController>>,
) -> Result<HttpResponse, ResponseError> {
let filters = index_scheduler.filters();
let stats = create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), filters)?;
debug!(returns = ?stats, "Get stats");
Ok(HttpResponse::Ok().json(stats))
}
pub fn create_all_stats(
index_scheduler: Data<IndexScheduler>,
auth_controller: Data<AuthController>,
filters: &meilisearch_auth::AuthFilter,
) -> Result<Stats, ResponseError> {
let mut last_task: Option<OffsetDateTime> = None;
let mut indexes = BTreeMap::new();
let mut database_size = 0;
let mut used_database_size = 0;
for index_uid in index_scheduler.index_names()? {
// Accumulate the size of all indexes, even unauthorized ones, so
// as to return a database_size representative of the correct database size on disk.
// See <https://github.com/meilisearch/meilisearch/pull/3541#discussion_r1126747643> for context.
let stats = index_scheduler.index_stats(&index_uid)?;
database_size += stats.inner_stats.database_size;
used_database_size += stats.inner_stats.used_database_size;
if !filters.is_index_authorized(&index_uid) {
continue;
}
last_task = last_task.map_or(Some(stats.inner_stats.updated_at), |last| {
Some(last.max(stats.inner_stats.updated_at))
});
indexes.insert(index_uid.to_string(), stats.into());
}
database_size += index_scheduler.size()?;
used_database_size += index_scheduler.used_size()?;
database_size += auth_controller.size()?;
used_database_size += auth_controller.used_size()?;
let stats = Stats { database_size, used_database_size, last_update: last_task, indexes };
Ok(stats)
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct VersionResponse {
commit_sha: String,
commit_date: String,
pkg_version: String,
}
async fn get_version(
_index_scheduler: GuardedData<ActionPolicy<{ actions::VERSION }>, Data<IndexScheduler>>,
) -> HttpResponse {
let build_info = build_info::BuildInfo::from_build();
HttpResponse::Ok().json(VersionResponse {
commit_sha: build_info.commit_sha1.unwrap_or("unknown").to_string(),
commit_date: build_info
.commit_timestamp
.and_then(|commit_timestamp| {
commit_timestamp
.format(&time::format_description::well_known::Iso8601::DEFAULT)
.ok()
})
.unwrap_or("unknown".into()),
pkg_version: env!("CARGO_PKG_VERSION").to_string(),
})
}
pub async fn get_health(
index_scheduler: Data<IndexScheduler>,
auth_controller: Data<AuthController>,
search_queue: Data<SearchQueue>,
) -> Result<HttpResponse, ResponseError> {
search_queue.health().unwrap();
index_scheduler.health().unwrap();
auth_controller.health().unwrap();
Ok(HttpResponse::Ok().json(serde_json::json!({ "status": "available" })))
}

View file

@ -0,0 +1,185 @@
use actix_http::StatusCode;
use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use serde::Serialize;
use tracing::debug;
use crate::analytics::Analytics;
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::search::search_kind;
use crate::search::{
add_search_rules, perform_federated_search, perform_search, FederatedSearch, RetrieveVectors,
SearchQueryWithIndex, SearchResultWithIndex,
};
use crate::search_queue::SearchQueue;
use super::multi_search_analytics::MultiSearchAggregator;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(multi_search_with_post))));
}
#[derive(Serialize)]
struct SearchResults {
results: Vec<SearchResultWithIndex>,
}
pub async fn multi_search_with_post(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
search_queue: Data<SearchQueue>,
params: AwebJson<FederatedSearch, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
// Since we don't want to process half of the search requests and then get a permit refused
// we're going to get one permit for the whole duration of the multi-search request.
let permit = search_queue.try_get_search_permit().await?;
let federated_search = params.into_inner();
let mut multi_aggregate = MultiSearchAggregator::from_federated_search(&federated_search);
let FederatedSearch { mut queries, federation } = federated_search;
let features = index_scheduler.features();
// regardless of federation, check authorization and apply search rules
let auth = 'check_authorization: {
for (query_index, federated_query) in queries.iter_mut().enumerate() {
let index_uid = federated_query.index_uid.as_str();
// Check index from API key
if !index_scheduler.filters().is_index_authorized(index_uid) {
break 'check_authorization Err(AuthenticationError::InvalidToken)
.with_index(query_index);
}
// Apply search rules from tenant token
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(index_uid)
{
add_search_rules(&mut federated_query.filter, search_rules);
}
}
Ok(())
};
auth.map_err(|(mut err, query_index)| {
// Add the query index that failed as context for the error message.
// We're doing it only here and not directly in the `WithIndex` trait so that the `with_index` function returns a different type
// of result and we can benefit from static typing.
err.message = format!("Inside `.queries[{query_index}]`: {}", err.message);
err
})?;
let response = match federation {
Some(federation) => {
let search_result = tokio::task::spawn_blocking(move || {
perform_federated_search(&index_scheduler, queries, federation, features)
})
.await;
permit.drop().await;
if let Ok(Ok(_)) = search_result {
multi_aggregate.succeed();
}
analytics.publish(multi_aggregate, &req);
HttpResponse::Ok().json(search_result??)
}
None => {
// Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
// so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
// changes.
let search_results: Result<_, (ResponseError, usize)> = async {
let mut search_results = Vec::with_capacity(queries.len());
for (query_index, (index_uid, query, federation_options)) in queries
.into_iter()
.map(SearchQueryWithIndex::into_index_query_federation)
.enumerate()
{
debug!(on_index = query_index, parameters = ?query, "Multi-search");
if federation_options.is_some() {
return Err((
MeilisearchHttpError::FederationOptionsInNonFederatedRequest(
query_index,
)
.into(),
query_index,
));
}
let index = index_scheduler
.index(&index_uid)
.map_err(|err| {
let mut err = ResponseError::from(err);
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
// here the resource not found is not part of the URL.
err.code = StatusCode::BAD_REQUEST;
err
})
.with_index(query_index)?;
let search_kind =
search_kind(&query, index_scheduler.get_ref(), &index, features)
.with_index(query_index)?;
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)
.with_index(query_index)?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(&index, query, search_kind, retrieve_vector, features)
})
.await
.with_index(query_index)?;
search_results.push(SearchResultWithIndex {
index_uid: index_uid.into_inner(),
result: search_result.with_index(query_index)?,
});
}
Ok(search_results)
}
.await;
permit.drop().await;
if search_results.is_ok() {
multi_aggregate.succeed();
}
analytics.publish(multi_aggregate, &req);
let search_results = search_results.map_err(|(mut err, query_index)| {
// Add the query index that failed as context for the error message.
// We're doing it only here and not directly in the `WithIndex` trait so that the `with_index` function returns a different type
// of result and we can benefit from static typing.
err.message = format!("Inside `.queries[{query_index}]`: {}", err.message);
err
})?;
debug!(returns = ?search_results, "Multi-search");
HttpResponse::Ok().json(SearchResults { results: search_results })
}
};
Ok(response)
}
/// Local `Result` extension trait to avoid `map_err` boilerplate.
trait WithIndex {
type T;
/// convert the error type inside of the `Result` to a `ResponseError`, and return a couple of it + the usize.
fn with_index(self, index: usize) -> Result<Self::T, (ResponseError, usize)>;
}
impl<T, E: Into<ResponseError>> WithIndex for Result<T, E> {
type T = T;
fn with_index(self, index: usize) -> Result<T, (ResponseError, usize)> {
self.map_err(|err| (err.into(), index))
}
}

View file

@ -0,0 +1,170 @@
use std::collections::HashSet;
use serde_json::json;
use crate::{
analytics::Aggregate,
search::{FederatedSearch, SearchQueryWithIndex},
};
#[derive(Default)]
pub struct MultiSearchAggregator {
// requests
total_received: usize,
total_succeeded: usize,
// sum of the number of distinct indexes in each single request, use with total_received to compute an avg
total_distinct_index_count: usize,
// number of queries with a single index, use with total_received to compute a proportion
total_single_index: usize,
// sum of the number of search queries in the requests, use with total_received to compute an average
total_search_count: usize,
// scoring
show_ranking_score: bool,
show_ranking_score_details: bool,
// federation
use_federation: bool,
}
impl MultiSearchAggregator {
pub fn from_federated_search(federated_search: &FederatedSearch) -> Self {
let use_federation = federated_search.federation.is_some();
let distinct_indexes: HashSet<_> = federated_search
.queries
.iter()
.map(|query| {
let query = &query;
// make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex
let SearchQueryWithIndex {
index_uid,
federation_options: _,
q: _,
vector: _,
offset: _,
limit: _,
page: _,
hits_per_page: _,
attributes_to_retrieve: _,
retrieve_vectors: _,
attributes_to_crop: _,
crop_length: _,
attributes_to_highlight: _,
show_ranking_score: _,
show_ranking_score_details: _,
show_matches_position: _,
filter: _,
sort: _,
distinct: _,
facets: _,
highlight_pre_tag: _,
highlight_post_tag: _,
crop_marker: _,
matching_strategy: _,
attributes_to_search_on: _,
hybrid: _,
ranking_score_threshold: _,
locales: _,
} = query;
index_uid.as_str()
})
.collect();
let show_ranking_score =
federated_search.queries.iter().any(|query| query.show_ranking_score);
let show_ranking_score_details =
federated_search.queries.iter().any(|query| query.show_ranking_score_details);
Self {
total_received: 1,
total_succeeded: 0,
total_distinct_index_count: distinct_indexes.len(),
total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 },
total_search_count: federated_search.queries.len(),
show_ranking_score,
show_ranking_score_details,
use_federation,
}
}
pub fn succeed(&mut self) {
self.total_succeeded = self.total_succeeded.saturating_add(1);
}
}
impl Aggregate for MultiSearchAggregator {
fn event_name(&self) -> &'static str {
"Documents Searched by Multi-Search POST"
}
/// Aggregate one [MultiSearchAggregator] into another.
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
// write the aggregate in a way that will cause a compilation error if a field is added.
// get ownership of self, replacing it by a default value.
let this = *self;
let total_received = this.total_received.saturating_add(new.total_received);
let total_succeeded = this.total_succeeded.saturating_add(new.total_succeeded);
let total_distinct_index_count =
this.total_distinct_index_count.saturating_add(new.total_distinct_index_count);
let total_single_index = this.total_single_index.saturating_add(new.total_single_index);
let total_search_count = this.total_search_count.saturating_add(new.total_search_count);
let show_ranking_score = this.show_ranking_score || new.show_ranking_score;
let show_ranking_score_details =
this.show_ranking_score_details || new.show_ranking_score_details;
let use_federation = this.use_federation || new.use_federation;
Box::new(Self {
total_received,
total_succeeded,
total_distinct_index_count,
total_single_index,
total_search_count,
show_ranking_score,
show_ranking_score_details,
use_federation,
})
}
fn into_event(self: Box<Self>) -> serde_json::Value {
let Self {
total_received,
total_succeeded,
total_distinct_index_count,
total_single_index,
total_search_count,
show_ranking_score,
show_ranking_score_details,
use_federation,
} = *self;
json!({
"requests": {
"total_succeeded": total_succeeded,
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
"total_received": total_received,
},
"indexes": {
"total_single_index": total_single_index,
"total_distinct_index_count": total_distinct_index_count,
"avg_distinct_index_count": (total_distinct_index_count as f64) / (total_received as f64), // not 0 else returned early
},
"searches": {
"total_search_count": total_search_count,
"avg_search_count": (total_search_count as f64) / (total_received as f64),
},
"scoring": {
"show_ranking_score": show_ranking_score,
"show_ranking_score_details": show_ranking_score_details,
},
"federation": {
"use_federation": use_federation,
}
})
}
}

View file

@ -0,0 +1,39 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::KindWithContent;
use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::Opt;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(create_snapshot))));
}
crate::empty_analytics!(SnapshotAnalytics, "Snapshot Created");
pub async fn create_snapshot(
index_scheduler: GuardedData<ActionPolicy<{ actions::SNAPSHOTS_CREATE }>, Data<IndexScheduler>>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
analytics.publish(SnapshotAnalytics::default(), &req);
let task = KindWithContent::SnapshotCreation;
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Create snapshot");
Ok(HttpResponse::Accepted().json(task))
}

View file

@ -0,0 +1,87 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use deserr::Deserr;
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::InvalidSwapIndexes;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::tasks::{IndexSwap, KindWithContent};
use serde::Serialize;
use super::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::analytics::{Aggregate, Analytics};
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::Opt;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(swap_indexes))));
}
#[derive(Deserr, Debug, Clone, PartialEq, Eq)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct SwapIndexesPayload {
#[deserr(error = DeserrJsonError<InvalidSwapIndexes>, missing_field_error = DeserrJsonError::missing_swap_indexes)]
indexes: Vec<IndexUid>,
}
#[derive(Serialize)]
struct IndexSwappedAnalytics {
swap_operation_number: usize,
}
impl Aggregate for IndexSwappedAnalytics {
fn event_name(&self) -> &'static str {
"Indexes Swapped"
}
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
Box::new(Self {
swap_operation_number: self.swap_operation_number.max(new.swap_operation_number),
})
}
fn into_event(self: Box<Self>) -> serde_json::Value {
serde_json::to_value(*self).unwrap_or_default()
}
}
pub async fn swap_indexes(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_SWAP }>, Data<IndexScheduler>>,
params: AwebJson<Vec<SwapIndexesPayload>, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let params = params.into_inner();
analytics.publish(IndexSwappedAnalytics { swap_operation_number: params.len() }, &req);
let filters = index_scheduler.filters();
let mut swaps = vec![];
for SwapIndexesPayload { indexes } in params.into_iter() {
// TODO: switch to deserr
let (lhs, rhs) = match indexes.as_slice() {
[lhs, rhs] => (lhs, rhs),
_ => {
return Err(MeilisearchHttpError::SwapIndexPayloadWrongLength(indexes).into());
}
};
if !filters.is_index_authorized(lhs) || !filters.is_index_authorized(rhs) {
return Err(AuthenticationError::InvalidToken.into());
}
swaps.push(IndexSwap { indexes: (lhs.to_string(), rhs.to_string()) });
}
let task = KindWithContent::IndexSwap { swaps };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
Ok(HttpResponse::Accepted().json(task))
}

View file

@ -0,0 +1,774 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebQueryParameter;
use deserr::Deserr;
use index_scheduler::{IndexScheduler, Query, TaskId};
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::DeserrQueryParamError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{InvalidTaskDateError, ResponseError};
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::star_or::{OptionStarOr, OptionStarOrList};
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{Kind, KindWithContent, Status};
use serde::Serialize;
use time::format_description::well_known::Rfc3339;
use time::macros::format_description;
use time::{Date, Duration, OffsetDateTime, Time};
use tokio::task;
use super::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::analytics::{Aggregate, AggregateMethod, Analytics};
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::{aggregate_methods, Opt};
const DEFAULT_LIMIT: u32 = 20;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::get().to(SeqHandler(get_tasks)))
.route(web::delete().to(SeqHandler(delete_tasks))),
)
.service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks))))
.service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task))));
}
#[derive(Debug, Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct TasksFilterQuery {
#[deserr(default = Param(DEFAULT_LIMIT), error = DeserrQueryParamError<InvalidTaskLimit>)]
pub limit: Param<u32>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskFrom>)]
pub from: Option<Param<TaskId>>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskUids>)]
pub uids: OptionStarOrList<u32>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskCanceledBy>)]
pub canceled_by: OptionStarOrList<u32>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskTypes>)]
pub types: OptionStarOrList<Kind>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskStatuses>)]
pub statuses: OptionStarOrList<Status>,
#[deserr(default, error = DeserrQueryParamError<InvalidIndexUid>)]
pub index_uids: OptionStarOrList<IndexUid>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterEnqueuedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
pub after_enqueued_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeEnqueuedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
pub before_enqueued_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterStartedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
pub after_started_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeStartedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
pub before_started_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterFinishedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
pub after_finished_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeFinishedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
pub before_finished_at: OptionStarOr<OffsetDateTime>,
}
impl TasksFilterQuery {
fn into_query(self) -> Query {
Query {
limit: Some(self.limit.0),
from: self.from.as_deref().copied(),
statuses: self.statuses.merge_star_and_none(),
types: self.types.merge_star_and_none(),
index_uids: self.index_uids.map(|x| x.to_string()).merge_star_and_none(),
uids: self.uids.merge_star_and_none(),
canceled_by: self.canceled_by.merge_star_and_none(),
before_enqueued_at: self.before_enqueued_at.merge_star_and_none(),
after_enqueued_at: self.after_enqueued_at.merge_star_and_none(),
before_started_at: self.before_started_at.merge_star_and_none(),
after_started_at: self.after_started_at.merge_star_and_none(),
before_finished_at: self.before_finished_at.merge_star_and_none(),
after_finished_at: self.after_finished_at.merge_star_and_none(),
}
}
}
impl TaskDeletionOrCancelationQuery {
fn is_empty(&self) -> bool {
matches!(
self,
TaskDeletionOrCancelationQuery {
uids: OptionStarOrList::None,
canceled_by: OptionStarOrList::None,
types: OptionStarOrList::None,
statuses: OptionStarOrList::None,
index_uids: OptionStarOrList::None,
after_enqueued_at: OptionStarOr::None,
before_enqueued_at: OptionStarOr::None,
after_started_at: OptionStarOr::None,
before_started_at: OptionStarOr::None,
after_finished_at: OptionStarOr::None,
before_finished_at: OptionStarOr::None
}
)
}
}
#[derive(Debug, Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct TaskDeletionOrCancelationQuery {
#[deserr(default, error = DeserrQueryParamError<InvalidTaskUids>)]
pub uids: OptionStarOrList<u32>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskCanceledBy>)]
pub canceled_by: OptionStarOrList<u32>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskTypes>)]
pub types: OptionStarOrList<Kind>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskStatuses>)]
pub statuses: OptionStarOrList<Status>,
#[deserr(default, error = DeserrQueryParamError<InvalidIndexUid>)]
pub index_uids: OptionStarOrList<IndexUid>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterEnqueuedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
pub after_enqueued_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeEnqueuedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
pub before_enqueued_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterStartedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
pub after_started_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeStartedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
pub before_started_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterFinishedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
pub after_finished_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeFinishedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
pub before_finished_at: OptionStarOr<OffsetDateTime>,
}
impl TaskDeletionOrCancelationQuery {
fn into_query(self) -> Query {
Query {
limit: None,
from: None,
statuses: self.statuses.merge_star_and_none(),
types: self.types.merge_star_and_none(),
index_uids: self.index_uids.map(|x| x.to_string()).merge_star_and_none(),
uids: self.uids.merge_star_and_none(),
canceled_by: self.canceled_by.merge_star_and_none(),
before_enqueued_at: self.before_enqueued_at.merge_star_and_none(),
after_enqueued_at: self.after_enqueued_at.merge_star_and_none(),
before_started_at: self.before_started_at.merge_star_and_none(),
after_started_at: self.after_started_at.merge_star_and_none(),
before_finished_at: self.before_finished_at.merge_star_and_none(),
after_finished_at: self.after_finished_at.merge_star_and_none(),
}
}
}
aggregate_methods!(
CancelTasks => "Tasks Canceled",
DeleteTasks => "Tasks Deleted",
);
#[derive(Serialize)]
struct TaskFilterAnalytics<Method: AggregateMethod> {
filtered_by_uid: bool,
filtered_by_index_uid: bool,
filtered_by_type: bool,
filtered_by_status: bool,
filtered_by_canceled_by: bool,
filtered_by_before_enqueued_at: bool,
filtered_by_after_enqueued_at: bool,
filtered_by_before_started_at: bool,
filtered_by_after_started_at: bool,
filtered_by_before_finished_at: bool,
filtered_by_after_finished_at: bool,
#[serde(skip)]
marker: std::marker::PhantomData<Method>,
}
impl<Method: AggregateMethod + 'static> Aggregate for TaskFilterAnalytics<Method> {
fn event_name(&self) -> &'static str {
Method::event_name()
}
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
Box::new(Self {
filtered_by_uid: self.filtered_by_uid | new.filtered_by_uid,
filtered_by_index_uid: self.filtered_by_index_uid | new.filtered_by_index_uid,
filtered_by_type: self.filtered_by_type | new.filtered_by_type,
filtered_by_status: self.filtered_by_status | new.filtered_by_status,
filtered_by_canceled_by: self.filtered_by_canceled_by | new.filtered_by_canceled_by,
filtered_by_before_enqueued_at: self.filtered_by_before_enqueued_at
| new.filtered_by_before_enqueued_at,
filtered_by_after_enqueued_at: self.filtered_by_after_enqueued_at
| new.filtered_by_after_enqueued_at,
filtered_by_before_started_at: self.filtered_by_before_started_at
| new.filtered_by_before_started_at,
filtered_by_after_started_at: self.filtered_by_after_started_at
| new.filtered_by_after_started_at,
filtered_by_before_finished_at: self.filtered_by_before_finished_at
| new.filtered_by_before_finished_at,
filtered_by_after_finished_at: self.filtered_by_after_finished_at
| new.filtered_by_after_finished_at,
marker: std::marker::PhantomData,
})
}
fn into_event(self: Box<Self>) -> serde_json::Value {
serde_json::to_value(*self).unwrap_or_default()
}
}
async fn cancel_tasks(
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_CANCEL }>, Data<IndexScheduler>>,
params: AwebQueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let params = params.into_inner();
if params.is_empty() {
return Err(index_scheduler::Error::TaskCancelationWithEmptyQuery.into());
}
analytics.publish(
TaskFilterAnalytics::<CancelTasks> {
filtered_by_uid: params.uids.is_some(),
filtered_by_index_uid: params.index_uids.is_some(),
filtered_by_type: params.types.is_some(),
filtered_by_status: params.statuses.is_some(),
filtered_by_canceled_by: params.canceled_by.is_some(),
filtered_by_before_enqueued_at: params.before_enqueued_at.is_some(),
filtered_by_after_enqueued_at: params.after_enqueued_at.is_some(),
filtered_by_before_started_at: params.before_started_at.is_some(),
filtered_by_after_started_at: params.after_started_at.is_some(),
filtered_by_before_finished_at: params.before_finished_at.is_some(),
filtered_by_after_finished_at: params.after_finished_at.is_some(),
marker: std::marker::PhantomData,
},
&req,
);
let query = params.into_query();
let (tasks, _) = index_scheduler.get_task_ids_from_authorized_indexes(
&index_scheduler.read_txn()?,
&query,
index_scheduler.filters(),
)?;
let task_cancelation =
KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task =
task::spawn_blocking(move || index_scheduler.register(task_cancelation, uid, dry_run))
.await??;
let task: SummarizedTaskView = task.into();
Ok(HttpResponse::Ok().json(task))
}
async fn delete_tasks(
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_DELETE }>, Data<IndexScheduler>>,
params: AwebQueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let params = params.into_inner();
if params.is_empty() {
return Err(index_scheduler::Error::TaskDeletionWithEmptyQuery.into());
}
analytics.publish(
TaskFilterAnalytics::<DeleteTasks> {
filtered_by_uid: params.uids.is_some(),
filtered_by_index_uid: params.index_uids.is_some(),
filtered_by_type: params.types.is_some(),
filtered_by_status: params.statuses.is_some(),
filtered_by_canceled_by: params.canceled_by.is_some(),
filtered_by_before_enqueued_at: params.before_enqueued_at.is_some(),
filtered_by_after_enqueued_at: params.after_enqueued_at.is_some(),
filtered_by_before_started_at: params.before_started_at.is_some(),
filtered_by_after_started_at: params.after_started_at.is_some(),
filtered_by_before_finished_at: params.before_finished_at.is_some(),
filtered_by_after_finished_at: params.after_finished_at.is_some(),
marker: std::marker::PhantomData,
},
&req,
);
let query = params.into_query();
let (tasks, _) = index_scheduler.get_task_ids_from_authorized_indexes(
&index_scheduler.read_txn()?,
&query,
index_scheduler.filters(),
)?;
let task_deletion =
KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task = task::spawn_blocking(move || index_scheduler.register(task_deletion, uid, dry_run))
.await??;
let task: SummarizedTaskView = task.into();
Ok(HttpResponse::Ok().json(task))
}
#[derive(Debug, Serialize)]
pub struct AllTasks {
results: Vec<TaskView>,
total: u64,
limit: u32,
from: Option<u32>,
next: Option<u32>,
}
async fn get_tasks(
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_GET }>, Data<IndexScheduler>>,
params: AwebQueryParameter<TasksFilterQuery, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
let mut params = params.into_inner();
// We +1 just to know if there is more after this "page" or not.
params.limit.0 = params.limit.0.saturating_add(1);
let limit = params.limit.0;
let query = params.into_query();
let filters = index_scheduler.filters();
let (tasks, total) = index_scheduler.get_tasks_from_authorized_indexes(query, filters)?;
let mut results: Vec<_> = tasks.iter().map(TaskView::from_task).collect();
// If we were able to fetch the number +1 tasks we asked
// it means that there is more to come.
let next = if results.len() == limit as usize { results.pop().map(|t| t.uid) } else { None };
let from = results.first().map(|t| t.uid);
let tasks = AllTasks { results, limit: limit.saturating_sub(1), total, from, next };
Ok(HttpResponse::Ok().json(tasks))
}
async fn get_task(
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_GET }>, Data<IndexScheduler>>,
task_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let task_uid_string = task_uid.into_inner();
let task_uid: TaskId = match task_uid_string.parse() {
Ok(id) => id,
Err(_e) => {
return Err(index_scheduler::Error::InvalidTaskUids { task_uid: task_uid_string }.into())
}
};
let query = index_scheduler::Query { uids: Some(vec![task_uid]), ..Query::default() };
let filters = index_scheduler.filters();
let (tasks, _) = index_scheduler.get_tasks_from_authorized_indexes(query, filters)?;
if let Some(task) = tasks.first() {
let task_view = TaskView::from_task(task);
Ok(HttpResponse::Ok().json(task_view))
} else {
Err(index_scheduler::Error::TaskNotFound(task_uid).into())
}
}
pub enum DeserializeDateOption {
Before,
After,
}
pub fn deserialize_date(
value: &str,
option: DeserializeDateOption,
) -> std::result::Result<OffsetDateTime, InvalidTaskDateError> {
// We can't parse using time's rfc3339 format, since then we won't know what part of the
// datetime was not explicitly specified, and thus we won't be able to increment it to the
// next step.
if let Ok(datetime) = OffsetDateTime::parse(value, &Rfc3339) {
// fully specified up to the second
// we assume that the subseconds are 0 if not specified, and we don't increment to the next second
Ok(datetime)
} else if let Ok(datetime) = Date::parse(
value,
format_description!("[year repr:full base:calendar]-[month repr:numerical]-[day]"),
) {
let datetime = datetime.with_time(Time::MIDNIGHT).assume_utc();
// add one day since the time was not specified
match option {
DeserializeDateOption::Before => Ok(datetime),
DeserializeDateOption::After => {
let datetime = datetime.checked_add(Duration::days(1)).unwrap_or(datetime);
Ok(datetime)
}
}
} else {
Err(InvalidTaskDateError(value.to_owned()))
}
}
pub fn deserialize_date_after(
value: OptionStarOr<String>,
) -> std::result::Result<OptionStarOr<OffsetDateTime>, InvalidTaskDateError> {
value.try_map(|x| deserialize_date(&x, DeserializeDateOption::After))
}
pub fn deserialize_date_before(
value: OptionStarOr<String>,
) -> std::result::Result<OptionStarOr<OffsetDateTime>, InvalidTaskDateError> {
value.try_map(|x| deserialize_date(&x, DeserializeDateOption::Before))
}
#[cfg(test)]
mod tests {
use deserr::Deserr;
use meili_snap::snapshot;
use meilisearch_types::deserr::DeserrQueryParamError;
use meilisearch_types::error::{Code, ResponseError};
use crate::routes::tasks::{TaskDeletionOrCancelationQuery, TasksFilterQuery};
fn deserr_query_params<T>(j: &str) -> Result<T, ResponseError>
where
T: Deserr<DeserrQueryParamError>,
{
let value = serde_urlencoded::from_str::<serde_json::Value>(j)
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::BadRequest))?;
match deserr::deserialize::<_, _, DeserrQueryParamError>(value) {
Ok(data) => Ok(data),
Err(e) => Err(ResponseError::from(e)),
}
}
#[test]
fn deserialize_task_filter_dates() {
{
let params = "afterEnqueuedAt=2021-12-03&beforeEnqueuedAt=2021-12-03&afterStartedAt=2021-12-03&beforeStartedAt=2021-12-03&afterFinishedAt=2021-12-03&beforeFinishedAt=2021-12-03";
let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
snapshot!(format!("{:?}", query.after_enqueued_at), @"Other(2021-12-04 0:00:00.0 +00:00:00)");
snapshot!(format!("{:?}", query.before_enqueued_at), @"Other(2021-12-03 0:00:00.0 +00:00:00)");
snapshot!(format!("{:?}", query.after_started_at), @"Other(2021-12-04 0:00:00.0 +00:00:00)");
snapshot!(format!("{:?}", query.before_started_at), @"Other(2021-12-03 0:00:00.0 +00:00:00)");
snapshot!(format!("{:?}", query.after_finished_at), @"Other(2021-12-04 0:00:00.0 +00:00:00)");
snapshot!(format!("{:?}", query.before_finished_at), @"Other(2021-12-03 0:00:00.0 +00:00:00)");
}
{
let params =
"afterEnqueuedAt=2021-12-03T23:45:23Z&beforeEnqueuedAt=2021-12-03T23:45:23Z";
let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
snapshot!(format!("{:?}", query.after_enqueued_at), @"Other(2021-12-03 23:45:23.0 +00:00:00)");
snapshot!(format!("{:?}", query.before_enqueued_at), @"Other(2021-12-03 23:45:23.0 +00:00:00)");
}
{
let params = "afterEnqueuedAt=1997-11-12T09:55:06-06:20";
let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
snapshot!(format!("{:?}", query.after_enqueued_at), @"Other(1997-11-12 9:55:06.0 -06:20:00)");
}
{
let params = "afterEnqueuedAt=1997-11-12T09:55:06%2B00:00";
let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
snapshot!(format!("{:?}", query.after_enqueued_at), @"Other(1997-11-12 9:55:06.0 +00:00:00)");
}
{
let params = "afterEnqueuedAt=1997-11-12T09:55:06.200000300Z";
let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
snapshot!(format!("{:?}", query.after_enqueued_at), @"Other(1997-11-12 9:55:06.2000003 +00:00:00)");
}
{
// Stars are allowed in date fields as well
let params = "afterEnqueuedAt=*&beforeStartedAt=*&afterFinishedAt=*&beforeFinishedAt=*&afterStartedAt=*&beforeEnqueuedAt=*";
let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
snapshot!(format!("{:?}", query), @"TaskDeletionOrCancelationQuery { uids: None, canceled_by: None, types: None, statuses: None, index_uids: None, after_enqueued_at: Star, before_enqueued_at: Star, after_started_at: Star, before_started_at: Star, after_finished_at: Star, before_finished_at: Star }");
}
{
let params = "afterFinishedAt=2021";
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Invalid value in parameter `afterFinishedAt`: `2021` is an invalid date-time. It should follow the YYYY-MM-DD or RFC 3339 date-time format.",
"code": "invalid_task_after_finished_at",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_after_finished_at"
}
"###);
}
{
let params = "beforeFinishedAt=2021";
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Invalid value in parameter `beforeFinishedAt`: `2021` is an invalid date-time. It should follow the YYYY-MM-DD or RFC 3339 date-time format.",
"code": "invalid_task_before_finished_at",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_before_finished_at"
}
"###);
}
{
let params = "afterEnqueuedAt=2021-12";
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Invalid value in parameter `afterEnqueuedAt`: `2021-12` is an invalid date-time. It should follow the YYYY-MM-DD or RFC 3339 date-time format.",
"code": "invalid_task_after_enqueued_at",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_after_enqueued_at"
}
"###);
}
{
let params = "beforeEnqueuedAt=2021-12-03T23";
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Invalid value in parameter `beforeEnqueuedAt`: `2021-12-03T23` is an invalid date-time. It should follow the YYYY-MM-DD or RFC 3339 date-time format.",
"code": "invalid_task_before_enqueued_at",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_before_enqueued_at"
}
"###);
}
{
let params = "afterStartedAt=2021-12-03T23:45";
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Invalid value in parameter `afterStartedAt`: `2021-12-03T23:45` is an invalid date-time. It should follow the YYYY-MM-DD or RFC 3339 date-time format.",
"code": "invalid_task_after_started_at",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_after_started_at"
}
"###);
}
{
let params = "beforeStartedAt=2021-12-03T23:45";
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Invalid value in parameter `beforeStartedAt`: `2021-12-03T23:45` is an invalid date-time. It should follow the YYYY-MM-DD or RFC 3339 date-time format.",
"code": "invalid_task_before_started_at",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_before_started_at"
}
"###);
}
}
#[test]
fn deserialize_task_filter_uids() {
{
let params = "uids=78,1,12,73";
let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
snapshot!(format!("{:?}", query.uids), @"List([78, 1, 12, 73])");
}
{
let params = "uids=1";
let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
snapshot!(format!("{:?}", query.uids), @"List([1])");
}
{
let params = "uids=cat,*,dog";
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Invalid value in parameter `uids[0]`: could not parse `cat` as a positive integer",
"code": "invalid_task_uids",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_uids"
}
"###);
}
{
let params = "uids=78,hello,world";
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Invalid value in parameter `uids[1]`: could not parse `hello` as a positive integer",
"code": "invalid_task_uids",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_uids"
}
"###);
}
{
let params = "uids=cat";
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Invalid value in parameter `uids`: could not parse `cat` as a positive integer",
"code": "invalid_task_uids",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_uids"
}
"###);
}
}
#[test]
fn deserialize_task_filter_status() {
{
let params = "statuses=succeeded,failed,enqueued,processing,canceled";
let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
snapshot!(format!("{:?}", query.statuses), @"List([Succeeded, Failed, Enqueued, Processing, Canceled])");
}
{
let params = "statuses=enqueued";
let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
snapshot!(format!("{:?}", query.statuses), @"List([Enqueued])");
}
{
let params = "statuses=finished";
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Invalid value in parameter `statuses`: `finished` is not a valid task status. Available statuses are `enqueued`, `processing`, `succeeded`, `failed`, `canceled`.",
"code": "invalid_task_statuses",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_statuses"
}
"###);
}
}
#[test]
fn deserialize_task_filter_types() {
{
let params = "types=documentAdditionOrUpdate,documentDeletion,settingsUpdate,indexCreation,indexDeletion,indexUpdate,indexSwap,taskCancelation,taskDeletion,dumpCreation,snapshotCreation";
let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
snapshot!(format!("{:?}", query.types), @"List([DocumentAdditionOrUpdate, DocumentDeletion, SettingsUpdate, IndexCreation, IndexDeletion, IndexUpdate, IndexSwap, TaskCancelation, TaskDeletion, DumpCreation, SnapshotCreation])");
}
{
let params = "types=settingsUpdate";
let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
snapshot!(format!("{:?}", query.types), @"List([SettingsUpdate])");
}
{
let params = "types=createIndex";
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"code": "invalid_task_types",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
}
"###);
}
}
#[test]
fn deserialize_task_filter_index_uids() {
{
let params = "indexUids=toto,tata-78";
let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
snapshot!(format!("{:?}", query.index_uids), @r###"List([IndexUid("toto"), IndexUid("tata-78")])"###);
}
{
let params = "indexUids=index_a";
let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
snapshot!(format!("{:?}", query.index_uids), @r###"List([IndexUid("index_a")])"###);
}
{
let params = "indexUids=1,hé";
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Invalid value in parameter `indexUids[1]`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"code": "invalid_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
}
"###);
}
{
let params = "indexUids=hé";
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Invalid value in parameter `indexUids`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"code": "invalid_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
}
"###);
}
}
#[test]
fn deserialize_task_filter_general() {
{
let params = "from=12&limit=15&indexUids=toto,tata-78&statuses=succeeded,enqueued&afterEnqueuedAt=2012-04-23&uids=1,2,3";
let query = deserr_query_params::<TasksFilterQuery>(params).unwrap();
snapshot!(format!("{:?}", query), @r###"TasksFilterQuery { limit: Param(15), from: Some(Param(12)), uids: List([1, 2, 3]), canceled_by: None, types: None, statuses: List([Succeeded, Enqueued]), index_uids: List([IndexUid("toto"), IndexUid("tata-78")]), after_enqueued_at: Other(2012-04-24 0:00:00.0 +00:00:00), before_enqueued_at: None, after_started_at: None, before_started_at: None, after_finished_at: None, before_finished_at: None }"###);
}
{
// Stars should translate to `None` in the query
// Verify value of the default limit
let params = "indexUids=*&statuses=succeeded,*&afterEnqueuedAt=2012-04-23&uids=1,2,3";
let query = deserr_query_params::<TasksFilterQuery>(params).unwrap();
snapshot!(format!("{:?}", query), @"TasksFilterQuery { limit: Param(20), from: None, uids: List([1, 2, 3]), canceled_by: None, types: None, statuses: Star, index_uids: Star, after_enqueued_at: Other(2012-04-24 0:00:00.0 +00:00:00), before_enqueued_at: None, after_started_at: None, before_started_at: None, after_finished_at: None, before_finished_at: None }");
}
{
// Stars should also translate to `None` in task deletion/cancelation queries
let params = "indexUids=*&statuses=succeeded,*&afterEnqueuedAt=2012-04-23&uids=1,2,3";
let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
snapshot!(format!("{:?}", query), @"TaskDeletionOrCancelationQuery { uids: List([1, 2, 3]), canceled_by: None, types: None, statuses: Star, index_uids: Star, after_enqueued_at: Other(2012-04-24 0:00:00.0 +00:00:00), before_enqueued_at: None, after_started_at: None, before_started_at: None, after_finished_at: None, before_finished_at: None }");
}
{
// Star in from not allowed
let params = "uids=*&from=*";
let err = deserr_query_params::<TasksFilterQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Invalid value in parameter `from`: could not parse `*` as a positive integer",
"code": "invalid_task_from",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_from"
}
"###);
}
{
// From not allowed in task deletion/cancelation queries
let params = "from=12";
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Unknown parameter `from`: expected one of `uids`, `canceledBy`, `types`, `statuses`, `indexUids`, `afterEnqueuedAt`, `beforeEnqueuedAt`, `afterStartedAt`, `beforeStartedAt`, `afterFinishedAt`, `beforeFinishedAt`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
{
// Limit not allowed in task deletion/cancelation queries
let params = "limit=12";
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Unknown parameter `limit`: expected one of `uids`, `canceledBy`, `types`, `statuses`, `indexUids`, `afterEnqueuedAt`, `beforeEnqueuedAt`, `afterStartedAt`, `beforeStartedAt`, `afterFinishedAt`, `beforeFinishedAt`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
}
#[test]
fn deserialize_task_delete_or_cancel_empty() {
{
let params = "";
let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
assert!(query.is_empty());
}
{
let params = "statuses=*";
let query = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap();
assert!(!query.is_empty());
snapshot!(format!("{query:?}"), @"TaskDeletionOrCancelationQuery { uids: None, canceled_by: None, types: None, statuses: Star, index_uids: None, after_enqueued_at: None, before_enqueued_at: None, after_started_at: None, before_started_at: None, after_finished_at: None, before_finished_at: None }");
}
}
}

View file

@ -0,0 +1,910 @@
use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::fmt;
use std::iter::Zip;
use std::rc::Rc;
use std::str::FromStr as _;
use std::time::Duration;
use std::vec::{IntoIter, Vec};
use actix_http::StatusCode;
use index_scheduler::{IndexScheduler, RoFeatures};
use indexmap::IndexMap;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::{
InvalidMultiSearchFacetsByIndex, InvalidMultiSearchMaxValuesPerFacet,
InvalidMultiSearchMergeFacets, InvalidMultiSearchWeight, InvalidSearchLimit,
InvalidSearchOffset,
};
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue};
use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget};
use roaring::RoaringBitmap;
use serde::Serialize;
use super::ranking_rules::{self, RankingRules};
use super::{
compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, FacetStats,
HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex,
};
use crate::error::MeilisearchHttpError;
use crate::routes::indexes::search::search_kind;
pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0;
#[derive(Debug, Default, Clone, Copy, PartialEq, deserr::Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct FederationOptions {
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchWeight>)]
pub weight: Weight,
}
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidMultiSearchWeight)]
pub struct Weight(f64);
impl Default for Weight {
fn default() -> Self {
Weight(DEFAULT_FEDERATED_WEIGHT)
}
}
impl std::convert::TryFrom<f64> for Weight {
type Error = InvalidMultiSearchWeight;
fn try_from(f: f64) -> Result<Self, Self::Error> {
if f < 0.0 {
Err(InvalidMultiSearchWeight)
} else {
Ok(Weight(f))
}
}
}
impl std::ops::Deref for Weight {
type Target = f64;
fn deref(&self) -> &Self::Target {
&self.0
}
}
#[derive(Debug, deserr::Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct Federation {
#[deserr(default = super::DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
pub limit: usize,
#[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
pub offset: usize,
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchFacetsByIndex>)]
pub facets_by_index: BTreeMap<IndexUid, Option<Vec<String>>>,
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchMergeFacets>)]
pub merge_facets: Option<MergeFacets>,
}
#[derive(Copy, Clone, Debug, deserr::Deserr, Default)]
#[deserr(error = DeserrJsonError<InvalidMultiSearchMergeFacets>, rename_all = camelCase, deny_unknown_fields)]
pub struct MergeFacets {
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchMaxValuesPerFacet>)]
pub max_values_per_facet: Option<usize>,
}
#[derive(Debug, deserr::Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct FederatedSearch {
pub queries: Vec<SearchQueryWithIndex>,
#[deserr(default)]
pub federation: Option<Federation>,
}
#[derive(Serialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct FederatedSearchResult {
pub hits: Vec<SearchHit>,
pub processing_time_ms: u128,
#[serde(flatten)]
pub hits_info: HitsInfo,
#[serde(skip_serializing_if = "Option::is_none")]
pub semantic_hit_count: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
#[serde(skip_serializing_if = "FederatedFacets::is_empty")]
pub facets_by_index: FederatedFacets,
// These fields are only used for analytics purposes
#[serde(skip)]
pub degraded: bool,
#[serde(skip)]
pub used_negative_operator: bool,
}
impl fmt::Debug for FederatedSearchResult {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let FederatedSearchResult {
hits,
processing_time_ms,
hits_info,
semantic_hit_count,
degraded,
used_negative_operator,
facet_distribution,
facet_stats,
facets_by_index,
} = self;
let mut debug = f.debug_struct("SearchResult");
// The most important thing when looking at a search result is the time it took to process
debug.field("processing_time_ms", &processing_time_ms);
debug.field("hits", &format!("[{} hits returned]", hits.len()));
debug.field("hits_info", &hits_info);
if *used_negative_operator {
debug.field("used_negative_operator", used_negative_operator);
}
if *degraded {
debug.field("degraded", degraded);
}
if let Some(facet_distribution) = facet_distribution {
debug.field("facet_distribution", &facet_distribution);
}
if let Some(facet_stats) = facet_stats {
debug.field("facet_stats", &facet_stats);
}
if let Some(semantic_hit_count) = semantic_hit_count {
debug.field("semantic_hit_count", &semantic_hit_count);
}
if !facets_by_index.is_empty() {
debug.field("facets_by_index", &facets_by_index);
}
debug.finish()
}
}
struct WeightedScore<'a> {
details: &'a [ScoreDetails],
weight: f64,
}
impl<'a> WeightedScore<'a> {
pub fn new(details: &'a [ScoreDetails], weight: f64) -> Self {
Self { details, weight }
}
pub fn weighted_global_score(&self) -> f64 {
ScoreDetails::global_score(self.details.iter()) * self.weight
}
pub fn compare_weighted_global_scores(&self, other: &Self) -> Ordering {
self.weighted_global_score()
.partial_cmp(&other.weighted_global_score())
// both are numbers, possibly infinite
.unwrap()
}
pub fn compare(&self, other: &Self) -> Ordering {
let mut left_it = ScoreDetails::score_values(self.details.iter());
let mut right_it = ScoreDetails::score_values(other.details.iter());
loop {
let left = left_it.next();
let right = right_it.next();
match (left, right) {
(None, None) => return Ordering::Equal,
(None, Some(_)) => return Ordering::Less,
(Some(_), None) => return Ordering::Greater,
(Some(ScoreValue::Score(left)), Some(ScoreValue::Score(right))) => {
let left = left * self.weight;
let right = right * other.weight;
if (left - right).abs() <= f64::EPSILON {
continue;
}
return left.partial_cmp(&right).unwrap();
}
(Some(ScoreValue::Sort(left)), Some(ScoreValue::Sort(right))) => {
match left.partial_cmp(right) {
Some(Ordering::Equal) => continue,
Some(order) => return order,
None => return self.compare_weighted_global_scores(other),
}
}
(Some(ScoreValue::GeoSort(left)), Some(ScoreValue::GeoSort(right))) => {
match left.partial_cmp(right) {
Some(Ordering::Equal) => continue,
Some(order) => return order,
None => {
return self.compare_weighted_global_scores(other);
}
}
}
// not comparable details, use global
(Some(ScoreValue::Score(_)), Some(_))
| (Some(_), Some(ScoreValue::Score(_)))
| (Some(ScoreValue::GeoSort(_)), Some(ScoreValue::Sort(_)))
| (Some(ScoreValue::Sort(_)), Some(ScoreValue::GeoSort(_))) => {
let left_count = left_it.count();
let right_count = right_it.count();
// compare how many remaining groups of rules each side has.
// the group with the most remaining groups wins.
return left_count
.cmp(&right_count)
// breaks ties with the global ranking score
.then_with(|| self.compare_weighted_global_scores(other));
}
}
}
}
}
struct QueryByIndex {
query: SearchQuery,
federation_options: FederationOptions,
query_index: usize,
}
struct SearchResultByQuery<'a> {
documents_ids: Vec<DocumentId>,
document_scores: Vec<Vec<ScoreDetails>>,
federation_options: FederationOptions,
hit_maker: HitMaker<'a>,
query_index: usize,
}
struct SearchResultByQueryIter<'a> {
it: Zip<IntoIter<DocumentId>, IntoIter<Vec<ScoreDetails>>>,
federation_options: FederationOptions,
hit_maker: Rc<HitMaker<'a>>,
query_index: usize,
}
impl<'a> SearchResultByQueryIter<'a> {
fn new(
SearchResultByQuery {
documents_ids,
document_scores,
federation_options,
hit_maker,
query_index,
}: SearchResultByQuery<'a>,
) -> Self {
let it = documents_ids.into_iter().zip(document_scores);
Self { it, federation_options, hit_maker: Rc::new(hit_maker), query_index }
}
}
struct SearchResultByQueryIterItem<'a> {
docid: DocumentId,
score: Vec<ScoreDetails>,
federation_options: FederationOptions,
hit_maker: Rc<HitMaker<'a>>,
query_index: usize,
}
fn merge_index_local_results(
results_by_query: Vec<SearchResultByQuery<'_>>,
) -> impl Iterator<Item = SearchResultByQueryIterItem> + '_ {
itertools::kmerge_by(
results_by_query.into_iter().map(SearchResultByQueryIter::new),
|left: &SearchResultByQueryIterItem, right: &SearchResultByQueryIterItem| {
let left_score = WeightedScore::new(&left.score, *left.federation_options.weight);
let right_score = WeightedScore::new(&right.score, *right.federation_options.weight);
match left_score.compare(&right_score) {
// the biggest score goes first
Ordering::Greater => true,
// break ties using query index
Ordering::Equal => left.query_index < right.query_index,
Ordering::Less => false,
}
},
)
}
fn merge_index_global_results(
results_by_index: Vec<SearchResultByIndex>,
) -> impl Iterator<Item = SearchHitByIndex> {
itertools::kmerge_by(
results_by_index.into_iter().map(|result_by_index| result_by_index.hits.into_iter()),
|left: &SearchHitByIndex, right: &SearchHitByIndex| {
let left_score = WeightedScore::new(&left.score, *left.federation_options.weight);
let right_score = WeightedScore::new(&right.score, *right.federation_options.weight);
match left_score.compare(&right_score) {
// the biggest score goes first
Ordering::Greater => true,
// break ties using query index
Ordering::Equal => left.query_index < right.query_index,
Ordering::Less => false,
}
},
)
}
impl<'a> Iterator for SearchResultByQueryIter<'a> {
type Item = SearchResultByQueryIterItem<'a>;
fn next(&mut self) -> Option<Self::Item> {
let (docid, score) = self.it.next()?;
Some(SearchResultByQueryIterItem {
docid,
score,
federation_options: self.federation_options,
hit_maker: Rc::clone(&self.hit_maker),
query_index: self.query_index,
})
}
}
struct SearchHitByIndex {
hit: SearchHit,
score: Vec<ScoreDetails>,
federation_options: FederationOptions,
query_index: usize,
}
struct SearchResultByIndex {
index: String,
hits: Vec<SearchHitByIndex>,
estimated_total_hits: usize,
degraded: bool,
used_negative_operator: bool,
facets: Option<ComputedFacets>,
}
#[derive(Debug, Clone, Default, Serialize)]
pub struct FederatedFacets(pub BTreeMap<String, ComputedFacets>);
impl FederatedFacets {
pub fn insert(&mut self, index: String, facets: Option<ComputedFacets>) {
if let Some(facets) = facets {
self.0.insert(index, facets);
}
}
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
pub fn merge(
self,
MergeFacets { max_values_per_facet }: MergeFacets,
facet_order: BTreeMap<String, (String, OrderBy)>,
) -> Option<ComputedFacets> {
if self.is_empty() {
return None;
}
let mut distribution: BTreeMap<String, _> = Default::default();
let mut stats: BTreeMap<String, FacetStats> = Default::default();
for facets_by_index in self.0.into_values() {
for (facet, index_distribution) in facets_by_index.distribution {
match distribution.entry(facet) {
std::collections::btree_map::Entry::Vacant(entry) => {
entry.insert(index_distribution);
}
std::collections::btree_map::Entry::Occupied(mut entry) => {
let distribution = entry.get_mut();
for (value, index_count) in index_distribution {
distribution
.entry(value)
.and_modify(|count| *count += index_count)
.or_insert(index_count);
}
}
}
}
for (facet, index_stats) in facets_by_index.stats {
match stats.entry(facet) {
std::collections::btree_map::Entry::Vacant(entry) => {
entry.insert(index_stats);
}
std::collections::btree_map::Entry::Occupied(mut entry) => {
let stats = entry.get_mut();
stats.min = f64::min(stats.min, index_stats.min);
stats.max = f64::max(stats.max, index_stats.max);
}
}
}
}
// fixup order
for (facet, values) in &mut distribution {
let order_by = facet_order.get(facet).map(|(_, order)| *order).unwrap_or_default();
match order_by {
OrderBy::Lexicographic => {
values.sort_unstable_by(|left, _, right, _| left.cmp(right))
}
OrderBy::Count => {
values.sort_unstable_by(|_, left, _, right| {
left.cmp(right)
// biggest first
.reverse()
})
}
}
if let Some(max_values_per_facet) = max_values_per_facet {
values.truncate(max_values_per_facet)
};
}
Some(ComputedFacets { distribution, stats })
}
}
pub fn perform_federated_search(
index_scheduler: &IndexScheduler,
queries: Vec<SearchQueryWithIndex>,
mut federation: Federation,
features: RoFeatures,
) -> Result<FederatedSearchResult, ResponseError> {
let before_search = std::time::Instant::now();
// this implementation partition the queries by index to guarantee an important property:
// - all the queries to a particular index use the same read transaction.
// This is an important property, otherwise we cannot guarantee the self-consistency of the results.
// 1. partition queries by index
let mut queries_by_index: BTreeMap<String, Vec<QueryByIndex>> = Default::default();
for (query_index, federated_query) in queries.into_iter().enumerate() {
if let Some(pagination_field) = federated_query.has_pagination() {
return Err(MeilisearchHttpError::PaginationInFederatedQuery(
query_index,
pagination_field,
)
.into());
}
if let Some(facets) = federated_query.has_facets() {
let facets = facets.to_owned();
return Err(MeilisearchHttpError::FacetsInFederatedQuery(
query_index,
federated_query.index_uid.into_inner(),
facets,
)
.into());
}
let (index_uid, query, federation_options) = federated_query.into_index_query_federation();
queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex {
query,
federation_options: federation_options.unwrap_or_default(),
query_index,
})
}
// 2. perform queries, merge and make hits index by index
let required_hit_count = federation.limit + federation.offset;
// In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic
// Then in step (3), we'll update its value if there is any semantic search
let mut semantic_hit_count = None;
let mut results_by_index = Vec::with_capacity(queries_by_index.len());
let mut previous_query_data: Option<(RankingRules, usize, String)> = None;
// remember the order and name of first index for each facet when merging with index settings
// to detect if the order is inconsistent for a facet.
let mut facet_order: Option<BTreeMap<String, (String, OrderBy)>> = match federation.merge_facets
{
Some(MergeFacets { .. }) => Some(Default::default()),
_ => None,
};
for (index_uid, queries) in queries_by_index {
let first_query_index = queries.first().map(|query| query.query_index);
let index = match index_scheduler.index(&index_uid) {
Ok(index) => index,
Err(err) => {
let mut err = ResponseError::from(err);
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
// here the resource not found is not part of the URL.
err.code = StatusCode::BAD_REQUEST;
if let Some(query_index) = first_query_index {
err.message = format!("Inside `.queries[{}]`: {}", query_index, err.message);
}
return Err(err);
}
};
// Important: this is the only transaction we'll use for this index during this federated search
let rtxn = index.read_txn()?;
let criteria = index.criteria(&rtxn)?;
let dictionary = index.dictionary(&rtxn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
let separators = index.allowed_separators(&rtxn)?;
let separators: Option<Vec<_>> =
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
// each query gets its individual cutoff
let cutoff = index.search_cutoff(&rtxn)?;
let mut degraded = false;
let mut used_negative_operator = false;
let mut candidates = RoaringBitmap::new();
let facets_by_index = federation.facets_by_index.remove(&index_uid).flatten();
// TODO: recover the max size + facets_by_index as return value of this function so as not to ask it for all queries
if let Err(mut error) =
check_facet_order(&mut facet_order, &index_uid, &facets_by_index, &index, &rtxn)
{
error.message = format!(
"Inside `.federation.facetsByIndex.{index_uid}`: {error}{}",
if let Some(query_index) = first_query_index {
format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`")
} else {
Default::default()
}
);
return Err(error);
}
// 2.1. Compute all candidates for each query in the index
let mut results_by_query = Vec::with_capacity(queries.len());
for QueryByIndex { query, federation_options, query_index } in queries {
// use an immediately invoked lambda to capture the result without returning from the function
let res: Result<(), ResponseError> = (|| {
let search_kind = search_kind(&query, index_scheduler, &index, features)?;
let canonicalization_kind = match (&search_kind, &query.q) {
(SearchKind::SemanticOnly { .. }, _) => {
ranking_rules::CanonicalizationKind::Vector
}
(_, Some(q)) if !q.is_empty() => ranking_rules::CanonicalizationKind::Keyword,
_ => ranking_rules::CanonicalizationKind::Placeholder,
};
let sort = if let Some(sort) = &query.sort {
let sorts: Vec<_> =
match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() {
Ok(sorts) => sorts,
Err(asc_desc_error) => {
return Err(milli::Error::from(milli::SortError::from(
asc_desc_error,
))
.into())
}
};
Some(sorts)
} else {
None
};
let ranking_rules = ranking_rules::RankingRules::new(
criteria.clone(),
sort,
query.matching_strategy.into(),
canonicalization_kind,
);
if let Some((previous_ranking_rules, previous_query_index, previous_index_uid)) =
previous_query_data.take()
{
if let Err(error) = ranking_rules.is_compatible_with(&previous_ranking_rules) {
return Err(error.to_response_error(
&ranking_rules,
&previous_ranking_rules,
query_index,
previous_query_index,
&index_uid,
&previous_index_uid,
));
}
previous_query_data = if previous_ranking_rules.constraint_count()
> ranking_rules.constraint_count()
{
Some((previous_ranking_rules, previous_query_index, previous_index_uid))
} else {
Some((ranking_rules, query_index, index_uid.clone()))
};
} else {
previous_query_data = Some((ranking_rules, query_index, index_uid.clone()));
}
match search_kind {
SearchKind::KeywordOnly => {}
_ => semantic_hit_count = Some(0),
}
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;
let time_budget = match cutoff {
Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
None => TimeBudget::default(),
};
let (mut search, _is_finite_pagination, _max_total_hits, _offset) =
prepare_search(&index, &rtxn, &query, &search_kind, time_budget, features)?;
search.scoring_strategy(milli::score_details::ScoringStrategy::Detailed);
search.offset(0);
search.limit(required_hit_count);
let (result, _semantic_hit_count) = super::search_from_kind(search_kind, search)?;
let format = AttributesFormat {
attributes_to_retrieve: query.attributes_to_retrieve,
retrieve_vectors,
attributes_to_highlight: query.attributes_to_highlight,
attributes_to_crop: query.attributes_to_crop,
crop_length: query.crop_length,
crop_marker: query.crop_marker,
highlight_pre_tag: query.highlight_pre_tag,
highlight_post_tag: query.highlight_post_tag,
show_matches_position: query.show_matches_position,
sort: query.sort,
show_ranking_score: query.show_ranking_score,
show_ranking_score_details: query.show_ranking_score_details,
locales: query.locales.map(|l| l.iter().copied().map(Into::into).collect()),
};
let milli::SearchResult {
matching_words,
candidates: query_candidates,
documents_ids,
document_scores,
degraded: query_degraded,
used_negative_operator: query_used_negative_operator,
} = result;
candidates |= query_candidates;
degraded |= query_degraded;
used_negative_operator |= query_used_negative_operator;
let tokenizer = HitMaker::tokenizer(dictionary.as_deref(), separators.as_deref());
let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);
let hit_maker = HitMaker::new(&index, &rtxn, format, formatter_builder)?;
results_by_query.push(SearchResultByQuery {
federation_options,
hit_maker,
query_index,
documents_ids,
document_scores,
});
Ok(())
})();
if let Err(mut error) = res {
error.message = format!("Inside `.queries[{query_index}]`: {}", error.message);
return Err(error);
}
}
// 2.2. merge inside index
let mut documents_seen = RoaringBitmap::new();
let merged_result: Result<Vec<_>, ResponseError> =
merge_index_local_results(results_by_query)
// skip documents we've already seen & mark that we saw the current document
.filter(|SearchResultByQueryIterItem { docid, .. }| documents_seen.insert(*docid))
.take(required_hit_count)
// 2.3 make hits
.map(
|SearchResultByQueryIterItem {
docid,
score,
federation_options,
hit_maker,
query_index,
}| {
let mut hit = hit_maker.make_hit(docid, &score)?;
let weighted_score =
ScoreDetails::global_score(score.iter()) * (*federation_options.weight);
let _federation = serde_json::json!(
{
"indexUid": index_uid,
"queriesPosition": query_index,
"weightedRankingScore": weighted_score,
}
);
hit.document.insert("_federation".to_string(), _federation);
Ok(SearchHitByIndex { hit, score, federation_options, query_index })
},
)
.collect();
let merged_result = merged_result?;
let estimated_total_hits = candidates.len() as usize;
let facets = facets_by_index
.map(|facets_by_index| {
compute_facet_distribution_stats(
&facets_by_index,
&index,
&rtxn,
candidates,
super::Route::MultiSearch,
)
})
.transpose()
.map_err(|mut error| {
error.message = format!(
"Inside `.federation.facetsByIndex.{index_uid}`: {}{}",
error.message,
if let Some(query_index) = first_query_index {
format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`")
} else {
Default::default()
}
);
error
})?;
results_by_index.push(SearchResultByIndex {
index: index_uid,
hits: merged_result,
estimated_total_hits,
degraded,
used_negative_operator,
facets,
});
}
// bonus step, make sure to return an error if an index wants a non-faceted field, even if no query actually uses that index.
for (index_uid, facets) in federation.facets_by_index {
let index = match index_scheduler.index(&index_uid) {
Ok(index) => index,
Err(err) => {
let mut err = ResponseError::from(err);
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
// here the resource not found is not part of the URL.
err.code = StatusCode::BAD_REQUEST;
err.message = format!(
"Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries",
err.message
);
return Err(err);
}
};
// Important: this is the only transaction we'll use for this index during this federated search
let rtxn = index.read_txn()?;
if let Err(mut error) =
check_facet_order(&mut facet_order, &index_uid, &facets, &index, &rtxn)
{
error.message = format!(
"Inside `.federation.facetsByIndex.{index_uid}`: {error}\n - Note: index `{index_uid}` is not used in queries",
);
return Err(error);
}
if let Some(facets) = facets {
if let Err(mut error) = compute_facet_distribution_stats(
&facets,
&index,
&rtxn,
Default::default(),
super::Route::MultiSearch,
) {
error.message =
format!("Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", error.message);
return Err(error);
}
}
}
// 3. merge hits and metadata across indexes
// 3.1 merge metadata
let (estimated_total_hits, degraded, used_negative_operator, facets) = {
let mut estimated_total_hits = 0;
let mut degraded = false;
let mut used_negative_operator = false;
let mut facets: FederatedFacets = FederatedFacets::default();
for SearchResultByIndex {
index,
hits: _,
estimated_total_hits: estimated_total_hits_by_index,
facets: facets_by_index,
degraded: degraded_by_index,
used_negative_operator: used_negative_operator_by_index,
} in &mut results_by_index
{
estimated_total_hits += *estimated_total_hits_by_index;
degraded |= *degraded_by_index;
used_negative_operator |= *used_negative_operator_by_index;
let facets_by_index = std::mem::take(facets_by_index);
let index = std::mem::take(index);
facets.insert(index, facets_by_index);
}
(estimated_total_hits, degraded, used_negative_operator, facets)
};
// 3.2 merge hits
let merged_hits: Vec<_> = merge_index_global_results(results_by_index)
.skip(federation.offset)
.take(federation.limit)
.inspect(|hit| {
if let Some(semantic_hit_count) = &mut semantic_hit_count {
if hit.score.iter().any(|score| matches!(&score, ScoreDetails::Vector(_))) {
*semantic_hit_count += 1;
}
}
})
.map(|hit| hit.hit)
.collect();
let (facet_distribution, facet_stats, facets_by_index) =
match federation.merge_facets.zip(facet_order) {
Some((merge_facets, facet_order)) => {
let facets = facets.merge(merge_facets, facet_order);
let (facet_distribution, facet_stats) = facets
.map(|ComputedFacets { distribution, stats }| (distribution, stats))
.unzip();
(facet_distribution, facet_stats, FederatedFacets::default())
}
None => (None, None, facets),
};
let search_result = FederatedSearchResult {
hits: merged_hits,
processing_time_ms: before_search.elapsed().as_millis(),
hits_info: HitsInfo::OffsetLimit {
limit: federation.limit,
offset: federation.offset,
estimated_total_hits,
},
semantic_hit_count,
degraded,
used_negative_operator,
facet_distribution,
facet_stats,
facets_by_index,
};
Ok(search_result)
}
fn check_facet_order(
facet_order: &mut Option<BTreeMap<String, (String, OrderBy)>>,
current_index: &str,
facets_by_index: &Option<Vec<String>>,
index: &milli::Index,
rtxn: &milli::heed::RoTxn<'_>,
) -> Result<(), ResponseError> {
if let (Some(facet_order), Some(facets_by_index)) = (facet_order, facets_by_index) {
let index_facet_order = index.sort_facet_values_by(rtxn)?;
for facet in facets_by_index {
let index_facet_order = index_facet_order.get(facet);
let (previous_index, previous_facet_order) = facet_order
.entry(facet.to_owned())
.or_insert_with(|| (current_index.to_owned(), index_facet_order));
if previous_facet_order != &index_facet_order {
return Err(MeilisearchHttpError::InconsistentFacetOrder {
facet: facet.clone(),
previous_facet_order: *previous_facet_order,
previous_uid: previous_index.clone(),
current_uid: current_index.to_owned(),
index_facet_order,
}
.into());
}
}
};
Ok(())
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,823 @@
use std::collections::HashMap;
use std::fmt::Write;
use itertools::Itertools as _;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::milli::{AscDesc, Criterion, Member, TermsMatchingStrategy};
pub struct RankingRules {
canonical_criteria: Vec<Criterion>,
canonical_sort: Option<Vec<AscDesc>>,
canonicalization_actions: Vec<CanonicalizationAction>,
source_criteria: Vec<Criterion>,
source_sort: Option<Vec<AscDesc>>,
}
pub enum CanonicalizationAction {
PrependedWords {
prepended_index: RankingRuleSource,
},
RemovedDuplicate {
earlier_occurrence: RankingRuleSource,
removed_occurrence: RankingRuleSource,
},
RemovedWords {
reason: RemoveWords,
removed_occurrence: RankingRuleSource,
},
RemovedPlaceholder {
removed_occurrence: RankingRuleSource,
},
TruncatedVector {
vector_rule: RankingRuleSource,
truncated_from: RankingRuleSource,
},
RemovedVector {
vector_rule: RankingRuleSource,
removed_occurrence: RankingRuleSource,
},
RemovedSort {
removed_occurrence: RankingRuleSource,
},
}
pub enum RemoveWords {
WasPrepended,
MatchingStrategyAll,
}
impl std::fmt::Display for RemoveWords {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let reason = match self {
RemoveWords::WasPrepended => "it was previously prepended",
RemoveWords::MatchingStrategyAll => "`query.matchingWords` is set to `all`",
};
f.write_str(reason)
}
}
pub enum CanonicalizationKind {
Placeholder,
Keyword,
Vector,
}
pub struct CompatibilityError {
previous: RankingRule,
current: RankingRule,
}
impl CompatibilityError {
pub(crate) fn to_response_error(
&self,
ranking_rules: &RankingRules,
previous_ranking_rules: &RankingRules,
query_index: usize,
previous_query_index: usize,
index_uid: &str,
previous_index_uid: &str,
) -> meilisearch_types::error::ResponseError {
let rule = self.current.as_string(
&ranking_rules.canonical_criteria,
&ranking_rules.canonical_sort,
query_index,
index_uid,
);
let previous_rule = self.previous.as_string(
&previous_ranking_rules.canonical_criteria,
&previous_ranking_rules.canonical_sort,
previous_query_index,
previous_index_uid,
);
let canonicalization_actions = ranking_rules.canonicalization_notes();
let previous_canonicalization_actions = previous_ranking_rules.canonicalization_notes();
let mut msg = String::new();
let reason = self.reason();
let _ = writeln!(
&mut msg,
"The results of queries #{previous_query_index} and #{query_index} are incompatible: "
);
let _ = writeln!(&mut msg, " 1. {previous_rule}");
let _ = writeln!(&mut msg, " 2. {rule}");
let _ = writeln!(&mut msg, " - {reason}");
if !previous_canonicalization_actions.is_empty() {
let _ = write!(&mut msg, " - note: The ranking rules of query #{previous_query_index} were modified during canonicalization:\n{previous_canonicalization_actions}");
}
if !canonicalization_actions.is_empty() {
let _ = write!(&mut msg, " - note: The ranking rules of query #{query_index} were modified during canonicalization:\n{canonicalization_actions}");
}
ResponseError::from_msg(msg, Code::InvalidMultiSearchQueryRankingRules)
}
pub fn reason(&self) -> &'static str {
match (self.previous.kind, self.current.kind) {
(RankingRuleKind::Relevancy, RankingRuleKind::AscendingSort)
| (RankingRuleKind::Relevancy, RankingRuleKind::DescendingSort)
| (RankingRuleKind::AscendingSort, RankingRuleKind::Relevancy)
| (RankingRuleKind::DescendingSort, RankingRuleKind::Relevancy) => {
"cannot compare a relevancy rule with a sort rule"
}
(RankingRuleKind::Relevancy, RankingRuleKind::AscendingGeoSort)
| (RankingRuleKind::Relevancy, RankingRuleKind::DescendingGeoSort)
| (RankingRuleKind::AscendingGeoSort, RankingRuleKind::Relevancy)
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::Relevancy) => {
"cannot compare a relevancy rule with a geosort rule"
}
(RankingRuleKind::AscendingSort, RankingRuleKind::DescendingSort)
| (RankingRuleKind::DescendingSort, RankingRuleKind::AscendingSort) => {
"cannot compare two sort rules in opposite directions"
}
(RankingRuleKind::AscendingSort, RankingRuleKind::AscendingGeoSort)
| (RankingRuleKind::AscendingSort, RankingRuleKind::DescendingGeoSort)
| (RankingRuleKind::DescendingSort, RankingRuleKind::AscendingGeoSort)
| (RankingRuleKind::DescendingSort, RankingRuleKind::DescendingGeoSort)
| (RankingRuleKind::AscendingGeoSort, RankingRuleKind::AscendingSort)
| (RankingRuleKind::AscendingGeoSort, RankingRuleKind::DescendingSort)
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::AscendingSort)
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::DescendingSort) => {
"cannot compare a sort rule with a geosort rule"
}
(RankingRuleKind::AscendingGeoSort, RankingRuleKind::DescendingGeoSort)
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::AscendingGeoSort) => {
"cannot compare two geosort rules in opposite directions"
}
(RankingRuleKind::Relevancy, RankingRuleKind::Relevancy)
| (RankingRuleKind::AscendingSort, RankingRuleKind::AscendingSort)
| (RankingRuleKind::DescendingSort, RankingRuleKind::DescendingSort)
| (RankingRuleKind::AscendingGeoSort, RankingRuleKind::AscendingGeoSort)
| (RankingRuleKind::DescendingGeoSort, RankingRuleKind::DescendingGeoSort) => {
"internal error, comparison should be possible"
}
}
}
}
impl RankingRules {
pub fn new(
criteria: Vec<Criterion>,
sort: Option<Vec<AscDesc>>,
terms_matching_strategy: TermsMatchingStrategy,
canonicalization_kind: CanonicalizationKind,
) -> Self {
let (canonical_criteria, canonical_sort, canonicalization_actions) =
Self::canonicalize(&criteria, &sort, terms_matching_strategy, canonicalization_kind);
Self {
canonical_criteria,
canonical_sort,
canonicalization_actions,
source_criteria: criteria,
source_sort: sort,
}
}
fn canonicalize(
criteria: &[Criterion],
sort: &Option<Vec<AscDesc>>,
terms_matching_strategy: TermsMatchingStrategy,
canonicalization_kind: CanonicalizationKind,
) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
match canonicalization_kind {
CanonicalizationKind::Placeholder => Self::canonicalize_placeholder(criteria, sort),
CanonicalizationKind::Keyword => {
Self::canonicalize_keyword(criteria, sort, terms_matching_strategy)
}
CanonicalizationKind::Vector => Self::canonicalize_vector(criteria, sort),
}
}
fn canonicalize_placeholder(
criteria: &[Criterion],
sort_query: &Option<Vec<AscDesc>>,
) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
let mut sort = None;
let mut sorted_fields = HashMap::new();
let mut canonicalization_actions = Vec::new();
let mut canonical_criteria = Vec::new();
let mut canonical_sort = None;
for (criterion_index, criterion) in criteria.iter().enumerate() {
match criterion.clone() {
Criterion::Words
| Criterion::Typo
| Criterion::Proximity
| Criterion::Attribute
| Criterion::Exactness => {
canonicalization_actions.push(CanonicalizationAction::RemovedPlaceholder {
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
})
}
Criterion::Sort => {
if let Some(previous_index) = sort {
canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: RankingRuleSource::Criterion(previous_index),
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
});
} else if let Some(sort_query) = sort_query {
sort = Some(criterion_index);
canonical_criteria.push(criterion.clone());
canonical_sort = Some(canonicalize_sort(
&mut sorted_fields,
sort_query.as_slice(),
criterion_index,
&mut canonicalization_actions,
));
} else {
canonicalization_actions.push(CanonicalizationAction::RemovedSort {
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
})
}
}
Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) {
std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: *entry.get(),
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
}),
std::collections::hash_map::Entry::Vacant(entry) => {
entry.insert(RankingRuleSource::Criterion(criterion_index));
canonical_criteria.push(criterion.clone())
}
},
}
}
(canonical_criteria, canonical_sort, canonicalization_actions)
}
fn canonicalize_vector(
criteria: &[Criterion],
sort_query: &Option<Vec<AscDesc>>,
) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
let mut sort = None;
let mut sorted_fields = HashMap::new();
let mut canonicalization_actions = Vec::new();
let mut canonical_criteria = Vec::new();
let mut canonical_sort = None;
let mut vector = None;
'criteria: for (criterion_index, criterion) in criteria.iter().enumerate() {
match criterion.clone() {
Criterion::Words
| Criterion::Typo
| Criterion::Proximity
| Criterion::Attribute
| Criterion::Exactness => match vector {
Some(previous_occurrence) => {
if sorted_fields.is_empty() {
canonicalization_actions.push(CanonicalizationAction::RemovedVector {
vector_rule: RankingRuleSource::Criterion(previous_occurrence),
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
});
} else {
canonicalization_actions.push(
CanonicalizationAction::TruncatedVector {
vector_rule: RankingRuleSource::Criterion(previous_occurrence),
truncated_from: RankingRuleSource::Criterion(criterion_index),
},
);
break 'criteria;
}
}
None => {
canonical_criteria.push(criterion.clone());
vector = Some(criterion_index);
}
},
Criterion::Sort => {
if let Some(previous_index) = sort {
canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: RankingRuleSource::Criterion(previous_index),
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
});
} else if let Some(sort_query) = sort_query {
sort = Some(criterion_index);
canonical_criteria.push(criterion.clone());
canonical_sort = Some(canonicalize_sort(
&mut sorted_fields,
sort_query.as_slice(),
criterion_index,
&mut canonicalization_actions,
));
} else {
canonicalization_actions.push(CanonicalizationAction::RemovedSort {
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
})
}
}
Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) {
std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: *entry.get(),
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
}),
std::collections::hash_map::Entry::Vacant(entry) => {
entry.insert(RankingRuleSource::Criterion(criterion_index));
canonical_criteria.push(criterion.clone())
}
},
}
}
(canonical_criteria, canonical_sort, canonicalization_actions)
}
fn canonicalize_keyword(
criteria: &[Criterion],
sort_query: &Option<Vec<AscDesc>>,
terms_matching_strategy: TermsMatchingStrategy,
) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
let mut words = None;
let mut typo = None;
let mut proximity = None;
let mut sort = None;
let mut attribute = None;
let mut exactness = None;
let mut sorted_fields = HashMap::new();
let mut canonical_criteria = Vec::new();
let mut canonical_sort = None;
let mut canonicalization_actions = Vec::new();
for (criterion_index, criterion) in criteria.iter().enumerate() {
let criterion = criterion.clone();
match criterion.clone() {
Criterion::Words => {
if let TermsMatchingStrategy::All = terms_matching_strategy {
canonicalization_actions.push(CanonicalizationAction::RemovedWords {
reason: RemoveWords::MatchingStrategyAll,
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
});
continue;
}
if let Some(maybe_previous_index) = words {
if let Some(previous_index) = maybe_previous_index {
canonicalization_actions.push(
CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: RankingRuleSource::Criterion(
previous_index,
),
removed_occurrence: RankingRuleSource::Criterion(
criterion_index,
),
},
);
continue;
}
canonicalization_actions.push(CanonicalizationAction::RemovedWords {
reason: RemoveWords::WasPrepended,
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
})
}
words = Some(Some(criterion_index));
canonical_criteria.push(criterion);
}
Criterion::Typo => {
canonicalize_criterion(
criterion,
criterion_index,
terms_matching_strategy,
&mut words,
&mut canonicalization_actions,
&mut canonical_criteria,
&mut typo,
);
}
Criterion::Proximity => {
canonicalize_criterion(
criterion,
criterion_index,
terms_matching_strategy,
&mut words,
&mut canonicalization_actions,
&mut canonical_criteria,
&mut proximity,
);
}
Criterion::Attribute => {
canonicalize_criterion(
criterion,
criterion_index,
terms_matching_strategy,
&mut words,
&mut canonicalization_actions,
&mut canonical_criteria,
&mut attribute,
);
}
Criterion::Exactness => {
canonicalize_criterion(
criterion,
criterion_index,
terms_matching_strategy,
&mut words,
&mut canonicalization_actions,
&mut canonical_criteria,
&mut exactness,
);
}
Criterion::Sort => {
if let Some(previous_index) = sort {
canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: RankingRuleSource::Criterion(previous_index),
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
});
} else if let Some(sort_query) = sort_query {
sort = Some(criterion_index);
canonical_criteria.push(criterion);
canonical_sort = Some(canonicalize_sort(
&mut sorted_fields,
sort_query.as_slice(),
criterion_index,
&mut canonicalization_actions,
));
} else {
canonicalization_actions.push(CanonicalizationAction::RemovedSort {
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
})
}
}
Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) {
std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: *entry.get(),
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
}),
std::collections::hash_map::Entry::Vacant(entry) => {
entry.insert(RankingRuleSource::Criterion(criterion_index));
canonical_criteria.push(criterion)
}
},
}
}
(canonical_criteria, canonical_sort, canonicalization_actions)
}
pub fn is_compatible_with(&self, previous: &Self) -> Result<(), CompatibilityError> {
for (current, previous) in self.coalesce_iterator().zip(previous.coalesce_iterator()) {
if current.kind != previous.kind {
return Err(CompatibilityError { current, previous });
}
}
Ok(())
}
pub fn constraint_count(&self) -> usize {
self.coalesce_iterator().count()
}
fn coalesce_iterator(&self) -> impl Iterator<Item = RankingRule> + '_ {
self.canonical_criteria
.iter()
.enumerate()
.flat_map(|(criterion_index, criterion)| {
RankingRule::from_criterion(criterion_index, criterion, &self.canonical_sort)
})
.coalesce(
|previous @ RankingRule { source: previous_source, kind: previous_kind },
current @ RankingRule { source, kind }| {
match (previous_kind, kind) {
(RankingRuleKind::Relevancy, RankingRuleKind::Relevancy) => {
let merged_source = match (previous_source, source) {
(
RankingRuleSource::Criterion(previous),
RankingRuleSource::Criterion(current),
) => RankingRuleSource::CoalescedCriteria(previous, current),
(
RankingRuleSource::CoalescedCriteria(begin, _end),
RankingRuleSource::Criterion(current),
) => RankingRuleSource::CoalescedCriteria(begin, current),
(_previous, current) => current,
};
Ok(RankingRule { source: merged_source, kind })
}
_ => Err((previous, current)),
}
},
)
}
fn canonicalization_notes(&self) -> String {
use CanonicalizationAction::*;
let mut notes = String::new();
for (index, action) in self.canonicalization_actions.iter().enumerate() {
let index = index + 1;
let _ = match action {
PrependedWords { prepended_index } => writeln!(
&mut notes,
" {index}. Prepended rule `words` before first relevancy rule `{}` at position {}",
prepended_index.rule_name(&self.source_criteria, &self.source_sort),
prepended_index.rule_position()
),
RemovedDuplicate { earlier_occurrence, removed_occurrence } => writeln!(
&mut notes,
" {index}. Removed duplicate rule `{}` at position {} as it already appears at position {}",
earlier_occurrence.rule_name(&self.source_criteria, &self.source_sort),
removed_occurrence.rule_position(),
earlier_occurrence.rule_position(),
),
RemovedWords { reason, removed_occurrence } => writeln!(
&mut notes,
" {index}. Removed rule `words` at position {} because {reason}",
removed_occurrence.rule_position()
),
RemovedPlaceholder { removed_occurrence } => writeln!(
&mut notes,
" {index}. Removed relevancy rule `{}` at position {} because the query is a placeholder search (`q`: \"\")",
removed_occurrence.rule_name(&self.source_criteria, &self.source_sort),
removed_occurrence.rule_position()
),
TruncatedVector { vector_rule, truncated_from } => writeln!(
&mut notes,
" {index}. Truncated relevancy rule `{}` at position {} and later rules because the query is a vector search and `vector` was inserted at position {}",
truncated_from.rule_name(&self.source_criteria, &self.source_sort),
truncated_from.rule_position(),
vector_rule.rule_position(),
),
RemovedVector { vector_rule, removed_occurrence } => writeln!(
&mut notes,
" {index}. Removed relevancy rule `{}` at position {} because the query is a vector search and `vector` was already inserted at position {}",
removed_occurrence.rule_name(&self.source_criteria, &self.source_sort),
removed_occurrence.rule_position(),
vector_rule.rule_position(),
),
RemovedSort { removed_occurrence } => writeln!(
&mut notes,
" {index}. Removed rule `sort` at position {} because `query.sort` is empty",
removed_occurrence.rule_position()
),
};
}
notes
}
}
fn canonicalize_sort(
sorted_fields: &mut HashMap<String, RankingRuleSource>,
sort_query: &[AscDesc],
criterion_index: usize,
canonicalization_actions: &mut Vec<CanonicalizationAction>,
) -> Vec<AscDesc> {
let mut geo_sorted = None;
let mut canonical_sort = Vec::new();
for (sort_index, asc_desc) in sort_query.iter().enumerate() {
let source = RankingRuleSource::Sort { criterion_index, sort_index };
let asc_desc = asc_desc.clone();
match asc_desc.clone() {
AscDesc::Asc(Member::Field(s)) | AscDesc::Desc(Member::Field(s)) => {
match sorted_fields.entry(s) {
std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: *entry.get(),
removed_occurrence: source,
}),
std::collections::hash_map::Entry::Vacant(entry) => {
entry.insert(source);
canonical_sort.push(asc_desc);
}
}
}
AscDesc::Asc(Member::Geo(_)) | AscDesc::Desc(Member::Geo(_)) => match geo_sorted {
Some(earlier_sort_index) => {
canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: RankingRuleSource::Sort {
criterion_index,
sort_index: earlier_sort_index,
},
removed_occurrence: source,
})
}
None => {
geo_sorted = Some(sort_index);
canonical_sort.push(asc_desc);
}
},
}
}
canonical_sort
}
fn canonicalize_criterion(
criterion: Criterion,
criterion_index: usize,
terms_matching_strategy: TermsMatchingStrategy,
words: &mut Option<Option<usize>>,
canonicalization_actions: &mut Vec<CanonicalizationAction>,
canonical_criteria: &mut Vec<Criterion>,
rule: &mut Option<usize>,
) {
*words = match (terms_matching_strategy, words.take()) {
(TermsMatchingStrategy::All, words) => words,
(_, None) => {
// inject words
canonicalization_actions.push(CanonicalizationAction::PrependedWords {
prepended_index: RankingRuleSource::Criterion(criterion_index),
});
canonical_criteria.push(Criterion::Words);
Some(None)
}
(_, words) => words,
};
if let Some(previous_index) = *rule {
canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
earlier_occurrence: RankingRuleSource::Criterion(previous_index),
removed_occurrence: RankingRuleSource::Criterion(criterion_index),
});
} else {
*rule = Some(criterion_index);
canonical_criteria.push(criterion)
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RankingRuleKind {
Relevancy,
AscendingSort,
DescendingSort,
AscendingGeoSort,
DescendingGeoSort,
}
#[derive(Debug, Clone, Copy)]
pub struct RankingRule {
source: RankingRuleSource,
kind: RankingRuleKind,
}
#[derive(Debug, Clone, Copy)]
pub enum RankingRuleSource {
Criterion(usize),
CoalescedCriteria(usize, usize),
Sort { criterion_index: usize, sort_index: usize },
}
impl RankingRuleSource {
fn rule_name(&self, criteria: &[Criterion], sort: &Option<Vec<AscDesc>>) -> String {
match self {
RankingRuleSource::Criterion(criterion_index) => criteria
.get(*criterion_index)
.map(|c| c.to_string())
.unwrap_or_else(|| "unknown".into()),
RankingRuleSource::CoalescedCriteria(begin, end) => {
let rules: Vec<_> = criteria
.get(*begin..=*end)
.iter()
.flat_map(|c| c.iter())
.map(|c| c.to_string())
.collect();
rules.join(", ")
}
RankingRuleSource::Sort { criterion_index: _, sort_index } => {
match sort.as_deref().and_then(|sort| sort.get(*sort_index)) {
Some(sort) => match sort {
AscDesc::Asc(Member::Field(field_name)) => format!("{field_name}:asc"),
AscDesc::Desc(Member::Field(field_name)) => {
format!("{field_name}:desc")
}
AscDesc::Asc(Member::Geo(_)) => "_geo(..):asc".to_string(),
AscDesc::Desc(Member::Geo(_)) => "_geo(..):desc".to_string(),
},
None => "unknown".into(),
}
}
}
}
fn rule_position(&self) -> String {
match self {
RankingRuleSource::Criterion(criterion_index) => {
format!("#{criterion_index} in ranking rules")
}
RankingRuleSource::CoalescedCriteria(begin, end) => {
format!("#{begin} to #{end} in ranking rules")
}
RankingRuleSource::Sort { criterion_index, sort_index } => format!(
"#{sort_index} in `query.sort` (as `sort` is #{criterion_index} in ranking rules)"
),
}
}
}
impl RankingRule {
fn from_criterion<'a>(
criterion_index: usize,
criterion: &'a Criterion,
sort: &'a Option<Vec<AscDesc>>,
) -> impl Iterator<Item = Self> + 'a {
let kind = match criterion {
Criterion::Words
| Criterion::Typo
| Criterion::Proximity
| Criterion::Attribute
| Criterion::Exactness => RankingRuleKind::Relevancy,
Criterion::Asc(s) if s == "_geo" => RankingRuleKind::AscendingGeoSort,
Criterion::Asc(_) => RankingRuleKind::AscendingSort,
Criterion::Desc(s) if s == "_geo" => RankingRuleKind::DescendingGeoSort,
Criterion::Desc(_) => RankingRuleKind::DescendingSort,
Criterion::Sort => {
return either::Right(sort.iter().flatten().enumerate().map(
move |(rule_index, asc_desc)| {
Self::from_asc_desc(asc_desc, criterion_index, rule_index)
},
))
}
};
either::Left(std::iter::once(Self {
source: RankingRuleSource::Criterion(criterion_index),
kind,
}))
}
fn from_asc_desc(asc_desc: &AscDesc, sort_index: usize, rule_index_in_sort: usize) -> Self {
let kind = match asc_desc {
AscDesc::Asc(Member::Field(_)) => RankingRuleKind::AscendingSort,
AscDesc::Desc(Member::Field(_)) => RankingRuleKind::DescendingSort,
AscDesc::Asc(Member::Geo(_)) => RankingRuleKind::AscendingGeoSort,
AscDesc::Desc(Member::Geo(_)) => RankingRuleKind::DescendingGeoSort,
};
Self {
source: RankingRuleSource::Sort {
criterion_index: sort_index,
sort_index: rule_index_in_sort,
},
kind,
}
}
fn as_string(
&self,
canonical_criteria: &[Criterion],
canonical_sort: &Option<Vec<AscDesc>>,
query_index: usize,
index_uid: &str,
) -> String {
let kind = match self.kind {
RankingRuleKind::Relevancy => "relevancy",
RankingRuleKind::AscendingSort => "ascending sort",
RankingRuleKind::DescendingSort => "descending sort",
RankingRuleKind::AscendingGeoSort => "ascending geo sort",
RankingRuleKind::DescendingGeoSort => "descending geo sort",
};
let rules = self.fetch_from_source(canonical_criteria, canonical_sort);
let source = match self.source {
RankingRuleSource::Criterion(criterion_index) => format!("`queries[{query_index}]`, `{index_uid}.rankingRules[{criterion_index}]`"),
RankingRuleSource::CoalescedCriteria(begin, end) => format!("`queries[{query_index}]`, `{index_uid}.rankingRules[{begin}..={end}]`"),
RankingRuleSource::Sort { criterion_index, sort_index } => format!("`queries[{query_index}].sort[{sort_index}]`, `{index_uid}.rankingRules[{criterion_index}]`"),
};
format!("{source}: {kind} {rules}")
}
fn fetch_from_source(
&self,
canonical_criteria: &[Criterion],
canonical_sort: &Option<Vec<AscDesc>>,
) -> String {
let rule_name = match self.source {
RankingRuleSource::Criterion(index) => {
canonical_criteria.get(index).map(|criterion| criterion.to_string())
}
RankingRuleSource::CoalescedCriteria(begin, end) => {
let rules: Vec<String> = canonical_criteria
.get(begin..=end)
.into_iter()
.flat_map(|criteria| criteria.iter())
.map(|criterion| criterion.to_string())
.collect();
(!rules.is_empty()).then_some(rules.join(", "))
}
RankingRuleSource::Sort { criterion_index: _, sort_index } => canonical_sort
.as_deref()
.and_then(|canonical_sort| canonical_sort.get(sort_index))
.and_then(|asc_desc: &AscDesc| match asc_desc {
AscDesc::Asc(Member::Field(s)) | AscDesc::Desc(Member::Field(s)) => {
Some(format!("on field `{s}`"))
}
_ => None,
}),
};
let rule_name = rule_name.unwrap_or_else(|| "default".into());
format!("rule(s) {rule_name}")
}
}

View file

@ -0,0 +1,170 @@
//! This file implements a queue of searches to process and the ability to control how many searches can be run in parallel.
//! We need this because we don't want to process more search requests than we have cores.
//! That slows down everything and consumes RAM for no reason.
//! The steps to do a search are to get the `SearchQueue` data structure and try to get a search permit.
//! This can fail if the queue is full, and we need to drop your search request to register a new one.
//!
//! ### How to do a search request
//!
//! In order to do a search request you should try to get a search permit.
//! Retrieve the `SearchQueue` structure from actix-web (`search_queue: Data<SearchQueue>`)
//! and right before processing the search, calls the `SearchQueue::try_get_search_permit` method: `search_queue.try_get_search_permit().await?;`
//!
//! What is going to happen at this point is that you're going to send a oneshot::Sender over an async mpsc channel.
//! Then, the queue/scheduler is going to either:
//! - Drop your oneshot channel => that means there are too many searches going on, and yours won't be executed.
//! You should exit and free all the RAM you use ASAP.
//! - Sends you a Permit => that will unlock the method, and you will be able to process your search.
//! And should drop the Permit only once you have freed all the RAM consumed by the method.
use std::num::NonZeroUsize;
use std::time::Duration;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use tokio::sync::{mpsc, oneshot};
use crate::error::MeilisearchHttpError;
#[derive(Debug)]
pub struct SearchQueue {
sender: mpsc::Sender<oneshot::Sender<Permit>>,
capacity: usize,
/// If we have waited longer than this to get a permit, we should abort the search request entirely.
/// The client probably already closed the connection, but we have no way to find out.
time_to_abort: Duration,
}
/// You should only run search requests while holding this permit.
/// Once it's dropped, a new search request will be able to process.
/// You should always try to drop the permit yourself calling the `drop` async method on it.
#[derive(Debug)]
pub struct Permit {
sender: mpsc::Sender<()>,
}
impl Permit {
/// Drop the permit giving back on permit to the search queue.
pub async fn drop(self) {
// if the channel is closed then the whole instance is down
let _ = self.sender.send(()).await;
}
}
impl Drop for Permit {
/// The implicit drop implementation can still be called in multiple cases:
/// - We forgot to call the explicit one somewhere => this should be fixed on our side asap
/// - The future is cancelled while running and the permit dropped with it
fn drop(&mut self) {
let sender = self.sender.clone();
// if the channel is closed then the whole instance is down
std::mem::drop(tokio::spawn(async move { sender.send(()).await }));
}
}
impl SearchQueue {
pub fn new(capacity: usize, paralellism: NonZeroUsize) -> Self {
// Search requests are going to wait until we're available anyway,
// so let's not allocate any RAM and keep a capacity of 1.
let (sender, receiver) = mpsc::channel(1);
tokio::task::spawn(Self::run(capacity, paralellism, receiver));
Self { sender, capacity, time_to_abort: Duration::from_secs(60) }
}
pub fn with_time_to_abort(self, time_to_abort: Duration) -> Self {
Self { time_to_abort, ..self }
}
/// This function is the main loop, it's in charge on scheduling which search request should execute first and
/// how many should executes at the same time.
///
/// It **must never** panic or exit.
async fn run(
capacity: usize,
parallelism: NonZeroUsize,
mut receive_new_searches: mpsc::Receiver<oneshot::Sender<Permit>>,
) {
let mut queue: Vec<oneshot::Sender<Permit>> = Default::default();
let mut rng: StdRng = StdRng::from_entropy();
let mut searches_running: usize = 0;
// By having a capacity of parallelism we ensures that every time a search finish it can release its RAM asap
let (sender, mut search_finished) = mpsc::channel(parallelism.into());
loop {
tokio::select! {
// biased select because we wants to free up space before trying to register new tasks
biased;
_ = search_finished.recv() => {
searches_running = searches_running.saturating_sub(1);
if !queue.is_empty() {
// Can't panic: the queue wasn't empty thus the range isn't empty.
let remove = rng.gen_range(0..queue.len());
let channel = queue.swap_remove(remove);
let _ = channel.send(Permit { sender: sender.clone() });
}
},
search_request = receive_new_searches.recv() => {
let search_request = match search_request {
Some(search_request) => search_request,
// This should never happen while actix-web is running, but it's not a reason to crash
// and it can generate a lot of noise in the tests.
None => continue,
};
if searches_running < usize::from(parallelism) && queue.is_empty() {
searches_running += 1;
// if the search requests die it's not a hard error on our side
let _ = search_request.send(Permit { sender: sender.clone() });
continue;
} else if capacity == 0 {
// in the very specific case where we have a capacity of zero
// we must refuse the request straight away without going through
// the queue stuff.
drop(search_request);
continue;
} else if queue.len() >= capacity {
let remove = rng.gen_range(0..queue.len());
let thing = queue.swap_remove(remove); // this will drop the channel and notify the search that it won't be processed
drop(thing);
}
queue.push(search_request);
},
}
}
}
/// Returns a search `Permit`.
/// It should be dropped as soon as you've freed all the RAM associated with the search request being processed.
pub async fn try_get_search_permit(&self) -> Result<Permit, MeilisearchHttpError> {
let now = std::time::Instant::now();
let (sender, receiver) = oneshot::channel();
self.sender.send(sender).await.map_err(|_| MeilisearchHttpError::SearchLimiterIsDown)?;
let permit = receiver
.await
.map_err(|_| MeilisearchHttpError::TooManySearchRequests(self.capacity))?;
// If we've been for more than one minute to get a search permit, it's better to simply
// abort the search request than spending time processing something were the client
// most certainly exited or got a timeout a long time ago.
// We may find a better solution in https://github.com/actix/actix-web/issues/3462.
if now.elapsed() > self.time_to_abort {
permit.drop().await;
Err(MeilisearchHttpError::TooManySearchRequests(self.capacity))
} else {
Ok(permit)
}
}
/// Returns `Ok(())` if everything seems normal.
/// Returns `Err(MeilisearchHttpError::SearchLimiterIsDown)` if the search limiter seems down.
pub fn health(&self) -> Result<(), MeilisearchHttpError> {
if self.sender.is_closed() {
Err(MeilisearchHttpError::SearchLimiterIsDown)
} else {
Ok(())
}
}
}