Renames meilisearch-http to meilisearch

This commit is contained in:
Colby Allen 2022-12-07 08:20:47 -07:00
parent ded2a50d14
commit ad2b1467da
83 changed files with 32 additions and 36 deletions

View file

@ -0,0 +1,63 @@
use std::any::Any;
use std::sync::Arc;
use actix_web::HttpRequest;
use meilisearch_types::InstanceUid;
use serde_json::Value;
use super::{find_user_id, Analytics, DocumentDeletionKind};
use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::tasks::TasksFilterQueryRaw;
use crate::Opt;
pub struct MockAnalytics {
instance_uid: Option<InstanceUid>,
}
#[derive(Default)]
pub struct SearchAggregator;
#[allow(dead_code)]
impl SearchAggregator {
pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
Self::default()
}
pub fn succeed(&mut self, _: &dyn Any) {}
}
impl MockAnalytics {
#[allow(clippy::new_ret_no_self)]
pub fn new(opt: &Opt) -> Arc<dyn Analytics> {
let instance_uid = find_user_id(&opt.db_path);
Arc::new(Self { instance_uid })
}
}
impl Analytics for MockAnalytics {
fn instance_uid(&self) -> Option<&meilisearch_types::InstanceUid> {
self.instance_uid.as_ref()
}
// These methods are noop and should be optimized out
fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {}
fn get_search(&self, _aggregate: super::SearchAggregator) {}
fn post_search(&self, _aggregate: super::SearchAggregator) {}
fn add_documents(
&self,
_documents_query: &UpdateDocumentsQuery,
_index_creation: bool,
_request: &HttpRequest,
) {
}
fn delete_documents(&self, _kind: DocumentDeletionKind, _request: &HttpRequest) {}
fn update_documents(
&self,
_documents_query: &UpdateDocumentsQuery,
_index_creation: bool,
_request: &HttpRequest,
) {
}
fn get_tasks(&self, _query: &TasksFilterQueryRaw, _request: &HttpRequest) {}
fn health_seen(&self, _request: &HttpRequest) {}
}

View file

@ -0,0 +1,101 @@
mod mock_analytics;
// if we are in release mode and the feature analytics was enabled
#[cfg(all(not(debug_assertions), feature = "analytics"))]
mod segment_analytics;
use std::fs;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use actix_web::HttpRequest;
use meilisearch_types::InstanceUid;
pub use mock_analytics::MockAnalytics;
use once_cell::sync::Lazy;
use platform_dirs::AppDirs;
use serde_json::Value;
use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::tasks::TasksFilterQueryRaw;
// if we are in debug mode OR the analytics feature is disabled
// the `SegmentAnalytics` point to the mock instead of the real analytics
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type SegmentAnalytics = mock_analytics::MockAnalytics;
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type SearchAggregator = mock_analytics::SearchAggregator;
// if we are in release mode and the feature analytics was enabled
// we use the real analytics
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type SearchAggregator = segment_analytics::SearchAggregator;
/// The Meilisearch config dir:
/// `~/.config/Meilisearch` on *NIX or *BSD.
/// `~/Library/ApplicationSupport` on macOS.
/// `%APPDATA` (= `C:\Users%USERNAME%\AppData\Roaming`) on windows.
static MEILISEARCH_CONFIG_PATH: Lazy<Option<PathBuf>> =
Lazy::new(|| AppDirs::new(Some("Meilisearch"), false).map(|appdir| appdir.config_dir));
fn config_user_id_path(db_path: &Path) -> Option<PathBuf> {
db_path
.canonicalize()
.ok()
.map(|path| path.join("instance-uid").display().to_string().replace('/', "-"))
.zip(MEILISEARCH_CONFIG_PATH.as_ref())
.map(|(filename, config_path)| config_path.join(filename.trim_start_matches('-')))
}
/// Look for the instance-uid in the `data.ms` or in `~/.config/Meilisearch/path-to-db-instance-uid`
fn find_user_id(db_path: &Path) -> Option<InstanceUid> {
fs::read_to_string(db_path.join("instance-uid"))
.ok()
.or_else(|| fs::read_to_string(&config_user_id_path(db_path)?).ok())
.and_then(|uid| InstanceUid::from_str(&uid).ok())
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocumentDeletionKind {
PerDocumentId,
ClearAll,
PerBatch,
}
pub trait Analytics: Sync + Send {
fn instance_uid(&self) -> Option<&InstanceUid>;
/// The method used to publish most analytics that do not need to be batched every hours
fn publish(&self, event_name: String, send: Value, request: Option<&HttpRequest>);
/// This method should be called to aggregate a get search
fn get_search(&self, aggregate: SearchAggregator);
/// This method should be called to aggregate a post search
fn post_search(&self, aggregate: SearchAggregator);
// this method should be called to aggregate a add documents request
fn add_documents(
&self,
documents_query: &UpdateDocumentsQuery,
index_creation: bool,
request: &HttpRequest,
);
// this method should be called to aggregate a add documents request
fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest);
// this method should be called to batch a update documents request
fn update_documents(
&self,
documents_query: &UpdateDocumentsQuery,
index_creation: bool,
request: &HttpRequest,
);
// this method should be called to aggregate the get tasks requests.
fn get_tasks(&self, query: &TasksFilterQueryRaw, request: &HttpRequest);
// this method should be called to aggregate a add documents request
fn health_seen(&self, request: &HttpRequest);
}

View file

@ -0,0 +1,991 @@
use std::collections::{BinaryHeap, HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{Duration, Instant};
use actix_web::http::header::USER_AGENT;
use actix_web::HttpRequest;
use byte_unit::Byte;
use http::header::CONTENT_TYPE;
use index_scheduler::IndexScheduler;
use meilisearch_auth::SearchRules;
use meilisearch_types::InstanceUid;
use once_cell::sync::Lazy;
use regex::Regex;
use segment::message::{Identify, Track, User};
use segment::{AutoBatcher, Batcher, HttpClient};
use serde::Serialize;
use serde_json::{json, Value};
use sysinfo::{DiskExt, System, SystemExt};
use time::OffsetDateTime;
use tokio::select;
use tokio::sync::mpsc::{self, Receiver, Sender};
use uuid::Uuid;
use super::{config_user_id_path, DocumentDeletionKind, MEILISEARCH_CONFIG_PATH};
use crate::analytics::Analytics;
use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, SchedulerConfig};
use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::tasks::TasksFilterQueryRaw;
use crate::routes::{create_all_stats, Stats};
use crate::search::{
SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
};
use crate::Opt;
const ANALYTICS_HEADER: &str = "X-Meilisearch-Client";
/// Write the instance-uid in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-instance-uid`. Ignore the errors.
fn write_user_id(db_path: &Path, user_id: &InstanceUid) {
let _ = fs::write(db_path.join("instance-uid"), user_id.as_bytes());
if let Some((meilisearch_config_path, user_id_path)) =
MEILISEARCH_CONFIG_PATH.as_ref().zip(config_user_id_path(db_path))
{
let _ = fs::create_dir_all(&meilisearch_config_path);
let _ = fs::write(user_id_path, user_id.to_string());
}
}
const SEGMENT_API_KEY: &str = "P3FWhhEsJiEDCuEHpmcN9DHcK4hVfBvb";
pub fn extract_user_agents(request: &HttpRequest) -> Vec<String> {
request
.headers()
.get(ANALYTICS_HEADER)
.or_else(|| request.headers().get(USER_AGENT))
.map(|header| header.to_str().ok())
.flatten()
.unwrap_or("unknown")
.split(';')
.map(str::trim)
.map(ToString::to_string)
.collect()
}
pub enum AnalyticsMsg {
BatchMessage(Track),
AggregateGetSearch(SearchAggregator),
AggregatePostSearch(SearchAggregator),
AggregateAddDocuments(DocumentsAggregator),
AggregateDeleteDocuments(DocumentsDeletionAggregator),
AggregateUpdateDocuments(DocumentsAggregator),
AggregateTasks(TasksAggregator),
AggregateHealth(HealthAggregator),
}
pub struct SegmentAnalytics {
instance_uid: InstanceUid,
sender: Sender<AnalyticsMsg>,
user: User,
}
impl SegmentAnalytics {
pub async fn new(opt: &Opt, index_scheduler: Arc<IndexScheduler>) -> Arc<dyn Analytics> {
let instance_uid = super::find_user_id(&opt.db_path);
let first_time_run = instance_uid.is_none();
let instance_uid = instance_uid.unwrap_or_else(|| Uuid::new_v4());
write_user_id(&opt.db_path, &instance_uid);
let client = reqwest::Client::builder().connect_timeout(Duration::from_secs(10)).build();
// if reqwest throws an error we won't be able to send analytics
if client.is_err() {
return super::MockAnalytics::new(opt);
}
let client =
HttpClient::new(client.unwrap(), "https://telemetry.meilisearch.com".to_string());
let user = User::UserId { user_id: instance_uid.to_string() };
let mut batcher = AutoBatcher::new(client, Batcher::new(None), SEGMENT_API_KEY.to_string());
// If Meilisearch is Launched for the first time:
// 1. Send an event Launched associated to the user `total_launch`.
// 2. Batch an event Launched with the real instance-id and send it in one hour.
if first_time_run {
let _ = batcher
.push(Track {
user: User::UserId { user_id: "total_launch".to_string() },
event: "Launched".to_string(),
..Default::default()
})
.await;
let _ = batcher.flush().await;
let _ = batcher
.push(Track {
user: user.clone(),
event: "Launched".to_string(),
..Default::default()
})
.await;
}
let (sender, inbox) = mpsc::channel(100); // How many analytics can we bufferize
let segment = Box::new(Segment {
inbox,
user: user.clone(),
opt: opt.clone(),
batcher,
post_search_aggregator: SearchAggregator::default(),
get_search_aggregator: SearchAggregator::default(),
add_documents_aggregator: DocumentsAggregator::default(),
delete_documents_aggregator: DocumentsDeletionAggregator::default(),
update_documents_aggregator: DocumentsAggregator::default(),
get_tasks_aggregator: TasksAggregator::default(),
health_aggregator: HealthAggregator::default(),
});
tokio::spawn(segment.run(index_scheduler.clone()));
let this = Self { instance_uid, sender, user: user.clone() };
Arc::new(this)
}
}
impl super::Analytics for SegmentAnalytics {
fn instance_uid(&self) -> Option<&InstanceUid> {
Some(&self.instance_uid)
}
fn publish(&self, event_name: String, mut send: Value, request: Option<&HttpRequest>) {
let user_agent = request.map(|req| extract_user_agents(req));
send["user-agent"] = json!(user_agent);
let event = Track {
user: self.user.clone(),
event: event_name.clone(),
properties: send,
..Default::default()
};
let _ = self.sender.try_send(AnalyticsMsg::BatchMessage(event.into()));
}
fn get_search(&self, aggregate: SearchAggregator) {
let _ = self.sender.try_send(AnalyticsMsg::AggregateGetSearch(aggregate));
}
fn post_search(&self, aggregate: SearchAggregator) {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate));
}
fn add_documents(
&self,
documents_query: &UpdateDocumentsQuery,
index_creation: bool,
request: &HttpRequest,
) {
let aggregate = DocumentsAggregator::from_query(documents_query, index_creation, request);
let _ = self.sender.try_send(AnalyticsMsg::AggregateAddDocuments(aggregate));
}
fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest) {
let aggregate = DocumentsDeletionAggregator::from_query(kind, request);
let _ = self.sender.try_send(AnalyticsMsg::AggregateDeleteDocuments(aggregate));
}
fn update_documents(
&self,
documents_query: &UpdateDocumentsQuery,
index_creation: bool,
request: &HttpRequest,
) {
let aggregate = DocumentsAggregator::from_query(documents_query, index_creation, request);
let _ = self.sender.try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate));
}
fn get_tasks(&self, query: &TasksFilterQueryRaw, request: &HttpRequest) {
let aggregate = TasksAggregator::from_query(query, request);
let _ = self.sender.try_send(AnalyticsMsg::AggregateTasks(aggregate));
}
fn health_seen(&self, request: &HttpRequest) {
let aggregate = HealthAggregator::from_query(request);
let _ = self.sender.try_send(AnalyticsMsg::AggregateHealth(aggregate));
}
}
/// This structure represent the `infos` field we send in the analytics.
/// It's quite close to the `Opt` structure except all sensitive informations
/// have been simplified to a boolean.
/// It's send as-is in amplitude thus you should never update a name of the
/// struct without the approval of the PM.
#[derive(Debug, Clone, Serialize)]
struct Infos {
env: String,
db_path: bool,
import_dump: bool,
dump_dir: bool,
ignore_missing_dump: bool,
ignore_dump_if_db_exists: bool,
import_snapshot: bool,
schedule_snapshot: bool,
snapshot_dir: bool,
snapshot_interval_sec: u64,
ignore_missing_snapshot: bool,
ignore_snapshot_if_db_exists: bool,
http_addr: bool,
max_index_size: Byte,
max_task_db_size: Byte,
http_payload_size_limit: Byte,
disable_auto_batching: bool,
log_level: String,
max_indexing_memory: MaxMemory,
max_indexing_threads: MaxThreads,
with_configuration_file: bool,
ssl_auth_path: bool,
ssl_cert_path: bool,
ssl_key_path: bool,
ssl_ocsp_path: bool,
ssl_require_auth: bool,
ssl_resumption: bool,
ssl_tickets: bool,
}
impl From<Opt> for Infos {
fn from(options: Opt) -> Self {
// We wants to decompose this whole struct by hand to be sure we don't forget
// to add analytics when we add a field in the Opt.
// Thus we must not insert `..` at the end.
let Opt {
db_path,
http_addr,
master_key: _,
env,
max_index_size,
max_task_db_size,
http_payload_size_limit,
ssl_cert_path,
ssl_key_path,
ssl_auth_path,
ssl_ocsp_path,
ssl_require_auth,
ssl_resumption,
ssl_tickets,
import_snapshot,
ignore_missing_snapshot,
ignore_snapshot_if_db_exists,
snapshot_dir,
schedule_snapshot,
snapshot_interval_sec,
import_dump,
ignore_missing_dump,
ignore_dump_if_db_exists,
dump_dir,
log_level,
indexer_options,
scheduler_options,
config_file_path,
#[cfg(all(not(debug_assertions), feature = "analytics"))]
no_analytics: _,
} = options;
let SchedulerConfig { disable_auto_batching } = scheduler_options;
let IndexerOpts {
log_every_n: _,
max_nb_chunks: _,
max_indexing_memory,
max_indexing_threads,
} = indexer_options;
// We're going to override every sensible information.
// We consider information sensible if it contains a path, an address, or a key.
Self {
env,
db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(),
dump_dir: dump_dir != PathBuf::from("dumps/"),
ignore_missing_dump,
ignore_dump_if_db_exists,
import_snapshot: import_snapshot.is_some(),
schedule_snapshot,
snapshot_dir: snapshot_dir != PathBuf::from("snapshots/"),
snapshot_interval_sec,
ignore_missing_snapshot,
ignore_snapshot_if_db_exists,
http_addr: http_addr != default_http_addr(),
max_index_size,
max_task_db_size,
http_payload_size_limit,
disable_auto_batching,
log_level,
max_indexing_memory,
max_indexing_threads,
with_configuration_file: config_file_path.is_some(),
ssl_auth_path: ssl_auth_path.is_some(),
ssl_cert_path: ssl_cert_path.is_some(),
ssl_key_path: ssl_key_path.is_some(),
ssl_ocsp_path: ssl_ocsp_path.is_some(),
ssl_require_auth,
ssl_resumption,
ssl_tickets,
}
}
}
pub struct Segment {
inbox: Receiver<AnalyticsMsg>,
user: User,
opt: Opt,
batcher: AutoBatcher,
get_search_aggregator: SearchAggregator,
post_search_aggregator: SearchAggregator,
add_documents_aggregator: DocumentsAggregator,
delete_documents_aggregator: DocumentsDeletionAggregator,
update_documents_aggregator: DocumentsAggregator,
get_tasks_aggregator: TasksAggregator,
health_aggregator: HealthAggregator,
}
impl Segment {
fn compute_traits(opt: &Opt, stats: Stats) -> Value {
static FIRST_START_TIMESTAMP: Lazy<Instant> = Lazy::new(Instant::now);
static SYSTEM: Lazy<Value> = Lazy::new(|| {
let mut sys = System::new_all();
sys.refresh_all();
let kernel_version = sys
.kernel_version()
.map(|k| k.split_once("-").map(|(k, _)| k.to_string()))
.flatten();
json!({
"distribution": sys.name(),
"kernel_version": kernel_version,
"cores": sys.cpus().len(),
"ram_size": sys.total_memory(),
"disk_size": sys.disks().iter().map(|disk| disk.total_space()).max(),
"server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(),
})
});
let number_of_documents =
stats.indexes.values().map(|index| index.number_of_documents).collect::<Vec<u64>>();
json!({
"start_since_days": FIRST_START_TIMESTAMP.elapsed().as_secs() / (60 * 60 * 24), // one day
"system": *SYSTEM,
"stats": {
"database_size": stats.database_size,
"indexes_number": stats.indexes.len(),
"documents_number": number_of_documents,
},
"infos": Infos::from(opt.clone()),
})
}
async fn run(mut self, index_scheduler: Arc<IndexScheduler>) {
const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour
// The first batch must be sent after one hour.
let mut interval =
tokio::time::interval_at(tokio::time::Instant::now() + INTERVAL, INTERVAL);
loop {
select! {
_ = interval.tick() => {
self.tick(index_scheduler.clone()).await;
},
msg = self.inbox.recv() => {
match msg {
Some(AnalyticsMsg::BatchMessage(msg)) => drop(self.batcher.push(msg).await),
Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateTasks(agreg)) => self.get_tasks_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateHealth(agreg)) => self.health_aggregator.aggregate(agreg),
None => (),
}
}
}
}
}
async fn tick(&mut self, index_scheduler: Arc<IndexScheduler>) {
if let Ok(stats) = create_all_stats(index_scheduler.into(), &SearchRules::default()) {
let _ = self
.batcher
.push(Identify {
context: Some(json!({
"app": {
"version": env!("CARGO_PKG_VERSION").to_string(),
},
})),
user: self.user.clone(),
traits: Self::compute_traits(&self.opt, stats),
..Default::default()
})
.await;
}
let get_search = std::mem::take(&mut self.get_search_aggregator)
.into_event(&self.user, "Documents Searched GET");
let post_search = std::mem::take(&mut self.post_search_aggregator)
.into_event(&self.user, "Documents Searched POST");
let add_documents = std::mem::take(&mut self.add_documents_aggregator)
.into_event(&self.user, "Documents Added");
let delete_documents = std::mem::take(&mut self.delete_documents_aggregator)
.into_event(&self.user, "Documents Deleted");
let update_documents = std::mem::take(&mut self.update_documents_aggregator)
.into_event(&self.user, "Documents Updated");
let get_tasks =
std::mem::take(&mut self.get_tasks_aggregator).into_event(&self.user, "Tasks Seen");
let health =
std::mem::take(&mut self.health_aggregator).into_event(&self.user, "Health Seen");
if let Some(get_search) = get_search {
let _ = self.batcher.push(get_search).await;
}
if let Some(post_search) = post_search {
let _ = self.batcher.push(post_search).await;
}
if let Some(add_documents) = add_documents {
let _ = self.batcher.push(add_documents).await;
}
if let Some(delete_documents) = delete_documents {
let _ = self.batcher.push(delete_documents).await;
}
if let Some(update_documents) = update_documents {
let _ = self.batcher.push(update_documents).await;
}
if let Some(get_tasks) = get_tasks {
let _ = self.batcher.push(get_tasks).await;
}
if let Some(health) = health {
let _ = self.batcher.push(health).await;
}
let _ = self.batcher.flush().await;
}
}
#[derive(Default)]
pub struct SearchAggregator {
timestamp: Option<OffsetDateTime>,
// context
user_agents: HashSet<String>,
// requests
total_received: usize,
total_succeeded: usize,
time_spent: BinaryHeap<usize>,
// sort
sort_with_geo_point: bool,
// every time a request has a filter, this field must be incremented by the number of terms it contains
sort_sum_of_criteria_terms: usize,
// every time a request has a filter, this field must be incremented by one
sort_total_number_of_criteria: usize,
// filter
filter_with_geo_radius: bool,
// every time a request has a filter, this field must be incremented by the number of terms it contains
filter_sum_of_criteria_terms: usize,
// every time a request has a filter, this field must be incremented by one
filter_total_number_of_criteria: usize,
used_syntax: HashMap<String, usize>,
// q
// The maximum number of terms in a q request
max_terms_number: usize,
// every time a search is done, we increment the counter linked to the used settings
matching_strategy: HashMap<String, usize>,
// pagination
max_limit: usize,
max_offset: usize,
finite_pagination: usize,
// formatting
max_attributes_to_retrieve: usize,
max_attributes_to_highlight: usize,
highlight_pre_tag: bool,
highlight_post_tag: bool,
max_attributes_to_crop: usize,
crop_marker: bool,
show_matches_position: bool,
crop_length: bool,
// facets
facets_sum_of_terms: usize,
facets_total_number_of_facets: usize,
}
impl SearchAggregator {
pub fn from_query(query: &SearchQuery, request: &HttpRequest) -> Self {
let mut ret = Self::default();
ret.timestamp = Some(OffsetDateTime::now_utc());
ret.total_received = 1;
ret.user_agents = extract_user_agents(request).into_iter().collect();
if let Some(ref sort) = query.sort {
ret.sort_total_number_of_criteria = 1;
ret.sort_with_geo_point = sort.iter().any(|s| s.contains("_geoPoint("));
ret.sort_sum_of_criteria_terms = sort.len();
}
if let Some(ref filter) = query.filter {
static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
ret.filter_total_number_of_criteria = 1;
let syntax = match filter {
Value::String(_) => "string".to_string(),
Value::Array(values) => {
if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) {
"mixed".to_string()
} else {
"array".to_string()
}
}
_ => "none".to_string(),
};
// convert the string to a HashMap
ret.used_syntax.insert(syntax, 1);
let stringified_filters = filter.to_string();
ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
}
if let Some(ref q) = query.q {
ret.max_terms_number = q.split_whitespace().count();
}
if query.is_finite_pagination() {
let limit = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
ret.max_limit = limit;
ret.max_offset = query.page.unwrap_or(1).saturating_sub(1) * limit;
ret.finite_pagination = 1;
} else {
ret.max_limit = query.limit;
ret.max_offset = query.offset;
ret.finite_pagination = 0;
}
ret.matching_strategy.insert(format!("{:?}", query.matching_strategy), 1);
ret.highlight_pre_tag = query.highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
ret.highlight_post_tag = query.highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
ret.crop_marker = query.crop_marker != DEFAULT_CROP_MARKER();
ret.crop_length = query.crop_length != DEFAULT_CROP_LENGTH();
ret.show_matches_position = query.show_matches_position;
ret
}
pub fn succeed(&mut self, result: &SearchResult) {
self.total_succeeded = self.total_succeeded.saturating_add(1);
self.time_spent.push(result.processing_time_ms as usize);
}
/// Aggregate one [SearchAggregator] into another.
pub fn aggregate(&mut self, mut other: Self) {
if self.timestamp.is_none() {
self.timestamp = other.timestamp;
}
// context
for user_agent in other.user_agents.into_iter() {
self.user_agents.insert(user_agent);
}
// request
self.total_received = self.total_received.saturating_add(other.total_received);
self.total_succeeded = self.total_succeeded.saturating_add(other.total_succeeded);
self.time_spent.append(&mut other.time_spent);
// sort
self.sort_with_geo_point |= other.sort_with_geo_point;
self.sort_sum_of_criteria_terms =
self.sort_sum_of_criteria_terms.saturating_add(other.sort_sum_of_criteria_terms);
self.sort_total_number_of_criteria =
self.sort_total_number_of_criteria.saturating_add(other.sort_total_number_of_criteria);
// filter
self.filter_with_geo_radius |= other.filter_with_geo_radius;
self.filter_sum_of_criteria_terms =
self.filter_sum_of_criteria_terms.saturating_add(other.filter_sum_of_criteria_terms);
self.filter_total_number_of_criteria = self
.filter_total_number_of_criteria
.saturating_add(other.filter_total_number_of_criteria);
for (key, value) in other.used_syntax.into_iter() {
let used_syntax = self.used_syntax.entry(key).or_insert(0);
*used_syntax = used_syntax.saturating_add(value);
}
// q
self.max_terms_number = self.max_terms_number.max(other.max_terms_number);
// pagination
self.max_limit = self.max_limit.max(other.max_limit);
self.max_offset = self.max_offset.max(other.max_offset);
self.finite_pagination += other.finite_pagination;
// formatting
self.max_attributes_to_retrieve =
self.max_attributes_to_retrieve.max(other.max_attributes_to_retrieve);
self.max_attributes_to_highlight =
self.max_attributes_to_highlight.max(other.max_attributes_to_highlight);
self.highlight_pre_tag |= other.highlight_pre_tag;
self.highlight_post_tag |= other.highlight_post_tag;
self.max_attributes_to_crop = self.max_attributes_to_crop.max(other.max_attributes_to_crop);
self.crop_marker |= other.crop_marker;
self.show_matches_position |= other.show_matches_position;
self.crop_length |= other.crop_length;
// facets
self.facets_sum_of_terms =
self.facets_sum_of_terms.saturating_add(other.facets_sum_of_terms);
self.facets_total_number_of_facets =
self.facets_total_number_of_facets.saturating_add(other.facets_total_number_of_facets);
// matching strategy
for (key, value) in other.matching_strategy.into_iter() {
let matching_strategy = self.matching_strategy.entry(key).or_insert(0);
*matching_strategy = matching_strategy.saturating_add(value);
}
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
if self.total_received == 0 {
None
} else {
// the index of the 99th percentage of value
let percentile_99th = 0.99 * (self.total_succeeded as f64 - 1.) + 1.;
// we get all the values in a sorted manner
let time_spent = self.time_spent.into_sorted_vec();
// We are only interested by the slowest value of the 99th fastest results
let time_spent = time_spent.get(percentile_99th as usize);
let properties = json!({
"user-agent": self.user_agents,
"requests": {
"99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
"total_succeeded": self.total_succeeded,
"total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panics
"total_received": self.total_received,
},
"sort": {
"with_geoPoint": self.sort_with_geo_point,
"avg_criteria_number": format!("{:.2}", self.sort_sum_of_criteria_terms as f64 / self.sort_total_number_of_criteria as f64),
},
"filter": {
"with_geoRadius": self.filter_with_geo_radius,
"avg_criteria_number": format!("{:.2}", self.filter_sum_of_criteria_terms as f64 / self.filter_total_number_of_criteria as f64),
"most_used_syntax": self.used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
},
"q": {
"max_terms_number": self.max_terms_number,
},
"pagination": {
"max_limit": self.max_limit,
"max_offset": self.max_offset,
"most_used_navigation": if self.finite_pagination > (self.total_received / 2) { "exhaustive" } else { "estimated" },
},
"formatting": {
"max_attributes_to_retrieve": self.max_attributes_to_retrieve,
"max_attributes_to_highlight": self.max_attributes_to_highlight,
"highlight_pre_tag": self.highlight_pre_tag,
"highlight_post_tag": self.highlight_post_tag,
"max_attributes_to_crop": self.max_attributes_to_crop,
"crop_marker": self.crop_marker,
"show_matches_position": self.show_matches_position,
"crop_length": self.crop_length,
},
"facets": {
"avg_facets_number": format!("{:.2}", self.facets_sum_of_terms as f64 / self.facets_total_number_of_facets as f64),
},
"matching_strategy": {
"most_used_strategy": self.matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
}
});
Some(Track {
timestamp: self.timestamp,
user: user.clone(),
event: event_name.to_string(),
properties,
..Default::default()
})
}
}
}
#[derive(Default)]
pub struct DocumentsAggregator {
timestamp: Option<OffsetDateTime>,
// set to true when at least one request was received
updated: bool,
// context
user_agents: HashSet<String>,
content_types: HashSet<String>,
primary_keys: HashSet<String>,
index_creation: bool,
}
impl DocumentsAggregator {
pub fn from_query(
documents_query: &UpdateDocumentsQuery,
index_creation: bool,
request: &HttpRequest,
) -> Self {
let mut ret = Self::default();
ret.timestamp = Some(OffsetDateTime::now_utc());
ret.updated = true;
ret.user_agents = extract_user_agents(request).into_iter().collect();
if let Some(primary_key) = documents_query.primary_key.clone() {
ret.primary_keys.insert(primary_key);
}
let content_type = request
.headers()
.get(CONTENT_TYPE)
.and_then(|s| s.to_str().ok())
.unwrap_or("unknown")
.to_string();
ret.content_types.insert(content_type);
ret.index_creation = index_creation;
ret
}
/// Aggregate one [DocumentsAggregator] into another.
pub fn aggregate(&mut self, other: Self) {
if self.timestamp.is_none() {
self.timestamp = other.timestamp;
}
self.updated |= other.updated;
// we can't create a union because there is no `into_union` method
for user_agent in other.user_agents {
self.user_agents.insert(user_agent);
}
for primary_key in other.primary_keys {
self.primary_keys.insert(primary_key);
}
for content_type in other.content_types {
self.content_types.insert(content_type);
}
self.index_creation |= other.index_creation;
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
if !self.updated {
None
} else {
let properties = json!({
"user-agent": self.user_agents,
"payload_type": self.content_types,
"primary_key": self.primary_keys,
"index_creation": self.index_creation,
});
Some(Track {
timestamp: self.timestamp,
user: user.clone(),
event: event_name.to_string(),
properties,
..Default::default()
})
}
}
}
#[derive(Default, Serialize)]
pub struct DocumentsDeletionAggregator {
#[serde(skip)]
timestamp: Option<OffsetDateTime>,
// context
#[serde(rename = "user-agent")]
user_agents: HashSet<String>,
total_received: usize,
per_document_id: bool,
clear_all: bool,
per_batch: bool,
}
impl DocumentsDeletionAggregator {
pub fn from_query(kind: DocumentDeletionKind, request: &HttpRequest) -> Self {
let mut ret = Self::default();
ret.timestamp = Some(OffsetDateTime::now_utc());
ret.user_agents = extract_user_agents(request).into_iter().collect();
ret.total_received = 1;
match kind {
DocumentDeletionKind::PerDocumentId => ret.per_document_id = true,
DocumentDeletionKind::ClearAll => ret.clear_all = true,
DocumentDeletionKind::PerBatch => ret.per_batch = true,
}
ret
}
/// Aggregate one [DocumentsAggregator] into another.
pub fn aggregate(&mut self, other: Self) {
if self.timestamp.is_none() {
self.timestamp = other.timestamp;
}
// we can't create a union because there is no `into_union` method
for user_agent in other.user_agents {
self.user_agents.insert(user_agent);
}
self.total_received = self.total_received.saturating_add(other.total_received);
self.per_document_id |= other.per_document_id;
self.clear_all |= other.clear_all;
self.per_batch |= other.per_batch;
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
// if we had no timestamp it means we never encountered any events and
// thus we don't need to send this event.
let timestamp = self.timestamp?;
Some(Track {
timestamp: Some(timestamp),
user: user.clone(),
event: event_name.to_string(),
properties: serde_json::to_value(self).ok()?,
..Default::default()
})
}
}
#[derive(Default, Serialize)]
pub struct TasksAggregator {
#[serde(skip)]
timestamp: Option<OffsetDateTime>,
// context
#[serde(rename = "user-agent")]
user_agents: HashSet<String>,
filtered_by_uid: bool,
filtered_by_index_uid: bool,
filtered_by_type: bool,
filtered_by_status: bool,
filtered_by_canceled_by: bool,
filtered_by_before_enqueued_at: bool,
filtered_by_after_enqueued_at: bool,
filtered_by_before_started_at: bool,
filtered_by_after_started_at: bool,
filtered_by_before_finished_at: bool,
filtered_by_after_finished_at: bool,
total_received: usize,
}
impl TasksAggregator {
pub fn from_query(query: &TasksFilterQueryRaw, request: &HttpRequest) -> Self {
Self {
timestamp: Some(OffsetDateTime::now_utc()),
user_agents: extract_user_agents(request).into_iter().collect(),
filtered_by_uid: query.common.uids.is_some(),
filtered_by_index_uid: query.common.index_uids.is_some(),
filtered_by_type: query.common.types.is_some(),
filtered_by_status: query.common.statuses.is_some(),
filtered_by_canceled_by: query.common.canceled_by.is_some(),
filtered_by_before_enqueued_at: query.dates.before_enqueued_at.is_some(),
filtered_by_after_enqueued_at: query.dates.after_enqueued_at.is_some(),
filtered_by_before_started_at: query.dates.before_started_at.is_some(),
filtered_by_after_started_at: query.dates.after_started_at.is_some(),
filtered_by_before_finished_at: query.dates.before_finished_at.is_some(),
filtered_by_after_finished_at: query.dates.after_finished_at.is_some(),
total_received: 1,
}
}
/// Aggregate one [DocumentsAggregator] into another.
pub fn aggregate(&mut self, other: Self) {
if self.timestamp.is_none() {
self.timestamp = other.timestamp;
}
// we can't create a union because there is no `into_union` method
for user_agent in other.user_agents {
self.user_agents.insert(user_agent);
}
self.filtered_by_uid |= other.filtered_by_uid;
self.filtered_by_index_uid |= other.filtered_by_index_uid;
self.filtered_by_type |= other.filtered_by_type;
self.filtered_by_status |= other.filtered_by_status;
self.filtered_by_canceled_by |= other.filtered_by_canceled_by;
self.filtered_by_before_enqueued_at |= other.filtered_by_before_enqueued_at;
self.filtered_by_after_enqueued_at |= other.filtered_by_after_enqueued_at;
self.filtered_by_before_started_at |= other.filtered_by_before_started_at;
self.filtered_by_after_started_at |= other.filtered_by_after_started_at;
self.filtered_by_before_finished_at |= other.filtered_by_before_finished_at;
self.filtered_by_after_finished_at |= other.filtered_by_after_finished_at;
self.filtered_by_after_finished_at |= other.filtered_by_after_finished_at;
self.total_received = self.total_received.saturating_add(other.total_received);
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
// if we had no timestamp it means we never encountered any events and
// thus we don't need to send this event.
let timestamp = self.timestamp?;
Some(Track {
timestamp: Some(timestamp),
user: user.clone(),
event: event_name.to_string(),
properties: serde_json::to_value(self).ok()?,
..Default::default()
})
}
}
#[derive(Default, Serialize)]
pub struct HealthAggregator {
#[serde(skip)]
timestamp: Option<OffsetDateTime>,
// context
#[serde(rename = "user-agent")]
user_agents: HashSet<String>,
total_received: usize,
}
impl HealthAggregator {
pub fn from_query(request: &HttpRequest) -> Self {
let mut ret = Self::default();
ret.timestamp = Some(OffsetDateTime::now_utc());
ret.user_agents = extract_user_agents(request).into_iter().collect();
ret.total_received = 1;
ret
}
/// Aggregate one [DocumentsAggregator] into another.
pub fn aggregate(&mut self, other: Self) {
if self.timestamp.is_none() {
self.timestamp = other.timestamp;
}
// we can't create a union because there is no `into_union` method
for user_agent in other.user_agents {
self.user_agents.insert(user_agent);
}
self.total_received = self.total_received.saturating_add(other.total_received);
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
// if we had no timestamp it means we never encountered any events and
// thus we don't need to send this event.
let timestamp = self.timestamp?;
Some(Track {
timestamp: Some(timestamp),
user: user.clone(),
event: event_name.to_string(),
properties: serde_json::to_value(self).ok()?,
..Default::default()
})
}
}

163
meilisearch/src/error.rs Normal file
View file

@ -0,0 +1,163 @@
use actix_web as aweb;
use aweb::error::{JsonPayloadError, QueryPayloadError};
use meilisearch_types::document_formats::{DocumentFormatError, PayloadType};
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::index_uid::IndexUidFormatError;
use serde_json::Value;
use tokio::task::JoinError;
#[derive(Debug, thiserror::Error)]
pub enum MeilisearchHttpError {
#[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))]
MissingContentType(Vec<String>),
#[error(
"The Content-Type `{0}` is invalid. Accepted values for the Content-Type header are: {}",
.1.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
)]
InvalidContentType(String, Vec<String>),
#[error("Document `{0}` not found.")]
DocumentNotFound(String),
#[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
InvalidExpression(&'static [&'static str], Value),
#[error("A {0} payload is missing.")]
MissingPayload(PayloadType),
#[error("The provided payload reached the size limit.")]
PayloadTooLarge,
#[error("Two indexes must be given for each swap. The list `{:?}` contains {} indexes.",
.0, .0.len()
)]
SwapIndexPayloadWrongLength(Vec<String>),
#[error(transparent)]
IndexUid(#[from] IndexUidFormatError),
#[error(transparent)]
SerdeJson(#[from] serde_json::Error),
#[error(transparent)]
HeedError(#[from] meilisearch_types::heed::Error),
#[error(transparent)]
IndexScheduler(#[from] index_scheduler::Error),
#[error(transparent)]
Milli(#[from] meilisearch_types::milli::Error),
#[error(transparent)]
Payload(#[from] PayloadError),
#[error(transparent)]
FileStore(#[from] file_store::Error),
#[error(transparent)]
DocumentFormat(#[from] DocumentFormatError),
#[error(transparent)]
Join(#[from] JoinError),
}
impl ErrorCode for MeilisearchHttpError {
fn error_code(&self) -> Code {
match self {
MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType,
MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound,
MeilisearchHttpError::InvalidExpression(_, _) => Code::Filter,
MeilisearchHttpError::PayloadTooLarge => Code::PayloadTooLarge,
MeilisearchHttpError::SwapIndexPayloadWrongLength(_) => Code::BadRequest,
MeilisearchHttpError::IndexUid(e) => e.error_code(),
MeilisearchHttpError::SerdeJson(_) => Code::Internal,
MeilisearchHttpError::HeedError(_) => Code::Internal,
MeilisearchHttpError::IndexScheduler(e) => e.error_code(),
MeilisearchHttpError::Milli(e) => e.error_code(),
MeilisearchHttpError::Payload(e) => e.error_code(),
MeilisearchHttpError::FileStore(_) => Code::Internal,
MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
MeilisearchHttpError::Join(_) => Code::Internal,
}
}
}
impl From<MeilisearchHttpError> for aweb::Error {
fn from(other: MeilisearchHttpError) -> Self {
aweb::Error::from(ResponseError::from(other))
}
}
impl From<aweb::error::PayloadError> for MeilisearchHttpError {
fn from(error: aweb::error::PayloadError) -> Self {
MeilisearchHttpError::Payload(PayloadError::Payload(error))
}
}
#[derive(Debug, thiserror::Error)]
pub enum PayloadError {
#[error(transparent)]
Payload(aweb::error::PayloadError),
#[error(transparent)]
Json(JsonPayloadError),
#[error(transparent)]
Query(QueryPayloadError),
#[error("The json payload provided is malformed. `{0}`.")]
MalformedPayload(serde_json::error::Error),
#[error("A json payload is missing.")]
MissingPayload,
}
impl ErrorCode for PayloadError {
fn error_code(&self) -> Code {
match self {
PayloadError::Payload(e) => match e {
aweb::error::PayloadError::Incomplete(_) => Code::Internal,
aweb::error::PayloadError::EncodingCorrupted => Code::Internal,
aweb::error::PayloadError::Overflow => Code::PayloadTooLarge,
aweb::error::PayloadError::UnknownLength => Code::Internal,
aweb::error::PayloadError::Http2Payload(_) => Code::Internal,
aweb::error::PayloadError::Io(_) => Code::Internal,
_ => todo!(),
},
PayloadError::Json(err) => match err {
JsonPayloadError::Overflow { .. } => Code::PayloadTooLarge,
JsonPayloadError::ContentType => Code::UnsupportedMediaType,
JsonPayloadError::Payload(aweb::error::PayloadError::Overflow) => {
Code::PayloadTooLarge
}
JsonPayloadError::Payload(_) => Code::BadRequest,
JsonPayloadError::Deserialize(_) => Code::BadRequest,
JsonPayloadError::Serialize(_) => Code::Internal,
_ => Code::Internal,
},
PayloadError::Query(err) => match err {
QueryPayloadError::Deserialize(_) => Code::BadRequest,
_ => Code::Internal,
},
PayloadError::MissingPayload => Code::MissingPayload,
PayloadError::MalformedPayload(_) => Code::MalformedPayload,
}
}
}
impl From<JsonPayloadError> for PayloadError {
fn from(other: JsonPayloadError) -> Self {
match other {
JsonPayloadError::Deserialize(e)
if e.classify() == serde_json::error::Category::Eof
&& e.line() == 1
&& e.column() == 0 =>
{
Self::MissingPayload
}
JsonPayloadError::Deserialize(e)
if e.classify() != serde_json::error::Category::Data =>
{
Self::MalformedPayload(e)
}
_ => Self::Json(other),
}
}
}
impl From<QueryPayloadError> for PayloadError {
fn from(other: QueryPayloadError) -> Self {
Self::Query(other)
}
}
impl From<PayloadError> for aweb::Error {
fn from(other: PayloadError) -> Self {
aweb::Error::from(ResponseError::from(other))
}
}

View file

@ -0,0 +1,25 @@
use meilisearch_types::error::{Code, ErrorCode};
#[derive(Debug, thiserror::Error)]
pub enum AuthenticationError {
#[error("The Authorization header is missing. It must use the bearer authorization method.")]
MissingAuthorizationHeader,
#[error("The provided API key is invalid.")]
InvalidToken,
// Triggered on configuration error.
#[error("An internal error has occurred. `Irretrievable state`.")]
IrretrievableState,
#[error("Meilisearch is running without a master key. To access this API endpoint, you must have set a master key at launch.")]
MissingMasterKey,
}
impl ErrorCode for AuthenticationError {
fn error_code(&self) -> Code {
match self {
AuthenticationError::MissingAuthorizationHeader => Code::MissingAuthorizationHeader,
AuthenticationError::InvalidToken => Code::InvalidToken,
AuthenticationError::IrretrievableState => Code::Internal,
AuthenticationError::MissingMasterKey => Code::MissingMasterKey,
}
}
}

View file

@ -0,0 +1,247 @@
mod error;
use std::marker::PhantomData;
use std::ops::Deref;
use std::pin::Pin;
use actix_web::FromRequest;
pub use error::AuthenticationError;
use futures::future::err;
use futures::Future;
use meilisearch_auth::{AuthController, AuthFilter};
use meilisearch_types::error::{Code, ResponseError};
pub struct GuardedData<P, D> {
data: D,
filters: AuthFilter,
_marker: PhantomData<P>,
}
impl<P, D> GuardedData<P, D> {
pub fn filters(&self) -> &AuthFilter {
&self.filters
}
async fn auth_bearer(
auth: AuthController,
token: String,
index: Option<String>,
data: Option<D>,
) -> Result<Self, ResponseError>
where
P: Policy + 'static,
{
let missing_master_key = auth.get_master_key().is_none();
match Self::authenticate(auth, token, index).await? {
Some(filters) => match data {
Some(data) => Ok(Self { data, filters, _marker: PhantomData }),
None => Err(AuthenticationError::IrretrievableState.into()),
},
None if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
None => Err(AuthenticationError::InvalidToken.into()),
}
}
async fn auth_token(auth: AuthController, data: Option<D>) -> Result<Self, ResponseError>
where
P: Policy + 'static,
{
let missing_master_key = auth.get_master_key().is_none();
match Self::authenticate(auth, String::new(), None).await? {
Some(filters) => match data {
Some(data) => Ok(Self { data, filters, _marker: PhantomData }),
None => Err(AuthenticationError::IrretrievableState.into()),
},
None if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
None => Err(AuthenticationError::MissingAuthorizationHeader.into()),
}
}
async fn authenticate(
auth: AuthController,
token: String,
index: Option<String>,
) -> Result<Option<AuthFilter>, ResponseError>
where
P: Policy + 'static,
{
tokio::task::spawn_blocking(move || P::authenticate(auth, token.as_ref(), index.as_deref()))
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))
}
}
impl<P, D> Deref for GuardedData<P, D> {
type Target = D;
fn deref(&self) -> &Self::Target {
&self.data
}
}
impl<P: Policy + 'static, D: 'static + Clone> FromRequest for GuardedData<P, D> {
type Error = ResponseError;
type Future = Pin<Box<dyn Future<Output = Result<Self, Self::Error>>>>;
fn from_request(
req: &actix_web::HttpRequest,
_payload: &mut actix_web::dev::Payload,
) -> Self::Future {
match req.app_data::<AuthController>().cloned() {
Some(auth) => match req
.headers()
.get("Authorization")
.map(|type_token| type_token.to_str().unwrap_or_default().splitn(2, ' '))
{
Some(mut type_token) => match type_token.next() {
Some("Bearer") => {
// TODO: find a less hardcoded way?
let index = req.match_info().get("index_uid");
match type_token.next() {
Some(token) => Box::pin(Self::auth_bearer(
auth,
token.to_string(),
index.map(String::from),
req.app_data::<D>().cloned(),
)),
None => Box::pin(err(AuthenticationError::InvalidToken.into())),
}
}
_otherwise => {
Box::pin(err(AuthenticationError::MissingAuthorizationHeader.into()))
}
},
None => Box::pin(Self::auth_token(auth, req.app_data::<D>().cloned())),
},
None => Box::pin(err(AuthenticationError::IrretrievableState.into())),
}
}
}
pub trait Policy {
fn authenticate(auth: AuthController, token: &str, index: Option<&str>) -> Option<AuthFilter>;
}
pub mod policies {
use jsonwebtoken::{decode, Algorithm, DecodingKey, Validation};
use meilisearch_auth::{AuthController, AuthFilter, SearchRules};
// reexport actions in policies in order to be used in routes configuration.
pub use meilisearch_types::keys::{actions, Action};
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use uuid::Uuid;
use crate::extractors::authentication::Policy;
fn tenant_token_validation() -> Validation {
let mut validation = Validation::default();
validation.validate_exp = false;
validation.required_spec_claims.remove("exp");
validation.algorithms = vec![Algorithm::HS256, Algorithm::HS384, Algorithm::HS512];
validation
}
/// Extracts the key id used to sign the payload, without performing any validation.
fn extract_key_id(token: &str) -> Option<Uuid> {
let mut validation = tenant_token_validation();
validation.insecure_disable_signature_validation();
let dummy_key = DecodingKey::from_secret(b"secret");
let token_data = decode::<Claims>(token, &dummy_key, &validation).ok()?;
// get token fields without validating it.
let Claims { api_key_uid, .. } = token_data.claims;
Some(api_key_uid)
}
fn is_keys_action(action: u8) -> bool {
use actions::*;
matches!(action, KEYS_GET | KEYS_CREATE | KEYS_UPDATE | KEYS_DELETE)
}
pub struct ActionPolicy<const A: u8>;
impl<const A: u8> Policy for ActionPolicy<A> {
fn authenticate(
auth: AuthController,
token: &str,
index: Option<&str>,
) -> Option<AuthFilter> {
// authenticate if token is the master key.
// master key can only have access to keys routes.
// if master key is None only keys routes are inaccessible.
if auth.get_master_key().map_or_else(|| !is_keys_action(A), |mk| mk == token) {
return Some(AuthFilter::default());
}
// Tenant token
if let Some(filters) = ActionPolicy::<A>::authenticate_tenant_token(&auth, token, index)
{
return Some(filters);
} else if let Some(action) = Action::from_repr(A) {
// API key
if let Ok(Some(uid)) = auth.get_optional_uid_from_encoded_key(token.as_bytes()) {
if let Ok(true) = auth.is_key_authorized(uid, action, index) {
return auth.get_key_filters(uid, None).ok();
}
}
}
None
}
}
impl<const A: u8> ActionPolicy<A> {
fn authenticate_tenant_token(
auth: &AuthController,
token: &str,
index: Option<&str>,
) -> Option<AuthFilter> {
// Only search action can be accessed by a tenant token.
if A != actions::SEARCH {
return None;
}
let uid = extract_key_id(token)?;
// check if parent key is authorized to do the action.
if auth.is_key_authorized(uid, Action::Search, index).ok()? {
// Check if tenant token is valid.
let key = auth.generate_key(uid)?;
let data = decode::<Claims>(
token,
&DecodingKey::from_secret(key.as_bytes()),
&tenant_token_validation(),
)
.ok()?;
// Check index access if an index restriction is provided.
if let Some(index) = index {
if !data.claims.search_rules.is_index_authorized(index) {
return None;
}
}
// Check if token is expired.
if let Some(exp) = data.claims.exp {
if OffsetDateTime::now_utc().unix_timestamp() > exp {
return None;
}
}
return auth.get_key_filters(uid, Some(data.claims.search_rules)).ok();
}
None
}
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
struct Claims {
search_rules: SearchRules,
exp: Option<i64>,
api_key_uid: Uuid,
}
}

View file

@ -0,0 +1,4 @@
pub mod payload;
#[macro_use]
pub mod authentication;
pub mod sequential_extractor;

View file

@ -0,0 +1,69 @@
use std::pin::Pin;
use std::task::{Context, Poll};
use actix_http::encoding::Decoder as Decompress;
use actix_web::{dev, web, FromRequest, HttpRequest};
use futures::future::{ready, Ready};
use futures::Stream;
use crate::error::MeilisearchHttpError;
pub struct Payload {
payload: Decompress<dev::Payload>,
limit: usize,
}
pub struct PayloadConfig {
limit: usize,
}
impl PayloadConfig {
pub fn new(limit: usize) -> Self {
Self { limit }
}
}
impl Default for PayloadConfig {
fn default() -> Self {
Self { limit: 256 * 1024 }
}
}
impl FromRequest for Payload {
type Error = MeilisearchHttpError;
type Future = Ready<Result<Payload, Self::Error>>;
#[inline]
fn from_request(req: &HttpRequest, payload: &mut dev::Payload) -> Self::Future {
let limit = req
.app_data::<PayloadConfig>()
.map(|c| c.limit)
.unwrap_or(PayloadConfig::default().limit);
ready(Ok(Payload {
payload: Decompress::from_headers(payload.take(), req.headers()),
limit,
}))
}
}
impl Stream for Payload {
type Item = Result<web::Bytes, MeilisearchHttpError>;
#[inline]
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
match Pin::new(&mut self.payload).poll_next(cx) {
Poll::Ready(Some(result)) => match result {
Ok(bytes) => match self.limit.checked_sub(bytes.len()) {
Some(new_limit) => {
self.limit = new_limit;
Poll::Ready(Some(Ok(bytes)))
}
None => Poll::Ready(Some(Err(MeilisearchHttpError::PayloadTooLarge))),
},
x => Poll::Ready(Some(x.map_err(MeilisearchHttpError::from))),
},
otherwise => otherwise.map(|o| o.map(|o| o.map_err(MeilisearchHttpError::from))),
}
}
}

View file

@ -0,0 +1,151 @@
#![allow(non_snake_case)]
use std::future::Future;
use std::pin::Pin;
use std::task::Poll;
use actix_web::dev::Payload;
use actix_web::{FromRequest, Handler, HttpRequest};
use pin_project_lite::pin_project;
/// `SeqHandler` is an actix `Handler` that enforces that extractors errors are returned in the
/// same order as they are defined in the wrapped handler. This is needed because, by default, actix
/// resolves the extractors concurrently, whereas we always need the authentication extractor to
/// throw first.
#[derive(Clone)]
pub struct SeqHandler<H>(pub H);
pub struct SeqFromRequest<T>(T);
/// This macro implements `FromRequest` for arbitrary arity handler, except for one, which is
/// useless anyway.
macro_rules! gen_seq {
($ty:ident; $($T:ident)+) => {
pin_project! {
pub struct $ty<$($T: FromRequest), +> {
$(
#[pin]
$T: ExtractFuture<$T::Future, $T, $T::Error>,
)+
}
}
impl<$($T: FromRequest), +> Future for $ty<$($T),+> {
type Output = Result<SeqFromRequest<($($T),+)>, actix_web::Error>;
fn poll(self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll<Self::Output> {
let mut this = self.project();
let mut count_fut = 0;
let mut count_finished = 0;
$(
count_fut += 1;
match this.$T.as_mut().project() {
ExtractProj::Future { fut } => match fut.poll(cx) {
Poll::Ready(Ok(output)) => {
count_finished += 1;
let _ = this
.$T
.as_mut()
.project_replace(ExtractFuture::Done { output });
}
Poll::Ready(Err(error)) => {
count_finished += 1;
let _ = this
.$T
.as_mut()
.project_replace(ExtractFuture::Error { error });
}
Poll::Pending => (),
},
ExtractProj::Done { .. } => count_finished += 1,
ExtractProj::Error { .. } => {
// short circuit if all previous are finished and we had an error.
if count_finished == count_fut {
match this.$T.project_replace(ExtractFuture::Empty) {
ExtractReplaceProj::Error { error } => {
return Poll::Ready(Err(error.into()))
}
_ => unreachable!("Invalid future state"),
}
} else {
count_finished += 1;
}
}
ExtractProj::Empty => unreachable!("From request polled after being finished. {}", stringify!($T)),
}
)+
if count_fut == count_finished {
let result = (
$(
match this.$T.project_replace(ExtractFuture::Empty) {
ExtractReplaceProj::Done { output } => output,
ExtractReplaceProj::Error { error } => return Poll::Ready(Err(error.into())),
_ => unreachable!("Invalid future state"),
},
)+
);
Poll::Ready(Ok(SeqFromRequest(result)))
} else {
Poll::Pending
}
}
}
impl<$($T: FromRequest,)+> FromRequest for SeqFromRequest<($($T,)+)> {
type Error = actix_web::Error;
type Future = $ty<$($T),+>;
fn from_request(req: &HttpRequest, payload: &mut Payload) -> Self::Future {
$ty {
$(
$T: ExtractFuture::Future {
fut: $T::from_request(req, payload),
},
)+
}
}
}
impl<Han, $($T: FromRequest),+> Handler<SeqFromRequest<($($T),+)>> for SeqHandler<Han>
where
Han: Handler<($($T),+)>,
{
type Output = Han::Output;
type Future = Han::Future;
fn call(&self, args: SeqFromRequest<($($T),+)>) -> Self::Future {
self.0.call(args.0)
}
}
};
}
// Not working for a single argument, but then, it is not really necessary.
// gen_seq! { SeqFromRequestFut1; A }
gen_seq! { SeqFromRequestFut2; A B }
gen_seq! { SeqFromRequestFut3; A B C }
gen_seq! { SeqFromRequestFut4; A B C D }
gen_seq! { SeqFromRequestFut5; A B C D E }
gen_seq! { SeqFromRequestFut6; A B C D E F }
pin_project! {
#[project = ExtractProj]
#[project_replace = ExtractReplaceProj]
enum ExtractFuture<Fut, Res, Err> {
Future {
#[pin]
fut: Fut,
},
Done {
output: Res,
},
Error {
error: Err,
},
Empty,
}
}

417
meilisearch/src/lib.rs Normal file
View file

@ -0,0 +1,417 @@
#![allow(rustdoc::private_intra_doc_links)]
#[macro_use]
pub mod error;
pub mod analytics;
#[macro_use]
pub mod extractors;
pub mod option;
pub mod routes;
pub mod search;
#[cfg(feature = "metrics")]
pub mod metrics;
#[cfg(feature = "metrics")]
pub mod route_metrics;
use std::fs::File;
use std::io::{BufReader, BufWriter};
use std::path::Path;
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
use std::thread;
use std::time::Duration;
use actix_cors::Cors;
use actix_http::body::MessageBody;
use actix_web::dev::{ServiceFactory, ServiceResponse};
use actix_web::error::JsonPayloadError;
use actix_web::web::Data;
use actix_web::{middleware, web, HttpRequest};
use analytics::Analytics;
use anyhow::bail;
use error::PayloadError;
use extractors::payload::PayloadConfig;
use http::header::CONTENT_TYPE;
use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
use log::error;
use meilisearch_auth::AuthController;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
use meilisearch_types::settings::apply_settings_to_builder;
use meilisearch_types::tasks::KindWithContent;
use meilisearch_types::versioning::{check_version_file, create_version_file};
use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
pub use option::Opt;
use crate::error::MeilisearchHttpError;
pub static AUTOBATCHING_ENABLED: AtomicBool = AtomicBool::new(false);
/// Check if a db is empty. It does not provide any information on the
/// validity of the data in it.
/// We consider a database as non empty when it's a non empty directory.
fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
let db_path = db_path.as_ref();
if !db_path.exists() {
true
// if we encounter an error or if the db is a file we consider the db non empty
} else if let Ok(dir) = db_path.read_dir() {
dir.count() == 0
} else {
true
}
}
pub fn create_app(
index_scheduler: Data<IndexScheduler>,
auth_controller: AuthController,
opt: Opt,
analytics: Arc<dyn Analytics>,
enable_dashboard: bool,
) -> actix_web::App<
impl ServiceFactory<
actix_web::dev::ServiceRequest,
Config = (),
Response = ServiceResponse<impl MessageBody>,
Error = actix_web::Error,
InitError = (),
>,
> {
let app = actix_web::App::new()
.configure(|s| {
configure_data(
s,
index_scheduler.clone(),
auth_controller.clone(),
&opt,
analytics.clone(),
)
})
.configure(routes::configure)
.configure(|s| dashboard(s, enable_dashboard));
#[cfg(feature = "metrics")]
let app = app.configure(|s| configure_metrics_route(s, opt.enable_metrics_route));
#[cfg(feature = "metrics")]
let app = app.wrap(Condition::new(opt.enable_metrics_route, route_metrics::RouteMetrics));
app.wrap(
Cors::default()
.send_wildcard()
.allow_any_header()
.allow_any_origin()
.allow_any_method()
.max_age(86_400), // 24h
)
.wrap(middleware::Logger::default())
.wrap(middleware::Compress::default())
.wrap(middleware::NormalizePath::new(middleware::TrailingSlash::Trim))
}
// TODO: TAMO: Finish setting up things
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, AuthController)> {
// we don't want to create anything in the data.ms yet, thus we
// wrap our two builders in a closure that'll be executed later.
let auth_controller_builder = || AuthController::new(&opt.db_path, &opt.master_key);
let index_scheduler_builder = || {
IndexScheduler::new(IndexSchedulerOptions {
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
auth_path: opt.db_path.join("auth"),
tasks_path: opt.db_path.join("tasks"),
update_file_path: opt.db_path.join("update_files"),
indexes_path: opt.db_path.join("indexes"),
snapshots_path: opt.snapshot_dir.clone(),
dumps_path: opt.dump_dir.clone(),
task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_size: opt.max_index_size.get_bytes() as usize,
indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: !opt.scheduler_options.disable_auto_batching,
})
};
enum OnFailure {
RemoveDb,
KeepDb,
}
let meilisearch_builder = |on_failure: OnFailure| -> anyhow::Result<_> {
// if anything wrong happens we delete the `data.ms` entirely.
match (
index_scheduler_builder().map_err(anyhow::Error::from),
auth_controller_builder().map_err(anyhow::Error::from),
create_version_file(&opt.db_path).map_err(anyhow::Error::from),
) {
(Ok(i), Ok(a), Ok(())) => Ok((i, a)),
(Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => {
if matches!(on_failure, OnFailure::RemoveDb) {
std::fs::remove_dir_all(&opt.db_path)?;
}
Err(e)
}
}
};
let empty_db = is_empty_db(&opt.db_path);
let (index_scheduler, auth_controller) = if let Some(ref snapshot_path) = opt.import_snapshot {
let snapshot_path_exists = snapshot_path.exists();
if empty_db && snapshot_path_exists {
match compression::from_tar_gz(snapshot_path, &opt.db_path) {
Ok(()) => meilisearch_builder(OnFailure::RemoveDb)?,
Err(e) => {
std::fs::remove_dir_all(&opt.db_path)?;
return Err(e);
}
}
} else if !empty_db && !opt.ignore_snapshot_if_db_exists {
bail!(
"database already exists at {:?}, try to delete it or rename it",
opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned())
)
} else if !snapshot_path_exists && !opt.ignore_missing_snapshot {
bail!("snapshot doesn't exist at {}", snapshot_path.display())
} else {
meilisearch_builder(OnFailure::RemoveDb)?
}
} else if let Some(ref path) = opt.import_dump {
let src_path_exists = path.exists();
if empty_db && src_path_exists {
let (mut index_scheduler, mut auth_controller) =
meilisearch_builder(OnFailure::RemoveDb)?;
match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
Ok(()) => (index_scheduler, auth_controller),
Err(e) => {
std::fs::remove_dir_all(&opt.db_path)?;
return Err(e);
}
}
} else if !empty_db && !opt.ignore_dump_if_db_exists {
bail!(
"database already exists at {:?}, try to delete it or rename it",
opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned())
)
} else if !src_path_exists && !opt.ignore_missing_dump {
bail!("dump doesn't exist at {:?}", path)
} else {
let (mut index_scheduler, mut auth_controller) =
meilisearch_builder(OnFailure::RemoveDb)?;
match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
Ok(()) => (index_scheduler, auth_controller),
Err(e) => {
std::fs::remove_dir_all(&opt.db_path)?;
return Err(e);
}
}
}
} else {
if !empty_db {
check_version_file(&opt.db_path)?;
}
meilisearch_builder(OnFailure::KeepDb)?
};
// We create a loop in a thread that registers snapshotCreation tasks
let index_scheduler = Arc::new(index_scheduler);
if opt.schedule_snapshot {
let snapshot_delay = Duration::from_secs(opt.snapshot_interval_sec);
let index_scheduler = index_scheduler.clone();
thread::Builder::new()
.name(String::from("register-snapshot-tasks"))
.spawn(move || loop {
thread::sleep(snapshot_delay);
if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation) {
error!("Error while registering snapshot: {}", e);
}
})
.unwrap();
}
Ok((index_scheduler, auth_controller))
}
fn import_dump(
db_path: &Path,
dump_path: &Path,
index_scheduler: &mut IndexScheduler,
auth: &mut AuthController,
) -> Result<(), anyhow::Error> {
let reader = File::open(dump_path)?;
let mut dump_reader = dump::DumpReader::open(reader)?;
if let Some(date) = dump_reader.date() {
log::info!(
"Importing a dump of meilisearch `{:?}` from the {}",
dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
date
);
} else {
log::info!(
"Importing a dump of meilisearch `{:?}`",
dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
);
}
let instance_uid = dump_reader.instance_uid()?;
// 1. Import the instance-uid.
if let Some(ref instance_uid) = instance_uid {
// we don't want to panic if there is an error with the instance-uid.
let _ = std::fs::write(db_path.join("instance-uid"), instance_uid.to_string().as_bytes());
};
// 2. Import the `Key`s.
let mut keys = Vec::new();
auth.raw_delete_all_keys()?;
for key in dump_reader.keys()? {
let key = key?;
auth.raw_insert_key(key.clone())?;
keys.push(key);
}
let indexer_config = index_scheduler.indexer_config();
// /!\ The tasks must be imported AFTER importing the indexes or else the scheduler might
// try to process tasks while we're trying to import the indexes.
// 3. Import the indexes.
for index_reader in dump_reader.indexes()? {
let mut index_reader = index_reader?;
let metadata = index_reader.metadata();
log::info!("Importing index `{}`.", metadata.uid);
let index = index_scheduler.create_raw_index(&metadata.uid)?;
let mut wtxn = index.write_txn()?;
let mut builder = milli::update::Settings::new(&mut wtxn, &index, indexer_config);
// 3.1 Import the primary key if there is one.
if let Some(ref primary_key) = metadata.primary_key {
builder.set_primary_key(primary_key.to_string());
}
// 3.2 Import the settings.
log::info!("Importing the settings.");
let settings = index_reader.settings()?;
apply_settings_to_builder(&settings, &mut builder);
builder.execute(|indexing_step| log::debug!("update: {:?}", indexing_step), || false)?;
// 3.3 Import the documents.
// 3.3.1 We need to recreate the grenad+obkv format accepted by the index.
log::info!("Importing the documents.");
let file = tempfile::tempfile()?;
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
for document in index_reader.documents()? {
builder.append_json_object(&document?)?;
}
// This flush the content of the batch builder.
let file = builder.into_inner()?.into_inner()?;
// 3.3.2 We feed it to the milli index.
let reader = BufReader::new(file);
let reader = DocumentsBatchReader::from_reader(reader)?;
let builder = milli::update::IndexDocuments::new(
&mut wtxn,
&index,
indexer_config,
IndexDocumentsConfig {
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
},
|indexing_step| log::debug!("update: {:?}", indexing_step),
|| false,
)?;
let (builder, user_result) = builder.add_documents(reader)?;
log::info!("{} documents found.", user_result?);
builder.execute()?;
wtxn.commit()?;
log::info!("All documents successfully imported.");
}
// 4. Import the tasks.
for ret in dump_reader.tasks()? {
let (task, file) = ret?;
index_scheduler.register_dumped_task(task, file)?;
}
Ok(())
}
pub fn configure_data(
config: &mut web::ServiceConfig,
index_scheduler: Data<IndexScheduler>,
auth: AuthController,
opt: &Opt,
analytics: Arc<dyn Analytics>,
) {
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
config
.app_data(index_scheduler)
.app_data(auth)
.app_data(web::Data::from(analytics))
.app_data(
web::JsonConfig::default()
.content_type(|mime| mime == mime::APPLICATION_JSON)
.error_handler(|err, req: &HttpRequest| match err {
JsonPayloadError::ContentType => match req.headers().get(CONTENT_TYPE) {
Some(content_type) => MeilisearchHttpError::InvalidContentType(
content_type.to_str().unwrap_or("unknown").to_string(),
vec![mime::APPLICATION_JSON.to_string()],
)
.into(),
None => MeilisearchHttpError::MissingContentType(vec![
mime::APPLICATION_JSON.to_string(),
])
.into(),
},
err => PayloadError::from(err).into(),
}),
)
.app_data(PayloadConfig::new(http_payload_size_limit))
.app_data(
web::QueryConfig::default().error_handler(|err, _req| PayloadError::from(err).into()),
);
}
#[cfg(feature = "mini-dashboard")]
pub fn dashboard(config: &mut web::ServiceConfig, enable_frontend: bool) {
use actix_web::HttpResponse;
use static_files::Resource;
mod generated {
include!(concat!(env!("OUT_DIR"), "/generated.rs"));
}
if enable_frontend {
let generated = generated::generate();
// Generate routes for mini-dashboard assets
for (path, resource) in generated.into_iter() {
let Resource { mime_type, data, .. } = resource;
// Redirect index.html to /
if path == "index.html" {
config.service(web::resource("/").route(web::get().to(move || async move {
HttpResponse::Ok().content_type(mime_type).body(data)
})));
} else {
config.service(web::resource(path).route(web::get().to(move || async move {
HttpResponse::Ok().content_type(mime_type).body(data)
})));
}
}
} else {
config.service(web::resource("/").route(web::get().to(routes::running)));
}
}
#[cfg(not(feature = "mini-dashboard"))]
pub fn dashboard(config: &mut web::ServiceConfig, _enable_frontend: bool) {
config.service(web::resource("/").route(web::get().to(routes::running)));
}
#[cfg(feature = "metrics")]
pub fn configure_metrics_route(config: &mut web::ServiceConfig, enable_metrics_route: bool) {
if enable_metrics_route {
config.service(
web::resource("/metrics").route(web::get().to(crate::route_metrics::get_metrics)),
);
}
}

166
meilisearch/src/main.rs Normal file
View file

@ -0,0 +1,166 @@
use std::env;
use std::path::PathBuf;
use std::sync::Arc;
use actix_web::http::KeepAlive;
use actix_web::web::Data;
use actix_web::HttpServer;
use index_scheduler::IndexScheduler;
use meilisearch_auth::AuthController;
use meilisearch::analytics::Analytics;
use meilisearch::{analytics, create_app, setup_meilisearch, Opt};
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
/// does all the setup before meilisearch is launched
fn setup(opt: &Opt) -> anyhow::Result<()> {
let mut log_builder = env_logger::Builder::new();
log_builder.parse_filters(&opt.log_level);
if opt.log_level == "info" {
// if we are in info we only allow the warn log_level for milli
log_builder.filter_module("milli", log::LevelFilter::Warn);
}
log_builder.init();
Ok(())
}
#[actix_web::main]
async fn main() -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?;
setup(&opt)?;
match opt.env.as_ref() {
"production" => {
if opt.master_key.is_none() {
anyhow::bail!(
"In production mode, the environment variable MEILI_MASTER_KEY is mandatory"
)
}
}
"development" => (),
_ => unreachable!(),
}
let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
let analytics = if !opt.no_analytics {
analytics::SegmentAnalytics::new(&opt, index_scheduler.clone()).await
} else {
analytics::MockAnalytics::new(&opt)
};
#[cfg(any(debug_assertions, not(feature = "analytics")))]
let analytics = analytics::MockAnalytics::new(&opt);
print_launch_resume(&opt, analytics.clone(), config_read_from);
run_http(index_scheduler, auth_controller, opt, analytics).await?;
Ok(())
}
async fn run_http(
index_scheduler: Arc<IndexScheduler>,
auth_controller: AuthController,
opt: Opt,
analytics: Arc<dyn Analytics>,
) -> anyhow::Result<()> {
let enable_dashboard = &opt.env == "development";
let opt_clone = opt.clone();
let index_scheduler = Data::from(index_scheduler);
let http_server = HttpServer::new(move || {
create_app(
index_scheduler.clone(),
auth_controller.clone(),
opt.clone(),
analytics.clone(),
enable_dashboard,
)
})
// Disable signals allows the server to terminate immediately when a user enter CTRL-C
.disable_signals()
.keep_alive(KeepAlive::Os);
if let Some(config) = opt_clone.get_ssl_config()? {
http_server.bind_rustls(opt_clone.http_addr, config)?.run().await?;
} else {
http_server.bind(&opt_clone.http_addr)?.run().await?;
}
Ok(())
}
pub fn print_launch_resume(
opt: &Opt,
analytics: Arc<dyn Analytics>,
config_read_from: Option<PathBuf>,
) {
let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
let protocol =
if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() { "https" } else { "http" };
let ascii_name = r#"
888b d888 d8b 888 d8b 888
8888b d8888 Y8P 888 Y8P 888
88888b.d88888 888 888
888Y88888P888 .d88b. 888 888 888 .d8888b .d88b. 8888b. 888d888 .d8888b 88888b.
888 Y888P 888 d8P Y8b 888 888 888 88K d8P Y8b "88b 888P" d88P" 888 "88b
888 Y8P 888 88888888 888 888 888 "Y8888b. 88888888 .d888888 888 888 888 888
888 " 888 Y8b. 888 888 888 X88 Y8b. 888 888 888 Y88b. 888 888
888 888 "Y8888 888 888 888 88888P' "Y8888 "Y888888 888 "Y8888P 888 888
"#;
eprintln!("{}", ascii_name);
eprintln!(
"Config file path:\t{:?}",
config_read_from
.map(|config_file_path| config_file_path.display().to_string())
.unwrap_or_else(|| "none".to_string())
);
eprintln!("Database path:\t\t{:?}", opt.db_path);
eprintln!("Server listening on:\t\"{}://{}\"", protocol, opt.http_addr);
eprintln!("Environment:\t\t{:?}", opt.env);
eprintln!("Commit SHA:\t\t{:?}", commit_sha.to_string());
eprintln!("Commit date:\t\t{:?}", commit_date.to_string());
eprintln!("Package version:\t{:?}", env!("CARGO_PKG_VERSION").to_string());
#[cfg(all(not(debug_assertions), feature = "analytics"))]
{
if !opt.no_analytics {
eprintln!(
"
Thank you for using Meilisearch!
We collect anonymized analytics to improve our product and your experience. To learn more, including how to turn off analytics, visit our dedicated documentation page: https://docs.meilisearch.com/learn/what_is_meilisearch/telemetry.html
Anonymous telemetry:\t\"Enabled\""
);
} else {
eprintln!("Anonymous telemetry:\t\"Disabled\"");
}
}
if let Some(instance_uid) = analytics.instance_uid() {
eprintln!("Instance UID:\t\t\"{}\"", instance_uid);
}
eprintln!();
if opt.master_key.is_some() {
eprintln!("A Master Key has been set. Requests to Meilisearch won't be authorized unless you provide an authentication key.");
} else {
eprintln!("No master key found; The server will accept unidentified requests. \
If you need some protection in development mode, please export a key: export MEILI_MASTER_KEY=xxx");
}
eprintln!();
eprintln!("Documentation:\t\thttps://docs.meilisearch.com");
eprintln!("Source code:\t\thttps://github.com/meilisearch/meilisearch");
eprintln!("Contact:\t\thttps://docs.meilisearch.com/resources/contact.html");
eprintln!();
}

View file

@ -0,0 +1,36 @@
use lazy_static::lazy_static;
use prometheus::{
opts, register_histogram_vec, register_int_counter_vec, register_int_gauge,
register_int_gauge_vec, HistogramVec, IntCounterVec, IntGauge, IntGaugeVec,
};
const HTTP_RESPONSE_TIME_CUSTOM_BUCKETS: &[f64; 14] = &[
0.0005, 0.0008, 0.00085, 0.0009, 0.00095, 0.001, 0.00105, 0.0011, 0.00115, 0.0012, 0.0015,
0.002, 0.003, 1.0,
];
lazy_static! {
pub static ref HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
opts!("http_requests_total", "HTTP requests total"),
&["method", "path"]
)
.expect("Can't create a metric");
pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge =
register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch Db Size In Bytes"))
.expect("Can't create a metric");
pub static ref MEILISEARCH_INDEX_COUNT: IntGauge =
register_int_gauge!(opts!("meilisearch_index_count", "Meilisearch Index Count"))
.expect("Can't create a metric");
pub static ref MEILISEARCH_INDEX_DOCS_COUNT: IntGaugeVec = register_int_gauge_vec!(
opts!("meilisearch_index_docs_count", "Meilisearch Index Docs Count"),
&["index"]
)
.expect("Can't create a metric");
pub static ref HTTP_RESPONSE_TIME_SECONDS: HistogramVec = register_histogram_vec!(
"http_response_time_seconds",
"HTTP response times",
&["method", "path"],
HTTP_RESPONSE_TIME_CUSTOM_BUCKETS.to_vec()
)
.expect("Can't create a metric");
}

761
meilisearch/src/option.rs Normal file
View file

@ -0,0 +1,761 @@
use std::convert::TryFrom;
use std::env::VarError;
use std::ffi::OsStr;
use std::io::{BufReader, Read};
use std::num::ParseIntError;
use std::ops::Deref;
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Arc;
use std::{env, fmt, fs};
use byte_unit::{Byte, ByteError};
use clap::Parser;
use meilisearch_types::milli::update::IndexerConfig;
use rustls::server::{
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, ServerSessionMemoryCache,
};
use rustls::RootCertStore;
use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
use serde::{Deserialize, Serialize};
use sysinfo::{RefreshKind, System, SystemExt};
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
const MEILI_DB_PATH: &str = "MEILI_DB_PATH";
const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
const MEILI_ENV: &str = "MEILI_ENV";
#[cfg(all(not(debug_assertions), feature = "analytics"))]
const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
const MEILI_MAX_INDEX_SIZE: &str = "MEILI_MAX_INDEX_SIZE";
const MEILI_MAX_TASK_DB_SIZE: &str = "MEILI_MAX_TASK_DB_SIZE";
const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
const MEILI_SSL_CERT_PATH: &str = "MEILI_SSL_CERT_PATH";
const MEILI_SSL_KEY_PATH: &str = "MEILI_SSL_KEY_PATH";
const MEILI_SSL_AUTH_PATH: &str = "MEILI_SSL_AUTH_PATH";
const MEILI_SSL_OCSP_PATH: &str = "MEILI_SSL_OCSP_PATH";
const MEILI_SSL_REQUIRE_AUTH: &str = "MEILI_SSL_REQUIRE_AUTH";
const MEILI_SSL_RESUMPTION: &str = "MEILI_SSL_RESUMPTION";
const MEILI_SSL_TICKETS: &str = "MEILI_SSL_TICKETS";
const MEILI_IMPORT_SNAPSHOT: &str = "MEILI_IMPORT_SNAPSHOT";
const MEILI_IGNORE_MISSING_SNAPSHOT: &str = "MEILI_IGNORE_MISSING_SNAPSHOT";
const MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS: &str = "MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS";
const MEILI_SNAPSHOT_DIR: &str = "MEILI_SNAPSHOT_DIR";
const MEILI_SCHEDULE_SNAPSHOT: &str = "MEILI_SCHEDULE_SNAPSHOT";
const MEILI_SNAPSHOT_INTERVAL_SEC: &str = "MEILI_SNAPSHOT_INTERVAL_SEC";
const MEILI_IMPORT_DUMP: &str = "MEILI_IMPORT_DUMP";
const MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP";
const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS";
const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR";
const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
#[cfg(feature = "metrics")]
const MEILI_ENABLE_METRICS_ROUTE: &str = "MEILI_ENABLE_METRICS_ROUTE";
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
const DEFAULT_DB_PATH: &str = "./data.ms";
const DEFAULT_HTTP_ADDR: &str = "localhost:7700";
const DEFAULT_ENV: &str = "development";
const DEFAULT_MAX_INDEX_SIZE: &str = "100 GiB";
const DEFAULT_MAX_TASK_DB_SIZE: &str = "100 GiB";
const DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT: &str = "100 MB";
const DEFAULT_SNAPSHOT_DIR: &str = "snapshots/";
const DEFAULT_SNAPSHOT_INTERVAL_SEC: u64 = 86400;
const DEFAULT_DUMP_DIR: &str = "dumps/";
const DEFAULT_LOG_LEVEL: &str = "INFO";
const MEILI_MAX_INDEXING_MEMORY: &str = "MEILI_MAX_INDEXING_MEMORY";
const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS";
const DISABLE_AUTO_BATCHING: &str = "DISABLE_AUTO_BATCHING";
const DEFAULT_LOG_EVERY_N: usize = 100000;
#[derive(Debug, Clone, Parser, Deserialize)]
#[clap(version, next_display_order = None)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct Opt {
/// Designates the location where database files will be created and retrieved.
#[clap(long, env = MEILI_DB_PATH, default_value_os_t = default_db_path())]
#[serde(default = "default_db_path")]
pub db_path: PathBuf,
/// Sets the HTTP address and port Meilisearch will use.
#[clap(long, env = MEILI_HTTP_ADDR, default_value_t = default_http_addr())]
#[serde(default = "default_http_addr")]
pub http_addr: String,
/// Sets the instance's master key, automatically protecting all routes except `GET /health`.
#[clap(long, env = MEILI_MASTER_KEY)]
pub master_key: Option<String>,
/// Configures the instance's environment. Value must be either `production` or `development`.
#[clap(long, env = MEILI_ENV, default_value_t = default_env(), value_parser = POSSIBLE_ENV)]
#[serde(default = "default_env")]
pub env: String,
/// Deactivates Meilisearch's built-in telemetry when provided.
///
/// Meilisearch automatically collects data from all instances that do not opt out using this flag.
/// All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted
/// at any time.
#[cfg(all(not(debug_assertions), feature = "analytics"))]
#[serde(default)] // we can't send true
#[clap(long, env = MEILI_NO_ANALYTICS)]
pub no_analytics: bool,
/// Sets the maximum size of the index. Value must be given in bytes or explicitly stating a base unit (for instance: 107374182400, '107.7Gb', or '107374 Mb').
#[clap(long, env = MEILI_MAX_INDEX_SIZE, default_value_t = default_max_index_size())]
#[serde(default = "default_max_index_size")]
pub max_index_size: Byte,
/// Sets the maximum size of the task database. Value must be given in bytes or explicitly stating a
/// base unit (for instance: 107374182400, '107.7Gb', or '107374 Mb').
#[clap(long, env = MEILI_MAX_TASK_DB_SIZE, default_value_t = default_max_task_db_size())]
#[serde(default = "default_max_task_db_size")]
pub max_task_db_size: Byte,
/// Sets the maximum size of accepted payloads. Value must be given in bytes or explicitly stating a
/// base unit (for instance: 107374182400, '107.7Gb', or '107374 Mb').
#[clap(long, env = MEILI_HTTP_PAYLOAD_SIZE_LIMIT, default_value_t = default_http_payload_size_limit())]
#[serde(default = "default_http_payload_size_limit")]
pub http_payload_size_limit: Byte,
/// Sets the server's SSL certificates.
#[clap(long, env = MEILI_SSL_CERT_PATH, value_parser)]
pub ssl_cert_path: Option<PathBuf>,
/// Sets the server's SSL key files.
#[clap(long, env = MEILI_SSL_KEY_PATH, value_parser)]
pub ssl_key_path: Option<PathBuf>,
/// Enables client authentication in the specified path.
#[clap(long, env = MEILI_SSL_AUTH_PATH, value_parser)]
pub ssl_auth_path: Option<PathBuf>,
/// Sets the server's OCSP file. *Optional*
///
/// Reads DER-encoded OCSP response from OCSPFILE and staple to certificate.
#[clap(long, env = MEILI_SSL_OCSP_PATH, value_parser)]
pub ssl_ocsp_path: Option<PathBuf>,
/// Makes SSL authentication mandatory.
#[serde(default)]
#[clap(long, env = MEILI_SSL_REQUIRE_AUTH)]
pub ssl_require_auth: bool,
/// Activates SSL session resumption.
#[serde(default)]
#[clap(long, env = MEILI_SSL_RESUMPTION)]
pub ssl_resumption: bool,
/// Activates SSL tickets.
#[serde(default)]
#[clap(long, env = MEILI_SSL_TICKETS)]
pub ssl_tickets: bool,
/// Launches Meilisearch after importing a previously-generated snapshot at the given filepath.
#[clap(long, env = MEILI_IMPORT_SNAPSHOT)]
pub import_snapshot: Option<PathBuf>,
/// Prevents a Meilisearch instance from throwing an error when `--import-snapshot`
/// does not point to a valid snapshot file.
///
/// This command will throw an error if `--import-snapshot` is not defined.
#[clap(
long,
env = MEILI_IGNORE_MISSING_SNAPSHOT,
requires = "import_snapshot"
)]
#[serde(default)]
pub ignore_missing_snapshot: bool,
/// Prevents a Meilisearch instance with an existing database from throwing an
/// error when using `--import-snapshot`. Instead, the snapshot will be ignored
/// and Meilisearch will launch using the existing database.
///
/// This command will throw an error if `--import-snapshot` is not defined.
#[clap(
long,
env = MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS,
requires = "import_snapshot"
)]
#[serde(default)]
pub ignore_snapshot_if_db_exists: bool,
/// Sets the directory where Meilisearch will store snapshots.
#[clap(long, env = MEILI_SNAPSHOT_DIR, default_value_os_t = default_snapshot_dir())]
#[serde(default = "default_snapshot_dir")]
pub snapshot_dir: PathBuf,
/// Activates scheduled snapshots when provided. Snapshots are disabled by default.
#[clap(long, env = MEILI_SCHEDULE_SNAPSHOT)]
#[serde(default)]
pub schedule_snapshot: bool,
/// Defines the interval between each snapshot. Value must be given in seconds.
#[clap(long, env = MEILI_SNAPSHOT_INTERVAL_SEC, default_value_t = default_snapshot_interval_sec())]
#[serde(default = "default_snapshot_interval_sec")]
pub snapshot_interval_sec: u64,
/// Imports the dump file located at the specified path. Path must point to a `.dump` file.
/// If a database already exists, Meilisearch will throw an error and abort launch.
#[clap(long, env = MEILI_IMPORT_DUMP, conflicts_with = "import_snapshot")]
pub import_dump: Option<PathBuf>,
/// Prevents Meilisearch from throwing an error when `--import-dump` does not point to
/// a valid dump file. Instead, Meilisearch will start normally without importing any dump.
///
/// This option will trigger an error if `--import-dump` is not defined.
#[clap(long, env = MEILI_IGNORE_MISSING_DUMP, requires = "import_dump")]
#[serde(default)]
pub ignore_missing_dump: bool,
/// Prevents a Meilisearch instance with an existing database from throwing an error
/// when using `--import-dump`. Instead, the dump will be ignored and Meilisearch will
/// launch using the existing database.
///
/// This option will trigger an error if `--import-dump` is not defined.
#[clap(long, env = MEILI_IGNORE_DUMP_IF_DB_EXISTS, requires = "import_dump")]
#[serde(default)]
pub ignore_dump_if_db_exists: bool,
/// Sets the directory where Meilisearch will create dump files.
#[clap(long, env = MEILI_DUMP_DIR, default_value_os_t = default_dump_dir())]
#[serde(default = "default_dump_dir")]
pub dump_dir: PathBuf,
/// Defines how much detail should be present in Meilisearch's logs.
///
/// Meilisearch currently supports five log levels, listed in order of increasing verbosity: ERROR, WARN, INFO, DEBUG, TRACE.
#[clap(long, env = MEILI_LOG_LEVEL, default_value_t = default_log_level())]
#[serde(default = "default_log_level")]
pub log_level: String,
/// Enables Prometheus metrics and /metrics route.
#[cfg(feature = "metrics")]
#[clap(long, env = MEILI_ENABLE_METRICS_ROUTE)]
#[serde(default)]
pub enable_metrics_route: bool,
#[serde(flatten)]
#[clap(flatten)]
pub indexer_options: IndexerOpts,
#[serde(flatten)]
#[clap(flatten)]
pub scheduler_options: SchedulerConfig,
/// Set the path to a configuration file that should be used to setup the engine.
/// Format must be TOML.
#[clap(long)]
pub config_file_path: Option<PathBuf>,
}
impl Opt {
/// Whether analytics should be enabled or not.
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub fn analytics(&self) -> bool {
!self.no_analytics
}
/// Build a new Opt from config file, env vars and cli args.
pub fn try_build() -> anyhow::Result<(Self, Option<PathBuf>)> {
// Parse the args to get the config_file_path.
let mut opts = Opt::parse();
let mut config_read_from = None;
let user_specified_config_file_path = opts
.config_file_path
.clone()
.or_else(|| env::var("MEILI_CONFIG_FILE_PATH").map(PathBuf::from).ok());
let config_file_path = user_specified_config_file_path
.clone()
.unwrap_or_else(|| PathBuf::from(DEFAULT_CONFIG_FILE_PATH));
match std::fs::read(&config_file_path) {
Ok(config) => {
// If the file is successfully read, we deserialize it with `toml`.
let opt_from_config = toml::from_slice::<Opt>(&config)?;
// Return an error if config file contains 'config_file_path'
// Using that key in the config file doesn't make sense bc it creates a logical loop (config file referencing itself)
if opt_from_config.config_file_path.is_some() {
anyhow::bail!("`config_file_path` is not supported in the configuration file")
}
// We inject the values from the toml in the corresponding env vars if needs be. Doing so, we respect the priority toml < env vars < cli args.
opt_from_config.export_to_env();
// Once injected we parse the cli args once again to take the new env vars into scope.
opts = Opt::parse();
config_read_from = Some(config_file_path);
}
Err(e) => {
if let Some(path) = user_specified_config_file_path {
// If we have an error while reading the file defined by the user.
anyhow::bail!(
"unable to open or read the {:?} configuration file: {}.",
path,
e,
)
}
}
}
Ok((opts, config_read_from))
}
/// Exports the opts values to their corresponding env vars if they are not set.
fn export_to_env(self) {
let Opt {
db_path,
http_addr,
master_key,
env,
max_index_size,
max_task_db_size,
http_payload_size_limit,
ssl_cert_path,
ssl_key_path,
ssl_auth_path,
ssl_ocsp_path,
ssl_require_auth,
ssl_resumption,
ssl_tickets,
snapshot_dir,
schedule_snapshot,
snapshot_interval_sec,
dump_dir,
log_level,
indexer_options,
scheduler_options,
import_snapshot: _,
ignore_missing_snapshot: _,
ignore_snapshot_if_db_exists: _,
import_dump: _,
ignore_missing_dump: _,
ignore_dump_if_db_exists: _,
config_file_path: _,
#[cfg(all(not(debug_assertions), feature = "analytics"))]
no_analytics,
#[cfg(feature = "metrics")]
enable_metrics_route,
} = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
if let Some(master_key) = master_key {
export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
}
export_to_env_if_not_present(MEILI_ENV, env);
#[cfg(all(not(debug_assertions), feature = "analytics"))]
{
export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
}
export_to_env_if_not_present(MEILI_MAX_INDEX_SIZE, max_index_size.to_string());
export_to_env_if_not_present(MEILI_MAX_TASK_DB_SIZE, max_task_db_size.to_string());
export_to_env_if_not_present(
MEILI_HTTP_PAYLOAD_SIZE_LIMIT,
http_payload_size_limit.to_string(),
);
if let Some(ssl_cert_path) = ssl_cert_path {
export_to_env_if_not_present(MEILI_SSL_CERT_PATH, ssl_cert_path);
}
if let Some(ssl_key_path) = ssl_key_path {
export_to_env_if_not_present(MEILI_SSL_KEY_PATH, ssl_key_path);
}
if let Some(ssl_auth_path) = ssl_auth_path {
export_to_env_if_not_present(MEILI_SSL_AUTH_PATH, ssl_auth_path);
}
if let Some(ssl_ocsp_path) = ssl_ocsp_path {
export_to_env_if_not_present(MEILI_SSL_OCSP_PATH, ssl_ocsp_path);
}
export_to_env_if_not_present(MEILI_SSL_REQUIRE_AUTH, ssl_require_auth.to_string());
export_to_env_if_not_present(MEILI_SSL_RESUMPTION, ssl_resumption.to_string());
export_to_env_if_not_present(MEILI_SSL_TICKETS, ssl_tickets.to_string());
export_to_env_if_not_present(MEILI_SNAPSHOT_DIR, snapshot_dir);
export_to_env_if_not_present(MEILI_SCHEDULE_SNAPSHOT, schedule_snapshot.to_string());
export_to_env_if_not_present(
MEILI_SNAPSHOT_INTERVAL_SEC,
snapshot_interval_sec.to_string(),
);
export_to_env_if_not_present(MEILI_DUMP_DIR, dump_dir);
export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level);
#[cfg(feature = "metrics")]
{
export_to_env_if_not_present(
MEILI_ENABLE_METRICS_ROUTE,
enable_metrics_route.to_string(),
);
}
indexer_options.export_to_env();
scheduler_options.export_to_env();
}
pub fn get_ssl_config(&self) -> anyhow::Result<Option<rustls::ServerConfig>> {
if let (Some(cert_path), Some(key_path)) = (&self.ssl_cert_path, &self.ssl_key_path) {
let config = rustls::ServerConfig::builder().with_safe_defaults();
let config = match &self.ssl_auth_path {
Some(auth_path) => {
let roots = load_certs(auth_path.to_path_buf())?;
let mut client_auth_roots = RootCertStore::empty();
for root in roots {
client_auth_roots.add(&root).unwrap();
}
if self.ssl_require_auth {
let verifier = AllowAnyAuthenticatedClient::new(client_auth_roots);
config.with_client_cert_verifier(verifier)
} else {
let verifier =
AllowAnyAnonymousOrAuthenticatedClient::new(client_auth_roots);
config.with_client_cert_verifier(verifier)
}
}
None => config.with_no_client_auth(),
};
let certs = load_certs(cert_path.to_path_buf())?;
let privkey = load_private_key(key_path.to_path_buf())?;
let ocsp = load_ocsp(&self.ssl_ocsp_path)?;
let mut config = config
.with_single_cert_with_ocsp_and_sct(certs, privkey, ocsp, vec![])
.map_err(|_| anyhow::anyhow!("bad certificates/private key"))?;
config.key_log = Arc::new(rustls::KeyLogFile::new());
if self.ssl_resumption {
config.session_storage = ServerSessionMemoryCache::new(256);
}
if self.ssl_tickets {
config.ticketer = rustls::Ticketer::new().unwrap();
}
Ok(Some(config))
} else {
Ok(None)
}
}
}
#[derive(Debug, Clone, Parser, Deserialize)]
pub struct IndexerOpts {
/// Sets the amount of documents to skip before printing
/// a log regarding the indexing advancement.
#[serde(default = "default_log_every_n")]
#[clap(long, default_value_t = default_log_every_n(), hide = true)] // 100k
pub log_every_n: usize,
/// Grenad max number of chunks in bytes.
#[clap(long, hide = true)]
pub max_nb_chunks: Option<usize>,
/// Sets the maximum amount of RAM Meilisearch can use when indexing. By default, Meilisearch
/// uses no more than two thirds of available memory.
#[clap(long, env = MEILI_MAX_INDEXING_MEMORY, default_value_t)]
#[serde(default)]
pub max_indexing_memory: MaxMemory,
/// Sets the maximum number of threads Meilisearch can use during indexation. By default, the
/// indexer avoids using more than half of a machine's total processing units. This ensures
/// Meilisearch is always ready to perform searches, even while you are updating an index.
#[clap(long, env = MEILI_MAX_INDEXING_THREADS, default_value_t)]
#[serde(default)]
pub max_indexing_threads: MaxThreads,
}
impl IndexerOpts {
/// Exports the values to their corresponding env vars if they are not set.
pub fn export_to_env(self) {
let IndexerOpts {
max_indexing_memory,
max_indexing_threads,
log_every_n: _,
max_nb_chunks: _,
} = self;
if let Some(max_indexing_memory) = max_indexing_memory.0 {
export_to_env_if_not_present(
MEILI_MAX_INDEXING_MEMORY,
max_indexing_memory.to_string(),
);
}
export_to_env_if_not_present(
MEILI_MAX_INDEXING_THREADS,
max_indexing_threads.0.to_string(),
);
}
}
#[derive(Debug, Clone, Parser, Default, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct SchedulerConfig {
/// Deactivates auto-batching when provided.
#[clap(long, env = DISABLE_AUTO_BATCHING)]
#[serde(default)]
pub disable_auto_batching: bool,
}
impl SchedulerConfig {
pub fn export_to_env(self) {
let SchedulerConfig { disable_auto_batching } = self;
export_to_env_if_not_present(DISABLE_AUTO_BATCHING, disable_auto_batching.to_string());
}
}
impl TryFrom<&IndexerOpts> for IndexerConfig {
type Error = anyhow::Error;
fn try_from(other: &IndexerOpts) -> Result<Self, Self::Error> {
let thread_pool = rayon::ThreadPoolBuilder::new()
.thread_name(|index| format!("indexing-thread:{index}"))
.num_threads(*other.max_indexing_threads)
.build()?;
Ok(Self {
log_every_n: Some(other.log_every_n),
max_nb_chunks: other.max_nb_chunks,
max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize),
thread_pool: Some(thread_pool),
max_positions_per_attributes: None,
..Default::default()
})
}
}
impl Default for IndexerOpts {
fn default() -> Self {
Self {
log_every_n: 100_000,
max_nb_chunks: None,
max_indexing_memory: MaxMemory::default(),
max_indexing_threads: MaxThreads::default(),
}
}
}
/// A type used to detect the max memory available and use 2/3 of it.
#[derive(Debug, Clone, Copy, Deserialize, Serialize)]
pub struct MaxMemory(Option<Byte>);
impl FromStr for MaxMemory {
type Err = ByteError;
fn from_str(s: &str) -> Result<MaxMemory, ByteError> {
Byte::from_str(s).map(Some).map(MaxMemory)
}
}
impl Default for MaxMemory {
fn default() -> MaxMemory {
MaxMemory(total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_bytes))
}
}
impl fmt::Display for MaxMemory {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.0 {
Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)),
None => f.write_str("unknown"),
}
}
}
impl Deref for MaxMemory {
type Target = Option<Byte>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl MaxMemory {
pub fn unlimited() -> Self {
Self(None)
}
}
/// Returns the total amount of bytes available or `None` if this system isn't supported.
fn total_memory_bytes() -> Option<u64> {
if System::IS_SUPPORTED {
let memory_kind = RefreshKind::new().with_memory();
let mut system = System::new_with_specifics(memory_kind);
system.refresh_memory();
Some(system.total_memory())
} else {
None
}
}
#[derive(Debug, Clone, Copy, Deserialize, Serialize)]
pub struct MaxThreads(usize);
impl FromStr for MaxThreads {
type Err = ParseIntError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
usize::from_str(s).map(Self)
}
}
impl Default for MaxThreads {
fn default() -> Self {
MaxThreads(num_cpus::get() / 2)
}
}
impl fmt::Display for MaxThreads {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.0)
}
}
impl Deref for MaxThreads {
type Target = usize;
fn deref(&self) -> &Self::Target {
&self.0
}
}
fn load_certs(filename: PathBuf) -> anyhow::Result<Vec<rustls::Certificate>> {
let certfile =
fs::File::open(filename).map_err(|_| anyhow::anyhow!("cannot open certificate file"))?;
let mut reader = BufReader::new(certfile);
certs(&mut reader)
.map(|certs| certs.into_iter().map(rustls::Certificate).collect())
.map_err(|_| anyhow::anyhow!("cannot read certificate file"))
}
fn load_private_key(filename: PathBuf) -> anyhow::Result<rustls::PrivateKey> {
let rsa_keys = {
let keyfile = fs::File::open(filename.clone())
.map_err(|_| anyhow::anyhow!("cannot open private key file"))?;
let mut reader = BufReader::new(keyfile);
rsa_private_keys(&mut reader)
.map_err(|_| anyhow::anyhow!("file contains invalid rsa private key"))?
};
let pkcs8_keys = {
let keyfile = fs::File::open(filename)
.map_err(|_| anyhow::anyhow!("cannot open private key file"))?;
let mut reader = BufReader::new(keyfile);
pkcs8_private_keys(&mut reader).map_err(|_| {
anyhow::anyhow!(
"file contains invalid pkcs8 private key (encrypted keys not supported)"
)
})?
};
// prefer to load pkcs8 keys
if !pkcs8_keys.is_empty() {
Ok(rustls::PrivateKey(pkcs8_keys[0].clone()))
} else {
assert!(!rsa_keys.is_empty());
Ok(rustls::PrivateKey(rsa_keys[0].clone()))
}
}
fn load_ocsp(filename: &Option<PathBuf>) -> anyhow::Result<Vec<u8>> {
let mut ret = Vec::new();
if let Some(ref name) = filename {
fs::File::open(name)
.map_err(|_| anyhow::anyhow!("cannot open ocsp file"))?
.read_to_end(&mut ret)
.map_err(|_| anyhow::anyhow!("cannot read oscp file"))?;
}
Ok(ret)
}
/// Checks if the key is defined in the environment variables.
/// If not, inserts it with the given value.
pub fn export_to_env_if_not_present<T>(key: &str, value: T)
where
T: AsRef<OsStr>,
{
if let Err(VarError::NotPresent) = std::env::var(key) {
std::env::set_var(key, value);
}
}
/// Functions used to get default value for `Opt` fields, needs to be function because of serde's default attribute.
fn default_db_path() -> PathBuf {
PathBuf::from(DEFAULT_DB_PATH)
}
pub fn default_http_addr() -> String {
DEFAULT_HTTP_ADDR.to_string()
}
fn default_env() -> String {
DEFAULT_ENV.to_string()
}
fn default_max_index_size() -> Byte {
Byte::from_str(DEFAULT_MAX_INDEX_SIZE).unwrap()
}
fn default_max_task_db_size() -> Byte {
Byte::from_str(DEFAULT_MAX_TASK_DB_SIZE).unwrap()
}
fn default_http_payload_size_limit() -> Byte {
Byte::from_str(DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT).unwrap()
}
fn default_snapshot_dir() -> PathBuf {
PathBuf::from(DEFAULT_SNAPSHOT_DIR)
}
fn default_snapshot_interval_sec() -> u64 {
DEFAULT_SNAPSHOT_INTERVAL_SEC
}
fn default_dump_dir() -> PathBuf {
PathBuf::from(DEFAULT_DUMP_DIR)
}
fn default_log_level() -> String {
DEFAULT_LOG_LEVEL.to_string()
}
fn default_log_every_n() -> usize {
DEFAULT_LOG_EVERY_N
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_valid_opt() {
assert!(Opt::try_parse_from(Some("")).is_ok());
}
#[test]
#[ignore]
fn test_meilli_config_file_path_valid() {
temp_env::with_vars(
vec![("MEILI_CONFIG_FILE_PATH", Some("../config.toml"))], // Relative path in meilisearch package
|| {
assert!(Opt::try_build().is_ok());
},
);
}
#[test]
#[ignore]
fn test_meilli_config_file_path_invalid() {
temp_env::with_vars(vec![("MEILI_CONFIG_FILE_PATH", Some("../configgg.toml"))], || {
let possible_error_messages = [
"unable to open or read the \"../configgg.toml\" configuration file: No such file or directory (os error 2).",
"unable to open or read the \"../configgg.toml\" configuration file: The system cannot find the file specified. (os error 2).", // Windows
];
let error_message = Opt::try_build().unwrap_err().to_string();
assert!(
possible_error_messages.contains(&error_message.as_str()),
"Expected onf of {:?}, got {:?}.",
possible_error_messages,
error_message
);
});
}
}

View file

@ -0,0 +1,104 @@
use std::future::{ready, Ready};
use actix_web::dev::{self, Service, ServiceRequest, ServiceResponse, Transform};
use actix_web::http::header;
use actix_web::{Error, HttpResponse};
use futures_util::future::LocalBoxFuture;
use meilisearch_auth::actions;
use meilisearch_lib::MeiliSearch;
use meilisearch_types::error::ResponseError;
use prometheus::{Encoder, HistogramTimer, TextEncoder};
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::GuardedData;
pub async fn get_metrics(
meilisearch: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, MeiliSearch>,
) -> Result<HttpResponse, ResponseError> {
let search_rules = &meilisearch.filters().search_rules;
let response = meilisearch.get_all_stats(search_rules).await?;
crate::metrics::MEILISEARCH_DB_SIZE_BYTES.set(response.database_size as i64);
crate::metrics::MEILISEARCH_INDEX_COUNT.set(response.indexes.len() as i64);
for (index, value) in response.indexes.iter() {
crate::metrics::MEILISEARCH_INDEX_DOCS_COUNT
.with_label_values(&[index])
.set(value.number_of_documents as i64);
}
let encoder = TextEncoder::new();
let mut buffer = vec![];
encoder.encode(&prometheus::gather(), &mut buffer).expect("Failed to encode metrics");
let response = String::from_utf8(buffer).expect("Failed to convert bytes to string");
Ok(HttpResponse::Ok().insert_header(header::ContentType(mime::TEXT_PLAIN)).body(response))
}
pub struct RouteMetrics;
// Middleware factory is `Transform` trait from actix-service crate
// `S` - type of the next service
// `B` - type of response's body
impl<S, B> Transform<S, ServiceRequest> for RouteMetrics
where
S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = Error>,
S::Future: 'static,
B: 'static,
{
type Response = ServiceResponse<B>;
type Error = Error;
type InitError = ();
type Transform = RouteMetricsMiddleware<S>;
type Future = Ready<Result<Self::Transform, Self::InitError>>;
fn new_transform(&self, service: S) -> Self::Future {
ready(Ok(RouteMetricsMiddleware { service }))
}
}
pub struct RouteMetricsMiddleware<S> {
service: S,
}
impl<S, B> Service<ServiceRequest> for RouteMetricsMiddleware<S>
where
S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = Error>,
S::Future: 'static,
B: 'static,
{
type Response = ServiceResponse<B>;
type Error = Error;
type Future = LocalBoxFuture<'static, Result<Self::Response, Self::Error>>;
dev::forward_ready!(service);
fn call(&self, req: ServiceRequest) -> Self::Future {
let mut histogram_timer: Option<HistogramTimer> = None;
let request_path = req.path();
let is_registered_resource = req.resource_map().has_resource(request_path);
if is_registered_resource {
let request_method = req.method().to_string();
histogram_timer = Some(
crate::metrics::HTTP_RESPONSE_TIME_SECONDS
.with_label_values(&[&request_method, request_path])
.start_timer(),
);
crate::metrics::HTTP_REQUESTS_TOTAL
.with_label_values(&[&request_method, request_path])
.inc();
}
let fut = self.service.call(req);
Box::pin(async move {
let res = fut.await?;
if let Some(histogram_timer) = histogram_timer {
histogram_timer.observe_duration();
};
Ok(res)
})
}
}

View file

@ -0,0 +1,158 @@
use std::str;
use actix_web::{web, HttpRequest, HttpResponse};
use meilisearch_auth::error::AuthControllerError;
use meilisearch_auth::AuthController;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::keys::{Action, Key};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use time::OffsetDateTime;
use uuid::Uuid;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::Pagination;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::post().to(SeqHandler(create_api_key)))
.route(web::get().to(SeqHandler(list_api_keys))),
)
.service(
web::resource("/{key}")
.route(web::get().to(SeqHandler(get_api_key)))
.route(web::patch().to(SeqHandler(patch_api_key)))
.route(web::delete().to(SeqHandler(delete_api_key))),
);
}
pub async fn create_api_key(
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_CREATE }>, AuthController>,
body: web::Json<Value>,
_req: HttpRequest,
) -> Result<HttpResponse, ResponseError> {
let v = body.into_inner();
let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
let key = auth_controller.create_key(v)?;
Ok(KeyView::from_key(key, &auth_controller))
})
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
Ok(HttpResponse::Created().json(res))
}
pub async fn list_api_keys(
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, AuthController>,
paginate: web::Query<Pagination>,
) -> Result<HttpResponse, ResponseError> {
let page_view = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
let keys = auth_controller.list_keys()?;
let page_view = paginate
.auto_paginate_sized(keys.into_iter().map(|k| KeyView::from_key(k, &auth_controller)));
Ok(page_view)
})
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
Ok(HttpResponse::Ok().json(page_view))
}
pub async fn get_api_key(
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, AuthController>,
path: web::Path<AuthParam>,
) -> Result<HttpResponse, ResponseError> {
let key = path.into_inner().key;
let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
let uid =
Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
let key = auth_controller.get_key(uid)?;
Ok(KeyView::from_key(key, &auth_controller))
})
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
Ok(HttpResponse::Ok().json(res))
}
pub async fn patch_api_key(
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_UPDATE }>, AuthController>,
body: web::Json<Value>,
path: web::Path<AuthParam>,
) -> Result<HttpResponse, ResponseError> {
let key = path.into_inner().key;
let body = body.into_inner();
let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
let uid =
Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
let key = auth_controller.update_key(uid, body)?;
Ok(KeyView::from_key(key, &auth_controller))
})
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
Ok(HttpResponse::Ok().json(res))
}
pub async fn delete_api_key(
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_DELETE }>, AuthController>,
path: web::Path<AuthParam>,
) -> Result<HttpResponse, ResponseError> {
let key = path.into_inner().key;
tokio::task::spawn_blocking(move || {
let uid =
Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
auth_controller.delete_key(uid)
})
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
Ok(HttpResponse::NoContent().finish())
}
#[derive(Deserialize)]
pub struct AuthParam {
key: String,
}
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
struct KeyView {
name: Option<String>,
description: Option<String>,
key: String,
uid: Uuid,
actions: Vec<Action>,
indexes: Vec<String>,
#[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
expires_at: Option<OffsetDateTime>,
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
created_at: OffsetDateTime,
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
updated_at: OffsetDateTime,
}
impl KeyView {
fn from_key(key: Key, auth: &AuthController) -> Self {
let generated_key = auth.generate_key(key.uid).unwrap_or_default();
KeyView {
name: key.name,
description: key.description,
key: generated_key,
uid: key.uid,
actions: key.actions,
indexes: key.indexes.into_iter().map(String::from).collect(),
expires_at: key.expires_at,
created_at: key.created_at,
updated_at: key.updated_at,
}
}
}

View file

@ -0,0 +1,37 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::KindWithContent;
use serde_json::json;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::SummarizedTaskView;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump))));
}
pub async fn create_dump(
index_scheduler: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<IndexScheduler>>,
auth_controller: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, AuthController>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
analytics.publish("Dump Created".to_string(), json!({}), Some(&req));
let task = KindWithContent::DumpCreation {
keys: auth_controller.list_keys()?,
instance_uid: analytics.instance_uid().cloned(),
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}

View file

@ -0,0 +1,410 @@
use std::io::{Cursor, ErrorKind};
use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::Data;
use actix_web::{web, HttpMessage, HttpRequest, HttpResponse};
use bstr::ByteSlice;
use futures::StreamExt;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::star_or::StarOr;
use meilisearch_types::tasks::KindWithContent;
use meilisearch_types::{milli, Document, Index};
use mime::Mime;
use once_cell::sync::Lazy;
use serde::Deserialize;
use serde_cs::vec::CS;
use serde_json::Value;
use crate::analytics::{Analytics, DocumentDeletionKind};
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::payload::Payload;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::{fold_star_or, PaginationView, SummarizedTaskView};
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()]
});
/// Extracts the mime type from the content type and return
/// a meilisearch error if anything bad happen.
fn extract_mime_type(req: &HttpRequest) -> Result<Option<Mime>, MeilisearchHttpError> {
match req.mime_type() {
Ok(Some(mime)) => Ok(Some(mime)),
Ok(None) => Ok(None),
Err(_) => match req.headers().get(CONTENT_TYPE) {
Some(content_type) => Err(MeilisearchHttpError::InvalidContentType(
content_type.as_bytes().as_bstr().to_string(),
ACCEPTED_CONTENT_TYPE.clone(),
)),
None => Err(MeilisearchHttpError::MissingContentType(ACCEPTED_CONTENT_TYPE.clone())),
},
}
}
#[derive(Deserialize)]
pub struct DocumentParam {
index_uid: String,
document_id: String,
}
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::get().to(SeqHandler(get_all_documents)))
.route(web::post().to(SeqHandler(add_documents)))
.route(web::put().to(SeqHandler(update_documents)))
.route(web::delete().to(SeqHandler(clear_all_documents))),
)
// this route needs to be before the /documents/{document_id} to match properly
.service(web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents))))
.service(
web::resource("/{document_id}")
.route(web::get().to(SeqHandler(get_document)))
.route(web::delete().to(SeqHandler(delete_document))),
);
}
#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct GetDocument {
fields: Option<CS<StarOr<String>>>,
}
pub async fn get_document(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
path: web::Path<DocumentParam>,
params: web::Query<GetDocument>,
) -> Result<HttpResponse, ResponseError> {
let GetDocument { fields } = params.into_inner();
let attributes_to_retrieve = fields.and_then(fold_star_or);
let index = index_scheduler.index(&path.index_uid)?;
let document = retrieve_document(&index, &path.document_id, attributes_to_retrieve)?;
debug!("returns: {:?}", document);
Ok(HttpResponse::Ok().json(document))
}
pub async fn delete_document(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
path: web::Path<DocumentParam>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
analytics.delete_documents(DocumentDeletionKind::PerDocumentId, &req);
let DocumentParam { document_id, index_uid } = path.into_inner();
let task = KindWithContent::DocumentDeletion { index_uid, documents_ids: vec![document_id] };
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct BrowseQuery {
#[serde(default)]
offset: usize,
#[serde(default = "crate::routes::PAGINATION_DEFAULT_LIMIT")]
limit: usize,
fields: Option<CS<StarOr<String>>>,
}
pub async fn get_all_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: web::Query<BrowseQuery>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", params);
let BrowseQuery { limit, offset, fields } = params.into_inner();
let attributes_to_retrieve = fields.and_then(fold_star_or);
let index = index_scheduler.index(&index_uid)?;
let (total, documents) = retrieve_documents(&index, offset, limit, attributes_to_retrieve)?;
let ret = PaginationView::new(offset, limit, total as usize, documents);
debug!("returns: {:?}", ret);
Ok(HttpResponse::Ok().json(ret))
}
#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct UpdateDocumentsQuery {
pub primary_key: Option<String>,
}
pub async fn add_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: web::Query<UpdateDocumentsQuery>,
body: Payload,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", params);
let params = params.into_inner();
analytics.add_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let task = document_addition(
extract_mime_type(&req)?,
index_scheduler,
index_uid.into_inner(),
params.primary_key,
body,
IndexDocumentsMethod::ReplaceDocuments,
allow_index_creation,
)
.await?;
Ok(HttpResponse::Accepted().json(task))
}
pub async fn update_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
path: web::Path<String>,
params: web::Query<UpdateDocumentsQuery>,
body: Payload,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", params);
let index_uid = path.into_inner();
analytics.update_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let task = document_addition(
extract_mime_type(&req)?,
index_scheduler,
index_uid,
params.into_inner().primary_key,
body,
IndexDocumentsMethod::UpdateDocuments,
allow_index_creation,
)
.await?;
Ok(HttpResponse::Accepted().json(task))
}
async fn document_addition(
mime_type: Option<Mime>,
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: String,
primary_key: Option<String>,
mut body: Payload,
method: IndexDocumentsMethod,
allow_index_creation: bool,
) -> Result<SummarizedTaskView, MeilisearchHttpError> {
let format = match mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())) {
Some(("application", "json")) => PayloadType::Json,
Some(("application", "x-ndjson")) => PayloadType::Ndjson,
Some(("text", "csv")) => PayloadType::Csv,
Some((type_, subtype)) => {
return Err(MeilisearchHttpError::InvalidContentType(
format!("{}/{}", type_, subtype),
ACCEPTED_CONTENT_TYPE.clone(),
))
}
None => {
return Err(MeilisearchHttpError::MissingContentType(ACCEPTED_CONTENT_TYPE.clone()))
}
};
// is your indexUid valid?
let index_uid = IndexUid::try_from(index_uid)?.into_inner();
let (uuid, mut update_file) = index_scheduler.create_update_file()?;
// TODO: this can be slow, maybe we should spawn a thread? But the payload isn't Send+Sync :weary:
// push the entire stream into a `Vec`.
// If someone sends us a never ending stream we're going to block the thread.
// TODO: Maybe we should write it to a file to reduce the RAM consumption
// and then reread it to convert it to obkv?
let mut buffer = Vec::new();
while let Some(bytes) = body.next().await {
buffer.extend_from_slice(&bytes?);
}
if buffer.is_empty() {
return Err(MeilisearchHttpError::MissingPayload(format));
}
let reader = Cursor::new(buffer);
let documents_count =
tokio::task::spawn_blocking(move || -> Result<_, MeilisearchHttpError> {
let documents_count = match format {
PayloadType::Json => read_json(reader, update_file.as_file_mut())?,
PayloadType::Csv => read_csv(reader, update_file.as_file_mut())?,
PayloadType::Ndjson => read_ndjson(reader, update_file.as_file_mut())?,
};
// we NEED to persist the file here because we moved the `udpate_file` in another task.
update_file.persist()?;
Ok(documents_count)
})
.await;
let documents_count = match documents_count {
Ok(Ok(documents_count)) => documents_count as u64,
// in this case the file has not possibly be persisted.
Ok(Err(e)) => return Err(e),
Err(e) => {
// Here the file MAY have been persisted or not.
// We don't know thus we ignore the file not found error.
match index_scheduler.delete_update_file(uuid) {
Ok(()) => (),
Err(index_scheduler::Error::FileStore(file_store::Error::IoError(e)))
if e.kind() == ErrorKind::NotFound => {}
Err(e) => {
log::warn!("Unknown error happened while deleting a malformed update file with uuid {uuid}: {e}");
}
}
// We still want to return the original error to the end user.
return Err(e.into());
}
};
let task = KindWithContent::DocumentAdditionOrUpdate {
method,
content_file: uuid,
documents_count,
primary_key,
allow_index_creation,
index_uid,
};
let scheduler = index_scheduler.clone();
let task = match tokio::task::spawn_blocking(move || scheduler.register(task)).await? {
Ok(task) => task,
Err(e) => {
index_scheduler.delete_update_file(uuid)?;
return Err(e.into());
}
};
debug!("returns: {:?}", task);
Ok(task.into())
}
pub async fn delete_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
path: web::Path<String>,
body: web::Json<Vec<Value>>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", body);
analytics.delete_documents(DocumentDeletionKind::PerBatch, &req);
let ids = body
.iter()
.map(|v| v.as_str().map(String::from).unwrap_or_else(|| v.to_string()))
.collect();
let task =
KindWithContent::DocumentDeletion { index_uid: path.into_inner(), documents_ids: ids };
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
pub async fn clear_all_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
path: web::Path<String>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
analytics.delete_documents(DocumentDeletionKind::ClearAll, &req);
let task = KindWithContent::DocumentClear { index_uid: path.into_inner() };
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
fn all_documents<'a>(
index: &Index,
rtxn: &'a RoTxn,
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
let fields_ids_map = index.fields_ids_map(rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
Ok(index.all_documents(rtxn)?.map(move |ret| {
ret.map_err(ResponseError::from).and_then(|(_key, document)| -> Result<_, ResponseError> {
Ok(milli::obkv_to_json(&all_fields, &fields_ids_map, document)?)
})
}))
}
fn retrieve_documents<S: AsRef<str>>(
index: &Index,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<S>>,
) -> Result<(u64, Vec<Document>), ResponseError> {
let rtxn = index.read_txn()?;
let mut documents = Vec::new();
for document in all_documents(index, &rtxn)?.skip(offset).take(limit) {
let document = match &attributes_to_retrieve {
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
&document?,
attributes_to_retrieve.iter().map(|s| s.as_ref()),
),
None => document?,
};
documents.push(document);
}
let number_of_documents = index.number_of_documents(&rtxn)?;
Ok((number_of_documents, documents))
}
fn retrieve_document<S: AsRef<str>>(
index: &Index,
doc_id: &str,
attributes_to_retrieve: Option<Vec<S>>,
) -> Result<Document, ResponseError> {
let txn = index.read_txn()?;
let fields_ids_map = index.fields_ids_map(&txn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let internal_id = index
.external_documents_ids(&txn)?
.get(doc_id.as_bytes())
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
let document = index
.documents(&txn, std::iter::once(internal_id))?
.into_iter()
.next()
.map(|(_, d)| d)
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
let document = meilisearch_types::milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
let document = match &attributes_to_retrieve {
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
&document,
attributes_to_retrieve.iter().map(|s| s.as_ref()),
),
None => document,
};
Ok(document)
}

View file

@ -0,0 +1,214 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::{self, FieldDistribution, Index};
use meilisearch_types::tasks::KindWithContent;
use serde::{Deserialize, Serialize};
use serde_json::json;
use time::OffsetDateTime;
use super::{Pagination, SummarizedTaskView};
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
pub mod documents;
pub mod search;
pub mod settings;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::get().to(list_indexes))
.route(web::post().to(SeqHandler(create_index))),
)
.service(
web::scope("/{index_uid}")
.service(
web::resource("")
.route(web::get().to(SeqHandler(get_index)))
.route(web::patch().to(SeqHandler(update_index)))
.route(web::delete().to(SeqHandler(delete_index))),
)
.service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats))))
.service(web::scope("/documents").configure(documents::configure))
.service(web::scope("/search").configure(search::configure))
.service(web::scope("/settings").configure(settings::configure)),
);
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexView {
pub uid: String,
#[serde(with = "time::serde::rfc3339")]
pub created_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
pub updated_at: OffsetDateTime,
pub primary_key: Option<String>,
}
impl IndexView {
fn new(uid: String, index: &Index) -> Result<IndexView, milli::Error> {
let rtxn = index.read_txn()?;
Ok(IndexView {
uid,
created_at: index.created_at(&rtxn)?,
updated_at: index.updated_at(&rtxn)?,
primary_key: index.primary_key(&rtxn)?.map(String::from),
})
}
}
pub async fn list_indexes(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
paginate: web::Query<Pagination>,
) -> Result<HttpResponse, ResponseError> {
let search_rules = &index_scheduler.filters().search_rules;
let indexes: Vec<_> = index_scheduler.indexes()?;
let indexes = indexes
.into_iter()
.filter(|(name, _)| search_rules.is_index_authorized(name))
.map(|(name, index)| IndexView::new(name, &index))
.collect::<Result<Vec<_>, _>>()?;
let ret = paginate.auto_paginate_sized(indexes.into_iter());
debug!("returns: {:?}", ret);
Ok(HttpResponse::Ok().json(ret))
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct IndexCreateRequest {
uid: String,
primary_key: Option<String>,
}
pub async fn create_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_CREATE }>, Data<IndexScheduler>>,
body: web::Json<IndexCreateRequest>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let IndexCreateRequest { primary_key, uid } = body.into_inner();
let uid = IndexUid::try_from(uid)?.into_inner();
let allow_index_creation = index_scheduler.filters().search_rules.is_index_authorized(&uid);
if allow_index_creation {
analytics.publish(
"Index Created".to_string(),
json!({ "primary_key": primary_key }),
Some(&req),
);
let task = KindWithContent::IndexCreation { index_uid: uid, primary_key };
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
Ok(HttpResponse::Accepted().json(task))
} else {
Err(AuthenticationError::InvalidToken.into())
}
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
#[allow(dead_code)]
pub struct UpdateIndexRequest {
uid: Option<String>,
primary_key: Option<String>,
}
pub async fn get_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let index = index_scheduler.index(&index_uid)?;
let index_view = IndexView::new(index_uid.into_inner(), &index)?;
debug!("returns: {:?}", index_view);
Ok(HttpResponse::Ok().json(index_view))
}
pub async fn update_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_UPDATE }>, Data<IndexScheduler>>,
path: web::Path<String>,
body: web::Json<UpdateIndexRequest>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", body);
let body = body.into_inner();
analytics.publish(
"Index Updated".to_string(),
json!({ "primary_key": body.primary_key}),
Some(&req),
);
let task = KindWithContent::IndexUpdate {
index_uid: path.into_inner(),
primary_key: body.primary_key,
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
pub async fn delete_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_DELETE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() };
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
Ok(HttpResponse::Accepted().json(task))
}
pub async fn get_index_stats(
index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
analytics.publish("Stats Seen".to_string(), json!({ "per_index_uid": true }), Some(&req));
let stats = IndexStats::new((*index_scheduler).clone(), index_uid.into_inner())?;
debug!("returns: {:?}", stats);
Ok(HttpResponse::Ok().json(stats))
}
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct IndexStats {
pub number_of_documents: u64,
pub is_indexing: bool,
pub field_distribution: FieldDistribution,
}
impl IndexStats {
pub fn new(
index_scheduler: Data<IndexScheduler>,
index_uid: String,
) -> Result<Self, ResponseError> {
// we check if there is currently a task processing associated with this index.
let is_processing = index_scheduler.is_index_processing(&index_uid)?;
let index = index_scheduler.index(&index_uid)?;
let rtxn = index.read_txn()?;
Ok(IndexStats {
number_of_documents: index.number_of_documents(&rtxn)?,
is_indexing: is_processing,
field_distribution: index.field_distribution(&rtxn)?,
})
}
}

View file

@ -0,0 +1,225 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_auth::IndexSearchRules;
use meilisearch_types::error::ResponseError;
use serde::Deserialize;
use serde_cs::vec::CS;
use serde_json::Value;
use crate::analytics::{Analytics, SearchAggregator};
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::search::{
perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
DEFAULT_SEARCH_OFFSET,
};
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::get().to(SeqHandler(search_with_url_query)))
.route(web::post().to(SeqHandler(search_with_post))),
);
}
#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct SearchQueryGet {
q: Option<String>,
#[serde(default = "DEFAULT_SEARCH_OFFSET")]
offset: usize,
#[serde(default = "DEFAULT_SEARCH_LIMIT")]
limit: usize,
page: Option<usize>,
hits_per_page: Option<usize>,
attributes_to_retrieve: Option<CS<String>>,
attributes_to_crop: Option<CS<String>>,
#[serde(default = "DEFAULT_CROP_LENGTH")]
crop_length: usize,
attributes_to_highlight: Option<CS<String>>,
filter: Option<String>,
sort: Option<String>,
#[serde(default = "Default::default")]
show_matches_position: bool,
facets: Option<CS<String>>,
#[serde(default = "DEFAULT_HIGHLIGHT_PRE_TAG")]
highlight_pre_tag: String,
#[serde(default = "DEFAULT_HIGHLIGHT_POST_TAG")]
highlight_post_tag: String,
#[serde(default = "DEFAULT_CROP_MARKER")]
crop_marker: String,
#[serde(default)]
matching_strategy: MatchingStrategy,
}
impl From<SearchQueryGet> for SearchQuery {
fn from(other: SearchQueryGet) -> Self {
let filter = match other.filter {
Some(f) => match serde_json::from_str(&f) {
Ok(v) => Some(v),
_ => Some(Value::String(f)),
},
None => None,
};
Self {
q: other.q,
offset: other.offset,
limit: other.limit,
page: other.page,
hits_per_page: other.hits_per_page,
attributes_to_retrieve: other.attributes_to_retrieve.map(|o| o.into_iter().collect()),
attributes_to_crop: other.attributes_to_crop.map(|o| o.into_iter().collect()),
crop_length: other.crop_length,
attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()),
filter,
sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)),
show_matches_position: other.show_matches_position,
facets: other.facets.map(|o| o.into_iter().collect()),
highlight_pre_tag: other.highlight_pre_tag,
highlight_post_tag: other.highlight_post_tag,
crop_marker: other.crop_marker,
matching_strategy: other.matching_strategy,
}
}
}
/// Incorporate search rules in search query
fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
query.filter = match (query.filter.take(), rules.filter) {
(None, rules_filter) => rules_filter,
(filter, None) => filter,
(Some(filter), Some(rules_filter)) => {
let filter = match filter {
Value::Array(filter) => filter,
filter => vec![filter],
};
let rules_filter = match rules_filter {
Value::Array(rules_filter) => rules_filter,
rules_filter => vec![rules_filter],
};
Some(Value::Array([filter, rules_filter].concat()))
}
}
}
// TODO: TAMO: split on :asc, and :desc, instead of doing some weird things
/// Transform the sort query parameter into something that matches the post expected format.
fn fix_sort_query_parameters(sort_query: &str) -> Vec<String> {
let mut sort_parameters = Vec::new();
let mut merge = false;
for current_sort in sort_query.trim_matches('"').split(',').map(|s| s.trim()) {
if current_sort.starts_with("_geoPoint(") {
sort_parameters.push(current_sort.to_string());
merge = true;
} else if merge && !sort_parameters.is_empty() {
let s = sort_parameters.last_mut().unwrap();
s.push(',');
s.push_str(current_sort);
if current_sort.ends_with("):desc") || current_sort.ends_with("):asc") {
merge = false;
}
} else {
sort_parameters.push(current_sort.to_string());
merge = false;
}
}
sort_parameters
}
pub async fn search_with_url_query(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: web::Query<SearchQueryGet>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", params);
let mut query: SearchQuery = params.into_inner().into();
// Tenant token search_rules.
if let Some(search_rules) =
index_scheduler.filters().search_rules.get_index_search_rules(&index_uid)
{
add_search_rules(&mut query, search_rules);
}
let mut aggregate = SearchAggregator::from_query(&query, &req);
let index = index_scheduler.index(&index_uid)?;
let search_result = tokio::task::spawn_blocking(move || perform_search(&index, query)).await?;
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
}
analytics.get_search(aggregate);
let search_result = search_result?;
debug!("returns: {:?}", search_result);
Ok(HttpResponse::Ok().json(search_result))
}
pub async fn search_with_post(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: web::Json<SearchQuery>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let mut query = params.into_inner();
debug!("search called with params: {:?}", query);
// Tenant token search_rules.
if let Some(search_rules) =
index_scheduler.filters().search_rules.get_index_search_rules(&index_uid)
{
add_search_rules(&mut query, search_rules);
}
let mut aggregate = SearchAggregator::from_query(&query, &req);
let index = index_scheduler.index(&index_uid)?;
let search_result = tokio::task::spawn_blocking(move || perform_search(&index, query)).await?;
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
}
analytics.post_search(aggregate);
let search_result = search_result?;
debug!("returns: {:?}", search_result);
Ok(HttpResponse::Ok().json(search_result))
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_fix_sort_query_parameters() {
let sort = fix_sort_query_parameters("_geoPoint(12, 13):asc");
assert_eq!(sort, vec!["_geoPoint(12,13):asc".to_string()]);
let sort = fix_sort_query_parameters("doggo:asc,_geoPoint(12.45,13.56):desc");
assert_eq!(sort, vec!["doggo:asc".to_string(), "_geoPoint(12.45,13.56):desc".to_string(),]);
let sort = fix_sort_query_parameters(
"doggo:asc , _geoPoint(12.45, 13.56, 2590352):desc , catto:desc",
);
assert_eq!(
sort,
vec![
"doggo:asc".to_string(),
"_geoPoint(12.45,13.56,2590352):desc".to_string(),
"catto:desc".to_string(),
]
);
let sort = fix_sort_query_parameters("doggo:asc , _geoPoint(1, 2), catto:desc");
// This is ugly but eh, I don't want to write a full parser just for this unused route
assert_eq!(sort, vec!["doggo:asc".to_string(), "_geoPoint(1,2),catto:desc".to_string(),]);
}
}

View file

@ -0,0 +1,565 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::settings::{settings, Settings, Unchecked};
use meilisearch_types::tasks::KindWithContent;
use serde_json::json;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::routes::SummarizedTaskView;
#[macro_export]
macro_rules! make_setting_route {
($route:literal, $update_verb:ident, $type:ty, $attr:ident, $camelcase_attr:literal, $analytics_var:ident, $analytics:expr) => {
pub mod $attr {
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse, Resource};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::settings::{settings, Settings};
use meilisearch_types::tasks::KindWithContent;
use $crate::analytics::Analytics;
use $crate::extractors::authentication::policies::*;
use $crate::extractors::authentication::GuardedData;
use $crate::extractors::sequential_extractor::SeqHandler;
use $crate::routes::SummarizedTaskView;
pub async fn delete(
index_scheduler: GuardedData<
ActionPolicy<{ actions::SETTINGS_UPDATE }>,
Data<IndexScheduler>,
>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let new_settings = Settings { $attr: Setting::Reset, ..Default::default() };
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
let task = KindWithContent::SettingsUpdate {
index_uid,
new_settings: Box::new(new_settings),
is_deletion: true,
allow_index_creation,
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task))
.await??
.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
pub async fn update(
index_scheduler: GuardedData<
ActionPolicy<{ actions::SETTINGS_UPDATE }>,
Data<IndexScheduler>,
>,
index_uid: actix_web::web::Path<String>,
body: actix_web::web::Json<Option<$type>>,
req: HttpRequest,
$analytics_var: web::Data<dyn Analytics>,
) -> std::result::Result<HttpResponse, ResponseError> {
let body = body.into_inner();
$analytics(&body, &req);
let new_settings = Settings {
$attr: match body {
Some(inner_body) => Setting::Set(inner_body),
None => Setting::Reset,
},
..Default::default()
};
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
let task = KindWithContent::SettingsUpdate {
index_uid,
new_settings: Box::new(new_settings),
is_deletion: false,
allow_index_creation,
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task))
.await??
.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
pub async fn get(
index_scheduler: GuardedData<
ActionPolicy<{ actions::SETTINGS_GET }>,
Data<IndexScheduler>,
>,
index_uid: actix_web::web::Path<String>,
) -> std::result::Result<HttpResponse, ResponseError> {
let index = index_scheduler.index(&index_uid)?;
let rtxn = index.read_txn()?;
let settings = settings(&index, &rtxn)?;
debug!("returns: {:?}", settings);
let mut json = serde_json::json!(&settings);
let val = json[$camelcase_attr].take();
Ok(HttpResponse::Ok().json(val))
}
pub fn resources() -> Resource {
Resource::new($route)
.route(web::get().to(SeqHandler(get)))
.route(web::$update_verb().to(SeqHandler(update)))
.route(web::delete().to(SeqHandler(delete)))
}
}
};
}
make_setting_route!(
"/filterable-attributes",
put,
std::collections::BTreeSet<String>,
filterable_attributes,
"filterableAttributes",
analytics,
|setting: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"FilterableAttributes Updated".to_string(),
json!({
"filterable_attributes": {
"total": setting.as_ref().map(|filter| filter.len()).unwrap_or(0),
"has_geo": setting.as_ref().map(|filter| filter.contains("_geo")).unwrap_or(false),
}
}),
Some(req),
);
}
);
make_setting_route!(
"/sortable-attributes",
put,
std::collections::BTreeSet<String>,
sortable_attributes,
"sortableAttributes",
analytics,
|setting: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"SortableAttributes Updated".to_string(),
json!({
"sortable_attributes": {
"total": setting.as_ref().map(|sort| sort.len()),
"has_geo": setting.as_ref().map(|sort| sort.contains("_geo")),
},
}),
Some(req),
);
}
);
make_setting_route!(
"/displayed-attributes",
put,
Vec<String>,
displayed_attributes,
"displayedAttributes",
analytics,
|displayed: &Option<Vec<String>>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"DisplayedAttributes Updated".to_string(),
json!({
"displayed_attributes": {
"total": displayed.as_ref().map(|displayed| displayed.len()),
"with_wildcard": displayed.as_ref().map(|displayed| displayed.iter().any(|displayed| displayed == "*")),
},
}),
Some(req),
);
}
);
make_setting_route!(
"/typo-tolerance",
patch,
meilisearch_types::settings::TypoSettings,
typo_tolerance,
"typoTolerance",
analytics,
|setting: &Option<meilisearch_types::settings::TypoSettings>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"TypoTolerance Updated".to_string(),
json!({
"typo_tolerance": {
"enabled": setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))),
"disable_on_attributes": setting
.as_ref()
.and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())),
"disable_on_words": setting
.as_ref()
.and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())),
"min_word_size_for_one_typo": setting
.as_ref()
.and_then(|s| s.min_word_size_for_typos
.as_ref()
.set()
.map(|s| s.one_typo.set()))
.flatten(),
"min_word_size_for_two_typos": setting
.as_ref()
.and_then(|s| s.min_word_size_for_typos
.as_ref()
.set()
.map(|s| s.two_typos.set()))
.flatten(),
},
}),
Some(req),
);
}
);
make_setting_route!(
"/searchable-attributes",
put,
Vec<String>,
searchable_attributes,
"searchableAttributes",
analytics,
|setting: &Option<Vec<String>>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"SearchableAttributes Updated".to_string(),
json!({
"searchable_attributes": {
"total": setting.as_ref().map(|searchable| searchable.len()),
"with_wildcard": setting.as_ref().map(|searchable| searchable.iter().any(|searchable| searchable == "*")),
},
}),
Some(req),
);
}
);
make_setting_route!(
"/stop-words",
put,
std::collections::BTreeSet<String>,
stop_words,
"stopWords",
analytics,
|stop_words: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"StopWords Updated".to_string(),
json!({
"stop_words": {
"total": stop_words.as_ref().map(|stop_words| stop_words.len()),
},
}),
Some(req),
);
}
);
make_setting_route!(
"/synonyms",
put,
std::collections::BTreeMap<String, Vec<String>>,
synonyms,
"synonyms",
analytics,
|synonyms: &Option<std::collections::BTreeMap<String, Vec<String>>>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"Synonyms Updated".to_string(),
json!({
"synonyms": {
"total": synonyms.as_ref().map(|synonyms| synonyms.len()),
},
}),
Some(req),
);
}
);
make_setting_route!(
"/distinct-attribute",
put,
String,
distinct_attribute,
"distinctAttribute",
analytics,
|distinct: &Option<String>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"DistinctAttribute Updated".to_string(),
json!({
"distinct_attribute": {
"set": distinct.is_some(),
}
}),
Some(req),
);
}
);
make_setting_route!(
"/ranking-rules",
put,
Vec<String>,
ranking_rules,
"rankingRules",
analytics,
|setting: &Option<Vec<String>>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"RankingRules Updated".to_string(),
json!({
"ranking_rules": {
"words_position": setting.as_ref().map(|rr| rr.iter().position(|s| s == "words")),
"typo_position": setting.as_ref().map(|rr| rr.iter().position(|s| s == "typo")),
"proximity_position": setting.as_ref().map(|rr| rr.iter().position(|s| s == "proximity")),
"attribute_position": setting.as_ref().map(|rr| rr.iter().position(|s| s == "attribute")),
"sort_position": setting.as_ref().map(|rr| rr.iter().position(|s| s == "sort")),
"exactness_position": setting.as_ref().map(|rr| rr.iter().position(|s| s == "exactness")),
"values": setting.as_ref().map(|rr| rr.iter().filter(|s| !s.contains(':')).cloned().collect::<Vec<_>>().join(", ")),
}
}),
Some(req),
);
}
);
make_setting_route!(
"/faceting",
patch,
meilisearch_types::settings::FacetingSettings,
faceting,
"faceting",
analytics,
|setting: &Option<meilisearch_types::settings::FacetingSettings>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"Faceting Updated".to_string(),
json!({
"faceting": {
"max_values_per_facet": setting.as_ref().and_then(|s| s.max_values_per_facet.set()),
},
}),
Some(req),
);
}
);
make_setting_route!(
"/pagination",
patch,
meilisearch_types::settings::PaginationSettings,
pagination,
"pagination",
analytics,
|setting: &Option<meilisearch_types::settings::PaginationSettings>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"Pagination Updated".to_string(),
json!({
"pagination": {
"max_total_hits": setting.as_ref().and_then(|s| s.max_total_hits.set()),
},
}),
Some(req),
);
}
);
macro_rules! generate_configure {
($($mod:ident),*) => {
pub fn configure(cfg: &mut web::ServiceConfig) {
use crate::extractors::sequential_extractor::SeqHandler;
cfg.service(
web::resource("")
.route(web::patch().to(SeqHandler(update_all)))
.route(web::get().to(SeqHandler(get_all)))
.route(web::delete().to(SeqHandler(delete_all))))
$(.service($mod::resources()))*;
}
};
}
generate_configure!(
filterable_attributes,
sortable_attributes,
displayed_attributes,
searchable_attributes,
distinct_attribute,
stop_words,
synonyms,
ranking_rules,
typo_tolerance,
pagination,
faceting
);
pub async fn update_all(
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
body: web::Json<Settings<Unchecked>>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let new_settings = body.into_inner();
analytics.publish(
"Settings Updated".to_string(),
json!({
"ranking_rules": {
"words_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| s == "words")),
"typo_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| s == "typo")),
"proximity_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| s == "proximity")),
"attribute_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| s == "attribute")),
"sort_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| s == "sort")),
"exactness_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| s == "exactness")),
"values": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().filter(|s| !s.contains(':')).cloned().collect::<Vec<_>>().join(", ")),
},
"searchable_attributes": {
"total": new_settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()),
"with_wildcard": new_settings.searchable_attributes.as_ref().set().map(|searchable| searchable.iter().any(|searchable| searchable == "*")),
},
"displayed_attributes": {
"total": new_settings.displayed_attributes.as_ref().set().map(|displayed| displayed.len()),
"with_wildcard": new_settings.displayed_attributes.as_ref().set().map(|displayed| displayed.iter().any(|displayed| displayed == "*")),
},
"sortable_attributes": {
"total": new_settings.sortable_attributes.as_ref().set().map(|sort| sort.len()),
"has_geo": new_settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")),
},
"filterable_attributes": {
"total": new_settings.filterable_attributes.as_ref().set().map(|filter| filter.len()),
"has_geo": new_settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")),
},
"distinct_attribute": {
"set": new_settings.distinct_attribute.as_ref().set().is_some()
},
"typo_tolerance": {
"enabled": new_settings.typo_tolerance
.as_ref()
.set()
.and_then(|s| s.enabled.as_ref().set())
.copied(),
"disable_on_attributes": new_settings.typo_tolerance
.as_ref()
.set()
.and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())),
"disable_on_words": new_settings.typo_tolerance
.as_ref()
.set()
.and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())),
"min_word_size_for_one_typo": new_settings.typo_tolerance
.as_ref()
.set()
.and_then(|s| s.min_word_size_for_typos
.as_ref()
.set()
.map(|s| s.one_typo.set()))
.flatten(),
"min_word_size_for_two_typos": new_settings.typo_tolerance
.as_ref()
.set()
.and_then(|s| s.min_word_size_for_typos
.as_ref()
.set()
.map(|s| s.two_typos.set()))
.flatten(),
},
"faceting": {
"max_values_per_facet": new_settings.faceting
.as_ref()
.set()
.and_then(|s| s.max_values_per_facet.as_ref().set()),
},
"pagination": {
"max_total_hits": new_settings.pagination
.as_ref()
.set()
.and_then(|s| s.max_total_hits.as_ref().set()),
},
"stop_words": {
"total": new_settings.stop_words.as_ref().set().map(|stop_words| stop_words.len()),
},
"synonyms": {
"total": new_settings.synonyms.as_ref().set().map(|synonyms| synonyms.len()),
},
}),
Some(&req),
);
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
let task = KindWithContent::SettingsUpdate {
index_uid,
new_settings: Box::new(new_settings),
is_deletion: false,
allow_index_creation,
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
pub async fn get_all(
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_GET }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let index = index_scheduler.index(&index_uid)?;
let rtxn = index.read_txn()?;
let new_settings = settings(&index, &rtxn)?;
debug!("returns: {:?}", new_settings);
Ok(HttpResponse::Ok().json(new_settings))
}
pub async fn delete_all(
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let new_settings = Settings::cleared().into_unchecked();
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
let task = KindWithContent::SettingsUpdate {
index_uid,
new_settings: Box::new(new_settings),
is_deletion: true,
allow_index_creation,
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}

View file

@ -0,0 +1,339 @@
use std::collections::BTreeMap;
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::{IndexScheduler, Query};
use log::debug;
use meilisearch_types::error::ResponseError;
use meilisearch_types::settings::{Settings, Unchecked};
use meilisearch_types::star_or::StarOr;
use meilisearch_types::tasks::{Kind, Status, Task, TaskId};
use serde::{Deserialize, Serialize};
use serde_json::json;
use time::OffsetDateTime;
use self::indexes::IndexStats;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
mod api_key;
mod dump;
pub mod indexes;
mod swap_indexes;
pub mod tasks;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::scope("/tasks").configure(tasks::configure))
.service(web::resource("/health").route(web::get().to(get_health)))
.service(web::scope("/keys").configure(api_key::configure))
.service(web::scope("/dumps").configure(dump::configure))
.service(web::resource("/stats").route(web::get().to(get_stats)))
.service(web::resource("/version").route(web::get().to(get_version)))
.service(web::scope("/indexes").configure(indexes::configure))
.service(web::scope("/swap-indexes").configure(swap_indexes::configure));
}
/// Extracts the raw values from the `StarOr` types and
/// return None if a `StarOr::Star` is encountered.
pub fn fold_star_or<T, O>(content: impl IntoIterator<Item = StarOr<T>>) -> Option<O>
where
O: FromIterator<T>,
{
content
.into_iter()
.map(|value| match value {
StarOr::Star => None,
StarOr::Other(val) => Some(val),
})
.collect()
}
const PAGINATION_DEFAULT_LIMIT: fn() -> usize = || 20;
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct SummarizedTaskView {
task_uid: TaskId,
index_uid: Option<String>,
status: Status,
#[serde(rename = "type")]
kind: Kind,
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
enqueued_at: OffsetDateTime,
}
impl From<Task> for SummarizedTaskView {
fn from(task: Task) -> Self {
SummarizedTaskView {
task_uid: task.uid,
index_uid: task.index_uid().map(|s| s.to_string()),
status: task.status,
kind: task.kind.as_kind(),
enqueued_at: task.enqueued_at,
}
}
}
#[derive(Debug, Clone, Copy, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct Pagination {
#[serde(default)]
pub offset: usize,
#[serde(default = "PAGINATION_DEFAULT_LIMIT")]
pub limit: usize,
}
#[derive(Debug, Clone, Serialize)]
pub struct PaginationView<T> {
pub results: Vec<T>,
pub offset: usize,
pub limit: usize,
pub total: usize,
}
impl Pagination {
/// Given the full data to paginate, returns the selected section.
pub fn auto_paginate_sized<T>(
self,
content: impl IntoIterator<Item = T> + ExactSizeIterator,
) -> PaginationView<T>
where
T: Serialize,
{
let total = content.len();
let content: Vec<_> = content.into_iter().skip(self.offset).take(self.limit).collect();
self.format_with(total, content)
}
/// Given an iterator and the total number of elements, returns the selected section.
pub fn auto_paginate_unsized<T>(
self,
total: usize,
content: impl IntoIterator<Item = T>,
) -> PaginationView<T>
where
T: Serialize,
{
let content: Vec<_> = content.into_iter().skip(self.offset).take(self.limit).collect();
self.format_with(total, content)
}
/// Given the data already paginated + the total number of elements, it stores
/// everything in a [PaginationResult].
pub fn format_with<T>(self, total: usize, results: Vec<T>) -> PaginationView<T>
where
T: Serialize,
{
PaginationView { results, offset: self.offset, limit: self.limit, total }
}
}
impl<T> PaginationView<T> {
pub fn new(offset: usize, limit: usize, total: usize, results: Vec<T>) -> Self {
Self { offset, limit, results, total }
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[allow(clippy::large_enum_variant)]
#[serde(tag = "name")]
pub enum UpdateType {
ClearAll,
Customs,
DocumentsAddition {
#[serde(skip_serializing_if = "Option::is_none")]
number: Option<usize>,
},
DocumentsPartial {
#[serde(skip_serializing_if = "Option::is_none")]
number: Option<usize>,
},
DocumentsDeletion {
#[serde(skip_serializing_if = "Option::is_none")]
number: Option<usize>,
},
Settings {
settings: Settings<Unchecked>,
},
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ProcessedUpdateResult {
pub update_id: u64,
#[serde(rename = "type")]
pub update_type: UpdateType,
pub duration: f64, // in seconds
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
pub processed_at: OffsetDateTime,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct FailedUpdateResult {
pub update_id: u64,
#[serde(rename = "type")]
pub update_type: UpdateType,
pub error: ResponseError,
pub duration: f64, // in seconds
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
pub processed_at: OffsetDateTime,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct EnqueuedUpdateResult {
pub update_id: u64,
#[serde(rename = "type")]
pub update_type: UpdateType,
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
#[serde(skip_serializing_if = "Option::is_none", with = "time::serde::rfc3339::option")]
pub started_processing_at: Option<OffsetDateTime>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", tag = "status")]
pub enum UpdateStatusResponse {
Enqueued {
#[serde(flatten)]
content: EnqueuedUpdateResult,
},
Processing {
#[serde(flatten)]
content: EnqueuedUpdateResult,
},
Failed {
#[serde(flatten)]
content: FailedUpdateResult,
},
Processed {
#[serde(flatten)]
content: ProcessedUpdateResult,
},
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
pub struct IndexUpdateResponse {
pub update_id: u64,
}
impl IndexUpdateResponse {
pub fn with_id(update_id: u64) -> Self {
Self { update_id }
}
}
/// Always return a 200 with:
/// ```json
/// {
/// "status": "Meilisearch is running"
/// }
/// ```
pub async fn running() -> HttpResponse {
HttpResponse::Ok().json(serde_json::json!({ "status": "Meilisearch is running" }))
}
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct Stats {
pub database_size: u64,
#[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
pub last_update: Option<OffsetDateTime>,
pub indexes: BTreeMap<String, IndexStats>,
}
async fn get_stats(
index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
analytics.publish("Stats Seen".to_string(), json!({ "per_index_uid": false }), Some(&req));
let search_rules = &index_scheduler.filters().search_rules;
let stats = create_all_stats((*index_scheduler).clone(), search_rules)?;
debug!("returns: {:?}", stats);
Ok(HttpResponse::Ok().json(stats))
}
pub fn create_all_stats(
index_scheduler: Data<IndexScheduler>,
search_rules: &meilisearch_auth::SearchRules,
) -> Result<Stats, ResponseError> {
let mut last_task: Option<OffsetDateTime> = None;
let mut indexes = BTreeMap::new();
let mut database_size = 0;
let processing_task = index_scheduler.get_tasks_from_authorized_indexes(
Query { statuses: Some(vec![Status::Processing]), limit: Some(1), ..Query::default() },
search_rules.authorized_indexes(),
)?;
let processing_index = processing_task.first().and_then(|task| task.index_uid());
for (name, index) in index_scheduler.indexes()? {
if !search_rules.is_index_authorized(&name) {
continue;
}
database_size += index.on_disk_size()?;
let rtxn = index.read_txn()?;
let stats = IndexStats {
number_of_documents: index.number_of_documents(&rtxn)?,
is_indexing: processing_index.map_or(false, |index_name| name == index_name),
field_distribution: index.field_distribution(&rtxn)?,
};
let updated_at = index.updated_at(&rtxn)?;
last_task = last_task.map_or(Some(updated_at), |last| Some(last.max(updated_at)));
indexes.insert(name, stats);
}
let stats = Stats { database_size, last_update: last_task, indexes };
Ok(stats)
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct VersionResponse {
commit_sha: String,
commit_date: String,
pkg_version: String,
}
async fn get_version(
_index_scheduler: GuardedData<ActionPolicy<{ actions::VERSION }>, Data<IndexScheduler>>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> HttpResponse {
analytics.publish("Version Seen".to_string(), json!(null), Some(&req));
let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
HttpResponse::Ok().json(VersionResponse {
commit_sha: commit_sha.to_string(),
commit_date: commit_date.to_string(),
pkg_version: env!("CARGO_PKG_VERSION").to_string(),
})
}
#[derive(Serialize)]
struct KeysResponse {
private: Option<String>,
public: Option<String>,
}
pub async fn get_health(
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
analytics.health_seen(&req);
Ok(HttpResponse::Ok().json(serde_json::json!({ "status": "available" })))
}

View file

@ -0,0 +1,59 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::{IndexSwap, KindWithContent};
use serde::Deserialize;
use serde_json::json;
use super::SummarizedTaskView;
use crate::analytics::Analytics;
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(swap_indexes))));
}
#[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct SwapIndexesPayload {
indexes: Vec<String>,
}
pub async fn swap_indexes(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_SWAP }>, Data<IndexScheduler>>,
params: web::Json<Vec<SwapIndexesPayload>>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
analytics.publish(
"Indexes Swapped".to_string(),
json!({
"swap_operation_number": params.len(),
}),
Some(&req),
);
let search_rules = &index_scheduler.filters().search_rules;
let mut swaps = vec![];
for SwapIndexesPayload { indexes } in params.into_inner().into_iter() {
let (lhs, rhs) = match indexes.as_slice() {
[lhs, rhs] => (lhs, rhs),
_ => {
return Err(MeilisearchHttpError::SwapIndexPayloadWrongLength(indexes).into());
}
};
if !search_rules.is_index_authorized(lhs) || !search_rules.is_index_authorized(rhs) {
return Err(AuthenticationError::InvalidToken.into());
}
swaps.push(IndexSwap { indexes: (lhs.clone(), rhs.clone()) });
}
let task = KindWithContent::IndexSwap { swaps };
let task = index_scheduler.register(task)?;
let task: SummarizedTaskView = task.into();
Ok(HttpResponse::Accepted().json(task))
}

File diff suppressed because it is too large Load diff

701
meilisearch/src/search.rs Normal file
View file

@ -0,0 +1,701 @@
use std::cmp::min;
use std::collections::{BTreeMap, BTreeSet, HashSet};
use std::str::FromStr;
use std::time::Instant;
use either::Either;
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
use meilisearch_types::{milli, Document};
use milli::tokenizer::TokenizerBuilder;
use milli::{
AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, Index, MatchBounds, MatcherBuilder,
SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
};
use regex::Regex;
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use crate::error::MeilisearchHttpError;
type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>;
pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0;
pub const DEFAULT_SEARCH_LIMIT: fn() -> usize = || 20;
pub const DEFAULT_CROP_LENGTH: fn() -> usize = || 10;
pub const DEFAULT_CROP_MARKER: fn() -> String = || "".to_string();
pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
#[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct SearchQuery {
pub q: Option<String>,
#[serde(default = "DEFAULT_SEARCH_OFFSET")]
pub offset: usize,
#[serde(default = "DEFAULT_SEARCH_LIMIT")]
pub limit: usize,
pub page: Option<usize>,
pub hits_per_page: Option<usize>,
pub attributes_to_retrieve: Option<BTreeSet<String>>,
pub attributes_to_crop: Option<Vec<String>>,
#[serde(default = "DEFAULT_CROP_LENGTH")]
pub crop_length: usize,
pub attributes_to_highlight: Option<HashSet<String>>,
// Default to false
#[serde(default = "Default::default")]
pub show_matches_position: bool,
pub filter: Option<Value>,
pub sort: Option<Vec<String>>,
pub facets: Option<Vec<String>>,
#[serde(default = "DEFAULT_HIGHLIGHT_PRE_TAG")]
pub highlight_pre_tag: String,
#[serde(default = "DEFAULT_HIGHLIGHT_POST_TAG")]
pub highlight_post_tag: String,
#[serde(default = "DEFAULT_CROP_MARKER")]
pub crop_marker: String,
#[serde(default)]
pub matching_strategy: MatchingStrategy,
}
impl SearchQuery {
pub fn is_finite_pagination(&self) -> bool {
self.page.or(self.hits_per_page).is_some()
}
}
#[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub enum MatchingStrategy {
/// Remove query words from last to first
Last,
/// All query words are mandatory
All,
}
impl Default for MatchingStrategy {
fn default() -> Self {
Self::Last
}
}
impl From<MatchingStrategy> for TermsMatchingStrategy {
fn from(other: MatchingStrategy) -> Self {
match other {
MatchingStrategy::Last => Self::Last,
MatchingStrategy::All => Self::All,
}
}
}
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct SearchHit {
#[serde(flatten)]
pub document: Document,
#[serde(rename = "_formatted", skip_serializing_if = "Document::is_empty")]
pub formatted: Document,
#[serde(rename = "_matchesPosition", skip_serializing_if = "Option::is_none")]
pub matches_position: Option<MatchesPosition>,
}
#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct SearchResult {
pub hits: Vec<SearchHit>,
pub query: String,
pub processing_time_ms: u128,
#[serde(flatten)]
pub hits_info: HitsInfo,
#[serde(skip_serializing_if = "Option::is_none")]
pub facet_distribution: Option<BTreeMap<String, BTreeMap<String, u64>>>,
}
#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
#[serde(untagged)]
pub enum HitsInfo {
#[serde(rename_all = "camelCase")]
Pagination { hits_per_page: usize, page: usize, total_pages: usize, total_hits: usize },
#[serde(rename_all = "camelCase")]
OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize },
}
pub fn perform_search(
index: &Index,
query: SearchQuery,
) -> Result<SearchResult, MeilisearchHttpError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
let mut search = index.search(&rtxn);
if let Some(ref query) = query.q {
search.query(query);
}
let is_finite_pagination = query.is_finite_pagination();
search.terms_matching_strategy(query.matching_strategy.into());
let max_total_hits = index
.pagination_max_total_hits(&rtxn)
.map_err(milli::Error::from)?
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
search.exhaustive_number_hits(is_finite_pagination);
// compute the offset on the limit depending on the pagination mode.
let (offset, limit) = if is_finite_pagination {
let limit = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
let page = query.page.unwrap_or(1);
// page 0 gives a limit of 0 forcing Meilisearch to return no document.
page.checked_sub(1).map_or((0, 0), |p| (limit * p, limit))
} else {
(query.offset, query.limit)
};
// Make sure that a user can't get more documents than the hard limit,
// we align that on the offset too.
let offset = min(offset, max_total_hits);
let limit = min(limit, max_total_hits.saturating_sub(offset));
search.offset(offset);
search.limit(limit);
if let Some(ref filter) = query.filter {
if let Some(facets) = parse_filter(filter)? {
search.filter(facets);
}
}
if let Some(ref sort) = query.sort {
let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() {
Ok(sorts) => sorts,
Err(asc_desc_error) => {
return Err(milli::Error::from(SortError::from(asc_desc_error)).into())
}
};
search.sort_criteria(sort);
}
let milli::SearchResult { documents_ids, matching_words, candidates, .. } = search.execute()?;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let displayed_ids = index
.displayed_fields_ids(&rtxn)?
.map(|fields| fields.into_iter().collect::<BTreeSet<_>>())
.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
let fids = |attrs: &BTreeSet<String>| {
let mut ids = BTreeSet::new();
for attr in attrs {
if attr == "*" {
ids = displayed_ids.clone();
break;
}
if let Some(id) = fields_ids_map.id(attr) {
ids.insert(id);
}
}
ids
};
// The attributes to retrieve are the ones explicitly marked as to retrieve (all by default),
// but these attributes must be also be present
// - in the fields_ids_map
// - in the the displayed attributes
let to_retrieve_ids: BTreeSet<_> = query
.attributes_to_retrieve
.as_ref()
.map(fids)
.unwrap_or_else(|| displayed_ids.clone())
.intersection(&displayed_ids)
.cloned()
.collect();
let attr_to_highlight = query.attributes_to_highlight.unwrap_or_default();
let attr_to_crop = query.attributes_to_crop.unwrap_or_default();
// Attributes in `formatted_options` correspond to the attributes that will be in `_formatted`
// These attributes are:
// - the attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`)
// - the attributes asked to be retrieved: these attributes will not be highlighted/cropped
// But these attributes must be also present in displayed attributes
let formatted_options = compute_formatted_options(
&attr_to_highlight,
&attr_to_crop,
query.crop_length,
&to_retrieve_ids,
&fields_ids_map,
&displayed_ids,
);
let tokenizer = TokenizerBuilder::default().build();
let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer);
formatter_builder.crop_marker(query.crop_marker);
formatter_builder.highlight_prefix(query.highlight_pre_tag);
formatter_builder.highlight_suffix(query.highlight_post_tag);
let mut documents = Vec::new();
let documents_iter = index.documents(&rtxn, documents_ids)?;
for (_id, obkv) in documents_iter {
// First generate a document with all the displayed fields
let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
// select the attributes to retrieve
let attributes_to_retrieve = to_retrieve_ids
.iter()
.map(|&fid| fields_ids_map.name(fid).expect("Missing field name"));
let mut document =
permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);
let (matches_position, formatted) = format_fields(
&displayed_document,
&fields_ids_map,
&formatter_builder,
&formatted_options,
query.show_matches_position,
&displayed_ids,
)?;
if let Some(sort) = query.sort.as_ref() {
insert_geo_distance(sort, &mut document);
}
let hit = SearchHit { document, formatted, matches_position };
documents.push(hit);
}
let number_of_hits = min(candidates.len() as usize, max_total_hits);
let hits_info = if is_finite_pagination {
let hits_per_page = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
// If hit_per_page is 0, then pages can't be computed and so we respond 0.
let total_pages = (number_of_hits + hits_per_page.saturating_sub(1))
.checked_div(hits_per_page)
.unwrap_or(0);
HitsInfo::Pagination {
hits_per_page,
page: query.page.unwrap_or(1),
total_pages,
total_hits: number_of_hits,
}
} else {
HitsInfo::OffsetLimit { limit: query.limit, offset, estimated_total_hits: number_of_hits }
};
let facet_distribution = match query.facets {
Some(ref fields) => {
let mut facet_distribution = index.facets_distribution(&rtxn);
let max_values_by_facet = index
.max_values_per_facet(&rtxn)
.map_err(milli::Error::from)?
.unwrap_or(DEFAULT_VALUES_PER_FACET);
facet_distribution.max_values_per_facet(max_values_by_facet);
if fields.iter().all(|f| f != "*") {
facet_distribution.facets(fields);
}
let distribution = facet_distribution.candidates(candidates).execute()?;
Some(distribution)
}
None => None,
};
let result = SearchResult {
hits: documents,
hits_info,
query: query.q.clone().unwrap_or_default(),
processing_time_ms: before_search.elapsed().as_millis(),
facet_distribution,
};
Ok(result)
}
fn insert_geo_distance(sorts: &[String], document: &mut Document) {
lazy_static::lazy_static! {
static ref GEO_REGEX: Regex =
Regex::new(r"_geoPoint\(\s*([[:digit:].\-]+)\s*,\s*([[:digit:].\-]+)\s*\)").unwrap();
};
if let Some(capture_group) = sorts.iter().find_map(|sort| GEO_REGEX.captures(sort)) {
// TODO: TAMO: milli encountered an internal error, what do we want to do?
let base = [capture_group[1].parse().unwrap(), capture_group[2].parse().unwrap()];
let geo_point = &document.get("_geo").unwrap_or(&json!(null));
if let Some((lat, lng)) = geo_point["lat"].as_f64().zip(geo_point["lng"].as_f64()) {
let distance = milli::distance_between_two_points(&base, &[lat, lng]);
document.insert("_geoDistance".to_string(), json!(distance.round() as usize));
}
}
}
fn compute_formatted_options(
attr_to_highlight: &HashSet<String>,
attr_to_crop: &[String],
query_crop_length: usize,
to_retrieve_ids: &BTreeSet<FieldId>,
fields_ids_map: &FieldsIdsMap,
displayed_ids: &BTreeSet<FieldId>,
) -> BTreeMap<FieldId, FormatOptions> {
let mut formatted_options = BTreeMap::new();
add_highlight_to_formatted_options(
&mut formatted_options,
attr_to_highlight,
fields_ids_map,
displayed_ids,
);
add_crop_to_formatted_options(
&mut formatted_options,
attr_to_crop,
query_crop_length,
fields_ids_map,
displayed_ids,
);
// Should not return `_formatted` if no valid attributes to highlight/crop
if !formatted_options.is_empty() {
add_non_formatted_ids_to_formatted_options(&mut formatted_options, to_retrieve_ids);
}
formatted_options
}
fn add_highlight_to_formatted_options(
formatted_options: &mut BTreeMap<FieldId, FormatOptions>,
attr_to_highlight: &HashSet<String>,
fields_ids_map: &FieldsIdsMap,
displayed_ids: &BTreeSet<FieldId>,
) {
for attr in attr_to_highlight {
let new_format = FormatOptions { highlight: true, crop: None };
if attr == "*" {
for id in displayed_ids {
formatted_options.insert(*id, new_format);
}
break;
}
if let Some(id) = fields_ids_map.id(attr) {
if displayed_ids.contains(&id) {
formatted_options.insert(id, new_format);
}
}
}
}
fn add_crop_to_formatted_options(
formatted_options: &mut BTreeMap<FieldId, FormatOptions>,
attr_to_crop: &[String],
crop_length: usize,
fields_ids_map: &FieldsIdsMap,
displayed_ids: &BTreeSet<FieldId>,
) {
for attr in attr_to_crop {
let mut split = attr.rsplitn(2, ':');
let (attr_name, attr_len) = match split.next().zip(split.next()) {
Some((len, name)) => {
let crop_len = len.parse::<usize>().unwrap_or(crop_length);
(name, crop_len)
}
None => (attr.as_str(), crop_length),
};
if attr_name == "*" {
for id in displayed_ids {
formatted_options
.entry(*id)
.and_modify(|f| f.crop = Some(attr_len))
.or_insert(FormatOptions { highlight: false, crop: Some(attr_len) });
}
}
if let Some(id) = fields_ids_map.id(attr_name) {
if displayed_ids.contains(&id) {
formatted_options
.entry(id)
.and_modify(|f| f.crop = Some(attr_len))
.or_insert(FormatOptions { highlight: false, crop: Some(attr_len) });
}
}
}
}
fn add_non_formatted_ids_to_formatted_options(
formatted_options: &mut BTreeMap<FieldId, FormatOptions>,
to_retrieve_ids: &BTreeSet<FieldId>,
) {
for id in to_retrieve_ids {
formatted_options.entry(*id).or_insert(FormatOptions { highlight: false, crop: None });
}
}
fn make_document(
displayed_attributes: &BTreeSet<FieldId>,
field_ids_map: &FieldsIdsMap,
obkv: obkv::KvReaderU16,
) -> Result<Document, MeilisearchHttpError> {
let mut document = serde_json::Map::new();
// recreate the original json
for (key, value) in obkv.iter() {
let value = serde_json::from_slice(value)?;
let key = field_ids_map.name(key).expect("Missing field name").to_string();
document.insert(key, value);
}
// select the attributes to retrieve
let displayed_attributes = displayed_attributes
.iter()
.map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
let document = permissive_json_pointer::select_values(&document, displayed_attributes);
Ok(document)
}
fn format_fields<'a, A: AsRef<[u8]>>(
document: &Document,
field_ids_map: &FieldsIdsMap,
builder: &MatcherBuilder<'a, A>,
formatted_options: &BTreeMap<FieldId, FormatOptions>,
compute_matches: bool,
displayable_ids: &BTreeSet<FieldId>,
) -> Result<(Option<MatchesPosition>, Document), MeilisearchHttpError> {
let mut matches_position = compute_matches.then(BTreeMap::new);
let mut document = document.clone();
// select the attributes to retrieve
let displayable_names =
displayable_ids.iter().map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
permissive_json_pointer::map_leaf_values(&mut document, displayable_names, |key, value| {
// To get the formatting option of each key we need to see all the rules that applies
// to the value and merge them together. eg. If a user said he wanted to highlight `doggo`
// and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only
// highlighted.
let format = formatted_options
.iter()
.filter(|(field, _option)| {
let name = field_ids_map.name(**field).unwrap();
milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name)
})
.map(|(_, option)| *option)
.reduce(|acc, option| acc.merge(option));
let mut infos = Vec::new();
*value = format_value(std::mem::take(value), builder, format, &mut infos, compute_matches);
if let Some(matches) = matches_position.as_mut() {
if !infos.is_empty() {
matches.insert(key.to_owned(), infos);
}
}
});
let selectors = formatted_options
.keys()
// This unwrap must be safe since we got the ids from the fields_ids_map just
// before.
.map(|&fid| field_ids_map.name(fid).unwrap());
let document = permissive_json_pointer::select_values(&document, selectors);
Ok((matches_position, document))
}
fn format_value<'a, A: AsRef<[u8]>>(
value: Value,
builder: &MatcherBuilder<'a, A>,
format_options: Option<FormatOptions>,
infos: &mut Vec<MatchBounds>,
compute_matches: bool,
) -> Value {
match value {
Value::String(old_string) => {
let mut matcher = builder.build(&old_string);
if compute_matches {
let matches = matcher.matches();
infos.extend_from_slice(&matches[..]);
}
match format_options {
Some(format_options) => {
let value = matcher.format(format_options);
Value::String(value.into_owned())
}
None => Value::String(old_string),
}
}
Value::Array(values) => Value::Array(
values
.into_iter()
.map(|v| {
format_value(
v,
builder,
format_options.map(|format_options| FormatOptions {
highlight: format_options.highlight,
crop: None,
}),
infos,
compute_matches,
)
})
.collect(),
),
Value::Object(object) => Value::Object(
object
.into_iter()
.map(|(k, v)| {
(
k,
format_value(
v,
builder,
format_options.map(|format_options| FormatOptions {
highlight: format_options.highlight,
crop: None,
}),
infos,
compute_matches,
),
)
})
.collect(),
),
Value::Number(number) => {
let s = number.to_string();
let mut matcher = builder.build(&s);
if compute_matches {
let matches = matcher.matches();
infos.extend_from_slice(&matches[..]);
}
match format_options {
Some(format_options) => {
let value = matcher.format(format_options);
Value::String(value.into_owned())
}
None => Value::Number(number),
}
}
value => value,
}
}
fn parse_filter(facets: &Value) -> Result<Option<Filter>, MeilisearchHttpError> {
match facets {
Value::String(expr) => {
let condition = Filter::from_str(expr)?;
Ok(condition)
}
Value::Array(arr) => parse_filter_array(arr),
v => Err(MeilisearchHttpError::InvalidExpression(&["Array"], v.clone())),
}
}
fn parse_filter_array(arr: &[Value]) -> Result<Option<Filter>, MeilisearchHttpError> {
let mut ands = Vec::new();
for value in arr {
match value {
Value::String(s) => ands.push(Either::Right(s.as_str())),
Value::Array(arr) => {
let mut ors = Vec::new();
for value in arr {
match value {
Value::String(s) => ors.push(s.as_str()),
v => {
return Err(MeilisearchHttpError::InvalidExpression(
&["String"],
v.clone(),
))
}
}
}
ands.push(Either::Left(ors));
}
v => {
return Err(MeilisearchHttpError::InvalidExpression(
&["String", "[String]"],
v.clone(),
))
}
}
}
Ok(Filter::from_array(ands)?)
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_insert_geo_distance() {
let value: Document = serde_json::from_str(
r#"{
"_geo": {
"lat": 50.629973371633746,
"lng": 3.0569447399419567
},
"city": "Lille",
"id": "1"
}"#,
)
.unwrap();
let sorters = &["_geoPoint(50.629973371633746,3.0569447399419567):desc".to_string()];
let mut document = value.clone();
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
let sorters = &["_geoPoint(50.629973371633746, 3.0569447399419567):asc".to_string()];
let mut document = value.clone();
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
let sorters =
&["_geoPoint( 50.629973371633746 , 3.0569447399419567 ):desc".to_string()];
let mut document = value.clone();
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
let sorters = &[
"prix:asc",
"villeneuve:desc",
"_geoPoint(50.629973371633746, 3.0569447399419567):asc",
"ubu:asc",
]
.map(|s| s.to_string());
let mut document = value.clone();
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
// only the first geoPoint is used to compute the distance
let sorters = &[
"chien:desc",
"_geoPoint(50.629973371633746, 3.0569447399419567):asc",
"pangolin:desc",
"_geoPoint(100.0, -80.0):asc",
"chat:asc",
]
.map(|s| s.to_string());
let mut document = value.clone();
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
// there was no _geoPoint so nothing is inserted in the document
let sorters = &["chien:asc".to_string()];
let mut document = value;
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), None);
}
}