mirror of https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 11:57:07 +02:00

Merge branch 'main' into fix-3037-new
commit 169682d3ec

98 changed files with 133 additions and 104 deletions
63  meilisearch/src/analytics/mock_analytics.rs  Normal file
@@ -0,0 +1,63 @@
use std::any::Any;
use std::sync::Arc;

use actix_web::HttpRequest;
use meilisearch_types::InstanceUid;
use serde_json::Value;

use super::{find_user_id, Analytics, DocumentDeletionKind};
use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::tasks::TasksFilterQueryRaw;
use crate::Opt;

pub struct MockAnalytics {
    instance_uid: Option<InstanceUid>,
}

#[derive(Default)]
pub struct SearchAggregator;

#[allow(dead_code)]
impl SearchAggregator {
    pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
        Self::default()
    }

    pub fn succeed(&mut self, _: &dyn Any) {}
}

impl MockAnalytics {
    #[allow(clippy::new_ret_no_self)]
    pub fn new(opt: &Opt) -> Arc<dyn Analytics> {
        let instance_uid = find_user_id(&opt.db_path);
        Arc::new(Self { instance_uid })
    }
}

impl Analytics for MockAnalytics {
    fn instance_uid(&self) -> Option<&meilisearch_types::InstanceUid> {
        self.instance_uid.as_ref()
    }

    // These methods are noop and should be optimized out
    fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {}
    fn get_search(&self, _aggregate: super::SearchAggregator) {}
    fn post_search(&self, _aggregate: super::SearchAggregator) {}
    fn add_documents(
        &self,
        _documents_query: &UpdateDocumentsQuery,
        _index_creation: bool,
        _request: &HttpRequest,
    ) {
    }
    fn delete_documents(&self, _kind: DocumentDeletionKind, _request: &HttpRequest) {}
    fn update_documents(
        &self,
        _documents_query: &UpdateDocumentsQuery,
        _index_creation: bool,
        _request: &HttpRequest,
    ) {
    }
    fn get_tasks(&self, _query: &TasksFilterQueryRaw, _request: &HttpRequest) {}
    fn health_seen(&self, _request: &HttpRequest) {}
}
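A note not present in the diff itself: the mock exists so that the rest of the server can hold an `Arc<dyn Analytics>` and never care whether telemetry is compiled in. Below is a minimal, self-contained sketch of that same "no-op behind a trait object" pattern; the names (`Telemetry`, `NoopTelemetry`, `record_search`, `handle_search`) are hypothetical and only illustrate the idea.

use std::sync::Arc;

// Hypothetical trait playing the role of `Analytics` above: callers hold a
// trait object and never know whether events actually go anywhere.
trait Telemetry: Send + Sync {
    fn record_search(&self, query: &str);
}

// No-op implementation, analogous to `MockAnalytics`.
struct NoopTelemetry;

impl Telemetry for NoopTelemetry {
    fn record_search(&self, _query: &str) {} // intentionally does nothing
}

fn handle_search(telemetry: &Arc<dyn Telemetry>, query: &str) {
    // ... perform the search ...
    telemetry.record_search(query); // a no-op with NoopTelemetry
}

fn main() {
    let telemetry: Arc<dyn Telemetry> = Arc::new(NoopTelemetry);
    handle_search(&telemetry, "hello");
}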
101  meilisearch/src/analytics/mod.rs  Normal file
@@ -0,0 +1,101 @@
mod mock_analytics;
// if we are in release mode and the feature analytics was enabled
#[cfg(all(not(debug_assertions), feature = "analytics"))]
mod segment_analytics;

use std::fs;
use std::path::{Path, PathBuf};
use std::str::FromStr;

use actix_web::HttpRequest;
use meilisearch_types::InstanceUid;
pub use mock_analytics::MockAnalytics;
use once_cell::sync::Lazy;
use platform_dirs::AppDirs;
use serde_json::Value;

use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::tasks::TasksFilterQueryRaw;

// if we are in debug mode OR the analytics feature is disabled,
// `SegmentAnalytics` points to the mock instead of the real analytics
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type SegmentAnalytics = mock_analytics::MockAnalytics;
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type SearchAggregator = mock_analytics::SearchAggregator;

// if we are in release mode and the feature analytics was enabled
// we use the real analytics
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type SearchAggregator = segment_analytics::SearchAggregator;

/// The Meilisearch config dir:
/// `~/.config/Meilisearch` on *NIX or *BSD.
/// `~/Library/ApplicationSupport` on macOS.
/// `%APPDATA%` (= `C:\Users\%USERNAME%\AppData\Roaming`) on Windows.
static MEILISEARCH_CONFIG_PATH: Lazy<Option<PathBuf>> =
    Lazy::new(|| AppDirs::new(Some("Meilisearch"), false).map(|appdir| appdir.config_dir));

fn config_user_id_path(db_path: &Path) -> Option<PathBuf> {
    db_path
        .canonicalize()
        .ok()
        .map(|path| path.join("instance-uid").display().to_string().replace('/', "-"))
        .zip(MEILISEARCH_CONFIG_PATH.as_ref())
        .map(|(filename, config_path)| config_path.join(filename.trim_start_matches('-')))
}

/// Look for the instance-uid in the `data.ms` or in `~/.config/Meilisearch/path-to-db-instance-uid`
fn find_user_id(db_path: &Path) -> Option<InstanceUid> {
    fs::read_to_string(db_path.join("instance-uid"))
        .ok()
        .or_else(|| fs::read_to_string(&config_user_id_path(db_path)?).ok())
        .and_then(|uid| InstanceUid::from_str(&uid).ok())
}

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocumentDeletionKind {
    PerDocumentId,
    ClearAll,
    PerBatch,
}

pub trait Analytics: Sync + Send {
    fn instance_uid(&self) -> Option<&InstanceUid>;

    /// The method used to publish most analytics that do not need to be batched every hour
    fn publish(&self, event_name: String, send: Value, request: Option<&HttpRequest>);

    /// This method should be called to aggregate a get search request
    fn get_search(&self, aggregate: SearchAggregator);

    /// This method should be called to aggregate a post search request
    fn post_search(&self, aggregate: SearchAggregator);

    // this method should be called to aggregate an add documents request
    fn add_documents(
        &self,
        documents_query: &UpdateDocumentsQuery,
        index_creation: bool,
        request: &HttpRequest,
    );

    // this method should be called to aggregate a delete documents request
    fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest);

    // this method should be called to batch an update documents request
    fn update_documents(
        &self,
        documents_query: &UpdateDocumentsQuery,
        index_creation: bool,
        request: &HttpRequest,
    );

    // this method should be called to aggregate the get tasks requests.
    fn get_tasks(&self, query: &TasksFilterQueryRaw, request: &HttpRequest);

    // this method should be called to aggregate a health check request
    fn health_seen(&self, request: &HttpRequest);
}
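The module above swaps the concrete analytics type at compile time with `#[cfg]`-gated type aliases, so the rest of the crate only ever names `SegmentAnalytics`/`SearchAggregator`. A self-contained sketch of that technique follows, assuming a hypothetical `telemetry` cargo feature and hypothetical `Telemetry` implementations; it is an illustration of the pattern, not code from this PR.

// Hypothetical no-op implementation used in debug builds or when the
// `telemetry` feature is off.
mod noop {
    pub struct Telemetry;
    impl Telemetry {
        pub fn publish(&self, _event: &str) {}
    }
}

// Hypothetical real implementation, only compiled for release builds with the
// `telemetry` feature enabled.
#[cfg(all(not(debug_assertions), feature = "telemetry"))]
mod segment {
    pub struct Telemetry;
    impl Telemetry {
        pub fn publish(&self, event: &str) {
            println!("sending {event}");
        }
    }
}

// Callers only ever name `Telemetry`; the alias decides which implementation
// that is, exactly like `SegmentAnalytics`/`SearchAggregator` above.
#[cfg(any(debug_assertions, not(feature = "telemetry")))]
pub type Telemetry = noop::Telemetry;
#[cfg(all(not(debug_assertions), feature = "telemetry"))]
pub type Telemetry = segment::Telemetry;

fn main() {
    Telemetry.publish("Launched");
}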
991  meilisearch/src/analytics/segment_analytics.rs  Normal file
@@ -0,0 +1,991 @@
use std::collections::{BinaryHeap, HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{Duration, Instant};

use actix_web::http::header::USER_AGENT;
use actix_web::HttpRequest;
use byte_unit::Byte;
use http::header::CONTENT_TYPE;
use index_scheduler::IndexScheduler;
use meilisearch_auth::SearchRules;
use meilisearch_types::InstanceUid;
use once_cell::sync::Lazy;
use regex::Regex;
use segment::message::{Identify, Track, User};
use segment::{AutoBatcher, Batcher, HttpClient};
use serde::Serialize;
use serde_json::{json, Value};
use sysinfo::{DiskExt, System, SystemExt};
use time::OffsetDateTime;
use tokio::select;
use tokio::sync::mpsc::{self, Receiver, Sender};
use uuid::Uuid;

use super::{config_user_id_path, DocumentDeletionKind, MEILISEARCH_CONFIG_PATH};
use crate::analytics::Analytics;
use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, SchedulerConfig};
use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::tasks::TasksFilterQueryRaw;
use crate::routes::{create_all_stats, Stats};
use crate::search::{
    SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
    DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
};
use crate::Opt;

const ANALYTICS_HEADER: &str = "X-Meilisearch-Client";

/// Write the instance-uid in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-instance-uid`. Ignore the errors.
fn write_user_id(db_path: &Path, user_id: &InstanceUid) {
    let _ = fs::write(db_path.join("instance-uid"), user_id.as_bytes());
    if let Some((meilisearch_config_path, user_id_path)) =
        MEILISEARCH_CONFIG_PATH.as_ref().zip(config_user_id_path(db_path))
    {
        let _ = fs::create_dir_all(&meilisearch_config_path);
        let _ = fs::write(user_id_path, user_id.to_string());
    }
}

const SEGMENT_API_KEY: &str = "P3FWhhEsJiEDCuEHpmcN9DHcK4hVfBvb";

pub fn extract_user_agents(request: &HttpRequest) -> Vec<String> {
    request
        .headers()
        .get(ANALYTICS_HEADER)
        .or_else(|| request.headers().get(USER_AGENT))
        .map(|header| header.to_str().ok())
        .flatten()
        .unwrap_or("unknown")
        .split(';')
        .map(str::trim)
        .map(ToString::to_string)
        .collect()
}

pub enum AnalyticsMsg {
    BatchMessage(Track),
    AggregateGetSearch(SearchAggregator),
    AggregatePostSearch(SearchAggregator),
    AggregateAddDocuments(DocumentsAggregator),
    AggregateDeleteDocuments(DocumentsDeletionAggregator),
    AggregateUpdateDocuments(DocumentsAggregator),
    AggregateTasks(TasksAggregator),
    AggregateHealth(HealthAggregator),
}

pub struct SegmentAnalytics {
    instance_uid: InstanceUid,
    sender: Sender<AnalyticsMsg>,
    user: User,
}

impl SegmentAnalytics {
    pub async fn new(opt: &Opt, index_scheduler: Arc<IndexScheduler>) -> Arc<dyn Analytics> {
        let instance_uid = super::find_user_id(&opt.db_path);
        let first_time_run = instance_uid.is_none();
        let instance_uid = instance_uid.unwrap_or_else(|| Uuid::new_v4());
        write_user_id(&opt.db_path, &instance_uid);

        let client = reqwest::Client::builder().connect_timeout(Duration::from_secs(10)).build();

        // if reqwest throws an error we won't be able to send analytics
        if client.is_err() {
            return super::MockAnalytics::new(opt);
        }

        let client =
            HttpClient::new(client.unwrap(), "https://telemetry.meilisearch.com".to_string());
        let user = User::UserId { user_id: instance_uid.to_string() };
        let mut batcher = AutoBatcher::new(client, Batcher::new(None), SEGMENT_API_KEY.to_string());

        // If Meilisearch is launched for the first time:
        // 1. Send an event Launched associated with the user `total_launch`.
        // 2. Batch an event Launched with the real instance-id and send it in one hour.
        if first_time_run {
            let _ = batcher
                .push(Track {
                    user: User::UserId { user_id: "total_launch".to_string() },
                    event: "Launched".to_string(),
                    ..Default::default()
                })
                .await;
            let _ = batcher.flush().await;
            let _ = batcher
                .push(Track {
                    user: user.clone(),
                    event: "Launched".to_string(),
                    ..Default::default()
                })
                .await;
        }

        let (sender, inbox) = mpsc::channel(100); // how many analytics events we can buffer

        let segment = Box::new(Segment {
            inbox,
            user: user.clone(),
            opt: opt.clone(),
            batcher,
            post_search_aggregator: SearchAggregator::default(),
            get_search_aggregator: SearchAggregator::default(),
            add_documents_aggregator: DocumentsAggregator::default(),
            delete_documents_aggregator: DocumentsDeletionAggregator::default(),
            update_documents_aggregator: DocumentsAggregator::default(),
            get_tasks_aggregator: TasksAggregator::default(),
            health_aggregator: HealthAggregator::default(),
        });
        tokio::spawn(segment.run(index_scheduler.clone()));

        let this = Self { instance_uid, sender, user: user.clone() };

        Arc::new(this)
    }
}

impl super::Analytics for SegmentAnalytics {
    fn instance_uid(&self) -> Option<&InstanceUid> {
        Some(&self.instance_uid)
    }

    fn publish(&self, event_name: String, mut send: Value, request: Option<&HttpRequest>) {
        let user_agent = request.map(|req| extract_user_agents(req));

        send["user-agent"] = json!(user_agent);
        let event = Track {
            user: self.user.clone(),
            event: event_name.clone(),
            properties: send,
            ..Default::default()
        };
        let _ = self.sender.try_send(AnalyticsMsg::BatchMessage(event.into()));
    }

    fn get_search(&self, aggregate: SearchAggregator) {
        let _ = self.sender.try_send(AnalyticsMsg::AggregateGetSearch(aggregate));
    }

    fn post_search(&self, aggregate: SearchAggregator) {
        let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate));
    }

    fn add_documents(
        &self,
        documents_query: &UpdateDocumentsQuery,
        index_creation: bool,
        request: &HttpRequest,
    ) {
        let aggregate = DocumentsAggregator::from_query(documents_query, index_creation, request);
        let _ = self.sender.try_send(AnalyticsMsg::AggregateAddDocuments(aggregate));
    }

    fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest) {
        let aggregate = DocumentsDeletionAggregator::from_query(kind, request);
        let _ = self.sender.try_send(AnalyticsMsg::AggregateDeleteDocuments(aggregate));
    }

    fn update_documents(
        &self,
        documents_query: &UpdateDocumentsQuery,
        index_creation: bool,
        request: &HttpRequest,
    ) {
        let aggregate = DocumentsAggregator::from_query(documents_query, index_creation, request);
        let _ = self.sender.try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate));
    }

    fn get_tasks(&self, query: &TasksFilterQueryRaw, request: &HttpRequest) {
        let aggregate = TasksAggregator::from_query(query, request);
        let _ = self.sender.try_send(AnalyticsMsg::AggregateTasks(aggregate));
    }

    fn health_seen(&self, request: &HttpRequest) {
        let aggregate = HealthAggregator::from_query(request);
        let _ = self.sender.try_send(AnalyticsMsg::AggregateHealth(aggregate));
    }
}

/// This structure represents the `infos` field we send in the analytics.
/// It's quite close to the `Opt` structure except all sensitive information
/// has been simplified to a boolean.
/// It's sent as-is to Amplitude, so you should never rename a field of the
/// struct without the approval of the PM.
#[derive(Debug, Clone, Serialize)]
struct Infos {
    env: String,
    db_path: bool,
    import_dump: bool,
    dump_dir: bool,
    ignore_missing_dump: bool,
    ignore_dump_if_db_exists: bool,
    import_snapshot: bool,
    schedule_snapshot: bool,
    snapshot_dir: bool,
    snapshot_interval_sec: u64,
    ignore_missing_snapshot: bool,
    ignore_snapshot_if_db_exists: bool,
    http_addr: bool,
    max_index_size: Byte,
    max_task_db_size: Byte,
    http_payload_size_limit: Byte,
    disable_auto_batching: bool,
    log_level: String,
    max_indexing_memory: MaxMemory,
    max_indexing_threads: MaxThreads,
    with_configuration_file: bool,
    ssl_auth_path: bool,
    ssl_cert_path: bool,
    ssl_key_path: bool,
    ssl_ocsp_path: bool,
    ssl_require_auth: bool,
    ssl_resumption: bool,
    ssl_tickets: bool,
}

impl From<Opt> for Infos {
    fn from(options: Opt) -> Self {
        // We want to decompose this whole struct by hand to be sure we don't forget
        // to add analytics when we add a field in the Opt.
        // Thus we must not insert `..` at the end.
        let Opt {
            db_path,
            http_addr,
            master_key: _,
            env,
            max_index_size,
            max_task_db_size,
            http_payload_size_limit,
            ssl_cert_path,
            ssl_key_path,
            ssl_auth_path,
            ssl_ocsp_path,
            ssl_require_auth,
            ssl_resumption,
            ssl_tickets,
            import_snapshot,
            ignore_missing_snapshot,
            ignore_snapshot_if_db_exists,
            snapshot_dir,
            schedule_snapshot,
            snapshot_interval_sec,
            import_dump,
            ignore_missing_dump,
            ignore_dump_if_db_exists,
            dump_dir,
            log_level,
            indexer_options,
            scheduler_options,
            config_file_path,
            #[cfg(all(not(debug_assertions), feature = "analytics"))]
            no_analytics: _,
        } = options;

        let SchedulerConfig { disable_auto_batching } = scheduler_options;
        let IndexerOpts {
            log_every_n: _,
            max_nb_chunks: _,
            max_indexing_memory,
            max_indexing_threads,
        } = indexer_options;

        // We're going to override every sensitive piece of information.
        // We consider information sensitive if it contains a path, an address, or a key.
        Self {
            env,
            db_path: db_path != PathBuf::from("./data.ms"),
            import_dump: import_dump.is_some(),
            dump_dir: dump_dir != PathBuf::from("dumps/"),
            ignore_missing_dump,
            ignore_dump_if_db_exists,
            import_snapshot: import_snapshot.is_some(),
            schedule_snapshot,
            snapshot_dir: snapshot_dir != PathBuf::from("snapshots/"),
            snapshot_interval_sec,
            ignore_missing_snapshot,
            ignore_snapshot_if_db_exists,
            http_addr: http_addr != default_http_addr(),
            max_index_size,
            max_task_db_size,
            http_payload_size_limit,
            disable_auto_batching,
            log_level,
            max_indexing_memory,
            max_indexing_threads,
            with_configuration_file: config_file_path.is_some(),
            ssl_auth_path: ssl_auth_path.is_some(),
            ssl_cert_path: ssl_cert_path.is_some(),
            ssl_key_path: ssl_key_path.is_some(),
            ssl_ocsp_path: ssl_ocsp_path.is_some(),
            ssl_require_auth,
            ssl_resumption,
            ssl_tickets,
        }
    }
}

pub struct Segment {
    inbox: Receiver<AnalyticsMsg>,
    user: User,
    opt: Opt,
    batcher: AutoBatcher,
    get_search_aggregator: SearchAggregator,
    post_search_aggregator: SearchAggregator,
    add_documents_aggregator: DocumentsAggregator,
    delete_documents_aggregator: DocumentsDeletionAggregator,
    update_documents_aggregator: DocumentsAggregator,
    get_tasks_aggregator: TasksAggregator,
    health_aggregator: HealthAggregator,
}

impl Segment {
    fn compute_traits(opt: &Opt, stats: Stats) -> Value {
        static FIRST_START_TIMESTAMP: Lazy<Instant> = Lazy::new(Instant::now);
        static SYSTEM: Lazy<Value> = Lazy::new(|| {
            let mut sys = System::new_all();
            sys.refresh_all();
            let kernel_version = sys
                .kernel_version()
                .map(|k| k.split_once("-").map(|(k, _)| k.to_string()))
                .flatten();
            json!({
                "distribution": sys.name(),
                "kernel_version": kernel_version,
                "cores": sys.cpus().len(),
                "ram_size": sys.total_memory(),
                "disk_size": sys.disks().iter().map(|disk| disk.total_space()).max(),
                "server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(),
            })
        });
        let number_of_documents =
            stats.indexes.values().map(|index| index.number_of_documents).collect::<Vec<u64>>();

        json!({
            "start_since_days": FIRST_START_TIMESTAMP.elapsed().as_secs() / (60 * 60 * 24), // one day
            "system": *SYSTEM,
            "stats": {
                "database_size": stats.database_size,
                "indexes_number": stats.indexes.len(),
                "documents_number": number_of_documents,
            },
            "infos": Infos::from(opt.clone()),
        })
    }

    async fn run(mut self, index_scheduler: Arc<IndexScheduler>) {
        const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour
        // The first batch must be sent after one hour.
        let mut interval =
            tokio::time::interval_at(tokio::time::Instant::now() + INTERVAL, INTERVAL);

        loop {
            select! {
                _ = interval.tick() => {
                    self.tick(index_scheduler.clone()).await;
                },
                msg = self.inbox.recv() => {
                    match msg {
                        Some(AnalyticsMsg::BatchMessage(msg)) => drop(self.batcher.push(msg).await),
                        Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregateTasks(agreg)) => self.get_tasks_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregateHealth(agreg)) => self.health_aggregator.aggregate(agreg),
                        None => (),
                    }
                }
            }
        }
    }

    async fn tick(&mut self, index_scheduler: Arc<IndexScheduler>) {
        if let Ok(stats) = create_all_stats(index_scheduler.into(), &SearchRules::default()) {
            let _ = self
                .batcher
                .push(Identify {
                    context: Some(json!({
                        "app": {
                            "version": env!("CARGO_PKG_VERSION").to_string(),
                        },
                    })),
                    user: self.user.clone(),
                    traits: Self::compute_traits(&self.opt, stats),
                    ..Default::default()
                })
                .await;
        }
        let get_search = std::mem::take(&mut self.get_search_aggregator)
            .into_event(&self.user, "Documents Searched GET");
        let post_search = std::mem::take(&mut self.post_search_aggregator)
            .into_event(&self.user, "Documents Searched POST");
        let add_documents = std::mem::take(&mut self.add_documents_aggregator)
            .into_event(&self.user, "Documents Added");
        let delete_documents = std::mem::take(&mut self.delete_documents_aggregator)
            .into_event(&self.user, "Documents Deleted");
        let update_documents = std::mem::take(&mut self.update_documents_aggregator)
            .into_event(&self.user, "Documents Updated");
        let get_tasks =
            std::mem::take(&mut self.get_tasks_aggregator).into_event(&self.user, "Tasks Seen");
        let health =
            std::mem::take(&mut self.health_aggregator).into_event(&self.user, "Health Seen");

        if let Some(get_search) = get_search {
            let _ = self.batcher.push(get_search).await;
        }
        if let Some(post_search) = post_search {
            let _ = self.batcher.push(post_search).await;
        }
        if let Some(add_documents) = add_documents {
            let _ = self.batcher.push(add_documents).await;
        }
        if let Some(delete_documents) = delete_documents {
            let _ = self.batcher.push(delete_documents).await;
        }
        if let Some(update_documents) = update_documents {
            let _ = self.batcher.push(update_documents).await;
        }
        if let Some(get_tasks) = get_tasks {
            let _ = self.batcher.push(get_tasks).await;
        }
        if let Some(health) = health {
            let _ = self.batcher.push(health).await;
        }
        let _ = self.batcher.flush().await;
    }
}

#[derive(Default)]
pub struct SearchAggregator {
    timestamp: Option<OffsetDateTime>,

    // context
    user_agents: HashSet<String>,

    // requests
    total_received: usize,
    total_succeeded: usize,
    time_spent: BinaryHeap<usize>,

    // sort
    sort_with_geo_point: bool,
    // every time a request has a sort, this field must be incremented by the number of terms it contains
    sort_sum_of_criteria_terms: usize,
    // every time a request has a sort, this field must be incremented by one
    sort_total_number_of_criteria: usize,

    // filter
    filter_with_geo_radius: bool,
    // every time a request has a filter, this field must be incremented by the number of terms it contains
    filter_sum_of_criteria_terms: usize,
    // every time a request has a filter, this field must be incremented by one
    filter_total_number_of_criteria: usize,
    used_syntax: HashMap<String, usize>,

    // q
    // The maximum number of terms in a q request
    max_terms_number: usize,

    // every time a search is done, we increment the counter linked to the used settings
    matching_strategy: HashMap<String, usize>,

    // pagination
    max_limit: usize,
    max_offset: usize,
    finite_pagination: usize,

    // formatting
    max_attributes_to_retrieve: usize,
    max_attributes_to_highlight: usize,
    highlight_pre_tag: bool,
    highlight_post_tag: bool,
    max_attributes_to_crop: usize,
    crop_marker: bool,
    show_matches_position: bool,
    crop_length: bool,

    // facets
    facets_sum_of_terms: usize,
    facets_total_number_of_facets: usize,
}

impl SearchAggregator {
    pub fn from_query(query: &SearchQuery, request: &HttpRequest) -> Self {
        let mut ret = Self::default();
        ret.timestamp = Some(OffsetDateTime::now_utc());

        ret.total_received = 1;
        ret.user_agents = extract_user_agents(request).into_iter().collect();

        if let Some(ref sort) = query.sort {
            ret.sort_total_number_of_criteria = 1;
            ret.sort_with_geo_point = sort.iter().any(|s| s.contains("_geoPoint("));
            ret.sort_sum_of_criteria_terms = sort.len();
        }

        if let Some(ref filter) = query.filter {
            static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
            ret.filter_total_number_of_criteria = 1;

            let syntax = match filter {
                Value::String(_) => "string".to_string(),
                Value::Array(values) => {
                    if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) {
                        "mixed".to_string()
                    } else {
                        "array".to_string()
                    }
                }
                _ => "none".to_string(),
            };
            // convert the string to a HashMap
            ret.used_syntax.insert(syntax, 1);

            let stringified_filters = filter.to_string();
            ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
            ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
        }

        if let Some(ref q) = query.q {
            ret.max_terms_number = q.split_whitespace().count();
        }

        if query.is_finite_pagination() {
            let limit = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
            ret.max_limit = limit;
            ret.max_offset = query.page.unwrap_or(1).saturating_sub(1) * limit;
            ret.finite_pagination = 1;
        } else {
            ret.max_limit = query.limit;
            ret.max_offset = query.offset;
            ret.finite_pagination = 0;
        }

        ret.matching_strategy.insert(format!("{:?}", query.matching_strategy), 1);

        ret.highlight_pre_tag = query.highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
        ret.highlight_post_tag = query.highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
        ret.crop_marker = query.crop_marker != DEFAULT_CROP_MARKER();
        ret.crop_length = query.crop_length != DEFAULT_CROP_LENGTH();
        ret.show_matches_position = query.show_matches_position;

        ret
    }

    pub fn succeed(&mut self, result: &SearchResult) {
        self.total_succeeded = self.total_succeeded.saturating_add(1);
        self.time_spent.push(result.processing_time_ms as usize);
    }

    /// Aggregate one [SearchAggregator] into another.
    pub fn aggregate(&mut self, mut other: Self) {
        if self.timestamp.is_none() {
            self.timestamp = other.timestamp;
        }

        // context
        for user_agent in other.user_agents.into_iter() {
            self.user_agents.insert(user_agent);
        }

        // request
        self.total_received = self.total_received.saturating_add(other.total_received);
        self.total_succeeded = self.total_succeeded.saturating_add(other.total_succeeded);
        self.time_spent.append(&mut other.time_spent);

        // sort
        self.sort_with_geo_point |= other.sort_with_geo_point;
        self.sort_sum_of_criteria_terms =
            self.sort_sum_of_criteria_terms.saturating_add(other.sort_sum_of_criteria_terms);
        self.sort_total_number_of_criteria =
            self.sort_total_number_of_criteria.saturating_add(other.sort_total_number_of_criteria);

        // filter
        self.filter_with_geo_radius |= other.filter_with_geo_radius;
        self.filter_sum_of_criteria_terms =
            self.filter_sum_of_criteria_terms.saturating_add(other.filter_sum_of_criteria_terms);
        self.filter_total_number_of_criteria = self
            .filter_total_number_of_criteria
            .saturating_add(other.filter_total_number_of_criteria);
        for (key, value) in other.used_syntax.into_iter() {
            let used_syntax = self.used_syntax.entry(key).or_insert(0);
            *used_syntax = used_syntax.saturating_add(value);
        }
        // q
        self.max_terms_number = self.max_terms_number.max(other.max_terms_number);

        // pagination
        self.max_limit = self.max_limit.max(other.max_limit);
        self.max_offset = self.max_offset.max(other.max_offset);
        self.finite_pagination += other.finite_pagination;

        // formatting
        self.max_attributes_to_retrieve =
            self.max_attributes_to_retrieve.max(other.max_attributes_to_retrieve);
        self.max_attributes_to_highlight =
            self.max_attributes_to_highlight.max(other.max_attributes_to_highlight);
        self.highlight_pre_tag |= other.highlight_pre_tag;
        self.highlight_post_tag |= other.highlight_post_tag;
        self.max_attributes_to_crop = self.max_attributes_to_crop.max(other.max_attributes_to_crop);
        self.crop_marker |= other.crop_marker;
        self.show_matches_position |= other.show_matches_position;
        self.crop_length |= other.crop_length;

        // facets
        self.facets_sum_of_terms =
            self.facets_sum_of_terms.saturating_add(other.facets_sum_of_terms);
        self.facets_total_number_of_facets =
            self.facets_total_number_of_facets.saturating_add(other.facets_total_number_of_facets);

        // matching strategy
        for (key, value) in other.matching_strategy.into_iter() {
            let matching_strategy = self.matching_strategy.entry(key).or_insert(0);
            *matching_strategy = matching_strategy.saturating_add(value);
        }
    }

    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
        if self.total_received == 0 {
            None
        } else {
            // the index of the 99th percentile value
            let percentile_99th = 0.99 * (self.total_succeeded as f64 - 1.) + 1.;
            // we get all the values in sorted order
            let time_spent = self.time_spent.into_sorted_vec();
            // We are only interested in the slowest value of the 99% fastest results
            let time_spent = time_spent.get(percentile_99th as usize);

            let properties = json!({
                "user-agent": self.user_agents,
                "requests": {
                    "99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
                    "total_succeeded": self.total_succeeded,
                    "total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panic
                    "total_received": self.total_received,
                },
                "sort": {
                    "with_geoPoint": self.sort_with_geo_point,
                    "avg_criteria_number": format!("{:.2}", self.sort_sum_of_criteria_terms as f64 / self.sort_total_number_of_criteria as f64),
                },
                "filter": {
                    "with_geoRadius": self.filter_with_geo_radius,
                    "avg_criteria_number": format!("{:.2}", self.filter_sum_of_criteria_terms as f64 / self.filter_total_number_of_criteria as f64),
                    "most_used_syntax": self.used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
                },
                "q": {
                    "max_terms_number": self.max_terms_number,
                },
                "pagination": {
                    "max_limit": self.max_limit,
                    "max_offset": self.max_offset,
                    "most_used_navigation": if self.finite_pagination > (self.total_received / 2) { "exhaustive" } else { "estimated" },
                },
                "formatting": {
                    "max_attributes_to_retrieve": self.max_attributes_to_retrieve,
                    "max_attributes_to_highlight": self.max_attributes_to_highlight,
                    "highlight_pre_tag": self.highlight_pre_tag,
                    "highlight_post_tag": self.highlight_post_tag,
                    "max_attributes_to_crop": self.max_attributes_to_crop,
                    "crop_marker": self.crop_marker,
                    "show_matches_position": self.show_matches_position,
                    "crop_length": self.crop_length,
                },
                "facets": {
                    "avg_facets_number": format!("{:.2}", self.facets_sum_of_terms as f64 / self.facets_total_number_of_facets as f64),
                },
                "matching_strategy": {
                    "most_used_strategy": self.matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
                }
            });

            Some(Track {
                timestamp: self.timestamp,
                user: user.clone(),
                event: event_name.to_string(),
                properties,
                ..Default::default()
            })
        }
    }
}

#[derive(Default)]
pub struct DocumentsAggregator {
    timestamp: Option<OffsetDateTime>,

    // set to true when at least one request was received
    updated: bool,

    // context
    user_agents: HashSet<String>,

    content_types: HashSet<String>,
    primary_keys: HashSet<String>,
    index_creation: bool,
}

impl DocumentsAggregator {
    pub fn from_query(
        documents_query: &UpdateDocumentsQuery,
        index_creation: bool,
        request: &HttpRequest,
    ) -> Self {
        let mut ret = Self::default();
        ret.timestamp = Some(OffsetDateTime::now_utc());

        ret.updated = true;
        ret.user_agents = extract_user_agents(request).into_iter().collect();
        if let Some(primary_key) = documents_query.primary_key.clone() {
            ret.primary_keys.insert(primary_key);
        }
        let content_type = request
            .headers()
            .get(CONTENT_TYPE)
            .and_then(|s| s.to_str().ok())
            .unwrap_or("unknown")
            .to_string();
        ret.content_types.insert(content_type);
        ret.index_creation = index_creation;

        ret
    }

    /// Aggregate one [DocumentsAggregator] into another.
    pub fn aggregate(&mut self, other: Self) {
        if self.timestamp.is_none() {
            self.timestamp = other.timestamp;
        }

        self.updated |= other.updated;
        // we can't create a union because there is no `into_union` method
        for user_agent in other.user_agents {
            self.user_agents.insert(user_agent);
        }
        for primary_key in other.primary_keys {
            self.primary_keys.insert(primary_key);
        }
        for content_type in other.content_types {
            self.content_types.insert(content_type);
        }
        self.index_creation |= other.index_creation;
    }

    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
        if !self.updated {
            None
        } else {
            let properties = json!({
                "user-agent": self.user_agents,
                "payload_type": self.content_types,
                "primary_key": self.primary_keys,
                "index_creation": self.index_creation,
            });

            Some(Track {
                timestamp: self.timestamp,
                user: user.clone(),
                event: event_name.to_string(),
                properties,
                ..Default::default()
            })
        }
    }
}

#[derive(Default, Serialize)]
pub struct DocumentsDeletionAggregator {
    #[serde(skip)]
    timestamp: Option<OffsetDateTime>,

    // context
    #[serde(rename = "user-agent")]
    user_agents: HashSet<String>,

    total_received: usize,
    per_document_id: bool,
    clear_all: bool,
    per_batch: bool,
}

impl DocumentsDeletionAggregator {
    pub fn from_query(kind: DocumentDeletionKind, request: &HttpRequest) -> Self {
        let mut ret = Self::default();
        ret.timestamp = Some(OffsetDateTime::now_utc());

        ret.user_agents = extract_user_agents(request).into_iter().collect();
        ret.total_received = 1;
        match kind {
            DocumentDeletionKind::PerDocumentId => ret.per_document_id = true,
            DocumentDeletionKind::ClearAll => ret.clear_all = true,
            DocumentDeletionKind::PerBatch => ret.per_batch = true,
        }

        ret
    }

    /// Aggregate one [DocumentsDeletionAggregator] into another.
    pub fn aggregate(&mut self, other: Self) {
        if self.timestamp.is_none() {
            self.timestamp = other.timestamp;
        }

        // we can't create a union because there is no `into_union` method
        for user_agent in other.user_agents {
            self.user_agents.insert(user_agent);
        }
        self.total_received = self.total_received.saturating_add(other.total_received);
        self.per_document_id |= other.per_document_id;
        self.clear_all |= other.clear_all;
        self.per_batch |= other.per_batch;
    }

    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
        // if we had no timestamp it means we never encountered any events and
        // thus we don't need to send this event.
        let timestamp = self.timestamp?;

        Some(Track {
            timestamp: Some(timestamp),
            user: user.clone(),
            event: event_name.to_string(),
            properties: serde_json::to_value(self).ok()?,
            ..Default::default()
        })
    }
}

#[derive(Default, Serialize)]
pub struct TasksAggregator {
    #[serde(skip)]
    timestamp: Option<OffsetDateTime>,

    // context
    #[serde(rename = "user-agent")]
    user_agents: HashSet<String>,

    filtered_by_uid: bool,
    filtered_by_index_uid: bool,
    filtered_by_type: bool,
    filtered_by_status: bool,
    filtered_by_canceled_by: bool,
    filtered_by_before_enqueued_at: bool,
    filtered_by_after_enqueued_at: bool,
    filtered_by_before_started_at: bool,
    filtered_by_after_started_at: bool,
    filtered_by_before_finished_at: bool,
    filtered_by_after_finished_at: bool,
    total_received: usize,
}

impl TasksAggregator {
    pub fn from_query(query: &TasksFilterQueryRaw, request: &HttpRequest) -> Self {
        Self {
            timestamp: Some(OffsetDateTime::now_utc()),
            user_agents: extract_user_agents(request).into_iter().collect(),
            filtered_by_uid: query.common.uids.is_some(),
            filtered_by_index_uid: query.common.index_uids.is_some(),
            filtered_by_type: query.common.types.is_some(),
            filtered_by_status: query.common.statuses.is_some(),
            filtered_by_canceled_by: query.common.canceled_by.is_some(),
            filtered_by_before_enqueued_at: query.dates.before_enqueued_at.is_some(),
            filtered_by_after_enqueued_at: query.dates.after_enqueued_at.is_some(),
            filtered_by_before_started_at: query.dates.before_started_at.is_some(),
            filtered_by_after_started_at: query.dates.after_started_at.is_some(),
            filtered_by_before_finished_at: query.dates.before_finished_at.is_some(),
            filtered_by_after_finished_at: query.dates.after_finished_at.is_some(),
            total_received: 1,
        }
    }

    /// Aggregate one [TasksAggregator] into another.
    pub fn aggregate(&mut self, other: Self) {
        if self.timestamp.is_none() {
            self.timestamp = other.timestamp;
        }

        // we can't create a union because there is no `into_union` method
        for user_agent in other.user_agents {
            self.user_agents.insert(user_agent);
        }

        self.filtered_by_uid |= other.filtered_by_uid;
        self.filtered_by_index_uid |= other.filtered_by_index_uid;
        self.filtered_by_type |= other.filtered_by_type;
        self.filtered_by_status |= other.filtered_by_status;
        self.filtered_by_canceled_by |= other.filtered_by_canceled_by;
        self.filtered_by_before_enqueued_at |= other.filtered_by_before_enqueued_at;
        self.filtered_by_after_enqueued_at |= other.filtered_by_after_enqueued_at;
        self.filtered_by_before_started_at |= other.filtered_by_before_started_at;
        self.filtered_by_after_started_at |= other.filtered_by_after_started_at;
        self.filtered_by_before_finished_at |= other.filtered_by_before_finished_at;
        self.filtered_by_after_finished_at |= other.filtered_by_after_finished_at;

        self.total_received = self.total_received.saturating_add(other.total_received);
    }

    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
        // if we had no timestamp it means we never encountered any events and
        // thus we don't need to send this event.
        let timestamp = self.timestamp?;

        Some(Track {
            timestamp: Some(timestamp),
            user: user.clone(),
            event: event_name.to_string(),
            properties: serde_json::to_value(self).ok()?,
            ..Default::default()
        })
    }
}

#[derive(Default, Serialize)]
pub struct HealthAggregator {
    #[serde(skip)]
    timestamp: Option<OffsetDateTime>,

    // context
    #[serde(rename = "user-agent")]
    user_agents: HashSet<String>,

    total_received: usize,
}

impl HealthAggregator {
    pub fn from_query(request: &HttpRequest) -> Self {
        let mut ret = Self::default();
        ret.timestamp = Some(OffsetDateTime::now_utc());

        ret.user_agents = extract_user_agents(request).into_iter().collect();
        ret.total_received = 1;
        ret
    }

    /// Aggregate one [HealthAggregator] into another.
    pub fn aggregate(&mut self, other: Self) {
        if self.timestamp.is_none() {
            self.timestamp = other.timestamp;
        }

        // we can't create a union because there is no `into_union` method
        for user_agent in other.user_agents {
            self.user_agents.insert(user_agent);
        }
        self.total_received = self.total_received.saturating_add(other.total_received);
    }

    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
        // if we had no timestamp it means we never encountered any events and
        // thus we don't need to send this event.
        let timestamp = self.timestamp?;

        Some(Track {
            timestamp: Some(timestamp),
            user: user.clone(),
            event: event_name.to_string(),
            properties: serde_json::to_value(self).ok()?,
            ..Default::default()
        })
    }
}
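One detail worth calling out from `SearchAggregator::into_event` above: response times are pushed into a `BinaryHeap` and the 99th-percentile value is read from the sorted vector. The following self-contained sketch isolates that computation; the function name is hypothetical, the rank formula mirrors the one above (the aggregator uses its success count as `n`), and the fallback to the last element is an added guard that is not in the original code.

use std::collections::BinaryHeap;

// Returns the sample at the 99th-percentile rank, using the same
// `0.99 * (n - 1) + 1` formula as the aggregator above.
fn percentile_99th(samples: BinaryHeap<usize>) -> Option<usize> {
    let count = samples.len();
    if count == 0 {
        return None;
    }
    let rank = 0.99 * (count as f64 - 1.) + 1.;
    let sorted = samples.into_sorted_vec(); // ascending order
    sorted.get(rank as usize).or_else(|| sorted.last()).copied()
}

fn main() {
    let times: BinaryHeap<usize> = (1..=200).collect();
    println!("{:?}", percentile_99th(times)); // Some(199) for samples 1..=200
}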
166  meilisearch/src/error.rs  Normal file
@@ -0,0 +1,166 @@
use actix_web as aweb;
use aweb::error::{JsonPayloadError, QueryPayloadError};
use meilisearch_types::document_formats::{DocumentFormatError, PayloadType};
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::index_uid::IndexUidFormatError;
use serde_json::Value;
use tokio::task::JoinError;

#[derive(Debug, thiserror::Error)]
pub enum MeilisearchHttpError {
    #[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
            .0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))]
    MissingContentType(Vec<String>),
    #[error(
        "The Content-Type `{0}` is invalid. Accepted values for the Content-Type header are: {}",
        .1.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
    )]
    InvalidContentType(String, Vec<String>),
    #[error("Document `{0}` not found.")]
    DocumentNotFound(String),
    #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
    InvalidExpression(&'static [&'static str], Value),
    #[error("A {0} payload is missing.")]
    MissingPayload(PayloadType),
    #[error("The provided payload reached the size limit.")]
    PayloadTooLarge,
    #[error("Two indexes must be given for each swap. The list `{:?}` contains {} indexes.",
        .0, .0.len()
    )]
    SwapIndexPayloadWrongLength(Vec<String>),
    #[error(transparent)]
    IndexUid(#[from] IndexUidFormatError),
    #[error(transparent)]
    SerdeJson(#[from] serde_json::Error),
    #[error(transparent)]
    HeedError(#[from] meilisearch_types::heed::Error),
    #[error(transparent)]
    IndexScheduler(#[from] index_scheduler::Error),
    #[error(transparent)]
    Milli(#[from] meilisearch_types::milli::Error),
    #[error(transparent)]
    Payload(#[from] PayloadError),
    #[error(transparent)]
    FileStore(#[from] file_store::Error),
    #[error(transparent)]
    DocumentFormat(#[from] DocumentFormatError),
    #[error(transparent)]
    Join(#[from] JoinError),
}

impl ErrorCode for MeilisearchHttpError {
    fn error_code(&self) -> Code {
        match self {
            MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType,
            MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
            MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
            MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound,
            MeilisearchHttpError::InvalidExpression(_, _) => Code::Filter,
            MeilisearchHttpError::PayloadTooLarge => Code::PayloadTooLarge,
            MeilisearchHttpError::SwapIndexPayloadWrongLength(_) => Code::BadRequest,
            MeilisearchHttpError::IndexUid(e) => e.error_code(),
            MeilisearchHttpError::SerdeJson(_) => Code::Internal,
            MeilisearchHttpError::HeedError(_) => Code::Internal,
            MeilisearchHttpError::IndexScheduler(e) => e.error_code(),
            MeilisearchHttpError::Milli(e) => e.error_code(),
            MeilisearchHttpError::Payload(e) => e.error_code(),
            MeilisearchHttpError::FileStore(_) => Code::Internal,
            MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
            MeilisearchHttpError::Join(_) => Code::Internal,
        }
    }
}

impl From<MeilisearchHttpError> for aweb::Error {
    fn from(other: MeilisearchHttpError) -> Self {
        aweb::Error::from(ResponseError::from(other))
    }
}

impl From<aweb::error::PayloadError> for MeilisearchHttpError {
    fn from(error: aweb::error::PayloadError) -> Self {
        MeilisearchHttpError::Payload(PayloadError::Payload(error))
    }
}

#[derive(Debug, thiserror::Error)]
pub enum PayloadError {
    #[error(transparent)]
    Payload(aweb::error::PayloadError),
    #[error(transparent)]
    Json(JsonPayloadError),
    #[error(transparent)]
    Query(QueryPayloadError),
    #[error("The json payload provided is malformed. `{0}`.")]
    MalformedPayload(serde_json::error::Error),
    #[error("A json payload is missing.")]
    MissingPayload,
    #[error("Error while writing the payload to disk: `{0}`.")]
    ReceivePayloadErr(Box<dyn std::error::Error + Send + Sync + 'static>),
}

impl ErrorCode for PayloadError {
    fn error_code(&self) -> Code {
        match self {
            PayloadError::Payload(e) => match e {
                aweb::error::PayloadError::Incomplete(_) => Code::Internal,
                aweb::error::PayloadError::EncodingCorrupted => Code::Internal,
                aweb::error::PayloadError::Overflow => Code::PayloadTooLarge,
                aweb::error::PayloadError::UnknownLength => Code::Internal,
                aweb::error::PayloadError::Http2Payload(_) => Code::Internal,
                aweb::error::PayloadError::Io(_) => Code::Internal,
                _ => todo!(),
            },
            PayloadError::Json(err) => match err {
                JsonPayloadError::Overflow { .. } => Code::PayloadTooLarge,
                JsonPayloadError::ContentType => Code::UnsupportedMediaType,
                JsonPayloadError::Payload(aweb::error::PayloadError::Overflow) => {
                    Code::PayloadTooLarge
                }
                JsonPayloadError::Payload(_) => Code::BadRequest,
                JsonPayloadError::Deserialize(_) => Code::BadRequest,
                JsonPayloadError::Serialize(_) => Code::Internal,
                _ => Code::Internal,
            },
            PayloadError::Query(err) => match err {
                QueryPayloadError::Deserialize(_) => Code::BadRequest,
                _ => Code::Internal,
            },
            PayloadError::MissingPayload => Code::MissingPayload,
            PayloadError::MalformedPayload(_) => Code::MalformedPayload,
            PayloadError::ReceivePayloadErr(_) => Code::Internal,
        }
    }
}

impl From<JsonPayloadError> for PayloadError {
    fn from(other: JsonPayloadError) -> Self {
        match other {
            JsonPayloadError::Deserialize(e)
                if e.classify() == serde_json::error::Category::Eof
                    && e.line() == 1
                    && e.column() == 0 =>
            {
                Self::MissingPayload
            }
            JsonPayloadError::Deserialize(e)
                if e.classify() != serde_json::error::Category::Data =>
            {
                Self::MalformedPayload(e)
            }
            _ => Self::Json(other),
        }
    }
}

impl From<QueryPayloadError> for PayloadError {
    fn from(other: QueryPayloadError) -> Self {
        Self::Query(other)
    }
}

impl From<PayloadError> for aweb::Error {
    fn from(other: PayloadError) -> Self {
        aweb::Error::from(ResponseError::from(other))
    }
}
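The error file above leans on `thiserror`'s `#[error(transparent)]` together with `#[from]` to wrap lower-level errors without rewriting their messages or conversions. Below is a minimal, self-contained sketch of that pattern outside this crate; the `HttpError` type and `parse` function are hypothetical, and the snippet assumes the `thiserror` and `serde_json` dependencies.

use thiserror::Error;

// Hypothetical top-level error that forwards both the Display message and the
// conversion from the wrapped errors, like `MeilisearchHttpError` above.
#[derive(Debug, Error)]
enum HttpError {
    #[error("Document `{0}` not found.")]
    DocumentNotFound(String),
    #[error(transparent)]
    Json(#[from] serde_json::Error),
    #[error(transparent)]
    Io(#[from] std::io::Error),
}

fn parse(body: &str) -> Result<serde_json::Value, HttpError> {
    // `?` uses the `From<serde_json::Error>` impl generated by `#[from]`.
    Ok(serde_json::from_str(body)?)
}

fn main() {
    // Prints serde_json's own message, forwarded by `transparent`.
    println!("{}", parse("{ not json").unwrap_err());
    println!("{}", HttpError::DocumentNotFound("42".into()));
}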
25  meilisearch/src/extractors/authentication/error.rs  Normal file
@@ -0,0 +1,25 @@
use meilisearch_types::error::{Code, ErrorCode};

#[derive(Debug, thiserror::Error)]
pub enum AuthenticationError {
    #[error("The Authorization header is missing. It must use the bearer authorization method.")]
    MissingAuthorizationHeader,
    #[error("The provided API key is invalid.")]
    InvalidToken,
    // Triggered on configuration error.
    #[error("An internal error has occurred. `Irretrievable state`.")]
    IrretrievableState,
    #[error("Meilisearch is running without a master key. To access this API endpoint, you must have set a master key at launch.")]
    MissingMasterKey,
}

impl ErrorCode for AuthenticationError {
    fn error_code(&self) -> Code {
        match self {
            AuthenticationError::MissingAuthorizationHeader => Code::MissingAuthorizationHeader,
            AuthenticationError::InvalidToken => Code::InvalidToken,
            AuthenticationError::IrretrievableState => Code::Internal,
            AuthenticationError::MissingMasterKey => Code::MissingMasterKey,
        }
    }
}
247
meilisearch/src/extractors/authentication/mod.rs
Normal file
247
meilisearch/src/extractors/authentication/mod.rs
Normal file
|
@ -0,0 +1,247 @@
|
|||
mod error;
|
||||
|
||||
use std::marker::PhantomData;
|
||||
use std::ops::Deref;
|
||||
use std::pin::Pin;
|
||||
|
||||
use actix_web::FromRequest;
|
||||
pub use error::AuthenticationError;
|
||||
use futures::future::err;
|
||||
use futures::Future;
|
||||
use meilisearch_auth::{AuthController, AuthFilter};
|
||||
use meilisearch_types::error::{Code, ResponseError};
|
||||
|
||||
pub struct GuardedData<P, D> {
|
||||
data: D,
|
||||
filters: AuthFilter,
|
||||
_marker: PhantomData<P>,
|
||||
}
|
||||
|
||||
impl<P, D> GuardedData<P, D> {
|
||||
pub fn filters(&self) -> &AuthFilter {
|
||||
&self.filters
|
||||
}
|
||||
|
||||
async fn auth_bearer(
|
||||
auth: AuthController,
|
||||
token: String,
|
||||
index: Option<String>,
|
||||
data: Option<D>,
|
||||
) -> Result<Self, ResponseError>
|
||||
where
|
||||
P: Policy + 'static,
|
||||
{
|
||||
let missing_master_key = auth.get_master_key().is_none();
|
||||
|
||||
match Self::authenticate(auth, token, index).await? {
|
||||
Some(filters) => match data {
|
||||
Some(data) => Ok(Self { data, filters, _marker: PhantomData }),
|
||||
None => Err(AuthenticationError::IrretrievableState.into()),
|
||||
},
|
||||
None if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
|
||||
None => Err(AuthenticationError::InvalidToken.into()),
|
||||
}
|
||||
}
|
||||
|
||||
async fn auth_token(auth: AuthController, data: Option<D>) -> Result<Self, ResponseError>
|
||||
where
|
||||
P: Policy + 'static,
|
||||
{
|
||||
let missing_master_key = auth.get_master_key().is_none();
|
||||
|
||||
match Self::authenticate(auth, String::new(), None).await? {
|
||||
Some(filters) => match data {
|
||||
Some(data) => Ok(Self { data, filters, _marker: PhantomData }),
|
||||
None => Err(AuthenticationError::IrretrievableState.into()),
|
||||
},
|
||||
None if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
|
||||
None => Err(AuthenticationError::MissingAuthorizationHeader.into()),
|
||||
}
|
||||
}
|
||||
|
||||
async fn authenticate(
|
||||
auth: AuthController,
|
||||
token: String,
|
||||
index: Option<String>,
|
||||
) -> Result<Option<AuthFilter>, ResponseError>
|
||||
where
|
||||
P: Policy + 'static,
|
||||
{
|
||||
tokio::task::spawn_blocking(move || P::authenticate(auth, token.as_ref(), index.as_deref()))
|
||||
.await
|
||||
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))
|
||||
}
|
||||
}
|
||||
|
||||
impl<P, D> Deref for GuardedData<P, D> {
|
||||
type Target = D;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.data
|
||||
}
|
||||
}
|
||||
|
||||
impl<P: Policy + 'static, D: 'static + Clone> FromRequest for GuardedData<P, D> {
|
||||
type Error = ResponseError;
|
||||
|
||||
type Future = Pin<Box<dyn Future<Output = Result<Self, Self::Error>>>>;
|
||||
|
||||
fn from_request(
|
||||
req: &actix_web::HttpRequest,
|
||||
_payload: &mut actix_web::dev::Payload,
|
||||
) -> Self::Future {
|
||||
match req.app_data::<AuthController>().cloned() {
|
||||
Some(auth) => match req
|
||||
.headers()
|
||||
.get("Authorization")
|
||||
.map(|type_token| type_token.to_str().unwrap_or_default().splitn(2, ' '))
|
||||
{
|
||||
Some(mut type_token) => match type_token.next() {
|
||||
Some("Bearer") => {
|
||||
// TODO: find a less hardcoded way?
|
||||
let index = req.match_info().get("index_uid");
|
||||
match type_token.next() {
|
||||
Some(token) => Box::pin(Self::auth_bearer(
|
||||
auth,
|
||||
token.to_string(),
|
||||
index.map(String::from),
|
||||
req.app_data::<D>().cloned(),
|
||||
)),
|
||||
None => Box::pin(err(AuthenticationError::InvalidToken.into())),
|
||||
}
|
||||
}
|
||||
_otherwise => {
|
||||
Box::pin(err(AuthenticationError::MissingAuthorizationHeader.into()))
|
||||
}
|
||||
},
|
||||
None => Box::pin(Self::auth_token(auth, req.app_data::<D>().cloned())),
|
||||
},
|
||||
None => Box::pin(err(AuthenticationError::IrretrievableState.into())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Policy {
|
||||
fn authenticate(auth: AuthController, token: &str, index: Option<&str>) -> Option<AuthFilter>;
|
||||
}

pub mod policies {
    use jsonwebtoken::{decode, Algorithm, DecodingKey, Validation};
    use meilisearch_auth::{AuthController, AuthFilter, SearchRules};
    // reexport actions in policies in order to be used in routes configuration.
    pub use meilisearch_types::keys::{actions, Action};
    use serde::{Deserialize, Serialize};
    use time::OffsetDateTime;
    use uuid::Uuid;

    use crate::extractors::authentication::Policy;

    fn tenant_token_validation() -> Validation {
        let mut validation = Validation::default();
        validation.validate_exp = false;
        validation.required_spec_claims.remove("exp");
        validation.algorithms = vec![Algorithm::HS256, Algorithm::HS384, Algorithm::HS512];
        validation
    }

    /// Extracts the key id used to sign the payload, without performing any validation.
    fn extract_key_id(token: &str) -> Option<Uuid> {
        let mut validation = tenant_token_validation();
        validation.insecure_disable_signature_validation();
        let dummy_key = DecodingKey::from_secret(b"secret");
        let token_data = decode::<Claims>(token, &dummy_key, &validation).ok()?;

        // get the token fields without validating it.
        let Claims { api_key_uid, .. } = token_data.claims;
        Some(api_key_uid)
    }

    fn is_keys_action(action: u8) -> bool {
        use actions::*;
        matches!(action, KEYS_GET | KEYS_CREATE | KEYS_UPDATE | KEYS_DELETE)
    }

    pub struct ActionPolicy<const A: u8>;

    impl<const A: u8> Policy for ActionPolicy<A> {
        fn authenticate(
            auth: AuthController,
            token: &str,
            index: Option<&str>,
        ) -> Option<AuthFilter> {
            // authenticate if the token is the master key.
            // master key can only have access to keys routes.
            // if the master key is None, only the keys routes are inaccessible.
            if auth.get_master_key().map_or_else(|| !is_keys_action(A), |mk| mk == token) {
                return Some(AuthFilter::default());
            }

            // Tenant token
            if let Some(filters) = ActionPolicy::<A>::authenticate_tenant_token(&auth, token, index)
            {
                return Some(filters);
            } else if let Some(action) = Action::from_repr(A) {
                // API key
                if let Ok(Some(uid)) = auth.get_optional_uid_from_encoded_key(token.as_bytes()) {
                    if let Ok(true) = auth.is_key_authorized(uid, action, index) {
                        return auth.get_key_filters(uid, None).ok();
                    }
                }
            }

            None
        }
    }

    impl<const A: u8> ActionPolicy<A> {
        fn authenticate_tenant_token(
            auth: &AuthController,
            token: &str,
            index: Option<&str>,
        ) -> Option<AuthFilter> {
            // Only the search action can be performed with a tenant token.
            if A != actions::SEARCH {
                return None;
            }

            let uid = extract_key_id(token)?;
            // Check if the parent key is authorized to perform the action.
            if auth.is_key_authorized(uid, Action::Search, index).ok()? {
                // Check if the tenant token is valid.
                let key = auth.generate_key(uid)?;
                let data = decode::<Claims>(
                    token,
                    &DecodingKey::from_secret(key.as_bytes()),
                    &tenant_token_validation(),
                )
                .ok()?;

                // Check index access if an index restriction is provided.
                if let Some(index) = index {
                    if !data.claims.search_rules.is_index_authorized(index) {
                        return None;
                    }
                }

                // Check if the token is expired.
                if let Some(exp) = data.claims.exp {
                    if OffsetDateTime::now_utc().unix_timestamp() > exp {
                        return None;
                    }
                }

                return auth.get_key_filters(uid, Some(data.claims.search_rules)).ok();
            }

            None
        }
    }

    #[derive(Debug, Serialize, Deserialize)]
    #[serde(rename_all = "camelCase")]
    struct Claims {
        search_rules: SearchRules,
        exp: Option<i64>,
        api_key_uid: Uuid,
    }
}
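
Editor's note: to make the tenant-token path concrete, here is a minimal sketch of how a caller might mint a token that `ActionPolicy` would accept. The payload mirrors the `Claims` struct above; the API key, its uid, and the exact JSON shape of `SearchRules` are placeholder assumptions, not taken from this diff.

use jsonwebtoken::{encode, EncodingKey, Header};
use serde::Serialize;
use serde_json::json;
use uuid::Uuid;

// Illustrative claims mirroring the `Claims` struct decoded by ActionPolicy.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct TenantClaims {
    search_rules: serde_json::Value,
    exp: Option<i64>,
    api_key_uid: Uuid,
}

fn make_tenant_token(parent_api_key: &str, api_key_uid: Uuid) -> jsonwebtoken::errors::Result<String> {
    let claims = TenantClaims {
        // Assumed shape: a list of index patterns the token may search.
        search_rules: json!(["products"]),
        // No expiration; the policy treats `exp` as optional.
        exp: None,
        api_key_uid,
    };
    // The token must be signed with the parent API key; HS256 (Header::default())
    // is one of the algorithms accepted by tenant_token_validation().
    encode(&Header::default(), &claims, &EncodingKey::from_secret(parent_api_key.as_bytes()))
}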
4
meilisearch/src/extractors/mod.rs
Normal file
@ -0,0 +1,4 @@
pub mod payload;
#[macro_use]
pub mod authentication;
pub mod sequential_extractor;
69
meilisearch/src/extractors/payload.rs
Normal file
@ -0,0 +1,69 @@
use std::pin::Pin;
use std::task::{Context, Poll};

use actix_http::encoding::Decoder as Decompress;
use actix_web::{dev, web, FromRequest, HttpRequest};
use futures::future::{ready, Ready};
use futures::Stream;

use crate::error::MeilisearchHttpError;

pub struct Payload {
    payload: Decompress<dev::Payload>,
    limit: usize,
}

pub struct PayloadConfig {
    limit: usize,
}

impl PayloadConfig {
    pub fn new(limit: usize) -> Self {
        Self { limit }
    }
}

impl Default for PayloadConfig {
    fn default() -> Self {
        Self { limit: 256 * 1024 }
    }
}

impl FromRequest for Payload {
    type Error = MeilisearchHttpError;

    type Future = Ready<Result<Payload, Self::Error>>;

    #[inline]
    fn from_request(req: &HttpRequest, payload: &mut dev::Payload) -> Self::Future {
        let limit = req
            .app_data::<PayloadConfig>()
            .map(|c| c.limit)
            .unwrap_or(PayloadConfig::default().limit);
        ready(Ok(Payload {
            payload: Decompress::from_headers(payload.take(), req.headers()),
            limit,
        }))
    }
}

impl Stream for Payload {
    type Item = Result<web::Bytes, MeilisearchHttpError>;

    #[inline]
    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        match Pin::new(&mut self.payload).poll_next(cx) {
            Poll::Ready(Some(result)) => match result {
                Ok(bytes) => match self.limit.checked_sub(bytes.len()) {
                    Some(new_limit) => {
                        self.limit = new_limit;
                        Poll::Ready(Some(Ok(bytes)))
                    }
                    None => Poll::Ready(Some(Err(MeilisearchHttpError::PayloadTooLarge))),
                },
                x => Poll::Ready(Some(x.map_err(MeilisearchHttpError::from))),
            },
            otherwise => otherwise.map(|o| o.map(|o| o.map_err(MeilisearchHttpError::from))),
        }
    }
}
151
meilisearch/src/extractors/sequential_extractor.rs
Normal file
@ -0,0 +1,151 @@
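
Editor's note: a minimal sketch of how this extractor is typically wired up. The size limit is registered as app data (as `configure_data` does later in this diff with the `--http-payload-size-limit` value), and a handler consumes the decompressed body as a byte stream. The handler and route below are illustrative, not part of the diff.

use actix_web::{web, HttpResponse};
use futures::StreamExt;

use crate::error::MeilisearchHttpError;
use crate::extractors::payload::{Payload, PayloadConfig};

// Hypothetical handler: drains the (possibly compressed) body, bounded by the
// PayloadConfig limit enforced in poll_next above.
async fn echo_len(mut body: Payload) -> Result<HttpResponse, MeilisearchHttpError> {
    let mut total = 0;
    while let Some(chunk) = body.next().await {
        total += chunk?.len();
    }
    Ok(HttpResponse::Ok().body(total.to_string()))
}

fn configure(cfg: &mut web::ServiceConfig) {
    // 10 MiB limit instead of the 256 KiB default.
    cfg.app_data(PayloadConfig::new(10 * 1024 * 1024))
        .route("/echo", web::post().to(echo_len));
}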
#![allow(non_snake_case)]
use std::future::Future;
use std::pin::Pin;
use std::task::Poll;

use actix_web::dev::Payload;
use actix_web::{FromRequest, Handler, HttpRequest};
use pin_project_lite::pin_project;

/// `SeqHandler` is an actix `Handler` that enforces that extractor errors are returned in the
/// same order as the extractors are defined in the wrapped handler. This is needed because, by
/// default, actix resolves the extractors concurrently, whereas we always need the authentication
/// extractor to throw first.
#[derive(Clone)]
pub struct SeqHandler<H>(pub H);

pub struct SeqFromRequest<T>(T);

/// This macro implements `FromRequest` for handlers of arbitrary arity, except arity one, which
/// would be useless anyway.
macro_rules! gen_seq {
    ($ty:ident; $($T:ident)+) => {
        pin_project! {
            pub struct $ty<$($T: FromRequest), +> {
                $(
                    #[pin]
                    $T: ExtractFuture<$T::Future, $T, $T::Error>,
                )+
            }
        }

        impl<$($T: FromRequest), +> Future for $ty<$($T),+> {
            type Output = Result<SeqFromRequest<($($T),+)>, actix_web::Error>;

            fn poll(self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll<Self::Output> {
                let mut this = self.project();

                let mut count_fut = 0;
                let mut count_finished = 0;

                $(
                    count_fut += 1;
                    match this.$T.as_mut().project() {
                        ExtractProj::Future { fut } => match fut.poll(cx) {
                            Poll::Ready(Ok(output)) => {
                                count_finished += 1;
                                let _ = this
                                    .$T
                                    .as_mut()
                                    .project_replace(ExtractFuture::Done { output });
                            }
                            Poll::Ready(Err(error)) => {
                                count_finished += 1;
                                let _ = this
                                    .$T
                                    .as_mut()
                                    .project_replace(ExtractFuture::Error { error });
                            }
                            Poll::Pending => (),
                        },
                        ExtractProj::Done { .. } => count_finished += 1,
                        ExtractProj::Error { .. } => {
                            // short circuit if all previous are finished and we had an error.
                            if count_finished == count_fut {
                                match this.$T.project_replace(ExtractFuture::Empty) {
                                    ExtractReplaceProj::Error { error } => {
                                        return Poll::Ready(Err(error.into()))
                                    }
                                    _ => unreachable!("Invalid future state"),
                                }
                            } else {
                                count_finished += 1;
                            }
                        }
                        ExtractProj::Empty => unreachable!("From request polled after being finished. {}", stringify!($T)),
                    }
                )+

                if count_fut == count_finished {
                    let result = (
                        $(
                            match this.$T.project_replace(ExtractFuture::Empty) {
                                ExtractReplaceProj::Done { output } => output,
                                ExtractReplaceProj::Error { error } => return Poll::Ready(Err(error.into())),
                                _ => unreachable!("Invalid future state"),
                            },
                        )+
                    );

                    Poll::Ready(Ok(SeqFromRequest(result)))
                } else {
                    Poll::Pending
                }
            }
        }

        impl<$($T: FromRequest,)+> FromRequest for SeqFromRequest<($($T,)+)> {
            type Error = actix_web::Error;

            type Future = $ty<$($T),+>;

            fn from_request(req: &HttpRequest, payload: &mut Payload) -> Self::Future {
                $ty {
                    $(
                        $T: ExtractFuture::Future {
                            fut: $T::from_request(req, payload),
                        },
                    )+
                }
            }
        }

        impl<Han, $($T: FromRequest),+> Handler<SeqFromRequest<($($T),+)>> for SeqHandler<Han>
        where
            Han: Handler<($($T),+)>,
        {
            type Output = Han::Output;
            type Future = Han::Future;

            fn call(&self, args: SeqFromRequest<($($T),+)>) -> Self::Future {
                self.0.call(args.0)
            }
        }
    };
}

// Not working for a single argument, but then, it is not really necessary.
// gen_seq! { SeqFromRequestFut1; A }
gen_seq! { SeqFromRequestFut2; A B }
gen_seq! { SeqFromRequestFut3; A B C }
gen_seq! { SeqFromRequestFut4; A B C D }
gen_seq! { SeqFromRequestFut5; A B C D E }
gen_seq! { SeqFromRequestFut6; A B C D E F }

pin_project! {
    #[project = ExtractProj]
    #[project_replace = ExtractReplaceProj]
    enum ExtractFuture<Fut, Res, Err> {
        Future {
            #[pin]
            fut: Fut,
        },
        Done {
            output: Res,
        },
        Error {
            error: Err,
        },
        Empty,
    }
}
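
Editor's note: for context, this is how routes in this codebase typically use `SeqHandler`: the handler function is wrapped so its extractors are resolved in declaration order, which guarantees an authentication failure surfaces before any later extractor error. The handler and route names below are illustrative only; in the real routes the first extractor is usually a `GuardedData<ActionPolicy<...>, ...>`.

use actix_web::{web, HttpResponse};

use crate::extractors::sequential_extractor::SeqHandler;

// Hypothetical handler with two extractors; SeqHandler makes sure the first
// extractor is resolved (and can fail) before the second one is even reported.
async fn update_thing(
    name: web::Path<String>,
    body: web::Json<serde_json::Value>,
) -> HttpResponse {
    HttpResponse::Ok().body(format!("{}: {}", name, body.into_inner()))
}

pub fn configure(cfg: &mut web::ServiceConfig) {
    cfg.service(web::resource("/things/{name}").route(web::put().to(SeqHandler(update_thing))));
}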
417
meilisearch/src/lib.rs
Normal file
@ -0,0 +1,417 @@
#![allow(rustdoc::private_intra_doc_links)]
|
||||
#[macro_use]
|
||||
pub mod error;
|
||||
pub mod analytics;
|
||||
#[macro_use]
|
||||
pub mod extractors;
|
||||
pub mod option;
|
||||
pub mod routes;
|
||||
pub mod search;
|
||||
|
||||
#[cfg(feature = "metrics")]
|
||||
pub mod metrics;
|
||||
#[cfg(feature = "metrics")]
|
||||
pub mod route_metrics;
|
||||
|
||||
use std::fs::File;
|
||||
use std::io::{BufReader, BufWriter};
|
||||
use std::path::Path;
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::Arc;
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
|
||||
use actix_cors::Cors;
|
||||
use actix_http::body::MessageBody;
|
||||
use actix_web::dev::{ServiceFactory, ServiceResponse};
|
||||
use actix_web::error::JsonPayloadError;
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{middleware, web, HttpRequest};
|
||||
use analytics::Analytics;
|
||||
use anyhow::bail;
|
||||
use error::PayloadError;
|
||||
use extractors::payload::PayloadConfig;
|
||||
use http::header::CONTENT_TYPE;
|
||||
use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
|
||||
use log::error;
|
||||
use meilisearch_auth::AuthController;
|
||||
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
|
||||
use meilisearch_types::settings::apply_settings_to_builder;
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use meilisearch_types::versioning::{check_version_file, create_version_file};
|
||||
use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
|
||||
pub use option::Opt;
|
||||
|
||||
use crate::error::MeilisearchHttpError;
|
||||
|
||||
pub static AUTOBATCHING_ENABLED: AtomicBool = AtomicBool::new(false);
|
||||
|
||||
/// Check if a db is empty. It does not provide any information on the
|
||||
/// validity of the data in it.
|
||||
/// We consider a database as non-empty when it's a non-empty directory.
|
||||
fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
|
||||
let db_path = db_path.as_ref();
|
||||
|
||||
if !db_path.exists() {
|
||||
true
|
||||
// if we encounter an error or if the db is a file we consider the db non empty
|
||||
} else if let Ok(dir) = db_path.read_dir() {
|
||||
dir.count() == 0
|
||||
} else {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
pub fn create_app(
|
||||
index_scheduler: Data<IndexScheduler>,
|
||||
auth_controller: AuthController,
|
||||
opt: Opt,
|
||||
analytics: Arc<dyn Analytics>,
|
||||
enable_dashboard: bool,
|
||||
) -> actix_web::App<
|
||||
impl ServiceFactory<
|
||||
actix_web::dev::ServiceRequest,
|
||||
Config = (),
|
||||
Response = ServiceResponse<impl MessageBody>,
|
||||
Error = actix_web::Error,
|
||||
InitError = (),
|
||||
>,
|
||||
> {
|
||||
let app = actix_web::App::new()
|
||||
.configure(|s| {
|
||||
configure_data(
|
||||
s,
|
||||
index_scheduler.clone(),
|
||||
auth_controller.clone(),
|
||||
&opt,
|
||||
analytics.clone(),
|
||||
)
|
||||
})
|
||||
.configure(routes::configure)
|
||||
.configure(|s| dashboard(s, enable_dashboard));
|
||||
#[cfg(feature = "metrics")]
|
||||
let app = app.configure(|s| configure_metrics_route(s, opt.enable_metrics_route));
|
||||
|
||||
#[cfg(feature = "metrics")]
|
||||
let app = app.wrap(Condition::new(opt.enable_metrics_route, route_metrics::RouteMetrics));
|
||||
app.wrap(
|
||||
Cors::default()
|
||||
.send_wildcard()
|
||||
.allow_any_header()
|
||||
.allow_any_origin()
|
||||
.allow_any_method()
|
||||
.max_age(86_400), // 24h
|
||||
)
|
||||
.wrap(middleware::Logger::default())
|
||||
.wrap(middleware::Compress::default())
|
||||
.wrap(middleware::NormalizePath::new(middleware::TrailingSlash::Trim))
|
||||
}
|
||||
|
||||
// TODO: TAMO: Finish setting up things
|
||||
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, AuthController)> {
|
||||
// we don't want to create anything in the data.ms yet, thus we
|
||||
// wrap our two builders in a closure that'll be executed later.
|
||||
let auth_controller_builder = || AuthController::new(&opt.db_path, &opt.master_key);
|
||||
let index_scheduler_builder = || {
|
||||
IndexScheduler::new(IndexSchedulerOptions {
|
||||
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
|
||||
auth_path: opt.db_path.join("auth"),
|
||||
tasks_path: opt.db_path.join("tasks"),
|
||||
update_file_path: opt.db_path.join("update_files"),
|
||||
indexes_path: opt.db_path.join("indexes"),
|
||||
snapshots_path: opt.snapshot_dir.clone(),
|
||||
dumps_path: opt.dump_dir.clone(),
|
||||
task_db_size: opt.max_task_db_size.get_bytes() as usize,
|
||||
index_size: opt.max_index_size.get_bytes() as usize,
|
||||
indexer_config: (&opt.indexer_options).try_into()?,
|
||||
autobatching_enabled: !opt.scheduler_options.disable_auto_batching,
|
||||
})
|
||||
};
|
||||
|
||||
enum OnFailure {
|
||||
RemoveDb,
|
||||
KeepDb,
|
||||
}
|
||||
|
||||
let meilisearch_builder = |on_failure: OnFailure| -> anyhow::Result<_> {
|
||||
// if anything goes wrong we delete the `data.ms` entirely.
|
||||
match (
|
||||
index_scheduler_builder().map_err(anyhow::Error::from),
|
||||
auth_controller_builder().map_err(anyhow::Error::from),
|
||||
create_version_file(&opt.db_path).map_err(anyhow::Error::from),
|
||||
) {
|
||||
(Ok(i), Ok(a), Ok(())) => Ok((i, a)),
|
||||
(Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => {
|
||||
if matches!(on_failure, OnFailure::RemoveDb) {
|
||||
std::fs::remove_dir_all(&opt.db_path)?;
|
||||
}
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let empty_db = is_empty_db(&opt.db_path);
|
||||
let (index_scheduler, auth_controller) = if let Some(ref snapshot_path) = opt.import_snapshot {
|
||||
let snapshot_path_exists = snapshot_path.exists();
|
||||
if empty_db && snapshot_path_exists {
|
||||
match compression::from_tar_gz(snapshot_path, &opt.db_path) {
|
||||
Ok(()) => meilisearch_builder(OnFailure::RemoveDb)?,
|
||||
Err(e) => {
|
||||
std::fs::remove_dir_all(&opt.db_path)?;
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
} else if !empty_db && !opt.ignore_snapshot_if_db_exists {
|
||||
bail!(
|
||||
"database already exists at {:?}, try to delete it or rename it",
|
||||
opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned())
|
||||
)
|
||||
} else if !snapshot_path_exists && !opt.ignore_missing_snapshot {
|
||||
bail!("snapshot doesn't exist at {}", snapshot_path.display())
|
||||
} else {
|
||||
meilisearch_builder(OnFailure::RemoveDb)?
|
||||
}
|
||||
} else if let Some(ref path) = opt.import_dump {
|
||||
let src_path_exists = path.exists();
|
||||
if empty_db && src_path_exists {
|
||||
let (mut index_scheduler, mut auth_controller) =
|
||||
meilisearch_builder(OnFailure::RemoveDb)?;
|
||||
match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
|
||||
Ok(()) => (index_scheduler, auth_controller),
|
||||
Err(e) => {
|
||||
std::fs::remove_dir_all(&opt.db_path)?;
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
} else if !empty_db && !opt.ignore_dump_if_db_exists {
|
||||
bail!(
|
||||
"database already exists at {:?}, try to delete it or rename it",
|
||||
opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned())
|
||||
)
|
||||
} else if !src_path_exists && !opt.ignore_missing_dump {
|
||||
bail!("dump doesn't exist at {:?}", path)
|
||||
} else {
|
||||
let (mut index_scheduler, mut auth_controller) =
|
||||
meilisearch_builder(OnFailure::RemoveDb)?;
|
||||
match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
|
||||
Ok(()) => (index_scheduler, auth_controller),
|
||||
Err(e) => {
|
||||
std::fs::remove_dir_all(&opt.db_path)?;
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if !empty_db {
|
||||
check_version_file(&opt.db_path)?;
|
||||
}
|
||||
meilisearch_builder(OnFailure::KeepDb)?
|
||||
};
|
||||
|
||||
// We create a loop in a thread that registers snapshotCreation tasks
|
||||
let index_scheduler = Arc::new(index_scheduler);
|
||||
if opt.schedule_snapshot {
|
||||
let snapshot_delay = Duration::from_secs(opt.snapshot_interval_sec);
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
thread::Builder::new()
|
||||
.name(String::from("register-snapshot-tasks"))
|
||||
.spawn(move || loop {
|
||||
thread::sleep(snapshot_delay);
|
||||
if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation) {
|
||||
error!("Error while registering snapshot: {}", e);
|
||||
}
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
Ok((index_scheduler, auth_controller))
|
||||
}
|
||||
|
||||
fn import_dump(
|
||||
db_path: &Path,
|
||||
dump_path: &Path,
|
||||
index_scheduler: &mut IndexScheduler,
|
||||
auth: &mut AuthController,
|
||||
) -> Result<(), anyhow::Error> {
|
||||
let reader = File::open(dump_path)?;
|
||||
let mut dump_reader = dump::DumpReader::open(reader)?;
|
||||
|
||||
if let Some(date) = dump_reader.date() {
|
||||
log::info!(
|
||||
"Importing a dump of meilisearch `{:?}` from the {}",
|
||||
dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
|
||||
date
|
||||
);
|
||||
} else {
|
||||
log::info!(
|
||||
"Importing a dump of meilisearch `{:?}`",
|
||||
dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
|
||||
);
|
||||
}
|
||||
|
||||
let instance_uid = dump_reader.instance_uid()?;
|
||||
|
||||
// 1. Import the instance-uid.
|
||||
if let Some(ref instance_uid) = instance_uid {
|
||||
// we don't want to panic if there is an error with the instance-uid.
|
||||
let _ = std::fs::write(db_path.join("instance-uid"), instance_uid.to_string().as_bytes());
|
||||
};
|
||||
|
||||
// 2. Import the `Key`s.
|
||||
let mut keys = Vec::new();
|
||||
auth.raw_delete_all_keys()?;
|
||||
for key in dump_reader.keys()? {
|
||||
let key = key?;
|
||||
auth.raw_insert_key(key.clone())?;
|
||||
keys.push(key);
|
||||
}
|
||||
|
||||
let indexer_config = index_scheduler.indexer_config();
|
||||
|
||||
// /!\ The tasks must be imported AFTER importing the indexes or else the scheduler might
|
||||
// try to process tasks while we're trying to import the indexes.
|
||||
|
||||
// 3. Import the indexes.
|
||||
for index_reader in dump_reader.indexes()? {
|
||||
let mut index_reader = index_reader?;
|
||||
let metadata = index_reader.metadata();
|
||||
log::info!("Importing index `{}`.", metadata.uid);
|
||||
let index = index_scheduler.create_raw_index(&metadata.uid)?;
|
||||
|
||||
let mut wtxn = index.write_txn()?;
|
||||
|
||||
let mut builder = milli::update::Settings::new(&mut wtxn, &index, indexer_config);
|
||||
// 3.1 Import the primary key if there is one.
|
||||
if let Some(ref primary_key) = metadata.primary_key {
|
||||
builder.set_primary_key(primary_key.to_string());
|
||||
}
|
||||
|
||||
// 3.2 Import the settings.
|
||||
log::info!("Importing the settings.");
|
||||
let settings = index_reader.settings()?;
|
||||
apply_settings_to_builder(&settings, &mut builder);
|
||||
builder.execute(|indexing_step| log::debug!("update: {:?}", indexing_step), || false)?;
|
||||
|
||||
// 3.3 Import the documents.
|
||||
// 3.3.1 We need to recreate the grenad+obkv format accepted by the index.
|
||||
log::info!("Importing the documents.");
|
||||
let file = tempfile::tempfile()?;
|
||||
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
|
||||
for document in index_reader.documents()? {
|
||||
builder.append_json_object(&document?)?;
|
||||
}
|
||||
|
||||
// This flushes the content of the batch builder.
|
||||
let file = builder.into_inner()?.into_inner()?;
|
||||
|
||||
// 3.3.2 We feed it to the milli index.
|
||||
let reader = BufReader::new(file);
|
||||
let reader = DocumentsBatchReader::from_reader(reader)?;
|
||||
|
||||
let builder = milli::update::IndexDocuments::new(
|
||||
&mut wtxn,
|
||||
&index,
|
||||
indexer_config,
|
||||
IndexDocumentsConfig {
|
||||
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||
..Default::default()
|
||||
},
|
||||
|indexing_step| log::debug!("update: {:?}", indexing_step),
|
||||
|| false,
|
||||
)?;
|
||||
|
||||
let (builder, user_result) = builder.add_documents(reader)?;
|
||||
log::info!("{} documents found.", user_result?);
|
||||
builder.execute()?;
|
||||
wtxn.commit()?;
|
||||
log::info!("All documents successfully imported.");
|
||||
}
|
||||
|
||||
// 4. Import the tasks.
|
||||
for ret in dump_reader.tasks()? {
|
||||
let (task, file) = ret?;
|
||||
index_scheduler.register_dumped_task(task, file)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn configure_data(
|
||||
config: &mut web::ServiceConfig,
|
||||
index_scheduler: Data<IndexScheduler>,
|
||||
auth: AuthController,
|
||||
opt: &Opt,
|
||||
analytics: Arc<dyn Analytics>,
|
||||
) {
|
||||
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
|
||||
config
|
||||
.app_data(index_scheduler)
|
||||
.app_data(auth)
|
||||
.app_data(web::Data::from(analytics))
|
||||
.app_data(
|
||||
web::JsonConfig::default()
|
||||
.content_type(|mime| mime == mime::APPLICATION_JSON)
|
||||
.error_handler(|err, req: &HttpRequest| match err {
|
||||
JsonPayloadError::ContentType => match req.headers().get(CONTENT_TYPE) {
|
||||
Some(content_type) => MeilisearchHttpError::InvalidContentType(
|
||||
content_type.to_str().unwrap_or("unknown").to_string(),
|
||||
vec![mime::APPLICATION_JSON.to_string()],
|
||||
)
|
||||
.into(),
|
||||
None => MeilisearchHttpError::MissingContentType(vec![
|
||||
mime::APPLICATION_JSON.to_string(),
|
||||
])
|
||||
.into(),
|
||||
},
|
||||
err => PayloadError::from(err).into(),
|
||||
}),
|
||||
)
|
||||
.app_data(PayloadConfig::new(http_payload_size_limit))
|
||||
.app_data(
|
||||
web::QueryConfig::default().error_handler(|err, _req| PayloadError::from(err).into()),
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(feature = "mini-dashboard")]
|
||||
pub fn dashboard(config: &mut web::ServiceConfig, enable_frontend: bool) {
|
||||
use actix_web::HttpResponse;
|
||||
use static_files::Resource;
|
||||
|
||||
mod generated {
|
||||
include!(concat!(env!("OUT_DIR"), "/generated.rs"));
|
||||
}
|
||||
|
||||
if enable_frontend {
|
||||
let generated = generated::generate();
|
||||
// Generate routes for mini-dashboard assets
|
||||
for (path, resource) in generated.into_iter() {
|
||||
let Resource { mime_type, data, .. } = resource;
|
||||
// Redirect index.html to /
|
||||
if path == "index.html" {
|
||||
config.service(web::resource("/").route(web::get().to(move || async move {
|
||||
HttpResponse::Ok().content_type(mime_type).body(data)
|
||||
})));
|
||||
} else {
|
||||
config.service(web::resource(path).route(web::get().to(move || async move {
|
||||
HttpResponse::Ok().content_type(mime_type).body(data)
|
||||
})));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
config.service(web::resource("/").route(web::get().to(routes::running)));
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "mini-dashboard"))]
|
||||
pub fn dashboard(config: &mut web::ServiceConfig, _enable_frontend: bool) {
|
||||
config.service(web::resource("/").route(web::get().to(routes::running)));
|
||||
}
|
||||
|
||||
#[cfg(feature = "metrics")]
|
||||
pub fn configure_metrics_route(config: &mut web::ServiceConfig, enable_metrics_route: bool) {
|
||||
if enable_metrics_route {
|
||||
config.service(
|
||||
web::resource("/metrics").route(web::get().to(crate::route_metrics::get_metrics)),
|
||||
);
|
||||
}
|
||||
}
|
166
meilisearch/src/main.rs
Normal file
@ -0,0 +1,166 @@
use std::env;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use actix_web::http::KeepAlive;
|
||||
use actix_web::web::Data;
|
||||
use actix_web::HttpServer;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch::analytics::Analytics;
|
||||
use meilisearch::{analytics, create_app, setup_meilisearch, Opt};
|
||||
use meilisearch_auth::AuthController;
|
||||
|
||||
#[global_allocator]
|
||||
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
/// Does all the setup before Meilisearch is launched.
|
||||
fn setup(opt: &Opt) -> anyhow::Result<()> {
|
||||
let mut log_builder = env_logger::Builder::new();
|
||||
log_builder.parse_filters(&opt.log_level);
|
||||
if opt.log_level == "info" {
|
||||
// if we are in info we only allow the warn log_level for milli
|
||||
log_builder.filter_module("milli", log::LevelFilter::Warn);
|
||||
}
|
||||
|
||||
log_builder.init();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[actix_web::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let (opt, config_read_from) = Opt::try_build()?;
|
||||
|
||||
setup(&opt)?;
|
||||
|
||||
match opt.env.as_ref() {
|
||||
"production" => {
|
||||
if opt.master_key.is_none() {
|
||||
anyhow::bail!(
|
||||
"In production mode, the environment variable MEILI_MASTER_KEY is mandatory"
|
||||
)
|
||||
}
|
||||
}
|
||||
"development" => (),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
|
||||
let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?;
|
||||
|
||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
let analytics = if !opt.no_analytics {
|
||||
analytics::SegmentAnalytics::new(&opt, index_scheduler.clone()).await
|
||||
} else {
|
||||
analytics::MockAnalytics::new(&opt)
|
||||
};
|
||||
#[cfg(any(debug_assertions, not(feature = "analytics")))]
|
||||
let analytics = analytics::MockAnalytics::new(&opt);
|
||||
|
||||
print_launch_resume(&opt, analytics.clone(), config_read_from);
|
||||
|
||||
run_http(index_scheduler, auth_controller, opt, analytics).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn run_http(
|
||||
index_scheduler: Arc<IndexScheduler>,
|
||||
auth_controller: AuthController,
|
||||
opt: Opt,
|
||||
analytics: Arc<dyn Analytics>,
|
||||
) -> anyhow::Result<()> {
|
||||
let enable_dashboard = &opt.env == "development";
|
||||
let opt_clone = opt.clone();
|
||||
let index_scheduler = Data::from(index_scheduler);
|
||||
|
||||
let http_server = HttpServer::new(move || {
|
||||
create_app(
|
||||
index_scheduler.clone(),
|
||||
auth_controller.clone(),
|
||||
opt.clone(),
|
||||
analytics.clone(),
|
||||
enable_dashboard,
|
||||
)
|
||||
})
|
||||
// Disabling signals allows the server to terminate immediately when the user presses CTRL-C
|
||||
.disable_signals()
|
||||
.keep_alive(KeepAlive::Os);
|
||||
|
||||
if let Some(config) = opt_clone.get_ssl_config()? {
|
||||
http_server.bind_rustls(opt_clone.http_addr, config)?.run().await?;
|
||||
} else {
|
||||
http_server.bind(&opt_clone.http_addr)?.run().await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn print_launch_resume(
|
||||
opt: &Opt,
|
||||
analytics: Arc<dyn Analytics>,
|
||||
config_read_from: Option<PathBuf>,
|
||||
) {
|
||||
let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
|
||||
let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
|
||||
let protocol =
|
||||
if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() { "https" } else { "http" };
|
||||
let ascii_name = r#"
|
||||
888b d888 d8b 888 d8b 888
|
||||
8888b d8888 Y8P 888 Y8P 888
|
||||
88888b.d88888 888 888
|
||||
888Y88888P888 .d88b. 888 888 888 .d8888b .d88b. 8888b. 888d888 .d8888b 88888b.
|
||||
888 Y888P 888 d8P Y8b 888 888 888 88K d8P Y8b "88b 888P" d88P" 888 "88b
|
||||
888 Y8P 888 88888888 888 888 888 "Y8888b. 88888888 .d888888 888 888 888 888
|
||||
888 " 888 Y8b. 888 888 888 X88 Y8b. 888 888 888 Y88b. 888 888
|
||||
888 888 "Y8888 888 888 888 88888P' "Y8888 "Y888888 888 "Y8888P 888 888
|
||||
"#;
|
||||
|
||||
eprintln!("{}", ascii_name);
|
||||
|
||||
eprintln!(
|
||||
"Config file path:\t{:?}",
|
||||
config_read_from
|
||||
.map(|config_file_path| config_file_path.display().to_string())
|
||||
.unwrap_or_else(|| "none".to_string())
|
||||
);
|
||||
eprintln!("Database path:\t\t{:?}", opt.db_path);
|
||||
eprintln!("Server listening on:\t\"{}://{}\"", protocol, opt.http_addr);
|
||||
eprintln!("Environment:\t\t{:?}", opt.env);
|
||||
eprintln!("Commit SHA:\t\t{:?}", commit_sha.to_string());
|
||||
eprintln!("Commit date:\t\t{:?}", commit_date.to_string());
|
||||
eprintln!("Package version:\t{:?}", env!("CARGO_PKG_VERSION").to_string());
|
||||
|
||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
{
|
||||
if !opt.no_analytics {
|
||||
eprintln!(
|
||||
"
|
||||
Thank you for using Meilisearch!
|
||||
|
||||
We collect anonymized analytics to improve our product and your experience. To learn more, including how to turn off analytics, visit our dedicated documentation page: https://docs.meilisearch.com/learn/what_is_meilisearch/telemetry.html
|
||||
|
||||
Anonymous telemetry:\t\"Enabled\""
|
||||
);
|
||||
} else {
|
||||
eprintln!("Anonymous telemetry:\t\"Disabled\"");
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(instance_uid) = analytics.instance_uid() {
|
||||
eprintln!("Instance UID:\t\t\"{}\"", instance_uid);
|
||||
}
|
||||
|
||||
eprintln!();
|
||||
|
||||
if opt.master_key.is_some() {
|
||||
eprintln!("A Master Key has been set. Requests to Meilisearch won't be authorized unless you provide an authentication key.");
|
||||
} else {
|
||||
eprintln!("No master key found; The server will accept unidentified requests. \
|
||||
If you need some protection in development mode, please export a key: export MEILI_MASTER_KEY=xxx");
|
||||
}
|
||||
|
||||
eprintln!();
|
||||
eprintln!("Documentation:\t\thttps://docs.meilisearch.com");
|
||||
eprintln!("Source code:\t\thttps://github.com/meilisearch/meilisearch");
|
||||
eprintln!("Contact:\t\thttps://docs.meilisearch.com/resources/contact.html");
|
||||
eprintln!();
|
||||
}
|
36
meilisearch/src/metrics.rs
Normal file
@ -0,0 +1,36 @@
use lazy_static::lazy_static;
use prometheus::{
    opts, register_histogram_vec, register_int_counter_vec, register_int_gauge,
    register_int_gauge_vec, HistogramVec, IntCounterVec, IntGauge, IntGaugeVec,
};

const HTTP_RESPONSE_TIME_CUSTOM_BUCKETS: &[f64; 14] = &[
    0.0005, 0.0008, 0.00085, 0.0009, 0.00095, 0.001, 0.00105, 0.0011, 0.00115, 0.0012, 0.0015,
    0.002, 0.003, 1.0,
];

lazy_static! {
    pub static ref HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
        opts!("http_requests_total", "HTTP requests total"),
        &["method", "path"]
    )
    .expect("Can't create a metric");
    pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge =
        register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch Db Size In Bytes"))
            .expect("Can't create a metric");
    pub static ref MEILISEARCH_INDEX_COUNT: IntGauge =
        register_int_gauge!(opts!("meilisearch_index_count", "Meilisearch Index Count"))
            .expect("Can't create a metric");
    pub static ref MEILISEARCH_INDEX_DOCS_COUNT: IntGaugeVec = register_int_gauge_vec!(
        opts!("meilisearch_index_docs_count", "Meilisearch Index Docs Count"),
        &["index"]
    )
    .expect("Can't create a metric");
    pub static ref HTTP_RESPONSE_TIME_SECONDS: HistogramVec = register_histogram_vec!(
        "http_response_time_seconds",
        "HTTP response times",
        &["method", "path"],
        HTTP_RESPONSE_TIME_CUSTOM_BUCKETS.to_vec()
    )
    .expect("Can't create a metric");
}
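
Editor's note: a minimal sketch of how these collectors might be updated, assuming a middleware or handler has the method, path, and elapsed time at hand; in this diff the actual recording lives in the `route_metrics::RouteMetrics` middleware (behind the `metrics` feature), and the `/metrics` route only gathers and encodes the registry.

use std::time::Instant;

use crate::metrics::{HTTP_REQUESTS_TOTAL, HTTP_RESPONSE_TIME_SECONDS};

// Hypothetical instrumentation around one request.
fn record_request(method: &str, path: &str, started_at: Instant) {
    HTTP_REQUESTS_TOTAL.with_label_values(&[method, path]).inc();
    HTTP_RESPONSE_TIME_SECONDS
        .with_label_values(&[method, path])
        .observe(started_at.elapsed().as_secs_f64());
}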
761
meilisearch/src/option.rs
Normal file
@ -0,0 +1,761 @@
use std::convert::TryFrom;
|
||||
use std::env::VarError;
|
||||
use std::ffi::OsStr;
|
||||
use std::io::{BufReader, Read};
|
||||
use std::num::ParseIntError;
|
||||
use std::ops::Deref;
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use std::{env, fmt, fs};
|
||||
|
||||
use byte_unit::{Byte, ByteError};
|
||||
use clap::Parser;
|
||||
use meilisearch_types::milli::update::IndexerConfig;
|
||||
use rustls::server::{
|
||||
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, ServerSessionMemoryCache,
|
||||
};
|
||||
use rustls::RootCertStore;
|
||||
use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sysinfo::{RefreshKind, System, SystemExt};
|
||||
|
||||
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
|
||||
|
||||
const MEILI_DB_PATH: &str = "MEILI_DB_PATH";
|
||||
const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
|
||||
const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
|
||||
const MEILI_ENV: &str = "MEILI_ENV";
|
||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
|
||||
const MEILI_MAX_INDEX_SIZE: &str = "MEILI_MAX_INDEX_SIZE";
|
||||
const MEILI_MAX_TASK_DB_SIZE: &str = "MEILI_MAX_TASK_DB_SIZE";
|
||||
const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
|
||||
const MEILI_SSL_CERT_PATH: &str = "MEILI_SSL_CERT_PATH";
|
||||
const MEILI_SSL_KEY_PATH: &str = "MEILI_SSL_KEY_PATH";
|
||||
const MEILI_SSL_AUTH_PATH: &str = "MEILI_SSL_AUTH_PATH";
|
||||
const MEILI_SSL_OCSP_PATH: &str = "MEILI_SSL_OCSP_PATH";
|
||||
const MEILI_SSL_REQUIRE_AUTH: &str = "MEILI_SSL_REQUIRE_AUTH";
|
||||
const MEILI_SSL_RESUMPTION: &str = "MEILI_SSL_RESUMPTION";
|
||||
const MEILI_SSL_TICKETS: &str = "MEILI_SSL_TICKETS";
|
||||
const MEILI_IMPORT_SNAPSHOT: &str = "MEILI_IMPORT_SNAPSHOT";
|
||||
const MEILI_IGNORE_MISSING_SNAPSHOT: &str = "MEILI_IGNORE_MISSING_SNAPSHOT";
|
||||
const MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS: &str = "MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS";
|
||||
const MEILI_SNAPSHOT_DIR: &str = "MEILI_SNAPSHOT_DIR";
|
||||
const MEILI_SCHEDULE_SNAPSHOT: &str = "MEILI_SCHEDULE_SNAPSHOT";
|
||||
const MEILI_SNAPSHOT_INTERVAL_SEC: &str = "MEILI_SNAPSHOT_INTERVAL_SEC";
|
||||
const MEILI_IMPORT_DUMP: &str = "MEILI_IMPORT_DUMP";
|
||||
const MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP";
|
||||
const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS";
|
||||
const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR";
|
||||
const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
|
||||
#[cfg(feature = "metrics")]
|
||||
const MEILI_ENABLE_METRICS_ROUTE: &str = "MEILI_ENABLE_METRICS_ROUTE";
|
||||
|
||||
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
|
||||
const DEFAULT_DB_PATH: &str = "./data.ms";
|
||||
const DEFAULT_HTTP_ADDR: &str = "localhost:7700";
|
||||
const DEFAULT_ENV: &str = "development";
|
||||
const DEFAULT_MAX_INDEX_SIZE: &str = "100 GiB";
|
||||
const DEFAULT_MAX_TASK_DB_SIZE: &str = "100 GiB";
|
||||
const DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT: &str = "100 MB";
|
||||
const DEFAULT_SNAPSHOT_DIR: &str = "snapshots/";
|
||||
const DEFAULT_SNAPSHOT_INTERVAL_SEC: u64 = 86400;
|
||||
const DEFAULT_DUMP_DIR: &str = "dumps/";
|
||||
const DEFAULT_LOG_LEVEL: &str = "INFO";
|
||||
|
||||
const MEILI_MAX_INDEXING_MEMORY: &str = "MEILI_MAX_INDEXING_MEMORY";
|
||||
const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS";
|
||||
const DISABLE_AUTO_BATCHING: &str = "DISABLE_AUTO_BATCHING";
|
||||
const DEFAULT_LOG_EVERY_N: usize = 100000;
|
||||
|
||||
#[derive(Debug, Clone, Parser, Deserialize)]
|
||||
#[clap(version, next_display_order = None)]
|
||||
#[serde(rename_all = "snake_case", deny_unknown_fields)]
|
||||
pub struct Opt {
|
||||
/// Designates the location where database files will be created and retrieved.
|
||||
#[clap(long, env = MEILI_DB_PATH, default_value_os_t = default_db_path())]
|
||||
#[serde(default = "default_db_path")]
|
||||
pub db_path: PathBuf,
|
||||
|
||||
/// Sets the HTTP address and port Meilisearch will use.
|
||||
#[clap(long, env = MEILI_HTTP_ADDR, default_value_t = default_http_addr())]
|
||||
#[serde(default = "default_http_addr")]
|
||||
pub http_addr: String,
|
||||
|
||||
/// Sets the instance's master key, automatically protecting all routes except `GET /health`.
|
||||
#[clap(long, env = MEILI_MASTER_KEY)]
|
||||
pub master_key: Option<String>,
|
||||
|
||||
/// Configures the instance's environment. Value must be either `production` or `development`.
|
||||
#[clap(long, env = MEILI_ENV, default_value_t = default_env(), value_parser = POSSIBLE_ENV)]
|
||||
#[serde(default = "default_env")]
|
||||
pub env: String,
|
||||
|
||||
/// Deactivates Meilisearch's built-in telemetry when provided.
|
||||
///
|
||||
/// Meilisearch automatically collects data from all instances that do not opt out using this flag.
|
||||
/// All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted
|
||||
/// at any time.
|
||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
#[serde(default)] // we can't send true
|
||||
#[clap(long, env = MEILI_NO_ANALYTICS)]
|
||||
pub no_analytics: bool,
|
||||
|
||||
/// Sets the maximum size of the index. Value must be given in bytes or with an explicit base unit (for instance: 107374182400, '107.7Gb', or '107374 Mb').
|
||||
#[clap(long, env = MEILI_MAX_INDEX_SIZE, default_value_t = default_max_index_size())]
|
||||
#[serde(default = "default_max_index_size")]
|
||||
pub max_index_size: Byte,
|
||||
|
||||
/// Sets the maximum size of the task database. Value must be given in bytes or with an explicit
/// base unit (for instance: 107374182400, '107.7Gb', or '107374 Mb').
|
||||
#[clap(long, env = MEILI_MAX_TASK_DB_SIZE, default_value_t = default_max_task_db_size())]
|
||||
#[serde(default = "default_max_task_db_size")]
|
||||
pub max_task_db_size: Byte,
|
||||
|
||||
/// Sets the maximum size of accepted payloads. Value must be given in bytes or with an explicit
/// base unit (for instance: 107374182400, '107.7Gb', or '107374 Mb').
|
||||
#[clap(long, env = MEILI_HTTP_PAYLOAD_SIZE_LIMIT, default_value_t = default_http_payload_size_limit())]
|
||||
#[serde(default = "default_http_payload_size_limit")]
|
||||
pub http_payload_size_limit: Byte,
|
||||
|
||||
/// Sets the server's SSL certificates.
|
||||
#[clap(long, env = MEILI_SSL_CERT_PATH, value_parser)]
|
||||
pub ssl_cert_path: Option<PathBuf>,
|
||||
|
||||
/// Sets the server's SSL key files.
|
||||
#[clap(long, env = MEILI_SSL_KEY_PATH, value_parser)]
|
||||
pub ssl_key_path: Option<PathBuf>,
|
||||
|
||||
/// Enables client authentication in the specified path.
|
||||
#[clap(long, env = MEILI_SSL_AUTH_PATH, value_parser)]
|
||||
pub ssl_auth_path: Option<PathBuf>,
|
||||
|
||||
/// Sets the server's OCSP file. *Optional*
|
||||
///
|
||||
/// Reads DER-encoded OCSP response from OCSPFILE and staple to certificate.
|
||||
#[clap(long, env = MEILI_SSL_OCSP_PATH, value_parser)]
|
||||
pub ssl_ocsp_path: Option<PathBuf>,
|
||||
|
||||
/// Makes SSL authentication mandatory.
|
||||
#[serde(default)]
|
||||
#[clap(long, env = MEILI_SSL_REQUIRE_AUTH)]
|
||||
pub ssl_require_auth: bool,
|
||||
|
||||
/// Activates SSL session resumption.
|
||||
#[serde(default)]
|
||||
#[clap(long, env = MEILI_SSL_RESUMPTION)]
|
||||
pub ssl_resumption: bool,
|
||||
|
||||
/// Activates SSL tickets.
|
||||
#[serde(default)]
|
||||
#[clap(long, env = MEILI_SSL_TICKETS)]
|
||||
pub ssl_tickets: bool,
|
||||
|
||||
/// Launches Meilisearch after importing a previously-generated snapshot at the given filepath.
|
||||
#[clap(long, env = MEILI_IMPORT_SNAPSHOT)]
|
||||
pub import_snapshot: Option<PathBuf>,
|
||||
|
||||
/// Prevents a Meilisearch instance from throwing an error when `--import-snapshot`
|
||||
/// does not point to a valid snapshot file.
|
||||
///
|
||||
/// This command will throw an error if `--import-snapshot` is not defined.
|
||||
#[clap(
|
||||
long,
|
||||
env = MEILI_IGNORE_MISSING_SNAPSHOT,
|
||||
requires = "import_snapshot"
|
||||
)]
|
||||
#[serde(default)]
|
||||
pub ignore_missing_snapshot: bool,
|
||||
|
||||
/// Prevents a Meilisearch instance with an existing database from throwing an
|
||||
/// error when using `--import-snapshot`. Instead, the snapshot will be ignored
|
||||
/// and Meilisearch will launch using the existing database.
|
||||
///
|
||||
/// This command will throw an error if `--import-snapshot` is not defined.
|
||||
#[clap(
|
||||
long,
|
||||
env = MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS,
|
||||
requires = "import_snapshot"
|
||||
)]
|
||||
#[serde(default)]
|
||||
pub ignore_snapshot_if_db_exists: bool,
|
||||
|
||||
/// Sets the directory where Meilisearch will store snapshots.
|
||||
#[clap(long, env = MEILI_SNAPSHOT_DIR, default_value_os_t = default_snapshot_dir())]
|
||||
#[serde(default = "default_snapshot_dir")]
|
||||
pub snapshot_dir: PathBuf,
|
||||
|
||||
/// Activates scheduled snapshots when provided. Snapshots are disabled by default.
|
||||
#[clap(long, env = MEILI_SCHEDULE_SNAPSHOT)]
|
||||
#[serde(default)]
|
||||
pub schedule_snapshot: bool,
|
||||
|
||||
/// Defines the interval between each snapshot. Value must be given in seconds.
|
||||
#[clap(long, env = MEILI_SNAPSHOT_INTERVAL_SEC, default_value_t = default_snapshot_interval_sec())]
|
||||
#[serde(default = "default_snapshot_interval_sec")]
|
||||
pub snapshot_interval_sec: u64,
|
||||
|
||||
/// Imports the dump file located at the specified path. Path must point to a `.dump` file.
|
||||
/// If a database already exists, Meilisearch will throw an error and abort launch.
|
||||
#[clap(long, env = MEILI_IMPORT_DUMP, conflicts_with = "import_snapshot")]
|
||||
pub import_dump: Option<PathBuf>,
|
||||
|
||||
/// Prevents Meilisearch from throwing an error when `--import-dump` does not point to
|
||||
/// a valid dump file. Instead, Meilisearch will start normally without importing any dump.
|
||||
///
|
||||
/// This option will trigger an error if `--import-dump` is not defined.
|
||||
#[clap(long, env = MEILI_IGNORE_MISSING_DUMP, requires = "import_dump")]
|
||||
#[serde(default)]
|
||||
pub ignore_missing_dump: bool,
|
||||
|
||||
/// Prevents a Meilisearch instance with an existing database from throwing an error
|
||||
/// when using `--import-dump`. Instead, the dump will be ignored and Meilisearch will
|
||||
/// launch using the existing database.
|
||||
///
|
||||
/// This option will trigger an error if `--import-dump` is not defined.
|
||||
#[clap(long, env = MEILI_IGNORE_DUMP_IF_DB_EXISTS, requires = "import_dump")]
|
||||
#[serde(default)]
|
||||
pub ignore_dump_if_db_exists: bool,
|
||||
|
||||
/// Sets the directory where Meilisearch will create dump files.
|
||||
#[clap(long, env = MEILI_DUMP_DIR, default_value_os_t = default_dump_dir())]
|
||||
#[serde(default = "default_dump_dir")]
|
||||
pub dump_dir: PathBuf,
|
||||
|
||||
/// Defines how much detail should be present in Meilisearch's logs.
|
||||
///
|
||||
/// Meilisearch currently supports five log levels, listed in order of increasing verbosity: ERROR, WARN, INFO, DEBUG, TRACE.
|
||||
#[clap(long, env = MEILI_LOG_LEVEL, default_value_t = default_log_level())]
|
||||
#[serde(default = "default_log_level")]
|
||||
pub log_level: String,
|
||||
|
||||
/// Enables Prometheus metrics and /metrics route.
|
||||
#[cfg(feature = "metrics")]
|
||||
#[clap(long, env = MEILI_ENABLE_METRICS_ROUTE)]
|
||||
#[serde(default)]
|
||||
pub enable_metrics_route: bool,
|
||||
|
||||
#[serde(flatten)]
|
||||
#[clap(flatten)]
|
||||
pub indexer_options: IndexerOpts,
|
||||
|
||||
#[serde(flatten)]
|
||||
#[clap(flatten)]
|
||||
pub scheduler_options: SchedulerConfig,
|
||||
|
||||
/// Set the path to a configuration file that should be used to setup the engine.
|
||||
/// Format must be TOML.
|
||||
#[clap(long)]
|
||||
pub config_file_path: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl Opt {
|
||||
/// Whether analytics should be enabled or not.
|
||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
pub fn analytics(&self) -> bool {
|
||||
!self.no_analytics
|
||||
}
|
||||
|
||||
/// Build a new Opt from config file, env vars and cli args.
|
||||
pub fn try_build() -> anyhow::Result<(Self, Option<PathBuf>)> {
|
||||
// Parse the args to get the config_file_path.
|
||||
let mut opts = Opt::parse();
|
||||
let mut config_read_from = None;
|
||||
let user_specified_config_file_path = opts
|
||||
.config_file_path
|
||||
.clone()
|
||||
.or_else(|| env::var("MEILI_CONFIG_FILE_PATH").map(PathBuf::from).ok());
|
||||
let config_file_path = user_specified_config_file_path
|
||||
.clone()
|
||||
.unwrap_or_else(|| PathBuf::from(DEFAULT_CONFIG_FILE_PATH));
|
||||
|
||||
match std::fs::read(&config_file_path) {
|
||||
Ok(config) => {
|
||||
// If the file is successfully read, we deserialize it with `toml`.
|
||||
let opt_from_config = toml::from_slice::<Opt>(&config)?;
|
||||
// Return an error if config file contains 'config_file_path'
|
||||
// Using that key in the config file doesn't make sense because it creates a logical loop (config file referencing itself)
|
||||
if opt_from_config.config_file_path.is_some() {
|
||||
anyhow::bail!("`config_file_path` is not supported in the configuration file")
|
||||
}
|
||||
// We inject the values from the toml in the corresponding env vars if need be. Doing so, we respect the priority toml < env vars < cli args.
|
||||
opt_from_config.export_to_env();
|
||||
// Once injected we parse the cli args once again to take the new env vars into scope.
|
||||
opts = Opt::parse();
|
||||
config_read_from = Some(config_file_path);
|
||||
}
|
||||
Err(e) => {
|
||||
if let Some(path) = user_specified_config_file_path {
|
||||
// If we have an error while reading the file defined by the user.
|
||||
anyhow::bail!(
|
||||
"unable to open or read the {:?} configuration file: {}.",
|
||||
path,
|
||||
e,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok((opts, config_read_from))
|
||||
}
|
||||
|
||||
/// Exports the opts values to their corresponding env vars if they are not set.
|
||||
fn export_to_env(self) {
|
||||
let Opt {
|
||||
db_path,
|
||||
http_addr,
|
||||
master_key,
|
||||
env,
|
||||
max_index_size,
|
||||
max_task_db_size,
|
||||
http_payload_size_limit,
|
||||
ssl_cert_path,
|
||||
ssl_key_path,
|
||||
ssl_auth_path,
|
||||
ssl_ocsp_path,
|
||||
ssl_require_auth,
|
||||
ssl_resumption,
|
||||
ssl_tickets,
|
||||
snapshot_dir,
|
||||
schedule_snapshot,
|
||||
snapshot_interval_sec,
|
||||
dump_dir,
|
||||
log_level,
|
||||
indexer_options,
|
||||
scheduler_options,
|
||||
import_snapshot: _,
|
||||
ignore_missing_snapshot: _,
|
||||
ignore_snapshot_if_db_exists: _,
|
||||
import_dump: _,
|
||||
ignore_missing_dump: _,
|
||||
ignore_dump_if_db_exists: _,
|
||||
config_file_path: _,
|
||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
no_analytics,
|
||||
#[cfg(feature = "metrics")]
|
||||
enable_metrics_route,
|
||||
} = self;
|
||||
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
|
||||
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
|
||||
if let Some(master_key) = master_key {
|
||||
export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
|
||||
}
|
||||
export_to_env_if_not_present(MEILI_ENV, env);
|
||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
{
|
||||
export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
|
||||
}
|
||||
export_to_env_if_not_present(MEILI_MAX_INDEX_SIZE, max_index_size.to_string());
|
||||
export_to_env_if_not_present(MEILI_MAX_TASK_DB_SIZE, max_task_db_size.to_string());
|
||||
export_to_env_if_not_present(
|
||||
MEILI_HTTP_PAYLOAD_SIZE_LIMIT,
|
||||
http_payload_size_limit.to_string(),
|
||||
);
|
||||
if let Some(ssl_cert_path) = ssl_cert_path {
|
||||
export_to_env_if_not_present(MEILI_SSL_CERT_PATH, ssl_cert_path);
|
||||
}
|
||||
if let Some(ssl_key_path) = ssl_key_path {
|
||||
export_to_env_if_not_present(MEILI_SSL_KEY_PATH, ssl_key_path);
|
||||
}
|
||||
if let Some(ssl_auth_path) = ssl_auth_path {
|
||||
export_to_env_if_not_present(MEILI_SSL_AUTH_PATH, ssl_auth_path);
|
||||
}
|
||||
if let Some(ssl_ocsp_path) = ssl_ocsp_path {
|
||||
export_to_env_if_not_present(MEILI_SSL_OCSP_PATH, ssl_ocsp_path);
|
||||
}
|
||||
export_to_env_if_not_present(MEILI_SSL_REQUIRE_AUTH, ssl_require_auth.to_string());
|
||||
export_to_env_if_not_present(MEILI_SSL_RESUMPTION, ssl_resumption.to_string());
|
||||
export_to_env_if_not_present(MEILI_SSL_TICKETS, ssl_tickets.to_string());
|
||||
export_to_env_if_not_present(MEILI_SNAPSHOT_DIR, snapshot_dir);
|
||||
export_to_env_if_not_present(MEILI_SCHEDULE_SNAPSHOT, schedule_snapshot.to_string());
|
||||
export_to_env_if_not_present(
|
||||
MEILI_SNAPSHOT_INTERVAL_SEC,
|
||||
snapshot_interval_sec.to_string(),
|
||||
);
|
||||
export_to_env_if_not_present(MEILI_DUMP_DIR, dump_dir);
|
||||
export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level);
|
||||
#[cfg(feature = "metrics")]
|
||||
{
|
||||
export_to_env_if_not_present(
|
||||
MEILI_ENABLE_METRICS_ROUTE,
|
||||
enable_metrics_route.to_string(),
|
||||
);
|
||||
}
|
||||
indexer_options.export_to_env();
|
||||
scheduler_options.export_to_env();
|
||||
}
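
Editor's note: the helper used throughout `export_to_env` is defined elsewhere in this file, outside the excerpt shown here. A minimal sketch of what it presumably does: set the variable only when it is not already present, so real environment variables and CLI arguments keep priority over the config file.

use std::env;
use std::ffi::OsStr;

// Hedged sketch of the helper: only write the env var if the user has not set it,
// preserving the priority order config file < env vars < CLI args.
pub fn export_to_env_if_not_present<T>(key: &str, value: T)
where
    T: AsRef<OsStr>,
{
    if env::var_os(key).is_none() {
        env::set_var(key, value);
    }
}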
|
||||
|
||||
pub fn get_ssl_config(&self) -> anyhow::Result<Option<rustls::ServerConfig>> {
|
||||
if let (Some(cert_path), Some(key_path)) = (&self.ssl_cert_path, &self.ssl_key_path) {
|
||||
let config = rustls::ServerConfig::builder().with_safe_defaults();
|
||||
|
||||
let config = match &self.ssl_auth_path {
|
||||
Some(auth_path) => {
|
||||
let roots = load_certs(auth_path.to_path_buf())?;
|
||||
let mut client_auth_roots = RootCertStore::empty();
|
||||
for root in roots {
|
||||
client_auth_roots.add(&root).unwrap();
|
||||
}
|
||||
if self.ssl_require_auth {
|
||||
let verifier = AllowAnyAuthenticatedClient::new(client_auth_roots);
|
||||
config.with_client_cert_verifier(verifier)
|
||||
} else {
|
||||
let verifier =
|
||||
AllowAnyAnonymousOrAuthenticatedClient::new(client_auth_roots);
|
||||
config.with_client_cert_verifier(verifier)
|
||||
}
|
||||
}
|
||||
None => config.with_no_client_auth(),
|
||||
};
|
||||
|
||||
let certs = load_certs(cert_path.to_path_buf())?;
|
||||
let privkey = load_private_key(key_path.to_path_buf())?;
|
||||
let ocsp = load_ocsp(&self.ssl_ocsp_path)?;
|
||||
let mut config = config
|
||||
.with_single_cert_with_ocsp_and_sct(certs, privkey, ocsp, vec![])
|
||||
                .map_err(|_| anyhow::anyhow!("bad certificates/private key"))?;

            config.key_log = Arc::new(rustls::KeyLogFile::new());

            if self.ssl_resumption {
                config.session_storage = ServerSessionMemoryCache::new(256);
            }

            if self.ssl_tickets {
                config.ticketer = rustls::Ticketer::new().unwrap();
            }

            Ok(Some(config))
        } else {
            Ok(None)
        }
    }
}

#[derive(Debug, Clone, Parser, Deserialize)]
pub struct IndexerOpts {
    /// Sets the amount of documents to skip before printing
    /// a log regarding the indexing advancement.
    #[serde(default = "default_log_every_n")]
    #[clap(long, default_value_t = default_log_every_n(), hide = true)] // 100k
    pub log_every_n: usize,

    /// Grenad max number of chunks in bytes.
    #[clap(long, hide = true)]
    pub max_nb_chunks: Option<usize>,

    /// Sets the maximum amount of RAM Meilisearch can use when indexing. By default, Meilisearch
    /// uses no more than two thirds of available memory.
    #[clap(long, env = MEILI_MAX_INDEXING_MEMORY, default_value_t)]
    #[serde(default)]
    pub max_indexing_memory: MaxMemory,

    /// Sets the maximum number of threads Meilisearch can use during indexation. By default, the
    /// indexer avoids using more than half of a machine's total processing units. This ensures
    /// Meilisearch is always ready to perform searches, even while you are updating an index.
    #[clap(long, env = MEILI_MAX_INDEXING_THREADS, default_value_t)]
    #[serde(default)]
    pub max_indexing_threads: MaxThreads,
}

impl IndexerOpts {
    /// Exports the values to their corresponding env vars if they are not set.
    pub fn export_to_env(self) {
        let IndexerOpts {
            max_indexing_memory,
            max_indexing_threads,
            log_every_n: _,
            max_nb_chunks: _,
        } = self;
        if let Some(max_indexing_memory) = max_indexing_memory.0 {
            export_to_env_if_not_present(
                MEILI_MAX_INDEXING_MEMORY,
                max_indexing_memory.to_string(),
            );
        }
        export_to_env_if_not_present(
            MEILI_MAX_INDEXING_THREADS,
            max_indexing_threads.0.to_string(),
        );
    }
}

#[derive(Debug, Clone, Parser, Default, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct SchedulerConfig {
    /// Deactivates auto-batching when provided.
    #[clap(long, env = DISABLE_AUTO_BATCHING)]
    #[serde(default)]
    pub disable_auto_batching: bool,
}

impl SchedulerConfig {
    pub fn export_to_env(self) {
        let SchedulerConfig { disable_auto_batching } = self;
        export_to_env_if_not_present(DISABLE_AUTO_BATCHING, disable_auto_batching.to_string());
    }
}

impl TryFrom<&IndexerOpts> for IndexerConfig {
    type Error = anyhow::Error;

    fn try_from(other: &IndexerOpts) -> Result<Self, Self::Error> {
        let thread_pool = rayon::ThreadPoolBuilder::new()
            .thread_name(|index| format!("indexing-thread:{index}"))
            .num_threads(*other.max_indexing_threads)
            .build()?;

        Ok(Self {
            log_every_n: Some(other.log_every_n),
            max_nb_chunks: other.max_nb_chunks,
            max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize),
            thread_pool: Some(thread_pool),
            max_positions_per_attributes: None,
            ..Default::default()
        })
    }
}

impl Default for IndexerOpts {
    fn default() -> Self {
        Self {
            log_every_n: 100_000,
            max_nb_chunks: None,
            max_indexing_memory: MaxMemory::default(),
            max_indexing_threads: MaxThreads::default(),
        }
    }
}

/// A type used to detect the max memory available and use 2/3 of it.
#[derive(Debug, Clone, Copy, Deserialize, Serialize)]
pub struct MaxMemory(Option<Byte>);

impl FromStr for MaxMemory {
    type Err = ByteError;

    fn from_str(s: &str) -> Result<MaxMemory, ByteError> {
        Byte::from_str(s).map(Some).map(MaxMemory)
    }
}

impl Default for MaxMemory {
    fn default() -> MaxMemory {
        MaxMemory(total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_bytes))
    }
}

impl fmt::Display for MaxMemory {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self.0 {
            Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)),
            None => f.write_str("unknown"),
        }
    }
}

impl Deref for MaxMemory {
    type Target = Option<Byte>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl MaxMemory {
    pub fn unlimited() -> Self {
        Self(None)
    }
}
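
// Editor's note (illustrative, not part of the diff): `MaxMemory` parses the same
// human-readable byte strings as `byte_unit::Byte`, so a value such as "2 GiB" passed on
// the command line or through MEILI_MAX_INDEXING_MEMORY is expected to work. A minimal
// sketch, assuming the `Byte` import used above:
//
//     let limit: MaxMemory = "2 GiB".parse().expect("a valid byte string");
//     assert!(limit.is_some()); // via Deref<Target = Option<Byte>>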

/// Returns the total amount of bytes available or `None` if this system isn't supported.
fn total_memory_bytes() -> Option<u64> {
    if System::IS_SUPPORTED {
        let memory_kind = RefreshKind::new().with_memory();
        let mut system = System::new_with_specifics(memory_kind);
        system.refresh_memory();
        Some(system.total_memory())
    } else {
        None
    }
}

#[derive(Debug, Clone, Copy, Deserialize, Serialize)]
pub struct MaxThreads(usize);

impl FromStr for MaxThreads {
    type Err = ParseIntError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        usize::from_str(s).map(Self)
    }
}

impl Default for MaxThreads {
    fn default() -> Self {
        MaxThreads(num_cpus::get() / 2)
    }
}

impl fmt::Display for MaxThreads {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}

impl Deref for MaxThreads {
    type Target = usize;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

fn load_certs(filename: PathBuf) -> anyhow::Result<Vec<rustls::Certificate>> {
    let certfile =
        fs::File::open(filename).map_err(|_| anyhow::anyhow!("cannot open certificate file"))?;
    let mut reader = BufReader::new(certfile);
    certs(&mut reader)
        .map(|certs| certs.into_iter().map(rustls::Certificate).collect())
        .map_err(|_| anyhow::anyhow!("cannot read certificate file"))
}

fn load_private_key(filename: PathBuf) -> anyhow::Result<rustls::PrivateKey> {
    let rsa_keys = {
        let keyfile = fs::File::open(filename.clone())
            .map_err(|_| anyhow::anyhow!("cannot open private key file"))?;
        let mut reader = BufReader::new(keyfile);
        rsa_private_keys(&mut reader)
            .map_err(|_| anyhow::anyhow!("file contains invalid rsa private key"))?
    };

    let pkcs8_keys = {
        let keyfile = fs::File::open(filename)
            .map_err(|_| anyhow::anyhow!("cannot open private key file"))?;
        let mut reader = BufReader::new(keyfile);
        pkcs8_private_keys(&mut reader).map_err(|_| {
            anyhow::anyhow!(
                "file contains invalid pkcs8 private key (encrypted keys not supported)"
            )
        })?
    };

    // prefer to load pkcs8 keys
    if !pkcs8_keys.is_empty() {
        Ok(rustls::PrivateKey(pkcs8_keys[0].clone()))
    } else {
        assert!(!rsa_keys.is_empty());
        Ok(rustls::PrivateKey(rsa_keys[0].clone()))
    }
}

fn load_ocsp(filename: &Option<PathBuf>) -> anyhow::Result<Vec<u8>> {
    let mut ret = Vec::new();

    if let Some(ref name) = filename {
        fs::File::open(name)
            .map_err(|_| anyhow::anyhow!("cannot open ocsp file"))?
            .read_to_end(&mut ret)
            .map_err(|_| anyhow::anyhow!("cannot read ocsp file"))?;
    }

    Ok(ret)
}

/// Checks if the key is defined in the environment variables.
/// If not, inserts it with the given value.
pub fn export_to_env_if_not_present<T>(key: &str, value: T)
where
    T: AsRef<OsStr>,
{
    if let Err(VarError::NotPresent) = std::env::var(key) {
        std::env::set_var(key, value);
    }
}
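
// Editor's note (illustrative, not part of the diff): this helper only writes the variable
// when it is absent, so environment values provided by the user always win. Usage mirrors
// `IndexerOpts::export_to_env` above, e.g.:
//
//     export_to_env_if_not_present(MEILI_MAX_INDEXING_THREADS, 8.to_string());
//     // a pre-existing MEILI_MAX_INDEXING_THREADS=4 would be left untouched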

/// Functions used to get default values for `Opt` fields; they need to be functions because of serde's default attribute.

fn default_db_path() -> PathBuf {
    PathBuf::from(DEFAULT_DB_PATH)
}

pub fn default_http_addr() -> String {
    DEFAULT_HTTP_ADDR.to_string()
}

fn default_env() -> String {
    DEFAULT_ENV.to_string()
}

fn default_max_index_size() -> Byte {
    Byte::from_str(DEFAULT_MAX_INDEX_SIZE).unwrap()
}

fn default_max_task_db_size() -> Byte {
    Byte::from_str(DEFAULT_MAX_TASK_DB_SIZE).unwrap()
}

fn default_http_payload_size_limit() -> Byte {
    Byte::from_str(DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT).unwrap()
}

fn default_snapshot_dir() -> PathBuf {
    PathBuf::from(DEFAULT_SNAPSHOT_DIR)
}

fn default_snapshot_interval_sec() -> u64 {
    DEFAULT_SNAPSHOT_INTERVAL_SEC
}

fn default_dump_dir() -> PathBuf {
    PathBuf::from(DEFAULT_DUMP_DIR)
}

fn default_log_level() -> String {
    DEFAULT_LOG_LEVEL.to_string()
}

fn default_log_every_n() -> usize {
    DEFAULT_LOG_EVERY_N
}

#[cfg(test)]
mod test {

    use super::*;

    #[test]
    fn test_valid_opt() {
        assert!(Opt::try_parse_from(Some("")).is_ok());
    }

    #[test]
    #[ignore]
    fn test_meilli_config_file_path_valid() {
        temp_env::with_vars(
            vec![("MEILI_CONFIG_FILE_PATH", Some("../config.toml"))], // Relative path in meilisearch package
            || {
                assert!(Opt::try_build().is_ok());
            },
        );
    }

    #[test]
    #[ignore]
    fn test_meilli_config_file_path_invalid() {
        temp_env::with_vars(vec![("MEILI_CONFIG_FILE_PATH", Some("../configgg.toml"))], || {
            let possible_error_messages = [
                "unable to open or read the \"../configgg.toml\" configuration file: No such file or directory (os error 2).",
                "unable to open or read the \"../configgg.toml\" configuration file: The system cannot find the file specified. (os error 2).", // Windows
            ];
            let error_message = Opt::try_build().unwrap_err().to_string();
            assert!(
                possible_error_messages.contains(&error_message.as_str()),
                "Expected one of {:?}, got {:?}.",
                possible_error_messages,
                error_message
            );
        });
    }
}
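
// Editor's note (illustrative, not part of the diff): the two ignored tests above read a
// TOML configuration file through MEILI_CONFIG_FILE_PATH. Since every `Opt` field has a
// default, a hypothetical ../config.toml that satisfies `Opt::try_build()` could be as
// small as:
//
//     db_path = "./data.ms"
//     http_addr = "localhost:7700"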
104
meilisearch/src/route_metrics.rs
Normal file
@@ -0,0 +1,104 @@
use std::future::{ready, Ready};

use actix_web::dev::{self, Service, ServiceRequest, ServiceResponse, Transform};
use actix_web::http::header;
use actix_web::{Error, HttpResponse};
use futures_util::future::LocalBoxFuture;
use meilisearch_auth::actions;
use meilisearch_lib::MeiliSearch;
use meilisearch_types::error::ResponseError;
use prometheus::{Encoder, HistogramTimer, TextEncoder};

use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::GuardedData;

pub async fn get_metrics(
    meilisearch: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, MeiliSearch>,
) -> Result<HttpResponse, ResponseError> {
    let search_rules = &meilisearch.filters().search_rules;
    let response = meilisearch.get_all_stats(search_rules).await?;

    crate::metrics::MEILISEARCH_DB_SIZE_BYTES.set(response.database_size as i64);
    crate::metrics::MEILISEARCH_INDEX_COUNT.set(response.indexes.len() as i64);

    for (index, value) in response.indexes.iter() {
        crate::metrics::MEILISEARCH_INDEX_DOCS_COUNT
            .with_label_values(&[index])
            .set(value.number_of_documents as i64);
    }

    let encoder = TextEncoder::new();
    let mut buffer = vec![];
    encoder.encode(&prometheus::gather(), &mut buffer).expect("Failed to encode metrics");

    let response = String::from_utf8(buffer).expect("Failed to convert bytes to string");

    Ok(HttpResponse::Ok().insert_header(header::ContentType(mime::TEXT_PLAIN)).body(response))
}

pub struct RouteMetrics;

// Middleware factory is `Transform` trait from actix-service crate
// `S` - type of the next service
// `B` - type of response's body
impl<S, B> Transform<S, ServiceRequest> for RouteMetrics
where
    S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = Error>,
    S::Future: 'static,
    B: 'static,
{
    type Response = ServiceResponse<B>;
    type Error = Error;
    type InitError = ();
    type Transform = RouteMetricsMiddleware<S>;
    type Future = Ready<Result<Self::Transform, Self::InitError>>;

    fn new_transform(&self, service: S) -> Self::Future {
        ready(Ok(RouteMetricsMiddleware { service }))
    }
}

pub struct RouteMetricsMiddleware<S> {
    service: S,
}

impl<S, B> Service<ServiceRequest> for RouteMetricsMiddleware<S>
where
    S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = Error>,
    S::Future: 'static,
    B: 'static,
{
    type Response = ServiceResponse<B>;
    type Error = Error;
    type Future = LocalBoxFuture<'static, Result<Self::Response, Self::Error>>;

    dev::forward_ready!(service);

    fn call(&self, req: ServiceRequest) -> Self::Future {
        let mut histogram_timer: Option<HistogramTimer> = None;
        let request_path = req.path();
        let is_registered_resource = req.resource_map().has_resource(request_path);
        if is_registered_resource {
            let request_method = req.method().to_string();
            histogram_timer = Some(
                crate::metrics::HTTP_RESPONSE_TIME_SECONDS
                    .with_label_values(&[&request_method, request_path])
                    .start_timer(),
            );
            crate::metrics::HTTP_REQUESTS_TOTAL
                .with_label_values(&[&request_method, request_path])
                .inc();
        }

        let fut = self.service.call(req);

        Box::pin(async move {
            let res = fut.await?;

            if let Some(histogram_timer) = histogram_timer {
                histogram_timer.observe_duration();
            };
            Ok(res)
        })
    }
}
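
// Editor's note (illustrative, not part of the diff): the actual registration of this
// middleware and of `get_metrics` happens elsewhere in the crate; when the metrics route
// is enabled, the wiring in the actix-web `App` builder is expected to look roughly like:
//
//     App::new()
//         .wrap(route_metrics::RouteMetrics)
//         .route("/metrics", web::get().to(route_metrics::get_metrics))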
158
meilisearch/src/routes/api_key.rs
Normal file
@@ -0,0 +1,158 @@
use std::str;

use actix_web::{web, HttpRequest, HttpResponse};
use meilisearch_auth::error::AuthControllerError;
use meilisearch_auth::AuthController;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::keys::{Action, Key};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use time::OffsetDateTime;
use uuid::Uuid;

use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::Pagination;

pub fn configure(cfg: &mut web::ServiceConfig) {
    cfg.service(
        web::resource("")
            .route(web::post().to(SeqHandler(create_api_key)))
            .route(web::get().to(SeqHandler(list_api_keys))),
    )
    .service(
        web::resource("/{key}")
            .route(web::get().to(SeqHandler(get_api_key)))
            .route(web::patch().to(SeqHandler(patch_api_key)))
            .route(web::delete().to(SeqHandler(delete_api_key))),
    );
}
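
// Editor's note (illustrative, not part of the diff): this scope is mounted under the
// /keys prefix by the parent router, so creating a key is expected to look like:
//
//     curl -X POST 'http://localhost:7700/keys' \
//          -H 'Authorization: Bearer <master-key>' \
//          -H 'Content-Type: application/json' \
//          --data '{"actions": ["search"], "indexes": ["*"], "expiresAt": null}'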

pub async fn create_api_key(
    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_CREATE }>, AuthController>,
    body: web::Json<Value>,
    _req: HttpRequest,
) -> Result<HttpResponse, ResponseError> {
    let v = body.into_inner();
    let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
        let key = auth_controller.create_key(v)?;
        Ok(KeyView::from_key(key, &auth_controller))
    })
    .await
    .map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;

    Ok(HttpResponse::Created().json(res))
}

pub async fn list_api_keys(
    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, AuthController>,
    paginate: web::Query<Pagination>,
) -> Result<HttpResponse, ResponseError> {
    let page_view = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
        let keys = auth_controller.list_keys()?;
        let page_view = paginate
            .auto_paginate_sized(keys.into_iter().map(|k| KeyView::from_key(k, &auth_controller)));

        Ok(page_view)
    })
    .await
    .map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;

    Ok(HttpResponse::Ok().json(page_view))
}

pub async fn get_api_key(
    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, AuthController>,
    path: web::Path<AuthParam>,
) -> Result<HttpResponse, ResponseError> {
    let key = path.into_inner().key;

    let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
        let uid =
            Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
        let key = auth_controller.get_key(uid)?;

        Ok(KeyView::from_key(key, &auth_controller))
    })
    .await
    .map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;

    Ok(HttpResponse::Ok().json(res))
}

pub async fn patch_api_key(
    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_UPDATE }>, AuthController>,
    body: web::Json<Value>,
    path: web::Path<AuthParam>,
) -> Result<HttpResponse, ResponseError> {
    let key = path.into_inner().key;
    let body = body.into_inner();
    let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
        let uid =
            Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
        let key = auth_controller.update_key(uid, body)?;

        Ok(KeyView::from_key(key, &auth_controller))
    })
    .await
    .map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;

    Ok(HttpResponse::Ok().json(res))
}

pub async fn delete_api_key(
    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_DELETE }>, AuthController>,
    path: web::Path<AuthParam>,
) -> Result<HttpResponse, ResponseError> {
    let key = path.into_inner().key;
    tokio::task::spawn_blocking(move || {
        let uid =
            Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
        auth_controller.delete_key(uid)
    })
    .await
    .map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;

    Ok(HttpResponse::NoContent().finish())
}

#[derive(Deserialize)]
pub struct AuthParam {
    key: String,
}

#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
struct KeyView {
    name: Option<String>,
    description: Option<String>,
    key: String,
    uid: Uuid,
    actions: Vec<Action>,
    indexes: Vec<String>,
    #[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
    expires_at: Option<OffsetDateTime>,
    #[serde(serialize_with = "time::serde::rfc3339::serialize")]
    created_at: OffsetDateTime,
    #[serde(serialize_with = "time::serde::rfc3339::serialize")]
    updated_at: OffsetDateTime,
}

impl KeyView {
    fn from_key(key: Key, auth: &AuthController) -> Self {
        let generated_key = auth.generate_key(key.uid).unwrap_or_default();

        KeyView {
            name: key.name,
            description: key.description,
            key: generated_key,
            uid: key.uid,
            actions: key.actions,
            indexes: key.indexes.into_iter().map(String::from).collect(),
            expires_at: key.expires_at,
            created_at: key.created_at,
            updated_at: key.updated_at,
        }
    }
}
37
meilisearch/src/routes/dump.rs
Normal file
@@ -0,0 +1,37 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::KindWithContent;
use serde_json::json;

use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::SummarizedTaskView;

pub fn configure(cfg: &mut web::ServiceConfig) {
    cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump))));
}

pub async fn create_dump(
    index_scheduler: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<IndexScheduler>>,
    auth_controller: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, AuthController>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    analytics.publish("Dump Created".to_string(), json!({}), Some(&req));

    let task = KindWithContent::DumpCreation {
        keys: auth_controller.list_keys()?,
        instance_uid: analytics.instance_uid().cloned(),
    };
    let task: SummarizedTaskView =
        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();

    debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
}
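
// Editor's note (illustrative, not part of the diff): with this scope mounted under /dumps
// by the parent router, triggering a dump is expected to reduce to:
//
//     curl -X POST 'http://localhost:7700/dumps' -H 'Authorization: Bearer <key with dumps.create>'
//
// which answers with the summarized task that can then be polled on /tasks.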
438
meilisearch/src/routes/indexes/documents.rs
Normal file
@@ -0,0 +1,438 @@
use actix_web::http::header::CONTENT_TYPE;
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpMessage, HttpRequest, HttpResponse};
|
||||
use bstr::ByteSlice;
|
||||
use futures::StreamExt;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::heed::RoTxn;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod;
|
||||
use meilisearch_types::star_or::StarOr;
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use meilisearch_types::{milli, Document, Index};
|
||||
use mime::Mime;
|
||||
use once_cell::sync::Lazy;
|
||||
use serde::Deserialize;
|
||||
use serde_cs::vec::CS;
|
||||
use serde_json::Value;
|
||||
use std::io::ErrorKind;
|
||||
use tempfile::tempfile;
|
||||
use tokio::fs::File;
|
||||
use tokio::io::{AsyncSeekExt, AsyncWriteExt};
|
||||
|
||||
use crate::analytics::{Analytics, DocumentDeletionKind};
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::error::PayloadError::ReceivePayloadErr;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::payload::Payload;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::{fold_star_or, PaginationView, SummarizedTaskView};
|
||||
|
||||
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
|
||||
vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()]
|
||||
});
|
||||
|
||||
/// Extracts the mime type from the content type and returns
/// a meilisearch error if anything bad happens.
|
||||
fn extract_mime_type(req: &HttpRequest) -> Result<Option<Mime>, MeilisearchHttpError> {
|
||||
match req.mime_type() {
|
||||
Ok(Some(mime)) => Ok(Some(mime)),
|
||||
Ok(None) => Ok(None),
|
||||
Err(_) => match req.headers().get(CONTENT_TYPE) {
|
||||
Some(content_type) => Err(MeilisearchHttpError::InvalidContentType(
|
||||
content_type.as_bytes().as_bstr().to_string(),
|
||||
ACCEPTED_CONTENT_TYPE.clone(),
|
||||
)),
|
||||
None => Err(MeilisearchHttpError::MissingContentType(ACCEPTED_CONTENT_TYPE.clone())),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct DocumentParam {
|
||||
index_uid: String,
|
||||
document_id: String,
|
||||
}
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(
|
||||
web::resource("")
|
||||
.route(web::get().to(SeqHandler(get_all_documents)))
|
||||
.route(web::post().to(SeqHandler(add_documents)))
|
||||
.route(web::put().to(SeqHandler(update_documents)))
|
||||
.route(web::delete().to(SeqHandler(clear_all_documents))),
|
||||
)
|
||||
// this route needs to be before the /documents/{document_id} to match properly
|
||||
.service(web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents))))
|
||||
.service(
|
||||
web::resource("/{document_id}")
|
||||
.route(web::get().to(SeqHandler(get_document)))
|
||||
.route(web::delete().to(SeqHandler(delete_document))),
|
||||
);
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
pub struct GetDocument {
|
||||
fields: Option<CS<StarOr<String>>>,
|
||||
}
|
||||
|
||||
pub async fn get_document(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
|
||||
path: web::Path<DocumentParam>,
|
||||
params: web::Query<GetDocument>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let GetDocument { fields } = params.into_inner();
|
||||
let attributes_to_retrieve = fields.and_then(fold_star_or);
|
||||
|
||||
let index = index_scheduler.index(&path.index_uid)?;
|
||||
let document = retrieve_document(&index, &path.document_id, attributes_to_retrieve)?;
|
||||
debug!("returns: {:?}", document);
|
||||
Ok(HttpResponse::Ok().json(document))
|
||||
}
|
||||
|
||||
pub async fn delete_document(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
|
||||
path: web::Path<DocumentParam>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
analytics.delete_documents(DocumentDeletionKind::PerDocumentId, &req);
|
||||
|
||||
let DocumentParam { document_id, index_uid } = path.into_inner();
|
||||
let task = KindWithContent::DocumentDeletion { index_uid, documents_ids: vec![document_id] };
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
pub struct BrowseQuery {
|
||||
#[serde(default)]
|
||||
offset: usize,
|
||||
#[serde(default = "crate::routes::PAGINATION_DEFAULT_LIMIT")]
|
||||
limit: usize,
|
||||
fields: Option<CS<StarOr<String>>>,
|
||||
}
|
||||
|
||||
pub async fn get_all_documents(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
params: web::Query<BrowseQuery>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with params: {:?}", params);
|
||||
let BrowseQuery { limit, offset, fields } = params.into_inner();
|
||||
let attributes_to_retrieve = fields.and_then(fold_star_or);
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let (total, documents) = retrieve_documents(&index, offset, limit, attributes_to_retrieve)?;
|
||||
|
||||
let ret = PaginationView::new(offset, limit, total as usize, documents);
|
||||
|
||||
debug!("returns: {:?}", ret);
|
||||
Ok(HttpResponse::Ok().json(ret))
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
pub struct UpdateDocumentsQuery {
|
||||
pub primary_key: Option<String>,
|
||||
}
|
||||
|
||||
pub async fn add_documents(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
params: web::Query<UpdateDocumentsQuery>,
|
||||
body: Payload,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with params: {:?}", params);
|
||||
let params = params.into_inner();
|
||||
|
||||
analytics.add_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req);
|
||||
|
||||
let allow_index_creation = index_scheduler.filters().allow_index_creation;
|
||||
let task = document_addition(
|
||||
extract_mime_type(&req)?,
|
||||
index_scheduler,
|
||||
index_uid.into_inner(),
|
||||
params.primary_key,
|
||||
body,
|
||||
IndexDocumentsMethod::ReplaceDocuments,
|
||||
allow_index_creation,
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
pub async fn update_documents(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
|
||||
path: web::Path<String>,
|
||||
params: web::Query<UpdateDocumentsQuery>,
|
||||
body: Payload,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with params: {:?}", params);
|
||||
let index_uid = path.into_inner();
|
||||
|
||||
analytics.update_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req);
|
||||
|
||||
let allow_index_creation = index_scheduler.filters().allow_index_creation;
|
||||
let task = document_addition(
|
||||
extract_mime_type(&req)?,
|
||||
index_scheduler,
|
||||
index_uid,
|
||||
params.into_inner().primary_key,
|
||||
body,
|
||||
IndexDocumentsMethod::UpdateDocuments,
|
||||
allow_index_creation,
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
async fn document_addition(
|
||||
mime_type: Option<Mime>,
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
|
||||
index_uid: String,
|
||||
primary_key: Option<String>,
|
||||
mut body: Payload,
|
||||
method: IndexDocumentsMethod,
|
||||
allow_index_creation: bool,
|
||||
) -> Result<SummarizedTaskView, MeilisearchHttpError> {
|
||||
let format = match mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())) {
|
||||
Some(("application", "json")) => PayloadType::Json,
|
||||
Some(("application", "x-ndjson")) => PayloadType::Ndjson,
|
||||
Some(("text", "csv")) => PayloadType::Csv,
|
||||
Some((type_, subtype)) => {
|
||||
return Err(MeilisearchHttpError::InvalidContentType(
|
||||
format!("{}/{}", type_, subtype),
|
||||
ACCEPTED_CONTENT_TYPE.clone(),
|
||||
))
|
||||
}
|
||||
None => {
|
||||
return Err(MeilisearchHttpError::MissingContentType(ACCEPTED_CONTENT_TYPE.clone()))
|
||||
}
|
||||
};
|
||||
|
||||
// is your indexUid valid?
|
||||
let index_uid = IndexUid::try_from(index_uid)?.into_inner();
|
||||
|
||||
let (uuid, mut update_file) = index_scheduler.create_update_file()?;
|
||||
|
||||
let temp_file = match tempfile() {
|
||||
Ok(temp_file) => temp_file,
|
||||
Err(e) => {
|
||||
return Err(MeilisearchHttpError::Payload(ReceivePayloadErr(Box::new(e))));
|
||||
}
|
||||
};
|
||||
|
||||
let mut buffer = File::from_std(temp_file);
|
||||
|
||||
let mut buffer_write_size: usize = 0;
|
||||
while let Some(bytes) = body.next().await {
|
||||
let byte = &bytes?;
|
||||
|
||||
if byte.is_empty() && buffer_write_size == 0 {
|
||||
return Err(MeilisearchHttpError::MissingPayload(format));
|
||||
}
|
||||
|
||||
match buffer.write_all(byte).await {
|
||||
Ok(()) => buffer_write_size += 1,
|
||||
Err(e) => {
|
||||
return Err(MeilisearchHttpError::Payload(ReceivePayloadErr(Box::new(e))));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
if let Err(e) = buffer.flush().await {
|
||||
return Err(MeilisearchHttpError::Payload(ReceivePayloadErr(Box::new(e))));
|
||||
}
|
||||
|
||||
if buffer_write_size == 0 {
|
||||
return Err(MeilisearchHttpError::MissingPayload(format));
|
||||
}
|
||||
|
||||
if let Err(e) = buffer.seek(std::io::SeekFrom::Start(0)).await {
|
||||
return Err(MeilisearchHttpError::Payload(ReceivePayloadErr(Box::new(e))));
|
||||
};
|
||||
|
||||
let read_file = buffer.into_std().await;
|
||||
|
||||
let documents_count =
|
||||
tokio::task::spawn_blocking(move || -> Result<_, MeilisearchHttpError> {
|
||||
let documents_count = match format {
|
||||
PayloadType::Json => read_json(&read_file, update_file.as_file_mut())?,
|
||||
PayloadType::Csv => read_csv(&read_file, update_file.as_file_mut())?,
|
||||
PayloadType::Ndjson => read_ndjson(&read_file, update_file.as_file_mut())?,
|
||||
};
|
||||
// we NEED to persist the file here because we moved the `update_file` into another task.
|
||||
update_file.persist()?;
|
||||
Ok(documents_count)
|
||||
})
|
||||
.await;
|
||||
|
||||
let documents_count = match documents_count {
|
||||
Ok(Ok(documents_count)) => documents_count as u64,
|
||||
// in this case the file could not have been persisted.
|
||||
Ok(Err(e)) => return Err(e),
|
||||
Err(e) => {
|
||||
// Here the file MAY or MAY NOT have been persisted.
// We don't know, so we ignore the file-not-found error.
|
||||
match index_scheduler.delete_update_file(uuid) {
|
||||
Ok(()) => (),
|
||||
Err(index_scheduler::Error::FileStore(file_store::Error::IoError(e)))
|
||||
if e.kind() == ErrorKind::NotFound => {}
|
||||
Err(e) => {
|
||||
log::warn!("Unknown error happened while deleting a malformed update file with uuid {uuid}: {e}");
|
||||
}
|
||||
}
|
||||
// We still want to return the original error to the end user.
|
||||
return Err(e.into());
|
||||
}
|
||||
};
|
||||
|
||||
let task = KindWithContent::DocumentAdditionOrUpdate {
|
||||
method,
|
||||
content_file: uuid,
|
||||
documents_count,
|
||||
primary_key,
|
||||
allow_index_creation,
|
||||
index_uid,
|
||||
};
|
||||
|
||||
let scheduler = index_scheduler.clone();
|
||||
let task = match tokio::task::spawn_blocking(move || scheduler.register(task)).await? {
|
||||
Ok(task) => task,
|
||||
Err(e) => {
|
||||
index_scheduler.delete_update_file(uuid)?;
|
||||
return Err(e.into());
|
||||
}
|
||||
};
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(task.into())
|
||||
}
|
||||
|
||||
pub async fn delete_documents(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
|
||||
path: web::Path<String>,
|
||||
body: web::Json<Vec<Value>>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with params: {:?}", body);
|
||||
|
||||
analytics.delete_documents(DocumentDeletionKind::PerBatch, &req);
|
||||
|
||||
let ids = body
|
||||
.iter()
|
||||
.map(|v| v.as_str().map(String::from).unwrap_or_else(|| v.to_string()))
|
||||
.collect();
|
||||
|
||||
let task =
|
||||
KindWithContent::DocumentDeletion { index_uid: path.into_inner(), documents_ids: ids };
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
pub async fn clear_all_documents(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
|
||||
path: web::Path<String>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
analytics.delete_documents(DocumentDeletionKind::ClearAll, &req);
|
||||
|
||||
let task = KindWithContent::DocumentClear { index_uid: path.into_inner() };
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
fn all_documents<'a>(
|
||||
index: &Index,
|
||||
rtxn: &'a RoTxn,
|
||||
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
|
||||
Ok(index.all_documents(rtxn)?.map(move |ret| {
|
||||
ret.map_err(ResponseError::from).and_then(|(_key, document)| -> Result<_, ResponseError> {
|
||||
Ok(milli::obkv_to_json(&all_fields, &fields_ids_map, document)?)
|
||||
})
|
||||
}))
|
||||
}
|
||||
|
||||
fn retrieve_documents<S: AsRef<str>>(
|
||||
index: &Index,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
attributes_to_retrieve: Option<Vec<S>>,
|
||||
) -> Result<(u64, Vec<Document>), ResponseError> {
|
||||
let rtxn = index.read_txn()?;
|
||||
|
||||
let mut documents = Vec::new();
|
||||
for document in all_documents(index, &rtxn)?.skip(offset).take(limit) {
|
||||
let document = match &attributes_to_retrieve {
|
||||
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
|
||||
&document?,
|
||||
attributes_to_retrieve.iter().map(|s| s.as_ref()),
|
||||
),
|
||||
None => document?,
|
||||
};
|
||||
documents.push(document);
|
||||
}
|
||||
|
||||
let number_of_documents = index.number_of_documents(&rtxn)?;
|
||||
Ok((number_of_documents, documents))
|
||||
}
|
||||
|
||||
fn retrieve_document<S: AsRef<str>>(
|
||||
index: &Index,
|
||||
doc_id: &str,
|
||||
attributes_to_retrieve: Option<Vec<S>>,
|
||||
) -> Result<Document, ResponseError> {
|
||||
let txn = index.read_txn()?;
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&txn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
|
||||
let internal_id = index
|
||||
.external_documents_ids(&txn)?
|
||||
.get(doc_id.as_bytes())
|
||||
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
|
||||
|
||||
let document = index
|
||||
.documents(&txn, std::iter::once(internal_id))?
|
||||
.into_iter()
|
||||
.next()
|
||||
.map(|(_, d)| d)
|
||||
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
|
||||
|
||||
let document = meilisearch_types::milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
|
||||
let document = match &attributes_to_retrieve {
|
||||
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
|
||||
&document,
|
||||
attributes_to_retrieve.iter().map(|s| s.as_ref()),
|
||||
),
|
||||
None => document,
|
||||
};
|
||||
|
||||
Ok(document)
|
||||
}
214
meilisearch/src/routes/indexes/mod.rs
Normal file
@@ -0,0 +1,214 @@
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::{self, FieldDistribution, Index};
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::json;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use super::{Pagination, SummarizedTaskView};
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::{AuthenticationError, GuardedData};
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
|
||||
pub mod documents;
|
||||
pub mod search;
|
||||
pub mod settings;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(
|
||||
web::resource("")
|
||||
.route(web::get().to(list_indexes))
|
||||
.route(web::post().to(SeqHandler(create_index))),
|
||||
)
|
||||
.service(
|
||||
web::scope("/{index_uid}")
|
||||
.service(
|
||||
web::resource("")
|
||||
.route(web::get().to(SeqHandler(get_index)))
|
||||
.route(web::patch().to(SeqHandler(update_index)))
|
||||
.route(web::delete().to(SeqHandler(delete_index))),
|
||||
)
|
||||
.service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats))))
|
||||
.service(web::scope("/documents").configure(documents::configure))
|
||||
.service(web::scope("/search").configure(search::configure))
|
||||
.service(web::scope("/settings").configure(settings::configure)),
|
||||
);
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct IndexView {
|
||||
pub uid: String,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub created_at: OffsetDateTime,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub updated_at: OffsetDateTime,
|
||||
pub primary_key: Option<String>,
|
||||
}
|
||||
|
||||
impl IndexView {
|
||||
fn new(uid: String, index: &Index) -> Result<IndexView, milli::Error> {
|
||||
let rtxn = index.read_txn()?;
|
||||
Ok(IndexView {
|
||||
uid,
|
||||
created_at: index.created_at(&rtxn)?,
|
||||
updated_at: index.updated_at(&rtxn)?,
|
||||
primary_key: index.primary_key(&rtxn)?.map(String::from),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn list_indexes(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
|
||||
paginate: web::Query<Pagination>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let search_rules = &index_scheduler.filters().search_rules;
|
||||
let indexes: Vec<_> = index_scheduler.indexes()?;
|
||||
let indexes = indexes
|
||||
.into_iter()
|
||||
.filter(|(name, _)| search_rules.is_index_authorized(name))
|
||||
.map(|(name, index)| IndexView::new(name, &index))
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
|
||||
let ret = paginate.auto_paginate_sized(indexes.into_iter());
|
||||
|
||||
debug!("returns: {:?}", ret);
|
||||
Ok(HttpResponse::Ok().json(ret))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
pub struct IndexCreateRequest {
|
||||
uid: String,
|
||||
primary_key: Option<String>,
|
||||
}
|
||||
|
||||
pub async fn create_index(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_CREATE }>, Data<IndexScheduler>>,
|
||||
body: web::Json<IndexCreateRequest>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let IndexCreateRequest { primary_key, uid } = body.into_inner();
|
||||
let uid = IndexUid::try_from(uid)?.into_inner();
|
||||
|
||||
let allow_index_creation = index_scheduler.filters().search_rules.is_index_authorized(&uid);
|
||||
if allow_index_creation {
|
||||
analytics.publish(
|
||||
"Index Created".to_string(),
|
||||
json!({ "primary_key": primary_key }),
|
||||
Some(&req),
|
||||
);
|
||||
|
||||
let task = KindWithContent::IndexCreation { index_uid: uid, primary_key };
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
} else {
|
||||
Err(AuthenticationError::InvalidToken.into())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
#[allow(dead_code)]
|
||||
pub struct UpdateIndexRequest {
|
||||
uid: Option<String>,
|
||||
primary_key: Option<String>,
|
||||
}
|
||||
|
||||
pub async fn get_index(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let index_view = IndexView::new(index_uid.into_inner(), &index)?;
|
||||
|
||||
debug!("returns: {:?}", index_view);
|
||||
|
||||
Ok(HttpResponse::Ok().json(index_view))
|
||||
}
|
||||
|
||||
pub async fn update_index(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_UPDATE }>, Data<IndexScheduler>>,
|
||||
path: web::Path<String>,
|
||||
body: web::Json<UpdateIndexRequest>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with params: {:?}", body);
|
||||
let body = body.into_inner();
|
||||
analytics.publish(
|
||||
"Index Updated".to_string(),
|
||||
json!({ "primary_key": body.primary_key}),
|
||||
Some(&req),
|
||||
);
|
||||
|
||||
let task = KindWithContent::IndexUpdate {
|
||||
index_uid: path.into_inner(),
|
||||
primary_key: body.primary_key,
|
||||
};
|
||||
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
pub async fn delete_index(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_DELETE }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() };
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
pub async fn get_index_stats(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
analytics.publish("Stats Seen".to_string(), json!({ "per_index_uid": true }), Some(&req));
|
||||
|
||||
let stats = IndexStats::new((*index_scheduler).clone(), index_uid.into_inner())?;
|
||||
|
||||
debug!("returns: {:?}", stats);
|
||||
Ok(HttpResponse::Ok().json(stats))
|
||||
}
|
||||
|
||||
#[derive(Serialize, Debug)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct IndexStats {
|
||||
pub number_of_documents: u64,
|
||||
pub is_indexing: bool,
|
||||
pub field_distribution: FieldDistribution,
|
||||
}
|
||||
|
||||
impl IndexStats {
|
||||
pub fn new(
|
||||
index_scheduler: Data<IndexScheduler>,
|
||||
index_uid: String,
|
||||
) -> Result<Self, ResponseError> {
|
||||
// we check if there is currently a task processing associated with this index.
|
||||
let is_processing = index_scheduler.is_index_processing(&index_uid)?;
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let rtxn = index.read_txn()?;
|
||||
Ok(IndexStats {
|
||||
number_of_documents: index.number_of_documents(&rtxn)?,
|
||||
is_indexing: is_processing,
|
||||
field_distribution: index.field_distribution(&rtxn)?,
|
||||
})
|
||||
}
|
||||
}
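
// Editor's note (illustrative, not part of the diff): `IndexCreateRequest` above is
// camelCase-renamed, so creating an index through this route is expected to look like:
//
//     curl -X POST 'http://localhost:7700/indexes' \
//          -H 'Content-Type: application/json' \
//          --data '{"uid": "movies", "primaryKey": "id"}'
//
// and, like the other write routes in this file, to answer with an Accepted summarized task.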
225
meilisearch/src/routes/indexes/search.rs
Normal file
@@ -0,0 +1,225 @@
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_auth::IndexSearchRules;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use serde::Deserialize;
|
||||
use serde_cs::vec::CS;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::analytics::{Analytics, SearchAggregator};
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::search::{
|
||||
perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
|
||||
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
|
||||
DEFAULT_SEARCH_OFFSET,
|
||||
};
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(
|
||||
web::resource("")
|
||||
.route(web::get().to(SeqHandler(search_with_url_query)))
|
||||
.route(web::post().to(SeqHandler(search_with_post))),
|
||||
);
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
pub struct SearchQueryGet {
|
||||
q: Option<String>,
|
||||
#[serde(default = "DEFAULT_SEARCH_OFFSET")]
|
||||
offset: usize,
|
||||
#[serde(default = "DEFAULT_SEARCH_LIMIT")]
|
||||
limit: usize,
|
||||
page: Option<usize>,
|
||||
hits_per_page: Option<usize>,
|
||||
attributes_to_retrieve: Option<CS<String>>,
|
||||
attributes_to_crop: Option<CS<String>>,
|
||||
#[serde(default = "DEFAULT_CROP_LENGTH")]
|
||||
crop_length: usize,
|
||||
attributes_to_highlight: Option<CS<String>>,
|
||||
filter: Option<String>,
|
||||
sort: Option<String>,
|
||||
#[serde(default = "Default::default")]
|
||||
show_matches_position: bool,
|
||||
facets: Option<CS<String>>,
|
||||
#[serde(default = "DEFAULT_HIGHLIGHT_PRE_TAG")]
|
||||
highlight_pre_tag: String,
|
||||
#[serde(default = "DEFAULT_HIGHLIGHT_POST_TAG")]
|
||||
highlight_post_tag: String,
|
||||
#[serde(default = "DEFAULT_CROP_MARKER")]
|
||||
crop_marker: String,
|
||||
#[serde(default)]
|
||||
matching_strategy: MatchingStrategy,
|
||||
}
|
||||
|
||||
impl From<SearchQueryGet> for SearchQuery {
|
||||
fn from(other: SearchQueryGet) -> Self {
|
||||
let filter = match other.filter {
|
||||
Some(f) => match serde_json::from_str(&f) {
|
||||
Ok(v) => Some(v),
|
||||
_ => Some(Value::String(f)),
|
||||
},
|
||||
None => None,
|
||||
};
|
||||
|
||||
Self {
|
||||
q: other.q,
|
||||
offset: other.offset,
|
||||
limit: other.limit,
|
||||
page: other.page,
|
||||
hits_per_page: other.hits_per_page,
|
||||
attributes_to_retrieve: other.attributes_to_retrieve.map(|o| o.into_iter().collect()),
|
||||
attributes_to_crop: other.attributes_to_crop.map(|o| o.into_iter().collect()),
|
||||
crop_length: other.crop_length,
|
||||
attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()),
|
||||
filter,
|
||||
sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)),
|
||||
show_matches_position: other.show_matches_position,
|
||||
facets: other.facets.map(|o| o.into_iter().collect()),
|
||||
highlight_pre_tag: other.highlight_pre_tag,
|
||||
highlight_post_tag: other.highlight_post_tag,
|
||||
crop_marker: other.crop_marker,
|
||||
matching_strategy: other.matching_strategy,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Incorporate search rules in search query
|
||||
fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
|
||||
query.filter = match (query.filter.take(), rules.filter) {
|
||||
(None, rules_filter) => rules_filter,
|
||||
(filter, None) => filter,
|
||||
(Some(filter), Some(rules_filter)) => {
|
||||
let filter = match filter {
|
||||
Value::Array(filter) => filter,
|
||||
filter => vec![filter],
|
||||
};
|
||||
let rules_filter = match rules_filter {
|
||||
Value::Array(rules_filter) => rules_filter,
|
||||
rules_filter => vec![rules_filter],
|
||||
};
|
||||
|
||||
Some(Value::Array([filter, rules_filter].concat()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: TAMO: split on :asc, and :desc, instead of doing some weird things
|
||||
|
||||
/// Transform the sort query parameter into something that matches the post expected format.
|
||||
fn fix_sort_query_parameters(sort_query: &str) -> Vec<String> {
|
||||
let mut sort_parameters = Vec::new();
|
||||
let mut merge = false;
|
||||
for current_sort in sort_query.trim_matches('"').split(',').map(|s| s.trim()) {
|
||||
if current_sort.starts_with("_geoPoint(") {
|
||||
sort_parameters.push(current_sort.to_string());
|
||||
merge = true;
|
||||
} else if merge && !sort_parameters.is_empty() {
|
||||
let s = sort_parameters.last_mut().unwrap();
|
||||
s.push(',');
|
||||
s.push_str(current_sort);
|
||||
if current_sort.ends_with("):desc") || current_sort.ends_with("):asc") {
|
||||
merge = false;
|
||||
}
|
||||
} else {
|
||||
sort_parameters.push(current_sort.to_string());
|
||||
merge = false;
|
||||
}
|
||||
}
|
||||
sort_parameters
|
||||
}
|
||||
|
||||
pub async fn search_with_url_query(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
params: web::Query<SearchQueryGet>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with params: {:?}", params);
|
||||
let mut query: SearchQuery = params.into_inner().into();
|
||||
|
||||
// Tenant token search_rules.
|
||||
if let Some(search_rules) =
|
||||
index_scheduler.filters().search_rules.get_index_search_rules(&index_uid)
|
||||
{
|
||||
add_search_rules(&mut query, search_rules);
|
||||
}
|
||||
|
||||
let mut aggregate = SearchAggregator::from_query(&query, &req);
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let search_result = tokio::task::spawn_blocking(move || perform_search(&index, query)).await?;
|
||||
if let Ok(ref search_result) = search_result {
|
||||
aggregate.succeed(search_result);
|
||||
}
|
||||
analytics.get_search(aggregate);
|
||||
|
||||
let search_result = search_result?;
|
||||
|
||||
debug!("returns: {:?}", search_result);
|
||||
Ok(HttpResponse::Ok().json(search_result))
|
||||
}
|
||||
|
||||
pub async fn search_with_post(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
params: web::Json<SearchQuery>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let mut query = params.into_inner();
|
||||
debug!("search called with params: {:?}", query);
|
||||
|
||||
// Tenant token search_rules.
|
||||
if let Some(search_rules) =
|
||||
index_scheduler.filters().search_rules.get_index_search_rules(&index_uid)
|
||||
{
|
||||
add_search_rules(&mut query, search_rules);
|
||||
}
|
||||
|
||||
let mut aggregate = SearchAggregator::from_query(&query, &req);
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let search_result = tokio::task::spawn_blocking(move || perform_search(&index, query)).await?;
|
||||
if let Ok(ref search_result) = search_result {
|
||||
aggregate.succeed(search_result);
|
||||
}
|
||||
analytics.post_search(aggregate);
|
||||
|
||||
let search_result = search_result?;
|
||||
|
||||
debug!("returns: {:?}", search_result);
|
||||
Ok(HttpResponse::Ok().json(search_result))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_fix_sort_query_parameters() {
|
||||
let sort = fix_sort_query_parameters("_geoPoint(12, 13):asc");
|
||||
assert_eq!(sort, vec!["_geoPoint(12,13):asc".to_string()]);
|
||||
let sort = fix_sort_query_parameters("doggo:asc,_geoPoint(12.45,13.56):desc");
|
||||
assert_eq!(sort, vec!["doggo:asc".to_string(), "_geoPoint(12.45,13.56):desc".to_string(),]);
|
||||
let sort = fix_sort_query_parameters(
|
||||
"doggo:asc , _geoPoint(12.45, 13.56, 2590352):desc , catto:desc",
|
||||
);
|
||||
assert_eq!(
|
||||
sort,
|
||||
vec![
|
||||
"doggo:asc".to_string(),
|
||||
"_geoPoint(12.45,13.56,2590352):desc".to_string(),
|
||||
"catto:desc".to_string(),
|
||||
]
|
||||
);
|
||||
let sort = fix_sort_query_parameters("doggo:asc , _geoPoint(1, 2), catto:desc");
|
||||
// This is ugly but eh, I don't want to write a full parser just for this unused route
|
||||
assert_eq!(sort, vec!["doggo:asc".to_string(), "_geoPoint(1,2),catto:desc".to_string(),]);
|
||||
}
|
||||
}
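
// Editor's note (illustrative, not part of the diff): `SearchQueryGet` drives the GET
// variant of the search route, so a query using a couple of the fields above is expected
// to look like:
//
//     curl 'http://localhost:7700/indexes/movies/search?q=botman&limit=5&sort=title:asc'
//
// The POST variant accepts the same parameters as a JSON body via `SearchQuery`.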
565
meilisearch/src/routes/indexes/settings.rs
Normal file
@@ -0,0 +1,565 @@
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::settings::{settings, Settings, Unchecked};
use meilisearch_types::tasks::KindWithContent;
use serde_json::json;

use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::routes::SummarizedTaskView;

#[macro_export]
macro_rules! make_setting_route {
    ($route:literal, $update_verb:ident, $type:ty, $attr:ident, $camelcase_attr:literal, $analytics_var:ident, $analytics:expr) => {
        pub mod $attr {
            use actix_web::web::Data;
            use actix_web::{web, HttpRequest, HttpResponse, Resource};
            use index_scheduler::IndexScheduler;
            use log::debug;
            use meilisearch_types::error::ResponseError;
            use meilisearch_types::index_uid::IndexUid;
            use meilisearch_types::milli::update::Setting;
            use meilisearch_types::settings::{settings, Settings};
            use meilisearch_types::tasks::KindWithContent;
            use $crate::analytics::Analytics;
            use $crate::extractors::authentication::policies::*;
            use $crate::extractors::authentication::GuardedData;
            use $crate::extractors::sequential_extractor::SeqHandler;
            use $crate::routes::SummarizedTaskView;

            pub async fn delete(
                index_scheduler: GuardedData<
                    ActionPolicy<{ actions::SETTINGS_UPDATE }>,
                    Data<IndexScheduler>,
                >,
                index_uid: web::Path<String>,
            ) -> Result<HttpResponse, ResponseError> {
                let new_settings = Settings { $attr: Setting::Reset, ..Default::default() };

                let allow_index_creation = index_scheduler.filters().allow_index_creation;
                let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
                let task = KindWithContent::SettingsUpdate {
                    index_uid,
                    new_settings: Box::new(new_settings),
                    is_deletion: true,
                    allow_index_creation,
                };
                let task: SummarizedTaskView =
                    tokio::task::spawn_blocking(move || index_scheduler.register(task))
                        .await??
                        .into();

                debug!("returns: {:?}", task);
                Ok(HttpResponse::Accepted().json(task))
            }

            pub async fn update(
                index_scheduler: GuardedData<
                    ActionPolicy<{ actions::SETTINGS_UPDATE }>,
                    Data<IndexScheduler>,
                >,
                index_uid: actix_web::web::Path<String>,
                body: actix_web::web::Json<Option<$type>>,
                req: HttpRequest,
                $analytics_var: web::Data<dyn Analytics>,
            ) -> std::result::Result<HttpResponse, ResponseError> {
                let body = body.into_inner();

                $analytics(&body, &req);

                let new_settings = Settings {
                    $attr: match body {
                        Some(inner_body) => Setting::Set(inner_body),
                        None => Setting::Reset,
                    },
                    ..Default::default()
                };

                let allow_index_creation = index_scheduler.filters().allow_index_creation;
                let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
                let task = KindWithContent::SettingsUpdate {
                    index_uid,
                    new_settings: Box::new(new_settings),
                    is_deletion: false,
                    allow_index_creation,
                };
                let task: SummarizedTaskView =
                    tokio::task::spawn_blocking(move || index_scheduler.register(task))
                        .await??
                        .into();

                debug!("returns: {:?}", task);
                Ok(HttpResponse::Accepted().json(task))
            }

            pub async fn get(
                index_scheduler: GuardedData<
                    ActionPolicy<{ actions::SETTINGS_GET }>,
                    Data<IndexScheduler>,
                >,
                index_uid: actix_web::web::Path<String>,
            ) -> std::result::Result<HttpResponse, ResponseError> {
                let index = index_scheduler.index(&index_uid)?;
                let rtxn = index.read_txn()?;
                let settings = settings(&index, &rtxn)?;

                debug!("returns: {:?}", settings);
                let mut json = serde_json::json!(&settings);
                let val = json[$camelcase_attr].take();

                Ok(HttpResponse::Ok().json(val))
            }

            pub fn resources() -> Resource {
                Resource::new($route)
                    .route(web::get().to(SeqHandler(get)))
                    .route(web::$update_verb().to(SeqHandler(update)))
                    .route(web::delete().to(SeqHandler(delete)))
            }
        }
    };
}
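
For orientation while reading the invocations that follow, this is a rough sketch (illustrative only, not part of the diff) of the surface a single invocation generates; the module name, payload type and route simply echo the macro arguments:

// make_setting_route!("/stop-words", put, BTreeSet<String>, stop_words, "stopWords", ...) yields, roughly:
// pub mod stop_words {
//     pub async fn get(..)     // returns the current value of the "stopWords" key of the settings
//     pub async fn update(..)  // handles the $update_verb (here PUT), fires the analytics closure,
//                              // and registers a SettingsUpdate task with Setting::Set(body)
//     pub async fn delete(..)  // registers a SettingsUpdate task with Setting::Reset
//     pub fn resources() -> Resource  // wires the three handlers onto the "/stop-words" resource
// }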

make_setting_route!(
    "/filterable-attributes",
    put,
    std::collections::BTreeSet<String>,
    filterable_attributes,
    "filterableAttributes",
    analytics,
    |setting: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
        use serde_json::json;

        analytics.publish(
            "FilterableAttributes Updated".to_string(),
            json!({
                "filterable_attributes": {
                    "total": setting.as_ref().map(|filter| filter.len()).unwrap_or(0),
                    "has_geo": setting.as_ref().map(|filter| filter.contains("_geo")).unwrap_or(false),
                }
            }),
            Some(req),
        );
    }
);

make_setting_route!(
    "/sortable-attributes",
    put,
    std::collections::BTreeSet<String>,
    sortable_attributes,
    "sortableAttributes",
    analytics,
    |setting: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
        use serde_json::json;

        analytics.publish(
            "SortableAttributes Updated".to_string(),
            json!({
                "sortable_attributes": {
                    "total": setting.as_ref().map(|sort| sort.len()),
                    "has_geo": setting.as_ref().map(|sort| sort.contains("_geo")),
                },
            }),
            Some(req),
        );
    }
);

make_setting_route!(
    "/displayed-attributes",
    put,
    Vec<String>,
    displayed_attributes,
    "displayedAttributes",
    analytics,
    |displayed: &Option<Vec<String>>, req: &HttpRequest| {
        use serde_json::json;

        analytics.publish(
            "DisplayedAttributes Updated".to_string(),
            json!({
                "displayed_attributes": {
                    "total": displayed.as_ref().map(|displayed| displayed.len()),
                    "with_wildcard": displayed.as_ref().map(|displayed| displayed.iter().any(|displayed| displayed == "*")),
                },
            }),
            Some(req),
        );
    }
);

make_setting_route!(
    "/typo-tolerance",
    patch,
    meilisearch_types::settings::TypoSettings,
    typo_tolerance,
    "typoTolerance",
    analytics,
    |setting: &Option<meilisearch_types::settings::TypoSettings>, req: &HttpRequest| {
        use serde_json::json;

        analytics.publish(
            "TypoTolerance Updated".to_string(),
            json!({
                "typo_tolerance": {
                    "enabled": setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))),
                    "disable_on_attributes": setting
                        .as_ref()
                        .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())),
                    "disable_on_words": setting
                        .as_ref()
                        .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())),
                    "min_word_size_for_one_typo": setting
                        .as_ref()
                        .and_then(|s| s.min_word_size_for_typos
                            .as_ref()
                            .set()
                            .map(|s| s.one_typo.set()))
                        .flatten(),
                    "min_word_size_for_two_typos": setting
                        .as_ref()
                        .and_then(|s| s.min_word_size_for_typos
                            .as_ref()
                            .set()
                            .map(|s| s.two_typos.set()))
                        .flatten(),
                },
            }),
            Some(req),
        );
    }
);

make_setting_route!(
    "/searchable-attributes",
    put,
    Vec<String>,
    searchable_attributes,
    "searchableAttributes",
    analytics,
    |setting: &Option<Vec<String>>, req: &HttpRequest| {
        use serde_json::json;

        analytics.publish(
            "SearchableAttributes Updated".to_string(),
            json!({
                "searchable_attributes": {
                    "total": setting.as_ref().map(|searchable| searchable.len()),
                    "with_wildcard": setting.as_ref().map(|searchable| searchable.iter().any(|searchable| searchable == "*")),
                },
            }),
            Some(req),
        );
    }
);

make_setting_route!(
    "/stop-words",
    put,
    std::collections::BTreeSet<String>,
    stop_words,
    "stopWords",
    analytics,
    |stop_words: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
        use serde_json::json;

        analytics.publish(
            "StopWords Updated".to_string(),
            json!({
                "stop_words": {
                    "total": stop_words.as_ref().map(|stop_words| stop_words.len()),
                },
            }),
            Some(req),
        );
    }
);

make_setting_route!(
    "/synonyms",
    put,
    std::collections::BTreeMap<String, Vec<String>>,
    synonyms,
    "synonyms",
    analytics,
    |synonyms: &Option<std::collections::BTreeMap<String, Vec<String>>>, req: &HttpRequest| {
        use serde_json::json;

        analytics.publish(
            "Synonyms Updated".to_string(),
            json!({
                "synonyms": {
                    "total": synonyms.as_ref().map(|synonyms| synonyms.len()),
                },
            }),
            Some(req),
        );
    }
);

make_setting_route!(
    "/distinct-attribute",
    put,
    String,
    distinct_attribute,
    "distinctAttribute",
    analytics,
    |distinct: &Option<String>, req: &HttpRequest| {
        use serde_json::json;
        analytics.publish(
            "DistinctAttribute Updated".to_string(),
            json!({
                "distinct_attribute": {
                    "set": distinct.is_some(),
                }
            }),
            Some(req),
        );
    }
);

make_setting_route!(
    "/ranking-rules",
    put,
    Vec<String>,
    ranking_rules,
    "rankingRules",
    analytics,
    |setting: &Option<Vec<String>>, req: &HttpRequest| {
        use serde_json::json;

        analytics.publish(
            "RankingRules Updated".to_string(),
            json!({
                "ranking_rules": {
                    "words_position": setting.as_ref().map(|rr| rr.iter().position(|s| s == "words")),
                    "typo_position": setting.as_ref().map(|rr| rr.iter().position(|s| s == "typo")),
                    "proximity_position": setting.as_ref().map(|rr| rr.iter().position(|s| s == "proximity")),
                    "attribute_position": setting.as_ref().map(|rr| rr.iter().position(|s| s == "attribute")),
                    "sort_position": setting.as_ref().map(|rr| rr.iter().position(|s| s == "sort")),
                    "exactness_position": setting.as_ref().map(|rr| rr.iter().position(|s| s == "exactness")),
                    "values": setting.as_ref().map(|rr| rr.iter().filter(|s| !s.contains(':')).cloned().collect::<Vec<_>>().join(", ")),
                }
            }),
            Some(req),
        );
    }
);

make_setting_route!(
    "/faceting",
    patch,
    meilisearch_types::settings::FacetingSettings,
    faceting,
    "faceting",
    analytics,
    |setting: &Option<meilisearch_types::settings::FacetingSettings>, req: &HttpRequest| {
        use serde_json::json;

        analytics.publish(
            "Faceting Updated".to_string(),
            json!({
                "faceting": {
                    "max_values_per_facet": setting.as_ref().and_then(|s| s.max_values_per_facet.set()),
                },
            }),
            Some(req),
        );
    }
);

make_setting_route!(
    "/pagination",
    patch,
    meilisearch_types::settings::PaginationSettings,
    pagination,
    "pagination",
    analytics,
    |setting: &Option<meilisearch_types::settings::PaginationSettings>, req: &HttpRequest| {
        use serde_json::json;

        analytics.publish(
            "Pagination Updated".to_string(),
            json!({
                "pagination": {
                    "max_total_hits": setting.as_ref().and_then(|s| s.max_total_hits.set()),
                },
            }),
            Some(req),
        );
    }
);

macro_rules! generate_configure {
    ($($mod:ident),*) => {
        pub fn configure(cfg: &mut web::ServiceConfig) {
            use crate::extractors::sequential_extractor::SeqHandler;
            cfg.service(
                web::resource("")
                    .route(web::patch().to(SeqHandler(update_all)))
                    .route(web::get().to(SeqHandler(get_all)))
                    .route(web::delete().to(SeqHandler(delete_all))))
                $(.service($mod::resources()))*;
        }
    };
}

generate_configure!(
    filterable_attributes,
    sortable_attributes,
    displayed_attributes,
    searchable_attributes,
    distinct_attribute,
    stop_words,
    synonyms,
    ranking_rules,
    typo_tolerance,
    pagination,
    faceting
);
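
For one concrete picture of what this produces, the generate_configure! invocation above expands to roughly the following (a sketch only, middle modules elided):

pub fn configure(cfg: &mut web::ServiceConfig) {
    use crate::extractors::sequential_extractor::SeqHandler;
    cfg.service(
        web::resource("")
            .route(web::patch().to(SeqHandler(update_all)))
            .route(web::get().to(SeqHandler(get_all)))
            .route(web::delete().to(SeqHandler(delete_all))))
        .service(filterable_attributes::resources())
        .service(sortable_attributes::resources())
        // ...one .service(<module>::resources()) call per remaining module in the list...
        .service(faceting::resources());
}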

pub async fn update_all(
    index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
    body: web::Json<Settings<Unchecked>>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let new_settings = body.into_inner();

    analytics.publish(
        "Settings Updated".to_string(),
        json!({
            "ranking_rules": {
                "words_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| s == "words")),
                "typo_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| s == "typo")),
                "proximity_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| s == "proximity")),
                "attribute_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| s == "attribute")),
                "sort_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| s == "sort")),
                "exactness_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| s == "exactness")),
                "values": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().filter(|s| !s.contains(':')).cloned().collect::<Vec<_>>().join(", ")),
            },
            "searchable_attributes": {
                "total": new_settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()),
                "with_wildcard": new_settings.searchable_attributes.as_ref().set().map(|searchable| searchable.iter().any(|searchable| searchable == "*")),
            },
            "displayed_attributes": {
                "total": new_settings.displayed_attributes.as_ref().set().map(|displayed| displayed.len()),
                "with_wildcard": new_settings.displayed_attributes.as_ref().set().map(|displayed| displayed.iter().any(|displayed| displayed == "*")),
            },
            "sortable_attributes": {
                "total": new_settings.sortable_attributes.as_ref().set().map(|sort| sort.len()),
                "has_geo": new_settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")),
            },
            "filterable_attributes": {
                "total": new_settings.filterable_attributes.as_ref().set().map(|filter| filter.len()),
                "has_geo": new_settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")),
            },
            "distinct_attribute": {
                "set": new_settings.distinct_attribute.as_ref().set().is_some()
            },
            "typo_tolerance": {
                "enabled": new_settings.typo_tolerance
                    .as_ref()
                    .set()
                    .and_then(|s| s.enabled.as_ref().set())
                    .copied(),
                "disable_on_attributes": new_settings.typo_tolerance
                    .as_ref()
                    .set()
                    .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())),
                "disable_on_words": new_settings.typo_tolerance
                    .as_ref()
                    .set()
                    .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())),
                "min_word_size_for_one_typo": new_settings.typo_tolerance
                    .as_ref()
                    .set()
                    .and_then(|s| s.min_word_size_for_typos
                        .as_ref()
                        .set()
                        .map(|s| s.one_typo.set()))
                    .flatten(),
                "min_word_size_for_two_typos": new_settings.typo_tolerance
                    .as_ref()
                    .set()
                    .and_then(|s| s.min_word_size_for_typos
                        .as_ref()
                        .set()
                        .map(|s| s.two_typos.set()))
                    .flatten(),
            },
            "faceting": {
                "max_values_per_facet": new_settings.faceting
                    .as_ref()
                    .set()
                    .and_then(|s| s.max_values_per_facet.as_ref().set()),
            },
            "pagination": {
                "max_total_hits": new_settings.pagination
                    .as_ref()
                    .set()
                    .and_then(|s| s.max_total_hits.as_ref().set()),
            },
            "stop_words": {
                "total": new_settings.stop_words.as_ref().set().map(|stop_words| stop_words.len()),
            },
            "synonyms": {
                "total": new_settings.synonyms.as_ref().set().map(|synonyms| synonyms.len()),
            },
        }),
        Some(&req),
    );

    let allow_index_creation = index_scheduler.filters().allow_index_creation;
    let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
    let task = KindWithContent::SettingsUpdate {
        index_uid,
        new_settings: Box::new(new_settings),
        is_deletion: false,
        allow_index_creation,
    };
    let task: SummarizedTaskView =
        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();

    debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
}

pub async fn get_all(
    index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_GET }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
    let index = index_scheduler.index(&index_uid)?;
    let rtxn = index.read_txn()?;
    let new_settings = settings(&index, &rtxn)?;
    debug!("returns: {:?}", new_settings);
    Ok(HttpResponse::Ok().json(new_settings))
}

pub async fn delete_all(
    index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
    let new_settings = Settings::cleared().into_unchecked();

    let allow_index_creation = index_scheduler.filters().allow_index_creation;
    let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
    let task = KindWithContent::SettingsUpdate {
        index_uid,
        new_settings: Box::new(new_settings),
        is_deletion: true,
        allow_index_creation,
    };
    let task: SummarizedTaskView =
        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();

    debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
}
339
meilisearch/src/routes/mod.rs
Normal file
@ -0,0 +1,339 @@
use std::collections::BTreeMap;

use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::{IndexScheduler, Query};
use log::debug;
use meilisearch_types::error::ResponseError;
use meilisearch_types::settings::{Settings, Unchecked};
use meilisearch_types::star_or::StarOr;
use meilisearch_types::tasks::{Kind, Status, Task, TaskId};
use serde::{Deserialize, Serialize};
use serde_json::json;
use time::OffsetDateTime;

use self::indexes::IndexStats;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;

mod api_key;
mod dump;
pub mod indexes;
mod swap_indexes;
pub mod tasks;

pub fn configure(cfg: &mut web::ServiceConfig) {
    cfg.service(web::scope("/tasks").configure(tasks::configure))
        .service(web::resource("/health").route(web::get().to(get_health)))
        .service(web::scope("/keys").configure(api_key::configure))
        .service(web::scope("/dumps").configure(dump::configure))
        .service(web::resource("/stats").route(web::get().to(get_stats)))
        .service(web::resource("/version").route(web::get().to(get_version)))
        .service(web::scope("/indexes").configure(indexes::configure))
        .service(web::scope("/swap-indexes").configure(swap_indexes::configure));
}

/// Extracts the raw values from the `StarOr` types and
/// returns `None` if a `StarOr::Star` is encountered.
pub fn fold_star_or<T, O>(content: impl IntoIterator<Item = StarOr<T>>) -> Option<O>
where
    O: FromIterator<T>,
{
    content
        .into_iter()
        .map(|value| match value {
            StarOr::Star => None,
            StarOr::Other(val) => Some(val),
        })
        .collect()
}
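
A minimal illustration of the behaviour described in the doc comment above (values are made up):

// Plain values are collected as usual:
assert_eq!(
    fold_star_or::<_, Vec<&str>>(vec![StarOr::Other("movies"), StarOr::Other("books")]),
    Some(vec!["movies", "books"]),
);
// A single `*` anywhere collapses the whole result to `None`:
assert_eq!(
    fold_star_or::<_, Vec<&str>>(vec![StarOr::Other("movies"), StarOr::Star]),
    None,
);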

const PAGINATION_DEFAULT_LIMIT: fn() -> usize = || 20;

#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct SummarizedTaskView {
    task_uid: TaskId,
    index_uid: Option<String>,
    status: Status,
    #[serde(rename = "type")]
    kind: Kind,
    #[serde(serialize_with = "time::serde::rfc3339::serialize")]
    enqueued_at: OffsetDateTime,
}

impl From<Task> for SummarizedTaskView {
    fn from(task: Task) -> Self {
        SummarizedTaskView {
            task_uid: task.uid,
            index_uid: task.index_uid().map(|s| s.to_string()),
            status: task.status,
            kind: task.kind.as_kind(),
            enqueued_at: task.enqueued_at,
        }
    }
}

#[derive(Debug, Clone, Copy, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct Pagination {
    #[serde(default)]
    pub offset: usize,
    #[serde(default = "PAGINATION_DEFAULT_LIMIT")]
    pub limit: usize,
}

#[derive(Debug, Clone, Serialize)]
pub struct PaginationView<T> {
    pub results: Vec<T>,
    pub offset: usize,
    pub limit: usize,
    pub total: usize,
}

impl Pagination {
    /// Given the full data to paginate, returns the selected section.
    pub fn auto_paginate_sized<T>(
        self,
        content: impl IntoIterator<Item = T> + ExactSizeIterator,
    ) -> PaginationView<T>
    where
        T: Serialize,
    {
        let total = content.len();
        let content: Vec<_> = content.into_iter().skip(self.offset).take(self.limit).collect();
        self.format_with(total, content)
    }

    /// Given an iterator and the total number of elements, returns the selected section.
    pub fn auto_paginate_unsized<T>(
        self,
        total: usize,
        content: impl IntoIterator<Item = T>,
    ) -> PaginationView<T>
    where
        T: Serialize,
    {
        let content: Vec<_> = content.into_iter().skip(self.offset).take(self.limit).collect();
        self.format_with(total, content)
    }

    /// Given the data already paginated plus the total number of elements, stores
    /// everything in a [PaginationView].
    pub fn format_with<T>(self, total: usize, results: Vec<T>) -> PaginationView<T>
    where
        T: Serialize,
    {
        PaginationView { results, offset: self.offset, limit: self.limit, total }
    }
}
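
As a usage sketch (assumed values), paginating an in-memory range with offset 20 and limit 10:

let pagination = Pagination { offset: 20, limit: 10 };
let view = pagination.auto_paginate_sized(0..100usize);
assert_eq!(view.total, 100);
assert_eq!(view.results, (20..30).collect::<Vec<_>>());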

impl<T> PaginationView<T> {
    pub fn new(offset: usize, limit: usize, total: usize, results: Vec<T>) -> Self {
        Self { offset, limit, results, total }
    }
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[allow(clippy::large_enum_variant)]
#[serde(tag = "name")]
pub enum UpdateType {
    ClearAll,
    Customs,
    DocumentsAddition {
        #[serde(skip_serializing_if = "Option::is_none")]
        number: Option<usize>,
    },
    DocumentsPartial {
        #[serde(skip_serializing_if = "Option::is_none")]
        number: Option<usize>,
    },
    DocumentsDeletion {
        #[serde(skip_serializing_if = "Option::is_none")]
        number: Option<usize>,
    },
    Settings {
        settings: Settings<Unchecked>,
    },
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ProcessedUpdateResult {
    pub update_id: u64,
    #[serde(rename = "type")]
    pub update_type: UpdateType,
    pub duration: f64, // in seconds
    #[serde(with = "time::serde::rfc3339")]
    pub enqueued_at: OffsetDateTime,
    #[serde(with = "time::serde::rfc3339")]
    pub processed_at: OffsetDateTime,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct FailedUpdateResult {
    pub update_id: u64,
    #[serde(rename = "type")]
    pub update_type: UpdateType,
    pub error: ResponseError,
    pub duration: f64, // in seconds
    #[serde(with = "time::serde::rfc3339")]
    pub enqueued_at: OffsetDateTime,
    #[serde(with = "time::serde::rfc3339")]
    pub processed_at: OffsetDateTime,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct EnqueuedUpdateResult {
    pub update_id: u64,
    #[serde(rename = "type")]
    pub update_type: UpdateType,
    #[serde(with = "time::serde::rfc3339")]
    pub enqueued_at: OffsetDateTime,
    #[serde(skip_serializing_if = "Option::is_none", with = "time::serde::rfc3339::option")]
    pub started_processing_at: Option<OffsetDateTime>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", tag = "status")]
pub enum UpdateStatusResponse {
    Enqueued {
        #[serde(flatten)]
        content: EnqueuedUpdateResult,
    },
    Processing {
        #[serde(flatten)]
        content: EnqueuedUpdateResult,
    },
    Failed {
        #[serde(flatten)]
        content: FailedUpdateResult,
    },
    Processed {
        #[serde(flatten)]
        content: ProcessedUpdateResult,
    },
}

#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
pub struct IndexUpdateResponse {
    pub update_id: u64,
}

impl IndexUpdateResponse {
    pub fn with_id(update_id: u64) -> Self {
        Self { update_id }
    }
}

/// Always returns a 200 with:
/// ```json
/// {
///     "status": "Meilisearch is running"
/// }
/// ```
pub async fn running() -> HttpResponse {
    HttpResponse::Ok().json(serde_json::json!({ "status": "Meilisearch is running" }))
}

#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct Stats {
    pub database_size: u64,
    #[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
    pub last_update: Option<OffsetDateTime>,
    pub indexes: BTreeMap<String, IndexStats>,
}

async fn get_stats(
    index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    analytics.publish("Stats Seen".to_string(), json!({ "per_index_uid": false }), Some(&req));
    let search_rules = &index_scheduler.filters().search_rules;

    let stats = create_all_stats((*index_scheduler).clone(), search_rules)?;

    debug!("returns: {:?}", stats);
    Ok(HttpResponse::Ok().json(stats))
}

pub fn create_all_stats(
    index_scheduler: Data<IndexScheduler>,
    search_rules: &meilisearch_auth::SearchRules,
) -> Result<Stats, ResponseError> {
    let mut last_task: Option<OffsetDateTime> = None;
    let mut indexes = BTreeMap::new();
    let mut database_size = 0;
    let processing_task = index_scheduler.get_tasks_from_authorized_indexes(
        Query { statuses: Some(vec![Status::Processing]), limit: Some(1), ..Query::default() },
        search_rules.authorized_indexes(),
    )?;
    let processing_index = processing_task.first().and_then(|task| task.index_uid());
    for (name, index) in index_scheduler.indexes()? {
        if !search_rules.is_index_authorized(&name) {
            continue;
        }

        database_size += index.on_disk_size()?;

        let rtxn = index.read_txn()?;
        let stats = IndexStats {
            number_of_documents: index.number_of_documents(&rtxn)?,
            is_indexing: processing_index.map_or(false, |index_name| name == index_name),
            field_distribution: index.field_distribution(&rtxn)?,
        };

        let updated_at = index.updated_at(&rtxn)?;
        last_task = last_task.map_or(Some(updated_at), |last| Some(last.max(updated_at)));

        indexes.insert(name, stats);
    }
    let stats = Stats { database_size, last_update: last_task, indexes };
    Ok(stats)
}

#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct VersionResponse {
    commit_sha: String,
    commit_date: String,
    pkg_version: String,
}

async fn get_version(
    _index_scheduler: GuardedData<ActionPolicy<{ actions::VERSION }>, Data<IndexScheduler>>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> HttpResponse {
    analytics.publish("Version Seen".to_string(), json!(null), Some(&req));

    let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
    let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");

    HttpResponse::Ok().json(VersionResponse {
        commit_sha: commit_sha.to_string(),
        commit_date: commit_date.to_string(),
        pkg_version: env!("CARGO_PKG_VERSION").to_string(),
    })
}

#[derive(Serialize)]
struct KeysResponse {
    private: Option<String>,
    public: Option<String>,
}

pub async fn get_health(
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    analytics.health_seen(&req);

    Ok(HttpResponse::Ok().json(serde_json::json!({ "status": "available" })))
}
59
meilisearch/src/routes/swap_indexes.rs
Normal file
@ -0,0 +1,59 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::{IndexSwap, KindWithContent};
use serde::Deserialize;
use serde_json::json;

use super::SummarizedTaskView;
use crate::analytics::Analytics;
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;

pub fn configure(cfg: &mut web::ServiceConfig) {
    cfg.service(web::resource("").route(web::post().to(SeqHandler(swap_indexes))));
}

#[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct SwapIndexesPayload {
    indexes: Vec<String>,
}

pub async fn swap_indexes(
    index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_SWAP }>, Data<IndexScheduler>>,
    params: web::Json<Vec<SwapIndexesPayload>>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    analytics.publish(
        "Indexes Swapped".to_string(),
        json!({
            "swap_operation_number": params.len(),
        }),
        Some(&req),
    );
    let search_rules = &index_scheduler.filters().search_rules;

    let mut swaps = vec![];
    for SwapIndexesPayload { indexes } in params.into_inner().into_iter() {
        let (lhs, rhs) = match indexes.as_slice() {
            [lhs, rhs] => (lhs, rhs),
            _ => {
                return Err(MeilisearchHttpError::SwapIndexPayloadWrongLength(indexes).into());
            }
        };
        if !search_rules.is_index_authorized(lhs) || !search_rules.is_index_authorized(rhs) {
            return Err(AuthenticationError::InvalidToken.into());
        }
        swaps.push(IndexSwap { indexes: (lhs.clone(), rhs.clone()) });
    }

    let task = KindWithContent::IndexSwap { swaps };

    let task = index_scheduler.register(task)?;
    let task: SummarizedTaskView = task.into();
    Ok(HttpResponse::Accepted().json(task))
}
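
For reference, the handler above expects a JSON array of swap operations; a sketch of a valid payload (index names invented):

// POST /swap-indexes
// Each entry must name exactly two index uids, otherwise the handler returns
// MeilisearchHttpError::SwapIndexPayloadWrongLength.
let payload = serde_json::json!([
    { "indexes": ["movies", "movies_new"] },
    { "indexes": ["books", "books_new"] }
]);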
1020
meilisearch/src/routes/tasks.rs
Normal file
File diff suppressed because it is too large
701
meilisearch/src/search.rs
Normal file
@ -0,0 +1,701 @@
use std::cmp::min;
use std::collections::{BTreeMap, BTreeSet, HashSet};
use std::str::FromStr;
use std::time::Instant;

use either::Either;
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
use meilisearch_types::{milli, Document};
use milli::tokenizer::TokenizerBuilder;
use milli::{
    AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, Index, MatchBounds, MatcherBuilder,
    SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
};
use regex::Regex;
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};

use crate::error::MeilisearchHttpError;

type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>;

pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0;
pub const DEFAULT_SEARCH_LIMIT: fn() -> usize = || 20;
pub const DEFAULT_CROP_LENGTH: fn() -> usize = || 10;
pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();

#[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct SearchQuery {
    pub q: Option<String>,
    #[serde(default = "DEFAULT_SEARCH_OFFSET")]
    pub offset: usize,
    #[serde(default = "DEFAULT_SEARCH_LIMIT")]
    pub limit: usize,
    pub page: Option<usize>,
    pub hits_per_page: Option<usize>,
    pub attributes_to_retrieve: Option<BTreeSet<String>>,
    pub attributes_to_crop: Option<Vec<String>>,
    #[serde(default = "DEFAULT_CROP_LENGTH")]
    pub crop_length: usize,
    pub attributes_to_highlight: Option<HashSet<String>>,
    // Default to false
    #[serde(default = "Default::default")]
    pub show_matches_position: bool,
    pub filter: Option<Value>,
    pub sort: Option<Vec<String>>,
    pub facets: Option<Vec<String>>,
    #[serde(default = "DEFAULT_HIGHLIGHT_PRE_TAG")]
    pub highlight_pre_tag: String,
    #[serde(default = "DEFAULT_HIGHLIGHT_POST_TAG")]
    pub highlight_post_tag: String,
    #[serde(default = "DEFAULT_CROP_MARKER")]
    pub crop_marker: String,
    #[serde(default)]
    pub matching_strategy: MatchingStrategy,
}

impl SearchQuery {
    pub fn is_finite_pagination(&self) -> bool {
        self.page.or(self.hits_per_page).is_some()
    }
}
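
In other words, sending either `page` or `hitsPerPage` switches a request into finite (exhaustive) pagination, while plain `offset`/`limit` keeps the estimated mode; a quick sketch (assuming deserialization through serde_json, mirroring what the HTTP layer does):

let q: SearchQuery = serde_json::from_value(json!({ "q": "hello", "page": 2 })).unwrap();
assert!(q.is_finite_pagination());

let q: SearchQuery = serde_json::from_value(json!({ "q": "hello", "offset": 40, "limit": 20 })).unwrap();
assert!(!q.is_finite_pagination());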

#[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub enum MatchingStrategy {
    /// Remove query words from last to first
    Last,
    /// All query words are mandatory
    All,
}

impl Default for MatchingStrategy {
    fn default() -> Self {
        Self::Last
    }
}

impl From<MatchingStrategy> for TermsMatchingStrategy {
    fn from(other: MatchingStrategy) -> Self {
        match other {
            MatchingStrategy::Last => Self::Last,
            MatchingStrategy::All => Self::All,
        }
    }
}

#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct SearchHit {
    #[serde(flatten)]
    pub document: Document,
    #[serde(rename = "_formatted", skip_serializing_if = "Document::is_empty")]
    pub formatted: Document,
    #[serde(rename = "_matchesPosition", skip_serializing_if = "Option::is_none")]
    pub matches_position: Option<MatchesPosition>,
}

#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct SearchResult {
    pub hits: Vec<SearchHit>,
    pub query: String,
    pub processing_time_ms: u128,
    #[serde(flatten)]
    pub hits_info: HitsInfo,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub facet_distribution: Option<BTreeMap<String, BTreeMap<String, u64>>>,
}

#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
#[serde(untagged)]
pub enum HitsInfo {
    #[serde(rename_all = "camelCase")]
    Pagination { hits_per_page: usize, page: usize, total_pages: usize, total_hits: usize },
    #[serde(rename_all = "camelCase")]
    OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize },
}

pub fn perform_search(
    index: &Index,
    query: SearchQuery,
) -> Result<SearchResult, MeilisearchHttpError> {
    let before_search = Instant::now();
    let rtxn = index.read_txn()?;

    let mut search = index.search(&rtxn);

    if let Some(ref query) = query.q {
        search.query(query);
    }

    let is_finite_pagination = query.is_finite_pagination();
    search.terms_matching_strategy(query.matching_strategy.into());

    let max_total_hits = index
        .pagination_max_total_hits(&rtxn)
        .map_err(milli::Error::from)?
        .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);

    search.exhaustive_number_hits(is_finite_pagination);

    // compute the offset and the limit depending on the pagination mode.
    let (offset, limit) = if is_finite_pagination {
        let limit = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
        let page = query.page.unwrap_or(1);

        // page 0 gives a limit of 0, forcing Meilisearch to return no document.
        page.checked_sub(1).map_or((0, 0), |p| (limit * p, limit))
    } else {
        (query.offset, query.limit)
    };

    // Make sure that a user can't get more documents than the hard limit;
    // the offset is aligned on that limit too.
    let offset = min(offset, max_total_hits);
    let limit = min(limit, max_total_hits.saturating_sub(offset));

    search.offset(offset);
    search.limit(limit);

    if let Some(ref filter) = query.filter {
        if let Some(facets) = parse_filter(filter)? {
            search.filter(facets);
        }
    }

    if let Some(ref sort) = query.sort {
        let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() {
            Ok(sorts) => sorts,
            Err(asc_desc_error) => {
                return Err(milli::Error::from(SortError::from(asc_desc_error)).into())
            }
        };

        search.sort_criteria(sort);
    }

    let milli::SearchResult { documents_ids, matching_words, candidates, .. } = search.execute()?;

    let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();

    let displayed_ids = index
        .displayed_fields_ids(&rtxn)?
        .map(|fields| fields.into_iter().collect::<BTreeSet<_>>())
        .unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());

    let fids = |attrs: &BTreeSet<String>| {
        let mut ids = BTreeSet::new();
        for attr in attrs {
            if attr == "*" {
                ids = displayed_ids.clone();
                break;
            }

            if let Some(id) = fields_ids_map.id(attr) {
                ids.insert(id);
            }
        }
        ids
    };

    // The attributes to retrieve are the ones explicitly marked as to retrieve (all by default),
    // but these attributes must also be present
    // - in the fields_ids_map
    // - in the displayed attributes
    let to_retrieve_ids: BTreeSet<_> = query
        .attributes_to_retrieve
        .as_ref()
        .map(fids)
        .unwrap_or_else(|| displayed_ids.clone())
        .intersection(&displayed_ids)
        .cloned()
        .collect();

    let attr_to_highlight = query.attributes_to_highlight.unwrap_or_default();

    let attr_to_crop = query.attributes_to_crop.unwrap_or_default();

    // Attributes in `formatted_options` correspond to the attributes that will be in `_formatted`.
    // These attributes are:
    // - the attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`)
    // - the attributes asked to be retrieved: these attributes will not be highlighted/cropped
    // But these attributes must also be present in the displayed attributes.
    let formatted_options = compute_formatted_options(
        &attr_to_highlight,
        &attr_to_crop,
        query.crop_length,
        &to_retrieve_ids,
        &fields_ids_map,
        &displayed_ids,
    );

    let tokenizer = TokenizerBuilder::default().build();

    let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer);
    formatter_builder.crop_marker(query.crop_marker);
    formatter_builder.highlight_prefix(query.highlight_pre_tag);
    formatter_builder.highlight_suffix(query.highlight_post_tag);

    let mut documents = Vec::new();

    let documents_iter = index.documents(&rtxn, documents_ids)?;

    for (_id, obkv) in documents_iter {
        // First generate a document with all the displayed fields
        let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;

        // select the attributes to retrieve
        let attributes_to_retrieve = to_retrieve_ids
            .iter()
            .map(|&fid| fields_ids_map.name(fid).expect("Missing field name"));
        let mut document =
            permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);

        let (matches_position, formatted) = format_fields(
            &displayed_document,
            &fields_ids_map,
            &formatter_builder,
            &formatted_options,
            query.show_matches_position,
            &displayed_ids,
        )?;

        if let Some(sort) = query.sort.as_ref() {
            insert_geo_distance(sort, &mut document);
        }

        let hit = SearchHit { document, formatted, matches_position };
        documents.push(hit);
    }

    let number_of_hits = min(candidates.len() as usize, max_total_hits);
    let hits_info = if is_finite_pagination {
        let hits_per_page = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
        // If hits_per_page is 0, then pages can't be computed and so we respond 0.
        let total_pages = (number_of_hits + hits_per_page.saturating_sub(1))
            .checked_div(hits_per_page)
            .unwrap_or(0);

        HitsInfo::Pagination {
            hits_per_page,
            page: query.page.unwrap_or(1),
            total_pages,
            total_hits: number_of_hits,
        }
    } else {
        HitsInfo::OffsetLimit { limit: query.limit, offset, estimated_total_hits: number_of_hits }
    };

    let facet_distribution = match query.facets {
        Some(ref fields) => {
            let mut facet_distribution = index.facets_distribution(&rtxn);

            let max_values_by_facet = index
                .max_values_per_facet(&rtxn)
                .map_err(milli::Error::from)?
                .unwrap_or(DEFAULT_VALUES_PER_FACET);
            facet_distribution.max_values_per_facet(max_values_by_facet);

            if fields.iter().all(|f| f != "*") {
                facet_distribution.facets(fields);
            }
            let distribution = facet_distribution.candidates(candidates).execute()?;

            Some(distribution)
        }
        None => None,
    };

    let result = SearchResult {
        hits: documents,
        hits_info,
        query: query.q.clone().unwrap_or_default(),
        processing_time_ms: before_search.elapsed().as_millis(),
        facet_distribution,
    };
    Ok(result)
}
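
To make the pagination arithmetic in perform_search concrete, a small worked sketch of the (offset, limit) pair computed for a finite-pagination request (numbers invented):

// hitsPerPage = 25, page = 3  =>  skip the first two pages.
let (hits_per_page, page) = (25usize, 3usize);
let (offset, limit) = page.checked_sub(1).map_or((0, 0), |p| (hits_per_page * p, hits_per_page));
assert_eq!((offset, limit), (50, 25));

// page = 0 short-circuits to (0, 0): no document is returned.
assert_eq!(0usize.checked_sub(1).map_or((0, 0), |p| (25 * p, 25)), (0, 0));

// Both values are then clamped so that offset + limit never exceeds maxTotalHits (say 60):
let max_total_hits = 60usize;
let offset = offset.min(max_total_hits);
let limit = limit.min(max_total_hits.saturating_sub(offset));
assert_eq!((offset, limit), (50, 10));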

fn insert_geo_distance(sorts: &[String], document: &mut Document) {
    lazy_static::lazy_static! {
        static ref GEO_REGEX: Regex =
            Regex::new(r"_geoPoint\(\s*([[:digit:].\-]+)\s*,\s*([[:digit:].\-]+)\s*\)").unwrap();
    };
    if let Some(capture_group) = sorts.iter().find_map(|sort| GEO_REGEX.captures(sort)) {
        // TODO: TAMO: milli encountered an internal error, what do we want to do?
        let base = [capture_group[1].parse().unwrap(), capture_group[2].parse().unwrap()];
        let geo_point = &document.get("_geo").unwrap_or(&json!(null));
        if let Some((lat, lng)) = geo_point["lat"].as_f64().zip(geo_point["lng"].as_f64()) {
            let distance = milli::distance_between_two_points(&base, &[lat, lng]);
            document.insert("_geoDistance".to_string(), json!(distance.round() as usize));
        }
    }
}

fn compute_formatted_options(
    attr_to_highlight: &HashSet<String>,
    attr_to_crop: &[String],
    query_crop_length: usize,
    to_retrieve_ids: &BTreeSet<FieldId>,
    fields_ids_map: &FieldsIdsMap,
    displayed_ids: &BTreeSet<FieldId>,
) -> BTreeMap<FieldId, FormatOptions> {
    let mut formatted_options = BTreeMap::new();

    add_highlight_to_formatted_options(
        &mut formatted_options,
        attr_to_highlight,
        fields_ids_map,
        displayed_ids,
    );

    add_crop_to_formatted_options(
        &mut formatted_options,
        attr_to_crop,
        query_crop_length,
        fields_ids_map,
        displayed_ids,
    );

    // Should not return `_formatted` if there are no valid attributes to highlight/crop
    if !formatted_options.is_empty() {
        add_non_formatted_ids_to_formatted_options(&mut formatted_options, to_retrieve_ids);
    }

    formatted_options
}

fn add_highlight_to_formatted_options(
    formatted_options: &mut BTreeMap<FieldId, FormatOptions>,
    attr_to_highlight: &HashSet<String>,
    fields_ids_map: &FieldsIdsMap,
    displayed_ids: &BTreeSet<FieldId>,
) {
    for attr in attr_to_highlight {
        let new_format = FormatOptions { highlight: true, crop: None };

        if attr == "*" {
            for id in displayed_ids {
                formatted_options.insert(*id, new_format);
            }
            break;
        }

        if let Some(id) = fields_ids_map.id(attr) {
            if displayed_ids.contains(&id) {
                formatted_options.insert(id, new_format);
            }
        }
    }
}

fn add_crop_to_formatted_options(
    formatted_options: &mut BTreeMap<FieldId, FormatOptions>,
    attr_to_crop: &[String],
    crop_length: usize,
    fields_ids_map: &FieldsIdsMap,
    displayed_ids: &BTreeSet<FieldId>,
) {
    for attr in attr_to_crop {
        let mut split = attr.rsplitn(2, ':');
        let (attr_name, attr_len) = match split.next().zip(split.next()) {
            Some((len, name)) => {
                let crop_len = len.parse::<usize>().unwrap_or(crop_length);
                (name, crop_len)
            }
            None => (attr.as_str(), crop_length),
        };

        if attr_name == "*" {
            for id in displayed_ids {
                formatted_options
                    .entry(*id)
                    .and_modify(|f| f.crop = Some(attr_len))
                    .or_insert(FormatOptions { highlight: false, crop: Some(attr_len) });
            }
        }

        if let Some(id) = fields_ids_map.id(attr_name) {
            if displayed_ids.contains(&id) {
                formatted_options
                    .entry(id)
                    .and_modify(|f| f.crop = Some(attr_len))
                    .or_insert(FormatOptions { highlight: false, crop: Some(attr_len) });
            }
        }
    }
}

fn add_non_formatted_ids_to_formatted_options(
    formatted_options: &mut BTreeMap<FieldId, FormatOptions>,
    to_retrieve_ids: &BTreeSet<FieldId>,
) {
    for id in to_retrieve_ids {
        formatted_options.entry(*id).or_insert(FormatOptions { highlight: false, crop: None });
    }
}

fn make_document(
    displayed_attributes: &BTreeSet<FieldId>,
    field_ids_map: &FieldsIdsMap,
    obkv: obkv::KvReaderU16,
) -> Result<Document, MeilisearchHttpError> {
    let mut document = serde_json::Map::new();

    // recreate the original json
    for (key, value) in obkv.iter() {
        let value = serde_json::from_slice(value)?;
        let key = field_ids_map.name(key).expect("Missing field name").to_string();

        document.insert(key, value);
    }

    // select the attributes to retrieve
    let displayed_attributes = displayed_attributes
        .iter()
        .map(|&fid| field_ids_map.name(fid).expect("Missing field name"));

    let document = permissive_json_pointer::select_values(&document, displayed_attributes);
    Ok(document)
}

fn format_fields<'a, A: AsRef<[u8]>>(
    document: &Document,
    field_ids_map: &FieldsIdsMap,
    builder: &MatcherBuilder<'a, A>,
    formatted_options: &BTreeMap<FieldId, FormatOptions>,
    compute_matches: bool,
    displayable_ids: &BTreeSet<FieldId>,
) -> Result<(Option<MatchesPosition>, Document), MeilisearchHttpError> {
    let mut matches_position = compute_matches.then(BTreeMap::new);
    let mut document = document.clone();

    // select the attributes to retrieve
    let displayable_names =
        displayable_ids.iter().map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
    permissive_json_pointer::map_leaf_values(&mut document, displayable_names, |key, value| {
        // To get the formatting option of each key we need to see all the rules that apply
        // to the value and merge them together. E.g. if a user wants to highlight `doggo`
        // and crop `doggo.name`, then `doggo.name` needs to be highlighted + cropped while
        // `doggo.age` is only highlighted.
        let format = formatted_options
            .iter()
            .filter(|(field, _option)| {
                let name = field_ids_map.name(**field).unwrap();
                milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name)
            })
            .map(|(_, option)| *option)
            .reduce(|acc, option| acc.merge(option));
        let mut infos = Vec::new();

        *value = format_value(std::mem::take(value), builder, format, &mut infos, compute_matches);

        if let Some(matches) = matches_position.as_mut() {
            if !infos.is_empty() {
                matches.insert(key.to_owned(), infos);
            }
        }
    });

    let selectors = formatted_options
        .keys()
        // This unwrap must be safe since we got the ids from the fields_ids_map just
        // before.
        .map(|&fid| field_ids_map.name(fid).unwrap());
    let document = permissive_json_pointer::select_values(&document, selectors);

    Ok((matches_position, document))
}

fn format_value<'a, A: AsRef<[u8]>>(
    value: Value,
    builder: &MatcherBuilder<'a, A>,
    format_options: Option<FormatOptions>,
    infos: &mut Vec<MatchBounds>,
    compute_matches: bool,
) -> Value {
    match value {
        Value::String(old_string) => {
            let mut matcher = builder.build(&old_string);
            if compute_matches {
                let matches = matcher.matches();
                infos.extend_from_slice(&matches[..]);
            }

            match format_options {
                Some(format_options) => {
                    let value = matcher.format(format_options);
                    Value::String(value.into_owned())
                }
                None => Value::String(old_string),
            }
        }
        Value::Array(values) => Value::Array(
            values
                .into_iter()
                .map(|v| {
                    format_value(
                        v,
                        builder,
                        format_options.map(|format_options| FormatOptions {
                            highlight: format_options.highlight,
                            crop: None,
                        }),
                        infos,
                        compute_matches,
                    )
                })
                .collect(),
        ),
        Value::Object(object) => Value::Object(
            object
                .into_iter()
                .map(|(k, v)| {
                    (
                        k,
                        format_value(
                            v,
                            builder,
                            format_options.map(|format_options| FormatOptions {
                                highlight: format_options.highlight,
                                crop: None,
                            }),
                            infos,
                            compute_matches,
                        ),
                    )
                })
                .collect(),
        ),
        Value::Number(number) => {
            let s = number.to_string();

            let mut matcher = builder.build(&s);
            if compute_matches {
                let matches = matcher.matches();
                infos.extend_from_slice(&matches[..]);
            }

            match format_options {
                Some(format_options) => {
                    let value = matcher.format(format_options);
                    Value::String(value.into_owned())
                }
                None => Value::Number(number),
            }
        }
        value => value,
    }
}

fn parse_filter(facets: &Value) -> Result<Option<Filter>, MeilisearchHttpError> {
    match facets {
        Value::String(expr) => {
            let condition = Filter::from_str(expr)?;
            Ok(condition)
        }
        Value::Array(arr) => parse_filter_array(arr),
        v => Err(MeilisearchHttpError::InvalidExpression(&["Array"], v.clone())),
    }
}

fn parse_filter_array(arr: &[Value]) -> Result<Option<Filter>, MeilisearchHttpError> {
    let mut ands = Vec::new();
    for value in arr {
        match value {
            Value::String(s) => ands.push(Either::Right(s.as_str())),
            Value::Array(arr) => {
                let mut ors = Vec::new();
                for value in arr {
                    match value {
                        Value::String(s) => ors.push(s.as_str()),
                        v => {
                            return Err(MeilisearchHttpError::InvalidExpression(
                                &["String"],
                                v.clone(),
                            ))
                        }
                    }
                }
                ands.push(Either::Left(ors));
            }
            v => {
                return Err(MeilisearchHttpError::InvalidExpression(
                    &["String", "[String]"],
                    v.clone(),
                ))
            }
        }
    }

    Ok(Filter::from_array(ands)?)
}
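
The array form accepted above is the usual AND-of-ORs filter shape; an illustrative value (field names invented):

// Outer entries are ANDed together, inner arrays are ORed:
// (genre = horror OR genre = comedy) AND release_year > 2000
let filter = serde_json::json!([
    ["genre = horror", "genre = comedy"],
    "release_year > 2000"
]);
// parse_filter(&filter) turns this into a milli Filter via Filter::from_array.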

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_insert_geo_distance() {
        let value: Document = serde_json::from_str(
            r#"{
              "_geo": {
                "lat": 50.629973371633746,
                "lng": 3.0569447399419567
              },
              "city": "Lille",
              "id": "1"
            }"#,
        )
        .unwrap();

        let sorters = &["_geoPoint(50.629973371633746,3.0569447399419567):desc".to_string()];
        let mut document = value.clone();
        insert_geo_distance(sorters, &mut document);
        assert_eq!(document.get("_geoDistance"), Some(&json!(0)));

        let sorters = &["_geoPoint(50.629973371633746, 3.0569447399419567):asc".to_string()];
        let mut document = value.clone();
        insert_geo_distance(sorters, &mut document);
        assert_eq!(document.get("_geoDistance"), Some(&json!(0)));

        let sorters =
            &["_geoPoint( 50.629973371633746 , 3.0569447399419567 ):desc".to_string()];
        let mut document = value.clone();
        insert_geo_distance(sorters, &mut document);
        assert_eq!(document.get("_geoDistance"), Some(&json!(0)));

        let sorters = &[
            "prix:asc",
            "villeneuve:desc",
            "_geoPoint(50.629973371633746, 3.0569447399419567):asc",
            "ubu:asc",
        ]
        .map(|s| s.to_string());
        let mut document = value.clone();
        insert_geo_distance(sorters, &mut document);
        assert_eq!(document.get("_geoDistance"), Some(&json!(0)));

        // only the first geoPoint is used to compute the distance
        let sorters = &[
            "chien:desc",
            "_geoPoint(50.629973371633746, 3.0569447399419567):asc",
            "pangolin:desc",
            "_geoPoint(100.0, -80.0):asc",
            "chat:asc",
        ]
        .map(|s| s.to_string());
        let mut document = value.clone();
        insert_geo_distance(sorters, &mut document);
        assert_eq!(document.get("_geoDistance"), Some(&json!(0)));

        // there was no _geoPoint so nothing is inserted in the document
        let sorters = &["chien:asc".to_string()];
        let mut document = value;
        insert_geo_distance(sorters, &mut document);
        assert_eq!(document.get("_geoDistance"), None);
    }
}