use std::any::TypeId;
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{Duration, Instant};

use actix_web::http::header::USER_AGENT;
use actix_web::HttpRequest;
use byte_unit::Byte;
use index_scheduler::IndexScheduler;
use meilisearch_auth::{AuthController, AuthFilter};
use meilisearch_types::features::RuntimeTogglableFeatures;
use meilisearch_types::InstanceUid;
use once_cell::sync::Lazy;
use segment::message::{Identify, Track, User};
use segment::{AutoBatcher, Batcher, HttpClient};
use serde::Serialize;
use serde_json::{json, Value};
use sysinfo::{Disks, System};
use time::OffsetDateTime;
use tokio::select;
use tokio::sync::mpsc::{self, Receiver, Sender};
use uuid::Uuid;

use super::{config_user_id_path, Aggregate, MEILISEARCH_CONFIG_PATH};
use crate::option::{
    default_http_addr, IndexerOpts, LogMode, MaxMemory, MaxThreads, ScheduleSnapshot,
};
use crate::routes::{create_all_stats, Stats};
use crate::Opt;

const ANALYTICS_HEADER: &str = "X-Meilisearch-Client";

/// Write the instance-uid to `data.ms` and to `~/.config/MeiliSearch/path-to-db-instance-uid`. Errors are ignored.
fn write_user_id(db_path: &Path, user_id: &InstanceUid) {
    let _ = fs::write(db_path.join("instance-uid"), user_id.to_string());
    if let Some((meilisearch_config_path, user_id_path)) =
        MEILISEARCH_CONFIG_PATH.as_ref().zip(config_user_id_path(db_path))
    {
        let _ = fs::create_dir_all(meilisearch_config_path);
        let _ = fs::write(user_id_path, user_id.to_string());
    }
}
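
// For context: `super::find_user_id(&opt.db_path)` (used in `SegmentAnalytics::new`
// below) reads the `instance-uid` file written here, returning `None` on a first run.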

const SEGMENT_API_KEY: &str = "P3FWhhEsJiEDCuEHpmcN9DHcK4hVfBvb";

pub fn extract_user_agents(request: &HttpRequest) -> HashSet<String> {
    request
        .headers()
        .get(ANALYTICS_HEADER)
        .or_else(|| request.headers().get(USER_AGENT))
        .and_then(|header| header.to_str().ok())
        .unwrap_or("unknown")
        .split(';')
        .map(str::trim)
        .map(ToString::to_string)
        .collect()
}
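
// A minimal usage sketch (hypothetical values, using actix-web's test helpers):
//
//     let req = actix_web::test::TestRequest::default()
//         .insert_header((ANALYTICS_HEADER, "Meilisearch Rust v1.10; tests"))
//         .to_http_request();
//     let agents = extract_user_agents(&req);
//     // -> {"Meilisearch Rust v1.10", "tests"}: the analytics header takes
//     //    precedence over User-Agent, and each `;`-separated segment is trimmed.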

pub struct Message {
    // Since the type_id is resolved statically we cannot retrieve it from the Box.
    // Thus we have to send it in the message directly.
    type_id: TypeId,
    // Same for the aggregate function.
    #[allow(clippy::type_complexity)]
    aggregator_function: fn(Box<dyn Aggregate>, Box<dyn Aggregate>) -> Option<Box<dyn Aggregate>>,
    event: Event,
}

pub struct Event {
    original: Box<dyn Aggregate>,
    timestamp: OffsetDateTime,
    user_agents: HashSet<String>,
    total: usize,
}
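
// An `Event` accumulates everything needed to emit a single Segment `Track` call in
// `Segment::tick`: the merged aggregate payload, the timestamp of the first request,
// the union of the user agents seen, and the number of requests folded into it.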

/// This function should always be called on the same type. If `old` and `new`
/// aren't the same concrete type the function does nothing and returns `None`.
fn downcast_aggregate<ConcreteType: Aggregate>(
    old: Box<dyn Aggregate>,
    new: Box<dyn Aggregate>,
) -> Option<Box<dyn Aggregate>> {
    if old.is::<ConcreteType>() && new.is::<ConcreteType>() {
        // Neither of the two downcasts below can fail, but to be sure we don't crash
        // we still avoid unwrapping.
        let this = old.downcast::<ConcreteType>().ok()?;
        let other = new.downcast::<ConcreteType>().ok()?;
        Some(ConcreteType::aggregate(this, other))
    } else {
        None
    }
}
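
// This type-erasure trick lets `Segment` merge events without knowing their concrete
// type: `Message::new::<T>` captures `downcast_aggregate::<T>` as a plain function
// pointer, and keying the event map by `TypeId` guarantees the pointer is only ever
// invoked on two values of that same `T`.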

impl Message {
    pub fn new<T: Aggregate>(event: T, request: &HttpRequest) -> Self {
        Self {
            type_id: TypeId::of::<T>(),
            event: Event {
                original: Box::new(event),
                timestamp: OffsetDateTime::now_utc(),
                user_agents: extract_user_agents(request),
                total: 1,
            },
            aggregator_function: downcast_aggregate::<T>,
        }
    }
}

pub struct SegmentAnalytics {
    pub instance_uid: InstanceUid,
    pub user: User,
    pub sender: Sender<Message>,
}

impl SegmentAnalytics {
    #[allow(clippy::new_ret_no_self)]
    pub async fn new(
        opt: &Opt,
        index_scheduler: Arc<IndexScheduler>,
        auth_controller: Arc<AuthController>,
    ) -> Option<Arc<Self>> {
        let instance_uid = super::find_user_id(&opt.db_path);
        let first_time_run = instance_uid.is_none();
        let instance_uid = instance_uid.unwrap_or_else(Uuid::new_v4);
        write_user_id(&opt.db_path, &instance_uid);

        let client = reqwest::Client::builder().connect_timeout(Duration::from_secs(10)).build();

        // If reqwest fails to build its client we won't be able to send analytics.
        if client.is_err() {
            return None;
        }

        let client =
            HttpClient::new(client.unwrap(), "https://telemetry.meilisearch.com".to_string());
        let user = User::UserId { user_id: instance_uid.to_string() };
        let mut batcher = AutoBatcher::new(client, Batcher::new(None), SEGMENT_API_KEY.to_string());

        // If Meilisearch is launched for the first time:
        // 1. Send a Launched event associated to the shared user `total_launch`.
        // 2. Batch a Launched event with the real instance-id and send it in one hour.
        if first_time_run {
            let _ = batcher
                .push(Track {
                    user: User::UserId { user_id: "total_launch".to_string() },
                    event: "Launched".to_string(),
                    ..Default::default()
                })
                .await;
            let _ = batcher.flush().await;
            let _ = batcher
                .push(Track {
                    user: user.clone(),
                    event: "Launched".to_string(),
                    ..Default::default()
                })
                .await;
        }

        let (sender, inbox) = mpsc::channel(100); // How many analytics messages we can buffer

        let segment = Box::new(Segment {
            inbox,
            user: user.clone(),
            opt: opt.clone(),
            batcher,
            events: HashMap::new(),
        });
        tokio::spawn(segment.run(index_scheduler.clone(), auth_controller.clone()));

        let this = Self { instance_uid, sender, user: user.clone() };

        Some(Arc::new(this))
    }
}
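
// Send side, for illustration (the real call sites live in the route handlers); the
// aggregate type here is hypothetical:
//
//     let _ = analytics.sender.try_send(Message::new(MySearchAggregate::default(), &req));
//
// Using `try_send` keeps the request path non-blocking: if the 100-message buffer is
// full, the event is simply dropped.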

/// This structure represents the `infos` field we send in the analytics.
/// It's quite close to the `Opt` structure except all sensitive information
/// has been simplified to a boolean.
/// It's sent as-is to Amplitude, thus you should never rename a field of the
/// struct without the approval of the PM.
#[derive(Debug, Clone, Serialize)]
struct Infos {
    env: String,
    experimental_contains_filter: bool,
    experimental_vector_store: bool,
    experimental_enable_metrics: bool,
    experimental_edit_documents_by_function: bool,
    experimental_search_queue_size: usize,
    experimental_drop_search_after: usize,
    experimental_nb_searches_per_core: usize,
    experimental_logs_mode: LogMode,
    experimental_replication_parameters: bool,
    experimental_enable_logs_route: bool,
    experimental_reduce_indexing_memory_usage: bool,
    experimental_max_number_of_batched_tasks: usize,
    gpu_enabled: bool,
    db_path: bool,
    import_dump: bool,
    dump_dir: bool,
    ignore_missing_dump: bool,
    ignore_dump_if_db_exists: bool,
    import_snapshot: bool,
    schedule_snapshot: Option<u64>,
    snapshot_dir: bool,
    ignore_missing_snapshot: bool,
    ignore_snapshot_if_db_exists: bool,
    http_addr: bool,
    http_payload_size_limit: Byte,
    task_queue_webhook: bool,
    task_webhook_authorization_header: bool,
    log_level: String,
    max_indexing_memory: MaxMemory,
    max_indexing_threads: MaxThreads,
    with_configuration_file: bool,
    ssl_auth_path: bool,
    ssl_cert_path: bool,
    ssl_key_path: bool,
    ssl_ocsp_path: bool,
    ssl_require_auth: bool,
    ssl_resumption: bool,
    ssl_tickets: bool,
}
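
// Because `Infos` derives `Serialize`, the whole struct is embedded verbatim as the
// `infos` object of the `Identify` traits built in `Segment::compute_traits` below.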

impl Infos {
    pub fn new(options: Opt, features: RuntimeTogglableFeatures) -> Self {
        // We want to destructure this whole struct by hand to be sure we don't forget
        // to add analytics when we add a field to `Opt`.
        // Thus we must not insert `..` at the end.
        let Opt {
            db_path,
            experimental_contains_filter,
            experimental_enable_metrics,
            experimental_search_queue_size,
            experimental_drop_search_after,
            experimental_nb_searches_per_core,
            experimental_logs_mode,
            experimental_replication_parameters,
            experimental_enable_logs_route,
            experimental_reduce_indexing_memory_usage,
            experimental_max_number_of_batched_tasks,
            http_addr,
            master_key: _,
            env,
            task_webhook_url,
            task_webhook_authorization_header,
            max_index_size: _,
            max_task_db_size: _,
            http_payload_size_limit,
            ssl_cert_path,
            ssl_key_path,
            ssl_auth_path,
            ssl_ocsp_path,
            ssl_require_auth,
            ssl_resumption,
            ssl_tickets,
            import_snapshot,
            ignore_missing_snapshot,
            ignore_snapshot_if_db_exists,
            snapshot_dir,
            schedule_snapshot,
            import_dump,
            ignore_missing_dump,
            ignore_dump_if_db_exists,
            dump_dir,
            log_level,
            indexer_options,
            config_file_path,
            no_analytics: _,
        } = options;

        let schedule_snapshot = match schedule_snapshot {
            ScheduleSnapshot::Disabled => None,
            ScheduleSnapshot::Enabled(interval) => Some(interval),
        };

        let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } =
            indexer_options;

        let RuntimeTogglableFeatures {
            vector_store,
            metrics,
            logs_route,
            edit_documents_by_function,
            contains_filter,
        } = features;

        // We override every sensitive piece of information.
        // We consider information sensitive if it contains a path, an address, or a key.
        Self {
            env,
            experimental_contains_filter: experimental_contains_filter | contains_filter,
            experimental_vector_store: vector_store,
            experimental_edit_documents_by_function: edit_documents_by_function,
            experimental_enable_metrics: experimental_enable_metrics | metrics,
            experimental_search_queue_size,
            experimental_drop_search_after: experimental_drop_search_after.into(),
            experimental_nb_searches_per_core: experimental_nb_searches_per_core.into(),
            experimental_logs_mode,
            experimental_replication_parameters,
            experimental_enable_logs_route: experimental_enable_logs_route | logs_route,
            experimental_reduce_indexing_memory_usage,
            gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
            db_path: db_path != PathBuf::from("./data.ms"),
            import_dump: import_dump.is_some(),
            dump_dir: dump_dir != PathBuf::from("dumps/"),
            ignore_missing_dump,
            ignore_dump_if_db_exists,
            import_snapshot: import_snapshot.is_some(),
            schedule_snapshot,
            snapshot_dir: snapshot_dir != PathBuf::from("snapshots/"),
            ignore_missing_snapshot,
            ignore_snapshot_if_db_exists,
            http_addr: http_addr != default_http_addr(),
            http_payload_size_limit,
            experimental_max_number_of_batched_tasks,
            task_queue_webhook: task_webhook_url.is_some(),
            task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
            log_level: log_level.to_string(),
            max_indexing_memory,
            max_indexing_threads,
            with_configuration_file: config_file_path.is_some(),
            ssl_auth_path: ssl_auth_path.is_some(),
            ssl_cert_path: ssl_cert_path.is_some(),
            ssl_key_path: ssl_key_path.is_some(),
            ssl_ocsp_path: ssl_ocsp_path.is_some(),
            ssl_require_auth,
            ssl_resumption,
            ssl_tickets,
        }
    }
}
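
// Note the `cli_flag | runtime_feature` pattern above: an experimental capability is
// reported as enabled whether it was set at startup (`Opt`) or toggled at runtime
// (`RuntimeTogglableFeatures`).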

pub struct Segment {
    inbox: Receiver<Message>,
    user: User,
    opt: Opt,
    batcher: AutoBatcher,
    events: HashMap<TypeId, Event>,
}

impl Segment {
    fn compute_traits(opt: &Opt, stats: Stats, features: RuntimeTogglableFeatures) -> Value {
        static FIRST_START_TIMESTAMP: Lazy<Instant> = Lazy::new(Instant::now);
        static SYSTEM: Lazy<Value> = Lazy::new(|| {
            let disks = Disks::new_with_refreshed_list();
            let mut sys = System::new_all();
            sys.refresh_all();
            let kernel_version = System::kernel_version()
                .and_then(|k| k.split_once('-').map(|(k, _)| k.to_string()));
            json!({
                "distribution": System::name(),
                "kernel_version": kernel_version,
                "cores": sys.cpus().len(),
                "ram_size": sys.total_memory(),
                "disk_size": disks.iter().map(|disk| disk.total_space()).max(),
                "server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(),
            })
        });
        let number_of_documents =
            stats.indexes.values().map(|index| index.number_of_documents).collect::<Vec<u64>>();

        json!({
            "start_since_days": FIRST_START_TIMESTAMP.elapsed().as_secs() / (60 * 60 * 24), // one day
            "system": *SYSTEM,
            "stats": {
                "database_size": stats.database_size,
                "indexes_number": stats.indexes.len(),
                "documents_number": number_of_documents,
            },
            "infos": Infos::new(opt.clone(), features),
        })
    }
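
    // Both `Lazy` statics above are initialized once per process: the uptime clock
    // starts on the first access, and the system snapshot is computed once and reused.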

    async fn run(
        mut self,
        index_scheduler: Arc<IndexScheduler>,
        auth_controller: Arc<AuthController>,
    ) {
        const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour
        // The first batch must be sent after one hour.
        let mut interval =
            tokio::time::interval_at(tokio::time::Instant::now() + INTERVAL, INTERVAL);

        loop {
            select! {
                _ = interval.tick() => {
                    self.tick(index_scheduler.clone(), auth_controller.clone()).await;
                },
                Some(msg) = self.inbox.recv() => {
                    self.handle_msg(msg);
                }
            }
        }
    }

    fn handle_msg(&mut self, Message { type_id, aggregator_function, event }: Message) {
        let new_event = match self.events.remove(&type_id) {
            Some(old) => {
                // The function should never fail since we retrieved the corresponding
                // TypeId from the map. But in the unfortunate case it does happen, we
                // silently ignore the error.
                let Some(original) = (aggregator_function)(old.original, event.original) else {
                    return;
                };
                Event {
                    original,
                    // We always want to keep the FIRST timestamp ever encountered.
                    timestamp: old.timestamp,
                    user_agents: old.user_agents.union(&event.user_agents).cloned().collect(),
                    total: old.total.saturating_add(event.total),
                }
            }
            None => event,
        };
        self.events.insert(type_id, new_event);
    }
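
    // Aggregation in practice: two searches arriving within the same hour with agents
    // {"curl"} and {"sdk"} collapse into a single `Event` with `total == 2`,
    // `user_agents == {"curl", "sdk"}`, and the timestamp of the first search.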

    async fn tick(
        &mut self,
        index_scheduler: Arc<IndexScheduler>,
        auth_controller: Arc<AuthController>,
    ) {
        if let Ok(stats) = create_all_stats(
            index_scheduler.clone().into(),
            auth_controller.into(),
            &AuthFilter::default(),
        ) {
            // Replace the version number with the prototype name, if any.
            let version = if let Some(prototype) = build_info::DescribeResult::from_build()
                .and_then(|describe| describe.as_prototype())
            {
                prototype
            } else {
                env!("CARGO_PKG_VERSION")
            };

            let _ = self
                .batcher
                .push(Identify {
                    context: Some(json!({
                        "app": {
                            "version": version.to_string(),
                        },
                    })),
                    user: self.user.clone(),
                    traits: Self::compute_traits(
                        &self.opt,
                        stats,
                        index_scheduler.features().runtime_features(),
                    ),
                    ..Default::default()
                })
                .await;
        }

        // Drain the accumulated events.
        let events = std::mem::take(&mut self.events);

        for (_, event) in events {
            let Event { original, timestamp, user_agents, total } = event;
            let name = original.event_name();
            let mut properties = original.into_event();
            if properties["user-agent"].is_null() {
                properties["user-agent"] = json!(user_agents);
            }
            if properties["requests"]["total_received"].is_null() {
                properties["requests"]["total_received"] = total.into();
            }

            let _ = self
                .batcher
                .push(Track {
                    user: self.user.clone(),
                    event: name.to_string(),
                    properties,
                    timestamp: Some(timestamp),
                    ..Default::default()
                })
                .await;
        }
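
        // Delivery is best-effort: push and flush errors are discarded (`let _ = ...`),
        // so a failing telemetry endpoint only ever results in dropped events.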

        let _ = self.batcher.flush().await;
    }
}