2021-11-10 16:52:24 +01:00
|
|
|
use std::collections::{BinaryHeap, HashMap, HashSet};
|
2021-10-27 18:16:13 +02:00
|
|
|
use std::fs;
|
2022-01-18 18:17:38 +01:00
|
|
|
use std::path::{Path, PathBuf};
|
2021-10-29 15:58:06 +02:00
|
|
|
use std::sync::Arc;
|
2021-10-27 18:16:13 +02:00
|
|
|
use std::time::{Duration, Instant};
|
|
|
|
|
|
|
|
use actix_web::http::header::USER_AGENT;
|
|
|
|
use actix_web::HttpRequest;
|
|
|
|
use http::header::CONTENT_TYPE;
|
2022-01-12 15:35:33 +01:00
|
|
|
use meilisearch_auth::SearchRules;
|
2022-04-11 14:18:47 +02:00
|
|
|
use meilisearch_lib::index::{
|
|
|
|
SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
|
|
|
|
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
|
|
|
|
};
|
2021-10-27 18:16:13 +02:00
|
|
|
use meilisearch_lib::index_controller::Stats;
|
|
|
|
use meilisearch_lib::MeiliSearch;
|
|
|
|
use once_cell::sync::Lazy;
|
|
|
|
use regex::Regex;
|
|
|
|
use segment::message::{Identify, Track, User};
|
|
|
|
use segment::{AutoBatcher, Batcher, HttpClient};
|
|
|
|
use serde_json::{json, Value};
|
|
|
|
use sysinfo::{DiskExt, System, SystemExt};
|
2022-02-14 15:32:41 +01:00
|
|
|
use time::OffsetDateTime;
|
2021-10-28 16:28:41 +02:00
|
|
|
use tokio::select;
|
|
|
|
use tokio::sync::mpsc::{self, Receiver, Sender};
|
2021-10-27 18:16:13 +02:00
|
|
|
use uuid::Uuid;
|
|
|
|
|
|
|
|
use crate::analytics::Analytics;
|
|
|
|
use crate::routes::indexes::documents::UpdateDocumentsQuery;
|
|
|
|
use crate::Opt;
|
|
|
|
|
|
|
|
use super::{config_user_id_path, MEILISEARCH_CONFIG_PATH};
|
|
|
|
|
2022-05-19 14:08:34 +02:00
|
|
|
/// Custom HTTP header official Meilisearch clients use to identify themselves;
/// preferred over the standard `User-Agent` when present (see `extract_user_agents`).
const ANALYTICS_HEADER: &str = "X-Meilisearch-Client";
|
|
|
|
|
2021-10-27 18:16:13 +02:00
|
|
|
/// Write the instance-uid in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-instance-uid`. Ignore the errors.
|
|
|
|
fn write_user_id(db_path: &Path, user_id: &str) {
|
|
|
|
let _ = fs::write(db_path.join("instance-uid"), user_id.as_bytes());
|
|
|
|
if let Some((meilisearch_config_path, user_id_path)) = MEILISEARCH_CONFIG_PATH
|
|
|
|
.as_ref()
|
|
|
|
.zip(config_user_id_path(db_path))
|
|
|
|
{
|
|
|
|
let _ = fs::create_dir_all(&meilisearch_config_path);
|
|
|
|
let _ = fs::write(user_id_path, user_id.as_bytes());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-11-04 08:10:12 +01:00
|
|
|
// Segment write key used to push telemetry events. NOTE(review): Segment write
// keys are intended to be embedded in clients; presumably this is deliberate,
// not a leaked secret — confirm with the analytics owner.
const SEGMENT_API_KEY: &str = "P3FWhhEsJiEDCuEHpmcN9DHcK4hVfBvb";
|
2021-10-27 18:16:13 +02:00
|
|
|
|
|
|
|
pub fn extract_user_agents(request: &HttpRequest) -> Vec<String> {
|
|
|
|
request
|
|
|
|
.headers()
|
2022-05-19 14:08:34 +02:00
|
|
|
.get(ANALYTICS_HEADER)
|
|
|
|
.or_else(|| request.headers().get(USER_AGENT))
|
2021-10-27 18:16:13 +02:00
|
|
|
.map(|header| header.to_str().ok())
|
|
|
|
.flatten()
|
|
|
|
.unwrap_or("unknown")
|
|
|
|
.split(';')
|
|
|
|
.map(str::trim)
|
|
|
|
.map(ToString::to_string)
|
|
|
|
.collect()
|
|
|
|
}
|
|
|
|
|
2021-10-29 15:58:06 +02:00
|
|
|
/// Messages sent from the HTTP handlers to the background analytics task.
pub enum AnalyticsMsg {
    /// A ready-made Segment event, forwarded to the batcher as-is.
    BatchMessage(Track),
    /// Aggregated data for a search performed through GET.
    AggregateGetSearch(SearchAggregator),
    /// Aggregated data for a search performed through POST.
    AggregatePostSearch(SearchAggregator),
    /// Aggregated data for a documents-addition request.
    AggregateAddDocuments(DocumentsAggregator),
    /// Aggregated data for a documents-update request.
    AggregateUpdateDocuments(DocumentsAggregator),
}
|
|
|
|
|
2021-10-27 18:16:13 +02:00
|
|
|
/// Handle given to the HTTP layer; forwards everything through a channel to
/// the background [`Segment`] task so request handling never blocks on telemetry.
pub struct SegmentAnalytics {
    // channel to the `Segment` event loop; all sends use non-blocking `try_send`
    sender: Sender<AnalyticsMsg>,
    // stable per-instance Segment user, backed by the persisted instance-uid
    user: User,
}
|
|
|
|
|
|
|
|
impl SegmentAnalytics {
    /// Build the analytics handle and spawn the background [`Segment`] task.
    ///
    /// Returns the `Analytics` implementation plus the instance user-id as a
    /// string. Falls back to `MockAnalytics` when the HTTP client cannot be
    /// built, since no event could be sent anyway.
    pub async fn new(opt: &Opt, meilisearch: &MeiliSearch) -> (Arc<dyn Analytics>, String) {
        // Reuse the persisted instance uid when there is one; otherwise this
        // is a first run and a fresh uuid is generated.
        let user_id = super::find_user_id(&opt.db_path);
        let first_time_run = user_id.is_none();
        let user_id = user_id.unwrap_or_else(|| Uuid::new_v4().to_string());
        // Persist the (possibly fresh) uid; errors are deliberately ignored.
        write_user_id(&opt.db_path, &user_id);

        let client = reqwest::Client::builder()
            .connect_timeout(Duration::from_secs(10))
            .build();

        // if reqwest throws an error we won't be able to send analytics
        if client.is_err() {
            return super::MockAnalytics::new(opt);
        }

        let client = HttpClient::new(
            client.unwrap(),
            "https://telemetry.meilisearch.com".to_string(),
        );
        let user = User::UserId { user_id };
        let mut batcher = AutoBatcher::new(client, Batcher::new(None), SEGMENT_API_KEY.to_string());

        // If Meilisearch is Launched for the first time:
        // 1. Send an event Launched associated to the user `total_launch`.
        // 2. Batch an event Launched with the real instance-id and send it in one hour.
        if first_time_run {
            let _ = batcher
                .push(Track {
                    user: User::UserId {
                        user_id: "total_launch".to_string(),
                    },
                    event: "Launched".to_string(),
                    ..Default::default()
                })
                .await;
            // Flush immediately so the aggregated `total_launch` counter is
            // sent right away, unlike the per-instance event below.
            let _ = batcher.flush().await;
            let _ = batcher
                .push(Track {
                    user: user.clone(),
                    event: "Launched".to_string(),
                    ..Default::default()
                })
                .await;
        }

        let (sender, inbox) = mpsc::channel(100); // How many analytics can we bufferize

        // The background task owns the batcher and all aggregators; handlers
        // only ever talk to it through `sender`.
        let segment = Box::new(Segment {
            inbox,
            user: user.clone(),
            opt: opt.clone(),
            batcher,
            post_search_aggregator: SearchAggregator::default(),
            get_search_aggregator: SearchAggregator::default(),
            add_documents_aggregator: DocumentsAggregator::default(),
            update_documents_aggregator: DocumentsAggregator::default(),
        });
        tokio::spawn(segment.run(meilisearch.clone()));

        let this = Self {
            sender,
            user: user.clone(),
        };

        (Arc::new(this), user.to_string())
    }
}
|
|
|
|
|
|
|
|
impl super::Analytics for SegmentAnalytics {
|
2021-10-29 15:58:06 +02:00
|
|
|
fn publish(&self, event_name: String, mut send: Value, request: Option<&HttpRequest>) {
|
2022-05-19 14:08:34 +02:00
|
|
|
let user_agent = request.map(|req| extract_user_agents(req));
|
2021-10-28 12:52:53 +02:00
|
|
|
|
|
|
|
send["user-agent"] = json!(user_agent);
|
2021-10-28 16:28:41 +02:00
|
|
|
let event = Track {
|
|
|
|
user: self.user.clone(),
|
|
|
|
event: event_name.clone(),
|
|
|
|
properties: send,
|
|
|
|
..Default::default()
|
|
|
|
};
|
2021-10-29 16:10:58 +02:00
|
|
|
let _ = self
|
|
|
|
.sender
|
|
|
|
.try_send(AnalyticsMsg::BatchMessage(event.into()));
|
2021-10-27 18:16:13 +02:00
|
|
|
}
|
2021-10-29 15:58:06 +02:00
|
|
|
|
|
|
|
fn get_search(&self, aggregate: SearchAggregator) {
|
2021-10-29 16:10:58 +02:00
|
|
|
let _ = self
|
|
|
|
.sender
|
|
|
|
.try_send(AnalyticsMsg::AggregateGetSearch(aggregate));
|
2021-10-27 18:16:13 +02:00
|
|
|
}
|
|
|
|
|
2021-10-29 15:58:06 +02:00
|
|
|
fn post_search(&self, aggregate: SearchAggregator) {
|
2021-10-28 16:28:41 +02:00
|
|
|
let _ = self
|
|
|
|
.sender
|
2021-10-29 15:58:06 +02:00
|
|
|
.try_send(AnalyticsMsg::AggregatePostSearch(aggregate));
|
2021-10-27 18:16:13 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
fn add_documents(
|
2021-10-29 15:58:06 +02:00
|
|
|
&self,
|
2021-10-27 18:16:13 +02:00
|
|
|
documents_query: &UpdateDocumentsQuery,
|
|
|
|
index_creation: bool,
|
|
|
|
request: &HttpRequest,
|
|
|
|
) {
|
2021-10-28 16:28:41 +02:00
|
|
|
let aggregate = DocumentsAggregator::from_query(documents_query, index_creation, request);
|
|
|
|
let _ = self
|
|
|
|
.sender
|
2021-10-29 15:58:06 +02:00
|
|
|
.try_send(AnalyticsMsg::AggregateAddDocuments(aggregate));
|
2021-10-27 18:16:13 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
fn update_documents(
|
2021-10-29 15:58:06 +02:00
|
|
|
&self,
|
2021-10-27 18:16:13 +02:00
|
|
|
documents_query: &UpdateDocumentsQuery,
|
|
|
|
index_creation: bool,
|
|
|
|
request: &HttpRequest,
|
|
|
|
) {
|
2021-10-28 16:28:41 +02:00
|
|
|
let aggregate = DocumentsAggregator::from_query(documents_query, index_creation, request);
|
|
|
|
let _ = self
|
|
|
|
.sender
|
2021-10-29 15:58:06 +02:00
|
|
|
.try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate));
|
2021-10-27 18:16:13 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-10-28 16:28:41 +02:00
|
|
|
/// Background task state: receives [`AnalyticsMsg`]s, aggregates them, and
/// periodically flushes everything to Segment (see [`Segment::run`]).
pub struct Segment {
    // receiving end of the channel fed by `SegmentAnalytics`
    inbox: Receiver<AnalyticsMsg>,
    // per-instance Segment user attached to every event
    user: User,
    // CLI options, serialized into the `Identify` traits on every tick
    opt: Opt,
    batcher: AutoBatcher,
    // rolling aggregates, drained (std::mem::take) on every tick
    get_search_aggregator: SearchAggregator,
    post_search_aggregator: SearchAggregator,
    add_documents_aggregator: DocumentsAggregator,
    update_documents_aggregator: DocumentsAggregator,
}
|
|
|
|
|
|
|
|
impl Segment {
    /// Build the `traits` JSON sent with each `Identify`: static system info,
    /// sanitized CLI options, and current database stats.
    fn compute_traits(opt: &Opt, stats: Stats) -> Value {
        // Captured once at first call; used to report instance uptime in days.
        static FIRST_START_TIMESTAMP: Lazy<Instant> = Lazy::new(Instant::now);
        // System description is computed once and cached: it doesn't change
        // over the lifetime of the process.
        static SYSTEM: Lazy<Value> = Lazy::new(|| {
            let mut sys = System::new_all();
            sys.refresh_all();
            // keep only the part before the first `-` (e.g. drop distro suffixes)
            let kernel_version = sys
                .kernel_version()
                .map(|k| k.split_once("-").map(|(k, _)| k.to_string()))
                .flatten();
            json!({
                "distribution": sys.name(),
                "kernel_version": kernel_version,
                "cores": sys.cpus().len(),
                "ram_size": sys.total_memory(),
                "disk_size": sys.disks().iter().map(|disk| disk.total_space()).max(),
                "server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(),
            })
        });
        // The infos are all cli option except every option containing sensitive information.
        // We consider an information as sensible if it contains a path, an address or a key.
        let infos = {
            // First we see if any sensitive fields were used.
            let db_path = opt.db_path != PathBuf::from("./data.ms");
            let import_dump = opt.import_dump.is_some();
            let dumps_dir = opt.dumps_dir != PathBuf::from("dumps/");
            let import_snapshot = opt.import_snapshot.is_some();
            let snapshots_dir = opt.snapshot_dir != PathBuf::from("snapshots/");
            // NOTE(review): `default_http_addr` is not among the imports
            // visible in this file — confirm it is brought into scope elsewhere.
            let http_addr = opt.http_addr != default_http_addr();

            let mut infos = serde_json::to_value(opt).unwrap();

            // Then we overwrite all sensitive field with a boolean representing if
            // the feature was used or not.
            infos["db_path"] = json!(db_path);
            infos["import_dump"] = json!(import_dump);
            infos["dumps_dir"] = json!(dumps_dir);
            infos["import_snapshot"] = json!(import_snapshot);
            infos["snapshot_dir"] = json!(snapshots_dir);
            infos["http_addr"] = json!(http_addr);

            infos
        };

        let number_of_documents = stats
            .indexes
            .values()
            .map(|index| index.number_of_documents)
            .collect::<Vec<u64>>();

        json!({
            "start_since_days": FIRST_START_TIMESTAMP.elapsed().as_secs() / (60 * 60 * 24), // one day
            "system": *SYSTEM,
            "stats": {
                "database_size": stats.database_size,
                "indexes_number": stats.indexes.len(),
                "documents_number": number_of_documents,
            },
            "infos": infos,
        })
    }

    /// Event loop: aggregates incoming messages and flushes to Segment once
    /// per hour. Runs until the channel is closed (`recv` returning `None`
    /// falls through to the next loop iteration).
    async fn run(mut self, meilisearch: MeiliSearch) {
        const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour
        // The first batch must be sent after one hour.
        let mut interval =
            tokio::time::interval_at(tokio::time::Instant::now() + INTERVAL, INTERVAL);

        loop {
            select! {
                _ = interval.tick() => {
                    self.tick(meilisearch.clone()).await;
                },
                msg = self.inbox.recv() => {
                    match msg {
                        // pre-built events are pushed straight to the batcher; errors ignored
                        Some(AnalyticsMsg::BatchMessage(msg)) => drop(self.batcher.push(msg).await),
                        Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
                        None => (),
                    }
                }
            }
        }
    }

    /// One flush cycle: send an `Identify` with fresh stats (when available),
    /// convert each drained aggregator into its event, and flush the batcher.
    /// All send errors are deliberately ignored — telemetry is best-effort.
    async fn tick(&mut self, meilisearch: MeiliSearch) {
        if let Ok(stats) = meilisearch.get_all_stats(&SearchRules::default()).await {
            let _ = self
                .batcher
                .push(Identify {
                    context: Some(json!({
                        "app": {
                            "version": env!("CARGO_PKG_VERSION").to_string(),
                        },
                    })),
                    user: self.user.clone(),
                    traits: Self::compute_traits(&self.opt, stats),
                    ..Default::default()
                })
                .await;
        }
        // `std::mem::take` resets each aggregator for the next interval.
        let get_search = std::mem::take(&mut self.get_search_aggregator)
            .into_event(&self.user, "Documents Searched GET");
        let post_search = std::mem::take(&mut self.post_search_aggregator)
            .into_event(&self.user, "Documents Searched POST");
        let add_documents = std::mem::take(&mut self.add_documents_aggregator)
            .into_event(&self.user, "Documents Added");
        let update_documents = std::mem::take(&mut self.update_documents_aggregator)
            .into_event(&self.user, "Documents Updated");

        // `into_event` returns `None` for aggregators that saw no request.
        if let Some(get_search) = get_search {
            let _ = self.batcher.push(get_search).await;
        }
        if let Some(post_search) = post_search {
            let _ = self.batcher.push(post_search).await;
        }
        if let Some(add_documents) = add_documents {
            let _ = self.batcher.push(add_documents).await;
        }
        if let Some(update_documents) = update_documents {
            let _ = self.batcher.push(update_documents).await;
        }
        let _ = self.batcher.flush().await;
    }
}
|
|
|
|
|
|
|
|
/// Rolling aggregate of search requests over one reporting interval.
/// Filled by `from_query`/`succeed`, merged with `aggregate`, and turned into
/// a single Segment event by `into_event`.
#[derive(Default)]
pub struct SearchAggregator {
    // timestamp of the first request folded into this aggregate
    timestamp: Option<OffsetDateTime>,

    // context
    user_agents: HashSet<String>,

    // requests
    total_received: usize,
    total_succeeded: usize,
    // processing times (ms) of successful requests; heap so `into_sorted_vec`
    // can produce them ordered for the percentile computation
    time_spent: BinaryHeap<usize>,

    // sort
    sort_with_geo_point: bool,
    // every time a request has a sort, this field must be incremented by the number of terms it contains
    sort_sum_of_criteria_terms: usize,
    // every time a request has a sort, this field must be incremented by one
    sort_total_number_of_criteria: usize,

    // filter
    filter_with_geo_radius: bool,
    // every time a request has a filter, this field must be incremented by the number of terms it contains
    filter_sum_of_criteria_terms: usize,
    // every time a request has a filter, this field must be incremented by one
    filter_total_number_of_criteria: usize,
    // count per filter syntax kind ("string" / "array" / "mixed" / "none")
    used_syntax: HashMap<String, usize>,

    // q
    // The maximum number of terms in a q request
    max_terms_number: usize,

    // every time a search is done, we increment the counter linked to the used settings
    matching_strategy: HashMap<String, usize>,

    // pagination
    max_limit: usize,
    max_offset: usize,

    // formatting — each flag records whether any request deviated from the default
    highlight_pre_tag: bool,
    highlight_post_tag: bool,
    crop_marker: bool,
    show_matches_position: bool,
    crop_length: bool,
}
|
|
|
|
|
2021-10-28 16:28:41 +02:00
|
|
|
impl SearchAggregator {
    /// Build an aggregator representing this single request (success is
    /// recorded separately via [`Self::succeed`]).
    pub fn from_query(query: &SearchQuery, request: &HttpRequest) -> Self {
        let mut ret = Self::default();
        ret.timestamp = Some(OffsetDateTime::now_utc());

        ret.total_received = 1;
        ret.user_agents = extract_user_agents(request).into_iter().collect();

        if let Some(ref sort) = query.sort {
            ret.sort_total_number_of_criteria = 1;
            ret.sort_with_geo_point = sort.iter().any(|s| s.contains("_geoPoint("));
            ret.sort_sum_of_criteria_terms = sort.len();
        }

        if let Some(ref filter) = query.filter {
            // compiled once; matches filter-expression connectives
            static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
            ret.filter_total_number_of_criteria = 1;

            // classify the filter syntax: plain string, pure array, or an
            // array whose elements themselves contain AND/OR ("mixed")
            let syntax = match filter {
                Value::String(_) => "string".to_string(),
                Value::Array(values) => {
                    if values
                        .iter()
                        .map(|v| v.to_string())
                        .any(|s| RE.is_match(&s))
                    {
                        "mixed".to_string()
                    } else {
                        "array".to_string()
                    }
                }
                _ => "none".to_string(),
            };
            // convert the string to a HashMap
            ret.used_syntax.insert(syntax, 1);

            let stringified_filters = filter.to_string();
            ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
            // number of terms = number of connective-separated segments
            ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
        }

        if let Some(ref q) = query.q {
            ret.max_terms_number = q.split_whitespace().count();
        }

        ret.matching_strategy
            .insert(format!("{:?}", query.matching_strategy), 1);

        ret.max_limit = query.limit;
        ret.max_offset = query.offset.unwrap_or_default();

        // flags are true only when the request overrides the default value
        ret.highlight_pre_tag = query.highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
        ret.highlight_post_tag = query.highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
        ret.crop_marker = query.crop_marker != DEFAULT_CROP_MARKER();
        ret.crop_length = query.crop_length != DEFAULT_CROP_LENGTH();
        ret.show_matches_position = query.show_matches_position;

        ret
    }

    /// Record a successful response: bump the success counter and keep its
    /// processing time for the percentile computation.
    pub fn succeed(&mut self, result: &SearchResult) {
        self.total_succeeded = self.total_succeeded.saturating_add(1);
        self.time_spent.push(result.processing_time_ms as usize);
    }

    /// Aggregate one [SearchAggregator] into another.
    pub fn aggregate(&mut self, mut other: Self) {
        // keep the timestamp of the earliest request in the interval
        if self.timestamp.is_none() {
            self.timestamp = other.timestamp;
        }

        // context
        for user_agent in other.user_agents.into_iter() {
            self.user_agents.insert(user_agent);
        }
        // request
        self.total_received = self.total_received.saturating_add(other.total_received);
        self.total_succeeded = self.total_succeeded.saturating_add(other.total_succeeded);
        self.time_spent.append(&mut other.time_spent);
        // sort
        self.sort_with_geo_point |= other.sort_with_geo_point;
        self.sort_sum_of_criteria_terms = self
            .sort_sum_of_criteria_terms
            .saturating_add(other.sort_sum_of_criteria_terms);
        self.sort_total_number_of_criteria = self
            .sort_total_number_of_criteria
            .saturating_add(other.sort_total_number_of_criteria);
        // filter
        self.filter_with_geo_radius |= other.filter_with_geo_radius;
        self.filter_sum_of_criteria_terms = self
            .filter_sum_of_criteria_terms
            .saturating_add(other.filter_sum_of_criteria_terms);
        self.filter_total_number_of_criteria = self
            .filter_total_number_of_criteria
            .saturating_add(other.filter_total_number_of_criteria);
        for (key, value) in other.used_syntax.into_iter() {
            let used_syntax = self.used_syntax.entry(key).or_insert(0);
            *used_syntax = used_syntax.saturating_add(value);
        }
        // q
        self.max_terms_number = self.max_terms_number.max(other.max_terms_number);

        for (key, value) in other.matching_strategy.into_iter() {
            let matching_strategy = self.matching_strategy.entry(key).or_insert(0);
            *matching_strategy = matching_strategy.saturating_add(value);
        }
        // pagination
        self.max_limit = self.max_limit.max(other.max_limit);
        self.max_offset = self.max_offset.max(other.max_offset);

        self.highlight_pre_tag |= other.highlight_pre_tag;
        self.highlight_post_tag |= other.highlight_post_tag;
        self.crop_marker |= other.crop_marker;
        self.show_matches_position |= other.show_matches_position;
        self.crop_length |= other.crop_length;
    }

    /// Turn the aggregate into a Segment event; `None` when no request was
    /// received during the interval (nothing to report).
    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
        if self.total_received == 0 {
            None
        } else {
            // the index of the 99th percentage of value
            // NOTE(review): when total_succeeded == 1 this computes index 1 on a
            // 1-element vec, so `get` returns None — confirm whether the
            // `+ 1.` offset (vs 0-based indexing) is intended.
            let percentile_99th = 0.99 * (self.total_succeeded as f64 - 1.) + 1.;
            // we get all the values in a sorted manner
            let time_spent = self.time_spent.into_sorted_vec();
            // We are only interested by the slowest value of the 99th fastest results
            let time_spent = time_spent.get(percentile_99th as usize);

            let properties = json!({
                "user-agent": self.user_agents,
                "requests": {
                    "99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
                    "total_succeeded": self.total_succeeded,
                    "total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panics
                    "total_received": self.total_received,
                },
                "sort": {
                    "with_geoPoint": self.sort_with_geo_point,
                    "avg_criteria_number": format!("{:.2}", self.sort_sum_of_criteria_terms as f64 / self.sort_total_number_of_criteria as f64),
                },
                "filter": {
                    "with_geoRadius": self.filter_with_geo_radius,
                    "avg_criteria_number": format!("{:.2}", self.filter_sum_of_criteria_terms as f64 / self.filter_total_number_of_criteria as f64),
                    "most_used_syntax": self.used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
                },
                "q": {
                    "max_terms_number": self.max_terms_number,
                    "most_used_matching_strategy": self.matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
                },
                "pagination": {
                    "max_limit": self.max_limit,
                    "max_offset": self.max_offset,
                },
                "formatting": {
                    "highlight_pre_tag": self.highlight_pre_tag,
                    "highlight_post_tag": self.highlight_post_tag,
                    "crop_marker": self.crop_marker,
                    "show_matches_position": self.show_matches_position,
                    "crop_length": self.crop_length,
                },
            });

            Some(Track {
                timestamp: self.timestamp,
                user: user.clone(),
                event: event_name.to_string(),
                properties,
                ..Default::default()
            })
        }
    }
}
|
|
|
|
|
|
|
|
/// Rolling aggregate of documents-addition/update requests over one reporting
/// interval; merged with `aggregate` and emitted by `into_event`.
#[derive(Default)]
pub struct DocumentsAggregator {
    // timestamp of the first request folded into this aggregate
    timestamp: Option<OffsetDateTime>,

    // set to true when at least one request was received
    updated: bool,

    // context
    user_agents: HashSet<String>,

    // distinct Content-Type header values seen
    content_types: HashSet<String>,
    // distinct primary keys provided by clients
    primary_keys: HashSet<String>,
    // true when any request triggered an index creation
    index_creation: bool,
}
|
|
|
|
|
2021-10-28 16:28:41 +02:00
|
|
|
impl DocumentsAggregator {
|
|
|
|
pub fn from_query(
|
|
|
|
documents_query: &UpdateDocumentsQuery,
|
|
|
|
index_creation: bool,
|
|
|
|
request: &HttpRequest,
|
|
|
|
) -> Self {
|
|
|
|
let mut ret = Self::default();
|
2022-02-14 15:32:41 +01:00
|
|
|
ret.timestamp = Some(OffsetDateTime::now_utc());
|
2021-10-28 16:28:41 +02:00
|
|
|
|
|
|
|
ret.updated = true;
|
|
|
|
ret.user_agents = extract_user_agents(request).into_iter().collect();
|
|
|
|
if let Some(primary_key) = documents_query.primary_key.clone() {
|
|
|
|
ret.primary_keys.insert(primary_key);
|
|
|
|
}
|
|
|
|
let content_type = request
|
|
|
|
.headers()
|
|
|
|
.get(CONTENT_TYPE)
|
2022-07-07 10:56:02 +02:00
|
|
|
.and_then(|s| s.to_str().ok())
|
2022-06-11 17:21:05 +02:00
|
|
|
.unwrap_or("unknown")
|
2021-10-28 16:28:41 +02:00
|
|
|
.to_string();
|
|
|
|
ret.content_types.insert(content_type);
|
|
|
|
ret.index_creation = index_creation;
|
|
|
|
|
|
|
|
ret
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Aggregate one [DocumentsAggregator] into another.
|
|
|
|
pub fn aggregate(&mut self, other: Self) {
|
2022-01-20 19:01:37 +01:00
|
|
|
if self.timestamp.is_none() {
|
|
|
|
self.timestamp = other.timestamp;
|
|
|
|
}
|
|
|
|
|
2021-10-28 16:28:41 +02:00
|
|
|
self.updated |= other.updated;
|
|
|
|
// we can't create a union because there is no `into_union` method
|
2022-07-07 10:56:02 +02:00
|
|
|
for user_agent in other.user_agents {
|
2021-10-28 16:28:41 +02:00
|
|
|
self.user_agents.insert(user_agent);
|
|
|
|
}
|
2022-07-07 10:56:02 +02:00
|
|
|
for primary_key in other.primary_keys {
|
2021-10-28 16:28:41 +02:00
|
|
|
self.primary_keys.insert(primary_key);
|
|
|
|
}
|
2022-07-07 10:56:02 +02:00
|
|
|
for content_type in other.content_types {
|
2021-10-28 16:28:41 +02:00
|
|
|
self.content_types.insert(content_type);
|
|
|
|
}
|
|
|
|
self.index_creation |= other.index_creation;
|
|
|
|
}
|
|
|
|
|
2021-10-27 18:16:13 +02:00
|
|
|
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
|
|
|
|
if !self.updated {
|
|
|
|
None
|
|
|
|
} else {
|
|
|
|
let properties = json!({
|
2021-10-28 12:52:53 +02:00
|
|
|
"user-agent": self.user_agents,
|
2021-10-27 18:16:13 +02:00
|
|
|
"payload_type": self.content_types,
|
|
|
|
"primary_key": self.primary_keys,
|
|
|
|
"index_creation": self.index_creation,
|
|
|
|
});
|
|
|
|
|
|
|
|
Some(Track {
|
2022-01-20 19:01:37 +01:00
|
|
|
timestamp: self.timestamp,
|
2021-10-27 18:16:13 +02:00
|
|
|
user: user.clone(),
|
|
|
|
event: event_name.to_string(),
|
|
|
|
properties,
|
|
|
|
..Default::default()
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|