2021-11-10 16:52:24 +01:00
|
|
|
use std::collections::{BinaryHeap, HashMap, HashSet};
|
2021-10-27 18:16:13 +02:00
|
|
|
use std::fs;
|
2023-04-26 17:08:55 +02:00
|
|
|
use std::mem::take;
|
2022-01-18 18:17:38 +01:00
|
|
|
use std::path::{Path, PathBuf};
|
2021-10-29 15:58:06 +02:00
|
|
|
use std::sync::Arc;
|
2021-10-27 18:16:13 +02:00
|
|
|
use std::time::{Duration, Instant};
|
|
|
|
|
|
|
|
use actix_web::http::header::USER_AGENT;
|
|
|
|
use actix_web::HttpRequest;
|
2022-11-28 16:27:41 +01:00
|
|
|
use byte_unit::Byte;
|
2021-10-27 18:16:13 +02:00
|
|
|
use http::header::CONTENT_TYPE;
|
2022-10-18 12:45:06 +02:00
|
|
|
use index_scheduler::IndexScheduler;
|
2023-02-20 09:25:29 +01:00
|
|
|
use meilisearch_auth::{AuthController, AuthFilter};
|
2022-10-18 12:45:06 +02:00
|
|
|
use meilisearch_types::InstanceUid;
|
2021-10-27 18:16:13 +02:00
|
|
|
use once_cell::sync::Lazy;
|
|
|
|
use regex::Regex;
|
|
|
|
use segment::message::{Identify, Track, User};
|
|
|
|
use segment::{AutoBatcher, Batcher, HttpClient};
|
2022-11-28 16:27:41 +01:00
|
|
|
use serde::Serialize;
|
2021-10-27 18:16:13 +02:00
|
|
|
use serde_json::{json, Value};
|
2024-01-16 15:43:03 +01:00
|
|
|
use sysinfo::{Disks, System};
|
2022-02-14 15:32:41 +01:00
|
|
|
use time::OffsetDateTime;
|
2021-10-28 16:28:41 +02:00
|
|
|
use tokio::select;
|
|
|
|
use tokio::sync::mpsc::{self, Receiver, Sender};
|
2021-10-27 18:16:13 +02:00
|
|
|
use uuid::Uuid;
|
|
|
|
|
2023-05-09 19:52:11 +02:00
|
|
|
use super::{
|
|
|
|
config_user_id_path, DocumentDeletionKind, DocumentFetchKind, MEILISEARCH_CONFIG_PATH,
|
|
|
|
};
|
2021-10-27 18:16:13 +02:00
|
|
|
use crate::analytics::Analytics;
|
2024-02-12 11:06:37 +01:00
|
|
|
use crate::option::{
|
|
|
|
default_http_addr, IndexerOpts, LogMode, MaxMemory, MaxThreads, ScheduleSnapshot,
|
|
|
|
};
|
2021-10-27 18:16:13 +02:00
|
|
|
use crate::routes::indexes::documents::UpdateDocumentsQuery;
|
2023-04-26 17:08:55 +02:00
|
|
|
use crate::routes::indexes::facet_search::FacetSearchQuery;
|
2022-10-18 12:45:06 +02:00
|
|
|
use crate::routes::{create_all_stats, Stats};
|
|
|
|
use crate::search::{
|
2023-04-26 17:08:55 +02:00
|
|
|
FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
|
2024-05-27 10:54:12 +02:00
|
|
|
SimilarQuery, SimilarResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
|
|
|
|
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
|
|
|
|
DEFAULT_SEMANTIC_RATIO,
|
2022-10-18 12:45:06 +02:00
|
|
|
};
|
2021-10-27 18:16:13 +02:00
|
|
|
use crate::Opt;
|
|
|
|
|
2022-05-19 14:08:34 +02:00
|
|
|
/// Header checked first to identify the client; the standard `User-Agent` header is the fallback.
const ANALYTICS_HEADER: &str = "X-Meilisearch-Client";
|
|
|
|
|
2021-10-27 18:16:13 +02:00
|
|
|
/// Write the instance-uid in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-instance-uid`. Ignore the errors.
|
2022-10-18 12:45:06 +02:00
|
|
|
fn write_user_id(db_path: &Path, user_id: &InstanceUid) {
|
2022-12-07 18:22:36 +01:00
|
|
|
let _ = fs::write(db_path.join("instance-uid"), user_id.to_string());
|
2022-10-20 18:00:07 +02:00
|
|
|
if let Some((meilisearch_config_path, user_id_path)) =
|
|
|
|
MEILISEARCH_CONFIG_PATH.as_ref().zip(config_user_id_path(db_path))
|
2021-10-27 18:16:13 +02:00
|
|
|
{
|
2023-09-21 17:41:12 +02:00
|
|
|
let _ = fs::create_dir_all(meilisearch_config_path);
|
2022-10-18 12:45:06 +02:00
|
|
|
let _ = fs::write(user_id_path, user_id.to_string());
|
2021-10-27 18:16:13 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-11-04 08:10:12 +01:00
|
|
|
/// Key handed to the Segment batcher to authenticate this instance against the telemetry API.
const SEGMENT_API_KEY: &str = "P3FWhhEsJiEDCuEHpmcN9DHcK4hVfBvb";
|
2021-10-27 18:16:13 +02:00
|
|
|
|
|
|
|
pub fn extract_user_agents(request: &HttpRequest) -> Vec<String> {
|
|
|
|
request
|
|
|
|
.headers()
|
2022-05-19 14:08:34 +02:00
|
|
|
.get(ANALYTICS_HEADER)
|
|
|
|
.or_else(|| request.headers().get(USER_AGENT))
|
2023-09-21 17:41:12 +02:00
|
|
|
.and_then(|header| header.to_str().ok())
|
2021-10-27 18:16:13 +02:00
|
|
|
.unwrap_or("unknown")
|
|
|
|
.split(';')
|
|
|
|
.map(str::trim)
|
|
|
|
.map(ToString::to_string)
|
|
|
|
.collect()
|
|
|
|
}
|
|
|
|
|
2021-10-29 15:58:06 +02:00
|
|
|
/// Messages sent by the HTTP-facing [`SegmentAnalytics`] handle to the
/// background [`Segment`] task, which batches or aggregates them.
pub enum AnalyticsMsg {
    /// A fully-built event, pushed to the batcher as-is.
    BatchMessage(Track),
    // Each `Aggregate*` variant is merged into the matching per-route
    // aggregator field of `Segment` (see `Segment::run`).
    AggregateGetSearch(SearchAggregator),
    AggregatePostSearch(SearchAggregator),
    AggregateGetSimilar(SimilarAggregator),
    AggregatePostSimilar(SimilarAggregator),
    AggregatePostMultiSearch(MultiSearchAggregator),
    AggregatePostFacetSearch(FacetSearchAggregator),
    AggregateAddDocuments(DocumentsAggregator),
    AggregateDeleteDocuments(DocumentsDeletionAggregator),
    AggregateUpdateDocuments(DocumentsAggregator),
    AggregateGetFetchDocuments(DocumentsFetchAggregator),
    AggregatePostFetchDocuments(DocumentsFetchAggregator),
}
|
|
|
|
|
2021-10-27 18:16:13 +02:00
|
|
|
/// Handle exposed to the rest of the application; the actual batching and
/// aggregation happens in the background [`Segment`] task it communicates with.
pub struct SegmentAnalytics {
    /// Unique identifier of this instance, persisted on disk by `write_user_id`.
    instance_uid: InstanceUid,
    /// Channel to the background task; all `Analytics` methods go through it.
    sender: Sender<AnalyticsMsg>,
    /// Segment user for this instance (`User::UserId` built from `instance_uid`).
    user: User,
}
|
|
|
|
|
|
|
|
impl SegmentAnalytics {
    /// Set up telemetry: load or create the instance uid, send the first-run
    /// `Launched` events if needed, and spawn the background [`Segment`] task.
    ///
    /// Returns a `MockAnalytics` (no-op) instead when the HTTP client cannot
    /// be built, so analytics failures never prevent startup.
    #[allow(clippy::new_ret_no_self)]
    pub async fn new(
        opt: &Opt,
        index_scheduler: Arc<IndexScheduler>,
        auth_controller: Arc<AuthController>,
    ) -> Arc<dyn Analytics> {
        // Reuse the uid stored on disk when present; its absence means this is
        // the very first launch of this instance.
        let instance_uid = super::find_user_id(&opt.db_path);
        let first_time_run = instance_uid.is_none();
        let instance_uid = instance_uid.unwrap_or_else(Uuid::new_v4);
        write_user_id(&opt.db_path, &instance_uid);

        let client = reqwest::Client::builder().connect_timeout(Duration::from_secs(10)).build();

        // if reqwest throws an error we won't be able to send analytics
        if client.is_err() {
            return super::MockAnalytics::new(opt);
        }

        let client =
            HttpClient::new(client.unwrap(), "https://telemetry.meilisearch.com".to_string());
        let user = User::UserId { user_id: instance_uid.to_string() };
        let mut batcher = AutoBatcher::new(client, Batcher::new(None), SEGMENT_API_KEY.to_string());

        // If Meilisearch is launched for the first time:
        // 1. Send an event Launched associated to the user `total_launch`.
        // 2. Batch an event Launched with the real instance-id and send it in one hour.
        if first_time_run {
            let _ = batcher
                .push(Track {
                    user: User::UserId { user_id: "total_launch".to_string() },
                    event: "Launched".to_string(),
                    ..Default::default()
                })
                .await;
            // Flush immediately so the aggregate `total_launch` counter is sent now,
            // while the per-instance `Launched` below rides the regular batch cycle.
            let _ = batcher.flush().await;
            let _ = batcher
                .push(Track {
                    user: user.clone(),
                    event: "Launched".to_string(),
                    ..Default::default()
                })
                .await;
        }

        let (sender, inbox) = mpsc::channel(100); // How many analytics can we bufferize

        let segment = Box::new(Segment {
            inbox,
            user: user.clone(),
            opt: opt.clone(),
            batcher,
            post_search_aggregator: SearchAggregator::default(),
            post_multi_search_aggregator: MultiSearchAggregator::default(),
            post_facet_search_aggregator: FacetSearchAggregator::default(),
            get_search_aggregator: SearchAggregator::default(),
            add_documents_aggregator: DocumentsAggregator::default(),
            delete_documents_aggregator: DocumentsDeletionAggregator::default(),
            update_documents_aggregator: DocumentsAggregator::default(),
            get_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
            post_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
            get_similar_aggregator: SimilarAggregator::default(),
            post_similar_aggregator: SimilarAggregator::default(),
        });
        // Detached task: it runs for the lifetime of the process.
        tokio::spawn(segment.run(index_scheduler.clone(), auth_controller.clone()));

        let this = Self { instance_uid, sender, user: user.clone() };

        Arc::new(this)
    }
}
|
|
|
|
|
|
|
|
impl super::Analytics for SegmentAnalytics {
|
2022-10-18 12:45:06 +02:00
|
|
|
fn instance_uid(&self) -> Option<&InstanceUid> {
|
|
|
|
Some(&self.instance_uid)
|
|
|
|
}
|
|
|
|
|
2021-10-29 15:58:06 +02:00
|
|
|
fn publish(&self, event_name: String, mut send: Value, request: Option<&HttpRequest>) {
|
2023-09-21 17:41:12 +02:00
|
|
|
let user_agent = request.map(extract_user_agents);
|
2021-10-28 12:52:53 +02:00
|
|
|
|
|
|
|
send["user-agent"] = json!(user_agent);
|
2021-10-28 16:28:41 +02:00
|
|
|
let event = Track {
|
|
|
|
user: self.user.clone(),
|
|
|
|
event: event_name.clone(),
|
|
|
|
properties: send,
|
|
|
|
..Default::default()
|
|
|
|
};
|
2023-09-21 17:41:12 +02:00
|
|
|
let _ = self.sender.try_send(AnalyticsMsg::BatchMessage(event));
|
2021-10-27 18:16:13 +02:00
|
|
|
}
|
2021-10-29 15:58:06 +02:00
|
|
|
|
|
|
|
fn get_search(&self, aggregate: SearchAggregator) {
|
2022-10-20 18:00:07 +02:00
|
|
|
let _ = self.sender.try_send(AnalyticsMsg::AggregateGetSearch(aggregate));
|
2021-10-27 18:16:13 +02:00
|
|
|
}
|
|
|
|
|
2021-10-29 15:58:06 +02:00
|
|
|
fn post_search(&self, aggregate: SearchAggregator) {
|
2022-10-20 18:00:07 +02:00
|
|
|
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate));
|
2021-10-27 18:16:13 +02:00
|
|
|
}
|
|
|
|
|
2024-05-27 10:54:12 +02:00
|
|
|
fn get_similar(&self, aggregate: SimilarAggregator) {
|
|
|
|
let _ = self.sender.try_send(AnalyticsMsg::AggregateGetSimilar(aggregate));
|
|
|
|
}
|
|
|
|
|
|
|
|
fn post_similar(&self, aggregate: SimilarAggregator) {
|
|
|
|
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSimilar(aggregate));
|
|
|
|
}
|
|
|
|
|
2023-04-26 17:08:55 +02:00
|
|
|
fn post_facet_search(&self, aggregate: FacetSearchAggregator) {
|
|
|
|
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFacetSearch(aggregate));
|
|
|
|
}
|
|
|
|
|
2023-02-20 09:21:52 +01:00
|
|
|
fn post_multi_search(&self, aggregate: MultiSearchAggregator) {
|
|
|
|
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostMultiSearch(aggregate));
|
|
|
|
}
|
|
|
|
|
2021-10-27 18:16:13 +02:00
|
|
|
fn add_documents(
|
2021-10-29 15:58:06 +02:00
|
|
|
&self,
|
2021-10-27 18:16:13 +02:00
|
|
|
documents_query: &UpdateDocumentsQuery,
|
|
|
|
index_creation: bool,
|
|
|
|
request: &HttpRequest,
|
|
|
|
) {
|
2021-10-28 16:28:41 +02:00
|
|
|
let aggregate = DocumentsAggregator::from_query(documents_query, index_creation, request);
|
2022-10-20 18:00:07 +02:00
|
|
|
let _ = self.sender.try_send(AnalyticsMsg::AggregateAddDocuments(aggregate));
|
2021-10-27 18:16:13 +02:00
|
|
|
}
|
|
|
|
|
2022-11-28 16:27:41 +01:00
|
|
|
fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest) {
|
|
|
|
let aggregate = DocumentsDeletionAggregator::from_query(kind, request);
|
|
|
|
let _ = self.sender.try_send(AnalyticsMsg::AggregateDeleteDocuments(aggregate));
|
|
|
|
}
|
|
|
|
|
2021-10-27 18:16:13 +02:00
|
|
|
fn update_documents(
|
2021-10-29 15:58:06 +02:00
|
|
|
&self,
|
2021-10-27 18:16:13 +02:00
|
|
|
documents_query: &UpdateDocumentsQuery,
|
|
|
|
index_creation: bool,
|
|
|
|
request: &HttpRequest,
|
|
|
|
) {
|
2021-10-28 16:28:41 +02:00
|
|
|
let aggregate = DocumentsAggregator::from_query(documents_query, index_creation, request);
|
2022-10-20 18:00:07 +02:00
|
|
|
let _ = self.sender.try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate));
|
2021-10-27 18:16:13 +02:00
|
|
|
}
|
2022-11-28 16:27:41 +01:00
|
|
|
|
2023-05-09 19:52:11 +02:00
|
|
|
fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) {
|
|
|
|
let aggregate = DocumentsFetchAggregator::from_query(documents_query, request);
|
|
|
|
let _ = self.sender.try_send(AnalyticsMsg::AggregateGetFetchDocuments(aggregate));
|
|
|
|
}
|
|
|
|
|
|
|
|
fn post_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) {
|
|
|
|
let aggregate = DocumentsFetchAggregator::from_query(documents_query, request);
|
|
|
|
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFetchDocuments(aggregate));
|
|
|
|
}
|
2022-11-28 16:27:41 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/// This structure represents the `infos` field we send in the analytics.
/// It's quite close to the `Opt` structure except all sensitive information
/// has been simplified to a boolean ("did the user customize it?").
/// It's sent as-is in amplitude, thus you should never update a name of the
/// struct without the approval of the PM.
#[derive(Debug, Clone, Serialize)]
struct Infos {
    env: String,
    experimental_enable_metrics: bool,
    experimental_search_queue_size: usize,
    experimental_logs_mode: LogMode,
    experimental_replication_parameters: bool,
    experimental_enable_logs_route: bool,
    experimental_reduce_indexing_memory_usage: bool,
    experimental_max_number_of_batched_tasks: usize,
    gpu_enabled: bool,
    // Path-like options below are reduced to "is it different from the default?".
    db_path: bool,
    import_dump: bool,
    dump_dir: bool,
    ignore_missing_dump: bool,
    ignore_dump_if_db_exists: bool,
    import_snapshot: bool,
    // `None` when snapshot scheduling is disabled, otherwise the configured interval.
    schedule_snapshot: Option<u64>,
    snapshot_dir: bool,
    ignore_missing_snapshot: bool,
    ignore_snapshot_if_db_exists: bool,
    http_addr: bool,
    http_payload_size_limit: Byte,
    task_queue_webhook: bool,
    task_webhook_authorization_header: bool,
    log_level: String,
    max_indexing_memory: MaxMemory,
    max_indexing_threads: MaxThreads,
    with_configuration_file: bool,
    ssl_auth_path: bool,
    ssl_cert_path: bool,
    ssl_key_path: bool,
    ssl_ocsp_path: bool,
    ssl_require_auth: bool,
    ssl_resumption: bool,
    ssl_tickets: bool,
}
|
|
|
|
|
|
|
|
impl From<Opt> for Infos {
    /// Anonymize the launch options: every path, address or key is reduced to
    /// a boolean stating whether the user customized it.
    fn from(options: Opt) -> Self {
        // We want to decompose this whole struct by hand to be sure we don't forget
        // to add analytics when we add a field in the Opt.
        // Thus we must not insert `..` at the end.
        let Opt {
            db_path,
            experimental_enable_metrics,
            experimental_search_queue_size,
            experimental_logs_mode,
            experimental_replication_parameters,
            experimental_enable_logs_route,
            experimental_reduce_indexing_memory_usage,
            experimental_max_number_of_batched_tasks,
            http_addr,
            master_key: _,
            env,
            task_webhook_url,
            task_webhook_authorization_header,
            max_index_size: _,
            max_task_db_size: _,
            http_payload_size_limit,
            ssl_cert_path,
            ssl_key_path,
            ssl_auth_path,
            ssl_ocsp_path,
            ssl_require_auth,
            ssl_resumption,
            ssl_tickets,
            import_snapshot,
            ignore_missing_snapshot,
            ignore_snapshot_if_db_exists,
            snapshot_dir,
            schedule_snapshot,
            import_dump,
            ignore_missing_dump,
            ignore_dump_if_db_exists,
            dump_dir,
            log_level,
            indexer_options,
            config_file_path,
            #[cfg(feature = "analytics")]
            no_analytics: _,
        } = options;

        // Flatten the snapshot schedule into an optional interval.
        let schedule_snapshot = match schedule_snapshot {
            ScheduleSnapshot::Disabled => None,
            ScheduleSnapshot::Enabled(interval) => Some(interval),
        };

        let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } =
            indexer_options;

        // We're going to override every sensitive piece of information.
        // We consider information sensitive if it contains a path, an address, or a key.
        Self {
            env,
            experimental_enable_metrics,
            experimental_search_queue_size,
            experimental_logs_mode,
            experimental_replication_parameters,
            experimental_enable_logs_route,
            experimental_reduce_indexing_memory_usage,
            gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
            db_path: db_path != PathBuf::from("./data.ms"),
            import_dump: import_dump.is_some(),
            dump_dir: dump_dir != PathBuf::from("dumps/"),
            ignore_missing_dump,
            ignore_dump_if_db_exists,
            import_snapshot: import_snapshot.is_some(),
            schedule_snapshot,
            snapshot_dir: snapshot_dir != PathBuf::from("snapshots/"),
            ignore_missing_snapshot,
            ignore_snapshot_if_db_exists,
            http_addr: http_addr != default_http_addr(),
            http_payload_size_limit,
            experimental_max_number_of_batched_tasks,
            task_queue_webhook: task_webhook_url.is_some(),
            task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
            log_level: log_level.to_string(),
            max_indexing_memory,
            max_indexing_threads,
            with_configuration_file: config_file_path.is_some(),
            ssl_auth_path: ssl_auth_path.is_some(),
            ssl_cert_path: ssl_cert_path.is_some(),
            ssl_key_path: ssl_key_path.is_some(),
            ssl_ocsp_path: ssl_ocsp_path.is_some(),
            ssl_require_auth,
            ssl_resumption,
            ssl_tickets,
        }
    }
}
|
|
|
|
|
2021-10-28 16:28:41 +02:00
|
|
|
/// State of the background analytics task: it receives [`AnalyticsMsg`]s on
/// `inbox`, merges them into per-route aggregators, and periodically flushes
/// everything to the telemetry service through `batcher` (see `run`/`tick`).
pub struct Segment {
    inbox: Receiver<AnalyticsMsg>,
    user: User,
    opt: Opt,
    batcher: AutoBatcher,
    // One aggregator per instrumented route; each is drained on every tick.
    get_search_aggregator: SearchAggregator,
    post_search_aggregator: SearchAggregator,
    post_multi_search_aggregator: MultiSearchAggregator,
    post_facet_search_aggregator: FacetSearchAggregator,
    add_documents_aggregator: DocumentsAggregator,
    delete_documents_aggregator: DocumentsDeletionAggregator,
    update_documents_aggregator: DocumentsAggregator,
    get_fetch_documents_aggregator: DocumentsFetchAggregator,
    post_fetch_documents_aggregator: DocumentsFetchAggregator,
    get_similar_aggregator: SimilarAggregator,
    post_similar_aggregator: SimilarAggregator,
}
|
|
|
|
|
|
|
|
impl Segment {
    /// Build the `traits` payload attached to the hourly `Identify` call:
    /// once-computed system information plus fresh database stats and the
    /// anonymized launch options.
    fn compute_traits(opt: &Opt, stats: Stats) -> Value {
        static FIRST_START_TIMESTAMP: Lazy<Instant> = Lazy::new(Instant::now);
        // System facts don't change while the process runs: compute them once.
        static SYSTEM: Lazy<Value> = Lazy::new(|| {
            let disks = Disks::new_with_refreshed_list();
            let mut sys = System::new_all();
            sys.refresh_all();
            // Keep only the part of the kernel version before the first `-`.
            let kernel_version = System::kernel_version()
                .and_then(|k| k.split_once('-').map(|(k, _)| k.to_string()));
            json!({
                    "distribution": System::name(),
                    "kernel_version": kernel_version,
                    "cores": sys.cpus().len(),
                    "ram_size": sys.total_memory(),
                    "disk_size": disks.iter().map(|disk| disk.total_space()).max(),
                    "server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(),
            })
        });
        let number_of_documents =
            stats.indexes.values().map(|index| index.number_of_documents).collect::<Vec<u64>>();

        json!({
            "start_since_days": FIRST_START_TIMESTAMP.elapsed().as_secs() / (60 * 60 * 24), // one day
            "system": *SYSTEM,
            "stats": {
                "database_size": stats.database_size,
                "indexes_number": stats.indexes.len(),
                "documents_number": number_of_documents,
            },
            "infos": Infos::from(opt.clone()),
        })
    }

    /// Event loop of the analytics task: merge incoming messages into the
    /// aggregators, and flush everything to Segment once per `INTERVAL`.
    async fn run(
        mut self,
        index_scheduler: Arc<IndexScheduler>,
        auth_controller: Arc<AuthController>,
    ) {
        const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour
        // The first batch must be sent after one hour.
        let mut interval =
            tokio::time::interval_at(tokio::time::Instant::now() + INTERVAL, INTERVAL);

        loop {
            select! {
                _ = interval.tick() => {
                    self.tick(index_scheduler.clone(), auth_controller.clone()).await;
                },
                msg = self.inbox.recv() => {
                    match msg {
                        // Raw events go straight to the batcher; aggregates are
                        // merged into their per-route accumulator until the next tick.
                        Some(AnalyticsMsg::BatchMessage(msg)) => drop(self.batcher.push(msg).await),
                        Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregatePostMultiSearch(agreg)) => self.post_multi_search_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregatePostFacetSearch(agreg)) => self.post_facet_search_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregateGetFetchDocuments(agreg)) => self.get_fetch_documents_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregatePostFetchDocuments(agreg)) => self.post_fetch_documents_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregateGetSimilar(agreg)) => self.get_similar_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregatePostSimilar(agreg)) => self.post_similar_aggregator.aggregate(agreg),
                        // Channel closed (all senders dropped): nothing to merge.
                        None => (),
                    }
                }
            }
        }
    }

    /// One flush cycle: push an `Identify` with fresh stats, turn every
    /// aggregator into its `Track` event (if non-empty), then flush the batcher.
    async fn tick(
        &mut self,
        index_scheduler: Arc<IndexScheduler>,
        auth_controller: Arc<AuthController>,
    ) {
        if let Ok(stats) =
            create_all_stats(index_scheduler.into(), auth_controller.into(), &AuthFilter::default())
        {
            // Replace the version number with the prototype name if any.
            let version = if let Some(prototype) = build_info::DescribeResult::from_build()
                .and_then(|describe| describe.as_prototype())
            {
                prototype
            } else {
                env!("CARGO_PKG_VERSION")
            };

            let _ = self
                .batcher
                .push(Identify {
                    context: Some(json!({
                        "app": {
                            "version": version.to_string(),
                        },
                    })),
                    user: self.user.clone(),
                    traits: Self::compute_traits(&self.opt, stats),
                    ..Default::default()
                })
                .await;
        }

        // Borrow every aggregator field at once; the `take` calls below swap
        // each one for a fresh default so the next window starts empty.
        let Segment {
            inbox: _,
            opt: _,
            batcher: _,
            user,
            get_search_aggregator,
            post_search_aggregator,
            post_multi_search_aggregator,
            post_facet_search_aggregator,
            add_documents_aggregator,
            delete_documents_aggregator,
            update_documents_aggregator,
            get_fetch_documents_aggregator,
            post_fetch_documents_aggregator,
            get_similar_aggregator,
            post_similar_aggregator,
        } = self;

        if let Some(get_search) =
            take(get_search_aggregator).into_event(user, "Documents Searched GET")
        {
            let _ = self.batcher.push(get_search).await;
        }
        if let Some(post_search) =
            take(post_search_aggregator).into_event(user, "Documents Searched POST")
        {
            let _ = self.batcher.push(post_search).await;
        }
        if let Some(post_multi_search) = take(post_multi_search_aggregator)
            .into_event(user, "Documents Searched by Multi-Search POST")
        {
            let _ = self.batcher.push(post_multi_search).await;
        }
        if let Some(post_facet_search) =
            take(post_facet_search_aggregator).into_event(user, "Facet Searched POST")
        {
            let _ = self.batcher.push(post_facet_search).await;
        }
        if let Some(add_documents) =
            take(add_documents_aggregator).into_event(user, "Documents Added")
        {
            let _ = self.batcher.push(add_documents).await;
        }
        if let Some(delete_documents) =
            take(delete_documents_aggregator).into_event(user, "Documents Deleted")
        {
            let _ = self.batcher.push(delete_documents).await;
        }
        if let Some(update_documents) =
            take(update_documents_aggregator).into_event(user, "Documents Updated")
        {
            let _ = self.batcher.push(update_documents).await;
        }
        if let Some(get_fetch_documents) =
            take(get_fetch_documents_aggregator).into_event(user, "Documents Fetched GET")
        {
            let _ = self.batcher.push(get_fetch_documents).await;
        }
        if let Some(post_fetch_documents) =
            take(post_fetch_documents_aggregator).into_event(user, "Documents Fetched POST")
        {
            let _ = self.batcher.push(post_fetch_documents).await;
        }

        if let Some(get_similar_documents) =
            take(get_similar_aggregator).into_event(user, "Similar GET")
        {
            let _ = self.batcher.push(get_similar_documents).await;
        }

        if let Some(post_similar_documents) =
            take(post_similar_aggregator).into_event(user, "Similar POST")
        {
            let _ = self.batcher.push(post_similar_documents).await;
        }
        let _ = self.batcher.flush().await;
    }
}
|
|
|
|
|
|
|
|
/// Aggregated view of the search requests received since the last telemetry
/// flush; turned into a single "Documents Searched" event when flushed.
#[derive(Default)]
pub struct SearchAggregator {
    // NOTE(review): presumably the time the aggregate was started — confirm in `from_query`.
    timestamp: Option<OffsetDateTime>,

    // context
    user_agents: HashSet<String>,

    // requests
    total_received: usize,
    total_succeeded: usize,
    total_degraded: usize,
    total_used_negative_operator: usize,
    // Heap of per-request durations; NOTE(review): likely used for percentile
    // stats when building the event — confirm in `into_event`.
    time_spent: BinaryHeap<usize>,

    // sort
    sort_with_geo_point: bool,
    // every time a request has a sort, this field must be incremented by the number of terms it contains
    sort_sum_of_criteria_terms: usize,
    // every time a request has a sort, this field must be incremented by one
    sort_total_number_of_criteria: usize,

    // filter
    filter_with_geo_radius: bool,
    filter_with_geo_bounding_box: bool,
    // every time a request has a filter, this field must be incremented by the number of terms it contains
    filter_sum_of_criteria_terms: usize,
    // every time a request has a filter, this field must be incremented by one
    filter_total_number_of_criteria: usize,
    used_syntax: HashMap<String, usize>,

    // attributes_to_search_on
    // every time a search is done using attributes_to_search_on
    attributes_to_search_on_total_number_of_uses: usize,

    // q
    // The maximum number of terms in a q request
    max_terms_number: usize,

    // vector
    // The maximum number of floats in a vector request
    max_vector_size: usize,
    // Whether the semantic ratio passed to a hybrid search equals the default ratio.
    semantic_ratio: bool,
    // Whether a non-default embedder was specified
    embedder: bool,
    hybrid: bool,

    // every time a search is done, we increment the counter linked to the used settings
    matching_strategy: HashMap<String, usize>,

    // pagination
    max_limit: usize,
    max_offset: usize,
    finite_pagination: usize,

    // formatting
    max_attributes_to_retrieve: usize,
    max_attributes_to_highlight: usize,
    highlight_pre_tag: bool,
    highlight_post_tag: bool,
    max_attributes_to_crop: usize,
    crop_marker: bool,
    show_matches_position: bool,
    crop_length: bool,

    // facets
    facets_sum_of_terms: usize,
    facets_total_number_of_facets: usize,

    // scoring
    show_ranking_score: bool,
    show_ranking_score_details: bool,
    ranking_score_threshold: bool,
}
|
|
|
|
|
2021-10-28 16:28:41 +02:00
|
|
|
impl SearchAggregator {
    /// Build an aggregate representing this single request (counters set for one event).
    /// The exhaustive destructuring of `SearchQuery` guarantees a compile error if a
    /// field is added to or removed from the query type without updating analytics.
    #[allow(clippy::field_reassign_with_default)]
    pub fn from_query(query: &SearchQuery, request: &HttpRequest) -> Self {
        let SearchQuery {
            q,
            vector,
            offset,
            limit,
            page,
            hits_per_page,
            attributes_to_retrieve: _,
            retrieve_vectors: _,
            attributes_to_crop: _,
            crop_length,
            attributes_to_highlight: _,
            show_matches_position,
            show_ranking_score,
            show_ranking_score_details,
            filter,
            sort,
            facets: _,
            highlight_pre_tag,
            highlight_post_tag,
            crop_marker,
            matching_strategy,
            attributes_to_search_on,
            hybrid,
            ranking_score_threshold,
        } = query;

        let mut ret = Self::default();
        ret.timestamp = Some(OffsetDateTime::now_utc());

        ret.total_received = 1;
        ret.user_agents = extract_user_agents(request).into_iter().collect();

        if let Some(ref sort) = sort {
            ret.sort_total_number_of_criteria = 1;
            ret.sort_with_geo_point = sort.iter().any(|s| s.contains("_geoPoint("));
            ret.sort_sum_of_criteria_terms = sort.len();
        }

        if let Some(ref filter) = filter {
            // Matches the boolean connectors of the filter syntax; used both to detect
            // "mixed" array/string filters and to count filter terms below.
            static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
            ret.filter_total_number_of_criteria = 1;

            let syntax = match filter {
                Value::String(_) => "string".to_string(),
                Value::Array(values) => {
                    // An array whose elements themselves contain AND/OR is a mix of
                    // both filter syntaxes.
                    if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) {
                        "mixed".to_string()
                    } else {
                        "array".to_string()
                    }
                }
                _ => "none".to_string(),
            };
            // convert the string to a HashMap
            ret.used_syntax.insert(syntax, 1);

            let stringified_filters = filter.to_string();
            ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
            ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox(");
            // Number of terms = number of connector-separated chunks.
            ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
        }

        // attributes_to_search_on
        if attributes_to_search_on.is_some() {
            ret.attributes_to_search_on_total_number_of_uses = 1;
        }

        if let Some(ref q) = q {
            ret.max_terms_number = q.split_whitespace().count();
        }

        if let Some(ref vector) = vector {
            ret.max_vector_size = vector.len();
        }

        if query.is_finite_pagination() {
            // Finite (page/hitsPerPage) pagination: derive the effective limit/offset.
            let limit = hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
            ret.max_limit = limit;
            ret.max_offset = page.unwrap_or(1).saturating_sub(1) * limit;
            ret.finite_pagination = 1;
        } else {
            ret.max_limit = *limit;
            ret.max_offset = *offset;
            ret.finite_pagination = 0;
        }

        ret.matching_strategy.insert(format!("{:?}", matching_strategy), 1);

        // Formatting flags record whether a non-default value was supplied.
        ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
        ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
        ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER();
        ret.crop_length = *crop_length != DEFAULT_CROP_LENGTH();
        ret.show_matches_position = *show_matches_position;

        ret.show_ranking_score = *show_ranking_score;
        ret.show_ranking_score_details = *show_ranking_score_details;
        ret.ranking_score_threshold = ranking_score_threshold.is_some();

        if let Some(hybrid) = hybrid {
            ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
            ret.embedder = hybrid.embedder.is_some();
            ret.hybrid = true;
        }

        ret
    }

    /// Record a successful response: bump the success/degraded/negative-operator
    /// counters and remember the processing time for percentile reporting.
    pub fn succeed(&mut self, result: &SearchResult) {
        // Exhaustive destructuring: compile error if SearchResult's fields change.
        let SearchResult {
            hits: _,
            query: _,
            processing_time_ms,
            hits_info: _,
            semantic_hit_count: _,
            facet_distribution: _,
            facet_stats: _,
            degraded,
            used_negative_operator,
        } = result;

        self.total_succeeded = self.total_succeeded.saturating_add(1);
        if *degraded {
            self.total_degraded = self.total_degraded.saturating_add(1);
        }
        if *used_negative_operator {
            self.total_used_negative_operator = self.total_used_negative_operator.saturating_add(1);
        }
        self.time_spent.push(*processing_time_ms as usize);
    }

    /// Aggregate one [SearchAggregator] into another.
    pub fn aggregate(&mut self, mut other: Self) {
        // Exhaustive destructuring: adding a field to Self without aggregating it
        // here becomes a compile error.
        let Self {
            timestamp,
            user_agents,
            total_received,
            total_succeeded,
            ref mut time_spent,
            sort_with_geo_point,
            sort_sum_of_criteria_terms,
            sort_total_number_of_criteria,
            filter_with_geo_radius,
            filter_with_geo_bounding_box,
            filter_sum_of_criteria_terms,
            filter_total_number_of_criteria,
            used_syntax,
            attributes_to_search_on_total_number_of_uses,
            max_terms_number,
            max_vector_size,
            matching_strategy,
            max_limit,
            max_offset,
            finite_pagination,
            max_attributes_to_retrieve,
            max_attributes_to_highlight,
            highlight_pre_tag,
            highlight_post_tag,
            max_attributes_to_crop,
            crop_marker,
            show_matches_position,
            crop_length,
            facets_sum_of_terms,
            facets_total_number_of_facets,
            show_ranking_score,
            show_ranking_score_details,
            semantic_ratio,
            embedder,
            hybrid,
            total_degraded,
            total_used_negative_operator,
            ranking_score_threshold,
        } = other;

        // Keep the earliest timestamp seen.
        if self.timestamp.is_none() {
            self.timestamp = timestamp;
        }

        // context
        for user_agent in user_agents.into_iter() {
            self.user_agents.insert(user_agent);
        }

        // request
        self.total_received = self.total_received.saturating_add(total_received);
        self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
        self.total_degraded = self.total_degraded.saturating_add(total_degraded);
        self.total_used_negative_operator =
            self.total_used_negative_operator.saturating_add(total_used_negative_operator);
        self.time_spent.append(time_spent);

        // sort
        self.sort_with_geo_point |= sort_with_geo_point;
        self.sort_sum_of_criteria_terms =
            self.sort_sum_of_criteria_terms.saturating_add(sort_sum_of_criteria_terms);
        self.sort_total_number_of_criteria =
            self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria);

        // filter
        self.filter_with_geo_radius |= filter_with_geo_radius;
        self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
        self.filter_sum_of_criteria_terms =
            self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms);
        self.filter_total_number_of_criteria =
            self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria);
        for (key, value) in used_syntax.into_iter() {
            let used_syntax = self.used_syntax.entry(key).or_insert(0);
            *used_syntax = used_syntax.saturating_add(value);
        }

        // attributes_to_search_on
        self.attributes_to_search_on_total_number_of_uses = self
            .attributes_to_search_on_total_number_of_uses
            .saturating_add(attributes_to_search_on_total_number_of_uses);

        // q
        self.max_terms_number = self.max_terms_number.max(max_terms_number);

        // vector
        self.max_vector_size = self.max_vector_size.max(max_vector_size);
        self.semantic_ratio |= semantic_ratio;
        self.hybrid |= hybrid;
        self.embedder |= embedder;

        // pagination
        self.max_limit = self.max_limit.max(max_limit);
        self.max_offset = self.max_offset.max(max_offset);
        self.finite_pagination += finite_pagination;

        // formatting
        self.max_attributes_to_retrieve =
            self.max_attributes_to_retrieve.max(max_attributes_to_retrieve);
        self.max_attributes_to_highlight =
            self.max_attributes_to_highlight.max(max_attributes_to_highlight);
        self.highlight_pre_tag |= highlight_pre_tag;
        self.highlight_post_tag |= highlight_post_tag;
        self.max_attributes_to_crop = self.max_attributes_to_crop.max(max_attributes_to_crop);
        self.crop_marker |= crop_marker;
        self.show_matches_position |= show_matches_position;
        self.crop_length |= crop_length;

        // facets
        self.facets_sum_of_terms = self.facets_sum_of_terms.saturating_add(facets_sum_of_terms);
        self.facets_total_number_of_facets =
            self.facets_total_number_of_facets.saturating_add(facets_total_number_of_facets);

        // matching strategy
        for (key, value) in matching_strategy.into_iter() {
            let matching_strategy = self.matching_strategy.entry(key).or_insert(0);
            *matching_strategy = matching_strategy.saturating_add(value);
        }

        // scoring
        self.show_ranking_score |= show_ranking_score;
        self.show_ranking_score_details |= show_ranking_score_details;
        self.ranking_score_threshold |= ranking_score_threshold;
    }

    /// Convert the aggregate into a Segment `Track` event, or `None` when no
    /// request was folded in (nothing to report).
    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
        // Exhaustive destructuring: compile error if a field is forgotten here.
        let Self {
            timestamp,
            user_agents,
            total_received,
            total_succeeded,
            time_spent,
            sort_with_geo_point,
            sort_sum_of_criteria_terms,
            sort_total_number_of_criteria,
            filter_with_geo_radius,
            filter_with_geo_bounding_box,
            filter_sum_of_criteria_terms,
            filter_total_number_of_criteria,
            used_syntax,
            attributes_to_search_on_total_number_of_uses,
            max_terms_number,
            max_vector_size,
            matching_strategy,
            max_limit,
            max_offset,
            finite_pagination,
            max_attributes_to_retrieve,
            max_attributes_to_highlight,
            highlight_pre_tag,
            highlight_post_tag,
            max_attributes_to_crop,
            crop_marker,
            show_matches_position,
            crop_length,
            facets_sum_of_terms,
            facets_total_number_of_facets,
            show_ranking_score,
            show_ranking_score_details,
            semantic_ratio,
            embedder,
            hybrid,
            total_degraded,
            total_used_negative_operator,
            ranking_score_threshold,
        } = self;

        if total_received == 0 {
            None
        } else {
            // we get all the values in a sorted manner
            let time_spent = time_spent.into_sorted_vec();
            // the index of the 99th percentage of value
            let percentile_99th = time_spent.len() * 99 / 100;
            // We are only interested by the slowest value of the 99th fastest results
            let time_spent = time_spent.get(percentile_99th);

            let properties = json!({
                "user-agent": user_agents,
                "requests": {
                    "99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
                    "total_succeeded": total_succeeded,
                    "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
                    "total_received": total_received,
                    "total_degraded": total_degraded,
                    "total_used_negative_operator": total_used_negative_operator,
                },
                "sort": {
                    "with_geoPoint": sort_with_geo_point,
                    "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64),
                },
                "filter": {
                    "with_geoRadius": filter_with_geo_radius,
                    "with_geoBoundingBox": filter_with_geo_bounding_box,
                    "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
                    "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
                },
                "attributes_to_search_on": {
                    "total_number_of_uses": attributes_to_search_on_total_number_of_uses,
                },
                "q": {
                    "max_terms_number": max_terms_number,
                },
                "vector": {
                    "max_vector_size": max_vector_size,
                },
                "hybrid": {
                    "enabled": hybrid,
                    "semantic_ratio": semantic_ratio,
                    "embedder": embedder,
                },
                "pagination": {
                    "max_limit": max_limit,
                    "max_offset": max_offset,
                    "most_used_navigation": if finite_pagination > (total_received / 2) { "exhaustive" } else { "estimated" },
                },
                "formatting": {
                    "max_attributes_to_retrieve": max_attributes_to_retrieve,
                    "max_attributes_to_highlight": max_attributes_to_highlight,
                    "highlight_pre_tag": highlight_pre_tag,
                    "highlight_post_tag": highlight_post_tag,
                    "max_attributes_to_crop": max_attributes_to_crop,
                    "crop_marker": crop_marker,
                    "show_matches_position": show_matches_position,
                    "crop_length": crop_length,
                },
                "facets": {
                    "avg_facets_number": format!("{:.2}", facets_sum_of_terms as f64 / facets_total_number_of_facets as f64),
                },
                "matching_strategy": {
                    "most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
                },
                "scoring": {
                    "show_ranking_score": show_ranking_score,
                    "show_ranking_score_details": show_ranking_score_details,
                    "ranking_score_threshold": ranking_score_threshold,
                },
            });

            Some(Track {
                timestamp,
                user: user.clone(),
                event: event_name.to_string(),
                properties,
                ..Default::default()
            })
        }
    }
}
|
|
|
|
|
2023-02-20 09:21:52 +01:00
|
|
|
#[derive(Default)]
pub struct MultiSearchAggregator {
    // Timestamp of the first multi-search folded into this aggregate.
    timestamp: Option<OffsetDateTime>,

    // requests
    total_received: usize,
    total_succeeded: usize,

    // sum of the number of distinct indexes in each single request, use with total_received to compute an avg
    total_distinct_index_count: usize,
    // number of queries with a single index, use with total_received to compute a proportion
    total_single_index: usize,

    // sum of the number of search queries in the requests, use with total_received to compute an average
    total_search_count: usize,

    // scoring
    show_ranking_score: bool,
    show_ranking_score_details: bool,

    // context
    user_agents: HashSet<String>,
}
|
|
|
|
|
|
|
|
impl MultiSearchAggregator {
    /// Build an aggregate from one multi-search request containing `query.len()` queries.
    pub fn from_queries(query: &[SearchQueryWithIndex], request: &HttpRequest) -> Self {
        let timestamp = Some(OffsetDateTime::now_utc());

        let user_agents = extract_user_agents(request).into_iter().collect();

        // Collect the distinct index uids targeted by this request.
        let distinct_indexes: HashSet<_> = query
            .iter()
            .map(|query| {
                // make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex
                let SearchQueryWithIndex {
                    index_uid,
                    q: _,
                    vector: _,
                    offset: _,
                    limit: _,
                    page: _,
                    hits_per_page: _,
                    attributes_to_retrieve: _,
                    retrieve_vectors: _,
                    attributes_to_crop: _,
                    crop_length: _,
                    attributes_to_highlight: _,
                    show_ranking_score: _,
                    show_ranking_score_details: _,
                    show_matches_position: _,
                    filter: _,
                    sort: _,
                    facets: _,
                    highlight_pre_tag: _,
                    highlight_post_tag: _,
                    crop_marker: _,
                    matching_strategy: _,
                    attributes_to_search_on: _,
                    hybrid: _,
                    ranking_score_threshold: _,
                } = query;

                index_uid.as_str()
            })
            .collect();

        // Scoring flags are set if ANY query in the request asked for them.
        let show_ranking_score = query.iter().any(|query| query.show_ranking_score);
        let show_ranking_score_details = query.iter().any(|query| query.show_ranking_score_details);

        Self {
            timestamp,
            total_received: 1,
            total_succeeded: 0,
            total_distinct_index_count: distinct_indexes.len(),
            total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 },
            total_search_count: query.len(),
            show_ranking_score,
            show_ranking_score_details,
            user_agents,
        }
    }

    /// Record that the whole multi-search request succeeded.
    pub fn succeed(&mut self) {
        self.total_succeeded = self.total_succeeded.saturating_add(1);
    }

    /// Aggregate one [MultiSearchAggregator] into another.
    pub fn aggregate(&mut self, other: Self) {
        // write the aggregate in a way that will cause a compilation error if a field is added.

        // get ownership of self, replacing it by a default value.
        let this = std::mem::take(self);

        let timestamp = this.timestamp.or(other.timestamp);
        let total_received = this.total_received.saturating_add(other.total_received);
        let total_succeeded = this.total_succeeded.saturating_add(other.total_succeeded);
        let total_distinct_index_count =
            this.total_distinct_index_count.saturating_add(other.total_distinct_index_count);
        let total_single_index = this.total_single_index.saturating_add(other.total_single_index);
        let total_search_count = this.total_search_count.saturating_add(other.total_search_count);
        let show_ranking_score = this.show_ranking_score || other.show_ranking_score;
        let show_ranking_score_details =
            this.show_ranking_score_details || other.show_ranking_score_details;
        let mut user_agents = this.user_agents;

        for user_agent in other.user_agents.into_iter() {
            user_agents.insert(user_agent);
        }

        // need all fields or compile error
        let mut aggregated = Self {
            timestamp,
            total_received,
            total_succeeded,
            total_distinct_index_count,
            total_single_index,
            total_search_count,
            user_agents,
            show_ranking_score,
            show_ranking_score_details,
            // do not add _ or ..Default::default() here
        };

        // replace the default self with the aggregated value
        std::mem::swap(self, &mut aggregated);
    }

    /// Convert the aggregate into a Segment `Track` event, or `None` when no
    /// request was folded in.
    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
        // Exhaustive destructuring: compile error if a field is forgotten here.
        let Self {
            timestamp,
            total_received,
            total_succeeded,
            total_distinct_index_count,
            total_single_index,
            total_search_count,
            user_agents,
            show_ranking_score,
            show_ranking_score_details,
        } = self;

        if total_received == 0 {
            None
        } else {
            let properties = json!({
                "user-agent": user_agents,
                "requests": {
                    "total_succeeded": total_succeeded,
                    "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
                    "total_received": total_received,
                },
                "indexes": {
                    "total_single_index": total_single_index,
                    "total_distinct_index_count": total_distinct_index_count,
                    "avg_distinct_index_count": (total_distinct_index_count as f64) / (total_received as f64), // not 0 else returned early
                },
                "searches": {
                    "total_search_count": total_search_count,
                    "avg_search_count": (total_search_count as f64) / (total_received as f64),
                },
                "scoring": {
                    "show_ranking_score": show_ranking_score,
                    "show_ranking_score_details": show_ranking_score_details,
                }
            });

            Some(Track {
                timestamp,
                user: user.clone(),
                event: event_name.to_string(),
                properties,
                ..Default::default()
            })
        }
    }
}
|
|
|
|
|
|
|
|
#[derive(Default)]
pub struct FacetSearchAggregator {
    // Timestamp of the first facet-search folded into this aggregate.
    timestamp: Option<OffsetDateTime>,

    // context
    user_agents: HashSet<String>,

    // requests
    total_received: usize,
    total_succeeded: usize,
    // Processing times (ms) of succeeded requests, kept as a heap for the
    // 99th-percentile computation in `into_event`.
    time_spent: BinaryHeap<usize>,

    // The set of all facetNames that were used
    facet_names: HashSet<String>,

    // Has there been any other parameter than the facetName or facetQuery ones?
    additional_search_parameters_provided: bool,
}
|
|
|
|
|
|
|
|
impl FacetSearchAggregator {
    /// Build an aggregate representing this single facet-search request.
    /// The exhaustive destructuring of `FacetSearchQuery` guarantees a compile
    /// error if a field is added without being considered for analytics.
    #[allow(clippy::field_reassign_with_default)]
    pub fn from_query(query: &FacetSearchQuery, request: &HttpRequest) -> Self {
        let FacetSearchQuery {
            facet_query: _,
            facet_name,
            vector,
            q,
            filter,
            matching_strategy,
            attributes_to_search_on,
            hybrid,
            ranking_score_threshold,
        } = query;

        let mut ret = Self::default();
        ret.timestamp = Some(OffsetDateTime::now_utc());

        ret.total_received = 1;
        ret.user_agents = extract_user_agents(request).into_iter().collect();
        ret.facet_names = Some(facet_name.clone()).into_iter().collect();

        // True when the request used any search parameter beyond
        // facetName/facetQuery (i.e. it filtered the candidates first).
        ret.additional_search_parameters_provided = q.is_some()
            || vector.is_some()
            || filter.is_some()
            || *matching_strategy != MatchingStrategy::default()
            || attributes_to_search_on.is_some()
            || hybrid.is_some()
            || ranking_score_threshold.is_some();

        ret
    }

    /// Record a successful response and its processing time.
    pub fn succeed(&mut self, result: &FacetSearchResult) {
        let FacetSearchResult { facet_hits: _, facet_query: _, processing_time_ms } = result;
        self.total_succeeded = self.total_succeeded.saturating_add(1);
        self.time_spent.push(*processing_time_ms as usize);
    }

    /// Aggregate one [FacetSearchAggregator] into another.
    pub fn aggregate(&mut self, mut other: Self) {
        // Exhaustive destructuring: compile error if a field is forgotten here.
        let Self {
            timestamp,
            user_agents,
            total_received,
            total_succeeded,
            ref mut time_spent,
            facet_names,
            additional_search_parameters_provided,
        } = other;

        // Keep the earliest timestamp seen.
        if self.timestamp.is_none() {
            self.timestamp = timestamp;
        }

        // context
        for user_agent in user_agents.into_iter() {
            self.user_agents.insert(user_agent);
        }

        // request
        self.total_received = self.total_received.saturating_add(total_received);
        self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
        self.time_spent.append(time_spent);

        // facet_names
        for facet_name in facet_names.into_iter() {
            self.facet_names.insert(facet_name);
        }

        // additional_search_parameters_provided
        self.additional_search_parameters_provided |= additional_search_parameters_provided;
    }

    /// Convert the aggregate into a Segment `Track` event, or `None` when no
    /// request was folded in.
    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
        // Exhaustive destructuring: compile error if a field is forgotten here.
        let Self {
            timestamp,
            user_agents,
            total_received,
            total_succeeded,
            time_spent,
            facet_names,
            additional_search_parameters_provided,
        } = self;

        if total_received == 0 {
            None
        } else {
            // the index of the 99th percentage of value
            let percentile_99th = 0.99 * (total_succeeded as f64 - 1.) + 1.;
            // we get all the values in a sorted manner
            let time_spent = time_spent.into_sorted_vec();
            // We are only interested by the slowest value of the 99th fastest results
            let time_spent = time_spent.get(percentile_99th as usize);

            let properties = json!({
                "user-agent": user_agents,
                "requests": {
                    "99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
                    "total_succeeded": total_succeeded,
                    "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
                    "total_received": total_received,
                },
                "facets": {
                    "total_distinct_facet_count": facet_names.len(),
                    "additional_search_parameters_provided": additional_search_parameters_provided,
                },
            });

            Some(Track {
                timestamp,
                user: user.clone(),
                event: event_name.to_string(),
                properties,
                ..Default::default()
            })
        }
    }
}
|
|
|
|
|
2021-10-27 18:16:13 +02:00
|
|
|
/// Aggregated analytics about document addition/update requests, flushed
/// periodically as a single Segment event.
#[derive(Default)]
pub struct DocumentsAggregator {
    // timestamp of the first request folded into this aggregate, if any
    timestamp: Option<OffsetDateTime>,

    // set to true when at least one request was received
    updated: bool,

    // context
    user_agents: HashSet<String>,

    // every distinct Content-Type header seen ("unknown" when absent or invalid)
    content_types: HashSet<String>,
    // every distinct primary key explicitly provided by a request
    primary_keys: HashSet<String>,
    // true when at least one aggregated request was flagged as creating its index
    index_creation: bool,
}
|
|
|
|
|
2021-10-28 16:28:41 +02:00
|
|
|
impl DocumentsAggregator {
|
|
|
|
pub fn from_query(
|
|
|
|
documents_query: &UpdateDocumentsQuery,
|
|
|
|
index_creation: bool,
|
|
|
|
request: &HttpRequest,
|
|
|
|
) -> Self {
|
2023-09-11 19:59:52 +02:00
|
|
|
let UpdateDocumentsQuery { primary_key, csv_delimiter: _ } = documents_query;
|
|
|
|
|
2023-09-21 17:41:12 +02:00
|
|
|
let mut primary_keys = HashSet::new();
|
2023-09-11 19:59:52 +02:00
|
|
|
if let Some(primary_key) = primary_key.clone() {
|
2023-09-21 17:41:12 +02:00
|
|
|
primary_keys.insert(primary_key);
|
2021-10-28 16:28:41 +02:00
|
|
|
}
|
2023-09-11 19:59:52 +02:00
|
|
|
|
2023-09-21 17:41:12 +02:00
|
|
|
let mut content_types = HashSet::new();
|
2021-10-28 16:28:41 +02:00
|
|
|
let content_type = request
|
|
|
|
.headers()
|
|
|
|
.get(CONTENT_TYPE)
|
2022-07-07 10:56:02 +02:00
|
|
|
.and_then(|s| s.to_str().ok())
|
2022-06-11 17:21:05 +02:00
|
|
|
.unwrap_or("unknown")
|
2021-10-28 16:28:41 +02:00
|
|
|
.to_string();
|
2023-09-21 17:41:12 +02:00
|
|
|
content_types.insert(content_type);
|
2021-10-28 16:28:41 +02:00
|
|
|
|
2023-09-21 17:41:12 +02:00
|
|
|
Self {
|
|
|
|
timestamp: Some(OffsetDateTime::now_utc()),
|
|
|
|
updated: true,
|
|
|
|
user_agents: extract_user_agents(request).into_iter().collect(),
|
|
|
|
content_types,
|
|
|
|
primary_keys,
|
|
|
|
index_creation,
|
|
|
|
}
|
2021-10-28 16:28:41 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Aggregate one [DocumentsAggregator] into another.
|
|
|
|
pub fn aggregate(&mut self, other: Self) {
|
2023-09-11 19:59:52 +02:00
|
|
|
let Self { timestamp, user_agents, primary_keys, content_types, index_creation, updated } =
|
|
|
|
other;
|
|
|
|
|
2022-01-20 19:01:37 +01:00
|
|
|
if self.timestamp.is_none() {
|
2023-09-11 19:59:52 +02:00
|
|
|
self.timestamp = timestamp;
|
2022-01-20 19:01:37 +01:00
|
|
|
}
|
|
|
|
|
2023-09-11 19:59:52 +02:00
|
|
|
self.updated |= updated;
|
2021-10-28 16:28:41 +02:00
|
|
|
// we can't create a union because there is no `into_union` method
|
2023-09-11 19:59:52 +02:00
|
|
|
for user_agent in user_agents {
|
2021-10-28 16:28:41 +02:00
|
|
|
self.user_agents.insert(user_agent);
|
|
|
|
}
|
2023-09-11 19:59:52 +02:00
|
|
|
for primary_key in primary_keys {
|
2021-10-28 16:28:41 +02:00
|
|
|
self.primary_keys.insert(primary_key);
|
|
|
|
}
|
2023-09-11 19:59:52 +02:00
|
|
|
for content_type in content_types {
|
2021-10-28 16:28:41 +02:00
|
|
|
self.content_types.insert(content_type);
|
|
|
|
}
|
2023-09-11 19:59:52 +02:00
|
|
|
self.index_creation |= index_creation;
|
2021-10-28 16:28:41 +02:00
|
|
|
}
|
|
|
|
|
2021-10-27 18:16:13 +02:00
|
|
|
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
|
2023-09-11 19:59:52 +02:00
|
|
|
let Self { timestamp, user_agents, primary_keys, content_types, index_creation, updated } =
|
|
|
|
self;
|
|
|
|
|
|
|
|
if !updated {
|
2021-10-27 18:16:13 +02:00
|
|
|
None
|
|
|
|
} else {
|
|
|
|
let properties = json!({
|
2023-09-11 19:59:52 +02:00
|
|
|
"user-agent": user_agents,
|
|
|
|
"payload_type": content_types,
|
|
|
|
"primary_key": primary_keys,
|
|
|
|
"index_creation": index_creation,
|
2021-10-27 18:16:13 +02:00
|
|
|
});
|
|
|
|
|
|
|
|
Some(Track {
|
2023-09-21 17:41:12 +02:00
|
|
|
timestamp,
|
2021-10-27 18:16:13 +02:00
|
|
|
user: user.clone(),
|
|
|
|
event: event_name.to_string(),
|
|
|
|
properties,
|
|
|
|
..Default::default()
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2022-11-28 16:27:41 +01:00
|
|
|
|
|
|
|
/// Aggregated analytics about document deletion requests.
/// Serialized directly as the properties of the Segment event.
#[derive(Default, Serialize)]
pub struct DocumentsDeletionAggregator {
    // timestamp of the first request folded into this aggregate, if any
    #[serde(skip)]
    timestamp: Option<OffsetDateTime>,

    // context
    #[serde(rename = "user-agent")]
    user_agents: HashSet<String>,

    // total number of deletion requests received
    #[serde(rename = "requests.total_received")]
    total_received: usize,
    // at least one deletion targeted a single document id
    per_document_id: bool,
    // at least one request cleared the whole index
    clear_all: bool,
    // at least one deletion provided a batch of document ids
    per_batch: bool,
    // at least one deletion provided a filter
    per_filter: bool,
}
|
|
|
|
|
|
|
|
impl DocumentsDeletionAggregator {
|
|
|
|
pub fn from_query(kind: DocumentDeletionKind, request: &HttpRequest) -> Self {
|
2023-09-21 17:41:12 +02:00
|
|
|
Self {
|
|
|
|
timestamp: Some(OffsetDateTime::now_utc()),
|
|
|
|
user_agents: extract_user_agents(request).into_iter().collect(),
|
|
|
|
total_received: 1,
|
|
|
|
per_document_id: matches!(kind, DocumentDeletionKind::PerDocumentId),
|
|
|
|
clear_all: matches!(kind, DocumentDeletionKind::ClearAll),
|
|
|
|
per_batch: matches!(kind, DocumentDeletionKind::PerBatch),
|
|
|
|
per_filter: matches!(kind, DocumentDeletionKind::PerFilter),
|
2022-11-28 16:27:41 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Aggregate one [DocumentsAggregator] into another.
|
|
|
|
pub fn aggregate(&mut self, other: Self) {
|
2023-09-12 06:52:49 +02:00
|
|
|
let Self {
|
|
|
|
timestamp,
|
|
|
|
user_agents,
|
|
|
|
total_received,
|
|
|
|
per_document_id,
|
|
|
|
clear_all,
|
|
|
|
per_batch,
|
|
|
|
per_filter,
|
|
|
|
} = other;
|
|
|
|
|
2022-11-28 16:27:41 +01:00
|
|
|
if self.timestamp.is_none() {
|
2023-09-12 06:52:49 +02:00
|
|
|
self.timestamp = timestamp;
|
2022-11-28 16:27:41 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// we can't create a union because there is no `into_union` method
|
2023-09-12 06:52:49 +02:00
|
|
|
for user_agent in user_agents {
|
2022-11-28 16:27:41 +01:00
|
|
|
self.user_agents.insert(user_agent);
|
|
|
|
}
|
2023-09-12 06:52:49 +02:00
|
|
|
self.total_received = self.total_received.saturating_add(total_received);
|
|
|
|
self.per_document_id |= per_document_id;
|
|
|
|
self.clear_all |= clear_all;
|
|
|
|
self.per_batch |= per_batch;
|
|
|
|
self.per_filter |= per_filter;
|
2022-11-28 16:27:41 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
|
|
|
|
// if we had no timestamp it means we never encountered any events and
|
|
|
|
// thus we don't need to send this event.
|
|
|
|
let timestamp = self.timestamp?;
|
|
|
|
|
|
|
|
Some(Track {
|
|
|
|
timestamp: Some(timestamp),
|
|
|
|
user: user.clone(),
|
|
|
|
event: event_name.to_string(),
|
|
|
|
properties: serde_json::to_value(self).ok()?,
|
|
|
|
..Default::default()
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
2023-05-09 19:52:11 +02:00
|
|
|
|
|
|
|
/// Aggregated analytics about document fetch requests (GET/POST documents).
/// Serialized directly as the properties of the Segment event.
#[derive(Default, Serialize)]
pub struct DocumentsFetchAggregator {
    // timestamp of the first request folded into this aggregate, if any
    #[serde(skip)]
    timestamp: Option<OffsetDateTime>,

    // context
    #[serde(rename = "user-agent")]
    user_agents: HashSet<String>,

    // total number of fetch requests received
    #[serde(rename = "requests.total_received")]
    total_received: usize,

    // a call on ../documents/:doc_id
    per_document_id: bool,
    // if a filter was used
    per_filter: bool,

    // pagination
    // largest `limit` seen across aggregated requests
    #[serde(rename = "pagination.max_limit")]
    max_limit: usize,
    // largest `offset` seen across aggregated requests
    #[serde(rename = "pagination.max_offset")]
    max_offset: usize,
}
|
|
|
|
|
|
|
|
impl DocumentsFetchAggregator {
|
|
|
|
pub fn from_query(query: &DocumentFetchKind, request: &HttpRequest) -> Self {
|
|
|
|
let (limit, offset) = match query {
|
|
|
|
DocumentFetchKind::PerDocumentId => (1, 0),
|
|
|
|
DocumentFetchKind::Normal { limit, offset, .. } => (*limit, *offset),
|
|
|
|
};
|
|
|
|
Self {
|
|
|
|
timestamp: Some(OffsetDateTime::now_utc()),
|
|
|
|
user_agents: extract_user_agents(request).into_iter().collect(),
|
|
|
|
total_received: 1,
|
|
|
|
per_document_id: matches!(query, DocumentFetchKind::PerDocumentId),
|
|
|
|
per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
|
|
|
|
max_limit: limit,
|
|
|
|
max_offset: offset,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Aggregate one [DocumentsFetchAggregator] into another.
|
|
|
|
pub fn aggregate(&mut self, other: Self) {
|
2023-09-12 07:09:01 +02:00
|
|
|
let Self {
|
|
|
|
timestamp,
|
|
|
|
user_agents,
|
|
|
|
total_received,
|
|
|
|
per_document_id,
|
|
|
|
per_filter,
|
|
|
|
max_limit,
|
|
|
|
max_offset,
|
|
|
|
} = other;
|
|
|
|
|
2023-05-09 19:52:11 +02:00
|
|
|
if self.timestamp.is_none() {
|
2023-09-12 07:09:01 +02:00
|
|
|
self.timestamp = timestamp;
|
2023-05-09 19:52:11 +02:00
|
|
|
}
|
2023-09-12 07:09:01 +02:00
|
|
|
for user_agent in user_agents {
|
2023-05-09 19:52:11 +02:00
|
|
|
self.user_agents.insert(user_agent);
|
|
|
|
}
|
|
|
|
|
2023-09-12 07:09:01 +02:00
|
|
|
self.total_received = self.total_received.saturating_add(total_received);
|
|
|
|
self.per_document_id |= per_document_id;
|
|
|
|
self.per_filter |= per_filter;
|
2023-05-09 19:52:11 +02:00
|
|
|
|
2023-09-12 07:09:01 +02:00
|
|
|
self.max_limit = self.max_limit.max(max_limit);
|
|
|
|
self.max_offset = self.max_offset.max(max_offset);
|
2023-05-09 19:52:11 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
|
|
|
|
// if we had no timestamp it means we never encountered any events and
|
|
|
|
// thus we don't need to send this event.
|
|
|
|
let timestamp = self.timestamp?;
|
|
|
|
|
|
|
|
Some(Track {
|
|
|
|
timestamp: Some(timestamp),
|
|
|
|
user: user.clone(),
|
|
|
|
event: event_name.to_string(),
|
|
|
|
properties: serde_json::to_value(self).ok()?,
|
|
|
|
..Default::default()
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
2024-05-27 10:54:12 +02:00
|
|
|
|
|
|
|
/// Aggregated analytics about "similar documents" requests, flushed
/// periodically as a single Segment event.
#[derive(Default)]
pub struct SimilarAggregator {
    // timestamp of the first request folded into this aggregate, if any
    timestamp: Option<OffsetDateTime>,

    // context
    user_agents: HashSet<String>,

    // requests
    total_received: usize,
    total_succeeded: usize,
    // processing times (ms) of succeeded requests; a heap so they can be
    // sorted once when computing the 99th percentile
    time_spent: BinaryHeap<usize>,

    // filter
    filter_with_geo_radius: bool,
    filter_with_geo_bounding_box: bool,
    // every time a request has a filter, this field must be incremented by the number of terms it contains
    filter_sum_of_criteria_terms: usize,
    // every time a request has a filter, this field must be incremented by one
    filter_total_number_of_criteria: usize,
    // filter syntax ("string"/"array"/"mixed"/"none") -> number of requests using it
    used_syntax: HashMap<String, usize>,

    // Whether a non-default embedder was specified
    embedder: bool,

    // pagination
    max_limit: usize,
    max_offset: usize,

    // formatting
    // NOTE(review): never assigned by `from_query` (which ignores
    // `attributes_to_retrieve`), so this is always 0 — confirm intent
    max_attributes_to_retrieve: usize,

    // scoring
    show_ranking_score: bool,
    show_ranking_score_details: bool,
    // whether a `rankingScoreThreshold` was provided
    ranking_score_threshold: bool,
}
|
|
|
|
|
|
|
|
impl SimilarAggregator {
|
|
|
|
#[allow(clippy::field_reassign_with_default)]
|
|
|
|
pub fn from_query(query: &SimilarQuery, request: &HttpRequest) -> Self {
|
|
|
|
let SimilarQuery {
|
|
|
|
id: _,
|
|
|
|
embedder,
|
|
|
|
offset,
|
|
|
|
limit,
|
|
|
|
attributes_to_retrieve: _,
|
2024-05-29 17:22:58 +02:00
|
|
|
retrieve_vectors: _,
|
2024-05-27 10:54:12 +02:00
|
|
|
show_ranking_score,
|
|
|
|
show_ranking_score_details,
|
|
|
|
filter,
|
2024-05-30 11:22:12 +02:00
|
|
|
ranking_score_threshold,
|
2024-05-27 10:54:12 +02:00
|
|
|
} = query;
|
|
|
|
|
|
|
|
let mut ret = Self::default();
|
|
|
|
ret.timestamp = Some(OffsetDateTime::now_utc());
|
|
|
|
|
|
|
|
ret.total_received = 1;
|
|
|
|
ret.user_agents = extract_user_agents(request).into_iter().collect();
|
|
|
|
|
|
|
|
if let Some(ref filter) = filter {
|
|
|
|
static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
|
|
|
|
ret.filter_total_number_of_criteria = 1;
|
|
|
|
|
|
|
|
let syntax = match filter {
|
|
|
|
Value::String(_) => "string".to_string(),
|
|
|
|
Value::Array(values) => {
|
|
|
|
if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) {
|
|
|
|
"mixed".to_string()
|
|
|
|
} else {
|
|
|
|
"array".to_string()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
_ => "none".to_string(),
|
|
|
|
};
|
|
|
|
// convert the string to a HashMap
|
|
|
|
ret.used_syntax.insert(syntax, 1);
|
|
|
|
|
|
|
|
let stringified_filters = filter.to_string();
|
|
|
|
ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
|
|
|
|
ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox(");
|
|
|
|
ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
|
|
|
|
}
|
|
|
|
|
|
|
|
ret.max_limit = *limit;
|
|
|
|
ret.max_offset = *offset;
|
|
|
|
|
|
|
|
ret.show_ranking_score = *show_ranking_score;
|
|
|
|
ret.show_ranking_score_details = *show_ranking_score_details;
|
2024-05-30 11:22:12 +02:00
|
|
|
ret.ranking_score_threshold = ranking_score_threshold.is_some();
|
2024-05-27 10:54:12 +02:00
|
|
|
|
|
|
|
ret.embedder = embedder.is_some();
|
|
|
|
|
|
|
|
ret
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn succeed(&mut self, result: &SimilarResult) {
|
|
|
|
let SimilarResult { id: _, hits: _, processing_time_ms, hits_info: _ } = result;
|
|
|
|
|
|
|
|
self.total_succeeded = self.total_succeeded.saturating_add(1);
|
|
|
|
|
|
|
|
self.time_spent.push(*processing_time_ms as usize);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Aggregate one [SimilarAggregator] into another.
|
|
|
|
pub fn aggregate(&mut self, mut other: Self) {
|
|
|
|
let Self {
|
|
|
|
timestamp,
|
|
|
|
user_agents,
|
|
|
|
total_received,
|
|
|
|
total_succeeded,
|
|
|
|
ref mut time_spent,
|
|
|
|
filter_with_geo_radius,
|
|
|
|
filter_with_geo_bounding_box,
|
|
|
|
filter_sum_of_criteria_terms,
|
|
|
|
filter_total_number_of_criteria,
|
|
|
|
used_syntax,
|
|
|
|
max_limit,
|
|
|
|
max_offset,
|
|
|
|
max_attributes_to_retrieve,
|
|
|
|
show_ranking_score,
|
|
|
|
show_ranking_score_details,
|
|
|
|
embedder,
|
2024-05-30 11:22:12 +02:00
|
|
|
ranking_score_threshold,
|
2024-05-27 10:54:12 +02:00
|
|
|
} = other;
|
|
|
|
|
|
|
|
if self.timestamp.is_none() {
|
|
|
|
self.timestamp = timestamp;
|
|
|
|
}
|
|
|
|
|
|
|
|
// context
|
|
|
|
for user_agent in user_agents.into_iter() {
|
|
|
|
self.user_agents.insert(user_agent);
|
|
|
|
}
|
|
|
|
|
|
|
|
// request
|
|
|
|
self.total_received = self.total_received.saturating_add(total_received);
|
|
|
|
self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
|
|
|
|
self.time_spent.append(time_spent);
|
|
|
|
|
|
|
|
// filter
|
|
|
|
self.filter_with_geo_radius |= filter_with_geo_radius;
|
|
|
|
self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
|
|
|
|
self.filter_sum_of_criteria_terms =
|
|
|
|
self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms);
|
|
|
|
self.filter_total_number_of_criteria =
|
|
|
|
self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria);
|
|
|
|
for (key, value) in used_syntax.into_iter() {
|
|
|
|
let used_syntax = self.used_syntax.entry(key).or_insert(0);
|
|
|
|
*used_syntax = used_syntax.saturating_add(value);
|
|
|
|
}
|
|
|
|
|
|
|
|
self.embedder |= embedder;
|
|
|
|
|
|
|
|
// pagination
|
|
|
|
self.max_limit = self.max_limit.max(max_limit);
|
|
|
|
self.max_offset = self.max_offset.max(max_offset);
|
|
|
|
|
|
|
|
// formatting
|
|
|
|
self.max_attributes_to_retrieve =
|
|
|
|
self.max_attributes_to_retrieve.max(max_attributes_to_retrieve);
|
|
|
|
|
|
|
|
// scoring
|
|
|
|
self.show_ranking_score |= show_ranking_score;
|
|
|
|
self.show_ranking_score_details |= show_ranking_score_details;
|
2024-05-30 11:22:12 +02:00
|
|
|
self.ranking_score_threshold |= ranking_score_threshold;
|
2024-05-27 10:54:12 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
|
|
|
|
let Self {
|
|
|
|
timestamp,
|
|
|
|
user_agents,
|
|
|
|
total_received,
|
|
|
|
total_succeeded,
|
|
|
|
time_spent,
|
|
|
|
filter_with_geo_radius,
|
|
|
|
filter_with_geo_bounding_box,
|
|
|
|
filter_sum_of_criteria_terms,
|
|
|
|
filter_total_number_of_criteria,
|
|
|
|
used_syntax,
|
|
|
|
max_limit,
|
|
|
|
max_offset,
|
|
|
|
max_attributes_to_retrieve,
|
|
|
|
show_ranking_score,
|
|
|
|
show_ranking_score_details,
|
|
|
|
embedder,
|
2024-05-30 11:22:12 +02:00
|
|
|
ranking_score_threshold,
|
2024-05-27 10:54:12 +02:00
|
|
|
} = self;
|
|
|
|
|
|
|
|
if total_received == 0 {
|
|
|
|
None
|
|
|
|
} else {
|
|
|
|
// we get all the values in a sorted manner
|
|
|
|
let time_spent = time_spent.into_sorted_vec();
|
|
|
|
// the index of the 99th percentage of value
|
|
|
|
let percentile_99th = time_spent.len() * 99 / 100;
|
|
|
|
// We are only interested by the slowest value of the 99th fastest results
|
|
|
|
let time_spent = time_spent.get(percentile_99th);
|
|
|
|
|
|
|
|
let properties = json!({
|
|
|
|
"user-agent": user_agents,
|
|
|
|
"requests": {
|
|
|
|
"99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
|
|
|
|
"total_succeeded": total_succeeded,
|
|
|
|
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
|
|
|
|
"total_received": total_received,
|
|
|
|
},
|
|
|
|
"filter": {
|
|
|
|
"with_geoRadius": filter_with_geo_radius,
|
|
|
|
"with_geoBoundingBox": filter_with_geo_bounding_box,
|
|
|
|
"avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
|
|
|
|
"most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
|
|
|
|
},
|
|
|
|
"hybrid": {
|
|
|
|
"embedder": embedder,
|
|
|
|
},
|
|
|
|
"pagination": {
|
|
|
|
"max_limit": max_limit,
|
|
|
|
"max_offset": max_offset,
|
|
|
|
},
|
|
|
|
"formatting": {
|
|
|
|
"max_attributes_to_retrieve": max_attributes_to_retrieve,
|
|
|
|
},
|
|
|
|
"scoring": {
|
|
|
|
"show_ranking_score": show_ranking_score,
|
|
|
|
"show_ranking_score_details": show_ranking_score_details,
|
2024-05-30 11:22:12 +02:00
|
|
|
"ranking_score_threshold": ranking_score_threshold,
|
2024-05-27 10:54:12 +02:00
|
|
|
},
|
|
|
|
});
|
|
|
|
|
|
|
|
Some(Track {
|
|
|
|
timestamp,
|
|
|
|
user: user.clone(),
|
|
|
|
event: event_name.to_string(),
|
|
|
|
properties,
|
|
|
|
..Default::default()
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|