From 73e87c152a4bd35fd4309141615676210c6b279c Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 16 Oct 2024 15:43:27 +0200 Subject: [PATCH 01/22] rewrite most of the analytics especially the settings --- meilisearch/src/analytics/mock_analytics.rs | 109 -- meilisearch/src/analytics/mod.rs | 179 ++-- .../src/analytics/segment_analytics.rs | 211 ++-- meilisearch/src/lib.rs | 4 +- meilisearch/src/routes/dump.rs | 7 +- meilisearch/src/routes/features.rs | 58 +- meilisearch/src/routes/indexes/documents.rs | 318 +++++- .../src/routes/indexes/facet_search.rs | 112 +- meilisearch/src/routes/indexes/mod.rs | 53 +- meilisearch/src/routes/indexes/search.rs | 13 +- meilisearch/src/routes/indexes/settings.rs | 962 +++++++++++++----- meilisearch/src/routes/swap_indexes.rs | 2 +- 12 files changed, 1381 insertions(+), 647 deletions(-) delete mode 100644 meilisearch/src/analytics/mock_analytics.rs diff --git a/meilisearch/src/analytics/mock_analytics.rs b/meilisearch/src/analytics/mock_analytics.rs deleted file mode 100644 index 54b8d4f1b..000000000 --- a/meilisearch/src/analytics/mock_analytics.rs +++ /dev/null @@ -1,109 +0,0 @@ -use std::any::Any; -use std::sync::Arc; - -use actix_web::HttpRequest; -use meilisearch_types::InstanceUid; -use serde_json::Value; - -use super::{find_user_id, Analytics, DocumentDeletionKind, DocumentFetchKind}; -use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery}; -use crate::Opt; - -pub struct MockAnalytics { - instance_uid: Option, -} - -#[derive(Default)] -pub struct SearchAggregator; - -#[allow(dead_code)] -impl SearchAggregator { - pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self { - Self - } - - pub fn succeed(&mut self, _: &dyn Any) {} -} - -#[derive(Default)] -pub struct SimilarAggregator; - -#[allow(dead_code)] -impl SimilarAggregator { - pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self { - Self - } - - pub fn succeed(&mut self, _: &dyn Any) {} -} - -#[derive(Default)] -pub struct MultiSearchAggregator; - -#[allow(dead_code)] -impl MultiSearchAggregator { - pub fn from_federated_search(_: &dyn Any, _: &dyn Any) -> Self { - Self - } - - pub fn succeed(&mut self) {} -} - -#[derive(Default)] -pub struct FacetSearchAggregator; - -#[allow(dead_code)] -impl FacetSearchAggregator { - pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self { - Self - } - - pub fn succeed(&mut self, _: &dyn Any) {} -} - -impl MockAnalytics { - #[allow(clippy::new_ret_no_self)] - pub fn new(opt: &Opt) -> Arc { - let instance_uid = find_user_id(&opt.db_path); - Arc::new(Self { instance_uid }) - } -} - -impl Analytics for MockAnalytics { - fn instance_uid(&self) -> Option<&meilisearch_types::InstanceUid> { - self.instance_uid.as_ref() - } - - // These methods are noop and should be optimized out - fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {} - fn get_search(&self, _aggregate: super::SearchAggregator) {} - fn post_search(&self, _aggregate: super::SearchAggregator) {} - fn get_similar(&self, _aggregate: super::SimilarAggregator) {} - fn post_similar(&self, _aggregate: super::SimilarAggregator) {} - fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {} - fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {} - fn add_documents( - &self, - _documents_query: &UpdateDocumentsQuery, - _index_creation: bool, - _request: &HttpRequest, - ) { - } - fn delete_documents(&self, _kind: DocumentDeletionKind, _request: &HttpRequest) {} - fn update_documents( - &self, - _documents_query: 
&UpdateDocumentsQuery, - _index_creation: bool, - _request: &HttpRequest, - ) { - } - fn update_documents_by_function( - &self, - _documents_query: &DocumentEditionByFunction, - _index_creation: bool, - _request: &HttpRequest, - ) { - } - fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {} - fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {} -} diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index 3c7ca0ed3..a8658d830 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -1,45 +1,51 @@ -mod mock_analytics; -#[cfg(feature = "analytics")] -mod segment_analytics; +pub mod segment_analytics; +use std::any::TypeId; +use std::collections::HashMap; use std::fs; use std::path::{Path, PathBuf}; use std::str::FromStr; use actix_web::HttpRequest; use meilisearch_types::InstanceUid; -pub use mock_analytics::MockAnalytics; use once_cell::sync::Lazy; use platform_dirs::AppDirs; -use serde_json::Value; - -use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery}; - -// if the analytics feature is disabled -// the `SegmentAnalytics` point to the mock instead of the real analytics -#[cfg(not(feature = "analytics"))] -pub type SegmentAnalytics = mock_analytics::MockAnalytics; -#[cfg(not(feature = "analytics"))] -pub type SearchAggregator = mock_analytics::SearchAggregator; -#[cfg(not(feature = "analytics"))] -pub type SimilarAggregator = mock_analytics::SimilarAggregator; -#[cfg(not(feature = "analytics"))] -pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator; -#[cfg(not(feature = "analytics"))] -pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator; +use segment::message::User; +use serde::Serialize; // if the feature analytics is enabled we use the real analytics -#[cfg(feature = "analytics")] pub type SegmentAnalytics = segment_analytics::SegmentAnalytics; -#[cfg(feature = "analytics")] -pub type SearchAggregator = segment_analytics::SearchAggregator; -#[cfg(feature = "analytics")] +pub use segment_analytics::SearchAggregator; pub type SimilarAggregator = segment_analytics::SimilarAggregator; -#[cfg(feature = "analytics")] pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator; -#[cfg(feature = "analytics")] pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator; +/// A macro used to quickly define events that don't aggregate or send anything besides an empty event with its name. +#[macro_export] +macro_rules! empty_analytics { + ($struct_name:ident, $event_name:literal) => { + #[derive(Default)] + struct $struct_name {} + + impl $crate::analytics::Aggregate for $struct_name { + fn event_name(&self) -> &'static str { + $event_name + } + + fn aggregate(self, other: Self) -> Self + where + Self: Sized, + { + self + } + + fn into_event(self) -> serde_json::Value { + serde_json::json!({}) + } + } + }; +} + /// The Meilisearch config dir: /// `~/.config/Meilisearch` on *NIX or *BSD. /// `~/Library/ApplicationSupport` on macOS. 
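For readers skimming the patch, the `empty_analytics!` macro above can be hand-expanded to see the shape of the new API. The following is an illustrative expansion of `crate::empty_analytics!(DumpAnalytics, "Dump Created")` — the invocation the dump route adds later in this patch — not code from the diff itself; the `Aggregate` trait it implements is introduced in the next hunk.

```rust
// Hand-expanded form of `crate::empty_analytics!(DumpAnalytics, "Dump Created")`.
// Paths assume the crate layout used in this patch (`crate::analytics::Aggregate`).
#[derive(Default)]
struct DumpAnalytics {}

impl crate::analytics::Aggregate for DumpAnalytics {
    fn event_name(&self) -> &'static str {
        "Dump Created"
    }

    // Merging two empty events yields another empty event; `other` carries no data.
    fn aggregate(self, other: Self) -> Self
    where
        Self: Sized,
    {
        self
    }

    fn into_event(self) -> serde_json::Value {
        serde_json::json!({})
    }
}
```

A handler then fires the event with `analytics.publish(DumpAnalytics::default(), Some(&req))`, exactly as `create_dump` does further down in this patch.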
@@ -78,60 +84,73 @@ pub enum DocumentFetchKind { Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool }, } -pub trait Analytics: Sync + Send { - fn instance_uid(&self) -> Option<&InstanceUid>; +pub trait Aggregate { + fn event_name(&self) -> &'static str; + + fn aggregate(self, other: Self) -> Self + where + Self: Sized; + + fn into_event(self) -> impl Serialize + where + Self: Sized; +} + +/// Helper trait to define multiple aggregates with the same content but different names. +/// Commonly used when you must aggregate a search performed with POST or with GET, for example. +pub trait AggregateMethod { + fn event_name() -> &'static str; +} + +/// A macro used to quickly define multiple aggregate methods with their names +#[macro_export] +macro_rules! aggregate_methods { + ($method:ident => $event_name:literal) => { + pub enum $method {} + + impl $crate::analytics::AggregateMethod for $method { + fn event_name() -> &'static str { + $event_name + } + } + }; + ($($method:ident => $event_name:literal,)+) => { + $( + aggregate_methods!($method => $event_name); + )+ + + }; +} + +pub struct Analytics { + // TODO: TAMO: remove + inner: Option<SegmentAnalytics>, + + instance_uid: Option<InstanceUid>, + user: Option<User>, + events: HashMap<TypeId, Box<dyn Aggregate>>, +} + +impl Analytics { + fn no_analytics() -> Self { + Self { inner: None, events: HashMap::new(), instance_uid: None, user: None } + } + + fn segment_analytics(segment: SegmentAnalytics) -> Self { + Self { + instance_uid: Some(segment.instance_uid), + user: Some(segment.user), + inner: Some(segment), + events: HashMap::new(), + } + } + + pub fn instance_uid(&self) -> Option<&InstanceUid> { + self.instance_uid.as_ref() + } /// The method used to publish most analytics that do not need to be batched every hour - fn publish(&self, event_name: String, send: Value, request: Option<&HttpRequest>); - - /// This method should be called to aggregate a get search - fn get_search(&self, aggregate: SearchAggregator); - - /// This method should be called to aggregate a post search - fn post_search(&self, aggregate: SearchAggregator); - - /// This method should be called to aggregate a get similar request - fn get_similar(&self, aggregate: SimilarAggregator); - - /// This method should be called to aggregate a post similar request - fn post_similar(&self, aggregate: SimilarAggregator); - - /// This method should be called to aggregate a post array of searches - fn post_multi_search(&self, aggregate: MultiSearchAggregator); - - /// This method should be called to aggregate post facet values searches - fn post_facet_search(&self, aggregate: FacetSearchAggregator); - - // this method should be called to aggregate an add documents request - fn add_documents( - &self, - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ); - - // this method should be called to aggregate a fetch documents request - fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest); - - // this method should be called to aggregate a fetch documents request - fn post_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest); - - // this method should be called to aggregate a add documents request - fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest); - - // this method should be called to batch an update documents request - fn update_documents( - &self, - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ); - - // this method should be called to batch an update documents by 
function request - fn update_documents_by_function( - &self, - documents_query: &DocumentEditionByFunction, - index_creation: bool, - request: &HttpRequest, - ); + pub fn publish(&self, send: impl Aggregate, request: Option<&HttpRequest>) { + let Some(segment) = self.inner.as_ref() else { return }; + } } diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 476b3264e..8a6dfd780 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -25,7 +25,8 @@ use tokio::sync::mpsc::{self, Receiver, Sender}; use uuid::Uuid; use super::{ - config_user_id_path, DocumentDeletionKind, DocumentFetchKind, MEILISEARCH_CONFIG_PATH, + config_user_id_path, Aggregate, AggregateMethod, DocumentDeletionKind, DocumentFetchKind, + MEILISEARCH_CONFIG_PATH, }; use crate::analytics::Analytics; use crate::option::{ @@ -40,7 +41,7 @@ use crate::search::{ DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEMANTIC_RATIO, }; -use crate::Opt; +use crate::{aggregate_methods, Opt}; const ANALYTICS_HEADER: &str = "X-Meilisearch-Client"; @@ -87,9 +88,9 @@ pub enum AnalyticsMsg { } pub struct SegmentAnalytics { - instance_uid: InstanceUid, + pub instance_uid: InstanceUid, sender: Sender<AnalyticsMsg>, - user: User, + pub user: User, } impl SegmentAnalytics { @@ -98,7 +99,7 @@ impl SegmentAnalytics { opt: &Opt, index_scheduler: Arc<IndexScheduler>, auth_controller: Arc<AuthController>, - ) -> Arc<dyn Analytics> { + ) -> Arc<Analytics> { let instance_uid = super::find_user_id(&opt.db_path); let first_time_run = instance_uid.is_none(); let instance_uid = instance_uid.unwrap_or_else(Uuid::new_v4); @@ -108,7 +109,7 @@ impl SegmentAnalytics { // if reqwest throws an error we won't be able to send analytics if client.is_err() { - return super::MockAnalytics::new(opt); + return Arc::new(Analytics::no_analytics()); } let client = @@ -161,10 +162,11 @@ impl SegmentAnalytics { let this = Self { instance_uid, sender, user: user.clone() }; - Arc::new(this) + Arc::new(Analytics::segment_analytics(this)) } } +/* impl super::Analytics for SegmentAnalytics { fn instance_uid(&self) -> Option<&InstanceUid> { Some(&self.instance_uid) @@ -253,6 +255,7 @@ impl super::Analytics for SegmentAnalytics { let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFetchDocuments(aggregate)); } } +*/ /// This structure represents the `infos` field we send in the analytics. /// It's quite close to the `Opt` structure except all sensitive information @@ -607,12 +610,7 @@ impl Segment { } #[derive(Default)] -pub struct SearchAggregator { - timestamp: Option<OffsetDateTime>, - - // context - user_agents: HashSet<String>, - +pub struct SearchAggregator<Method: AggregateMethod> { // requests total_received: usize, total_succeeded: usize, @@ -684,9 +682,11 @@ pub struct SearchAggregator { show_ranking_score: bool, show_ranking_score_details: bool, ranking_score_threshold: bool, + + marker: std::marker::PhantomData<Method>, } -impl SearchAggregator { +impl<Method: AggregateMethod> SearchAggregator<Method> { #[allow(clippy::field_reassign_with_default)] pub fn from_query(query: &SearchQuery, request: &HttpRequest) -> Self { let SearchQuery { @@ -827,12 +827,21 @@ } self.time_spent.push(*processing_time_ms as usize); } } - /// Aggregate one [SearchAggregator] into another. 
- pub fn aggregate(&mut self, mut other: Self) { +aggregate_methods!( + SearchGET => "Documents Searched GET", + SearchPOST => "Documents Searched POST", + +); + +impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> { + fn event_name(&self) -> &'static str { + Method::event_name() + } + + fn aggregate(mut self, mut other: Self) -> Self { let Self { - timestamp, - user_agents, total_received, total_succeeded, ref mut time_spent, @@ -871,17 +880,9 @@ total_used_negative_operator, ranking_score_threshold, ref mut locales, + marker: _, } = other; - if self.timestamp.is_none() { - self.timestamp = timestamp; - } - - // context - for user_agent in user_agents.into_iter() { - self.user_agents.insert(user_agent); - } - // request self.total_received = self.total_received.saturating_add(total_received); self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); @@ -961,12 +962,12 @@ // locales self.locales.append(locales); + + self } - pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> { + fn into_event(self) -> serde_json::Value { let Self { - timestamp, - user_agents, total_received, total_succeeded, time_spent, @@ -1005,90 +1006,78 @@ total_used_negative_operator, ranking_score_threshold, locales, + marker: _, } = self; - if total_received == 0 { - None - } else { - // we get all the values in a sorted manner - let time_spent = time_spent.into_sorted_vec(); - // the index of the 99th percentage of value - let percentile_99th = time_spent.len() * 99 / 100; - // We are only interested by the slowest value of the 99th fastest results - let time_spent = time_spent.get(percentile_99th); + // we get all the values in a sorted manner + let time_spent = time_spent.into_sorted_vec(); + // the index of the 99th percentage of value + let percentile_99th = time_spent.len() * 99 / 100; + // We are only interested by the slowest value of the 99th fastest results + let time_spent = time_spent.get(percentile_99th); - let properties = json!({ - "user-agent": user_agents, - "requests": { - "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), - "total_succeeded": total_succeeded, - "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics - "total_received": total_received, - "total_degraded": total_degraded, - "total_used_negative_operator": total_used_negative_operator, - }, - "sort": { - "with_geoPoint": sort_with_geo_point, - "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64), - }, - "distinct": distinct, - "filter": { - "with_geoRadius": filter_with_geo_radius, - "with_geoBoundingBox": filter_with_geo_bounding_box, - "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), - "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), - }, - "attributes_to_search_on": { - "total_number_of_uses": attributes_to_search_on_total_number_of_uses, - }, - "q": { - "max_terms_number": max_terms_number, - }, - "vector": { - "max_vector_size": max_vector_size, - "retrieve_vectors": retrieve_vectors, - }, - "hybrid": { - "enabled": hybrid, - "semantic_ratio": semantic_ratio, - }, - "pagination": { - "max_limit": max_limit, - "max_offset": max_offset, - "most_used_navigation": if finite_pagination > (total_received / 2) { "exhaustive" } else { "estimated" }, - }, - "formatting": { - "max_attributes_to_retrieve": 
max_attributes_to_retrieve, - "max_attributes_to_highlight": max_attributes_to_highlight, - "highlight_pre_tag": highlight_pre_tag, - "highlight_post_tag": highlight_post_tag, - "max_attributes_to_crop": max_attributes_to_crop, - "crop_marker": crop_marker, - "show_matches_position": show_matches_position, - "crop_length": crop_length, - }, - "facets": { - "avg_facets_number": format!("{:.2}", facets_sum_of_terms as f64 / facets_total_number_of_facets as f64), - }, - "matching_strategy": { - "most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), - }, - "locales": locales, - "scoring": { - "show_ranking_score": show_ranking_score, - "show_ranking_score_details": show_ranking_score_details, - "ranking_score_threshold": ranking_score_threshold, - }, - }); - - Some(Track { - timestamp, - user: user.clone(), - event: event_name.to_string(), - properties, - ..Default::default() - }) - } + json!({ + "requests": { + "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), + "total_succeeded": total_succeeded, + "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics + "total_received": total_received, + "total_degraded": total_degraded, + "total_used_negative_operator": total_used_negative_operator, + }, + "sort": { + "with_geoPoint": sort_with_geo_point, + "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64), + }, + "distinct": distinct, + "filter": { + "with_geoRadius": filter_with_geo_radius, + "with_geoBoundingBox": filter_with_geo_bounding_box, + "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), + "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), + }, + "attributes_to_search_on": { + "total_number_of_uses": attributes_to_search_on_total_number_of_uses, + }, + "q": { + "max_terms_number": max_terms_number, + }, + "vector": { + "max_vector_size": max_vector_size, + "retrieve_vectors": retrieve_vectors, + }, + "hybrid": { + "enabled": hybrid, + "semantic_ratio": semantic_ratio, + }, + "pagination": { + "max_limit": max_limit, + "max_offset": max_offset, + "most_used_navigation": if finite_pagination > (total_received / 2) { "exhaustive" } else { "estimated" }, + }, + "formatting": { + "max_attributes_to_retrieve": max_attributes_to_retrieve, + "max_attributes_to_highlight": max_attributes_to_highlight, + "highlight_pre_tag": highlight_pre_tag, + "highlight_post_tag": highlight_post_tag, + "max_attributes_to_crop": max_attributes_to_crop, + "crop_marker": crop_marker, + "show_matches_position": show_matches_position, + "crop_length": crop_length, + }, + "facets": { + "avg_facets_number": format!("{:.2}", facets_sum_of_terms as f64 / facets_total_number_of_facets as f64), + }, + "matching_strategy": { + "most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), + }, + "locales": locales, + "scoring": { + "show_ranking_score": show_ranking_score, + "show_ranking_score_details": show_ranking_score_details, + "ranking_score_threshold": ranking_score_threshold, + }, + }) } } diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index b24f18fae..80177876a 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -120,7 +120,7 @@ pub fn create_app( search_queue: Data, opt: Opt, logs: (LogRouteHandle, 
LogStderrHandle), - analytics: Arc, + analytics: Arc, enable_dashboard: bool, ) -> actix_web::App< impl ServiceFactory< @@ -473,7 +473,7 @@ pub fn configure_data( search_queue: Data, opt: &Opt, (logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle), - analytics: Arc, + analytics: Arc, ) { let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize; config diff --git a/meilisearch/src/routes/dump.rs b/meilisearch/src/routes/dump.rs index 7f3cd06a5..0fdeef5ed 100644 --- a/meilisearch/src/routes/dump.rs +++ b/meilisearch/src/routes/dump.rs @@ -4,7 +4,6 @@ use index_scheduler::IndexScheduler; use meilisearch_auth::AuthController; use meilisearch_types::error::ResponseError; use meilisearch_types::tasks::KindWithContent; -use serde_json::json; use tracing::debug; use crate::analytics::Analytics; @@ -18,14 +17,16 @@ pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump)))); } +crate::empty_analytics!(DumpAnalytics, "Dump Created"); + pub async fn create_dump( index_scheduler: GuardedData, Data>, auth_controller: GuardedData, Data>, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { - analytics.publish("Dump Created".to_string(), json!({}), Some(&req)); + analytics.publish(DumpAnalytics::default(), Some(&req)); let task = KindWithContent::DumpCreation { keys: auth_controller.list_keys()?, diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index bc656bdbb..24c89938d 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -6,10 +6,11 @@ use index_scheduler::IndexScheduler; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::keys::actions; +use serde::Serialize; use serde_json::json; use tracing::debug; -use crate::analytics::Analytics; +use crate::analytics::{Aggregate, Analytics}; use crate::extractors::authentication::policies::ActionPolicy; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; @@ -22,17 +23,19 @@ pub fn configure(cfg: &mut web::ServiceConfig) { ); } +crate::empty_analytics!(GetExperimentalFeatureAnalytics, "Experimental features Seen"); + async fn get_features( index_scheduler: GuardedData< ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_GET }>, Data, >, req: HttpRequest, - analytics: Data, + analytics: Data, ) -> HttpResponse { let features = index_scheduler.features(); - analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req)); + analytics.publish(GetExperimentalFeatureAnalytics::default(), Some(&req)); let features = features.runtime_features(); debug!(returns = ?features, "Get features"); HttpResponse::Ok().json(features) @@ -53,6 +56,38 @@ pub struct RuntimeTogglableFeatures { pub contains_filter: Option, } +#[derive(Serialize)] +pub struct PatchExperimentalFeatureAnalytics { + vector_store: bool, + metrics: bool, + logs_route: bool, + edit_documents_by_function: bool, + contains_filter: bool, +} + +impl Aggregate for PatchExperimentalFeatureAnalytics { + fn event_name(&self) -> &'static str { + "Experimental features Updated" + } + + fn aggregate(self, other: Self) -> Self + where + Self: Sized, + { + Self { + vector_store: other.vector_store, + metrics: other.metrics, + logs_route: other.logs_route, + edit_documents_by_function: other.edit_documents_by_function, + contains_filter: other.contains_filter, + } + } + + fn 
into_event(self) -> serde_json::Value { + serde_json::to_value(self).unwrap() + } +} + async fn patch_features( index_scheduler: GuardedData< ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_UPDATE }>, @@ -60,7 +95,7 @@ async fn patch_features( >, new_features: AwebJson, req: HttpRequest, - analytics: Data, + analytics: Data, ) -> Result { let features = index_scheduler.features(); debug!(parameters = ?new_features, "Patch features"); @@ -89,14 +124,13 @@ async fn patch_features( } = new_features; analytics.publish( - "Experimental features Updated".to_string(), - json!({ - "vector_store": vector_store, - "metrics": metrics, - "logs_route": logs_route, - "edit_documents_by_function": edit_documents_by_function, - "contains_filter": contains_filter, - }), + PatchExperimentalFeatureAnalytics { + vector_store, + metrics, + logs_route, + edit_documents_by_function, + contains_filter, + }, Some(&req), ); index_scheduler.put_runtime_features(new_features)?; diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 85cf33c54..8f4cd026d 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -1,4 +1,6 @@ +use std::collections::HashSet; use std::io::ErrorKind; +use std::marker::PhantomData; use actix_web::http::header::CONTENT_TYPE; use actix_web::web::Data; @@ -23,14 +25,14 @@ use meilisearch_types::tasks::KindWithContent; use meilisearch_types::{milli, Document, Index}; use mime::Mime; use once_cell::sync::Lazy; -use serde::Deserialize; +use serde::{Deserialize, Serialize}; use serde_json::Value; use tempfile::tempfile; use tokio::fs::File; use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter}; use tracing::debug; -use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind}; +use crate::analytics::{Aggregate, AggregateMethod, Analytics, DocumentDeletionKind}; use crate::error::MeilisearchHttpError; use crate::error::PayloadError::ReceivePayload; use crate::extractors::authentication::policies::*; @@ -41,7 +43,7 @@ use crate::routes::{ get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT, }; use crate::search::{parse_filter, RetrieveVectors}; -use crate::Opt; +use crate::{aggregate_methods, Opt}; static ACCEPTED_CONTENT_TYPE: Lazy> = Lazy::new(|| { vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()] @@ -100,12 +102,82 @@ pub struct GetDocument { retrieve_vectors: Param, } +#[derive(Default, Serialize)] +pub struct DocumentsFetchAggregator { + #[serde(rename = "requests.total_received")] + total_received: usize, + + // a call on ../documents/:doc_id + per_document_id: bool, + // if a filter was used + per_filter: bool, + + #[serde(rename = "vector.retrieve_vectors")] + retrieve_vectors: bool, + + // pagination + #[serde(rename = "pagination.max_limit")] + max_limit: usize, + #[serde(rename = "pagination.max_offset")] + max_offset: usize, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum DocumentFetchKind { + PerDocumentId { retrieve_vectors: bool }, + Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool }, +} + +impl DocumentsFetchAggregator { + pub fn from_query(query: &DocumentFetchKind) -> Self { + let (limit, offset, retrieve_vectors) = match query { + DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors), + DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. 
} => { + (*limit, *offset, *retrieve_vectors) + } + }; + Self { + total_received: 1, + per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }), + per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter), + max_limit: limit, + max_offset: offset, + retrieve_vectors, + } + } +} + +impl Aggregate for DocumentsFetchAggregator { + // TODO: TAMO: Should we do the same event for the GET requests + fn event_name(&self) -> &'static str { + "Documents Fetched POST" + } + + fn aggregate(self, other: Self) -> Self + where + Self: Sized, + { + Self { + total_received: self.total_received.saturating_add(other.total_received), + per_document_id: self.per_document_id | other.per_document_id, + per_filter: self.per_filter | other.per_filter, + retrieve_vectors: self.retrieve_vectors | other.retrieve_vectors, + max_limit: self.max_limit.max(other.max_limit), + max_offset: self.max_offset.max(other.max_offset), + } + } + + fn into_event(self) -> Value { + serde_json::to_value(self).unwrap() + } +} + pub async fn get_document( index_scheduler: GuardedData, Data>, document_param: web::Path, params: AwebQueryParameter, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { let DocumentParam { index_uid, document_id } = document_param.into_inner(); debug!(parameters = ?params, "Get document"); @@ -117,9 +189,12 @@ pub async fn get_document( let features = index_scheduler.features(); let retrieve_vectors = RetrieveVectors::new(param_retrieve_vectors.0, features)?; - analytics.get_fetch_documents( - &DocumentFetchKind::PerDocumentId { retrieve_vectors: param_retrieve_vectors.0 }, - &req, + analytics.publish( + DocumentsFetchAggregator { + retrieve_vectors: param_retrieve_vectors.0, + ..Default::default() + }, + Some(&req), ); let index = index_scheduler.index(&index_uid)?; @@ -129,17 +204,57 @@ pub async fn get_document( Ok(HttpResponse::Ok().json(document)) } +#[derive(Default, Serialize)] +pub struct DocumentsDeletionAggregator { + #[serde(rename = "requests.total_received")] + total_received: usize, + per_document_id: bool, + clear_all: bool, + per_batch: bool, + per_filter: bool, +} + +impl Aggregate for DocumentsDeletionAggregator { + fn event_name(&self) -> &'static str { + "Documents Deleted" + } + + fn aggregate(self, other: Self) -> Self + where + Self: Sized, + { + Self { + total_received: self.total_received.saturating_add(other.total_received), + per_document_id: self.per_document_id | other.per_document_id, + clear_all: self.clear_all | other.clear_all, + per_batch: self.per_batch | other.per_batch, + per_filter: self.per_filter | other.per_filter, + } + } + + fn into_event(self) -> Value { + serde_json::to_value(self).unwrap() + } +} + pub async fn delete_document( index_scheduler: GuardedData, Data>, path: web::Path, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { let DocumentParam { index_uid, document_id } = path.into_inner(); let index_uid = IndexUid::try_from(index_uid)?; - analytics.delete_documents(DocumentDeletionKind::PerDocumentId, &req); + analytics.publish( + DocumentsDeletionAggregator { + total_received: 1, + per_document_id: true, + ..Default::default() + }, + Some(&req), + ); let task = KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), @@ -190,19 +305,21 @@ pub async fn documents_by_query_post( index_uid: web::Path, body: AwebJson, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { let body = body.into_inner(); 
debug!(parameters = ?body, "Get documents POST"); - analytics.post_fetch_documents( - &DocumentFetchKind::Normal { - with_filter: body.filter.is_some(), - limit: body.limit, - offset: body.offset, + analytics.publish( + DocumentsFetchAggregator { + total_received: 1, + per_filter: body.filter.is_some(), retrieve_vectors: body.retrieve_vectors, + max_limit: body.limit, + max_offset: body.offset, + ..Default::default() }, - &req, + Some(&req), ); documents_by_query(&index_scheduler, index_uid, body) @@ -213,7 +330,7 @@ pub async fn get_documents( index_uid: web::Path, params: AwebQueryParameter, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { debug!(parameters = ?params, "Get documents GET"); @@ -235,14 +352,16 @@ pub async fn get_documents( filter, }; - analytics.get_fetch_documents( - &DocumentFetchKind::Normal { - with_filter: query.filter.is_some(), - limit: query.limit, - offset: query.offset, + analytics.publish( + DocumentsFetchAggregator { + total_received: 1, + per_filter: query.filter.is_some(), retrieve_vectors: query.retrieve_vectors, + max_limit: query.limit, + max_offset: query.offset, + ..Default::default() }, - &req, + Some(&req), ); documents_by_query(&index_scheduler, index_uid, query) @@ -298,6 +417,42 @@ fn from_char_csv_delimiter( } } +aggregate_methods!( + Replaced => "Documents Added", + Updated => "Documents Updated", +); + +#[derive(Default, Serialize)] +pub struct DocumentsAggregator { + payload_types: HashSet, + primary_key: HashSet, + index_creation: bool, + #[serde(skip)] + method: PhantomData, +} + +impl Aggregate for DocumentsAggregator { + fn event_name(&self) -> &'static str { + Method::event_name() + } + + fn aggregate(mut self, other: Self) -> Self + where + Self: Sized, + { + Self { + payload_types: self.payload_types.union(&other.payload_types).collect(), + primary_key: self.primary_key.union(&other.primary_key).collect(), + index_creation: self.index_creation | other.index_creation, + method: PhantomData, + } + } + + fn into_event(self) -> Value { + serde_json::to_value(self).unwrap() + } +} + pub async fn replace_documents( index_scheduler: GuardedData, Data>, index_uid: web::Path, @@ -305,17 +460,33 @@ pub async fn replace_documents( body: Payload, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; debug!(parameters = ?params, "Replace documents"); let params = params.into_inner(); - analytics.add_documents( - ¶ms, - index_scheduler.index_exists(&index_uid).map_or(true, |x| !x), - &req, + let mut content_types = HashSet::new(); + let content_type = req + .headers() + .get(CONTENT_TYPE) + .and_then(|s| s.to_str().ok()) + .unwrap_or("unknown") + .to_string(); + content_types.insert(content_type); + let mut primary_keys = HashSet::new(); + if let Some(primary_key) = params.primary_key.clone() { + primary_keys.insert(primary_key); + } + analytics.publish( + DocumentsAggregator:: { + payload_types: content_types, + primary_key: primary_keys, + index_creation: index_scheduler.index_exists(&index_uid).map_or(true, |x| !x), + method: PhantomData, + }, + Some(&req), ); let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); @@ -346,17 +517,33 @@ pub async fn update_documents( body: Payload, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; let params = params.into_inner(); debug!(parameters 
= ?params, "Update documents"); - analytics.add_documents( - ¶ms, - index_scheduler.index_exists(&index_uid).map_or(true, |x| !x), - &req, + let mut content_types = HashSet::new(); + let content_type = req + .headers() + .get(CONTENT_TYPE) + .and_then(|s| s.to_str().ok()) + .unwrap_or("unknown") + .to_string(); + content_types.insert(content_type); + let mut primary_keys = HashSet::new(); + if let Some(primary_key) = params.primary_key.clone() { + primary_keys.insert(primary_key); + } + analytics.publish( + DocumentsAggregator:: { + payload_types: content_types, + primary_key: primary_keys, + index_creation: index_scheduler.index_exists(&index_uid).map_or(true, |x| !x), + method: PhantomData, + }, + Some(&req), ); let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); @@ -524,12 +711,15 @@ pub async fn delete_documents_batch( body: web::Json>, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { debug!(parameters = ?body, "Delete documents by batch"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; - analytics.delete_documents(DocumentDeletionKind::PerBatch, &req); + analytics.publish( + DocumentsDeletionAggregator { total_received: 1, per_batch: true, ..Default::default() }, + Some(&req), + ); let ids = body .iter() @@ -562,14 +752,17 @@ pub async fn delete_documents_by_filter( body: AwebJson, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { debug!(parameters = ?body, "Delete documents by filter"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = index_uid.into_inner(); let filter = body.into_inner().filter; - analytics.delete_documents(DocumentDeletionKind::PerFilter, &req); + analytics.publish( + DocumentsDeletionAggregator { total_received: 1, per_filter: true, ..Default::default() }, + Some(&req), + ); // we ensure the filter is well formed before enqueuing it crate::search::parse_filter(&filter, Code::InvalidDocumentFilter, index_scheduler.features())? 
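Stepping back from the individual endpoints: every deletion handler above publishes a fresh `DocumentsDeletionAggregator` describing exactly one request (`total_received: 1` plus the flag for that route), and the analytics worker folds same-named events together before flushing them to Segment. Below is a self-contained sketch of that fold under stated assumptions: the mini `Aggregate` trait and the `DocumentsDeletion` struct are simplified local stand-ins for `crate::analytics::Aggregate` and `DocumentsDeletionAggregator`, not the crate's actual items.

```rust
use serde_json::{json, Value};

// Local stand-in for `crate::analytics::Aggregate`, reduced to what this sketch needs.
trait Aggregate {
    fn event_name(&self) -> &'static str;
    fn aggregate(self, other: Self) -> Self;
    fn into_event(self) -> Value;
}

// Simplified stand-in for `DocumentsDeletionAggregator`: one instance per HTTP request.
#[derive(Default)]
struct DocumentsDeletion {
    total_received: usize,
    per_filter: bool,
    per_batch: bool,
}

impl Aggregate for DocumentsDeletion {
    fn event_name(&self) -> &'static str {
        "Documents Deleted"
    }

    // Counters are summed and booleans are OR-ed, mirroring the impl above.
    fn aggregate(self, other: Self) -> Self {
        Self {
            total_received: self.total_received.saturating_add(other.total_received),
            per_filter: self.per_filter | other.per_filter,
            per_batch: self.per_batch | other.per_batch,
        }
    }

    fn into_event(self) -> Value {
        json!({
            "requests.total_received": self.total_received,
            "per_filter": self.per_filter,
            "per_batch": self.per_batch,
        })
    }
}

fn main() {
    // One delete-by-filter request and one delete-by-batch request...
    let a = DocumentsDeletion { total_received: 1, per_filter: true, ..Default::default() };
    let b = DocumentsDeletion { total_received: 1, per_batch: true, ..Default::default() };
    let name = a.event_name();
    // ...fold into a single "Documents Deleted" event carrying both flags.
    let event = a.aggregate(b).into_event();
    println!("{name}: {event}");
    assert_eq!(event["requests.total_received"], 2);
    assert_eq!(event["per_filter"], true);
    assert_eq!(event["per_batch"], true);
}
```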
@@ -599,13 +792,44 @@ pub struct DocumentEditionByFunction { pub function: String, } +#[derive(Default, Serialize)] +struct EditDocumentsByFunctionAggregator { + // Set to true if at least one request was filtered + filtered: bool, + // Set to true if at least one request contained a context + with_context: bool, + + index_creation: bool, +} + +impl Aggregate for EditDocumentsByFunctionAggregator { + fn event_name(&self) -> &'static str { + "Documents Edited By Function" + } + + fn aggregate(self, other: Self) -> Self + where + Self: Sized, + { + Self { + filtered: self.filtered | other.filtered, + with_context: self.with_context | other.with_context, + index_creation: self.index_creation | other.index_creation, + } + } + + fn into_event(self) -> Value { + serde_json::to_value(self).unwrap() + } +} + pub async fn edit_documents_by_function( index_scheduler: GuardedData, Data>, index_uid: web::Path, params: AwebJson, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { debug!(parameters = ?params, "Edit documents by function"); @@ -617,10 +841,13 @@ pub async fn edit_documents_by_function( let index_uid = index_uid.into_inner(); let params = params.into_inner(); - analytics.update_documents_by_function( - ¶ms, - index_scheduler.index(&index_uid).is_err(), - &req, + analytics.publish( + EditDocumentsByFunctionAggregator { + filtered: params.filter.is_some(), + with_context: params.context.is_some(), + index_creation: index_scheduler.index(&index_uid).is_err(), + }, + Some(&req), ); let DocumentEditionByFunction { filter, context, function } = params; @@ -670,10 +897,13 @@ pub async fn clear_all_documents( index_uid: web::Path, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; - analytics.delete_documents(DocumentDeletionKind::ClearAll, &req); + analytics.publish( + DocumentsDeletionAggregator { total_received: 1, clear_all: true, ..Default::default() }, + Some(&req), + ); let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() }; let uid = get_task_id(&req, &opt)?; diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 1df80711d..1e9d0e15e 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -1,3 +1,5 @@ +use std::collections::{BinaryHeap, HashSet}; + use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; @@ -10,14 +12,15 @@ use meilisearch_types::locales::Locale; use serde_json::Value; use tracing::debug; -use crate::analytics::{Analytics, FacetSearchAggregator}; +use crate::analytics::{Aggregate, Analytics}; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::routes::indexes::search::search_kind; use crate::search::{ - add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, RankingScoreThreshold, - SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, - DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, + add_search_rules, perform_facet_search, FacetSearchResult, HybridQuery, MatchingStrategy, + RankingScoreThreshold, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, + DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, + DEFAULT_SEARCH_OFFSET, }; use crate::search_queue::SearchQueue; @@ -53,13 
+56,110 @@ pub struct FacetSearchQuery { pub locales: Option<Vec<Locale>>, } +#[derive(Default)] +pub struct FacetSearchAggregator { + // requests + total_received: usize, + total_succeeded: usize, + time_spent: BinaryHeap<usize>, + + // The set of all facetNames that were used + facet_names: HashSet<String>, + + // Has there been any other parameter than the facetName or facetQuery ones? + additional_search_parameters_provided: bool, +} + +impl FacetSearchAggregator { + #[allow(clippy::field_reassign_with_default)] + pub fn from_query(query: &FacetSearchQuery, request: &HttpRequest) -> Self { + let FacetSearchQuery { + facet_query: _, + facet_name, + vector, + q, + filter, + matching_strategy, + attributes_to_search_on, + hybrid, + ranking_score_threshold, + locales, + } = query; + + Self { + total_received: 1, + facet_names: Some(facet_name.clone()).into_iter().collect(), + additional_search_parameters_provided: q.is_some() + || vector.is_some() + || filter.is_some() + || *matching_strategy != MatchingStrategy::default() + || attributes_to_search_on.is_some() + || hybrid.is_some() + || ranking_score_threshold.is_some() + || locales.is_some(), + ..Default::default() + } + } + + pub fn succeed(&mut self, result: &FacetSearchResult) { + let FacetSearchResult { facet_hits: _, facet_query: _, processing_time_ms } = result; + self.total_succeeded = 1; + self.time_spent.push(*processing_time_ms as usize); + } +} + +impl Aggregate for FacetSearchAggregator { + fn event_name(&self) -> &'static str { + "Facet Searched POST" + } + + fn aggregate(mut self, mut other: Self) -> Self + where + Self: Sized, + { + self.time_spent.append(&mut other.time_spent); + + Self { + total_received: self.total_received.saturating_add(other.total_received), + total_succeeded: self.total_succeeded.saturating_add(other.total_succeeded), + time_spent: self.time_spent, + facet_names: self.facet_names.union(&other.facet_names).cloned().collect(), + additional_search_parameters_provided: self.additional_search_parameters_provided + | other.additional_search_parameters_provided, + } + } + + fn into_event(self) -> Value { + let Self { + total_received, + total_succeeded, + time_spent, + facet_names, + additional_search_parameters_provided, + } = self; + + // we get all the values in a sorted manner + let time_spent = time_spent.into_sorted_vec(); + // the index of the 99th percentage of value + let percentile_99th = time_spent.len() * 99 / 100; + // We are only interested by the slowest value of the 99th fastest results + let time_spent = time_spent.get(percentile_99th); + + serde_json::json!({ + "requests": { + "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), + "total_succeeded": total_succeeded, + "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panic + "total_received": total_received, + }, + "facets": { + "total_distinct_facet_count": facet_names.len(), + "additional_search_parameters_provided": additional_search_parameters_provided, + }, + }) + } +} + pub async fn search( index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>, search_queue: Data<SearchQueue>, index_uid: web::Path<String>, params: AwebJson<FacetSearchQuery, DeserrJsonError>, req: HttpRequest, - analytics: web::Data<dyn Analytics>, + analytics: web::Data<Analytics>, ) -> Result<HttpResponse, ResponseError> { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -100,7 +200,7 @@ pub async fn search( if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); } - analytics.post_facet_search(aggregate); + analytics.publish(aggregate, Some(&req)); let search_result = search_result?; diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index 35b747ccf..483a48a16 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -1,3 +1,4 @@ +use std::collections::BTreeSet; use std::convert::Infallible; use actix_web::web::Data; @@ -18,7 +19,7 @@ use time::OffsetDateTime; use tracing::debug; use 
super::{get_task_id, Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; -use crate::analytics::Analytics; +use crate::analytics::{Aggregate, Analytics}; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::sequential_extractor::SeqHandler; @@ -123,12 +124,34 @@ pub struct IndexCreateRequest { primary_key: Option, } +#[derive(Serialize)] +struct IndexCreatedAggregate { + primary_key: BTreeSet, +} + +impl Aggregate for IndexCreatedAggregate { + fn event_name(&self) -> &'static str { + "Index Created" + } + + fn aggregate(self, other: Self) -> Self + where + Self: Sized, + { + Self { primary_key: self.primary_key.union(&other.primary_key).collect() } + } + + fn into_event(self) -> impl Serialize { + self + } +} + pub async fn create_index( index_scheduler: GuardedData, Data>, body: AwebJson, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { debug!(parameters = ?body, "Create index"); let IndexCreateRequest { primary_key, uid } = body.into_inner(); @@ -136,8 +159,7 @@ pub async fn create_index( let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid); if allow_index_creation { analytics.publish( - "Index Created".to_string(), - json!({ "primary_key": primary_key }), + IndexCreatedAggregate { primary_key: primary_key.iter().cloned().collect() }, Some(&req), ); @@ -194,20 +216,37 @@ pub async fn get_index( Ok(HttpResponse::Ok().json(index_view)) } +#[derive(Serialize)] +struct IndexUpdatedAggregate { + primary_key: BTreeSet, +} + +impl Aggregate for IndexUpdatedAggregate { + fn event_name(&self) -> &'static str { + "Index Updated" + } + + fn aggregate(self, other: Self) -> Self { + Self { primary_key: self.primary_key.union(&other.primary_key).collect() } + } + + fn into_event(self) -> impl Serialize { + self + } +} pub async fn update_index( index_scheduler: GuardedData, Data>, index_uid: web::Path, body: AwebJson, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { debug!(parameters = ?body, "Update index"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; let body = body.into_inner(); analytics.publish( - "Index Updated".to_string(), - json!({ "primary_key": body.primary_key }), + IndexUpdatedAggregate { primary_key: body.primary_key.iter().cloned().collect() }, Some(&req), ); diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index 6a8eee521..f833a57d2 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -13,6 +13,7 @@ use meilisearch_types::serde_cs::vec::CS; use serde_json::Value; use tracing::debug; +use crate::analytics::segment_analytics::{SearchGET, SearchPOST}; use crate::analytics::{Analytics, SearchAggregator}; use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::*; @@ -225,7 +226,7 @@ pub async fn search_with_url_query( index_uid: web::Path, params: AwebQueryParameter, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { debug!(parameters = ?params, "Search get"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -237,7 +238,7 @@ pub async fn search_with_url_query( add_search_rules(&mut query.filter, search_rules); } - let mut aggregate = SearchAggregator::from_query(&query, &req); + let mut aggregate = SearchAggregator::::from_query(&query, &req); let index = 
index_scheduler.index(&index_uid)?; let features = index_scheduler.features(); @@ -254,7 +255,7 @@ pub async fn search_with_url_query( if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); } - analytics.get_search(aggregate); + analytics.publish(aggregate, Some(&req)); let search_result = search_result?; @@ -268,7 +269,7 @@ pub async fn search_with_post( index_uid: web::Path, params: AwebJson, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -280,7 +281,7 @@ pub async fn search_with_post( add_search_rules(&mut query.filter, search_rules); } - let mut aggregate = SearchAggregator::from_query(&query, &req); + let mut aggregate = SearchAggregator::::from_query(&query, &req); let index = index_scheduler.index(&index_uid)?; @@ -302,7 +303,7 @@ pub async fn search_with_post( MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc(); } } - analytics.post_search(aggregate); + analytics.publish(aggregate, Some(&req)); let search_result = search_result?; diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index aaf8673d0..112f8671b 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -1,3 +1,5 @@ +use std::collections::{BTreeSet, HashSet}; + use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; @@ -7,12 +9,15 @@ use meilisearch_types::error::ResponseError; use meilisearch_types::facet_values_sort::FacetValuesSort; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::update::Setting; -use meilisearch_types::settings::{settings, RankingRuleView, SecretPolicy, Settings, Unchecked}; +use meilisearch_types::settings::{ + settings, ProximityPrecisionView, RankingRuleView, SecretPolicy, Settings, Unchecked, +}; use meilisearch_types::tasks::KindWithContent; +use serde::Serialize; use serde_json::json; use tracing::debug; -use crate::analytics::Analytics; +use crate::analytics::{Aggregate, Analytics}; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView}; @@ -80,7 +85,7 @@ macro_rules! 
make_setting_route { body: deserr::actix_web::AwebJson, $err_ty>, req: HttpRequest, opt: web::Data, - $analytics_var: web::Data, + $analytics_var: web::Data, ) -> std::result::Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -162,16 +167,8 @@ make_setting_route!( "filterableAttributes", analytics, |setting: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "FilterableAttributes Updated".to_string(), - json!({ - "filterable_attributes": { - "total": setting.as_ref().map(|filter| filter.len()).unwrap_or(0), - "has_geo": setting.as_ref().map(|filter| filter.contains("_geo")).unwrap_or(false), - } - }), + crate::routes::indexes::settings::FilterableAttributesAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -188,16 +185,8 @@ make_setting_route!( "sortableAttributes", analytics, |setting: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "SortableAttributes Updated".to_string(), - json!({ - "sortable_attributes": { - "total": setting.as_ref().map(|sort| sort.len()), - "has_geo": setting.as_ref().map(|sort| sort.contains("_geo")), - }, - }), + crate::routes::indexes::settings::SortableAttributesAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -214,16 +203,8 @@ make_setting_route!( "displayedAttributes", analytics, |displayed: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "DisplayedAttributes Updated".to_string(), - json!({ - "displayed_attributes": { - "total": displayed.as_ref().map(|displayed| displayed.len()), - "with_wildcard": displayed.as_ref().map(|displayed| displayed.iter().any(|displayed| displayed == "*")), - }, - }), + crate::routes::indexes::settings::DisplayedAttributesAnalytics::new(displayed.as_ref()).to_settings(), Some(req), ); } @@ -240,35 +221,8 @@ make_setting_route!( "typoTolerance", analytics, |setting: &Option, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "TypoTolerance Updated".to_string(), - json!({ - "typo_tolerance": { - "enabled": setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))), - "disable_on_attributes": setting - .as_ref() - .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), - "disable_on_words": setting - .as_ref() - .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), - "min_word_size_for_one_typo": setting - .as_ref() - .and_then(|s| s.min_word_size_for_typos - .as_ref() - .set() - .map(|s| s.one_typo.set())) - .flatten(), - "min_word_size_for_two_typos": setting - .as_ref() - .and_then(|s| s.min_word_size_for_typos - .as_ref() - .set() - .map(|s| s.two_typos.set())) - .flatten(), - }, - }), + crate::routes::indexes::settings::TypoToleranceAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -285,16 +239,8 @@ make_setting_route!( "searchableAttributes", analytics, |setting: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "SearchableAttributes Updated".to_string(), - json!({ - "searchable_attributes": { - "total": setting.as_ref().map(|searchable| searchable.len()), - "with_wildcard": setting.as_ref().map(|searchable| searchable.iter().any(|searchable| searchable == "*")), - }, - }), + crate::routes::indexes::settings::SearchableAttributesAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -311,15 +257,8 @@ make_setting_route!( "stopWords", analytics, |stop_words: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "StopWords Updated".to_string(), 
- json!({ - "stop_words": { - "total": stop_words.as_ref().map(|stop_words| stop_words.len()), - }, - }), + crate::routes::indexes::settings::StopWordsAnalytics::new(stop_words.as_ref()).to_settings(), Some(req), ); } @@ -336,15 +275,8 @@ make_setting_route!( "nonSeparatorTokens", analytics, |non_separator_tokens: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "nonSeparatorTokens Updated".to_string(), - json!({ - "non_separator_tokens": { - "total": non_separator_tokens.as_ref().map(|non_separator_tokens| non_separator_tokens.len()), - }, - }), + crate::routes::indexes::settings::NonSeparatorTokensAnalytics::new(non_separator_tokens.as_ref()).to_settings(), Some(req), ); } @@ -361,15 +293,8 @@ make_setting_route!( "separatorTokens", analytics, |separator_tokens: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "separatorTokens Updated".to_string(), - json!({ - "separator_tokens": { - "total": separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()), - }, - }), + crate::routes::indexes::settings::SeparatorTokensAnalytics::new(separator_tokens.as_ref()).to_settings(), Some(req), ); } @@ -386,15 +311,8 @@ make_setting_route!( "dictionary", analytics, |dictionary: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "dictionary Updated".to_string(), - json!({ - "dictionary": { - "total": dictionary.as_ref().map(|dictionary| dictionary.len()), - }, - }), + crate::routes::indexes::settings::DictionaryAnalytics::new(dictionary.as_ref()).to_settings(), Some(req), ); } @@ -411,15 +329,8 @@ make_setting_route!( "synonyms", analytics, |synonyms: &Option>>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "Synonyms Updated".to_string(), - json!({ - "synonyms": { - "total": synonyms.as_ref().map(|synonyms| synonyms.len()), - }, - }), + crate::routes::indexes::settings::SynonymsAnalytics::new(synonyms.as_ref()).to_settings(), Some(req), ); } @@ -436,14 +347,8 @@ make_setting_route!( "distinctAttribute", analytics, |distinct: &Option, req: &HttpRequest| { - use serde_json::json; analytics.publish( - "DistinctAttribute Updated".to_string(), - json!({ - "distinct_attribute": { - "set": distinct.is_some(), - } - }), + crate::routes::indexes::settings::DistinctAttributeAnalytics::new(distinct.as_ref()).to_settings(), Some(req), ); } @@ -460,15 +365,8 @@ make_setting_route!( "proximityPrecision", analytics, |precision: &Option, req: &HttpRequest| { - use serde_json::json; analytics.publish( - "ProximityPrecision Updated".to_string(), - json!({ - "proximity_precision": { - "set": precision.is_some(), - "value": precision.unwrap_or_default(), - } - }), + crate::routes::indexes::settings::ProximityPrecisionAnalytics::new(precision.as_ref()).to_settings(), Some(req), ); } @@ -485,12 +383,8 @@ make_setting_route!( "localizedAttributes", analytics, |rules: &Option>, req: &HttpRequest| { - use serde_json::json; analytics.publish( - "LocalizedAttributesRules Updated".to_string(), - json!({ - "locales": rules.as_ref().map(|rules| rules.iter().flat_map(|rule| rule.locales.iter().cloned()).collect::>()) - }), + crate::routes::indexes::settings::LocalesAnalytics::new(rules.as_ref()).to_settings(), Some(req), ); } @@ -507,21 +401,8 @@ make_setting_route!( "rankingRules", analytics, |setting: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "RankingRules Updated".to_string(), - json!({ - "ranking_rules": { - "words_position": setting.as_ref().map(|rr| rr.iter().position(|s| 
matches!(s, meilisearch_types::settings::RankingRuleView::Words))), - "typo_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo))), - "proximity_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Proximity))), - "attribute_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Attribute))), - "sort_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort))), - "exactness_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Exactness))), - "values": setting.as_ref().map(|rr| rr.iter().filter(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Asc(_) | meilisearch_types::settings::RankingRuleView::Desc(_)) ).map(|x| x.to_string()).collect::>().join(", ")), - } - }), + crate::routes::indexes::settings::RankingRulesAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -538,20 +419,8 @@ make_setting_route!( "faceting", analytics, |setting: &Option, req: &HttpRequest| { - use serde_json::json; - use meilisearch_types::facet_values_sort::FacetValuesSort; - analytics.publish( - "Faceting Updated".to_string(), - json!({ - "faceting": { - "max_values_per_facet": setting.as_ref().and_then(|s| s.max_values_per_facet.set()), - "sort_facet_values_by_star_count": setting.as_ref().and_then(|s| { - s.sort_facet_values_by.as_ref().set().map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count)) - }), - "sort_facet_values_by_total": setting.as_ref().and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())), - }, - }), + crate::routes::indexes::settings::FacetingAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -568,15 +437,8 @@ make_setting_route!( "pagination", analytics, |setting: &Option, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "Pagination Updated".to_string(), - json!({ - "pagination": { - "max_total_hits": setting.as_ref().and_then(|s| s.max_total_hits.set()), - }, - }), + crate::routes::indexes::settings::PaginationAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -593,11 +455,8 @@ make_setting_route!( "embedders", analytics, |setting: &Option>>, req: &HttpRequest| { - - analytics.publish( - "Embedders Updated".to_string(), - serde_json::json!({"embedders": crate::routes::indexes::settings::embedder_analytics(setting.as_ref())}), + crate::routes::indexes::settings::EmbeddersAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -651,10 +510,15 @@ fn embedder_analytics( json!( { + // last "total": setting.as_ref().map(|s| s.len()), + // Merge the sources "sources": sources, + // |= "document_template_used": document_template_used, + // max "document_template_max_bytes": document_template_max_bytes, + // |= "binary_quantization_used": binary_quantization_used, } ) @@ -672,8 +536,7 @@ make_setting_route!( analytics, |setting: &Option, req: &HttpRequest| { analytics.publish( - "Search Cutoff Updated".to_string(), - serde_json::json!({"search_cutoff_ms": setting }), + crate::routes::indexes::settings::SearchCutoffMsAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -714,13 +577,639 @@ generate_configure!( search_cutoff_ms ); +#[derive(Serialize, Default)] +struct SettingsAnalytics { + ranking_rules: RankingRulesAnalytics, + searchable_attributes: 
SearchableAttributesAnalytics, + displayed_attributes: DisplayedAttributesAnalytics, + sortable_attributes: SortableAttributesAnalytics, + filterable_attributes: FilterableAttributesAnalytics, + distinct_attribute: DistinctAttributeAnalytics, + proximity_precision: ProximityPrecisionAnalytics, + typo_tolerance: TypoToleranceAnalytics, + faceting: FacetingAnalytics, + pagination: PaginationAnalytics, + stop_words: StopWordsAnalytics, + synonyms: SynonymsAnalytics, + embedders: EmbeddersAnalytics, + search_cutoff_ms: SearchCutoffMsAnalytics, + locales: LocalesAnalytics, + dictionary: DictionaryAnalytics, + separator_tokens: SeparatorTokensAnalytics, + non_separator_tokens: NonSeparatorTokensAnalytics, +} + +impl Aggregate for SettingsAnalytics { + fn event_name(&self) -> &'static str { + "Settings Updated" + } + + fn aggregate(self, other: Self) -> Self + where + Self: Sized, + { + Self { + ranking_rules: RankingRulesAnalytics { + words_position: self + .ranking_rules + .words_position + .or(other.ranking_rules.words_position), + typo_position: self + .ranking_rules + .typo_position + .or(other.ranking_rules.typo_position), + proximity_position: self + .ranking_rules + .proximity_position + .or(other.ranking_rules.proximity_position), + attribute_position: self + .ranking_rules + .attribute_position + .or(other.ranking_rules.attribute_position), + sort_position: self + .ranking_rules + .sort_position + .or(other.ranking_rules.sort_position), + exactness_position: self + .ranking_rules + .exactness_position + .or(other.ranking_rules.exactness_position), + values: self.ranking_rules.values.or(other.ranking_rules.values), + }, + searchable_attributes: SearchableAttributesAnalytics { + total: self.searchable_attributes.total.or(other.searchable_attributes.total), + with_wildcard: self + .searchable_attributes + .with_wildcard + .or(other.searchable_attributes.with_wildcard), + }, + displayed_attributes: DisplayedAttributesAnalytics { + total: self.displayed_attributes.total.or(other.displayed_attributes.total), + with_wildcard: self + .displayed_attributes + .with_wildcard + .or(other.displayed_attributes.with_wildcard), + }, + sortable_attributes: SortableAttributesAnalytics { + total: self.sortable_attributes.total.or(other.sortable_attributes.total), + has_geo: self.sortable_attributes.has_geo.or(other.sortable_attributes.has_geo), + }, + filterable_attributes: FilterableAttributesAnalytics { + total: self.filterable_attributes.total.or(other.filterable_attributes.total), + has_geo: self.filterable_attributes.has_geo.or(other.filterable_attributes.has_geo), + }, + distinct_attribute: DistinctAttributeAnalytics { + set: self.distinct_attribute.set.or(other.distinct_attribute.set), + }, + proximity_precision: ProximityPrecisionAnalytics { + set: self.proximity_precision.set(other.proximity_precision.set), + value: self.proximity_precision.value(other.proximity_precision.value), + }, + typo_tolerance: TypoToleranceAnalytics { + enabled: self.typo_tolerance.enabled.or(other.typo_tolerance.enabled), + disable_on_attributes: self + .typo_tolerance + .disable_on_attributes + .or(other.typo_tolerance.disable_on_attributes), + disable_on_words: self + .typo_tolerance + .disable_on_words + .or(other.typo_tolerance.disable_on_words), + min_word_size_for_one_typo: self + .typo_tolerance + .min_word_size_for_one_typo + .or(other.typo_tolerance.min_word_size_for_one_typo), + min_word_size_for_two_typos: self + .typo_tolerance + .min_word_size_for_two_typos + 
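// Illustration of the `Option::or` merge applied to every scalar
// field in this `aggregate` impl: when both events set a field, the
// receiver (`self`) wins, so within one batching window the earliest
// captured value is the one reported.
fn main() {
    let first: Option<usize> = Some(3);
    let second: Option<usize> = Some(7);
    assert_eq!(first.or(second), Some(3)); // self wins over other
    assert_eq!(None.or(second), Some(7));  // otherwise take whichever side is set
}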
.or(other.typo_tolerance.min_word_size_for_two_typos), + }, + faceting: FacetingAnalytics { + max_values_per_facet: self + .faceting + .max_values_per_facet + .or(other.faceting.max_values_per_facet), + sort_facet_values_by_star_count: self + .faceting + .sort_facet_values_by_star_count + .or(other.faceting.sort_facet_values_by_star_count), + sort_facet_values_by_total: self + .faceting + .sort_facet_values_by_total + .or(other.faceting.sort_facet_values_by_total), + }, + pagination: PaginationAnalytics { + max_total_hits: self.pagination.max_total_hits.or(other.pagination.max_total_hits), + }, + stop_words: StopWordsAnalytics { + total: self.stop_words.total.or(other.stop_words.total), + }, + synonyms: SynonymsAnalytics { total: self.synonyms.total.or(other.synonyms.total) }, + embedders: EmbeddersAnalytics { + total: self.embedders.total.or(other.embedders.total), + sources: match (self.embedders.sources, other.embedders.sources) { + (None, None) => None, + (Some(sources), None) | (None, Some(sources)) => Some(sources), + (Some(this), Some(other)) => Some(this.union(&other).collect()), + }, + document_template_used: match ( + self.embedders.document_template_used, + other.embedders.document_template_used, + ) { + (None, None) => None, + (Some(used), None) | (None, Some(used)) => Some(used), + (Some(this), Some(other)) => Some(this | other), + }, + document_template_max_bytes: match ( + self.embedders.document_template_max_bytes, + other.embedders.document_template_max_bytes, + ) { + (None, None) => None, + (Some(bytes), None) | (None, Some(bytes)) => Some(bytes), + (Some(this), Some(other)) => Some(this.max(other)), + }, + binary_quantization_used: match ( + self.embedders.binary_quantization_used, + other.embedders.binary_quantization_used, + ) { + (None, None) => None, + (Some(bq), None) | (None, Some(bq)) => Some(bq), + (Some(this), Some(other)) => Some(this | other), + }, + }, + search_cutoff_ms: SearchCutoffMsAnalytics { + search_cutoff_ms: self + .search_cutoff_ms + .search_cutoff_ms + .or(other.search_cutoff_ms.search_cutoff_ms), + }, + locales: LocalesAnalytics { locales: self.locales.locales.or(other.locales.locales) }, + dictionary: DictionaryAnalytics { + total: self.dictionary.total.or(other.dictionary.total), + }, + separator_tokens: SeparatorTokensAnalytics { + total: self.separator_tokens.total.or(other.non_separator_tokens.total), + }, + non_separator_tokens: NonSeparatorTokensAnalytics { + total: self.non_separator_tokens.total.or(other.non_separator_tokens.total), + }, + } + } + + fn into_event(self) -> impl Serialize + where + Self: Sized, + { + self + } +} + +#[derive(Serialize, Default)] +struct RankingRulesAnalytics { + words_position: Option, + typo_position: Option, + proximity_position: Option, + attribute_position: Option, + sort_position: Option, + exactness_position: Option, + values: Option, +} + +impl RankingRulesAnalytics { + pub fn new(rr: Option<&Vec>) -> Self { + RankingRulesAnalytics { + words_position: rr.as_ref().map(|rr| { + rr.iter() + .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Words)) + }), + typo_position: rr.as_ref().map(|rr| { + rr.iter() + .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo)) + }), + proximity_position: rr.as_ref().map(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Proximity) + }) + }), + attribute_position: rr.as_ref().map(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Attribute) + 
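// The three non-trivial merge shapes used for the embedder statistics
// above, factored into free functions for illustration (these helpers
// are not part of the patch): set union, boolean OR, and numeric max.
// Each yields `Some` as soon as at least one side measured the field,
// and every field pairs with its own counterpart on `other`.
use std::collections::HashSet;

fn merge_union(a: Option<HashSet<String>>, b: Option<HashSet<String>>) -> Option<HashSet<String>> {
    match (a, b) {
        (None, None) => None,
        (Some(set), None) | (None, Some(set)) => Some(set),
        (Some(a), Some(b)) => Some(a.union(&b).cloned().collect()),
    }
}

fn merge_or(a: Option<bool>, b: Option<bool>) -> Option<bool> {
    match (a, b) {
        (None, None) => None,
        (Some(v), None) | (None, Some(v)) => Some(v),
        (Some(a), Some(b)) => Some(a | b),
    }
}

fn merge_max(a: Option<usize>, b: Option<usize>) -> Option<usize> {
    match (a, b) {
        (None, None) => None,
        (Some(v), None) | (None, Some(v)) => Some(v),
        (Some(a), Some(b)) => Some(a.max(b)),
    }
}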
}) + }), + sort_position: rr.as_ref().map(|rr| { + rr.iter() + .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort)) + }), + exactness_position: rr.as_ref().map(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Exactness) + }) + }), + values: rr.as_ref().map(|rr| { + rr.iter() + .filter(|s| { + matches!( + s, + meilisearch_types::settings::RankingRuleView::Asc(_) + | meilisearch_types::settings::RankingRuleView::Desc(_) + ) + }) + .map(|x| x.to_string()) + .collect::>() + .join(", ") + }), + } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { ranking_rules: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct SearchableAttributesAnalytics { + total: Option, + with_wildcard: bool, +} + +impl SearchableAttributesAnalytics { + pub fn new(setting: Option<&Vec>) -> Self { + Self { + total: setting.as_ref().map(|searchable| searchable.len()), + with_wildcard: setting + .as_ref() + .map(|searchable| searchable.iter().any(|searchable| searchable == "*")), + } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { searchable_attributes: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct DisplayedAttributesAnalytics { + total: usize, + with_wildcard: bool, +} + +impl DisplayedAttributesAnalytics { + pub fn new(displayed: Option<&Vec>) -> Self { + Self { + total: displayed.as_ref().map(|displayed| displayed.len()), + with_wildcard: displayed + .as_ref() + .map(|displayed| displayed.iter().any(|displayed| displayed == "*")), + } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { displayed_attributes: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct SortableAttributesAnalytics { + total: usize, + has_geo: bool, +} + +impl SortableAttributesAnalytics { + pub fn new(setting: Option<&std::collections::BTreeSet>) -> Self { + Self { + total: setting.as_ref().map(|sort| sort.len()), + has_geo: setting.as_ref().map(|sort| sort.contains("_geo")), + } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { sortable_attributes: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct FilterableAttributesAnalytics { + total: usize, + has_geo: bool, +} + +impl FilterableAttributesAnalytics { + pub fn new(setting: Option<&std::collections::BTreeSet>) -> Self { + Self { + total: setting.as_ref().map(|filter| filter.len()).unwrap_or(0), + has_geo: setting.as_ref().map(|filter| filter.contains("_geo")).unwrap_or(false), + } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { filterable_attributes: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct DistinctAttributeAnalytics { + set: bool, +} + +impl DistinctAttributeAnalytics { + pub fn new(distinct: Option<&String>) -> Self { + Self { set: distinct.is_some() } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { distinct_attribute: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct ProximityPrecisionAnalytics { + set: bool, + value: Option, +} + +impl ProximityPrecisionAnalytics { + pub fn new(precision: Option<&meilisearch_types::settings::ProximityPrecisionView>) -> Self { + Self { set: precision.is_some(), value: precision.unwrap_or_default() } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { proximity_precision: self, ..Default::default() } + } +} + +#[derive(Serialize, 
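// Self-contained illustration of the `iter().position(..)` +
// `matches!` idiom used by `RankingRulesAnalytics::new` above: the
// event records *where* a rule sits in the ordering, not its value
// (`Rule` is a stand-in for `RankingRuleView`).
enum Rule {
    Words,
    Typo,
    Asc(String),
}

fn typo_position(rules: &[Rule]) -> Option<usize> {
    rules.iter().position(|rule| matches!(rule, Rule::Typo))
}

fn main() {
    let rules = vec![Rule::Words, Rule::Typo, Rule::Asc("price".into())];
    assert_eq!(typo_position(&rules), Some(1)); // second in the ordering
    assert_eq!(typo_position(&[]), None);       // rule absent
}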
Default)] +struct TypoToleranceAnalytics { + enabled: Option, + disable_on_attributes: Option, + disable_on_words: Option, + min_word_size_for_one_typo: Option, + min_word_size_for_two_typos: Option, +} + +impl TypoToleranceAnalytics { + pub fn new(setting: Option<&meilisearch_types::settings::TypoSettings>) -> Self { + Self { + enabled: setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))), + disable_on_attributes: setting + .as_ref() + .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), + disable_on_words: setting + .as_ref() + .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), + min_word_size_for_one_typo: setting + .as_ref() + .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.one_typo.set())) + .flatten(), + min_word_size_for_two_typos: setting + .as_ref() + .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.two_typos.set())) + .flatten(), + } + } + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { typo_tolerance: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct FacetingAnalytics { + max_values_per_facet: Option, + sort_facet_values_by_star_count: Option, + sort_facet_values_by_total: Option, +} + +impl FacetingAnalytics { + pub fn new(setting: Option<&meilisearch_types::settings::FacetingSettings>) -> Self { + Self { + max_values_per_facet: setting.as_ref().and_then(|s| s.max_values_per_facet.set()), + sort_facet_values_by_star_count: setting.as_ref().and_then(|s| { + s.sort_facet_values_by + .as_ref() + .set() + .map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count)) + }), + sort_facet_values_by_total: setting + .as_ref() + .and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())), + } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { faceting: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct PaginationAnalytics { + max_total_hits: Option, +} + +impl PaginationAnalytics { + pub fn new(setting: Option<&meilisearch_types::settings::PaginationSettings>) -> Self { + Self { max_total_hits: setting.as_ref().and_then(|s| s.max_total_hits.set()) } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { pagination: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct StopWordsAnalytics { + total: Option, +} + +impl StopWordsAnalytics { + pub fn new(stop_words: Option<&BTreeSet>) -> Self { + Self { total: stop_words.as_ref().map(|stop_words| stop_words.len()) } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { stop_words: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct SynonymsAnalytics { + total: Option, +} + +impl SynonymsAnalytics { + pub fn new(synonyms: Option<&std::collections::BTreeMap>>) -> Self { + Self { total: synonyms.as_ref().map(|synonyms| synonyms.len()) } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { synonyms: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct EmbeddersAnalytics { + // last + total: Option, + // Merge the sources + sources: Option>, + // |= + document_template_used: Option, + // max + document_template_max_bytes: Option, + // |= + binary_quantization_used: Option, +} + +impl EmbeddersAnalytics { + pub fn new( + setting: Option< + &std::collections::BTreeMap< + String, + Setting, + >, + >, + ) -> Self { + let mut sources = std::collections::HashSet::new(); + + if let Some(s) = 
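// The `.as_ref().set()` chains in these constructors rely on milli's
// three-state `Setting` type; a simplified approximation of it (the
// real definition lives in meilisearch_types::milli::update):
enum Setting<T> {
    Set(T),
    Reset,
    NotSet,
}

impl<T> Setting<T> {
    // Some only when the user actually provided a value
    fn set(self) -> Option<T> {
        match self {
            Setting::Set(value) => Some(value),
            Setting::Reset | Setting::NotSet => None,
        }
    }

    // borrow without consuming, mirroring Option::as_ref
    fn as_ref(&self) -> Setting<&T> {
        match self {
            Setting::Set(value) => Setting::Set(value),
            Setting::Reset => Setting::Reset,
            Setting::NotSet => Setting::NotSet,
        }
    }
}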
&setting { + for source in s + .values() + .filter_map(|config| config.clone().set()) + .filter_map(|config| config.source.set()) + { + use meilisearch_types::milli::vector::settings::EmbedderSource; + match source { + EmbedderSource::OpenAi => sources.insert("openAi"), + EmbedderSource::HuggingFace => sources.insert("huggingFace"), + EmbedderSource::UserProvided => sources.insert("userProvided"), + EmbedderSource::Ollama => sources.insert("ollama"), + EmbedderSource::Rest => sources.insert("rest"), + }; + } + }; + + Self { + total: setting.as_ref().map(|s| s.len()), + sources, + document_template_used: setting.as_ref().map(|map| { + map.values() + .filter_map(|config| config.clone().set()) + .any(|config| config.document_template.set().is_some()) + }), + document_template_max_bytes: setting.as_ref().and_then(|map| { + map.values() + .filter_map(|config| config.clone().set()) + .filter_map(|config| config.document_template_max_bytes.set()) + .max() + }), + binary_quantization_used: setting.as_ref().map(|map| { + map.values() + .filter_map(|config| config.clone().set()) + .any(|config| config.binary_quantized.set().is_some()) + }), + } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { embedders: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +#[serde(transparent)] +struct SearchCutoffMsAnalytics { + search_cutoff_ms: Option, +} + +impl SearchCutoffMsAnalytics { + pub fn new(setting: Option<&u64>) -> Self { + Self { search_cutoff_ms: setting } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { search_cutoff_ms: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +#[serde(transparent)] +struct LocalesAnalytics { + locales: BTreeSet, +} + +impl LocalesAnalytics { + pub fn new( + rules: Option<&Vec>, + ) -> Self { + LocalesAnalytics { + locales: rules.as_ref().map(|rules| { + rules + .iter() + .flat_map(|rule| rule.locales.iter().cloned()) + .collect::>() + }), + } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { locales: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct DictionaryAnalytics { + total: usize, +} + +impl DictionaryAnalytics { + pub fn new(dictionary: Option<&std::collections::BTreeSet>) -> Self { + Self { total: dictionary.as_ref().map(|dictionary| dictionary.len()) } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { dictionary: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct SeparatorTokensAnalytics { + total: usize, +} + +impl SeparatorTokensAnalytics { + pub fn new(separator_tokens: Option<&std::collections::BTreeSet>) -> Self { + Self { total: separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()) } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { separator_tokens: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +struct NonSeparatorTokensAnalytics { + total: usize, +} + +impl NonSeparatorTokensAnalytics { + pub fn new(non_separator_tokens: Option<&std::collections::BTreeSet>) -> Self { + Self { + total: non_separator_tokens + .as_ref() + .map(|non_separator_tokens| non_separator_tokens.len()), + } + } + + pub fn to_settings(self) -> SettingsAnalytics { + SettingsAnalytics { non_separator_tokens: self, ..Default::default() } + } +} + pub async fn update_all( index_scheduler: GuardedData, Data>, index_uid: web::Path, body: AwebJson, DeserrJsonError>, req: HttpRequest, opt: web::Data, - analytics: web::Data, 
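// Why `SearchCutoffMsAnalytics` and `LocalesAnalytics` above carry
// `#[serde(transparent)]`: a single-field struct then serializes as
// the bare inner value rather than a nested object. A quick check:
use serde::Serialize;

#[derive(Serialize)]
#[serde(transparent)]
struct Cutoff {
    search_cutoff_ms: Option<u64>,
}

fn main() {
    let json = serde_json::to_value(Cutoff { search_cutoff_ms: Some(150) }).unwrap();
    assert_eq!(json, serde_json::json!(150)); // not {"search_cutoff_ms": 150}
}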
+ analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -729,103 +1218,44 @@ pub async fn update_all( let new_settings = validate_settings(new_settings, &index_scheduler)?; analytics.publish( - "Settings Updated".to_string(), - json!({ - "ranking_rules": { - "words_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Words))), - "typo_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Typo))), - "proximity_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Proximity))), - "attribute_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Attribute))), - "sort_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Sort))), - "exactness_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Exactness))), - "values": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().filter(|s| !matches!(s, RankingRuleView::Asc(_) | RankingRuleView::Desc(_)) ).map(|x| x.to_string()).collect::>().join(", ")), - }, - "searchable_attributes": { - "total": new_settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()), - "with_wildcard": new_settings.searchable_attributes.as_ref().set().map(|searchable| searchable.iter().any(|searchable| searchable == "*")), - }, - "displayed_attributes": { - "total": new_settings.displayed_attributes.as_ref().set().map(|displayed| displayed.len()), - "with_wildcard": new_settings.displayed_attributes.as_ref().set().map(|displayed| displayed.iter().any(|displayed| displayed == "*")), - }, - "sortable_attributes": { - "total": new_settings.sortable_attributes.as_ref().set().map(|sort| sort.len()), - "has_geo": new_settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")), - }, - "filterable_attributes": { - "total": new_settings.filterable_attributes.as_ref().set().map(|filter| filter.len()), - "has_geo": new_settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")), - }, - "distinct_attribute": { - "set": new_settings.distinct_attribute.as_ref().set().is_some() - }, - "proximity_precision": { - "set": new_settings.proximity_precision.as_ref().set().is_some(), - "value": new_settings.proximity_precision.as_ref().set().copied().unwrap_or_default() - }, - "typo_tolerance": { - "enabled": new_settings.typo_tolerance - .as_ref() - .set() - .and_then(|s| s.enabled.as_ref().set()) - .copied(), - "disable_on_attributes": new_settings.typo_tolerance - .as_ref() - .set() - .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), - "disable_on_words": new_settings.typo_tolerance - .as_ref() - .set() - .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), - "min_word_size_for_one_typo": new_settings.typo_tolerance - .as_ref() - .set() - .and_then(|s| s.min_word_size_for_typos - .as_ref() - .set() - .map(|s| s.one_typo.set())) - .flatten(), - "min_word_size_for_two_typos": new_settings.typo_tolerance - .as_ref() - .set() - .and_then(|s| s.min_word_size_for_typos - .as_ref() - .set() - .map(|s| s.two_typos.set())) - .flatten(), - }, - "faceting": { - "max_values_per_facet": new_settings.faceting - .as_ref() - .set() - .and_then(|s| 
s.max_values_per_facet.as_ref().set()), - "sort_facet_values_by_star_count": new_settings.faceting - .as_ref() - .set() - .and_then(|s| { - s.sort_facet_values_by.as_ref().set().map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count)) - }), - "sort_facet_values_by_total": new_settings.faceting - .as_ref() - .set() - .and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())), - }, - "pagination": { - "max_total_hits": new_settings.pagination - .as_ref() - .set() - .and_then(|s| s.max_total_hits.as_ref().set()), - }, - "stop_words": { - "total": new_settings.stop_words.as_ref().set().map(|stop_words| stop_words.len()), - }, - "synonyms": { - "total": new_settings.synonyms.as_ref().set().map(|synonyms| synonyms.len()), - }, - "embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set()), - "search_cutoff_ms": new_settings.search_cutoff_ms.as_ref().set(), - "locales": new_settings.localized_attributes.as_ref().set().map(|rules| rules.iter().flat_map(|rule| rule.locales.iter().cloned()).collect::>()), - }), + SettingsAnalytics { + ranking_rules: RankingRulesAnalytics::new(new_settings.ranking_rules.as_ref().set()), + searchable_attributes: SearchableAttributesAnalytics::new( + new_settings.searchable_attributes.as_ref().set(), + ), + displayed_attributes: DisplayedAttributesAnalytics::new( + new_settings.displayed_attributes.as_ref().set(), + ), + sortable_attributes: SortableAttributesAnalytics::new( + new_settings.sortable_attributes.as_ref().set(), + ), + filterable_attributes: FilterableAttributesAnalytics::new( + new_settings.filterable_attributes.as_ref().set(), + ), + distinct_attribute: DistinctAttributeAnalytics::new( + new_settings.distinct_attribute.as_ref().set(), + ), + proximity_precision: ProximityPrecisionAnalytics::new( + new_settings.proximity_precision.as_ref().set(), + ), + typo_tolerance: TypoToleranceAnalytics::new(new_settings.typo_tolerance.as_ref().set()), + faceting: FacetingAnalytics::new(new_settings.faceting.as_ref().set()), + pagination: PaginationAnalytics::new(new_settings.pagination.as_ref().set()), + stop_words: StopWordsAnalytics::new(new_settings.stop_words.as_ref().set()), + synonyms: SynonymsAnalytics::new(new_settings.synonyms.as_ref().set()), + embedders: EmbeddersAnalytics::new(new_settings.embedders.as_ref().set()), + search_cutoff_ms: SearchCutoffMsAnalytics::new( + new_settings.search_cutoff_ms.as_ref().set(), + ), + locales: LocalesAnalytics::new(new_settings.localized_attributes.as_ref().set()), + dictionary: DictionaryAnalytics::new(new_settings.dictionary.as_ref().set()), + separator_tokens: SeparatorTokensAnalytics::new( + new_settings.separator_tokens.as_ref().set(), + ), + non_separator_tokens: NonSeparatorTokensAnalytics::new( + new_settings.non_separator_tokens.as_ref().set(), + ), + }, Some(&req), ); diff --git a/meilisearch/src/routes/swap_indexes.rs b/meilisearch/src/routes/swap_indexes.rs index 51a7b0707..34e904230 100644 --- a/meilisearch/src/routes/swap_indexes.rs +++ b/meilisearch/src/routes/swap_indexes.rs @@ -40,7 +40,7 @@ pub async fn swap_indexes( analytics.publish( "Indexes Swapped".to_string(), json!({ - "swap_operation_number": params.len(), + "swap_operation_number": params.len(), // Return the max ever encountered }), Some(&req), ); From e66fccc3f2e8c9ef9f576f9484d1135bf02716e6 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 16 Oct 2024 15:51:48 +0200 Subject: [PATCH 02/22] get rids of the analytics closure --- meilisearch/src/routes/indexes/settings.rs | 216 
+++------------------ 1 file changed, 24 insertions(+), 192 deletions(-) diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index 112f8671b..db83cb39b 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -14,7 +14,6 @@ use meilisearch_types::settings::{ }; use meilisearch_types::tasks::KindWithContent; use serde::Serialize; -use serde_json::json; use tracing::debug; use crate::analytics::{Aggregate, Analytics}; @@ -25,7 +24,7 @@ use crate::Opt; #[macro_export] macro_rules! make_setting_route { - ($route:literal, $update_verb:ident, $type:ty, $err_ty:ty, $attr:ident, $camelcase_attr:literal, $analytics_var:ident, $analytics:expr) => { + ($route:literal, $update_verb:ident, $type:ty, $err_ty:ty, $attr:ident, $camelcase_attr:literal, $analytics:ident) => { pub mod $attr { use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse, Resource}; @@ -85,7 +84,7 @@ macro_rules! make_setting_route { body: deserr::actix_web::AwebJson, $err_ty>, req: HttpRequest, opt: web::Data, - $analytics_var: web::Data, + analytics: web::Data, ) -> std::result::Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -93,7 +92,10 @@ macro_rules! make_setting_route { debug!(parameters = ?body, "Update settings"); #[allow(clippy::redundant_closure_call)] - $analytics(&body, &req); + analytics.publish( + $crate::routes::indexes::settings::$analytics::new(body.as_ref()).to_settings(), + Some(&req), + ); let new_settings = Settings { $attr: match body { @@ -165,13 +167,7 @@ make_setting_route!( >, filterable_attributes, "filterableAttributes", - analytics, - |setting: &Option>, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::FilterableAttributesAnalytics::new(setting.as_ref()).to_settings(), - Some(req), - ); - } + FilterableAttributesAnalytics ); make_setting_route!( @@ -183,13 +179,7 @@ make_setting_route!( >, sortable_attributes, "sortableAttributes", - analytics, - |setting: &Option>, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::SortableAttributesAnalytics::new(setting.as_ref()).to_settings(), - Some(req), - ); - } + SortableAttributesAnalytics ); make_setting_route!( @@ -201,13 +191,7 @@ make_setting_route!( >, displayed_attributes, "displayedAttributes", - analytics, - |displayed: &Option>, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::DisplayedAttributesAnalytics::new(displayed.as_ref()).to_settings(), - Some(req), - ); - } + DisplayedAttributesAnalytics ); make_setting_route!( @@ -219,13 +203,7 @@ make_setting_route!( >, typo_tolerance, "typoTolerance", - analytics, - |setting: &Option, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::TypoToleranceAnalytics::new(setting.as_ref()).to_settings(), - Some(req), - ); - } + TypoToleranceAnalytics ); make_setting_route!( @@ -237,13 +215,7 @@ make_setting_route!( >, searchable_attributes, "searchableAttributes", - analytics, - |setting: &Option>, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::SearchableAttributesAnalytics::new(setting.as_ref()).to_settings(), - Some(req), - ); - } + SearchableAttributesAnalytics ); make_setting_route!( @@ -255,13 +227,7 @@ make_setting_route!( >, stop_words, "stopWords", - analytics, - |stop_words: &Option>, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::StopWordsAnalytics::new(stop_words.as_ref()).to_settings(), - 
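// Reduced model of the change this patch makes to the route macro:
// instead of receiving a closure, it now takes the analytics *type*
// as an ident and builds the event itself. The macro below is only a
// sketch of that mechanism, not the real `make_setting_route!`.
macro_rules! settings_event {
    ($analytics:ident, $setting:expr) => {
        $analytics::new($setting).to_settings()
    };
}

// usage inside a handler body, analogous to the expansion above:
//     analytics.publish(settings_event!(PaginationAnalytics, body.as_ref()), Some(&req));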
Some(req), - ); - } + StopWordsAnalytics ); make_setting_route!( @@ -273,13 +239,7 @@ make_setting_route!( >, non_separator_tokens, "nonSeparatorTokens", - analytics, - |non_separator_tokens: &Option>, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::NonSeparatorTokensAnalytics::new(non_separator_tokens.as_ref()).to_settings(), - Some(req), - ); - } + NonSeparatorTokensAnalytics ); make_setting_route!( @@ -291,13 +251,7 @@ make_setting_route!( >, separator_tokens, "separatorTokens", - analytics, - |separator_tokens: &Option>, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::SeparatorTokensAnalytics::new(separator_tokens.as_ref()).to_settings(), - Some(req), - ); - } + SeparatorTokensAnalytics ); make_setting_route!( @@ -309,13 +263,7 @@ make_setting_route!( >, dictionary, "dictionary", - analytics, - |dictionary: &Option>, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::DictionaryAnalytics::new(dictionary.as_ref()).to_settings(), - Some(req), - ); - } + DictionaryAnalytics ); make_setting_route!( @@ -327,13 +275,7 @@ make_setting_route!( >, synonyms, "synonyms", - analytics, - |synonyms: &Option>>, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::SynonymsAnalytics::new(synonyms.as_ref()).to_settings(), - Some(req), - ); - } + SynonymsAnalytics ); make_setting_route!( @@ -345,13 +287,7 @@ make_setting_route!( >, distinct_attribute, "distinctAttribute", - analytics, - |distinct: &Option, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::DistinctAttributeAnalytics::new(distinct.as_ref()).to_settings(), - Some(req), - ); - } + DistinctAttributeAnalytics ); make_setting_route!( @@ -363,13 +299,7 @@ make_setting_route!( >, proximity_precision, "proximityPrecision", - analytics, - |precision: &Option, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::ProximityPrecisionAnalytics::new(precision.as_ref()).to_settings(), - Some(req), - ); - } + ProximityPrecisionAnalytics ); make_setting_route!( @@ -381,13 +311,7 @@ make_setting_route!( >, localized_attributes, "localizedAttributes", - analytics, - |rules: &Option>, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::LocalesAnalytics::new(rules.as_ref()).to_settings(), - Some(req), - ); - } + LocalesAnalytics ); make_setting_route!( @@ -399,13 +323,7 @@ make_setting_route!( >, ranking_rules, "rankingRules", - analytics, - |setting: &Option>, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::RankingRulesAnalytics::new(setting.as_ref()).to_settings(), - Some(req), - ); - } + RankingRulesAnalytics ); make_setting_route!( @@ -417,13 +335,7 @@ make_setting_route!( >, faceting, "faceting", - analytics, - |setting: &Option, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::FacetingAnalytics::new(setting.as_ref()).to_settings(), - Some(req), - ); - } + FacetingAnalytics ); make_setting_route!( @@ -435,13 +347,7 @@ make_setting_route!( >, pagination, "pagination", - analytics, - |setting: &Option, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::PaginationAnalytics::new(setting.as_ref()).to_settings(), - Some(req), - ); - } + PaginationAnalytics ); make_setting_route!( @@ -453,77 +359,9 @@ make_setting_route!( >, embedders, "embedders", - analytics, - |setting: &Option>>, req: &HttpRequest| { - analytics.publish( - 
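// For reference, one of these invocations with the generic arguments
// that the diff extraction dropped; the exact type paths are
// reconstructed and should be treated as assumptions:
make_setting_route!(
    "/stop-words",
    put,
    std::collections::BTreeSet<String>,
    meilisearch_types::deserr::DeserrJsonError<
        meilisearch_types::error::deserr_codes::InvalidSettingsStopWords,
    >,
    stop_words,
    "stopWords",
    StopWordsAnalytics
);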
crate::routes::indexes::settings::EmbeddersAnalytics::new(setting.as_ref()).to_settings(), - Some(req), - ); - } + EmbeddersAnalytics ); -fn embedder_analytics( - setting: Option< - &std::collections::BTreeMap< - String, - Setting, - >, - >, -) -> serde_json::Value { - let mut sources = std::collections::HashSet::new(); - - if let Some(s) = &setting { - for source in s - .values() - .filter_map(|config| config.clone().set()) - .filter_map(|config| config.source.set()) - { - use meilisearch_types::milli::vector::settings::EmbedderSource; - match source { - EmbedderSource::OpenAi => sources.insert("openAi"), - EmbedderSource::HuggingFace => sources.insert("huggingFace"), - EmbedderSource::UserProvided => sources.insert("userProvided"), - EmbedderSource::Ollama => sources.insert("ollama"), - EmbedderSource::Rest => sources.insert("rest"), - }; - } - }; - - let document_template_used = setting.as_ref().map(|map| { - map.values() - .filter_map(|config| config.clone().set()) - .any(|config| config.document_template.set().is_some()) - }); - - let document_template_max_bytes = setting.as_ref().and_then(|map| { - map.values() - .filter_map(|config| config.clone().set()) - .filter_map(|config| config.document_template_max_bytes.set()) - .max() - }); - - let binary_quantization_used = setting.as_ref().map(|map| { - map.values() - .filter_map(|config| config.clone().set()) - .any(|config| config.binary_quantized.set().is_some()) - }); - - json!( - { - // last - "total": setting.as_ref().map(|s| s.len()), - // Merge the sources - "sources": sources, - // |= - "document_template_used": document_template_used, - // max - "document_template_max_bytes": document_template_max_bytes, - // |= - "binary_quantization_used": binary_quantization_used, - } - ) -} - make_setting_route!( "/search-cutoff-ms", put, @@ -533,13 +371,7 @@ make_setting_route!( >, search_cutoff_ms, "searchCutoffMs", - analytics, - |setting: &Option, req: &HttpRequest| { - analytics.publish( - crate::routes::indexes::settings::SearchCutoffMsAnalytics::new(setting.as_ref()).to_settings(), - Some(req), - ); - } + SearchCutoffMsAnalytics ); macro_rules! 
generate_configure { From fdeb47fb549a242d318a17195e1a804e50aef5dd Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 16 Oct 2024 17:16:33 +0200 Subject: [PATCH 03/22] implements all routes --- meilisearch/src/analytics/mod.rs | 14 +- .../src/analytics/segment_analytics.rs | 239 +++++++----------- meilisearch/src/routes/dump.rs | 2 +- meilisearch/src/routes/features.rs | 8 +- meilisearch/src/routes/indexes/documents.rs | 20 +- .../src/routes/indexes/facet_search.rs | 2 +- meilisearch/src/routes/indexes/mod.rs | 4 +- meilisearch/src/routes/indexes/search.rs | 4 +- meilisearch/src/routes/indexes/settings.rs | 152 ++++++----- meilisearch/src/routes/indexes/similar.rs | 13 +- meilisearch/src/routes/multi_search.rs | 6 +- meilisearch/src/routes/snapshot.rs | 7 +- meilisearch/src/routes/swap_indexes.rs | 32 ++- meilisearch/src/routes/tasks.rs | 129 +++++++--- 14 files changed, 337 insertions(+), 295 deletions(-) diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index a8658d830..a0ca47d8f 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -16,7 +16,9 @@ use serde::Serialize; // if the feature analytics is enabled we use the real analytics pub type SegmentAnalytics = segment_analytics::SegmentAnalytics; pub use segment_analytics::SearchAggregator; -pub type SimilarAggregator = segment_analytics::SimilarAggregator; +pub use segment_analytics::SimilarAggregator; + +use self::segment_analytics::extract_user_agents; pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator; pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator; @@ -32,14 +34,11 @@ macro_rules! empty_analytics { $event_name } - fn aggregate(self, other: Self) -> Self - where - Self: Sized, - { + fn aggregate(self, _other: Self) -> Self { self } - fn into_event(self) -> serde_json::Value { + fn into_event(self) -> impl serde::Serialize { serde_json::json!({}) } } @@ -150,7 +149,8 @@ impl Analytics { } /// The method used to publish most analytics that do not need to be batched every hours - pub fn publish(&self, send: impl Aggregate, request: Option<&HttpRequest>) { + pub fn publish(&self, send: impl Aggregate, request: &HttpRequest) { let Some(segment) = self.inner else { return }; + let user_agents = extract_user_agents(request); } } diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 8a6dfd780..0572267e1 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -71,25 +71,8 @@ pub fn extract_user_agents(request: &HttpRequest) -> Vec { .collect() } -pub enum AnalyticsMsg { - BatchMessage(Track), - AggregateGetSearch(SearchAggregator), - AggregatePostSearch(SearchAggregator), - AggregateGetSimilar(SimilarAggregator), - AggregatePostSimilar(SimilarAggregator), - AggregatePostMultiSearch(MultiSearchAggregator), - AggregatePostFacetSearch(FacetSearchAggregator), - AggregateAddDocuments(DocumentsAggregator), - AggregateDeleteDocuments(DocumentsDeletionAggregator), - AggregateUpdateDocuments(DocumentsAggregator), - AggregateEditDocumentsByFunction(EditDocumentsByFunctionAggregator), - AggregateGetFetchDocuments(DocumentsFetchAggregator), - AggregatePostFetchDocuments(DocumentsFetchAggregator), -} - pub struct SegmentAnalytics { pub instance_uid: InstanceUid, - sender: Sender, pub user: User, } @@ -1083,8 +1066,6 @@ impl Aggregate for SearchAggregator { #[derive(Default)] pub struct MultiSearchAggregator { - timestamp: 
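// `publish` above is shown truncated; one plausible shape for the
// batching it feeds (an assumption: the real implementation lives in
// segment_analytics.rs and may differ) is a map keyed by the event's
// TypeId, folding same-typed events together between sends:
use std::any::{Any, TypeId};
use std::collections::HashMap;

trait Aggregate: 'static {
    fn event_name(&self) -> &'static str;
    fn aggregate(self, other: Self) -> Self
    where
        Self: Sized;
}

#[derive(Default)]
struct EventBatcher {
    events: HashMap<TypeId, Box<dyn Any>>,
}

impl EventBatcher {
    fn push<T: Aggregate>(&mut self, event: T) {
        let merged = match self.events.remove(&TypeId::of::<T>()) {
            // two events of the same type fold into a single datapoint
            Some(previous) => match previous.downcast::<T>() {
                Ok(previous) => (*previous).aggregate(event),
                Err(_) => event, // unreachable: the map is keyed by TypeId
            },
            None => event,
        };
        self.events.insert(TypeId::of::<T>(), Box::new(merged));
    }
}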
Option, - // requests total_received: usize, total_succeeded: usize, @@ -1103,9 +1084,6 @@ pub struct MultiSearchAggregator { // federation use_federation: bool, - - // context - user_agents: HashSet, } impl MultiSearchAggregator { @@ -1113,10 +1091,6 @@ impl MultiSearchAggregator { federated_search: &FederatedSearch, request: &HttpRequest, ) -> Self { - let timestamp = Some(OffsetDateTime::now_utc()); - - let user_agents = extract_user_agents(request).into_iter().collect(); - let use_federation = federated_search.federation.is_some(); let distinct_indexes: HashSet<_> = federated_search @@ -1166,7 +1140,6 @@ impl MultiSearchAggregator { federated_search.queries.iter().any(|query| query.show_ranking_score_details); Self { - timestamp, total_received: 1, total_succeeded: 0, total_distinct_index_count: distinct_indexes.len(), @@ -1174,7 +1147,6 @@ impl MultiSearchAggregator { total_search_count: federated_search.queries.len(), show_ranking_score, show_ranking_score_details, - user_agents, use_federation, } } @@ -1182,15 +1154,20 @@ impl MultiSearchAggregator { pub fn succeed(&mut self) { self.total_succeeded = self.total_succeeded.saturating_add(1); } +} + +impl Aggregate for MultiSearchAggregator { + fn event_name(&self) -> &'static str { + "Documents Searched by Multi-Search POST" + } /// Aggregate one [MultiSearchAggregator] into another. - pub fn aggregate(&mut self, other: Self) { + fn aggregate(mut self, other: Self) -> Self { // write the aggregate in a way that will cause a compilation error if a field is added. // get ownership of self, replacing it by a default value. - let this = std::mem::take(self); + let this = self; - let timestamp = this.timestamp.or(other.timestamp); let total_received = this.total_received.saturating_add(other.total_received); let total_succeeded = this.total_succeeded.saturating_add(other.total_succeeded); let total_distinct_index_count = @@ -1207,75 +1184,53 @@ impl MultiSearchAggregator { user_agents.insert(user_agent); } - // need all fields or compile error - let mut aggregated = Self { - timestamp, + Self { total_received, total_succeeded, total_distinct_index_count, total_single_index, total_search_count, - user_agents, show_ranking_score, show_ranking_score_details, use_federation, - // do not add _ or ..Default::default() here - }; - - // replace the default self with the aggregated value - std::mem::swap(self, &mut aggregated); + } } - pub fn into_event(self, user: &User, event_name: &str) -> Option { + fn into_event(self) -> impl Serialize { let Self { - timestamp, total_received, total_succeeded, total_distinct_index_count, total_single_index, total_search_count, - user_agents, show_ranking_score, show_ranking_score_details, use_federation, } = self; - if total_received == 0 { - None - } else { - let properties = json!({ - "user-agent": user_agents, - "requests": { - "total_succeeded": total_succeeded, - "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics - "total_received": total_received, - }, - "indexes": { - "total_single_index": total_single_index, - "total_distinct_index_count": total_distinct_index_count, - "avg_distinct_index_count": (total_distinct_index_count as f64) / (total_received as f64), // not 0 else returned early - }, - "searches": { - "total_search_count": total_search_count, - "avg_search_count": (total_search_count as f64) / (total_received as f64), - }, - "scoring": { - "show_ranking_score": show_ranking_score, - "show_ranking_score_details": show_ranking_score_details, - }, - 
"federation": { - "use_federation": use_federation, - } - }); - - Some(Track { - timestamp, - user: user.clone(), - event: event_name.to_string(), - properties, - ..Default::default() - }) - } + json!({ + "requests": { + "total_succeeded": total_succeeded, + "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics + "total_received": total_received, + }, + "indexes": { + "total_single_index": total_single_index, + "total_distinct_index_count": total_distinct_index_count, + "avg_distinct_index_count": (total_distinct_index_count as f64) / (total_received as f64), // not 0 else returned early + }, + "searches": { + "total_search_count": total_search_count, + "avg_search_count": (total_search_count as f64) / (total_received as f64), + }, + "scoring": { + "show_ranking_score": show_ranking_score, + "show_ranking_score_details": show_ranking_score_details, + }, + "federation": { + "use_federation": use_federation, + } + }) } } @@ -1752,13 +1707,13 @@ impl DocumentsFetchAggregator { } } +aggregate_methods!( + SimilarPOST => "Similar POST", + SimilarGET => "Similar GET", +); + #[derive(Default)] -pub struct SimilarAggregator { - timestamp: Option, - - // context - user_agents: HashSet, - +pub struct SimilarAggregator { // requests total_received: usize, total_succeeded: usize, @@ -1787,9 +1742,11 @@ pub struct SimilarAggregator { show_ranking_score: bool, show_ranking_score_details: bool, ranking_score_threshold: bool, + + marker: std::marker::PhantomData, } -impl SimilarAggregator { +impl SimilarAggregator { #[allow(clippy::field_reassign_with_default)] pub fn from_query(query: &SimilarQuery, request: &HttpRequest) -> Self { let SimilarQuery { @@ -1854,12 +1811,16 @@ impl SimilarAggregator { self.time_spent.push(*processing_time_ms as usize); } +} + +impl Aggregate for SimilarAggregator { + fn event_name(&self) -> &'static str { + Method::event_name() + } /// Aggregate one [SimilarAggregator] into another. 
- pub fn aggregate(&mut self, mut other: Self) { + fn aggregate(mut self, mut other: Self) -> Self { let Self { - timestamp, - user_agents, total_received, total_succeeded, ref mut time_spent, @@ -1875,17 +1836,9 @@ impl SimilarAggregator { show_ranking_score_details, ranking_score_threshold, retrieve_vectors, + marker: _, } = other; - if self.timestamp.is_none() { - self.timestamp = timestamp; - } - - // context - for user_agent in user_agents.into_iter() { - self.user_agents.insert(user_agent); - } - // request self.total_received = self.total_received.saturating_add(total_received); self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); @@ -1917,12 +1870,12 @@ impl SimilarAggregator { self.show_ranking_score |= show_ranking_score; self.show_ranking_score_details |= show_ranking_score_details; self.ranking_score_threshold |= ranking_score_threshold; + + self } - pub fn into_event(self, user: &User, event_name: &str) -> Option { + fn into_event(self) -> impl Serialize { let Self { - timestamp, - user_agents, total_received, total_succeeded, time_spent, @@ -1938,56 +1891,44 @@ impl SimilarAggregator { show_ranking_score_details, ranking_score_threshold, retrieve_vectors, + marker: _, } = self; - if total_received == 0 { - None - } else { - // we get all the values in a sorted manner - let time_spent = time_spent.into_sorted_vec(); - // the index of the 99th percentage of value - let percentile_99th = time_spent.len() * 99 / 100; - // We are only interested by the slowest value of the 99th fastest results - let time_spent = time_spent.get(percentile_99th); + // we get all the values in a sorted manner + let time_spent = time_spent.into_sorted_vec(); + // the index of the 99th percentage of value + let percentile_99th = time_spent.len() * 99 / 100; + // We are only interested by the slowest value of the 99th fastest results + let time_spent = time_spent.get(percentile_99th); - let properties = json!({ - "user-agent": user_agents, - "requests": { - "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), - "total_succeeded": total_succeeded, - "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics - "total_received": total_received, - }, - "filter": { - "with_geoRadius": filter_with_geo_radius, - "with_geoBoundingBox": filter_with_geo_bounding_box, - "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), - "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), - }, - "vector": { - "retrieve_vectors": retrieve_vectors, - }, - "pagination": { - "max_limit": max_limit, - "max_offset": max_offset, - }, - "formatting": { - "max_attributes_to_retrieve": max_attributes_to_retrieve, - }, - "scoring": { - "show_ranking_score": show_ranking_score, - "show_ranking_score_details": show_ranking_score_details, - "ranking_score_threshold": ranking_score_threshold, - }, - }); - - Some(Track { - timestamp, - user: user.clone(), - event: event_name.to_string(), - properties, - ..Default::default() - }) - } + json!({ + "requests": { + "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), + "total_succeeded": total_succeeded, + "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics + "total_received": total_received, + }, + "filter": { + "with_geoRadius": filter_with_geo_radius, + "with_geoBoundingBox": filter_with_geo_bounding_box, + "avg_criteria_number": 
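// The 99th-percentile lookup above, isolated and runnable:
// `time_spent` is a BinaryHeap<usize> of per-request milliseconds,
// and `get` returns None when no request was recorded.
use std::collections::BinaryHeap;

fn p99(time_spent: BinaryHeap<usize>) -> Option<usize> {
    let sorted = time_spent.into_sorted_vec(); // ascending
    let percentile_99th = sorted.len() * 99 / 100;
    sorted.get(percentile_99th).copied()
}

fn main() {
    let heap: BinaryHeap<usize> = (1..=100).collect();
    assert_eq!(p99(heap), Some(100));
    assert_eq!(p99(BinaryHeap::new()), None);
}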
format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), + "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), + }, + "vector": { + "retrieve_vectors": retrieve_vectors, + }, + "pagination": { + "max_limit": max_limit, + "max_offset": max_offset, + }, + "formatting": { + "max_attributes_to_retrieve": max_attributes_to_retrieve, + }, + "scoring": { + "show_ranking_score": show_ranking_score, + "show_ranking_score_details": show_ranking_score_details, + "ranking_score_threshold": ranking_score_threshold, + } + }) } } diff --git a/meilisearch/src/routes/dump.rs b/meilisearch/src/routes/dump.rs index 0fdeef5ed..c78dc4dad 100644 --- a/meilisearch/src/routes/dump.rs +++ b/meilisearch/src/routes/dump.rs @@ -26,7 +26,7 @@ pub async fn create_dump( opt: web::Data, analytics: web::Data, ) -> Result { - analytics.publish(DumpAnalytics::default(), Some(&req)); + analytics.publish(DumpAnalytics::default(), &req); let task = KindWithContent::DumpCreation { keys: auth_controller.list_keys()?, diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index 24c89938d..4ee5b37b0 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -35,7 +35,7 @@ async fn get_features( ) -> HttpResponse { let features = index_scheduler.features(); - analytics.publish(GetExperimentalFeatureAnalytics::default(), Some(&req)); + analytics.publish(GetExperimentalFeatureAnalytics::default(), &req); let features = features.runtime_features(); debug!(returns = ?features, "Get features"); HttpResponse::Ok().json(features) @@ -83,8 +83,8 @@ impl Aggregate for PatchExperimentalFeatureAnalytics { } } - fn into_event(self) -> serde_json::Value { - serde_json::to_value(self).unwrap() + fn into_event(self) -> impl Serialize { + self } } @@ -131,7 +131,7 @@ async fn patch_features( edit_documents_by_function, contains_filter, }, - Some(&req), + &req, ); index_scheduler.put_runtime_features(new_features)?; debug!(returns = ?new_features, "Patch features"); diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 8f4cd026d..6dece61e6 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -194,7 +194,7 @@ pub async fn get_document( retrieve_vectors: param_retrieve_vectors.0, ..Default::default() }, - Some(&req), + &req, ); let index = index_scheduler.index(&index_uid)?; @@ -253,7 +253,7 @@ pub async fn delete_document( per_document_id: true, ..Default::default() }, - Some(&req), + &req, ); let task = KindWithContent::DocumentDeletion { @@ -319,7 +319,7 @@ pub async fn documents_by_query_post( max_offset: body.offset, ..Default::default() }, - Some(&req), + &req, ); documents_by_query(&index_scheduler, index_uid, body) @@ -361,7 +361,7 @@ pub async fn get_documents( max_offset: query.offset, ..Default::default() }, - Some(&req), + &req, ); documents_by_query(&index_scheduler, index_uid, query) @@ -486,7 +486,7 @@ pub async fn replace_documents( index_creation: index_scheduler.index_exists(&index_uid).map_or(true, |x| !x), method: PhantomData, }, - Some(&req), + &req, ); let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); @@ -543,7 +543,7 @@ pub async fn update_documents( index_creation: index_scheduler.index_exists(&index_uid).map_or(true, |x| !x), method: PhantomData, }, - Some(&req), + &req, ); let allow_index_creation = 
index_scheduler.filters().allow_index_creation(&index_uid); @@ -718,7 +718,7 @@ pub async fn delete_documents_batch( analytics.publish( DocumentsDeletionAggregator { total_received: 1, per_batch: true, ..Default::default() }, - Some(&req), + &req, ); let ids = body @@ -761,7 +761,7 @@ pub async fn delete_documents_by_filter( analytics.publish( DocumentsDeletionAggregator { total_received: 1, per_filter: true, ..Default::default() }, - Some(&req), + &req, ); // we ensure the filter is well formed before enqueuing it @@ -847,7 +847,7 @@ pub async fn edit_documents_by_function( with_context: params.context.is_some(), index_creation: index_scheduler.index(&index_uid).is_err(), }, - Some(&req), + &req, ); let DocumentEditionByFunction { filter, context, function } = params; @@ -902,7 +902,7 @@ pub async fn clear_all_documents( let index_uid = IndexUid::try_from(index_uid.into_inner())?; analytics.publish( DocumentsDeletionAggregator { total_received: 1, clear_all: true, ..Default::default() }, - Some(&req), + &req, ); let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() }; diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 1e9d0e15e..f3c74a388 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -200,7 +200,7 @@ pub async fn search( if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); } - analytics.publish(aggregate, Some(&req)); + analytics.publish(aggregate, &req); let search_result = search_result?; diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index 483a48a16..f926f663c 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -160,7 +160,7 @@ pub async fn create_index( if allow_index_creation { analytics.publish( IndexCreatedAggregate { primary_key: primary_key.iter().cloned().collect() }, - Some(&req), + &req, ); let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key }; @@ -247,7 +247,7 @@ pub async fn update_index( let body = body.into_inner(); analytics.publish( IndexUpdatedAggregate { primary_key: body.primary_key.iter().cloned().collect() }, - Some(&req), + &req, ); let task = KindWithContent::IndexUpdate { diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index f833a57d2..538c46fd0 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -255,7 +255,7 @@ pub async fn search_with_url_query( if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); } - analytics.publish(aggregate, Some(&req)); + analytics.publish(aggregate, &req); let search_result = search_result?; @@ -303,7 +303,7 @@ pub async fn search_with_post( MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc(); } } - analytics.publish(aggregate, Some(&req)); + analytics.publish(aggregate, &req); let search_result = search_result?; diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index db83cb39b..bb2f6792d 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -8,6 +8,7 @@ use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::facet_values_sort::FacetValuesSort; use meilisearch_types::index_uid::IndexUid; +use meilisearch_types::locales::Locale; use 
meilisearch_types::milli::update::Setting; use meilisearch_types::settings::{ settings, ProximityPrecisionView, RankingRuleView, SecretPolicy, Settings, Unchecked, @@ -94,7 +95,7 @@ macro_rules! make_setting_route { #[allow(clippy::redundant_closure_call)] analytics.publish( $crate::routes::indexes::settings::$analytics::new(body.as_ref()).to_settings(), - Some(&req), + &req, ); let new_settings = Settings { @@ -491,11 +492,11 @@ impl Aggregate for SettingsAnalytics { has_geo: self.filterable_attributes.has_geo.or(other.filterable_attributes.has_geo), }, distinct_attribute: DistinctAttributeAnalytics { - set: self.distinct_attribute.set.or(other.distinct_attribute.set), + set: self.distinct_attribute.set | other.distinct_attribute.set, }, proximity_precision: ProximityPrecisionAnalytics { - set: self.proximity_precision.set(other.proximity_precision.set), - value: self.proximity_precision.value(other.proximity_precision.value), + set: self.proximity_precision.set | other.proximity_precision.set, + value: self.proximity_precision.value.or(other.proximity_precision.value), }, typo_tolerance: TypoToleranceAnalytics { enabled: self.typo_tolerance.enabled.or(other.typo_tolerance.enabled), @@ -542,7 +543,7 @@ impl Aggregate for SettingsAnalytics { sources: match (self.embedders.sources, other.embedders.sources) { (None, None) => None, (Some(sources), None) | (None, Some(sources)) => Some(sources), - (Some(this), Some(other)) => Some(this.union(&other).collect()), + (Some(this), Some(other)) => Some(this.union(&other).cloned().collect()), }, document_template_used: match ( self.embedders.document_template_used, @@ -598,45 +599,70 @@ impl Aggregate for SettingsAnalytics { #[derive(Serialize, Default)] struct RankingRulesAnalytics { - words_position: Option, - typo_position: Option, - proximity_position: Option, - attribute_position: Option, - sort_position: Option, - exactness_position: Option, - values: Option, + words_position: Option, + typo_position: Option, + proximity_position: Option, + attribute_position: Option, + sort_position: Option, + exactness_position: Option, + values: Option, } impl RankingRulesAnalytics { pub fn new(rr: Option<&Vec>) -> Self { RankingRulesAnalytics { - words_position: rr.as_ref().map(|rr| { - rr.iter() - .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Words)) - }), - typo_position: rr.as_ref().map(|rr| { - rr.iter() - .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo)) - }), - proximity_position: rr.as_ref().map(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Proximity) + words_position: rr + .as_ref() + .map(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Words) + }) }) - }), - attribute_position: rr.as_ref().map(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Attribute) + .flatten(), + + typo_position: rr + .as_ref() + .map(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Typo) + }) }) - }), - sort_position: rr.as_ref().map(|rr| { - rr.iter() - .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort)) - }), - exactness_position: rr.as_ref().map(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Exactness) + .flatten(), + + proximity_position: rr + .as_ref() + .map(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Proximity) + 
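// The `.map(..).flatten()` chains introduced here are equivalent to
// `Option::and_then`; a quick check of the ranking-rule case:
fn main() {
    let rr: Option<Vec<u32>> = Some(vec![1, 3, 5]);
    let via_flatten = rr.as_ref().map(|rr| rr.iter().position(|s| *s == 3)).flatten();
    let via_and_then = rr.as_ref().and_then(|rr| rr.iter().position(|s| *s == 3));
    assert_eq!(via_flatten, via_and_then); // both Some(1)
}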
}) }) - }), + .flatten(), + + attribute_position: rr + .as_ref() + .map(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Attribute) + }) + }) + .flatten(), + sort_position: rr + .as_ref() + .map(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Sort) + }) + }) + .flatten(), + exactness_position: rr + .as_ref() + .map(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Exactness) + }) + }) + .flatten(), + values: rr.as_ref().map(|rr| { rr.iter() .filter(|s| { @@ -661,7 +687,7 @@ impl RankingRulesAnalytics { #[derive(Serialize, Default)] struct SearchableAttributesAnalytics { total: Option, - with_wildcard: bool, + with_wildcard: Option, } impl SearchableAttributesAnalytics { @@ -681,8 +707,8 @@ impl SearchableAttributesAnalytics { #[derive(Serialize, Default)] struct DisplayedAttributesAnalytics { - total: usize, - with_wildcard: bool, + total: Option, + with_wildcard: Option, } impl DisplayedAttributesAnalytics { @@ -702,8 +728,8 @@ impl DisplayedAttributesAnalytics { #[derive(Serialize, Default)] struct SortableAttributesAnalytics { - total: usize, - has_geo: bool, + total: Option, + has_geo: Option, } impl SortableAttributesAnalytics { @@ -721,15 +747,15 @@ impl SortableAttributesAnalytics { #[derive(Serialize, Default)] struct FilterableAttributesAnalytics { - total: usize, - has_geo: bool, + total: Option, + has_geo: Option, } impl FilterableAttributesAnalytics { pub fn new(setting: Option<&std::collections::BTreeSet>) -> Self { Self { - total: setting.as_ref().map(|filter| filter.len()).unwrap_or(0), - has_geo: setting.as_ref().map(|filter| filter.contains("_geo")).unwrap_or(false), + total: setting.as_ref().map(|filter| filter.len()), + has_geo: setting.as_ref().map(|filter| filter.contains("_geo")), } } @@ -761,7 +787,7 @@ struct ProximityPrecisionAnalytics { impl ProximityPrecisionAnalytics { pub fn new(precision: Option<&meilisearch_types::settings::ProximityPrecisionView>) -> Self { - Self { set: precision.is_some(), value: precision.unwrap_or_default() } + Self { set: precision.is_some(), value: precision.cloned() } } pub fn to_settings(self) -> SettingsAnalytics { @@ -774,8 +800,8 @@ struct TypoToleranceAnalytics { enabled: Option, disable_on_attributes: Option, disable_on_words: Option, - min_word_size_for_one_typo: Option, - min_word_size_for_two_typos: Option, + min_word_size_for_one_typo: Option, + min_word_size_for_two_typos: Option, } impl TypoToleranceAnalytics { @@ -805,9 +831,9 @@ impl TypoToleranceAnalytics { #[derive(Serialize, Default)] struct FacetingAnalytics { - max_values_per_facet: Option, + max_values_per_facet: Option, sort_facet_values_by_star_count: Option, - sort_facet_values_by_total: Option, + sort_facet_values_by_total: Option, } impl FacetingAnalytics { @@ -833,7 +859,7 @@ impl FacetingAnalytics { #[derive(Serialize, Default)] struct PaginationAnalytics { - max_total_hits: Option, + max_total_hits: Option, } impl PaginationAnalytics { @@ -909,18 +935,18 @@ impl EmbeddersAnalytics { { use meilisearch_types::milli::vector::settings::EmbedderSource; match source { - EmbedderSource::OpenAi => sources.insert("openAi"), - EmbedderSource::HuggingFace => sources.insert("huggingFace"), - EmbedderSource::UserProvided => sources.insert("userProvided"), - EmbedderSource::Ollama => sources.insert("ollama"), - EmbedderSource::Rest => sources.insert("rest"), + EmbedderSource::OpenAi => sources.insert("openAi".to_string()), + 
EmbedderSource::HuggingFace => sources.insert("huggingFace".to_string()), + EmbedderSource::UserProvided => sources.insert("userProvided".to_string()), + EmbedderSource::Ollama => sources.insert("ollama".to_string()), + EmbedderSource::Rest => sources.insert("rest".to_string()), }; } }; Self { total: setting.as_ref().map(|s| s.len()), - sources, + sources: Some(sources), document_template_used: setting.as_ref().map(|map| { map.values() .filter_map(|config| config.clone().set()) @@ -953,7 +979,7 @@ struct SearchCutoffMsAnalytics { impl SearchCutoffMsAnalytics { pub fn new(setting: Option<&u64>) -> Self { - Self { search_cutoff_ms: setting } + Self { search_cutoff_ms: setting.copied() } } pub fn to_settings(self) -> SettingsAnalytics { @@ -964,7 +990,7 @@ impl SearchCutoffMsAnalytics { #[derive(Serialize, Default)] #[serde(transparent)] struct LocalesAnalytics { - locales: BTreeSet, + locales: Option>, } impl LocalesAnalytics { @@ -988,7 +1014,7 @@ impl LocalesAnalytics { #[derive(Serialize, Default)] struct DictionaryAnalytics { - total: usize, + total: Option, } impl DictionaryAnalytics { @@ -1003,7 +1029,7 @@ impl DictionaryAnalytics { #[derive(Serialize, Default)] struct SeparatorTokensAnalytics { - total: usize, + total: Option, } impl SeparatorTokensAnalytics { @@ -1018,7 +1044,7 @@ impl SeparatorTokensAnalytics { #[derive(Serialize, Default)] struct NonSeparatorTokensAnalytics { - total: usize, + total: Option, } impl NonSeparatorTokensAnalytics { @@ -1088,7 +1114,7 @@ pub async fn update_all( new_settings.non_separator_tokens.as_ref().set(), ), }, - Some(&req), + &req, ); let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); diff --git a/meilisearch/src/routes/indexes/similar.rs b/meilisearch/src/routes/indexes/similar.rs index f94a02987..91c435254 100644 --- a/meilisearch/src/routes/indexes/similar.rs +++ b/meilisearch/src/routes/indexes/similar.rs @@ -13,6 +13,7 @@ use serde_json::Value; use tracing::debug; use super::ActionPolicy; +use crate::analytics::segment_analytics::{SimilarGET, SimilarPOST}; use crate::analytics::{Analytics, SimilarAggregator}; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; @@ -34,13 +35,13 @@ pub async fn similar_get( index_uid: web::Path, params: AwebQueryParameter, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; let query = params.0.try_into()?; - let mut aggregate = SimilarAggregator::from_query(&query, &req); + let mut aggregate = SimilarAggregator::::from_query(&query, &req); debug!(parameters = ?query, "Similar get"); @@ -49,7 +50,7 @@ pub async fn similar_get( if let Ok(similar) = &similar { aggregate.succeed(similar); } - analytics.get_similar(aggregate); + analytics.publish(aggregate, &req); let similar = similar?; @@ -62,21 +63,21 @@ pub async fn similar_post( index_uid: web::Path, params: AwebJson, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; let query = params.into_inner(); debug!(parameters = ?query, "Similar post"); - let mut aggregate = SimilarAggregator::from_query(&query, &req); + let mut aggregate = SimilarAggregator::::from_query(&query, &req); let similar = similar(index_scheduler, index_uid, query).await; if let Ok(similar) = &similar { aggregate.succeed(similar); } - analytics.post_similar(aggregate); + analytics.publish(aggregate, &req); let similar = 
similar?; diff --git a/meilisearch/src/routes/multi_search.rs b/meilisearch/src/routes/multi_search.rs index 5fcb868c6..994c256d2 100644 --- a/meilisearch/src/routes/multi_search.rs +++ b/meilisearch/src/routes/multi_search.rs @@ -35,7 +35,7 @@ pub async fn multi_search_with_post( search_queue: Data, params: AwebJson, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { // Since we don't want to process half of the search requests and then get a permit refused // we're going to get one permit for the whole duration of the multi-search request. @@ -87,7 +87,7 @@ pub async fn multi_search_with_post( multi_aggregate.succeed(); } - analytics.post_multi_search(multi_aggregate); + analytics.publish(multi_aggregate, &req); HttpResponse::Ok().json(search_result??) } None => { @@ -149,7 +149,7 @@ pub async fn multi_search_with_post( if search_results.is_ok() { multi_aggregate.succeed(); } - analytics.post_multi_search(multi_aggregate); + analytics.publish(multi_aggregate, &req); let search_results = search_results.map_err(|(mut err, query_index)| { // Add the query index that failed as context for the error message. diff --git a/meilisearch/src/routes/snapshot.rs b/meilisearch/src/routes/snapshot.rs index 84673729f..cacbc41af 100644 --- a/meilisearch/src/routes/snapshot.rs +++ b/meilisearch/src/routes/snapshot.rs @@ -3,7 +3,6 @@ use actix_web::{web, HttpRequest, HttpResponse}; use index_scheduler::IndexScheduler; use meilisearch_types::error::ResponseError; use meilisearch_types::tasks::KindWithContent; -use serde_json::json; use tracing::debug; use crate::analytics::Analytics; @@ -17,13 +16,15 @@ pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(create_snapshot)))); } +crate::empty_analytics!(SnapshotAnalytics, "Snapshot Created"); + pub async fn create_snapshot( index_scheduler: GuardedData, Data>, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { - analytics.publish("Snapshot Created".to_string(), json!({}), Some(&req)); + analytics.publish(SnapshotAnalytics::default(), &req); let task = KindWithContent::SnapshotCreation; let uid = get_task_id(&req, &opt)?; diff --git a/meilisearch/src/routes/swap_indexes.rs b/meilisearch/src/routes/swap_indexes.rs index 34e904230..42ebd7858 100644 --- a/meilisearch/src/routes/swap_indexes.rs +++ b/meilisearch/src/routes/swap_indexes.rs @@ -8,10 +8,11 @@ use meilisearch_types::error::deserr_codes::InvalidSwapIndexes; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::tasks::{IndexSwap, KindWithContent}; +use serde::Serialize; use serde_json::json; use super::{get_task_id, is_dry_run, SummarizedTaskView}; -use crate::analytics::Analytics; +use crate::analytics::{Aggregate, Analytics}; use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::{AuthenticationError, GuardedData}; @@ -29,21 +30,34 @@ pub struct SwapIndexesPayload { indexes: Vec, } +#[derive(Serialize)] +struct IndexSwappedAnalytics { + swap_operation_number: usize, +} + +impl Aggregate for IndexSwappedAnalytics { + fn event_name(&self) -> &'static str { + "Indexes Swapped" + } + + fn aggregate(self, other: Self) -> Self { + Self { swap_operation_number: self.swap_operation_number.max(other.swap_operation_number) } + } + + fn into_event(self) -> impl Serialize { + self + } +} + pub async fn swap_indexes( index_scheduler: GuardedData, 
Data>, params: AwebJson, DeserrJsonError>, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { let params = params.into_inner(); - analytics.publish( - "Indexes Swapped".to_string(), - json!({ - "swap_operation_number": params.len(), // Return the max ever encountered - }), - Some(&req), - ); + analytics.publish(IndexSwappedAnalytics { swap_operation_number: params.len() }, &req); let filters = index_scheduler.filters(); let mut swaps = vec![]; diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index 3dc6520af..162d19ca1 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -12,18 +12,17 @@ use meilisearch_types::star_or::{OptionStarOr, OptionStarOrList}; use meilisearch_types::task_view::TaskView; use meilisearch_types::tasks::{Kind, KindWithContent, Status}; use serde::Serialize; -use serde_json::json; use time::format_description::well_known::Rfc3339; use time::macros::format_description; use time::{Date, Duration, OffsetDateTime, Time}; use tokio::task; use super::{get_task_id, is_dry_run, SummarizedTaskView}; -use crate::analytics::Analytics; +use crate::analytics::{Aggregate, AggregateMethod, Analytics}; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; -use crate::Opt; +use crate::{aggregate_methods, Opt}; const DEFAULT_LIMIT: u32 = 20; @@ -158,12 +157,69 @@ impl TaskDeletionOrCancelationQuery { } } +aggregate_methods!( + CancelTasks => "Tasks Canceled", + DeleteTasks => "Tasks Deleted", +); + +#[derive(Serialize)] +struct TaskFilterAnalytics { + filtered_by_uid: bool, + filtered_by_index_uid: bool, + filtered_by_type: bool, + filtered_by_status: bool, + filtered_by_canceled_by: bool, + filtered_by_before_enqueued_at: bool, + filtered_by_after_enqueued_at: bool, + filtered_by_before_started_at: bool, + filtered_by_after_started_at: bool, + filtered_by_before_finished_at: bool, + filtered_by_after_finished_at: bool, + + #[serde(skip)] + marker: std::marker::PhantomData, +} + +impl Aggregate for TaskFilterAnalytics { + fn event_name(&self) -> &'static str { + Method::event_name() + } + + fn aggregate(self, other: Self) -> Self { + Self { + filtered_by_uid: self.filtered_by_uid | other.filtered_by_uid, + filtered_by_index_uid: self.filtered_by_index_uid | other.filtered_by_index_uid, + filtered_by_type: self.filtered_by_type | other.filtered_by_type, + filtered_by_status: self.filtered_by_status | other.filtered_by_status, + filtered_by_canceled_by: self.filtered_by_canceled_by | other.filtered_by_canceled_by, + filtered_by_before_enqueued_at: self.filtered_by_before_enqueued_at + | other.filtered_by_before_enqueued_at, + filtered_by_after_enqueued_at: self.filtered_by_after_enqueued_at + | other.filtered_by_after_enqueued_at, + filtered_by_before_started_at: self.filtered_by_before_started_at + | other.filtered_by_before_started_at, + filtered_by_after_started_at: self.filtered_by_after_started_at + | other.filtered_by_after_started_at, + filtered_by_before_finished_at: self.filtered_by_before_finished_at + | other.filtered_by_before_finished_at, + filtered_by_after_finished_at: self.filtered_by_after_finished_at + | other.filtered_by_after_finished_at, + + marker: std::marker::PhantomData, + } + } + + fn into_event(self) -> impl Serialize { + self + } +} + async fn cancel_tasks( index_scheduler: GuardedData, Data>, params: AwebQueryParameter, req: HttpRequest, opt: 
web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { let params = params.into_inner(); @@ -172,21 +228,22 @@ async fn cancel_tasks( } analytics.publish( - "Tasks Canceled".to_string(), - json!({ - "filtered_by_uid": params.uids.is_some(), - "filtered_by_index_uid": params.index_uids.is_some(), - "filtered_by_type": params.types.is_some(), - "filtered_by_status": params.statuses.is_some(), - "filtered_by_canceled_by": params.canceled_by.is_some(), - "filtered_by_before_enqueued_at": params.before_enqueued_at.is_some(), - "filtered_by_after_enqueued_at": params.after_enqueued_at.is_some(), - "filtered_by_before_started_at": params.before_started_at.is_some(), - "filtered_by_after_started_at": params.after_started_at.is_some(), - "filtered_by_before_finished_at": params.before_finished_at.is_some(), - "filtered_by_after_finished_at": params.after_finished_at.is_some(), - }), - Some(&req), + TaskFilterAnalytics:: { + filtered_by_uid: params.uids.is_some(), + filtered_by_index_uid: params.index_uids.is_some(), + filtered_by_type: params.types.is_some(), + filtered_by_status: params.statuses.is_some(), + filtered_by_canceled_by: params.canceled_by.is_some(), + filtered_by_before_enqueued_at: params.before_enqueued_at.is_some(), + filtered_by_after_enqueued_at: params.after_enqueued_at.is_some(), + filtered_by_before_started_at: params.before_started_at.is_some(), + filtered_by_after_started_at: params.after_started_at.is_some(), + filtered_by_before_finished_at: params.before_finished_at.is_some(), + filtered_by_after_finished_at: params.after_finished_at.is_some(), + + marker: std::marker::PhantomData, + }, + &req, ); let query = params.into_query(); @@ -214,7 +271,7 @@ async fn delete_tasks( params: AwebQueryParameter, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { let params = params.into_inner(); @@ -223,22 +280,24 @@ async fn delete_tasks( } analytics.publish( - "Tasks Deleted".to_string(), - json!({ - "filtered_by_uid": params.uids.is_some(), - "filtered_by_index_uid": params.index_uids.is_some(), - "filtered_by_type": params.types.is_some(), - "filtered_by_status": params.statuses.is_some(), - "filtered_by_canceled_by": params.canceled_by.is_some(), - "filtered_by_before_enqueued_at": params.before_enqueued_at.is_some(), - "filtered_by_after_enqueued_at": params.after_enqueued_at.is_some(), - "filtered_by_before_started_at": params.before_started_at.is_some(), - "filtered_by_after_started_at": params.after_started_at.is_some(), - "filtered_by_before_finished_at": params.before_finished_at.is_some(), - "filtered_by_after_finished_at": params.after_finished_at.is_some(), - }), - Some(&req), + TaskFilterAnalytics:: { + filtered_by_uid: params.uids.is_some(), + filtered_by_index_uid: params.index_uids.is_some(), + filtered_by_type: params.types.is_some(), + filtered_by_status: params.statuses.is_some(), + filtered_by_canceled_by: params.canceled_by.is_some(), + filtered_by_before_enqueued_at: params.before_enqueued_at.is_some(), + filtered_by_after_enqueued_at: params.after_enqueued_at.is_some(), + filtered_by_before_started_at: params.before_started_at.is_some(), + filtered_by_after_started_at: params.after_started_at.is_some(), + filtered_by_before_finished_at: params.before_finished_at.is_some(), + filtered_by_after_finished_at: params.after_finished_at.is_some(), + + marker: std::marker::PhantomData, + }, + &req, ); + let query = params.into_query(); let (tasks, _) = index_scheduler.get_task_ids_from_authorized_indexes( 
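
The pattern the patch above establishes in tasks.rs and swap_indexes.rs generalizes to any route: define a plain serializable struct, implement `Aggregate` for it, and hand an instance to `Analytics::publish`, which folds it into any pending event of the same type. A minimal sketch of such a route-level event, written against the trait as it stands at this point in the series (the `ExampleAnalytics` name, its event string, and its field are illustrative, not code from the patches):

    use serde::Serialize;

    use crate::analytics::{Aggregate, Analytics};

    #[derive(Serialize)]
    struct ExampleAnalytics {
        // Illustrative payload: how many requests were folded into this event.
        total_received: usize,
    }

    impl Aggregate for ExampleAnalytics {
        fn event_name(&self) -> &'static str {
            "Example Event"
        }

        // Called when two events of the same type are pending: fold them into one.
        fn aggregate(self, other: Self) -> Self {
            Self { total_received: self.total_received.saturating_add(other.total_received) }
        }

        // What is eventually serialized and sent to Segment when the batch is flushed.
        fn into_event(self) -> impl Serialize {
            self
        }
    }

    // In a handler, assuming `analytics: web::Data<Analytics>` and `req: HttpRequest`
    // are extracted the same way the routes above extract them:
    //
    //     analytics.publish(ExampleAnalytics { total_received: 1 }, &req);
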
From ea6883189ef73429b748473d436b71ea4a7a5a52 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 16 Oct 2024 21:17:06 +0200 Subject: [PATCH 04/22] finish the analytics in all the routes --- meilisearch/src/analytics/mod.rs | 33 ++-- .../src/analytics/segment_analytics.rs | 153 +++--------------- meilisearch/src/routes/features.rs | 1 - meilisearch/src/routes/indexes/documents.rs | 58 +++---- .../src/routes/indexes/facet_search.rs | 24 +-- meilisearch/src/routes/indexes/mod.rs | 5 +- meilisearch/src/routes/indexes/similar.rs | 4 +- meilisearch/src/routes/multi_search.rs | 2 +- meilisearch/src/routes/swap_indexes.rs | 1 - meilisearch/src/routes/tasks.rs | 2 +- 10 files changed, 84 insertions(+), 199 deletions(-) diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index a0ca47d8f..ab6fd9993 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -1,7 +1,5 @@ pub mod segment_analytics; -use std::any::TypeId; -use std::collections::HashMap; use std::fs; use std::path::{Path, PathBuf}; use std::str::FromStr; @@ -10,7 +8,6 @@ use actix_web::HttpRequest; use meilisearch_types::InstanceUid; use once_cell::sync::Lazy; use platform_dirs::AppDirs; -use segment::message::User; use serde::Serialize; // if the feature analytics is enabled we use the real analytics @@ -83,7 +80,7 @@ pub enum DocumentFetchKind { Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool }, } -pub trait Aggregate { +pub trait Aggregate: 'static { fn event_name(&self) -> &'static str; fn aggregate(self, other: Self) -> Self @@ -97,7 +94,7 @@ pub trait Aggregate { /// Helper trait to define multiple aggregate with the same content but a different name. /// Commonly used when you must aggregate a search with POST or with GET for example. -pub trait AggregateMethod { +pub trait AggregateMethod: 'static + Default { fn event_name() -> &'static str; } @@ -105,7 +102,8 @@ pub trait AggregateMethod { #[macro_export] macro_rules! aggregate_methods { ($method:ident => $event_name:literal) => { - pub enum $method {} + #[derive(Default)] + pub struct $method {} impl $crate::analytics::AggregateMethod for $method { fn event_name() -> &'static str { @@ -122,35 +120,26 @@ macro_rules! 
aggregate_methods { } pub struct Analytics { - // TODO: TAMO: remove - inner: Option, - - instance_uid: Option, - user: Option, - events: HashMap>, + segment: Option, } impl Analytics { fn no_analytics() -> Self { - Self { inner: None, events: HashMap::new(), instance_uid: None, user: None } + Self { segment: None } } fn segment_analytics(segment: SegmentAnalytics) -> Self { - Self { - instance_uid: Some(segment.instance_uid), - user: Some(segment.user), - inner: Some(segment), - events: HashMap::new(), - } + Self { segment: Some(segment) } } pub fn instance_uid(&self) -> Option<&InstanceUid> { - self.instance_uid + self.segment.as_ref().map(|segment| segment.instance_uid.as_ref()) } /// The method used to publish most analytics that do not need to be batched every hours - pub fn publish(&self, send: impl Aggregate, request: &HttpRequest) { - let Some(segment) = self.inner else { return }; + pub fn publish(&self, event: impl Aggregate, request: &HttpRequest) { + let Some(ref segment) = self.segment else { return }; let user_agents = extract_user_agents(request); + let _ = segment.sender.try_send(Box::new(event)); } } diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 0572267e1..601fefa1e 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -1,3 +1,4 @@ +use std::any::{Any, TypeId}; use std::collections::{BTreeSet, BinaryHeap, HashMap, HashSet}; use std::fs; use std::mem::take; @@ -74,6 +75,7 @@ pub fn extract_user_agents(request: &HttpRequest) -> Vec { pub struct SegmentAnalytics { pub instance_uid: InstanceUid, pub user: User, + pub sender: Sender>, } impl SegmentAnalytics { @@ -128,18 +130,7 @@ impl SegmentAnalytics { user: user.clone(), opt: opt.clone(), batcher, - post_search_aggregator: SearchAggregator::default(), - post_multi_search_aggregator: MultiSearchAggregator::default(), - post_facet_search_aggregator: FacetSearchAggregator::default(), - get_search_aggregator: SearchAggregator::default(), - add_documents_aggregator: DocumentsAggregator::default(), - delete_documents_aggregator: DocumentsDeletionAggregator::default(), - update_documents_aggregator: DocumentsAggregator::default(), - edit_documents_by_function_aggregator: EditDocumentsByFunctionAggregator::default(), - get_fetch_documents_aggregator: DocumentsFetchAggregator::default(), - post_fetch_documents_aggregator: DocumentsFetchAggregator::default(), - get_similar_aggregator: SimilarAggregator::default(), - post_similar_aggregator: SimilarAggregator::default(), + events: todo!(), }); tokio::spawn(segment.run(index_scheduler.clone(), auth_controller.clone())); @@ -387,22 +378,11 @@ impl From for Infos { } pub struct Segment { - inbox: Receiver, + inbox: Receiver>, user: User, opt: Opt, batcher: AutoBatcher, - get_search_aggregator: SearchAggregator, - post_search_aggregator: SearchAggregator, - post_multi_search_aggregator: MultiSearchAggregator, - post_facet_search_aggregator: FacetSearchAggregator, - add_documents_aggregator: DocumentsAggregator, - delete_documents_aggregator: DocumentsDeletionAggregator, - update_documents_aggregator: DocumentsAggregator, - edit_documents_by_function_aggregator: EditDocumentsByFunctionAggregator, - get_fetch_documents_aggregator: DocumentsFetchAggregator, - post_fetch_documents_aggregator: DocumentsFetchAggregator, - get_similar_aggregator: SimilarAggregator, - post_similar_aggregator: SimilarAggregator, + events: HashMap>, } impl Segment { @@ -455,19 +435,8 @@ 
impl Segment { }, msg = self.inbox.recv() => { match msg { - Some(AnalyticsMsg::BatchMessage(msg)) => drop(self.batcher.push(msg).await), - Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregatePostMultiSearch(agreg)) => self.post_multi_search_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregatePostFacetSearch(agreg)) => self.post_facet_search_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregateEditDocumentsByFunction(agreg)) => self.edit_documents_by_function_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregateGetFetchDocuments(agreg)) => self.get_fetch_documents_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregatePostFetchDocuments(agreg)) => self.post_fetch_documents_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregateGetSimilar(agreg)) => self.get_similar_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregatePostSimilar(agreg)) => self.post_similar_aggregator.aggregate(agreg), + // Some(AnalyticsMsg::BatchMessage(msg)) => drop(self.batcher.push(msg).await), + Some(_) => todo!(), None => (), } } @@ -507,87 +476,19 @@ impl Segment { .await; } - let Segment { - inbox: _, - opt: _, - batcher: _, - user, - get_search_aggregator, - post_search_aggregator, - post_multi_search_aggregator, - post_facet_search_aggregator, - add_documents_aggregator, - delete_documents_aggregator, - update_documents_aggregator, - edit_documents_by_function_aggregator, - get_fetch_documents_aggregator, - post_fetch_documents_aggregator, - get_similar_aggregator, - post_similar_aggregator, - } = self; + // We empty the list of events + let events = std::mem::take(&mut self.events); - if let Some(get_search) = - take(get_search_aggregator).into_event(user, "Documents Searched GET") - { - let _ = self.batcher.push(get_search).await; - } - if let Some(post_search) = - take(post_search_aggregator).into_event(user, "Documents Searched POST") - { - let _ = self.batcher.push(post_search).await; - } - if let Some(post_multi_search) = take(post_multi_search_aggregator) - .into_event(user, "Documents Searched by Multi-Search POST") - { - let _ = self.batcher.push(post_multi_search).await; - } - if let Some(post_facet_search) = - take(post_facet_search_aggregator).into_event(user, "Facet Searched POST") - { - let _ = self.batcher.push(post_facet_search).await; - } - if let Some(add_documents) = - take(add_documents_aggregator).into_event(user, "Documents Added") - { - let _ = self.batcher.push(add_documents).await; - } - if let Some(delete_documents) = - take(delete_documents_aggregator).into_event(user, "Documents Deleted") - { - let _ = self.batcher.push(delete_documents).await; - } - if let Some(update_documents) = - take(update_documents_aggregator).into_event(user, "Documents Updated") - { - let _ = self.batcher.push(update_documents).await; - } - if let Some(edit_documents_by_function) = take(edit_documents_by_function_aggregator) - .into_event(user, "Documents Edited By Function") - { - let _ = self.batcher.push(edit_documents_by_function).await; - } - if let Some(get_fetch_documents) = - 
take(get_fetch_documents_aggregator).into_event(user, "Documents Fetched GET") - { - let _ = self.batcher.push(get_fetch_documents).await; - } - if let Some(post_fetch_documents) = - take(post_fetch_documents_aggregator).into_event(user, "Documents Fetched POST") - { - let _ = self.batcher.push(post_fetch_documents).await; + for (_, mut event) in events { + self.batcher.push(Track { + user: self.user, + event: event.event_name().to_string(), + properties: event.into_event(), + timestamp: todo!(), + ..Default::default() + }); } - if let Some(get_similar_documents) = - take(get_similar_aggregator).into_event(user, "Similar GET") - { - let _ = self.batcher.push(get_similar_documents).await; - } - - if let Some(post_similar_documents) = - take(post_similar_aggregator).into_event(user, "Similar POST") - { - let _ = self.batcher.push(post_similar_documents).await; - } let _ = self.batcher.flush().await; } } @@ -702,10 +603,8 @@ impl SearchAggregator { } = query; let mut ret = Self::default(); - ret.timestamp = Some(OffsetDateTime::now_utc()); ret.total_received = 1; - ret.user_agents = extract_user_agents(request).into_iter().collect(); if let Some(ref sort) = sort { ret.sort_total_number_of_criteria = 1; @@ -949,7 +848,7 @@ impl Aggregate for SearchAggregator { self } - fn into_event(self) -> Option { + fn into_event(self) -> impl Serialize { let Self { total_received, total_succeeded, @@ -1087,10 +986,7 @@ pub struct MultiSearchAggregator { } impl MultiSearchAggregator { - pub fn from_federated_search( - federated_search: &FederatedSearch, - request: &HttpRequest, - ) -> Self { + pub fn from_federated_search(federated_search: &FederatedSearch) -> Self { let use_federation = federated_search.federation.is_some(); let distinct_indexes: HashSet<_> = federated_search @@ -1162,7 +1058,7 @@ impl Aggregate for MultiSearchAggregator { } /// Aggregate one [MultiSearchAggregator] into another. - fn aggregate(mut self, other: Self) -> Self { + fn aggregate(self, other: Self) -> Self { // write the aggregate in a way that will cause a compilation error if a field is added. // get ownership of self, replacing it by a default value. 
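         // Note on the hunk below: per-request metadata such as user agents is no
         // longer merged inside each aggregator. It is read once per request via the
         // `extract_user_agents` call in `Analytics::publish` (see the mod.rs hunk of
         // this patch), so `aggregate` implementations only fold their own counters.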
@@ -1177,13 +1073,8 @@ impl Aggregate for MultiSearchAggregator { let show_ranking_score = this.show_ranking_score || other.show_ranking_score; let show_ranking_score_details = this.show_ranking_score_details || other.show_ranking_score_details; - let mut user_agents = this.user_agents; let use_federation = this.use_federation || other.use_federation; - for user_agent in other.user_agents.into_iter() { - user_agents.insert(user_agent); - } - Self { total_received, total_succeeded, @@ -1748,7 +1639,7 @@ pub struct SimilarAggregator { impl SimilarAggregator { #[allow(clippy::field_reassign_with_default)] - pub fn from_query(query: &SimilarQuery, request: &HttpRequest) -> Self { + pub fn from_query(query: &SimilarQuery) -> Self { let SimilarQuery { id: _, embedder: _, @@ -1763,10 +1654,8 @@ impl SimilarAggregator { } = query; let mut ret = Self::default(); - ret.timestamp = Some(OffsetDateTime::now_utc()); ret.total_received = 1; - ret.user_agents = extract_user_agents(request).into_iter().collect(); if let Some(ref filter) = filter { static RE: Lazy = Lazy::new(|| Regex::new("AND | OR").unwrap()); diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index 4ee5b37b0..0b43c3f13 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -7,7 +7,6 @@ use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::keys::actions; use serde::Serialize; -use serde_json::json; use tracing::debug; use crate::analytics::{Aggregate, Analytics}; diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 6dece61e6..1573b768b 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -32,7 +32,7 @@ use tokio::fs::File; use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter}; use tracing::debug; -use crate::analytics::{Aggregate, AggregateMethod, Analytics, DocumentDeletionKind}; +use crate::analytics::{Aggregate, AggregateMethod, Analytics}; use crate::error::MeilisearchHttpError; use crate::error::PayloadError::ReceivePayload; use crate::extractors::authentication::policies::*; @@ -102,8 +102,13 @@ pub struct GetDocument { retrieve_vectors: Param, } +aggregate_methods!( + DocumentsGET => "Documents Fetched GET", + DocumentsPOST => "Documents Fetched POST", +); + #[derive(Default, Serialize)] -pub struct DocumentsFetchAggregator { +pub struct DocumentsFetchAggregator { #[serde(rename = "requests.total_received")] total_received: usize, @@ -120,6 +125,8 @@ pub struct DocumentsFetchAggregator { max_limit: usize, #[serde(rename = "pagination.max_offset")] max_offset: usize, + + marker: std::marker::PhantomData, } #[derive(Copy, Clone, Debug, PartialEq, Eq)] @@ -128,7 +135,7 @@ pub enum DocumentFetchKind { Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool }, } -impl DocumentsFetchAggregator { +impl DocumentsFetchAggregator { pub fn from_query(query: &DocumentFetchKind) -> Self { let (limit, offset, retrieve_vectors) = match query { DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors), @@ -136,6 +143,7 @@ impl DocumentsFetchAggregator { (*limit, *offset, *retrieve_vectors) } }; + Self { total_received: 1, per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. 
}), @@ -143,20 +151,18 @@ impl DocumentsFetchAggregator { max_limit: limit, max_offset: offset, retrieve_vectors, + + marker: PhantomData, } } } -impl Aggregate for DocumentsFetchAggregator { - // TODO: TAMO: Should we do the same event for the GET requests +impl Aggregate for DocumentsFetchAggregator { fn event_name(&self) -> &'static str { - "Documents Fetched POST" + Method::event_name() } - fn aggregate(self, other: Self) -> Self - where - Self: Sized, - { + fn aggregate(self, other: Self) -> Self { Self { total_received: self.total_received.saturating_add(other.total_received), per_document_id: self.per_document_id | other.per_document_id, @@ -164,11 +170,12 @@ impl Aggregate for DocumentsFetchAggregator { retrieve_vectors: self.retrieve_vectors | other.retrieve_vectors, max_limit: self.max_limit.max(other.max_limit), max_offset: self.max_offset.max(other.max_offset), + marker: PhantomData, } } - fn into_event(self) -> Value { - serde_json::to_value(self).unwrap() + fn into_event(self) -> impl Serialize { + self } } @@ -190,7 +197,7 @@ pub async fn get_document( let retrieve_vectors = RetrieveVectors::new(param_retrieve_vectors.0, features)?; analytics.publish( - DocumentsFetchAggregator { + DocumentsFetchAggregator:: { retrieve_vectors: param_retrieve_vectors.0, ..Default::default() }, @@ -232,8 +239,8 @@ impl Aggregate for DocumentsDeletionAggregator { } } - fn into_event(self) -> Value { - serde_json::to_value(self).unwrap() + fn into_event(self) -> impl Serialize { + self } } @@ -311,7 +318,7 @@ pub async fn documents_by_query_post( debug!(parameters = ?body, "Get documents POST"); analytics.publish( - DocumentsFetchAggregator { + DocumentsFetchAggregator:: { total_received: 1, per_filter: body.filter.is_some(), retrieve_vectors: body.retrieve_vectors, @@ -353,7 +360,7 @@ pub async fn get_documents( }; analytics.publish( - DocumentsFetchAggregator { + DocumentsFetchAggregator:: { total_received: 1, per_filter: query.filter.is_some(), retrieve_vectors: query.retrieve_vectors, @@ -436,20 +443,17 @@ impl Aggregate for DocumentsAggregator { Method::event_name() } - fn aggregate(mut self, other: Self) -> Self - where - Self: Sized, - { + fn aggregate(self, other: Self) -> Self { Self { - payload_types: self.payload_types.union(&other.payload_types).collect(), - primary_key: self.primary_key.union(&other.primary_key).collect(), + payload_types: self.payload_types.union(&other.payload_types).cloned().collect(), + primary_key: self.primary_key.union(&other.primary_key).cloned().collect(), index_creation: self.index_creation | other.index_creation, method: PhantomData, } } - fn into_event(self) -> Value { - serde_json::to_value(self).unwrap() + fn into_event(self) -> impl Serialize { + self } } @@ -818,8 +822,8 @@ impl Aggregate for EditDocumentsByFunctionAggregator { } } - fn into_event(self) -> Value { - serde_json::to_value(self).unwrap() + fn into_event(self) -> impl Serialize { + self } } diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index f3c74a388..08618970d 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -9,6 +9,7 @@ use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::locales::Locale; +use serde::Serialize; use serde_json::Value; use tracing::debug; @@ -72,7 +73,7 @@ pub struct FacetSearchAggregator { impl FacetSearchAggregator { 
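     // The `into_event` changed below reports a percentile rather than every timing:
     // with the collected durations sorted ascending, index `0.99 * (n - 1) + 1`
     // (truncated) is a nearest-rank pick of the slowest value among the ~99%
     // fastest responses; `get` returns `None` when that index falls outside the
     // collected values (e.g. when nothing succeeded), in which case no timing
     // can be reported.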
 #[allow(clippy::field_reassign_with_default)]
-    pub fn from_query(query: &FacetSearchQuery, request: &HttpRequest) -> Self {
+    pub fn from_query(query: &FacetSearchQuery) -> Self {
         let FacetSearchQuery {
             facet_query: _,
             facet_name,
@@ -113,23 +114,22 @@ impl Aggregate for FacetSearchAggregator {
         "Facet Searched POST"
     }
 
-    fn aggregate(mut self, other: Self) -> Self
-    where
-        Self: Sized,
-    {
-        self.time_spent.insert(other.time_spent);
+    fn aggregate(mut self, other: Self) -> Self {
+        for time in other.time_spent {
+            self.time_spent.push(time);
+        }
 
         Self {
             total_received: self.total_received.saturating_add(other.total_received),
             total_succeeded: self.total_succeeded.saturating_add(other.total_succeeded),
             time_spent: self.time_spent,
-            facet_names: self.facet_names.union(&other.facet_names).collect(),
+            facet_names: self.facet_names.union(&other.facet_names).cloned().collect(),
             additional_search_parameters_provided: self.additional_search_parameters_provided
                 | other.additional_search_parameters_provided,
         }
     }
 
-    fn into_event(self) -> Value {
+    fn into_event(self) -> impl Serialize {
         let Self {
             total_received,
             total_succeeded,
             time_spent,
@@ -137,6 +137,12 @@ impl Aggregate for FacetSearchAggregator {
             facet_names,
             additional_search_parameters_provided,
         } = self;
+        // the index of the 99th percentile value
+        let percentile_99th = 0.99 * (total_succeeded as f64 - 1.) + 1.;
+        // we get all the values in a sorted manner
+        let time_spent = time_spent.into_sorted_vec();
+        // we are only interested in the slowest value among the 99% fastest results
+        let time_spent = time_spent.get(percentile_99th as usize);
 
         serde_json::json!({
             "requests": {
@@ -166,7 +172,7 @@ pub async fn search(
     let query = params.into_inner();
     debug!(parameters = ?query, "Facet search");
 
-    let mut aggregate = FacetSearchAggregator::from_query(&query, &req);
+    let mut aggregate = FacetSearchAggregator::from_query(&query);
 
     let facet_query = query.facet_query.clone();
     let facet_name = query.facet_name.clone();
diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs
index f926f663c..3c41f36fe 100644
--- a/meilisearch/src/routes/indexes/mod.rs
+++ b/meilisearch/src/routes/indexes/mod.rs
@@ -14,7 +14,6 @@ use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::milli::{self, FieldDistribution, Index};
 use meilisearch_types::tasks::KindWithContent;
 use serde::Serialize;
-use serde_json::json;
 use time::OffsetDateTime;
 use tracing::debug;
 
@@ -138,7 +137,7 @@ impl Aggregate for IndexCreatedAggregate {
     where
         Self: Sized,
     {
-        Self { primary_key: self.primary_key.union(&other.primary_key).collect() }
+        Self { primary_key: self.primary_key.union(&other.primary_key).cloned().collect() }
     }
 
     fn into_event(self) -> impl Serialize {
@@ -227,7 +226,7 @@ impl Aggregate for IndexUpdatedAggregate {
     }
 
     fn aggregate(self, other: Self) -> Self {
-        Self { primary_key: self.primary_key.union(&other.primary_key).collect() }
+        Self { primary_key: self.primary_key.union(&other.primary_key).cloned().collect() }
     }
 
     fn into_event(self) -> impl Serialize {
diff --git a/meilisearch/src/routes/indexes/similar.rs b/meilisearch/src/routes/indexes/similar.rs
index 91c435254..33df6bdad 100644
--- a/meilisearch/src/routes/indexes/similar.rs
+++ b/meilisearch/src/routes/indexes/similar.rs
@@ -41,7 +41,7 @@ pub async fn similar_get(
 
     let query = params.0.try_into()?;
 
-    let mut aggregate = SimilarAggregator::<SimilarGET>::from_query(&query, &req);
+    let mut aggregate = SimilarAggregator::<SimilarGET>::from_query(&query);
 
     debug!(parameters = ?query, "Similar get");
 
@@
-70,7 +70,7 @@ pub async fn similar_post( let query = params.into_inner(); debug!(parameters = ?query, "Similar post"); - let mut aggregate = SimilarAggregator::::from_query(&query, &req); + let mut aggregate = SimilarAggregator::::from_query(&query); let similar = similar(index_scheduler, index_uid, query).await; diff --git a/meilisearch/src/routes/multi_search.rs b/meilisearch/src/routes/multi_search.rs index 994c256d2..13a39cb44 100644 --- a/meilisearch/src/routes/multi_search.rs +++ b/meilisearch/src/routes/multi_search.rs @@ -43,7 +43,7 @@ pub async fn multi_search_with_post( let federated_search = params.into_inner(); - let mut multi_aggregate = MultiSearchAggregator::from_federated_search(&federated_search, &req); + let mut multi_aggregate = MultiSearchAggregator::from_federated_search(&federated_search); let FederatedSearch { mut queries, federation } = federated_search; diff --git a/meilisearch/src/routes/swap_indexes.rs b/meilisearch/src/routes/swap_indexes.rs index 42ebd7858..abdffbb73 100644 --- a/meilisearch/src/routes/swap_indexes.rs +++ b/meilisearch/src/routes/swap_indexes.rs @@ -9,7 +9,6 @@ use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::tasks::{IndexSwap, KindWithContent}; use serde::Serialize; -use serde_json::json; use super::{get_task_id, is_dry_run, SummarizedTaskView}; use crate::analytics::{Aggregate, Analytics}; diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index 162d19ca1..f04e2ead2 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -180,7 +180,7 @@ struct TaskFilterAnalytics { marker: std::marker::PhantomData, } -impl Aggregate for TaskFilterAnalytics { +impl Aggregate for TaskFilterAnalytics { fn event_name(&self) -> &'static str { Method::event_name() } From 6728cfbfac2a1b3e56b7bb7f13687dc610b48ca3 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 00:38:18 +0200 Subject: [PATCH 05/22] fix the analytics --- Cargo.lock | 7 ++ meilisearch/Cargo.toml | 1 + meilisearch/src/analytics/mod.rs | 34 ++++++--- .../src/analytics/segment_analytics.rs | 76 ++++++++++++------- 4 files changed, 81 insertions(+), 37 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c85a59952..733470384 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3415,6 +3415,7 @@ dependencies = [ "meilisearch-types", "mimalloc", "mime", + "mopa", "num_cpus", "obkv", "once_cell", @@ -3681,6 +3682,12 @@ dependencies = [ "syn 2.0.60", ] +[[package]] +name = "mopa" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a785740271256c230f57462d3b83e52f998433a7062fc18f96d5999474a9f915" + [[package]] name = "mutually_exclusive_features" version = "0.0.3" diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 6c2fb4060..322b333ac 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -104,6 +104,7 @@ tracing-trace = { version = "0.1.0", path = "../tracing-trace" } tracing-actix-web = "0.7.11" build-info = { version = "1.7.0", path = "../build-info" } roaring = "0.10.2" +mopa = "0.2.2" [dev-dependencies] actix-rt = "2.10.0" diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index ab6fd9993..8a0a68bad 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -6,9 +6,9 @@ use std::str::FromStr; use actix_web::HttpRequest; use meilisearch_types::InstanceUid; +use mopa::mopafy; use once_cell::sync::Lazy; use platform_dirs::AppDirs; -use 
serde::Serialize; // if the feature analytics is enabled we use the real analytics pub type SegmentAnalytics = segment_analytics::SegmentAnalytics; @@ -31,11 +31,11 @@ macro_rules! empty_analytics { $event_name } - fn aggregate(self, _other: Self) -> Self { + fn aggregate(self: Box, _other: Box) -> Box { self } - fn into_event(self) -> impl serde::Serialize { + fn into_event(self: Box) -> serde_json::Value { serde_json::json!({}) } } @@ -80,18 +80,34 @@ pub enum DocumentFetchKind { Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool }, } -pub trait Aggregate: 'static { +pub trait Aggregate: 'static + mopa::Any + Send { fn event_name(&self) -> &'static str; - fn aggregate(self, other: Self) -> Self + fn aggregate(self: Box, other: Box) -> Box where Self: Sized; - fn into_event(self) -> impl Serialize + fn downcast_aggregate( + this: Box, + other: Box, + ) -> Option> where - Self: Sized; + Self: Sized, + { + if this.is::() && other.is::() { + let this = this.downcast::().ok()?; + let other = other.downcast::().ok()?; + Some(Self::aggregate(this, other)) + } else { + None + } + } + + fn into_event(self: Box) -> serde_json::Value; } +mopafy!(Aggregate); + /// Helper trait to define multiple aggregate with the same content but a different name. /// Commonly used when you must aggregate a search with POST or with GET for example. pub trait AggregateMethod: 'static + Default { @@ -137,9 +153,9 @@ impl Analytics { } /// The method used to publish most analytics that do not need to be batched every hours - pub fn publish(&self, event: impl Aggregate, request: &HttpRequest) { + pub fn publish(&self, event: T, request: &HttpRequest) { let Some(ref segment) = self.segment else { return }; let user_agents = extract_user_agents(request); - let _ = segment.sender.try_send(Box::new(event)); + let _ = segment.sender.try_send(segment_analytics::Message::new(event)); } } diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 601fefa1e..1a1bb9226 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -1,7 +1,6 @@ -use std::any::{Any, TypeId}; +use std::any::TypeId; use std::collections::{BTreeSet, BinaryHeap, HashMap, HashSet}; use std::fs; -use std::mem::take; use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::{Duration, Instant}; @@ -72,10 +71,26 @@ pub fn extract_user_agents(request: &HttpRequest) -> Vec { .collect() } +pub struct Message { + type_id: TypeId, + event: Box, + aggregator_function: fn(Box, Box) -> Option>, +} + +impl Message { + pub fn new(event: T) -> Self { + Self { + type_id: TypeId::of::(), + event: Box::new(event), + aggregator_function: T::downcast_aggregate, + } + } +} + pub struct SegmentAnalytics { pub instance_uid: InstanceUid, pub user: User, - pub sender: Sender>, + pub sender: Sender, } impl SegmentAnalytics { @@ -378,7 +393,7 @@ impl From for Infos { } pub struct Segment { - inbox: Receiver>, + inbox: Receiver, user: User, opt: Opt, batcher: AutoBatcher, @@ -435,8 +450,13 @@ impl Segment { }, msg = self.inbox.recv() => { match msg { - // Some(AnalyticsMsg::BatchMessage(msg)) => drop(self.batcher.push(msg).await), - Some(_) => todo!(), + Some(Message { type_id, event, aggregator_function }) => { + let new_event = match self.events.remove(&type_id) { + Some(old) => (aggregator_function)(old, event).unwrap(), + None => event, + }; + self.events.insert(type_id, new_event); + }, None => (), } } @@ -479,9 +499,9 @@ impl 
Segment { // We empty the list of events let events = std::mem::take(&mut self.events); - for (_, mut event) in events { + for (_, event) in events { self.batcher.push(Track { - user: self.user, + user: self.user.clone(), event: event.event_name().to_string(), properties: event.into_event(), timestamp: todo!(), @@ -722,11 +742,11 @@ impl Aggregate for SearchAggregator { Method::event_name() } - fn aggregate(mut self, mut other: Self) -> Self { + fn aggregate(mut self: Box, other: Box) -> Box { let Self { total_received, total_succeeded, - ref mut time_spent, + mut time_spent, sort_with_geo_point, sort_sum_of_criteria_terms, sort_total_number_of_criteria, @@ -761,9 +781,9 @@ impl Aggregate for SearchAggregator { total_degraded, total_used_negative_operator, ranking_score_threshold, - ref mut locales, + mut locales, marker: _, - } = other; + } = *other; // request self.total_received = self.total_received.saturating_add(total_received); @@ -771,7 +791,7 @@ impl Aggregate for SearchAggregator { self.total_degraded = self.total_degraded.saturating_add(total_degraded); self.total_used_negative_operator = self.total_used_negative_operator.saturating_add(total_used_negative_operator); - self.time_spent.append(time_spent); + self.time_spent.append(&mut time_spent); // sort self.sort_with_geo_point |= sort_with_geo_point; @@ -843,12 +863,12 @@ impl Aggregate for SearchAggregator { self.ranking_score_threshold |= ranking_score_threshold; // locales - self.locales.append(locales); + self.locales.append(&mut locales); self } - fn into_event(self) -> impl Serialize { + fn into_event(self: Box) -> serde_json::Value { let Self { total_received, total_succeeded, @@ -889,7 +909,7 @@ impl Aggregate for SearchAggregator { ranking_score_threshold, locales, marker: _, - } = self; + } = *self; // we get all the values in a sorted manner let time_spent = time_spent.into_sorted_vec(); @@ -1058,11 +1078,11 @@ impl Aggregate for MultiSearchAggregator { } /// Aggregate one [MultiSearchAggregator] into another. - fn aggregate(self, other: Self) -> Self { + fn aggregate(self: Box, other: Box) -> Box { // write the aggregate in a way that will cause a compilation error if a field is added. // get ownership of self, replacing it by a default value. - let this = self; + let this = *self; let total_received = this.total_received.saturating_add(other.total_received); let total_succeeded = this.total_succeeded.saturating_add(other.total_succeeded); @@ -1075,7 +1095,7 @@ impl Aggregate for MultiSearchAggregator { this.show_ranking_score_details || other.show_ranking_score_details; let use_federation = this.use_federation || other.use_federation; - Self { + Box::new(Self { total_received, total_succeeded, total_distinct_index_count, @@ -1084,10 +1104,10 @@ impl Aggregate for MultiSearchAggregator { show_ranking_score, show_ranking_score_details, use_federation, - } + }) } - fn into_event(self) -> impl Serialize { + fn into_event(self: Box) -> serde_json::Value { let Self { total_received, total_succeeded, @@ -1097,7 +1117,7 @@ impl Aggregate for MultiSearchAggregator { show_ranking_score, show_ranking_score_details, use_federation, - } = self; + } = *self; json!({ "requests": { @@ -1708,11 +1728,11 @@ impl Aggregate for SimilarAggregator { } /// Aggregate one [SimilarAggregator] into another. 
-    fn aggregate(mut self, mut other: Self) -> Self {
+    fn aggregate(mut self: Box<Self>, other: Box<Self>) -> Box<dyn Aggregate> {
         let Self {
             total_received,
             total_succeeded,
-            ref mut time_spent,
+            mut time_spent,
             filter_with_geo_radius,
             filter_with_geo_bounding_box,
             filter_sum_of_criteria_terms,
@@ -1726,12 +1746,12 @@ impl Aggregate for SimilarAggregator {
             ranking_score_threshold,
             retrieve_vectors,
             marker: _,
-        } = other;
+        } = *other;
 
         // request
         self.total_received = self.total_received.saturating_add(total_received);
         self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
-        self.time_spent.append(time_spent);
+        self.time_spent.append(&mut time_spent);
 
         // filter
         self.filter_with_geo_radius |= filter_with_geo_radius;
@@ -1763,7 +1783,7 @@ impl Aggregate for SimilarAggregator {
         self
     }
 
-    fn into_event(self) -> impl Serialize {
+    fn into_event(self: Box<Self>) -> serde_json::Value {
         let Self {
             total_received,
             total_succeeded,
@@ -1781,7 +1801,7 @@ impl Aggregate for SimilarAggregator {
             ranking_score_threshold,
             retrieve_vectors,
             marker: _,
-        } = self;
+        } = *self;
 
         // we get all the values in a sorted manner
         let time_spent = time_spent.into_sorted_vec();

From aa7a34ffe8b9572c44b4bd36c30f7cf3805a9ed7 Mon Sep 17 00:00:00 2001
From: Tamo 
Date: Thu, 17 Oct 2024 00:43:34 +0200
Subject: [PATCH 06/22] make the aggregate method Send

---
 meilisearch/src/analytics/mod.rs               | 2 +-
 meilisearch/src/analytics/segment_analytics.rs | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs
index 8a0a68bad..f8a589901 100644
--- a/meilisearch/src/analytics/mod.rs
+++ b/meilisearch/src/analytics/mod.rs
@@ -110,7 +110,7 @@ mopafy!(Aggregate);
 
 /// Helper trait to define multiple aggregate with the same content but a different name.
 /// Commonly used when you must aggregate a search with POST or with GET for example.
-pub trait AggregateMethod: 'static + Default {
+pub trait AggregateMethod: 'static + Default + Send {
     fn event_name() -> &'static str;
 }
 
diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs
index 1a1bb9226..92f03e48e 100644
--- a/meilisearch/src/analytics/segment_analytics.rs
+++ b/meilisearch/src/analytics/segment_analytics.rs
@@ -72,9 +72,12 @@ pub fn extract_user_agents(request: &HttpRequest) -> Vec<String> {
 }
 
 pub struct Message {
+    // Since the type_id is resolved statically we cannot retrieve it from the Box.
+    // Thus we have to send it in the message directly.
     type_id: TypeId,
-    event: Box<dyn Aggregate>,
+    // Same for the aggregate function.
aggregator_function: fn(Box, Box) -> Option>, + event: Box, } impl Message { From e4ace98004fff86e35fe8dd4a2cdccfa8b03ce9f Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 01:04:25 +0200 Subject: [PATCH 07/22] fix all the routes + move to a better version of mopa --- Cargo.lock | 8 ++-- meilisearch/Cargo.toml | 2 +- meilisearch/src/analytics/mod.rs | 2 + meilisearch/src/routes/features.rs | 13 ++---- meilisearch/src/routes/indexes/documents.rs | 46 ++++++++----------- .../src/routes/indexes/facet_search.rs | 10 ++-- meilisearch/src/routes/indexes/mod.rs | 23 +++++----- meilisearch/src/routes/indexes/settings.rs | 16 ++----- meilisearch/src/routes/swap_indexes.rs | 10 ++-- meilisearch/src/routes/tasks.rs | 10 ++-- 10 files changed, 65 insertions(+), 75 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 733470384..500f28454 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3415,7 +3415,7 @@ dependencies = [ "meilisearch-types", "mimalloc", "mime", - "mopa", + "mopa-maintained", "num_cpus", "obkv", "once_cell", @@ -3683,10 +3683,10 @@ dependencies = [ ] [[package]] -name = "mopa" -version = "0.2.2" +name = "mopa-maintained" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a785740271256c230f57462d3b83e52f998433a7062fc18f96d5999474a9f915" +checksum = "79b7f3e22167862cc7c95b21a6f326c22e4bf40da59cbf000b368a310173ba11" [[package]] name = "mutually_exclusive_features" diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 322b333ac..07357e724 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -104,7 +104,7 @@ tracing-trace = { version = "0.1.0", path = "../tracing-trace" } tracing-actix-web = "0.7.11" build-info = { version = "1.7.0", path = "../build-info" } roaring = "0.10.2" -mopa = "0.2.2" +mopa-maintained = "0.2.3" [dev-dependencies] actix-rt = "2.10.0" diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index f8a589901..b3e8109a3 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -1,3 +1,5 @@ +#![allow(clippy::transmute_ptr_to_ref)] // mopify isn't updated with the latest version of clippy yet + pub mod segment_analytics; use std::fs; diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index 0b43c3f13..1de00717d 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -69,21 +69,18 @@ impl Aggregate for PatchExperimentalFeatureAnalytics { "Experimental features Updated" } - fn aggregate(self, other: Self) -> Self - where - Self: Sized, - { - Self { + fn aggregate(self: Box, other: Box) -> Box { + Box::new(Self { vector_store: other.vector_store, metrics: other.metrics, logs_route: other.logs_route, edit_documents_by_function: other.edit_documents_by_function, contains_filter: other.contains_filter, - } + }) } - fn into_event(self) -> impl Serialize { - self + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() } } diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 1573b768b..854fa5b69 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -162,8 +162,8 @@ impl Aggregate for DocumentsFetchAggregator { Method::event_name() } - fn aggregate(self, other: Self) -> Self { - Self { + fn aggregate(self: Box, other: Box) -> Box { + Box::new(Self { total_received: self.total_received.saturating_add(other.total_received), 
per_document_id: self.per_document_id | other.per_document_id, per_filter: self.per_filter | other.per_filter, @@ -171,11 +171,11 @@ impl Aggregate for DocumentsFetchAggregator { max_limit: self.max_limit.max(other.max_limit), max_offset: self.max_offset.max(other.max_offset), marker: PhantomData, - } + }) } - fn into_event(self) -> impl Serialize { - self + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() } } @@ -226,21 +226,18 @@ impl Aggregate for DocumentsDeletionAggregator { "Documents Deleted" } - fn aggregate(self, other: Self) -> Self - where - Self: Sized, - { - Self { + fn aggregate(self: Box, other: Box) -> Box { + Box::new(Self { total_received: self.total_received.saturating_add(other.total_received), per_document_id: self.per_document_id | other.per_document_id, clear_all: self.clear_all | other.clear_all, per_batch: self.per_batch | other.per_batch, per_filter: self.per_filter | other.per_filter, - } + }) } - fn into_event(self) -> impl Serialize { - self + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() } } @@ -443,17 +440,17 @@ impl Aggregate for DocumentsAggregator { Method::event_name() } - fn aggregate(self, other: Self) -> Self { - Self { + fn aggregate(self: Box, other: Box) -> Box { + Box::new(Self { payload_types: self.payload_types.union(&other.payload_types).cloned().collect(), primary_key: self.primary_key.union(&other.primary_key).cloned().collect(), index_creation: self.index_creation | other.index_creation, method: PhantomData, - } + }) } - fn into_event(self) -> impl Serialize { - self + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(self).unwrap_or_default() } } @@ -811,19 +808,16 @@ impl Aggregate for EditDocumentsByFunctionAggregator { "Documents Edited By Function" } - fn aggregate(self, other: Self) -> Self - where - Self: Sized, - { - Self { + fn aggregate(self: Box, other: Box) -> Box { + Box::new(Self { filtered: self.filtered | other.filtered, with_context: self.with_context | other.with_context, index_creation: self.index_creation | other.index_creation, - } + }) } - fn into_event(self) -> impl Serialize { - self + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() } } diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 08618970d..715eaaaa7 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -114,29 +114,29 @@ impl Aggregate for FacetSearchAggregator { "Facet Searched POST" } - fn aggregate(mut self, other: Self) -> Self { + fn aggregate(mut self: Box, other: Box) -> Box { for time in other.time_spent { self.time_spent.push(time); } - Self { + Box::new(Self { total_received: self.total_received.saturating_add(other.total_received), total_succeeded: self.total_succeeded.saturating_add(other.total_succeeded), time_spent: self.time_spent, facet_names: self.facet_names.union(&other.facet_names).cloned().collect(), additional_search_parameters_provided: self.additional_search_parameters_provided | other.additional_search_parameters_provided, - } + }) } - fn into_event(self) -> impl Serialize { + fn into_event(self: Box) -> serde_json::Value { let Self { total_received, total_succeeded, time_spent, facet_names, additional_search_parameters_provided, - } = self; + } = *self; // the index of the 99th percentage of value let percentile_99th = 0.99 * (total_succeeded 
as f64 - 1.) + 1.; // we get all the values in a sorted manner diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index 3c41f36fe..8972119d7 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -133,15 +133,14 @@ impl Aggregate for IndexCreatedAggregate { "Index Created" } - fn aggregate(self, other: Self) -> Self - where - Self: Sized, - { - Self { primary_key: self.primary_key.union(&other.primary_key).cloned().collect() } + fn aggregate(self: Box, other: Box) -> Box { + Box::new(Self { + primary_key: self.primary_key.union(&other.primary_key).cloned().collect(), + }) } - fn into_event(self) -> impl Serialize { - self + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() } } @@ -225,12 +224,14 @@ impl Aggregate for IndexUpdatedAggregate { "Index Updated" } - fn aggregate(self, other: Self) -> Self { - Self { primary_key: self.primary_key.union(&other.primary_key).cloned().collect() } + fn aggregate(self: Box, other: Box) -> Box { + Box::new(Self { + primary_key: self.primary_key.union(&other.primary_key).cloned().collect(), + }) } - fn into_event(self) -> impl Serialize { - self + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() } } pub async fn update_index( diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index bb2f6792d..f31f52dc1 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -437,11 +437,8 @@ impl Aggregate for SettingsAnalytics { "Settings Updated" } - fn aggregate(self, other: Self) -> Self - where - Self: Sized, - { - Self { + fn aggregate(self: Box, other: Box) -> Box { + Box::new(Self { ranking_rules: RankingRulesAnalytics { words_position: self .ranking_rules @@ -586,14 +583,11 @@ impl Aggregate for SettingsAnalytics { non_separator_tokens: NonSeparatorTokensAnalytics { total: self.non_separator_tokens.total.or(other.non_separator_tokens.total), }, - } + }) } - fn into_event(self) -> impl Serialize - where - Self: Sized, - { - self + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() } } diff --git a/meilisearch/src/routes/swap_indexes.rs b/meilisearch/src/routes/swap_indexes.rs index abdffbb73..f7d8f4eff 100644 --- a/meilisearch/src/routes/swap_indexes.rs +++ b/meilisearch/src/routes/swap_indexes.rs @@ -39,12 +39,14 @@ impl Aggregate for IndexSwappedAnalytics { "Indexes Swapped" } - fn aggregate(self, other: Self) -> Self { - Self { swap_operation_number: self.swap_operation_number.max(other.swap_operation_number) } + fn aggregate(self: Box, other: Box) -> Box { + Box::new(Self { + swap_operation_number: self.swap_operation_number.max(other.swap_operation_number), + }) } - fn into_event(self) -> impl Serialize { - self + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() } } diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index f04e2ead2..ff4aee998 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -185,8 +185,8 @@ impl Aggregate for TaskFilterAnalytics Self { - Self { + fn aggregate(self: Box, other: Box) -> Box { + Box::new(Self { filtered_by_uid: self.filtered_by_uid | other.filtered_by_uid, filtered_by_index_uid: self.filtered_by_index_uid | other.filtered_by_index_uid, filtered_by_type: self.filtered_by_type | 
other.filtered_by_type, @@ -206,11 +206,11 @@ impl Aggregate for TaskFilterAnalytics impl Serialize { - self + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() } } From 7382fb21e41719a6be6dbf5f25b6c47ad7afc581 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 08:38:11 +0200 Subject: [PATCH 08/22] fix the main --- meilisearch/src/analytics/mod.rs | 24 +++++++++++++------ .../src/analytics/segment_analytics.rs | 10 ++++---- meilisearch/src/lib.rs | 6 ++--- meilisearch/src/main.rs | 22 +++++------------ meilisearch/src/routes/indexes/search.rs | 4 ++-- 5 files changed, 33 insertions(+), 33 deletions(-) diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index b3e8109a3..91139e1dd 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -5,8 +5,11 @@ pub mod segment_analytics; use std::fs; use std::path::{Path, PathBuf}; use std::str::FromStr; +use std::sync::Arc; use actix_web::HttpRequest; +use index_scheduler::IndexScheduler; +use meilisearch_auth::AuthController; use meilisearch_types::InstanceUid; use mopa::mopafy; use once_cell::sync::Lazy; @@ -17,6 +20,8 @@ pub type SegmentAnalytics = segment_analytics::SegmentAnalytics; pub use segment_analytics::SearchAggregator; pub use segment_analytics::SimilarAggregator; +use crate::Opt; + use self::segment_analytics::extract_user_agents; pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator; pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator; @@ -137,17 +142,22 @@ macro_rules! aggregate_methods { }; } +#[derive(Clone)] pub struct Analytics { - segment: Option, + segment: Option>, } impl Analytics { - fn no_analytics() -> Self { - Self { segment: None } - } - - fn segment_analytics(segment: SegmentAnalytics) -> Self { - Self { segment: Some(segment) } + pub async fn new( + opt: &Opt, + index_scheduler: Arc, + auth_controller: Arc, + ) -> Self { + if opt.no_analytics { + Self { segment: None } + } else { + Self { segment: SegmentAnalytics::new(opt, index_scheduler, auth_controller).await } + } } pub fn instance_uid(&self) -> Option<&InstanceUid> { diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 92f03e48e..3496853ff 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -102,7 +102,7 @@ impl SegmentAnalytics { opt: &Opt, index_scheduler: Arc, auth_controller: Arc, - ) -> Arc { + ) -> Option> { let instance_uid = super::find_user_id(&opt.db_path); let first_time_run = instance_uid.is_none(); let instance_uid = instance_uid.unwrap_or_else(Uuid::new_v4); @@ -112,7 +112,7 @@ impl SegmentAnalytics { // if reqwest throws an error we won't be able to send analytics if client.is_err() { - return Arc::new(Analytics::no_analytics()); + return None; } let client = @@ -148,13 +148,13 @@ impl SegmentAnalytics { user: user.clone(), opt: opt.clone(), batcher, - events: todo!(), + events: HashMap::new(), }); tokio::spawn(segment.run(index_scheduler.clone(), auth_controller.clone())); let this = Self { instance_uid, sender, user: user.clone() }; - Arc::new(Analytics::segment_analytics(this)) + Some(Arc::new(this)) } } @@ -595,7 +595,7 @@ pub struct SearchAggregator { impl SearchAggregator { #[allow(clippy::field_reassign_with_default)] - pub fn from_query(query: &SearchQuery, request: &HttpRequest) -> Self { + pub fn from_query(query: &SearchQuery) -> Self { let SearchQuery { q, 
vector, diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index 80177876a..633ad2776 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -120,7 +120,7 @@ pub fn create_app( search_queue: Data, opt: Opt, logs: (LogRouteHandle, LogStderrHandle), - analytics: Arc, + analytics: Data, enable_dashboard: bool, ) -> actix_web::App< impl ServiceFactory< @@ -473,14 +473,14 @@ pub fn configure_data( search_queue: Data, opt: &Opt, (logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle), - analytics: Arc, + analytics: Data, ) { let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize; config .app_data(index_scheduler) .app_data(auth) .app_data(search_queue) - .app_data(web::Data::from(analytics)) + .app_data(analytics) .app_data(web::Data::new(logs_route)) .app_data(web::Data::new(logs_stderr)) .app_data(web::Data::new(opt.clone())) diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index de9784d15..eebea3b6d 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -124,19 +124,12 @@ async fn try_main() -> anyhow::Result<()> { let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?; - #[cfg(all(not(debug_assertions), feature = "analytics"))] - let analytics = if !opt.no_analytics { - analytics::SegmentAnalytics::new(&opt, index_scheduler.clone(), auth_controller.clone()) - .await - } else { - analytics::MockAnalytics::new(&opt) - }; - #[cfg(any(debug_assertions, not(feature = "analytics")))] - let analytics = analytics::MockAnalytics::new(&opt); + let analytics = + analytics::Analytics::new(&opt, index_scheduler.clone(), auth_controller.clone()).await; print_launch_resume(&opt, analytics.clone(), config_read_from); - run_http(index_scheduler, auth_controller, opt, log_handle, analytics).await?; + run_http(index_scheduler, auth_controller, opt, log_handle, Arc::new(analytics)).await?; Ok(()) } @@ -146,12 +139,13 @@ async fn run_http( auth_controller: Arc, opt: Opt, logs: (LogRouteHandle, LogStderrHandle), - analytics: Arc, + analytics: Arc, ) -> anyhow::Result<()> { let enable_dashboard = &opt.env == "development"; let opt_clone = opt.clone(); let index_scheduler = Data::from(index_scheduler); let auth_controller = Data::from(auth_controller); + let analytics = Data::from(analytics); let search_queue = SearchQueue::new( opt.experimental_search_queue_size, available_parallelism() @@ -187,11 +181,7 @@ async fn run_http( Ok(()) } -pub fn print_launch_resume( - opt: &Opt, - analytics: Arc, - config_read_from: Option, -) { +pub fn print_launch_resume(opt: &Opt, analytics: Analytics, config_read_from: Option) { let build_info = build_info::BuildInfo::from_build(); let protocol = diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index 538c46fd0..ac6e23c8f 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -238,7 +238,7 @@ pub async fn search_with_url_query( add_search_rules(&mut query.filter, search_rules); } - let mut aggregate = SearchAggregator::::from_query(&query, &req); + let mut aggregate = SearchAggregator::::from_query(&query); let index = index_scheduler.index(&index_uid)?; let features = index_scheduler.features(); @@ -281,7 +281,7 @@ pub async fn search_with_post( add_search_rules(&mut query.filter, search_rules); } - let mut aggregate = SearchAggregator::::from_query(&query, &req); + let mut aggregate = SearchAggregator::::from_query(&query); let index = index_scheduler.index(&index_uid)?; From 
ef77c7699b21422b4857878d072494e1bfc49d6b Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 09:06:23 +0200 Subject: [PATCH 09/22] add the required shared values between all the events and fix the timestamp --- meilisearch/src/analytics/mod.rs | 6 +- .../src/analytics/segment_analytics.rs | 75 +++++++++++++------ 2 files changed, 57 insertions(+), 24 deletions(-) diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index 91139e1dd..a3b8d6d1d 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -166,8 +166,8 @@ impl Analytics { /// The method used to publish most analytics that do not need to be batched every hours pub fn publish(&self, event: T, request: &HttpRequest) { - let Some(ref segment) = self.segment else { return }; - let user_agents = extract_user_agents(request); - let _ = segment.sender.try_send(segment_analytics::Message::new(event)); + if let Some(ref segment) = self.segment { + let _ = segment.sender.try_send(segment_analytics::Message::new(event, request)); + } } } diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 3496853ff..00a3adaaf 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -28,7 +28,6 @@ use super::{ config_user_id_path, Aggregate, AggregateMethod, DocumentDeletionKind, DocumentFetchKind, MEILISEARCH_CONFIG_PATH, }; -use crate::analytics::Analytics; use crate::option::{ default_http_addr, IndexerOpts, LogMode, MaxMemory, MaxThreads, ScheduleSnapshot, }; @@ -58,7 +57,7 @@ fn write_user_id(db_path: &Path, user_id: &InstanceUid) { const SEGMENT_API_KEY: &str = "P3FWhhEsJiEDCuEHpmcN9DHcK4hVfBvb"; -pub fn extract_user_agents(request: &HttpRequest) -> Vec { +pub fn extract_user_agents(request: &HttpRequest) -> HashSet { request .headers() .get(ANALYTICS_HEADER) @@ -77,14 +76,26 @@ pub struct Message { type_id: TypeId, // Same for the aggregate function. aggregator_function: fn(Box, Box) -> Option>, - event: Box, + event: Event, +} + +pub struct Event { + original: Box, + timestamp: OffsetDateTime, + user_agents: HashSet, + total: usize, } impl Message { - pub fn new(event: T) -> Self { + pub fn new(event: T, request: &HttpRequest) -> Self { Self { type_id: TypeId::of::(), - event: Box::new(event), + event: Event { + original: Box::new(event), + timestamp: OffsetDateTime::now_utc(), + user_agents: extract_user_agents(request), + total: 1, + }, aggregator_function: T::downcast_aggregate, } } @@ -400,7 +411,7 @@ pub struct Segment { user: User, opt: Opt, batcher: AutoBatcher, - events: HashMap>, + events: HashMap, } impl Segment { @@ -451,22 +462,34 @@ impl Segment { _ = interval.tick() => { self.tick(index_scheduler.clone(), auth_controller.clone()).await; }, - msg = self.inbox.recv() => { - match msg { - Some(Message { type_id, event, aggregator_function }) => { - let new_event = match self.events.remove(&type_id) { - Some(old) => (aggregator_function)(old, event).unwrap(), - None => event, - }; - self.events.insert(type_id, new_event); - }, - None => (), - } - } + Some(msg) = self.inbox.recv() => { + self.handle_msg(msg); + } } } } + fn handle_msg(&mut self, Message { type_id, aggregator_function, event }: Message) { + let new_event = match self.events.remove(&type_id) { + Some(old) => { + // The function should never fail since we retrieved the corresponding TypeId in the map. 
But in the unfortunate
+                // case it could happen, we're going to silently ignore the error
+                let Some(original) = (aggregator_function)(old.original, event.original) else {
+                    return;
+                };
+                Event {
+                    original,
+                    // We always want to return the FIRST timestamp ever encountered
+                    timestamp: old.timestamp,
+                    user_agents: old.user_agents.union(&event.user_agents).cloned().collect(),
+                    total: old.total.saturating_add(event.total),
+                }
+            }
+            None => event,
+        };
+        self.events.insert(type_id, new_event);
+    }
+
     async fn tick(
         &mut self,
         index_scheduler: Arc,
@@ -503,11 +526,21 @@ impl Segment {
 
             let events = std::mem::take(&mut self.events);
             for (_, event) in events {
+                let Event { original, timestamp, user_agents, total } = event;
+                let name = original.event_name();
+                let mut properties = original.into_event();
+                if properties["user-agent"].is_null() {
+                    properties["user-agent"] = json!(user_agents);
+                };
+                if properties["requests"]["total_received"].is_null() {
+                    properties["requests"]["total_received"] = total.into();
+                };
+
                 self.batcher.push(Track {
                     user: self.user.clone(),
-                    event: event.event_name().to_string(),
-                    properties: event.into_event(),
-                    timestamp: todo!(),
+                    event: name.to_string(),
+                    properties,
+                    timestamp: Some(timestamp),
                     ..Default::default()
                 });
             }

From 4ee65d870eab55f0c5098aaad659aa98fbd9d500 Mon Sep 17 00:00:00 2001
From: Tamo
Date: Thu, 17 Oct 2024 09:14:34 +0200
Subject: [PATCH 10/22] remove a lot of unused code

---
 meilisearch/src/analytics/mod.rs              |   4 +-
 .../src/analytics/segment_analytics.rs        | 598 +-----------------
 .../src/routes/indexes/facet_search.rs        |   1 -
 3 files changed, 17 insertions(+), 586 deletions(-)

diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs
index a3b8d6d1d..d08f3307c 100644
--- a/meilisearch/src/analytics/mod.rs
+++ b/meilisearch/src/analytics/mod.rs
@@ -22,9 +22,7 @@ pub use segment_analytics::SimilarAggregator;
 
 use crate::Opt;
 
-use self::segment_analytics::extract_user_agents;
-pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator;
-pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator;
+pub use self::segment_analytics::MultiSearchAggregator;
 
 /// A macro used to quickly define events that don't aggregate or send anything besides an empty event with its name. 
#[macro_export] diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 00a3adaaf..1edfa1bdd 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -5,7 +5,7 @@ use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::{Duration, Instant}; -use actix_web::http::header::{CONTENT_TYPE, USER_AGENT}; +use actix_web::http::header::USER_AGENT; use actix_web::HttpRequest; use byte_unit::Byte; use index_scheduler::IndexScheduler; @@ -24,21 +24,15 @@ use tokio::select; use tokio::sync::mpsc::{self, Receiver, Sender}; use uuid::Uuid; -use super::{ - config_user_id_path, Aggregate, AggregateMethod, DocumentDeletionKind, DocumentFetchKind, - MEILISEARCH_CONFIG_PATH, -}; +use super::{config_user_id_path, Aggregate, AggregateMethod, MEILISEARCH_CONFIG_PATH}; use crate::option::{ default_http_addr, IndexerOpts, LogMode, MaxMemory, MaxThreads, ScheduleSnapshot, }; -use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery}; -use crate::routes::indexes::facet_search::FacetSearchQuery; use crate::routes::{create_all_stats, Stats}; use crate::search::{ - FacetSearchResult, FederatedSearch, MatchingStrategy, SearchQuery, SearchQueryWithIndex, - SearchResult, SimilarQuery, SimilarResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, - DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, - DEFAULT_SEMANTIC_RATIO, + FederatedSearch, SearchQuery, SearchQueryWithIndex, SearchResult, SimilarQuery, SimilarResult, + DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, + DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEMANTIC_RATIO, }; use crate::{aggregate_methods, Opt}; @@ -75,6 +69,7 @@ pub struct Message { // Thus we have to send it in the message directly. type_id: TypeId, // Same for the aggregate function. 
+ #[allow(clippy::type_complexity)] aggregator_function: fn(Box, Box) -> Option>, event: Event, } @@ -169,97 +164,6 @@ impl SegmentAnalytics { } } -/* -impl super::Analytics for SegmentAnalytics { - fn instance_uid(&self) -> Option<&InstanceUid> { - Some(&self.instance_uid) - } - - fn publish(&self, event_name: String, mut send: Value, request: Option<&HttpRequest>) { - let user_agent = request.map(extract_user_agents); - - send["user-agent"] = json!(user_agent); - let event = Track { - user: self.user.clone(), - event: event_name.clone(), - properties: send, - ..Default::default() - }; - let _ = self.sender.try_send(AnalyticsMsg::BatchMessage(event)); - } - - fn get_search(&self, aggregate: SearchAggregator) { - let _ = self.sender.try_send(AnalyticsMsg::AggregateGetSearch(aggregate)); - } - - fn post_search(&self, aggregate: SearchAggregator) { - let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate)); - } - - fn get_similar(&self, aggregate: SimilarAggregator) { - let _ = self.sender.try_send(AnalyticsMsg::AggregateGetSimilar(aggregate)); - } - - fn post_similar(&self, aggregate: SimilarAggregator) { - let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSimilar(aggregate)); - } - - fn post_facet_search(&self, aggregate: FacetSearchAggregator) { - let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFacetSearch(aggregate)); - } - - fn post_multi_search(&self, aggregate: MultiSearchAggregator) { - let _ = self.sender.try_send(AnalyticsMsg::AggregatePostMultiSearch(aggregate)); - } - - fn add_documents( - &self, - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ) { - let aggregate = DocumentsAggregator::from_query(documents_query, index_creation, request); - let _ = self.sender.try_send(AnalyticsMsg::AggregateAddDocuments(aggregate)); - } - - fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest) { - let aggregate = DocumentsDeletionAggregator::from_query(kind, request); - let _ = self.sender.try_send(AnalyticsMsg::AggregateDeleteDocuments(aggregate)); - } - - fn update_documents( - &self, - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ) { - let aggregate = DocumentsAggregator::from_query(documents_query, index_creation, request); - let _ = self.sender.try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate)); - } - - fn update_documents_by_function( - &self, - documents_query: &DocumentEditionByFunction, - index_creation: bool, - request: &HttpRequest, - ) { - let aggregate = - EditDocumentsByFunctionAggregator::from_query(documents_query, index_creation, request); - let _ = self.sender.try_send(AnalyticsMsg::AggregateEditDocumentsByFunction(aggregate)); - } - - fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) { - let aggregate = DocumentsFetchAggregator::from_query(documents_query, request); - let _ = self.sender.try_send(AnalyticsMsg::AggregateGetFetchDocuments(aggregate)); - } - - fn post_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) { - let aggregate = DocumentsFetchAggregator::from_query(documents_query, request); - let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFetchDocuments(aggregate)); - } -} -*/ - /// This structure represent the `infos` field we send in the analytics. /// It's quite close to the `Opt` structure except all sensitive informations /// have been simplified to a boolean. 
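A note on the type-erased plumbing above: events travel through the channel as `Box<dyn Aggregate>`, so once the concrete type is erased the merge can no longer be a plain generic call. `Message::new::<T>` therefore captures a monomorphized function pointer (`T::downcast_aggregate`) next to the `TypeId`, and the `TypeId` keys the `events` map so only events of the same concrete type are ever folded together. The trait definition itself lives in `analytics/mod.rs`, outside the hunks quoted here; the following is a minimal sketch of its likely shape, assuming `mopa` supplies the `is`/`downcast` calls on the trait object (the series does import `mopa::mopafy`) and guessing at the exact bounds:

pub trait Aggregate: 'static + mopa::Any + Send {
    fn event_name(&self) -> &'static str;

    fn aggregate(self: Box<Self>, other: Box<Self>) -> Box<Self>
    where
        Self: Sized;

    // Likely shape of the helper the `aggregator_function` pointer refers to:
    // merge only when both erased boxes really hold a `Self`. A mismatch
    // yields `None`, which `handle_msg` treats as "silently drop the event".
    fn downcast_aggregate(
        old: Box<dyn Aggregate>,
        new: Box<dyn Aggregate>,
    ) -> Option<Box<dyn Aggregate>>
    where
        Self: Sized,
    {
        if old.is::<Self>() && new.is::<Self>() {
            let this = old.downcast::<Self>().ok()?;
            let other = new.downcast::<Self>().ok()?;
            Some(Self::aggregate(this, other))
        } else {
            None
        }
    }

    fn into_event(self: Box<Self>) -> serde_json::Value;
}
mopafy!(Aggregate);

Returning `Option` instead of unwrapping is what lets the receiving end survive the "should never fail" mismatch without panicking inside the analytics task.
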
@@ -536,13 +440,16 @@ impl Segment { properties["requests"]["total_received"] = total.into(); }; - self.batcher.push(Track { - user: self.user.clone(), - event: name.to_string(), - properties, - timestamp: Some(timestamp), - ..Default::default() - }); + let _ = self + .batcher + .push(Track { + user: self.user.clone(), + event: name.to_string(), + properties, + timestamp: Some(timestamp), + ..Default::default() + }) + .await; } let _ = self.batcher.flush().await; @@ -1181,479 +1088,6 @@ impl Aggregate for MultiSearchAggregator { } } -#[derive(Default)] -pub struct FacetSearchAggregator { - timestamp: Option, - - // context - user_agents: HashSet, - - // requests - total_received: usize, - total_succeeded: usize, - time_spent: BinaryHeap, - - // The set of all facetNames that were used - facet_names: HashSet, - - // As there been any other parameter than the facetName or facetQuery ones? - additional_search_parameters_provided: bool, -} - -impl FacetSearchAggregator { - #[allow(clippy::field_reassign_with_default)] - pub fn from_query(query: &FacetSearchQuery, request: &HttpRequest) -> Self { - let FacetSearchQuery { - facet_query: _, - facet_name, - vector, - q, - filter, - matching_strategy, - attributes_to_search_on, - hybrid, - ranking_score_threshold, - locales, - } = query; - - let mut ret = Self::default(); - ret.timestamp = Some(OffsetDateTime::now_utc()); - - ret.total_received = 1; - ret.user_agents = extract_user_agents(request).into_iter().collect(); - ret.facet_names = Some(facet_name.clone()).into_iter().collect(); - - ret.additional_search_parameters_provided = q.is_some() - || vector.is_some() - || filter.is_some() - || *matching_strategy != MatchingStrategy::default() - || attributes_to_search_on.is_some() - || hybrid.is_some() - || ranking_score_threshold.is_some() - || locales.is_some(); - - ret - } - - pub fn succeed(&mut self, result: &FacetSearchResult) { - let FacetSearchResult { facet_hits: _, facet_query: _, processing_time_ms } = result; - self.total_succeeded = self.total_succeeded.saturating_add(1); - self.time_spent.push(*processing_time_ms as usize); - } - - /// Aggregate one [FacetSearchAggregator] into another. - pub fn aggregate(&mut self, mut other: Self) { - let Self { - timestamp, - user_agents, - total_received, - total_succeeded, - ref mut time_spent, - facet_names, - additional_search_parameters_provided, - } = other; - - if self.timestamp.is_none() { - self.timestamp = timestamp; - } - - // context - for user_agent in user_agents.into_iter() { - self.user_agents.insert(user_agent); - } - - // request - self.total_received = self.total_received.saturating_add(total_received); - self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); - self.time_spent.append(time_spent); - - // facet_names - for facet_name in facet_names.into_iter() { - self.facet_names.insert(facet_name); - } - - // additional_search_parameters_provided - self.additional_search_parameters_provided |= additional_search_parameters_provided; - } - - pub fn into_event(self, user: &User, event_name: &str) -> Option { - let Self { - timestamp, - user_agents, - total_received, - total_succeeded, - time_spent, - facet_names, - additional_search_parameters_provided, - } = self; - - if total_received == 0 { - None - } else { - // the index of the 99th percentage of value - let percentile_99th = 0.99 * (total_succeeded as f64 - 1.) 
+ 1.; - // we get all the values in a sorted manner - let time_spent = time_spent.into_sorted_vec(); - // We are only interested by the slowest value of the 99th fastest results - let time_spent = time_spent.get(percentile_99th as usize); - - let properties = json!({ - "user-agent": user_agents, - "requests": { - "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), - "total_succeeded": total_succeeded, - "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics - "total_received": total_received, - }, - "facets": { - "total_distinct_facet_count": facet_names.len(), - "additional_search_parameters_provided": additional_search_parameters_provided, - }, - }); - - Some(Track { - timestamp, - user: user.clone(), - event: event_name.to_string(), - properties, - ..Default::default() - }) - } - } -} - -#[derive(Default)] -pub struct DocumentsAggregator { - timestamp: Option, - - // set to true when at least one request was received - updated: bool, - - // context - user_agents: HashSet, - - content_types: HashSet, - primary_keys: HashSet, - index_creation: bool, -} - -impl DocumentsAggregator { - pub fn from_query( - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ) -> Self { - let UpdateDocumentsQuery { primary_key, csv_delimiter: _ } = documents_query; - - let mut primary_keys = HashSet::new(); - if let Some(primary_key) = primary_key.clone() { - primary_keys.insert(primary_key); - } - - let mut content_types = HashSet::new(); - let content_type = request - .headers() - .get(CONTENT_TYPE) - .and_then(|s| s.to_str().ok()) - .unwrap_or("unknown") - .to_string(); - content_types.insert(content_type); - - Self { - timestamp: Some(OffsetDateTime::now_utc()), - updated: true, - user_agents: extract_user_agents(request).into_iter().collect(), - content_types, - primary_keys, - index_creation, - } - } - - /// Aggregate one [DocumentsAggregator] into another. 
- pub fn aggregate(&mut self, other: Self) { - let Self { timestamp, user_agents, primary_keys, content_types, index_creation, updated } = - other; - - if self.timestamp.is_none() { - self.timestamp = timestamp; - } - - self.updated |= updated; - // we can't create a union because there is no `into_union` method - for user_agent in user_agents { - self.user_agents.insert(user_agent); - } - for primary_key in primary_keys { - self.primary_keys.insert(primary_key); - } - for content_type in content_types { - self.content_types.insert(content_type); - } - self.index_creation |= index_creation; - } - - pub fn into_event(self, user: &User, event_name: &str) -> Option { - let Self { timestamp, user_agents, primary_keys, content_types, index_creation, updated } = - self; - - if !updated { - None - } else { - let properties = json!({ - "user-agent": user_agents, - "payload_type": content_types, - "primary_key": primary_keys, - "index_creation": index_creation, - }); - - Some(Track { - timestamp, - user: user.clone(), - event: event_name.to_string(), - properties, - ..Default::default() - }) - } - } -} - -#[derive(Default)] -pub struct EditDocumentsByFunctionAggregator { - timestamp: Option, - - // Set to true if at least one request was filtered - filtered: bool, - // Set to true if at least one request contained a context - with_context: bool, - - // context - user_agents: HashSet, - - index_creation: bool, -} - -impl EditDocumentsByFunctionAggregator { - pub fn from_query( - documents_query: &DocumentEditionByFunction, - index_creation: bool, - request: &HttpRequest, - ) -> Self { - let DocumentEditionByFunction { filter, context, function: _ } = documents_query; - - Self { - timestamp: Some(OffsetDateTime::now_utc()), - user_agents: extract_user_agents(request).into_iter().collect(), - filtered: filter.is_some(), - with_context: context.is_some(), - index_creation, - } - } - - /// Aggregate one [DocumentsAggregator] into another. - pub fn aggregate(&mut self, other: Self) { - let Self { timestamp, user_agents, index_creation, filtered, with_context } = other; - - if self.timestamp.is_none() { - self.timestamp = timestamp; - } - - // we can't create a union because there is no `into_union` method - for user_agent in user_agents { - self.user_agents.insert(user_agent); - } - self.index_creation |= index_creation; - self.filtered |= filtered; - self.with_context |= with_context; - } - - pub fn into_event(self, user: &User, event_name: &str) -> Option { - let Self { timestamp, user_agents, index_creation, filtered, with_context } = self; - - // if we had no timestamp it means we never encountered any events and - // thus we don't need to send this event. 
- let timestamp = timestamp?; - - let properties = json!({ - "user-agent": user_agents, - "filtered": filtered, - "with_context": with_context, - "index_creation": index_creation, - }); - - Some(Track { - timestamp: Some(timestamp), - user: user.clone(), - event: event_name.to_string(), - properties, - ..Default::default() - }) - } -} - -#[derive(Default, Serialize)] -pub struct DocumentsDeletionAggregator { - #[serde(skip)] - timestamp: Option, - - // context - #[serde(rename = "user-agent")] - user_agents: HashSet, - - #[serde(rename = "requests.total_received")] - total_received: usize, - per_document_id: bool, - clear_all: bool, - per_batch: bool, - per_filter: bool, -} - -impl DocumentsDeletionAggregator { - pub fn from_query(kind: DocumentDeletionKind, request: &HttpRequest) -> Self { - Self { - timestamp: Some(OffsetDateTime::now_utc()), - user_agents: extract_user_agents(request).into_iter().collect(), - total_received: 1, - per_document_id: matches!(kind, DocumentDeletionKind::PerDocumentId), - clear_all: matches!(kind, DocumentDeletionKind::ClearAll), - per_batch: matches!(kind, DocumentDeletionKind::PerBatch), - per_filter: matches!(kind, DocumentDeletionKind::PerFilter), - } - } - - /// Aggregate one [DocumentsAggregator] into another. - pub fn aggregate(&mut self, other: Self) { - let Self { - timestamp, - user_agents, - total_received, - per_document_id, - clear_all, - per_batch, - per_filter, - } = other; - - if self.timestamp.is_none() { - self.timestamp = timestamp; - } - - // we can't create a union because there is no `into_union` method - for user_agent in user_agents { - self.user_agents.insert(user_agent); - } - self.total_received = self.total_received.saturating_add(total_received); - self.per_document_id |= per_document_id; - self.clear_all |= clear_all; - self.per_batch |= per_batch; - self.per_filter |= per_filter; - } - - pub fn into_event(self, user: &User, event_name: &str) -> Option { - // if we had no timestamp it means we never encountered any events and - // thus we don't need to send this event. - let timestamp = self.timestamp?; - - Some(Track { - timestamp: Some(timestamp), - user: user.clone(), - event: event_name.to_string(), - properties: serde_json::to_value(self).ok()?, - ..Default::default() - }) - } -} - -#[derive(Default, Serialize)] -pub struct DocumentsFetchAggregator { - #[serde(skip)] - timestamp: Option, - - // context - #[serde(rename = "user-agent")] - user_agents: HashSet, - - #[serde(rename = "requests.total_received")] - total_received: usize, - - // a call on ../documents/:doc_id - per_document_id: bool, - // if a filter was used - per_filter: bool, - - #[serde(rename = "vector.retrieve_vectors")] - retrieve_vectors: bool, - - // pagination - #[serde(rename = "pagination.max_limit")] - max_limit: usize, - #[serde(rename = "pagination.max_offset")] - max_offset: usize, -} - -impl DocumentsFetchAggregator { - pub fn from_query(query: &DocumentFetchKind, request: &HttpRequest) -> Self { - let (limit, offset, retrieve_vectors) = match query { - DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors), - DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. } => { - (*limit, *offset, *retrieve_vectors) - } - }; - Self { - timestamp: Some(OffsetDateTime::now_utc()), - user_agents: extract_user_agents(request).into_iter().collect(), - total_received: 1, - per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }), - per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. 
} if *with_filter), - max_limit: limit, - max_offset: offset, - retrieve_vectors, - } - } - - /// Aggregate one [DocumentsFetchAggregator] into another. - pub fn aggregate(&mut self, other: Self) { - let Self { - timestamp, - user_agents, - total_received, - per_document_id, - per_filter, - max_limit, - max_offset, - retrieve_vectors, - } = other; - - if self.timestamp.is_none() { - self.timestamp = timestamp; - } - for user_agent in user_agents { - self.user_agents.insert(user_agent); - } - - self.total_received = self.total_received.saturating_add(total_received); - self.per_document_id |= per_document_id; - self.per_filter |= per_filter; - - self.max_limit = self.max_limit.max(max_limit); - self.max_offset = self.max_offset.max(max_offset); - - self.retrieve_vectors |= retrieve_vectors; - } - - pub fn into_event(self, user: &User, event_name: &str) -> Option { - // if we had no timestamp it means we never encountered any events and - // thus we don't need to send this event. - let timestamp = self.timestamp?; - - Some(Track { - timestamp: Some(timestamp), - user: user.clone(), - event: event_name.to_string(), - properties: serde_json::to_value(self).ok()?, - ..Default::default() - }) - } -} - aggregate_methods!( SimilarPOST => "Similar POST", SimilarGET => "Similar GET", diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 715eaaaa7..8e40397c7 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -9,7 +9,6 @@ use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::locales::Locale; -use serde::Serialize; use serde_json::Value; use tracing::debug; From 0fde49640a3f76cce57414e88b6690aa90ff8523 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 09:18:25 +0200 Subject: [PATCH 11/22] make clippy happy --- meilisearch/src/main.rs | 1 - meilisearch/src/routes/indexes/settings.rs | 111 ++++++++------------- 2 files changed, 43 insertions(+), 69 deletions(-) diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index eebea3b6d..c0652bf1e 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -223,7 +223,6 @@ pub fn print_launch_resume(opt: &Opt, analytics: Analytics, config_read_from: Op eprintln!("Prototype:\t\t{:?}", prototype); } - #[cfg(all(not(debug_assertions), feature = "analytics"))] { if !opt.no_analytics { eprintln!( diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index f31f52dc1..745ad5c78 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -94,7 +94,7 @@ macro_rules! 
make_setting_route { #[allow(clippy::redundant_closure_call)] analytics.publish( - $crate::routes::indexes::settings::$analytics::new(body.as_ref()).to_settings(), + $crate::routes::indexes::settings::$analytics::new(body.as_ref()).into_settings(), &req, ); @@ -605,58 +605,33 @@ struct RankingRulesAnalytics { impl RankingRulesAnalytics { pub fn new(rr: Option<&Vec>) -> Self { RankingRulesAnalytics { - words_position: rr - .as_ref() - .map(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Words) - }) + words_position: rr.as_ref().and_then(|rr| { + rr.iter() + .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Words)) + }), + typo_position: rr.as_ref().and_then(|rr| { + rr.iter() + .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo)) + }), + proximity_position: rr.as_ref().and_then(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Proximity) }) - .flatten(), - - typo_position: rr - .as_ref() - .map(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Typo) - }) + }), + attribute_position: rr.as_ref().and_then(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Attribute) }) - .flatten(), - - proximity_position: rr - .as_ref() - .map(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Proximity) - }) + }), + sort_position: rr.as_ref().and_then(|rr| { + rr.iter() + .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort)) + }), + exactness_position: rr.as_ref().and_then(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Exactness) }) - .flatten(), - - attribute_position: rr - .as_ref() - .map(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Attribute) - }) - }) - .flatten(), - sort_position: rr - .as_ref() - .map(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Sort) - }) - }) - .flatten(), - exactness_position: rr - .as_ref() - .map(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Exactness) - }) - }) - .flatten(), - + }), values: rr.as_ref().map(|rr| { rr.iter() .filter(|s| { @@ -673,7 +648,7 @@ impl RankingRulesAnalytics { } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { ranking_rules: self, ..Default::default() } } } @@ -694,7 +669,7 @@ impl SearchableAttributesAnalytics { } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { searchable_attributes: self, ..Default::default() } } } @@ -715,7 +690,7 @@ impl DisplayedAttributesAnalytics { } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { displayed_attributes: self, ..Default::default() } } } @@ -734,7 +709,7 @@ impl SortableAttributesAnalytics { } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { sortable_attributes: self, ..Default::default() } } } @@ -753,7 +728,7 @@ impl FilterableAttributesAnalytics { } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { filterable_attributes: self, ..Default::default() } } } @@ -768,7 
+743,7 @@ impl DistinctAttributeAnalytics { Self { set: distinct.is_some() } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { distinct_attribute: self, ..Default::default() } } } @@ -784,7 +759,7 @@ impl ProximityPrecisionAnalytics { Self { set: precision.is_some(), value: precision.cloned() } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { proximity_precision: self, ..Default::default() } } } @@ -818,7 +793,7 @@ impl TypoToleranceAnalytics { .flatten(), } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { typo_tolerance: self, ..Default::default() } } } @@ -846,7 +821,7 @@ impl FacetingAnalytics { } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { faceting: self, ..Default::default() } } } @@ -861,7 +836,7 @@ impl PaginationAnalytics { Self { max_total_hits: setting.as_ref().and_then(|s| s.max_total_hits.set()) } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { pagination: self, ..Default::default() } } } @@ -876,7 +851,7 @@ impl StopWordsAnalytics { Self { total: stop_words.as_ref().map(|stop_words| stop_words.len()) } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { stop_words: self, ..Default::default() } } } @@ -891,7 +866,7 @@ impl SynonymsAnalytics { Self { total: synonyms.as_ref().map(|synonyms| synonyms.len()) } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { synonyms: self, ..Default::default() } } } @@ -960,7 +935,7 @@ impl EmbeddersAnalytics { } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { embedders: self, ..Default::default() } } } @@ -976,7 +951,7 @@ impl SearchCutoffMsAnalytics { Self { search_cutoff_ms: setting.copied() } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { search_cutoff_ms: self, ..Default::default() } } } @@ -1001,7 +976,7 @@ impl LocalesAnalytics { } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { locales: self, ..Default::default() } } } @@ -1016,7 +991,7 @@ impl DictionaryAnalytics { Self { total: dictionary.as_ref().map(|dictionary| dictionary.len()) } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { dictionary: self, ..Default::default() } } } @@ -1031,7 +1006,7 @@ impl SeparatorTokensAnalytics { Self { total: separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()) } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { separator_tokens: self, ..Default::default() } } } @@ -1050,7 +1025,7 @@ impl NonSeparatorTokensAnalytics { } } - pub fn to_settings(self) -> SettingsAnalytics { + pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { non_separator_tokens: self, ..Default::default() } } } From d9115b74f09118b3bc687f9c0853bb74469b0d87 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 09:32:54 +0200 Subject: [PATCH 12/22] move the 
analytics settings code to a dedicated file --- meilisearch/src/routes/indexes/mod.rs | 1 + meilisearch/src/routes/indexes/settings.rs | 634 +----------------- .../src/routes/indexes/settings_analytics.rs | 627 +++++++++++++++++ 3 files changed, 632 insertions(+), 630 deletions(-) create mode 100644 meilisearch/src/routes/indexes/settings_analytics.rs diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index 8972119d7..65c81a57e 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -29,6 +29,7 @@ pub mod documents; pub mod facet_search; pub mod search; pub mod settings; +mod settings_analytics; pub mod similar; pub fn configure(cfg: &mut web::ServiceConfig) { diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index 745ad5c78..bca763a99 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -1,23 +1,17 @@ -use std::collections::{BTreeSet, HashSet}; - +use super::settings_analytics::*; use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; use index_scheduler::IndexScheduler; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; -use meilisearch_types::facet_values_sort::FacetValuesSort; use meilisearch_types::index_uid::IndexUid; -use meilisearch_types::locales::Locale; use meilisearch_types::milli::update::Setting; -use meilisearch_types::settings::{ - settings, ProximityPrecisionView, RankingRuleView, SecretPolicy, Settings, Unchecked, -}; +use meilisearch_types::settings::{settings, SecretPolicy, Settings, Unchecked}; use meilisearch_types::tasks::KindWithContent; -use serde::Serialize; use tracing::debug; -use crate::analytics::{Aggregate, Analytics}; +use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView}; @@ -94,7 +88,7 @@ macro_rules! 
make_setting_route { #[allow(clippy::redundant_closure_call)] analytics.publish( - $crate::routes::indexes::settings::$analytics::new(body.as_ref()).into_settings(), + $crate::routes::indexes::settings_analytics::$analytics::new(body.as_ref()).into_settings(), &req, ); @@ -410,626 +404,6 @@ generate_configure!( search_cutoff_ms ); -#[derive(Serialize, Default)] -struct SettingsAnalytics { - ranking_rules: RankingRulesAnalytics, - searchable_attributes: SearchableAttributesAnalytics, - displayed_attributes: DisplayedAttributesAnalytics, - sortable_attributes: SortableAttributesAnalytics, - filterable_attributes: FilterableAttributesAnalytics, - distinct_attribute: DistinctAttributeAnalytics, - proximity_precision: ProximityPrecisionAnalytics, - typo_tolerance: TypoToleranceAnalytics, - faceting: FacetingAnalytics, - pagination: PaginationAnalytics, - stop_words: StopWordsAnalytics, - synonyms: SynonymsAnalytics, - embedders: EmbeddersAnalytics, - search_cutoff_ms: SearchCutoffMsAnalytics, - locales: LocalesAnalytics, - dictionary: DictionaryAnalytics, - separator_tokens: SeparatorTokensAnalytics, - non_separator_tokens: NonSeparatorTokensAnalytics, -} - -impl Aggregate for SettingsAnalytics { - fn event_name(&self) -> &'static str { - "Settings Updated" - } - - fn aggregate(self: Box, other: Box) -> Box { - Box::new(Self { - ranking_rules: RankingRulesAnalytics { - words_position: self - .ranking_rules - .words_position - .or(other.ranking_rules.words_position), - typo_position: self - .ranking_rules - .typo_position - .or(other.ranking_rules.typo_position), - proximity_position: self - .ranking_rules - .proximity_position - .or(other.ranking_rules.proximity_position), - attribute_position: self - .ranking_rules - .attribute_position - .or(other.ranking_rules.attribute_position), - sort_position: self - .ranking_rules - .sort_position - .or(other.ranking_rules.sort_position), - exactness_position: self - .ranking_rules - .exactness_position - .or(other.ranking_rules.exactness_position), - values: self.ranking_rules.values.or(other.ranking_rules.values), - }, - searchable_attributes: SearchableAttributesAnalytics { - total: self.searchable_attributes.total.or(other.searchable_attributes.total), - with_wildcard: self - .searchable_attributes - .with_wildcard - .or(other.searchable_attributes.with_wildcard), - }, - displayed_attributes: DisplayedAttributesAnalytics { - total: self.displayed_attributes.total.or(other.displayed_attributes.total), - with_wildcard: self - .displayed_attributes - .with_wildcard - .or(other.displayed_attributes.with_wildcard), - }, - sortable_attributes: SortableAttributesAnalytics { - total: self.sortable_attributes.total.or(other.sortable_attributes.total), - has_geo: self.sortable_attributes.has_geo.or(other.sortable_attributes.has_geo), - }, - filterable_attributes: FilterableAttributesAnalytics { - total: self.filterable_attributes.total.or(other.filterable_attributes.total), - has_geo: self.filterable_attributes.has_geo.or(other.filterable_attributes.has_geo), - }, - distinct_attribute: DistinctAttributeAnalytics { - set: self.distinct_attribute.set | other.distinct_attribute.set, - }, - proximity_precision: ProximityPrecisionAnalytics { - set: self.proximity_precision.set | other.proximity_precision.set, - value: self.proximity_precision.value.or(other.proximity_precision.value), - }, - typo_tolerance: TypoToleranceAnalytics { - enabled: self.typo_tolerance.enabled.or(other.typo_tolerance.enabled), - disable_on_attributes: self - .typo_tolerance - 
.disable_on_attributes - .or(other.typo_tolerance.disable_on_attributes), - disable_on_words: self - .typo_tolerance - .disable_on_words - .or(other.typo_tolerance.disable_on_words), - min_word_size_for_one_typo: self - .typo_tolerance - .min_word_size_for_one_typo - .or(other.typo_tolerance.min_word_size_for_one_typo), - min_word_size_for_two_typos: self - .typo_tolerance - .min_word_size_for_two_typos - .or(other.typo_tolerance.min_word_size_for_two_typos), - }, - faceting: FacetingAnalytics { - max_values_per_facet: self - .faceting - .max_values_per_facet - .or(other.faceting.max_values_per_facet), - sort_facet_values_by_star_count: self - .faceting - .sort_facet_values_by_star_count - .or(other.faceting.sort_facet_values_by_star_count), - sort_facet_values_by_total: self - .faceting - .sort_facet_values_by_total - .or(other.faceting.sort_facet_values_by_total), - }, - pagination: PaginationAnalytics { - max_total_hits: self.pagination.max_total_hits.or(other.pagination.max_total_hits), - }, - stop_words: StopWordsAnalytics { - total: self.stop_words.total.or(other.stop_words.total), - }, - synonyms: SynonymsAnalytics { total: self.synonyms.total.or(other.synonyms.total) }, - embedders: EmbeddersAnalytics { - total: self.embedders.total.or(other.embedders.total), - sources: match (self.embedders.sources, other.embedders.sources) { - (None, None) => None, - (Some(sources), None) | (None, Some(sources)) => Some(sources), - (Some(this), Some(other)) => Some(this.union(&other).cloned().collect()), - }, - document_template_used: match ( - self.embedders.document_template_used, - other.embedders.document_template_used, - ) { - (None, None) => None, - (Some(used), None) | (None, Some(used)) => Some(used), - (Some(this), Some(other)) => Some(this | other), - }, - document_template_max_bytes: match ( - self.embedders.document_template_max_bytes, - other.embedders.document_template_max_bytes, - ) { - (None, None) => None, - (Some(bytes), None) | (None, Some(bytes)) => Some(bytes), - (Some(this), Some(other)) => Some(this.max(other)), - }, - binary_quantization_used: match ( - self.embedders.binary_quantization_used, - other.embedders.binary_quantization_used, - ) { - (None, None) => None, - (Some(bq), None) | (None, Some(bq)) => Some(bq), - (Some(this), Some(other)) => Some(this | other), - }, - }, - search_cutoff_ms: SearchCutoffMsAnalytics { - search_cutoff_ms: self - .search_cutoff_ms - .search_cutoff_ms - .or(other.search_cutoff_ms.search_cutoff_ms), - }, - locales: LocalesAnalytics { locales: self.locales.locales.or(other.locales.locales) }, - dictionary: DictionaryAnalytics { - total: self.dictionary.total.or(other.dictionary.total), - }, - separator_tokens: SeparatorTokensAnalytics { - total: self.separator_tokens.total.or(other.non_separator_tokens.total), - }, - non_separator_tokens: NonSeparatorTokensAnalytics { - total: self.non_separator_tokens.total.or(other.non_separator_tokens.total), - }, - }) - } - - fn into_event(self: Box) -> serde_json::Value { - serde_json::to_value(*self).unwrap_or_default() - } -} - -#[derive(Serialize, Default)] -struct RankingRulesAnalytics { - words_position: Option, - typo_position: Option, - proximity_position: Option, - attribute_position: Option, - sort_position: Option, - exactness_position: Option, - values: Option, -} - -impl RankingRulesAnalytics { - pub fn new(rr: Option<&Vec>) -> Self { - RankingRulesAnalytics { - words_position: rr.as_ref().and_then(|rr| { - rr.iter() - .position(|s| matches!(s, 
meilisearch_types::settings::RankingRuleView::Words)) - }), - typo_position: rr.as_ref().and_then(|rr| { - rr.iter() - .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo)) - }), - proximity_position: rr.as_ref().and_then(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Proximity) - }) - }), - attribute_position: rr.as_ref().and_then(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Attribute) - }) - }), - sort_position: rr.as_ref().and_then(|rr| { - rr.iter() - .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort)) - }), - exactness_position: rr.as_ref().and_then(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Exactness) - }) - }), - values: rr.as_ref().map(|rr| { - rr.iter() - .filter(|s| { - matches!( - s, - meilisearch_types::settings::RankingRuleView::Asc(_) - | meilisearch_types::settings::RankingRuleView::Desc(_) - ) - }) - .map(|x| x.to_string()) - .collect::>() - .join(", ") - }), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { ranking_rules: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct SearchableAttributesAnalytics { - total: Option, - with_wildcard: Option, -} - -impl SearchableAttributesAnalytics { - pub fn new(setting: Option<&Vec>) -> Self { - Self { - total: setting.as_ref().map(|searchable| searchable.len()), - with_wildcard: setting - .as_ref() - .map(|searchable| searchable.iter().any(|searchable| searchable == "*")), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { searchable_attributes: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct DisplayedAttributesAnalytics { - total: Option, - with_wildcard: Option, -} - -impl DisplayedAttributesAnalytics { - pub fn new(displayed: Option<&Vec>) -> Self { - Self { - total: displayed.as_ref().map(|displayed| displayed.len()), - with_wildcard: displayed - .as_ref() - .map(|displayed| displayed.iter().any(|displayed| displayed == "*")), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { displayed_attributes: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct SortableAttributesAnalytics { - total: Option, - has_geo: Option, -} - -impl SortableAttributesAnalytics { - pub fn new(setting: Option<&std::collections::BTreeSet>) -> Self { - Self { - total: setting.as_ref().map(|sort| sort.len()), - has_geo: setting.as_ref().map(|sort| sort.contains("_geo")), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { sortable_attributes: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct FilterableAttributesAnalytics { - total: Option, - has_geo: Option, -} - -impl FilterableAttributesAnalytics { - pub fn new(setting: Option<&std::collections::BTreeSet>) -> Self { - Self { - total: setting.as_ref().map(|filter| filter.len()), - has_geo: setting.as_ref().map(|filter| filter.contains("_geo")), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { filterable_attributes: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct DistinctAttributeAnalytics { - set: bool, -} - -impl DistinctAttributeAnalytics { - pub fn new(distinct: Option<&String>) -> Self { - Self { set: distinct.is_some() } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { 
distinct_attribute: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct ProximityPrecisionAnalytics { - set: bool, - value: Option, -} - -impl ProximityPrecisionAnalytics { - pub fn new(precision: Option<&meilisearch_types::settings::ProximityPrecisionView>) -> Self { - Self { set: precision.is_some(), value: precision.cloned() } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { proximity_precision: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct TypoToleranceAnalytics { - enabled: Option, - disable_on_attributes: Option, - disable_on_words: Option, - min_word_size_for_one_typo: Option, - min_word_size_for_two_typos: Option, -} - -impl TypoToleranceAnalytics { - pub fn new(setting: Option<&meilisearch_types::settings::TypoSettings>) -> Self { - Self { - enabled: setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))), - disable_on_attributes: setting - .as_ref() - .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), - disable_on_words: setting - .as_ref() - .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), - min_word_size_for_one_typo: setting - .as_ref() - .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.one_typo.set())) - .flatten(), - min_word_size_for_two_typos: setting - .as_ref() - .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.two_typos.set())) - .flatten(), - } - } - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { typo_tolerance: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct FacetingAnalytics { - max_values_per_facet: Option, - sort_facet_values_by_star_count: Option, - sort_facet_values_by_total: Option, -} - -impl FacetingAnalytics { - pub fn new(setting: Option<&meilisearch_types::settings::FacetingSettings>) -> Self { - Self { - max_values_per_facet: setting.as_ref().and_then(|s| s.max_values_per_facet.set()), - sort_facet_values_by_star_count: setting.as_ref().and_then(|s| { - s.sort_facet_values_by - .as_ref() - .set() - .map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count)) - }), - sort_facet_values_by_total: setting - .as_ref() - .and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { faceting: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct PaginationAnalytics { - max_total_hits: Option, -} - -impl PaginationAnalytics { - pub fn new(setting: Option<&meilisearch_types::settings::PaginationSettings>) -> Self { - Self { max_total_hits: setting.as_ref().and_then(|s| s.max_total_hits.set()) } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { pagination: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct StopWordsAnalytics { - total: Option, -} - -impl StopWordsAnalytics { - pub fn new(stop_words: Option<&BTreeSet>) -> Self { - Self { total: stop_words.as_ref().map(|stop_words| stop_words.len()) } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { stop_words: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct SynonymsAnalytics { - total: Option, -} - -impl SynonymsAnalytics { - pub fn new(synonyms: Option<&std::collections::BTreeMap>>) -> Self { - Self { total: synonyms.as_ref().map(|synonyms| synonyms.len()) } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { 
synonyms: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct EmbeddersAnalytics { - // last - total: Option, - // Merge the sources - sources: Option>, - // |= - document_template_used: Option, - // max - document_template_max_bytes: Option, - // |= - binary_quantization_used: Option, -} - -impl EmbeddersAnalytics { - pub fn new( - setting: Option< - &std::collections::BTreeMap< - String, - Setting, - >, - >, - ) -> Self { - let mut sources = std::collections::HashSet::new(); - - if let Some(s) = &setting { - for source in s - .values() - .filter_map(|config| config.clone().set()) - .filter_map(|config| config.source.set()) - { - use meilisearch_types::milli::vector::settings::EmbedderSource; - match source { - EmbedderSource::OpenAi => sources.insert("openAi".to_string()), - EmbedderSource::HuggingFace => sources.insert("huggingFace".to_string()), - EmbedderSource::UserProvided => sources.insert("userProvided".to_string()), - EmbedderSource::Ollama => sources.insert("ollama".to_string()), - EmbedderSource::Rest => sources.insert("rest".to_string()), - }; - } - }; - - Self { - total: setting.as_ref().map(|s| s.len()), - sources: Some(sources), - document_template_used: setting.as_ref().map(|map| { - map.values() - .filter_map(|config| config.clone().set()) - .any(|config| config.document_template.set().is_some()) - }), - document_template_max_bytes: setting.as_ref().and_then(|map| { - map.values() - .filter_map(|config| config.clone().set()) - .filter_map(|config| config.document_template_max_bytes.set()) - .max() - }), - binary_quantization_used: setting.as_ref().map(|map| { - map.values() - .filter_map(|config| config.clone().set()) - .any(|config| config.binary_quantized.set().is_some()) - }), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { embedders: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -#[serde(transparent)] -struct SearchCutoffMsAnalytics { - search_cutoff_ms: Option, -} - -impl SearchCutoffMsAnalytics { - pub fn new(setting: Option<&u64>) -> Self { - Self { search_cutoff_ms: setting.copied() } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { search_cutoff_ms: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -#[serde(transparent)] -struct LocalesAnalytics { - locales: Option>, -} - -impl LocalesAnalytics { - pub fn new( - rules: Option<&Vec>, - ) -> Self { - LocalesAnalytics { - locales: rules.as_ref().map(|rules| { - rules - .iter() - .flat_map(|rule| rule.locales.iter().cloned()) - .collect::>() - }), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { locales: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct DictionaryAnalytics { - total: Option, -} - -impl DictionaryAnalytics { - pub fn new(dictionary: Option<&std::collections::BTreeSet>) -> Self { - Self { total: dictionary.as_ref().map(|dictionary| dictionary.len()) } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { dictionary: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct SeparatorTokensAnalytics { - total: Option, -} - -impl SeparatorTokensAnalytics { - pub fn new(separator_tokens: Option<&std::collections::BTreeSet>) -> Self { - Self { total: separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()) } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { separator_tokens: self, ..Default::default() } - } -} - 
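Each of the per-setting structs deleted above (and re-introduced in the new `settings_analytics.rs` module below) follows the same two-step shape: `new()` distills the raw setting into a handful of countable fields, and `into_settings()` lifts the result into a full `SettingsAnalytics` whose other fields keep their `Default` (`None`) values, so partial aggregates coming from different sub-routes can later be merged field by field with `Option::or`. A minimal sketch of that pattern, using a hypothetical `foo` sub-setting (the `foo` name and field are illustrative only, not part of the codebase):

```rust
use serde::Serialize;

#[derive(Serialize, Default)]
pub struct FooAnalytics {
    pub total: Option<usize>,
}

impl FooAnalytics {
    // Distill the raw setting into the few numbers we actually report.
    pub fn new(foo: Option<&Vec<String>>) -> Self {
        Self { total: foo.map(|foo| foo.len()) }
    }

    // Lift into the full aggregate: every other setting stays `None`,
    // so merging two aggregates with `Option::or` keeps whichever side was set.
    pub fn into_settings(self) -> SettingsAnalytics {
        SettingsAnalytics { foo: self, ..Default::default() }
    }
}
```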
-#[derive(Serialize, Default)]
-struct NonSeparatorTokensAnalytics {
-    total: Option<usize>,
-}
-
-impl NonSeparatorTokensAnalytics {
-    pub fn new(non_separator_tokens: Option<&std::collections::BTreeSet<String>>) -> Self {
-        Self {
-            total: non_separator_tokens
-                .as_ref()
-                .map(|non_separator_tokens| non_separator_tokens.len()),
-        }
-    }
-
-    pub fn into_settings(self) -> SettingsAnalytics {
-        SettingsAnalytics { non_separator_tokens: self, ..Default::default() }
-    }
-}
-
 pub async fn update_all(
     index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, Data<IndexScheduler>>,
     index_uid: web::Path<String>,
diff --git a/meilisearch/src/routes/indexes/settings_analytics.rs b/meilisearch/src/routes/indexes/settings_analytics.rs
new file mode 100644
index 000000000..636ef3c57
--- /dev/null
+++ b/meilisearch/src/routes/indexes/settings_analytics.rs
@@ -0,0 +1,627 @@
+//! All the structures used to make the analytics on the settings work.
+//! The signatures of the `new` functions are not very Rust-idiomatic because they must match the types received
+//! through the sub-settings routes directly, without any manipulation.
+//! This is why we often use an `Option<&Vec<_>>` instead of an `Option<&[_]>`.
+
+use meilisearch_types::locales::{Locale, LocalizedAttributesRuleView};
+use meilisearch_types::milli::update::Setting;
+use meilisearch_types::milli::vector::settings::EmbeddingSettings;
+use meilisearch_types::settings::{
+    FacetingSettings, PaginationSettings, ProximityPrecisionView, TypoSettings,
+};
+use meilisearch_types::{facet_values_sort::FacetValuesSort, settings::RankingRuleView};
+use serde::Serialize;
+use std::collections::{BTreeMap, BTreeSet, HashSet};
+
+use crate::analytics::Aggregate;
+
+#[derive(Serialize, Default)]
+pub struct SettingsAnalytics {
+    pub ranking_rules: RankingRulesAnalytics,
+    pub searchable_attributes: SearchableAttributesAnalytics,
+    pub displayed_attributes: DisplayedAttributesAnalytics,
+    pub sortable_attributes: SortableAttributesAnalytics,
+    pub filterable_attributes: FilterableAttributesAnalytics,
+    pub distinct_attribute: DistinctAttributeAnalytics,
+    pub proximity_precision: ProximityPrecisionAnalytics,
+    pub typo_tolerance: TypoToleranceAnalytics,
+    pub faceting: FacetingAnalytics,
+    pub pagination: PaginationAnalytics,
+    pub stop_words: StopWordsAnalytics,
+    pub synonyms: SynonymsAnalytics,
+    pub embedders: EmbeddersAnalytics,
+    pub search_cutoff_ms: SearchCutoffMsAnalytics,
+    pub locales: LocalesAnalytics,
+    pub dictionary: DictionaryAnalytics,
+    pub separator_tokens: SeparatorTokensAnalytics,
+    pub non_separator_tokens: NonSeparatorTokensAnalytics,
+}
+
+impl Aggregate for SettingsAnalytics {
+    fn event_name(&self) -> &'static str {
+        "Settings Updated"
+    }
+
+    fn aggregate(self: Box<Self>, other: Box<Self>) -> Box<Self> {
+        Box::new(Self {
+            ranking_rules: RankingRulesAnalytics {
+                words_position: self
+                    .ranking_rules
+                    .words_position
+                    .or(other.ranking_rules.words_position),
+                typo_position: self
+                    .ranking_rules
+                    .typo_position
+                    .or(other.ranking_rules.typo_position),
+                proximity_position: self
+                    .ranking_rules
+                    .proximity_position
+                    .or(other.ranking_rules.proximity_position),
+                attribute_position: self
+                    .ranking_rules
+                    .attribute_position
+                    .or(other.ranking_rules.attribute_position),
+                sort_position: self
+                    .ranking_rules
+                    .sort_position
+                    .or(other.ranking_rules.sort_position),
+                exactness_position: self
+                    .ranking_rules
+                    .exactness_position
+                    .or(other.ranking_rules.exactness_position),
+                values: self.ranking_rules.values.or(other.ranking_rules.values),
+            },
+            searchable_attributes: SearchableAttributesAnalytics {
total: self.searchable_attributes.total.or(other.searchable_attributes.total), + with_wildcard: self + .searchable_attributes + .with_wildcard + .or(other.searchable_attributes.with_wildcard), + }, + displayed_attributes: DisplayedAttributesAnalytics { + total: self.displayed_attributes.total.or(other.displayed_attributes.total), + with_wildcard: self + .displayed_attributes + .with_wildcard + .or(other.displayed_attributes.with_wildcard), + }, + sortable_attributes: SortableAttributesAnalytics { + total: self.sortable_attributes.total.or(other.sortable_attributes.total), + has_geo: self.sortable_attributes.has_geo.or(other.sortable_attributes.has_geo), + }, + filterable_attributes: FilterableAttributesAnalytics { + total: self.filterable_attributes.total.or(other.filterable_attributes.total), + has_geo: self.filterable_attributes.has_geo.or(other.filterable_attributes.has_geo), + }, + distinct_attribute: DistinctAttributeAnalytics { + set: self.distinct_attribute.set | other.distinct_attribute.set, + }, + proximity_precision: ProximityPrecisionAnalytics { + set: self.proximity_precision.set | other.proximity_precision.set, + value: self.proximity_precision.value.or(other.proximity_precision.value), + }, + typo_tolerance: TypoToleranceAnalytics { + enabled: self.typo_tolerance.enabled.or(other.typo_tolerance.enabled), + disable_on_attributes: self + .typo_tolerance + .disable_on_attributes + .or(other.typo_tolerance.disable_on_attributes), + disable_on_words: self + .typo_tolerance + .disable_on_words + .or(other.typo_tolerance.disable_on_words), + min_word_size_for_one_typo: self + .typo_tolerance + .min_word_size_for_one_typo + .or(other.typo_tolerance.min_word_size_for_one_typo), + min_word_size_for_two_typos: self + .typo_tolerance + .min_word_size_for_two_typos + .or(other.typo_tolerance.min_word_size_for_two_typos), + }, + faceting: FacetingAnalytics { + max_values_per_facet: self + .faceting + .max_values_per_facet + .or(other.faceting.max_values_per_facet), + sort_facet_values_by_star_count: self + .faceting + .sort_facet_values_by_star_count + .or(other.faceting.sort_facet_values_by_star_count), + sort_facet_values_by_total: self + .faceting + .sort_facet_values_by_total + .or(other.faceting.sort_facet_values_by_total), + }, + pagination: PaginationAnalytics { + max_total_hits: self.pagination.max_total_hits.or(other.pagination.max_total_hits), + }, + stop_words: StopWordsAnalytics { + total: self.stop_words.total.or(other.stop_words.total), + }, + synonyms: SynonymsAnalytics { total: self.synonyms.total.or(other.synonyms.total) }, + embedders: EmbeddersAnalytics { + total: self.embedders.total.or(other.embedders.total), + sources: match (self.embedders.sources, other.embedders.sources) { + (None, None) => None, + (Some(sources), None) | (None, Some(sources)) => Some(sources), + (Some(this), Some(other)) => Some(this.union(&other).cloned().collect()), + }, + document_template_used: match ( + self.embedders.document_template_used, + other.embedders.document_template_used, + ) { + (None, None) => None, + (Some(used), None) | (None, Some(used)) => Some(used), + (Some(this), Some(other)) => Some(this | other), + }, + document_template_max_bytes: match ( + self.embedders.document_template_max_bytes, + other.embedders.document_template_max_bytes, + ) { + (None, None) => None, + (Some(bytes), None) | (None, Some(bytes)) => Some(bytes), + (Some(this), Some(other)) => Some(this.max(other)), + }, + binary_quantization_used: match ( + self.embedders.binary_quantization_used, + 
other.embedders.binary_quantization_used,
+                ) {
+                    (None, None) => None,
+                    (Some(bq), None) | (None, Some(bq)) => Some(bq),
+                    (Some(this), Some(other)) => Some(this | other),
+                },
+            },
+            search_cutoff_ms: SearchCutoffMsAnalytics {
+                search_cutoff_ms: self
+                    .search_cutoff_ms
+                    .search_cutoff_ms
+                    .or(other.search_cutoff_ms.search_cutoff_ms),
+            },
+            locales: LocalesAnalytics { locales: self.locales.locales.or(other.locales.locales) },
+            dictionary: DictionaryAnalytics {
+                total: self.dictionary.total.or(other.dictionary.total),
+            },
+            separator_tokens: SeparatorTokensAnalytics {
+                total: self.separator_tokens.total.or(other.separator_tokens.total),
+            },
+            non_separator_tokens: NonSeparatorTokensAnalytics {
+                total: self.non_separator_tokens.total.or(other.non_separator_tokens.total),
+            },
+        })
+    }
+
+    fn into_event(self: Box<Self>) -> serde_json::Value {
+        serde_json::to_value(*self).unwrap_or_default()
+    }
+}
+
+#[derive(Serialize, Default)]
+pub struct RankingRulesAnalytics {
+    pub words_position: Option<usize>,
+    pub typo_position: Option<usize>,
+    pub proximity_position: Option<usize>,
+    pub attribute_position: Option<usize>,
+    pub sort_position: Option<usize>,
+    pub exactness_position: Option<usize>,
+    pub values: Option<String>,
+}
+
+impl RankingRulesAnalytics {
+    pub fn new(rr: Option<&Vec<RankingRuleView>>) -> Self {
+        RankingRulesAnalytics {
+            words_position: rr.as_ref().and_then(|rr| {
+                rr.iter()
+                    .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Words))
+            }),
+            typo_position: rr.as_ref().and_then(|rr| {
+                rr.iter()
+                    .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo))
+            }),
+            proximity_position: rr.as_ref().and_then(|rr| {
+                rr.iter().position(|s| {
+                    matches!(s, meilisearch_types::settings::RankingRuleView::Proximity)
+                })
+            }),
+            attribute_position: rr.as_ref().and_then(|rr| {
+                rr.iter().position(|s| {
+                    matches!(s, meilisearch_types::settings::RankingRuleView::Attribute)
+                })
+            }),
+            sort_position: rr.as_ref().and_then(|rr| {
+                rr.iter()
+                    .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort))
+            }),
+            exactness_position: rr.as_ref().and_then(|rr| {
+                rr.iter().position(|s| {
+                    matches!(s, meilisearch_types::settings::RankingRuleView::Exactness)
+                })
+            }),
+            values: rr.as_ref().map(|rr| {
+                rr.iter()
+                    .filter(|s| {
+                        matches!(
+                            s,
+                            meilisearch_types::settings::RankingRuleView::Asc(_)
+                                | meilisearch_types::settings::RankingRuleView::Desc(_)
+                        )
+                    })
+                    .map(|x| x.to_string())
+                    .collect::<Vec<String>>()
+                    .join(", ")
+            }),
+        }
+    }
+
+    pub fn into_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { ranking_rules: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+pub struct SearchableAttributesAnalytics {
+    pub total: Option<usize>,
+    pub with_wildcard: Option<bool>,
+}
+
+impl SearchableAttributesAnalytics {
+    pub fn new(setting: Option<&Vec<String>>) -> Self {
+        Self {
+            total: setting.as_ref().map(|searchable| searchable.len()),
+            with_wildcard: setting
+                .as_ref()
+                .map(|searchable| searchable.iter().any(|searchable| searchable == "*")),
+        }
+    }
+
+    pub fn into_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { searchable_attributes: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+pub struct DisplayedAttributesAnalytics {
+    pub total: Option<usize>,
+    pub with_wildcard: Option<bool>,
+}
+
+impl DisplayedAttributesAnalytics {
+    pub fn new(displayed: Option<&Vec<String>>) -> Self {
+        Self {
+            total: displayed.as_ref().map(|displayed| displayed.len()),
+            with_wildcard: displayed
+                .as_ref()
+                .map(|displayed| displayed.iter().any(|displayed| displayed == "*")),
+        }
+    }
+
+    pub fn into_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { displayed_attributes: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+pub struct SortableAttributesAnalytics {
+    pub total: Option<usize>,
+    pub has_geo: Option<bool>,
+}
+
+impl SortableAttributesAnalytics {
+    pub fn new(setting: Option<&BTreeSet<String>>) -> Self {
+        Self {
+            total: setting.as_ref().map(|sort| sort.len()),
+            has_geo: setting.as_ref().map(|sort| sort.contains("_geo")),
+        }
+    }
+
+    pub fn into_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { sortable_attributes: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+pub struct FilterableAttributesAnalytics {
+    pub total: Option<usize>,
+    pub has_geo: Option<bool>,
+}
+
+impl FilterableAttributesAnalytics {
+    pub fn new(setting: Option<&BTreeSet<String>>) -> Self {
+        Self {
+            total: setting.as_ref().map(|filter| filter.len()),
+            has_geo: setting.as_ref().map(|filter| filter.contains("_geo")),
+        }
+    }
+
+    pub fn into_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { filterable_attributes: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+pub struct DistinctAttributeAnalytics {
+    pub set: bool,
+}
+
+impl DistinctAttributeAnalytics {
+    pub fn new(distinct: Option<&String>) -> Self {
+        Self { set: distinct.is_some() }
+    }
+
+    pub fn into_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { distinct_attribute: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+pub struct ProximityPrecisionAnalytics {
+    pub set: bool,
+    pub value: Option<ProximityPrecisionView>,
+}
+
+impl ProximityPrecisionAnalytics {
+    pub fn new(precision: Option<&ProximityPrecisionView>) -> Self {
+        Self { set: precision.is_some(), value: precision.cloned() }
+    }
+
+    pub fn into_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { proximity_precision: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+pub struct TypoToleranceAnalytics {
+    pub enabled: Option<bool>,
+    pub disable_on_attributes: Option<bool>,
+    pub disable_on_words: Option<bool>,
+    pub min_word_size_for_one_typo: Option<u8>,
+    pub min_word_size_for_two_typos: Option<u8>,
+}
+
+impl TypoToleranceAnalytics {
+    pub fn new(setting: Option<&TypoSettings>) -> Self {
+        Self {
+            enabled: setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))),
+            disable_on_attributes: setting
+                .as_ref()
+                .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())),
+            disable_on_words: setting
+                .as_ref()
+                .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())),
+            min_word_size_for_one_typo: setting
+                .as_ref()
+                .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.one_typo.set()))
+                .flatten(),
+            min_word_size_for_two_typos: setting
+                .as_ref()
+                .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.two_typos.set()))
+                .flatten(),
+        }
+    }
+    pub fn into_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { typo_tolerance: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+pub struct FacetingAnalytics {
+    pub max_values_per_facet: Option<usize>,
+    pub sort_facet_values_by_star_count: Option<bool>,
+    pub sort_facet_values_by_total: Option<usize>,
+}
+
+impl FacetingAnalytics {
+    pub fn new(setting: Option<&FacetingSettings>) -> Self {
+        Self {
+            max_values_per_facet: setting.as_ref().and_then(|s| s.max_values_per_facet.set()),
+            sort_facet_values_by_star_count: setting.as_ref().and_then(|s| {
+                s.sort_facet_values_by
+                    .as_ref()
+                    .set()
+                    .map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count))
+            }),
+            sort_facet_values_by_total: setting
+                .as_ref()
+                .and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())),
+        }
+    }
+
+    pub fn into_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { faceting: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+pub struct PaginationAnalytics {
+    pub max_total_hits: Option<usize>,
+}
+
+impl PaginationAnalytics {
+    pub fn new(setting: Option<&PaginationSettings>) -> Self {
+        Self { max_total_hits: setting.as_ref().and_then(|s| s.max_total_hits.set()) }
+    }
+
+    pub fn into_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { pagination: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+pub struct StopWordsAnalytics {
+    pub total: Option<usize>,
+}
+
+impl StopWordsAnalytics {
+    pub fn new(stop_words: Option<&BTreeSet<String>>) -> Self {
+        Self { total: stop_words.as_ref().map(|stop_words| stop_words.len()) }
+    }
+
+    pub fn into_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { stop_words: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+pub struct SynonymsAnalytics {
+    pub total: Option<usize>,
+}
+
+impl SynonymsAnalytics {
+    pub fn new(synonyms: Option<&BTreeMap<String, Vec<String>>>) -> Self {
+        Self { total: synonyms.as_ref().map(|synonyms| synonyms.len()) }
+    }
+
+    pub fn into_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { synonyms: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+pub struct EmbeddersAnalytics {
+    // last
+    pub total: Option<usize>,
+    // Merge the sources
+    pub sources: Option<HashSet<String>>,
+    // |=
+    pub document_template_used: Option<bool>,
+    // max
+    pub document_template_max_bytes: Option<usize>,
+    // |=
+    pub binary_quantization_used: Option<bool>,
+}
+
+impl EmbeddersAnalytics {
+    pub fn new(setting: Option<&BTreeMap<String, Setting<EmbeddingSettings>>>) -> Self {
+        let mut sources = std::collections::HashSet::new();
+
+        if let Some(s) = &setting {
+            for source in s
+                .values()
+                .filter_map(|config| config.clone().set())
+                .filter_map(|config| config.source.set())
+            {
+                use meilisearch_types::milli::vector::settings::EmbedderSource;
+                match source {
+                    EmbedderSource::OpenAi => sources.insert("openAi".to_string()),
+                    EmbedderSource::HuggingFace => sources.insert("huggingFace".to_string()),
+                    EmbedderSource::UserProvided => sources.insert("userProvided".to_string()),
+                    EmbedderSource::Ollama => sources.insert("ollama".to_string()),
+                    EmbedderSource::Rest => sources.insert("rest".to_string()),
+                };
+            }
+        };
+
+        Self {
+            total: setting.as_ref().map(|s| s.len()),
+            sources: Some(sources),
+            document_template_used: setting.as_ref().map(|map| {
+                map.values()
+                    .filter_map(|config| config.clone().set())
+                    .any(|config| config.document_template.set().is_some())
+            }),
+            document_template_max_bytes: setting.as_ref().and_then(|map| {
+                map.values()
+                    .filter_map(|config| config.clone().set())
+                    .filter_map(|config| config.document_template_max_bytes.set())
+                    .max()
+            }),
+            binary_quantization_used: setting.as_ref().map(|map| {
+                map.values()
+                    .filter_map(|config| config.clone().set())
+                    .any(|config| config.binary_quantized.set().is_some())
+            }),
+        }
+    }
+
+    pub fn into_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { embedders: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+#[serde(transparent)]
+pub struct SearchCutoffMsAnalytics {
+    pub search_cutoff_ms: Option<u64>,
+}
+
+impl SearchCutoffMsAnalytics {
+    pub fn new(setting: Option<&u64>) -> Self {
+        Self { search_cutoff_ms: setting.copied() }
+    }
+
+    pub fn into_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { search_cutoff_ms: self,
..Default::default() } + } +} + +#[derive(Serialize, Default)] +#[serde(transparent)] +pub struct LocalesAnalytics { + pub locales: Option>, +} + +impl LocalesAnalytics { + pub fn new(rules: Option<&Vec>) -> Self { + LocalesAnalytics { + locales: rules.as_ref().map(|rules| { + rules + .iter() + .flat_map(|rule| rule.locales.iter().cloned()) + .collect::>() + }), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { locales: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct DictionaryAnalytics { + pub total: Option, +} + +impl DictionaryAnalytics { + pub fn new(dictionary: Option<&BTreeSet>) -> Self { + Self { total: dictionary.as_ref().map(|dictionary| dictionary.len()) } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { dictionary: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct SeparatorTokensAnalytics { + pub total: Option, +} + +impl SeparatorTokensAnalytics { + pub fn new(separator_tokens: Option<&BTreeSet>) -> Self { + Self { total: separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()) } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { separator_tokens: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct NonSeparatorTokensAnalytics { + pub total: Option, +} + +impl NonSeparatorTokensAnalytics { + pub fn new(non_separator_tokens: Option<&BTreeSet>) -> Self { + Self { + total: non_separator_tokens + .as_ref() + .map(|non_separator_tokens| non_separator_tokens.len()), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { non_separator_tokens: self, ..Default::default() } + } +} From 18ac4032aa5512c96b0068d0603f4db285f81bd9 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 09:35:11 +0200 Subject: [PATCH 13/22] Remove the experimental feature seen --- meilisearch/src/routes/features.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index 1de00717d..8bdb3ffb3 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -17,24 +17,19 @@ use crate::extractors::sequential_extractor::SeqHandler; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( web::resource("") - .route(web::get().to(SeqHandler(get_features))) + .route(web::get().to(get_features)) .route(web::patch().to(SeqHandler(patch_features))), ); } -crate::empty_analytics!(GetExperimentalFeatureAnalytics, "Experimental features Seen"); - async fn get_features( index_scheduler: GuardedData< ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_GET }>, Data, >, - req: HttpRequest, - analytics: Data, ) -> HttpResponse { let features = index_scheduler.features(); - analytics.publish(GetExperimentalFeatureAnalytics::default(), &req); let features = features.runtime_features(); debug!(returns = ?features, "Get features"); HttpResponse::Ok().json(features) From 1ab6fec9030351956fd2462dc5afb3b2b317860c Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 09:49:21 +0200 Subject: [PATCH 14/22] send all experimental features in the info event including the runtime one --- .../src/analytics/segment_analytics.rs | 44 +++++++++++++------ 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 1edfa1bdd..c0c2b64d8 100644 --- 
a/meilisearch/src/analytics/segment_analytics.rs
+++ b/meilisearch/src/analytics/segment_analytics.rs
@@ -10,6 +10,7 @@ use actix_web::HttpRequest;
 use byte_unit::Byte;
 use index_scheduler::IndexScheduler;
 use meilisearch_auth::{AuthController, AuthFilter};
+use meilisearch_types::features::RuntimeTogglableFeatures;
 use meilisearch_types::locales::Locale;
 use meilisearch_types::InstanceUid;
 use once_cell::sync::Lazy;
@@ -173,7 +174,9 @@ struct Infos {
     env: String,
     experimental_contains_filter: bool,
+    experimental_vector_store: bool,
     experimental_enable_metrics: bool,
+    experimental_edit_documents_by_function: bool,
     experimental_search_queue_size: usize,
     experimental_drop_search_after: usize,
     experimental_nb_searches_per_core: usize,
@@ -210,8 +213,8 @@ struct Infos {
     ssl_tickets: bool,
 }
 
-impl From<Opt> for Infos {
-    fn from(options: Opt) -> Self {
+impl Infos {
+    pub fn new(options: Opt, features: RuntimeTogglableFeatures) -> Self {
         // We want to decompose this whole struct by hand to be sure we don't forget
         // to add analytics when we add a field in the Opt.
         // Thus we must not insert `..` at the end.
@@ -254,8 +257,7 @@ impl From<Opt> for Infos {
             log_level,
             indexer_options,
             config_file_path,
-            #[cfg(feature = "analytics")]
-            no_analytics: _,
+            no_analytics: _,
         } = options;
 
         let schedule_snapshot = match schedule_snapshot {
@@ -266,18 +268,28 @@ impl From<Opt> for Infos {
         let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } =
             indexer_options;
 
+        let RuntimeTogglableFeatures {
+            vector_store,
+            metrics,
+            logs_route,
+            edit_documents_by_function,
+            contains_filter,
+        } = features;
+
         // We're going to override every piece of sensitive information.
         // We consider information sensitive if it contains a path, an address, or a key.
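+        // Each sensitive option is therefore reported only as a boolean derived
+        // from it (e.g. `db_path: db_path != PathBuf::from("./data.ms")` below),
+        // so the event records whether a default was changed, never the value itself.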
Self { env, - experimental_contains_filter, - experimental_enable_metrics, + experimental_contains_filter: experimental_contains_filter | contains_filter, + experimental_vector_store: vector_store, + experimental_edit_documents_by_function: edit_documents_by_function, + experimental_enable_metrics: experimental_enable_metrics | metrics, experimental_search_queue_size, experimental_drop_search_after: experimental_drop_search_after.into(), experimental_nb_searches_per_core: experimental_nb_searches_per_core.into(), experimental_logs_mode, experimental_replication_parameters, - experimental_enable_logs_route, + experimental_enable_logs_route: experimental_enable_logs_route | logs_route, experimental_reduce_indexing_memory_usage, gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(), db_path: db_path != PathBuf::from("./data.ms"), @@ -319,7 +331,7 @@ pub struct Segment { } impl Segment { - fn compute_traits(opt: &Opt, stats: Stats) -> Value { + fn compute_traits(opt: &Opt, stats: Stats, features: RuntimeTogglableFeatures) -> Value { static FIRST_START_TIMESTAMP: Lazy = Lazy::new(Instant::now); static SYSTEM: Lazy = Lazy::new(|| { let disks = Disks::new_with_refreshed_list(); @@ -347,7 +359,7 @@ impl Segment { "indexes_number": stats.indexes.len(), "documents_number": number_of_documents, }, - "infos": Infos::from(opt.clone()), + "infos": Infos::new(opt.clone(), features), }) } @@ -399,9 +411,11 @@ impl Segment { index_scheduler: Arc, auth_controller: Arc, ) { - if let Ok(stats) = - create_all_stats(index_scheduler.into(), auth_controller.into(), &AuthFilter::default()) - { + if let Ok(stats) = create_all_stats( + index_scheduler.clone().into(), + auth_controller.into(), + &AuthFilter::default(), + ) { // Replace the version number with the prototype name if any. 
let version = if let Some(prototype) = build_info::DescribeResult::from_build() .and_then(|describe| describe.as_prototype()) @@ -420,7 +434,11 @@ impl Segment { }, })), user: self.user.clone(), - traits: Self::compute_traits(&self.opt, stats), + traits: Self::compute_traits( + &self.opt, + stats, + index_scheduler.features().runtime_features(), + ), ..Default::default() }) .await; From fa1db6b7216fce5e9727dfacbcdccc770ef80f16 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 09:55:30 +0200 Subject: [PATCH 15/22] fix the tests --- meilisearch/src/analytics/mod.rs | 4 ++++ meilisearch/tests/common/service.rs | 5 +++-- meilisearch/tests/logs/mod.rs | 5 +++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index d08f3307c..75e8083c5 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -158,6 +158,10 @@ impl Analytics { } } + pub fn no_analytics() -> Self { + Self { segment: None } + } + pub fn instance_uid(&self) -> Option<&InstanceUid> { self.segment.as_ref().map(|segment| segment.instance_uid.as_ref()) } diff --git a/meilisearch/tests/common/service.rs b/meilisearch/tests/common/service.rs index 8addbacf8..c0b07c217 100644 --- a/meilisearch/tests/common/service.rs +++ b/meilisearch/tests/common/service.rs @@ -9,8 +9,9 @@ use actix_web::test; use actix_web::test::TestRequest; use actix_web::web::Data; use index_scheduler::IndexScheduler; +use meilisearch::analytics::Analytics; use meilisearch::search_queue::SearchQueue; -use meilisearch::{analytics, create_app, Opt, SubscriberForSecondLayer}; +use meilisearch::{create_app, Opt, SubscriberForSecondLayer}; use meilisearch_auth::AuthController; use tracing::level_filters::LevelFilter; use tracing_subscriber::Layer; @@ -141,7 +142,7 @@ impl Service { Data::new(search_queue), self.options.clone(), (route_layer_handle, stderr_layer_handle), - analytics::MockAnalytics::new(&self.options), + Data::new(Analytics::no_analytics()), true, )) .await diff --git a/meilisearch/tests/logs/mod.rs b/meilisearch/tests/logs/mod.rs index 9f4649dca..26482b561 100644 --- a/meilisearch/tests/logs/mod.rs +++ b/meilisearch/tests/logs/mod.rs @@ -7,8 +7,9 @@ use std::str::FromStr; use actix_web::http::header::ContentType; use actix_web::web::Data; use meili_snap::snapshot; +use meilisearch::analytics::Analytics; use meilisearch::search_queue::SearchQueue; -use meilisearch::{analytics, create_app, Opt, SubscriberForSecondLayer}; +use meilisearch::{create_app, Opt, SubscriberForSecondLayer}; use tracing::level_filters::LevelFilter; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::Layer; @@ -54,7 +55,7 @@ async fn basic_test_log_stream_route() { Data::new(search_queue), server.service.options.clone(), (route_layer_handle, stderr_layer_handle), - analytics::MockAnalytics::new(&server.service.options), + Data::new(Analytics::no_analytics()), true, )) .await; From 3a7a20c7162b728a99327eb32b012f6651e7186b Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 11:14:33 +0200 Subject: [PATCH 16/22] remove the segment feature and always import segment --- meilisearch/Cargo.toml | 5 ++--- meilisearch/src/analytics/mod.rs | 21 +++++++++++++++---- .../src/analytics/segment_analytics.rs | 1 - meilisearch/src/option.rs | 9 +------- meilisearch/tests/common/server.rs | 1 - 5 files changed, 20 insertions(+), 17 deletions(-) diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 07357e724..57202f59f 100644 --- 
a/meilisearch/Cargo.toml
+++ b/meilisearch/Cargo.toml
@@ -75,7 +75,7 @@ reqwest = { version = "0.12.5", features = [
 rustls = { version = "0.23.11", features = ["ring"], default-features = false }
 rustls-pki-types = { version = "1.7.0", features = ["alloc"] }
 rustls-pemfile = "2.1.2"
-segment = { version = "0.2.4", optional = true }
+segment = { version = "0.2.4" }
 serde = { version = "1.0.204", features = ["derive"] }
 serde_json = { version = "1.0.120", features = ["preserve_order"] }
 sha2 = "0.10.8"
@@ -132,8 +132,7 @@ tempfile = { version = "3.10.1", optional = true }
 zip = { version = "2.1.3", optional = true }
 
 [features]
-default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
-analytics = ["segment"]
+default = ["meilisearch-types/all-tokenizations", "mini-dashboard"]
 mini-dashboard = [
     "static-files",
     "anyhow",
diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs
index 75e8083c5..67b830204 100644
--- a/meilisearch/src/analytics/mod.rs
+++ b/meilisearch/src/analytics/mod.rs
@@ -1,5 +1,3 @@
-#![allow(clippy::transmute_ptr_to_ref)] // mopify isn't updated with the latest version of clippy yet
-
 pub mod segment_analytics;
 
 use std::fs;
@@ -85,13 +83,19 @@ pub enum DocumentFetchKind {
     Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
 }
 
+/// To send an event to segment, your event must be able to aggregate itself with another event of the same type.
 pub trait Aggregate: 'static + mopa::Any + Send {
+    /// The name of the event that will be sent to segment.
     fn event_name(&self) -> &'static str;
 
+    /// Will be called every time an event has been used twice before segment flushes its buffer.
     fn aggregate(self: Box<Self>, other: Box<Self>) -> Box<Self>
     where
        Self: Sized;
 
+    /// An internal helper function; you shouldn't implement it yourself.
+    /// This function should always be called on the same type. If `this` and `other`
+    /// aren't the same type behind the trait object, the function will do nothing and return `None`.
     fn downcast_aggregate(
         this: Box<dyn Aggregate>,
         other: Box<dyn Aggregate>,
     ) -> Option<Box<dyn Aggregate>>
     where
         Self: Sized,
     {
         if this.is::<Self>() && other.is::<Self>() {
+            // Neither of the two following lines can fail, but just to be sure we don't crash, we still avoid unwrapping
             let this = this.downcast::<Self>().ok()?;
             let other = other.downcast::<Self>().ok()?;
             Some(Self::aggregate(this, other))
@@ -108,18 +113,26 @@ pub trait Aggregate: 'static + mopa::Any + Send {
         }
     }
 
+    /// Converts your structure to the final event that'll be sent to segment.
     fn into_event(self: Box<Self>) -> serde_json::Value;
 }
 
 mopafy!(Aggregate);
 
-/// Helper trait to define multiple aggregate with the same content but a different name.
-/// Commonly used when you must aggregate a search with POST or with GET for example.
+/// Helper trait to define multiple aggregates with the same content but a different name.
+/// Commonly used when you must aggregate a search with POST or with GET, for example.
 pub trait AggregateMethod: 'static + Default + Send {
     fn event_name() -> &'static str;
 }
 
 /// A macro used to quickly define multiple aggregate method with their name
+/// Usage:
+/// ```rust
+/// aggregate_methods!(
+///     SearchGET => "Documents Searched GET",
+///     SearchPOST => "Documents Searched POST",
+/// );
+/// ```
 #[macro_export]
macro_rules!
aggregate_methods { ($method:ident => $event_name:literal) => { diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index c0c2b64d8..10927f49b 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -695,7 +695,6 @@ impl SearchAggregator { aggregate_methods!( SearchGET => "Documents Searched GET", SearchPOST => "Documents Searched POST", - ); impl Aggregate for SearchAggregator { diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index 02dc660a4..7e87a5a2c 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -29,7 +29,6 @@ const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY"; const MEILI_ENV: &str = "MEILI_ENV"; const MEILI_TASK_WEBHOOK_URL: &str = "MEILI_TASK_WEBHOOK_URL"; const MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER: &str = "MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER"; -#[cfg(feature = "analytics")] const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS"; const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT"; const MEILI_SSL_CERT_PATH: &str = "MEILI_SSL_CERT_PATH"; @@ -210,7 +209,6 @@ pub struct Opt { /// Meilisearch automatically collects data from all instances that do not opt out using this flag. /// All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted /// at any time. - #[cfg(feature = "analytics")] #[serde(default)] // we can't send true #[clap(long, env = MEILI_NO_ANALYTICS)] pub no_analytics: bool, @@ -425,7 +423,6 @@ pub struct Opt { impl Opt { /// Whether analytics should be enabled or not. - #[cfg(all(not(debug_assertions), feature = "analytics"))] pub fn analytics(&self) -> bool { !self.no_analytics } @@ -505,7 +502,6 @@ impl Opt { ignore_missing_dump: _, ignore_dump_if_db_exists: _, config_file_path: _, - #[cfg(feature = "analytics")] no_analytics, experimental_contains_filter, experimental_enable_metrics, @@ -533,10 +529,7 @@ impl Opt { ); } - #[cfg(feature = "analytics")] - { - export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string()); - } + export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string()); export_to_env_if_not_present( MEILI_HTTP_PAYLOAD_SIZE_LIMIT, http_payload_size_limit.to_string(), diff --git a/meilisearch/tests/common/server.rs b/meilisearch/tests/common/server.rs index 6d331ebbc..92f181398 100644 --- a/meilisearch/tests/common/server.rs +++ b/meilisearch/tests/common/server.rs @@ -381,7 +381,6 @@ pub fn default_settings(dir: impl AsRef) -> Opt { db_path: dir.as_ref().join("db"), dump_dir: dir.as_ref().join("dumps"), env: "development".to_owned(), - #[cfg(feature = "analytics")] no_analytics: true, max_index_size: Byte::from_u64_with_unit(100, Unit::MiB).unwrap(), max_task_db_size: Byte::from_u64_with_unit(1, Unit::GiB).unwrap(), From 89e2d2b2b9b83a44e2a2af8e2d13020be72c1260 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 17 Oct 2024 13:55:49 +0200 Subject: [PATCH 17/22] fix the doctest --- meilisearch/src/analytics/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index 67b830204..48ac13fc0 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -128,6 +128,8 @@ pub trait AggregateMethod: 'static + Default + Send { /// A macro used to quickly define multiple aggregate method with their name /// Usage: /// ```rust +/// use meilisearch::aggregate_methods; +/// /// aggregate_methods!( /// SearchGET => "Documents 
Searched GET", /// SearchPOST => "Documents Searched POST", From c94679bde6993f91418e4113852ce9c667a198f8 Mon Sep 17 00:00:00 2001 From: Tamo Date: Sun, 20 Oct 2024 17:24:12 +0200 Subject: [PATCH 18/22] apply review comments --- meilisearch/src/routes/indexes/documents.rs | 56 +++++++++++++-------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 854fa5b69..60014bae4 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -107,11 +107,8 @@ aggregate_methods!( DocumentsPOST => "Documents Fetched POST", ); -#[derive(Default, Serialize)] +#[derive(Serialize)] pub struct DocumentsFetchAggregator { - #[serde(rename = "requests.total_received")] - total_received: usize, - // a call on ../documents/:doc_id per_document_id: bool, // if a filter was used @@ -145,7 +142,6 @@ impl DocumentsFetchAggregator { }; Self { - total_received: 1, per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }), per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter), max_limit: limit, @@ -164,7 +160,6 @@ impl Aggregate for DocumentsFetchAggregator { fn aggregate(self: Box, other: Box) -> Box { Box::new(Self { - total_received: self.total_received.saturating_add(other.total_received), per_document_id: self.per_document_id | other.per_document_id, per_filter: self.per_filter | other.per_filter, retrieve_vectors: self.retrieve_vectors | other.retrieve_vectors, @@ -199,7 +194,11 @@ pub async fn get_document( analytics.publish( DocumentsFetchAggregator:: { retrieve_vectors: param_retrieve_vectors.0, - ..Default::default() + per_document_id: true, + per_filter: false, + max_limit: 0, + max_offset: 0, + marker: PhantomData, }, &req, ); @@ -211,10 +210,8 @@ pub async fn get_document( Ok(HttpResponse::Ok().json(document)) } -#[derive(Default, Serialize)] +#[derive(Serialize)] pub struct DocumentsDeletionAggregator { - #[serde(rename = "requests.total_received")] - total_received: usize, per_document_id: bool, clear_all: bool, per_batch: bool, @@ -228,7 +225,6 @@ impl Aggregate for DocumentsDeletionAggregator { fn aggregate(self: Box, other: Box) -> Box { Box::new(Self { - total_received: self.total_received.saturating_add(other.total_received), per_document_id: self.per_document_id | other.per_document_id, clear_all: self.clear_all | other.clear_all, per_batch: self.per_batch | other.per_batch, @@ -253,9 +249,10 @@ pub async fn delete_document( analytics.publish( DocumentsDeletionAggregator { - total_received: 1, per_document_id: true, - ..Default::default() + clear_all: false, + per_batch: false, + per_filter: false, }, &req, ); @@ -316,12 +313,12 @@ pub async fn documents_by_query_post( analytics.publish( DocumentsFetchAggregator:: { - total_received: 1, per_filter: body.filter.is_some(), retrieve_vectors: body.retrieve_vectors, max_limit: body.limit, max_offset: body.offset, - ..Default::default() + per_document_id: false, + marker: PhantomData, }, &req, ); @@ -358,12 +355,12 @@ pub async fn get_documents( analytics.publish( DocumentsFetchAggregator:: { - total_received: 1, per_filter: query.filter.is_some(), retrieve_vectors: query.retrieve_vectors, max_limit: query.limit, max_offset: query.offset, - ..Default::default() + per_document_id: false, + marker: PhantomData, }, &req, ); @@ -426,7 +423,7 @@ aggregate_methods!( Updated => "Documents Updated", ); -#[derive(Default, Serialize)] +#[derive(Serialize)] pub 
struct DocumentsAggregator { payload_types: HashSet, primary_key: HashSet, @@ -718,7 +715,12 @@ pub async fn delete_documents_batch( let index_uid = IndexUid::try_from(index_uid.into_inner())?; analytics.publish( - DocumentsDeletionAggregator { total_received: 1, per_batch: true, ..Default::default() }, + DocumentsDeletionAggregator { + per_batch: true, + per_document_id: false, + clear_all: false, + per_filter: false, + }, &req, ); @@ -761,7 +763,12 @@ pub async fn delete_documents_by_filter( let filter = body.into_inner().filter; analytics.publish( - DocumentsDeletionAggregator { total_received: 1, per_filter: true, ..Default::default() }, + DocumentsDeletionAggregator { + per_filter: true, + per_document_id: false, + clear_all: false, + per_batch: false, + }, &req, ); @@ -793,7 +800,7 @@ pub struct DocumentEditionByFunction { pub function: String, } -#[derive(Default, Serialize)] +#[derive(Serialize)] struct EditDocumentsByFunctionAggregator { // Set to true if at least one request was filtered filtered: bool, @@ -899,7 +906,12 @@ pub async fn clear_all_documents( ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; analytics.publish( - DocumentsDeletionAggregator { total_received: 1, clear_all: true, ..Default::default() }, + DocumentsDeletionAggregator { + clear_all: true, + per_document_id: false, + per_batch: false, + per_filter: false, + }, &req, ); From 73b57228967dffe4a3da7214f2f6bc3ebb15cf5c Mon Sep 17 00:00:00 2001 From: Tamo Date: Sun, 20 Oct 2024 17:31:21 +0200 Subject: [PATCH 19/22] rename the other parameter of the aggregate method to new to avoid confusion --- meilisearch/src/analytics/mod.rs | 12 +-- .../src/analytics/segment_analytics.rs | 26 +++--- meilisearch/src/routes/features.rs | 12 +-- meilisearch/src/routes/indexes/documents.rs | 38 ++++---- .../src/routes/indexes/facet_search.rs | 12 +-- meilisearch/src/routes/indexes/mod.rs | 12 +-- .../src/routes/indexes/settings_analytics.rs | 86 +++++++++---------- meilisearch/src/routes/swap_indexes.rs | 4 +- meilisearch/src/routes/tasks.rs | 24 +++--- 9 files changed, 108 insertions(+), 118 deletions(-) diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index 48ac13fc0..27203ea71 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -89,7 +89,7 @@ pub trait Aggregate: 'static + mopa::Any + Send { fn event_name(&self) -> &'static str; /// Will be called every time an event has been used twice before segment flushed its buffer. - fn aggregate(self: Box, other: Box) -> Box + fn aggregate(self: Box, new: Box) -> Box where Self: Sized; @@ -97,16 +97,16 @@ pub trait Aggregate: 'static + mopa::Any + Send { /// This function should always be called on the same type. If `this` and `other` /// aren't the same type behind the function will do nothing and return `None`. 
fn downcast_aggregate( - this: Box, - other: Box, + old: Box, + new: Box, ) -> Option> where Self: Sized, { - if this.is::() && other.is::() { + if old.is::() && new.is::() { // Both the two following lines cannot fail, but just to be sure we don't crash, we're still avoiding unwrapping - let this = this.downcast::().ok()?; - let other = other.downcast::().ok()?; + let this = old.downcast::().ok()?; + let other = new.downcast::().ok()?; Some(Self::aggregate(this, other)) } else { None diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 10927f49b..328a3a048 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -702,7 +702,7 @@ impl Aggregate for SearchAggregator { Method::event_name() } - fn aggregate(mut self: Box, other: Box) -> Box { + fn aggregate(mut self: Box, new: Box) -> Box { let Self { total_received, total_succeeded, @@ -743,7 +743,7 @@ impl Aggregate for SearchAggregator { ranking_score_threshold, mut locales, marker: _, - } = *other; + } = *new; // request self.total_received = self.total_received.saturating_add(total_received); @@ -1038,22 +1038,22 @@ impl Aggregate for MultiSearchAggregator { } /// Aggregate one [MultiSearchAggregator] into another. - fn aggregate(self: Box, other: Box) -> Box { + fn aggregate(self: Box, new: Box) -> Box { // write the aggregate in a way that will cause a compilation error if a field is added. // get ownership of self, replacing it by a default value. let this = *self; - let total_received = this.total_received.saturating_add(other.total_received); - let total_succeeded = this.total_succeeded.saturating_add(other.total_succeeded); + let total_received = this.total_received.saturating_add(new.total_received); + let total_succeeded = this.total_succeeded.saturating_add(new.total_succeeded); let total_distinct_index_count = - this.total_distinct_index_count.saturating_add(other.total_distinct_index_count); - let total_single_index = this.total_single_index.saturating_add(other.total_single_index); - let total_search_count = this.total_search_count.saturating_add(other.total_search_count); - let show_ranking_score = this.show_ranking_score || other.show_ranking_score; + this.total_distinct_index_count.saturating_add(new.total_distinct_index_count); + let total_single_index = this.total_single_index.saturating_add(new.total_single_index); + let total_search_count = this.total_search_count.saturating_add(new.total_search_count); + let show_ranking_score = this.show_ranking_score || new.show_ranking_score; let show_ranking_score_details = - this.show_ranking_score_details || other.show_ranking_score_details; - let use_federation = this.use_federation || other.use_federation; + this.show_ranking_score_details || new.show_ranking_score_details; + let use_federation = this.use_federation || new.use_federation; Box::new(Self { total_received, @@ -1215,7 +1215,7 @@ impl Aggregate for SimilarAggregator { } /// Aggregate one [SimilarAggregator] into another. 
- fn aggregate(mut self: Box, other: Box) -> Box { + fn aggregate(mut self: Box, new: Box) -> Box { let Self { total_received, total_succeeded, @@ -1233,7 +1233,7 @@ impl Aggregate for SimilarAggregator { ranking_score_threshold, retrieve_vectors, marker: _, - } = *other; + } = *new; // request self.total_received = self.total_received.saturating_add(total_received); diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index 8bdb3ffb3..5d93adc02 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -64,13 +64,13 @@ impl Aggregate for PatchExperimentalFeatureAnalytics { "Experimental features Updated" } - fn aggregate(self: Box, other: Box) -> Box { + fn aggregate(self: Box, new: Box) -> Box { Box::new(Self { - vector_store: other.vector_store, - metrics: other.metrics, - logs_route: other.logs_route, - edit_documents_by_function: other.edit_documents_by_function, - contains_filter: other.contains_filter, + vector_store: new.vector_store, + metrics: new.metrics, + logs_route: new.logs_route, + edit_documents_by_function: new.edit_documents_by_function, + contains_filter: new.contains_filter, }) } diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 60014bae4..47f73ef42 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -158,13 +158,13 @@ impl Aggregate for DocumentsFetchAggregator { Method::event_name() } - fn aggregate(self: Box, other: Box) -> Box { + fn aggregate(self: Box, new: Box) -> Box { Box::new(Self { - per_document_id: self.per_document_id | other.per_document_id, - per_filter: self.per_filter | other.per_filter, - retrieve_vectors: self.retrieve_vectors | other.retrieve_vectors, - max_limit: self.max_limit.max(other.max_limit), - max_offset: self.max_offset.max(other.max_offset), + per_document_id: self.per_document_id | new.per_document_id, + per_filter: self.per_filter | new.per_filter, + retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors, + max_limit: self.max_limit.max(new.max_limit), + max_offset: self.max_offset.max(new.max_offset), marker: PhantomData, }) } @@ -223,12 +223,12 @@ impl Aggregate for DocumentsDeletionAggregator { "Documents Deleted" } - fn aggregate(self: Box, other: Box) -> Box { + fn aggregate(self: Box, new: Box) -> Box { Box::new(Self { - per_document_id: self.per_document_id | other.per_document_id, - clear_all: self.clear_all | other.clear_all, - per_batch: self.per_batch | other.per_batch, - per_filter: self.per_filter | other.per_filter, + per_document_id: self.per_document_id | new.per_document_id, + clear_all: self.clear_all | new.clear_all, + per_batch: self.per_batch | new.per_batch, + per_filter: self.per_filter | new.per_filter, }) } @@ -437,11 +437,11 @@ impl Aggregate for DocumentsAggregator { Method::event_name() } - fn aggregate(self: Box, other: Box) -> Box { + fn aggregate(self: Box, new: Box) -> Box { Box::new(Self { - payload_types: self.payload_types.union(&other.payload_types).cloned().collect(), - primary_key: self.primary_key.union(&other.primary_key).cloned().collect(), - index_creation: self.index_creation | other.index_creation, + payload_types: self.payload_types.union(&new.payload_types).cloned().collect(), + primary_key: self.primary_key.union(&new.primary_key).cloned().collect(), + index_creation: self.index_creation | new.index_creation, method: PhantomData, }) } @@ -815,11 +815,11 @@ impl Aggregate for 
EditDocumentsByFunctionAggregator { "Documents Edited By Function" } - fn aggregate(self: Box, other: Box) -> Box { + fn aggregate(self: Box, new: Box) -> Box { Box::new(Self { - filtered: self.filtered | other.filtered, - with_context: self.with_context | other.with_context, - index_creation: self.index_creation | other.index_creation, + filtered: self.filtered | new.filtered, + with_context: self.with_context | new.with_context, + index_creation: self.index_creation | new.index_creation, }) } diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 8e40397c7..99a4a4f28 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -113,18 +113,18 @@ impl Aggregate for FacetSearchAggregator { "Facet Searched POST" } - fn aggregate(mut self: Box, other: Box) -> Box { - for time in other.time_spent { + fn aggregate(mut self: Box, new: Box) -> Box { + for time in new.time_spent { self.time_spent.push(time); } Box::new(Self { - total_received: self.total_received.saturating_add(other.total_received), - total_succeeded: self.total_succeeded.saturating_add(other.total_succeeded), + total_received: self.total_received.saturating_add(new.total_received), + total_succeeded: self.total_succeeded.saturating_add(new.total_succeeded), time_spent: self.time_spent, - facet_names: self.facet_names.union(&other.facet_names).cloned().collect(), + facet_names: self.facet_names.union(&new.facet_names).cloned().collect(), additional_search_parameters_provided: self.additional_search_parameters_provided - | other.additional_search_parameters_provided, + | new.additional_search_parameters_provided, }) } diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index 65c81a57e..c8183186d 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -134,10 +134,8 @@ impl Aggregate for IndexCreatedAggregate { "Index Created" } - fn aggregate(self: Box, other: Box) -> Box { - Box::new(Self { - primary_key: self.primary_key.union(&other.primary_key).cloned().collect(), - }) + fn aggregate(self: Box, new: Box) -> Box { + Box::new(Self { primary_key: self.primary_key.union(&new.primary_key).cloned().collect() }) } fn into_event(self: Box) -> serde_json::Value { @@ -225,10 +223,8 @@ impl Aggregate for IndexUpdatedAggregate { "Index Updated" } - fn aggregate(self: Box, other: Box) -> Box { - Box::new(Self { - primary_key: self.primary_key.union(&other.primary_key).cloned().collect(), - }) + fn aggregate(self: Box, new: Box) -> Box { + Box::new(Self { primary_key: self.primary_key.union(&new.primary_key).cloned().collect() }) } fn into_event(self: Box) -> serde_json::Value { diff --git a/meilisearch/src/routes/indexes/settings_analytics.rs b/meilisearch/src/routes/indexes/settings_analytics.rs index 636ef3c57..e7d44fa20 100644 --- a/meilisearch/src/routes/indexes/settings_analytics.rs +++ b/meilisearch/src/routes/indexes/settings_analytics.rs @@ -42,114 +42,108 @@ impl Aggregate for SettingsAnalytics { "Settings Updated" } - fn aggregate(self: Box, other: Box) -> Box { + fn aggregate(self: Box, new: Box) -> Box { Box::new(Self { ranking_rules: RankingRulesAnalytics { words_position: self .ranking_rules .words_position - .or(other.ranking_rules.words_position), - typo_position: self - .ranking_rules - .typo_position - .or(other.ranking_rules.typo_position), + .or(new.ranking_rules.words_position), + typo_position: 
self.ranking_rules.typo_position.or(new.ranking_rules.typo_position), proximity_position: self .ranking_rules .proximity_position - .or(other.ranking_rules.proximity_position), + .or(new.ranking_rules.proximity_position), attribute_position: self .ranking_rules .attribute_position - .or(other.ranking_rules.attribute_position), - sort_position: self - .ranking_rules - .sort_position - .or(other.ranking_rules.sort_position), + .or(new.ranking_rules.attribute_position), + sort_position: self.ranking_rules.sort_position.or(new.ranking_rules.sort_position), exactness_position: self .ranking_rules .exactness_position - .or(other.ranking_rules.exactness_position), - values: self.ranking_rules.values.or(other.ranking_rules.values), + .or(new.ranking_rules.exactness_position), + values: self.ranking_rules.values.or(new.ranking_rules.values), }, searchable_attributes: SearchableAttributesAnalytics { - total: self.searchable_attributes.total.or(other.searchable_attributes.total), + total: self.searchable_attributes.total.or(new.searchable_attributes.total), with_wildcard: self .searchable_attributes .with_wildcard - .or(other.searchable_attributes.with_wildcard), + .or(new.searchable_attributes.with_wildcard), }, displayed_attributes: DisplayedAttributesAnalytics { - total: self.displayed_attributes.total.or(other.displayed_attributes.total), + total: self.displayed_attributes.total.or(new.displayed_attributes.total), with_wildcard: self .displayed_attributes .with_wildcard - .or(other.displayed_attributes.with_wildcard), + .or(new.displayed_attributes.with_wildcard), }, sortable_attributes: SortableAttributesAnalytics { - total: self.sortable_attributes.total.or(other.sortable_attributes.total), - has_geo: self.sortable_attributes.has_geo.or(other.sortable_attributes.has_geo), + total: self.sortable_attributes.total.or(new.sortable_attributes.total), + has_geo: self.sortable_attributes.has_geo.or(new.sortable_attributes.has_geo), }, filterable_attributes: FilterableAttributesAnalytics { - total: self.filterable_attributes.total.or(other.filterable_attributes.total), - has_geo: self.filterable_attributes.has_geo.or(other.filterable_attributes.has_geo), + total: self.filterable_attributes.total.or(new.filterable_attributes.total), + has_geo: self.filterable_attributes.has_geo.or(new.filterable_attributes.has_geo), }, distinct_attribute: DistinctAttributeAnalytics { - set: self.distinct_attribute.set | other.distinct_attribute.set, + set: self.distinct_attribute.set | new.distinct_attribute.set, }, proximity_precision: ProximityPrecisionAnalytics { - set: self.proximity_precision.set | other.proximity_precision.set, - value: self.proximity_precision.value.or(other.proximity_precision.value), + set: self.proximity_precision.set | new.proximity_precision.set, + value: self.proximity_precision.value.or(new.proximity_precision.value), }, typo_tolerance: TypoToleranceAnalytics { - enabled: self.typo_tolerance.enabled.or(other.typo_tolerance.enabled), + enabled: self.typo_tolerance.enabled.or(new.typo_tolerance.enabled), disable_on_attributes: self .typo_tolerance .disable_on_attributes - .or(other.typo_tolerance.disable_on_attributes), + .or(new.typo_tolerance.disable_on_attributes), disable_on_words: self .typo_tolerance .disable_on_words - .or(other.typo_tolerance.disable_on_words), + .or(new.typo_tolerance.disable_on_words), min_word_size_for_one_typo: self .typo_tolerance .min_word_size_for_one_typo - .or(other.typo_tolerance.min_word_size_for_one_typo), + 
.or(new.typo_tolerance.min_word_size_for_one_typo), min_word_size_for_two_typos: self .typo_tolerance .min_word_size_for_two_typos - .or(other.typo_tolerance.min_word_size_for_two_typos), + .or(new.typo_tolerance.min_word_size_for_two_typos), }, faceting: FacetingAnalytics { max_values_per_facet: self .faceting .max_values_per_facet - .or(other.faceting.max_values_per_facet), + .or(new.faceting.max_values_per_facet), sort_facet_values_by_star_count: self .faceting .sort_facet_values_by_star_count - .or(other.faceting.sort_facet_values_by_star_count), + .or(new.faceting.sort_facet_values_by_star_count), sort_facet_values_by_total: self .faceting .sort_facet_values_by_total - .or(other.faceting.sort_facet_values_by_total), + .or(new.faceting.sort_facet_values_by_total), }, pagination: PaginationAnalytics { - max_total_hits: self.pagination.max_total_hits.or(other.pagination.max_total_hits), + max_total_hits: self.pagination.max_total_hits.or(new.pagination.max_total_hits), }, stop_words: StopWordsAnalytics { - total: self.stop_words.total.or(other.stop_words.total), + total: self.stop_words.total.or(new.stop_words.total), }, - synonyms: SynonymsAnalytics { total: self.synonyms.total.or(other.synonyms.total) }, + synonyms: SynonymsAnalytics { total: self.synonyms.total.or(new.synonyms.total) }, embedders: EmbeddersAnalytics { - total: self.embedders.total.or(other.embedders.total), - sources: match (self.embedders.sources, other.embedders.sources) { + total: self.embedders.total.or(new.embedders.total), + sources: match (self.embedders.sources, new.embedders.sources) { (None, None) => None, (Some(sources), None) | (None, Some(sources)) => Some(sources), (Some(this), Some(other)) => Some(this.union(&other).cloned().collect()), }, document_template_used: match ( self.embedders.document_template_used, - other.embedders.document_template_used, + new.embedders.document_template_used, ) { (None, None) => None, (Some(used), None) | (None, Some(used)) => Some(used), @@ -157,7 +151,7 @@ impl Aggregate for SettingsAnalytics { }, document_template_max_bytes: match ( self.embedders.document_template_max_bytes, - other.embedders.document_template_max_bytes, + new.embedders.document_template_max_bytes, ) { (None, None) => None, (Some(bytes), None) | (None, Some(bytes)) => Some(bytes), @@ -165,7 +159,7 @@ impl Aggregate for SettingsAnalytics { }, binary_quantization_used: match ( self.embedders.binary_quantization_used, - other.embedders.binary_quantization_used, + new.embedders.binary_quantization_used, ) { (None, None) => None, (Some(bq), None) | (None, Some(bq)) => Some(bq), @@ -176,17 +170,17 @@ impl Aggregate for SettingsAnalytics { search_cutoff_ms: self .search_cutoff_ms .search_cutoff_ms - .or(other.search_cutoff_ms.search_cutoff_ms), + .or(new.search_cutoff_ms.search_cutoff_ms), }, - locales: LocalesAnalytics { locales: self.locales.locales.or(other.locales.locales) }, + locales: LocalesAnalytics { locales: self.locales.locales.or(new.locales.locales) }, dictionary: DictionaryAnalytics { - total: self.dictionary.total.or(other.dictionary.total), + total: self.dictionary.total.or(new.dictionary.total), }, separator_tokens: SeparatorTokensAnalytics { - total: self.separator_tokens.total.or(other.non_separator_tokens.total), + total: self.separator_tokens.total.or(new.non_separator_tokens.total), }, non_separator_tokens: NonSeparatorTokensAnalytics { - total: self.non_separator_tokens.total.or(other.non_separator_tokens.total), + total: 
self.non_separator_tokens.total.or(new.non_separator_tokens.total), }, }) } diff --git a/meilisearch/src/routes/swap_indexes.rs b/meilisearch/src/routes/swap_indexes.rs index f7d8f4eff..9b8b67e63 100644 --- a/meilisearch/src/routes/swap_indexes.rs +++ b/meilisearch/src/routes/swap_indexes.rs @@ -39,9 +39,9 @@ impl Aggregate for IndexSwappedAnalytics { "Indexes Swapped" } - fn aggregate(self: Box, other: Box) -> Box { + fn aggregate(self: Box, new: Box) -> Box { Box::new(Self { - swap_operation_number: self.swap_operation_number.max(other.swap_operation_number), + swap_operation_number: self.swap_operation_number.max(new.swap_operation_number), }) } diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index ff4aee998..712b8ecde 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -185,25 +185,25 @@ impl Aggregate for TaskFilterAnalytics, other: Box) -> Box { + fn aggregate(self: Box, new: Box) -> Box { Box::new(Self { - filtered_by_uid: self.filtered_by_uid | other.filtered_by_uid, - filtered_by_index_uid: self.filtered_by_index_uid | other.filtered_by_index_uid, - filtered_by_type: self.filtered_by_type | other.filtered_by_type, - filtered_by_status: self.filtered_by_status | other.filtered_by_status, - filtered_by_canceled_by: self.filtered_by_canceled_by | other.filtered_by_canceled_by, + filtered_by_uid: self.filtered_by_uid | new.filtered_by_uid, + filtered_by_index_uid: self.filtered_by_index_uid | new.filtered_by_index_uid, + filtered_by_type: self.filtered_by_type | new.filtered_by_type, + filtered_by_status: self.filtered_by_status | new.filtered_by_status, + filtered_by_canceled_by: self.filtered_by_canceled_by | new.filtered_by_canceled_by, filtered_by_before_enqueued_at: self.filtered_by_before_enqueued_at - | other.filtered_by_before_enqueued_at, + | new.filtered_by_before_enqueued_at, filtered_by_after_enqueued_at: self.filtered_by_after_enqueued_at - | other.filtered_by_after_enqueued_at, + | new.filtered_by_after_enqueued_at, filtered_by_before_started_at: self.filtered_by_before_started_at - | other.filtered_by_before_started_at, + | new.filtered_by_before_started_at, filtered_by_after_started_at: self.filtered_by_after_started_at - | other.filtered_by_after_started_at, + | new.filtered_by_after_started_at, filtered_by_before_finished_at: self.filtered_by_before_finished_at - | other.filtered_by_before_finished_at, + | new.filtered_by_before_finished_at, filtered_by_after_finished_at: self.filtered_by_after_finished_at - | other.filtered_by_after_finished_at, + | new.filtered_by_after_finished_at, marker: std::marker::PhantomData, }) From ac919df37dff4dda34ae2687517bb4b1a6b2b4cf Mon Sep 17 00:00:00 2001 From: Tamo Date: Sun, 20 Oct 2024 17:36:29 +0200 Subject: [PATCH 20/22] simplify the trait a bit more by getting rids of the downcast_aggregate method --- meilisearch/src/analytics/mod.rs | 20 ------------------- .../src/analytics/segment_analytics.rs | 18 ++++++++++++++++- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index 27203ea71..d72ab9d01 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -93,26 +93,6 @@ pub trait Aggregate: 'static + mopa::Any + Send { where Self: Sized; - /// An internal helper function, you shouldn't implement it yourself. - /// This function should always be called on the same type. 
If `this` and `other` - /// aren't the same type behind the function will do nothing and return `None`. - fn downcast_aggregate( - old: Box, - new: Box, - ) -> Option> - where - Self: Sized, - { - if old.is::() && new.is::() { - // Both the two following lines cannot fail, but just to be sure we don't crash, we're still avoiding unwrapping - let this = old.downcast::().ok()?; - let other = new.downcast::().ok()?; - Some(Self::aggregate(this, other)) - } else { - None - } - } - /// Converts your structure to the final event that'll be sent to segment. fn into_event(self: Box) -> serde_json::Value; } diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 328a3a048..96a0a676c 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -82,6 +82,22 @@ pub struct Event { total: usize, } +/// This function should always be called on the same type. If `this` and `other` +/// aren't the same type the function will do nothing and return `None`. +fn downcast_aggregate( + old: Box, + new: Box, +) -> Option> { + if old.is::() && new.is::() { + // Both the two following lines cannot fail, but just to be sure we don't crash, we're still avoiding unwrapping + let this = old.downcast::().ok()?; + let other = new.downcast::().ok()?; + Some(ConcreteType::aggregate(this, other)) + } else { + None + } +} + impl Message { pub fn new(event: T, request: &HttpRequest) -> Self { Self { @@ -92,7 +108,7 @@ impl Message { user_agents: extract_user_agents(request), total: 1, }, - aggregator_function: T::downcast_aggregate, + aggregator_function: downcast_aggregate::, } } } From af589c85ec4746ef38a38420e0b6d433b1dc86d2 Mon Sep 17 00:00:00 2001 From: Tamo Date: Sun, 20 Oct 2024 17:40:31 +0200 Subject: [PATCH 21/22] reverse all the settings to keep the last one received instead of the first one received in case we receive the same setting multiple times --- .../src/routes/indexes/settings_analytics.rs | 94 +++++++++---------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/meilisearch/src/routes/indexes/settings_analytics.rs b/meilisearch/src/routes/indexes/settings_analytics.rs index e7d44fa20..de01b72e8 100644 --- a/meilisearch/src/routes/indexes/settings_analytics.rs +++ b/meilisearch/src/routes/indexes/settings_analytics.rs @@ -45,97 +45,97 @@ impl Aggregate for SettingsAnalytics { fn aggregate(self: Box, new: Box) -> Box { Box::new(Self { ranking_rules: RankingRulesAnalytics { - words_position: self + words_position: new .ranking_rules .words_position - .or(new.ranking_rules.words_position), - typo_position: self.ranking_rules.typo_position.or(new.ranking_rules.typo_position), - proximity_position: self + .or(self.ranking_rules.words_position), + typo_position: new.ranking_rules.typo_position.or(self.ranking_rules.typo_position), + proximity_position: new .ranking_rules .proximity_position - .or(new.ranking_rules.proximity_position), - attribute_position: self + .or(self.ranking_rules.proximity_position), + attribute_position: new .ranking_rules .attribute_position - .or(new.ranking_rules.attribute_position), - sort_position: self.ranking_rules.sort_position.or(new.ranking_rules.sort_position), - exactness_position: self + .or(self.ranking_rules.attribute_position), + sort_position: new.ranking_rules.sort_position.or(self.ranking_rules.sort_position), + exactness_position: new .ranking_rules .exactness_position - .or(new.ranking_rules.exactness_position), - values: 
self.ranking_rules.values.or(new.ranking_rules.values), + .or(self.ranking_rules.exactness_position), + values: new.ranking_rules.values.or(self.ranking_rules.values), }, searchable_attributes: SearchableAttributesAnalytics { - total: self.searchable_attributes.total.or(new.searchable_attributes.total), - with_wildcard: self + total: new.searchable_attributes.total.or(self.searchable_attributes.total), + with_wildcard: new .searchable_attributes .with_wildcard - .or(new.searchable_attributes.with_wildcard), + .or(self.searchable_attributes.with_wildcard), }, displayed_attributes: DisplayedAttributesAnalytics { - total: self.displayed_attributes.total.or(new.displayed_attributes.total), - with_wildcard: self + total: new.displayed_attributes.total.or(self.displayed_attributes.total), + with_wildcard: new .displayed_attributes .with_wildcard - .or(new.displayed_attributes.with_wildcard), + .or(self.displayed_attributes.with_wildcard), }, sortable_attributes: SortableAttributesAnalytics { - total: self.sortable_attributes.total.or(new.sortable_attributes.total), - has_geo: self.sortable_attributes.has_geo.or(new.sortable_attributes.has_geo), + total: new.sortable_attributes.total.or(self.sortable_attributes.total), + has_geo: new.sortable_attributes.has_geo.or(self.sortable_attributes.has_geo), }, filterable_attributes: FilterableAttributesAnalytics { - total: self.filterable_attributes.total.or(new.filterable_attributes.total), - has_geo: self.filterable_attributes.has_geo.or(new.filterable_attributes.has_geo), + total: new.filterable_attributes.total.or(self.filterable_attributes.total), + has_geo: new.filterable_attributes.has_geo.or(self.filterable_attributes.has_geo), }, distinct_attribute: DistinctAttributeAnalytics { set: self.distinct_attribute.set | new.distinct_attribute.set, }, proximity_precision: ProximityPrecisionAnalytics { set: self.proximity_precision.set | new.proximity_precision.set, - value: self.proximity_precision.value.or(new.proximity_precision.value), + value: new.proximity_precision.value.or(self.proximity_precision.value), }, typo_tolerance: TypoToleranceAnalytics { - enabled: self.typo_tolerance.enabled.or(new.typo_tolerance.enabled), - disable_on_attributes: self + enabled: new.typo_tolerance.enabled.or(self.typo_tolerance.enabled), + disable_on_attributes: new .typo_tolerance .disable_on_attributes - .or(new.typo_tolerance.disable_on_attributes), - disable_on_words: self + .or(self.typo_tolerance.disable_on_attributes), + disable_on_words: new .typo_tolerance .disable_on_words - .or(new.typo_tolerance.disable_on_words), - min_word_size_for_one_typo: self + .or(self.typo_tolerance.disable_on_words), + min_word_size_for_one_typo: new .typo_tolerance .min_word_size_for_one_typo - .or(new.typo_tolerance.min_word_size_for_one_typo), - min_word_size_for_two_typos: self + .or(self.typo_tolerance.min_word_size_for_one_typo), + min_word_size_for_two_typos: new .typo_tolerance .min_word_size_for_two_typos - .or(new.typo_tolerance.min_word_size_for_two_typos), + .or(self.typo_tolerance.min_word_size_for_two_typos), }, faceting: FacetingAnalytics { - max_values_per_facet: self + max_values_per_facet: new .faceting .max_values_per_facet - .or(new.faceting.max_values_per_facet), - sort_facet_values_by_star_count: self + .or(self.faceting.max_values_per_facet), + sort_facet_values_by_star_count: new .faceting .sort_facet_values_by_star_count - .or(new.faceting.sort_facet_values_by_star_count), - sort_facet_values_by_total: self + 
.or(self.faceting.sort_facet_values_by_star_count), + sort_facet_values_by_total: new .faceting .sort_facet_values_by_total - .or(new.faceting.sort_facet_values_by_total), + .or(self.faceting.sort_facet_values_by_total), }, pagination: PaginationAnalytics { - max_total_hits: self.pagination.max_total_hits.or(new.pagination.max_total_hits), + max_total_hits: new.pagination.max_total_hits.or(self.pagination.max_total_hits), }, stop_words: StopWordsAnalytics { - total: self.stop_words.total.or(new.stop_words.total), + total: new.stop_words.total.or(self.stop_words.total), }, - synonyms: SynonymsAnalytics { total: self.synonyms.total.or(new.synonyms.total) }, + synonyms: SynonymsAnalytics { total: new.synonyms.total.or(self.synonyms.total) }, embedders: EmbeddersAnalytics { - total: self.embedders.total.or(new.embedders.total), + total: new.embedders.total.or(self.embedders.total), sources: match (self.embedders.sources, new.embedders.sources) { (None, None) => None, (Some(sources), None) | (None, Some(sources)) => Some(sources), @@ -167,20 +167,20 @@ impl Aggregate for SettingsAnalytics { }, }, search_cutoff_ms: SearchCutoffMsAnalytics { - search_cutoff_ms: self + search_cutoff_ms: new .search_cutoff_ms .search_cutoff_ms - .or(new.search_cutoff_ms.search_cutoff_ms), + .or(self.search_cutoff_ms.search_cutoff_ms), }, - locales: LocalesAnalytics { locales: self.locales.locales.or(new.locales.locales) }, + locales: LocalesAnalytics { locales: new.locales.locales.or(self.locales.locales) }, dictionary: DictionaryAnalytics { - total: self.dictionary.total.or(new.dictionary.total), + total: new.dictionary.total.or(self.dictionary.total), }, separator_tokens: SeparatorTokensAnalytics { - total: self.separator_tokens.total.or(new.non_separator_tokens.total), + total: new.non_separator_tokens.total.or(self.separator_tokens.total), }, non_separator_tokens: NonSeparatorTokensAnalytics { - total: self.non_separator_tokens.total.or(new.non_separator_tokens.total), + total: new.non_separator_tokens.total.or(self.non_separator_tokens.total), }, }) } From 5675585fe8b4f51eed7b08bb30e1fed0f711e340 Mon Sep 17 00:00:00 2001 From: Tamo Date: Sun, 20 Oct 2024 17:54:43 +0200 Subject: [PATCH 22/22] move all the searches structures to new modules --- meilisearch/src/analytics/mod.rs | 4 - .../src/analytics/segment_analytics.rs | 868 +----------------- meilisearch/src/routes/indexes/mod.rs | 2 + meilisearch/src/routes/indexes/search.rs | 4 +- .../src/routes/indexes/search_analytics.rs | 485 ++++++++++ meilisearch/src/routes/indexes/similar.rs | 4 +- .../src/routes/indexes/similar_analytics.rs | 235 +++++ meilisearch/src/routes/mod.rs | 1 + meilisearch/src/routes/multi_search.rs | 4 +- .../src/routes/multi_search_analytics.rs | 170 ++++ 10 files changed, 903 insertions(+), 874 deletions(-) create mode 100644 meilisearch/src/routes/indexes/search_analytics.rs create mode 100644 meilisearch/src/routes/indexes/similar_analytics.rs create mode 100644 meilisearch/src/routes/multi_search_analytics.rs diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index d72ab9d01..bd14b0bfa 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -15,13 +15,9 @@ use platform_dirs::AppDirs; // if the feature analytics is enabled we use the real analytics pub type SegmentAnalytics = segment_analytics::SegmentAnalytics; -pub use segment_analytics::SearchAggregator; -pub use segment_analytics::SimilarAggregator; use crate::Opt; -pub use self::segment_analytics::MultiSearchAggregator; 
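A minimal standalone sketch (not part of the patch) of why the operand swap in the settings aggregation above changes behaviour: `Option::or` returns the first `Some` it sees, so `new.field.or(self.field)` keeps the value from the most recent "Settings Updated" event rather than the first one.

    // Sketch only: operand order of `Option::or` decides whether the first
    // or the last received value survives the merge.
    fn main() {
        let first: Option<u32> = Some(1); // value from the first event
        let last: Option<u32> = Some(2); // value from a later event
        assert_eq!(first.or(last), Some(1)); // old order: first event wins
        assert_eq!(last.or(first), Some(2)); // new order: last event wins
    }
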
- /// A macro used to quickly define events that don't aggregate or send anything besides an empty event with its name. #[macro_export] macro_rules! empty_analytics { diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 96a0a676c..7dc746b14 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -1,5 +1,5 @@ use std::any::TypeId; -use std::collections::{BTreeSet, BinaryHeap, HashMap, HashSet}; +use std::collections::{HashMap, HashSet}; use std::fs; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -11,10 +11,8 @@ use byte_unit::Byte; use index_scheduler::IndexScheduler; use meilisearch_auth::{AuthController, AuthFilter}; use meilisearch_types::features::RuntimeTogglableFeatures; -use meilisearch_types::locales::Locale; use meilisearch_types::InstanceUid; use once_cell::sync::Lazy; -use regex::Regex; use segment::message::{Identify, Track, User}; use segment::{AutoBatcher, Batcher, HttpClient}; use serde::Serialize; @@ -25,17 +23,12 @@ use tokio::select; use tokio::sync::mpsc::{self, Receiver, Sender}; use uuid::Uuid; -use super::{config_user_id_path, Aggregate, AggregateMethod, MEILISEARCH_CONFIG_PATH}; +use super::{config_user_id_path, Aggregate, MEILISEARCH_CONFIG_PATH}; use crate::option::{ default_http_addr, IndexerOpts, LogMode, MaxMemory, MaxThreads, ScheduleSnapshot, }; use crate::routes::{create_all_stats, Stats}; -use crate::search::{ - FederatedSearch, SearchQuery, SearchQueryWithIndex, SearchResult, SimilarQuery, SimilarResult, - DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, - DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEMANTIC_RATIO, -}; -use crate::{aggregate_methods, Opt}; +use crate::Opt; const ANALYTICS_HEADER: &str = "X-Meilisearch-Client"; @@ -489,858 +482,3 @@ impl Segment { let _ = self.batcher.flush().await; } } - -#[derive(Default)] -pub struct SearchAggregator { - // requests - total_received: usize, - total_succeeded: usize, - total_degraded: usize, - total_used_negative_operator: usize, - time_spent: BinaryHeap, - - // sort - sort_with_geo_point: bool, - // every time a request has a filter, this field must be incremented by the number of terms it contains - sort_sum_of_criteria_terms: usize, - // every time a request has a filter, this field must be incremented by one - sort_total_number_of_criteria: usize, - - // distinct - distinct: bool, - - // filter - filter_with_geo_radius: bool, - filter_with_geo_bounding_box: bool, - // every time a request has a filter, this field must be incremented by the number of terms it contains - filter_sum_of_criteria_terms: usize, - // every time a request has a filter, this field must be incremented by one - filter_total_number_of_criteria: usize, - used_syntax: HashMap, - - // attributes_to_search_on - // every time a search is done using attributes_to_search_on - attributes_to_search_on_total_number_of_uses: usize, - - // q - // The maximum number of terms in a q request - max_terms_number: usize, - - // vector - // The maximum number of floats in a vector request - max_vector_size: usize, - // Whether the semantic ratio passed to a hybrid search equals the default ratio. 
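The per-variant counters in the aggregator above (`used_syntax`, `matching_strategy`) are all merged with the same entry-or-insert pattern whenever two aggregates are combined, as visible in the merge loops below. A standalone sketch of that pattern, with made-up values:

    use std::collections::HashMap;

    // Sketch only: add the counts of `new` into `this`, saturating instead
    // of overflowing, mirroring the `used_syntax` merge loop.
    fn merge_counts(this: &mut HashMap<String, usize>, new: HashMap<String, usize>) {
        for (key, value) in new {
            let count = this.entry(key).or_insert(0);
            *count = count.saturating_add(value);
        }
    }

    fn main() {
        let mut acc = HashMap::from([("string".to_string(), 2)]);
        let new = HashMap::from([("string".to_string(), 1), ("array".to_string(), 1)]);
        merge_counts(&mut acc, new);
        assert_eq!(acc["string"], 3);
        assert_eq!(acc["array"], 1);
    }
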
- semantic_ratio: bool, - hybrid: bool, - retrieve_vectors: bool, - - // every time a search is done, we increment the counter linked to the used settings - matching_strategy: HashMap, - - // List of the unique Locales passed as parameter - locales: BTreeSet, - - // pagination - max_limit: usize, - max_offset: usize, - finite_pagination: usize, - - // formatting - max_attributes_to_retrieve: usize, - max_attributes_to_highlight: usize, - highlight_pre_tag: bool, - highlight_post_tag: bool, - max_attributes_to_crop: usize, - crop_marker: bool, - show_matches_position: bool, - crop_length: bool, - - // facets - facets_sum_of_terms: usize, - facets_total_number_of_facets: usize, - - // scoring - show_ranking_score: bool, - show_ranking_score_details: bool, - ranking_score_threshold: bool, - - marker: std::marker::PhantomData, -} - -impl SearchAggregator { - #[allow(clippy::field_reassign_with_default)] - pub fn from_query(query: &SearchQuery) -> Self { - let SearchQuery { - q, - vector, - offset, - limit, - page, - hits_per_page, - attributes_to_retrieve: _, - retrieve_vectors, - attributes_to_crop: _, - crop_length, - attributes_to_highlight: _, - show_matches_position, - show_ranking_score, - show_ranking_score_details, - filter, - sort, - distinct, - facets: _, - highlight_pre_tag, - highlight_post_tag, - crop_marker, - matching_strategy, - attributes_to_search_on, - hybrid, - ranking_score_threshold, - locales, - } = query; - - let mut ret = Self::default(); - - ret.total_received = 1; - - if let Some(ref sort) = sort { - ret.sort_total_number_of_criteria = 1; - ret.sort_with_geo_point = sort.iter().any(|s| s.contains("_geoPoint(")); - ret.sort_sum_of_criteria_terms = sort.len(); - } - - ret.distinct = distinct.is_some(); - - if let Some(ref filter) = filter { - static RE: Lazy = Lazy::new(|| Regex::new("AND | OR").unwrap()); - ret.filter_total_number_of_criteria = 1; - - let syntax = match filter { - Value::String(_) => "string".to_string(), - Value::Array(values) => { - if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) { - "mixed".to_string() - } else { - "array".to_string() - } - } - _ => "none".to_string(), - }; - // convert the string to a HashMap - ret.used_syntax.insert(syntax, 1); - - let stringified_filters = filter.to_string(); - ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius("); - ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox("); - ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count(); - } - - // attributes_to_search_on - if attributes_to_search_on.is_some() { - ret.attributes_to_search_on_total_number_of_uses = 1; - } - - if let Some(ref q) = q { - ret.max_terms_number = q.split_whitespace().count(); - } - - if let Some(ref vector) = vector { - ret.max_vector_size = vector.len(); - } - ret.retrieve_vectors |= retrieve_vectors; - - if query.is_finite_pagination() { - let limit = hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT); - ret.max_limit = limit; - ret.max_offset = page.unwrap_or(1).saturating_sub(1) * limit; - ret.finite_pagination = 1; - } else { - ret.max_limit = *limit; - ret.max_offset = *offset; - ret.finite_pagination = 0; - } - - ret.matching_strategy.insert(format!("{:?}", matching_strategy), 1); - - if let Some(locales) = locales { - ret.locales = locales.iter().copied().collect(); - } - - ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG(); - ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG(); - ret.crop_marker = 
*crop_marker != DEFAULT_CROP_MARKER(); - ret.crop_length = *crop_length != DEFAULT_CROP_LENGTH(); - ret.show_matches_position = *show_matches_position; - - ret.show_ranking_score = *show_ranking_score; - ret.show_ranking_score_details = *show_ranking_score_details; - ret.ranking_score_threshold = ranking_score_threshold.is_some(); - - if let Some(hybrid) = hybrid { - ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO(); - ret.hybrid = true; - } - - ret - } - - pub fn succeed(&mut self, result: &SearchResult) { - let SearchResult { - hits: _, - query: _, - processing_time_ms, - hits_info: _, - semantic_hit_count: _, - facet_distribution: _, - facet_stats: _, - degraded, - used_negative_operator, - } = result; - - self.total_succeeded = self.total_succeeded.saturating_add(1); - if *degraded { - self.total_degraded = self.total_degraded.saturating_add(1); - } - if *used_negative_operator { - self.total_used_negative_operator = self.total_used_negative_operator.saturating_add(1); - } - self.time_spent.push(*processing_time_ms as usize); - } -} - -aggregate_methods!( - SearchGET => "Documents Searched GET", - SearchPOST => "Documents Searched POST", -); - -impl Aggregate for SearchAggregator { - fn event_name(&self) -> &'static str { - Method::event_name() - } - - fn aggregate(mut self: Box, new: Box) -> Box { - let Self { - total_received, - total_succeeded, - mut time_spent, - sort_with_geo_point, - sort_sum_of_criteria_terms, - sort_total_number_of_criteria, - distinct, - filter_with_geo_radius, - filter_with_geo_bounding_box, - filter_sum_of_criteria_terms, - filter_total_number_of_criteria, - used_syntax, - attributes_to_search_on_total_number_of_uses, - max_terms_number, - max_vector_size, - retrieve_vectors, - matching_strategy, - max_limit, - max_offset, - finite_pagination, - max_attributes_to_retrieve, - max_attributes_to_highlight, - highlight_pre_tag, - highlight_post_tag, - max_attributes_to_crop, - crop_marker, - show_matches_position, - crop_length, - facets_sum_of_terms, - facets_total_number_of_facets, - show_ranking_score, - show_ranking_score_details, - semantic_ratio, - hybrid, - total_degraded, - total_used_negative_operator, - ranking_score_threshold, - mut locales, - marker: _, - } = *new; - - // request - self.total_received = self.total_received.saturating_add(total_received); - self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); - self.total_degraded = self.total_degraded.saturating_add(total_degraded); - self.total_used_negative_operator = - self.total_used_negative_operator.saturating_add(total_used_negative_operator); - self.time_spent.append(&mut time_spent); - - // sort - self.sort_with_geo_point |= sort_with_geo_point; - self.sort_sum_of_criteria_terms = - self.sort_sum_of_criteria_terms.saturating_add(sort_sum_of_criteria_terms); - self.sort_total_number_of_criteria = - self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria); - - // distinct - self.distinct |= distinct; - - // filter - self.filter_with_geo_radius |= filter_with_geo_radius; - self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box; - self.filter_sum_of_criteria_terms = - self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms); - self.filter_total_number_of_criteria = - self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria); - for (key, value) in used_syntax.into_iter() { - let used_syntax = self.used_syntax.entry(key).or_insert(0); - *used_syntax = 
used_syntax.saturating_add(value); - } - - // attributes_to_search_on - self.attributes_to_search_on_total_number_of_uses = self - .attributes_to_search_on_total_number_of_uses - .saturating_add(attributes_to_search_on_total_number_of_uses); - - // q - self.max_terms_number = self.max_terms_number.max(max_terms_number); - - // vector - self.max_vector_size = self.max_vector_size.max(max_vector_size); - self.retrieve_vectors |= retrieve_vectors; - self.semantic_ratio |= semantic_ratio; - self.hybrid |= hybrid; - - // pagination - self.max_limit = self.max_limit.max(max_limit); - self.max_offset = self.max_offset.max(max_offset); - self.finite_pagination += finite_pagination; - - // formatting - self.max_attributes_to_retrieve = - self.max_attributes_to_retrieve.max(max_attributes_to_retrieve); - self.max_attributes_to_highlight = - self.max_attributes_to_highlight.max(max_attributes_to_highlight); - self.highlight_pre_tag |= highlight_pre_tag; - self.highlight_post_tag |= highlight_post_tag; - self.max_attributes_to_crop = self.max_attributes_to_crop.max(max_attributes_to_crop); - self.crop_marker |= crop_marker; - self.show_matches_position |= show_matches_position; - self.crop_length |= crop_length; - - // facets - self.facets_sum_of_terms = self.facets_sum_of_terms.saturating_add(facets_sum_of_terms); - self.facets_total_number_of_facets = - self.facets_total_number_of_facets.saturating_add(facets_total_number_of_facets); - - // matching strategy - for (key, value) in matching_strategy.into_iter() { - let matching_strategy = self.matching_strategy.entry(key).or_insert(0); - *matching_strategy = matching_strategy.saturating_add(value); - } - - // scoring - self.show_ranking_score |= show_ranking_score; - self.show_ranking_score_details |= show_ranking_score_details; - self.ranking_score_threshold |= ranking_score_threshold; - - // locales - self.locales.append(&mut locales); - - self - } - - fn into_event(self: Box) -> serde_json::Value { - let Self { - total_received, - total_succeeded, - time_spent, - sort_with_geo_point, - sort_sum_of_criteria_terms, - sort_total_number_of_criteria, - distinct, - filter_with_geo_radius, - filter_with_geo_bounding_box, - filter_sum_of_criteria_terms, - filter_total_number_of_criteria, - used_syntax, - attributes_to_search_on_total_number_of_uses, - max_terms_number, - max_vector_size, - retrieve_vectors, - matching_strategy, - max_limit, - max_offset, - finite_pagination, - max_attributes_to_retrieve, - max_attributes_to_highlight, - highlight_pre_tag, - highlight_post_tag, - max_attributes_to_crop, - crop_marker, - show_matches_position, - crop_length, - facets_sum_of_terms, - facets_total_number_of_facets, - show_ranking_score, - show_ranking_score_details, - semantic_ratio, - hybrid, - total_degraded, - total_used_negative_operator, - ranking_score_threshold, - locales, - marker: _, - } = *self; - - // we get all the values in a sorted manner - let time_spent = time_spent.into_sorted_vec(); - // the index of the 99th percentage of value - let percentile_99th = time_spent.len() * 99 / 100; - // We are only interested by the slowest value of the 99th fastest results - let time_spent = time_spent.get(percentile_99th); - - json!({ - "requests": { - "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), - "total_succeeded": total_succeeded, - "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics - "total_received": total_received, - "total_degraded": total_degraded, - "total_used_negative_operator": 
total_used_negative_operator, - }, - "sort": { - "with_geoPoint": sort_with_geo_point, - "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64), - }, - "distinct": distinct, - "filter": { - "with_geoRadius": filter_with_geo_radius, - "with_geoBoundingBox": filter_with_geo_bounding_box, - "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), - "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), - }, - "attributes_to_search_on": { - "total_number_of_uses": attributes_to_search_on_total_number_of_uses, - }, - "q": { - "max_terms_number": max_terms_number, - }, - "vector": { - "max_vector_size": max_vector_size, - "retrieve_vectors": retrieve_vectors, - }, - "hybrid": { - "enabled": hybrid, - "semantic_ratio": semantic_ratio, - }, - "pagination": { - "max_limit": max_limit, - "max_offset": max_offset, - "most_used_navigation": if finite_pagination > (total_received / 2) { "exhaustive" } else { "estimated" }, - }, - "formatting": { - "max_attributes_to_retrieve": max_attributes_to_retrieve, - "max_attributes_to_highlight": max_attributes_to_highlight, - "highlight_pre_tag": highlight_pre_tag, - "highlight_post_tag": highlight_post_tag, - "max_attributes_to_crop": max_attributes_to_crop, - "crop_marker": crop_marker, - "show_matches_position": show_matches_position, - "crop_length": crop_length, - }, - "facets": { - "avg_facets_number": format!("{:.2}", facets_sum_of_terms as f64 / facets_total_number_of_facets as f64), - }, - "matching_strategy": { - "most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), - }, - "locales": locales, - "scoring": { - "show_ranking_score": show_ranking_score, - "show_ranking_score_details": show_ranking_score_details, - "ranking_score_threshold": ranking_score_threshold, - }, - }) - } -} - -#[derive(Default)] -pub struct MultiSearchAggregator { - // requests - total_received: usize, - total_succeeded: usize, - - // sum of the number of distinct indexes in each single request, use with total_received to compute an avg - total_distinct_index_count: usize, - // number of queries with a single index, use with total_received to compute a proportion - total_single_index: usize, - - // sum of the number of search queries in the requests, use with total_received to compute an average - total_search_count: usize, - - // scoring - show_ranking_score: bool, - show_ranking_score_details: bool, - - // federation - use_federation: bool, -} - -impl MultiSearchAggregator { - pub fn from_federated_search(federated_search: &FederatedSearch) -> Self { - let use_federation = federated_search.federation.is_some(); - - let distinct_indexes: HashSet<_> = federated_search - .queries - .iter() - .map(|query| { - let query = &query; - // make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex - let SearchQueryWithIndex { - index_uid, - federation_options: _, - q: _, - vector: _, - offset: _, - limit: _, - page: _, - hits_per_page: _, - attributes_to_retrieve: _, - retrieve_vectors: _, - attributes_to_crop: _, - crop_length: _, - attributes_to_highlight: _, - show_ranking_score: _, - show_ranking_score_details: _, - show_matches_position: _, - filter: _, - sort: _, - distinct: _, - facets: _, - highlight_pre_tag: _, - highlight_post_tag: _, - crop_marker: _, - matching_strategy: _, - 
attributes_to_search_on: _, - hybrid: _, - ranking_score_threshold: _, - locales: _, - } = query; - - index_uid.as_str() - }) - .collect(); - - let show_ranking_score = - federated_search.queries.iter().any(|query| query.show_ranking_score); - let show_ranking_score_details = - federated_search.queries.iter().any(|query| query.show_ranking_score_details); - - Self { - total_received: 1, - total_succeeded: 0, - total_distinct_index_count: distinct_indexes.len(), - total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 }, - total_search_count: federated_search.queries.len(), - show_ranking_score, - show_ranking_score_details, - use_federation, - } - } - - pub fn succeed(&mut self) { - self.total_succeeded = self.total_succeeded.saturating_add(1); - } -} - -impl Aggregate for MultiSearchAggregator { - fn event_name(&self) -> &'static str { - "Documents Searched by Multi-Search POST" - } - - /// Aggregate one [MultiSearchAggregator] into another. - fn aggregate(self: Box, new: Box) -> Box { - // write the aggregate in a way that will cause a compilation error if a field is added. - - // get ownership of self, replacing it by a default value. - let this = *self; - - let total_received = this.total_received.saturating_add(new.total_received); - let total_succeeded = this.total_succeeded.saturating_add(new.total_succeeded); - let total_distinct_index_count = - this.total_distinct_index_count.saturating_add(new.total_distinct_index_count); - let total_single_index = this.total_single_index.saturating_add(new.total_single_index); - let total_search_count = this.total_search_count.saturating_add(new.total_search_count); - let show_ranking_score = this.show_ranking_score || new.show_ranking_score; - let show_ranking_score_details = - this.show_ranking_score_details || new.show_ranking_score_details; - let use_federation = this.use_federation || new.use_federation; - - Box::new(Self { - total_received, - total_succeeded, - total_distinct_index_count, - total_single_index, - total_search_count, - show_ranking_score, - show_ranking_score_details, - use_federation, - }) - } - - fn into_event(self: Box) -> serde_json::Value { - let Self { - total_received, - total_succeeded, - total_distinct_index_count, - total_single_index, - total_search_count, - show_ranking_score, - show_ranking_score_details, - use_federation, - } = *self; - - json!({ - "requests": { - "total_succeeded": total_succeeded, - "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics - "total_received": total_received, - }, - "indexes": { - "total_single_index": total_single_index, - "total_distinct_index_count": total_distinct_index_count, - "avg_distinct_index_count": (total_distinct_index_count as f64) / (total_received as f64), // not 0 else returned early - }, - "searches": { - "total_search_count": total_search_count, - "avg_search_count": (total_search_count as f64) / (total_received as f64), - }, - "scoring": { - "show_ranking_score": show_ranking_score, - "show_ranking_score_details": show_ranking_score_details, - }, - "federation": { - "use_federation": use_federation, - } - }) - } -} - -aggregate_methods!( - SimilarPOST => "Similar POST", - SimilarGET => "Similar GET", -); - -#[derive(Default)] -pub struct SimilarAggregator { - // requests - total_received: usize, - total_succeeded: usize, - time_spent: BinaryHeap, - - // filter - filter_with_geo_radius: bool, - filter_with_geo_bounding_box: bool, - // every time a request has a filter, this field must be incremented by the 
number of terms it contains - filter_sum_of_criteria_terms: usize, - // every time a request has a filter, this field must be incremented by one - filter_total_number_of_criteria: usize, - used_syntax: HashMap, - - // Whether a non-default embedder was specified - retrieve_vectors: bool, - - // pagination - max_limit: usize, - max_offset: usize, - - // formatting - max_attributes_to_retrieve: usize, - - // scoring - show_ranking_score: bool, - show_ranking_score_details: bool, - ranking_score_threshold: bool, - - marker: std::marker::PhantomData, -} - -impl SimilarAggregator { - #[allow(clippy::field_reassign_with_default)] - pub fn from_query(query: &SimilarQuery) -> Self { - let SimilarQuery { - id: _, - embedder: _, - offset, - limit, - attributes_to_retrieve: _, - retrieve_vectors, - show_ranking_score, - show_ranking_score_details, - filter, - ranking_score_threshold, - } = query; - - let mut ret = Self::default(); - - ret.total_received = 1; - - if let Some(ref filter) = filter { - static RE: Lazy = Lazy::new(|| Regex::new("AND | OR").unwrap()); - ret.filter_total_number_of_criteria = 1; - - let syntax = match filter { - Value::String(_) => "string".to_string(), - Value::Array(values) => { - if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) { - "mixed".to_string() - } else { - "array".to_string() - } - } - _ => "none".to_string(), - }; - // convert the string to a HashMap - ret.used_syntax.insert(syntax, 1); - - let stringified_filters = filter.to_string(); - ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius("); - ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox("); - ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count(); - } - - ret.max_limit = *limit; - ret.max_offset = *offset; - - ret.show_ranking_score = *show_ranking_score; - ret.show_ranking_score_details = *show_ranking_score_details; - ret.ranking_score_threshold = ranking_score_threshold.is_some(); - - ret.retrieve_vectors = *retrieve_vectors; - - ret - } - - pub fn succeed(&mut self, result: &SimilarResult) { - let SimilarResult { id: _, hits: _, processing_time_ms, hits_info: _ } = result; - - self.total_succeeded = self.total_succeeded.saturating_add(1); - - self.time_spent.push(*processing_time_ms as usize); - } -} - -impl Aggregate for SimilarAggregator { - fn event_name(&self) -> &'static str { - Method::event_name() - } - - /// Aggregate one [SimilarAggregator] into another. 
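A rough sketch of the marker-type trick used by `SimilarAggregator<Method>`: assuming `aggregate_methods!` expands to roughly the shape below (one uninhabited type per route implementing the `AggregateMethod` trait), a single generic aggregator can report a different event name for the GET and the POST route at zero runtime cost.

    use std::marker::PhantomData;

    trait AggregateMethod {
        fn event_name() -> &'static str;
    }

    // Assumed expansion of `aggregate_methods!`: zero-sized marker types.
    enum SimilarGET {}
    impl AggregateMethod for SimilarGET {
        fn event_name() -> &'static str { "Similar GET" }
    }
    enum SimilarPOST {}
    impl AggregateMethod for SimilarPOST {
        fn event_name() -> &'static str { "Similar POST" }
    }

    struct SimilarAggregator<Method: AggregateMethod> {
        total_received: usize,
        marker: PhantomData<Method>,
    }

    impl<Method: AggregateMethod> SimilarAggregator<Method> {
        fn event_name(&self) -> &'static str {
            Method::event_name()
        }
    }

    fn main() {
        let agg = SimilarAggregator::<SimilarGET> { total_received: 1, marker: PhantomData };
        assert_eq!(agg.event_name(), "Similar GET");
        assert_eq!(SimilarPOST::event_name(), "Similar POST");
        assert_eq!(agg.total_received, 1);
    }
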
- fn aggregate(mut self: Box, new: Box) -> Box { - let Self { - total_received, - total_succeeded, - mut time_spent, - filter_with_geo_radius, - filter_with_geo_bounding_box, - filter_sum_of_criteria_terms, - filter_total_number_of_criteria, - used_syntax, - max_limit, - max_offset, - max_attributes_to_retrieve, - show_ranking_score, - show_ranking_score_details, - ranking_score_threshold, - retrieve_vectors, - marker: _, - } = *new; - - // request - self.total_received = self.total_received.saturating_add(total_received); - self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); - self.time_spent.append(&mut time_spent); - - // filter - self.filter_with_geo_radius |= filter_with_geo_radius; - self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box; - self.filter_sum_of_criteria_terms = - self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms); - self.filter_total_number_of_criteria = - self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria); - for (key, value) in used_syntax.into_iter() { - let used_syntax = self.used_syntax.entry(key).or_insert(0); - *used_syntax = used_syntax.saturating_add(value); - } - - self.retrieve_vectors |= retrieve_vectors; - - // pagination - self.max_limit = self.max_limit.max(max_limit); - self.max_offset = self.max_offset.max(max_offset); - - // formatting - self.max_attributes_to_retrieve = - self.max_attributes_to_retrieve.max(max_attributes_to_retrieve); - - // scoring - self.show_ranking_score |= show_ranking_score; - self.show_ranking_score_details |= show_ranking_score_details; - self.ranking_score_threshold |= ranking_score_threshold; - - self - } - - fn into_event(self: Box) -> serde_json::Value { - let Self { - total_received, - total_succeeded, - time_spent, - filter_with_geo_radius, - filter_with_geo_bounding_box, - filter_sum_of_criteria_terms, - filter_total_number_of_criteria, - used_syntax, - max_limit, - max_offset, - max_attributes_to_retrieve, - show_ranking_score, - show_ranking_score_details, - ranking_score_threshold, - retrieve_vectors, - marker: _, - } = *self; - - // we get all the values in a sorted manner - let time_spent = time_spent.into_sorted_vec(); - // the index of the 99th percentage of value - let percentile_99th = time_spent.len() * 99 / 100; - // We are only interested by the slowest value of the 99th fastest results - let time_spent = time_spent.get(percentile_99th); - - json!({ - "requests": { - "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), - "total_succeeded": total_succeeded, - "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics - "total_received": total_received, - }, - "filter": { - "with_geoRadius": filter_with_geo_radius, - "with_geoBoundingBox": filter_with_geo_bounding_box, - "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), - "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), - }, - "vector": { - "retrieve_vectors": retrieve_vectors, - }, - "pagination": { - "max_limit": max_limit, - "max_offset": max_offset, - }, - "formatting": { - "max_attributes_to_retrieve": max_attributes_to_retrieve, - }, - "scoring": { - "show_ranking_score": show_ranking_score, - "show_ranking_score_details": show_ranking_score_details, - "ranking_score_threshold": ranking_score_threshold, - } - }) - } -} diff --git a/meilisearch/src/routes/indexes/mod.rs 
b/meilisearch/src/routes/indexes/mod.rs index c8183186d..7d073ec5f 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -28,9 +28,11 @@ use crate::Opt; pub mod documents; pub mod facet_search; pub mod search; +mod search_analytics; pub mod settings; mod settings_analytics; pub mod similar; +mod similar_analytics; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index ac6e23c8f..2f5cb4a36 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -13,13 +13,13 @@ use meilisearch_types::serde_cs::vec::CS; use serde_json::Value; use tracing::debug; -use crate::analytics::segment_analytics::{SearchGET, SearchPOST}; -use crate::analytics::{Analytics, SearchAggregator}; +use crate::analytics::Analytics; use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS; +use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST}; use crate::search::{ add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold, RetrieveVectors, SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH, diff --git a/meilisearch/src/routes/indexes/search_analytics.rs b/meilisearch/src/routes/indexes/search_analytics.rs new file mode 100644 index 000000000..8bbb1781f --- /dev/null +++ b/meilisearch/src/routes/indexes/search_analytics.rs @@ -0,0 +1,485 @@ +use once_cell::sync::Lazy; +use regex::Regex; +use serde_json::{json, Value}; +use std::collections::{BTreeSet, BinaryHeap, HashMap}; + +use meilisearch_types::locales::Locale; + +use crate::{ + aggregate_methods, + analytics::{Aggregate, AggregateMethod}, + search::{ + SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, + DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, + DEFAULT_SEMANTIC_RATIO, + }, +}; + +aggregate_methods!( + SearchGET => "Documents Searched GET", + SearchPOST => "Documents Searched POST", +); + +#[derive(Default)] +pub struct SearchAggregator { + // requests + total_received: usize, + total_succeeded: usize, + total_degraded: usize, + total_used_negative_operator: usize, + time_spent: BinaryHeap, + + // sort + sort_with_geo_point: bool, + // every time a request has a filter, this field must be incremented by the number of terms it contains + sort_sum_of_criteria_terms: usize, + // every time a request has a filter, this field must be incremented by one + sort_total_number_of_criteria: usize, + + // distinct + distinct: bool, + + // filter + filter_with_geo_radius: bool, + filter_with_geo_bounding_box: bool, + // every time a request has a filter, this field must be incremented by the number of terms it contains + filter_sum_of_criteria_terms: usize, + // every time a request has a filter, this field must be incremented by one + filter_total_number_of_criteria: usize, + used_syntax: HashMap, + + // attributes_to_search_on + // every time a search is done using attributes_to_search_on + attributes_to_search_on_total_number_of_uses: usize, + + // q + // The maximum number of terms in a q request + max_terms_number: usize, + + // vector + // The maximum number of floats in a vector request + max_vector_size: usize, + // Whether the semantic ratio passed to a 
hybrid search equals the default ratio. + semantic_ratio: bool, + hybrid: bool, + retrieve_vectors: bool, + + // every time a search is done, we increment the counter linked to the used settings + matching_strategy: HashMap, + + // List of the unique Locales passed as parameter + locales: BTreeSet, + + // pagination + max_limit: usize, + max_offset: usize, + finite_pagination: usize, + + // formatting + max_attributes_to_retrieve: usize, + max_attributes_to_highlight: usize, + highlight_pre_tag: bool, + highlight_post_tag: bool, + max_attributes_to_crop: usize, + crop_marker: bool, + show_matches_position: bool, + crop_length: bool, + + // facets + facets_sum_of_terms: usize, + facets_total_number_of_facets: usize, + + // scoring + show_ranking_score: bool, + show_ranking_score_details: bool, + ranking_score_threshold: bool, + + marker: std::marker::PhantomData, +} + +impl SearchAggregator { + #[allow(clippy::field_reassign_with_default)] + pub fn from_query(query: &SearchQuery) -> Self { + let SearchQuery { + q, + vector, + offset, + limit, + page, + hits_per_page, + attributes_to_retrieve: _, + retrieve_vectors, + attributes_to_crop: _, + crop_length, + attributes_to_highlight: _, + show_matches_position, + show_ranking_score, + show_ranking_score_details, + filter, + sort, + distinct, + facets: _, + highlight_pre_tag, + highlight_post_tag, + crop_marker, + matching_strategy, + attributes_to_search_on, + hybrid, + ranking_score_threshold, + locales, + } = query; + + let mut ret = Self::default(); + + ret.total_received = 1; + + if let Some(ref sort) = sort { + ret.sort_total_number_of_criteria = 1; + ret.sort_with_geo_point = sort.iter().any(|s| s.contains("_geoPoint(")); + ret.sort_sum_of_criteria_terms = sort.len(); + } + + ret.distinct = distinct.is_some(); + + if let Some(ref filter) = filter { + static RE: Lazy = Lazy::new(|| Regex::new("AND | OR").unwrap()); + ret.filter_total_number_of_criteria = 1; + + let syntax = match filter { + Value::String(_) => "string".to_string(), + Value::Array(values) => { + if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) { + "mixed".to_string() + } else { + "array".to_string() + } + } + _ => "none".to_string(), + }; + // convert the string to a HashMap + ret.used_syntax.insert(syntax, 1); + + let stringified_filters = filter.to_string(); + ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius("); + ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox("); + ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count(); + } + + // attributes_to_search_on + if attributes_to_search_on.is_some() { + ret.attributes_to_search_on_total_number_of_uses = 1; + } + + if let Some(ref q) = q { + ret.max_terms_number = q.split_whitespace().count(); + } + + if let Some(ref vector) = vector { + ret.max_vector_size = vector.len(); + } + ret.retrieve_vectors |= retrieve_vectors; + + if query.is_finite_pagination() { + let limit = hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT); + ret.max_limit = limit; + ret.max_offset = page.unwrap_or(1).saturating_sub(1) * limit; + ret.finite_pagination = 1; + } else { + ret.max_limit = *limit; + ret.max_offset = *offset; + ret.finite_pagination = 0; + } + + ret.matching_strategy.insert(format!("{:?}", matching_strategy), 1); + + if let Some(locales) = locales { + ret.locales = locales.iter().copied().collect(); + } + + ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG(); + ret.highlight_post_tag = *highlight_post_tag != 
DEFAULT_HIGHLIGHT_POST_TAG(); + ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER(); + ret.crop_length = *crop_length != DEFAULT_CROP_LENGTH(); + ret.show_matches_position = *show_matches_position; + + ret.show_ranking_score = *show_ranking_score; + ret.show_ranking_score_details = *show_ranking_score_details; + ret.ranking_score_threshold = ranking_score_threshold.is_some(); + + if let Some(hybrid) = hybrid { + ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO(); + ret.hybrid = true; + } + + ret + } + + pub fn succeed(&mut self, result: &SearchResult) { + let SearchResult { + hits: _, + query: _, + processing_time_ms, + hits_info: _, + semantic_hit_count: _, + facet_distribution: _, + facet_stats: _, + degraded, + used_negative_operator, + } = result; + + self.total_succeeded = self.total_succeeded.saturating_add(1); + if *degraded { + self.total_degraded = self.total_degraded.saturating_add(1); + } + if *used_negative_operator { + self.total_used_negative_operator = self.total_used_negative_operator.saturating_add(1); + } + self.time_spent.push(*processing_time_ms as usize); + } +} + +impl Aggregate for SearchAggregator { + fn event_name(&self) -> &'static str { + Method::event_name() + } + + fn aggregate(mut self: Box, new: Box) -> Box { + let Self { + total_received, + total_succeeded, + mut time_spent, + sort_with_geo_point, + sort_sum_of_criteria_terms, + sort_total_number_of_criteria, + distinct, + filter_with_geo_radius, + filter_with_geo_bounding_box, + filter_sum_of_criteria_terms, + filter_total_number_of_criteria, + used_syntax, + attributes_to_search_on_total_number_of_uses, + max_terms_number, + max_vector_size, + retrieve_vectors, + matching_strategy, + max_limit, + max_offset, + finite_pagination, + max_attributes_to_retrieve, + max_attributes_to_highlight, + highlight_pre_tag, + highlight_post_tag, + max_attributes_to_crop, + crop_marker, + show_matches_position, + crop_length, + facets_sum_of_terms, + facets_total_number_of_facets, + show_ranking_score, + show_ranking_score_details, + semantic_ratio, + hybrid, + total_degraded, + total_used_negative_operator, + ranking_score_threshold, + mut locales, + marker: _, + } = *new; + + // request + self.total_received = self.total_received.saturating_add(total_received); + self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); + self.total_degraded = self.total_degraded.saturating_add(total_degraded); + self.total_used_negative_operator = + self.total_used_negative_operator.saturating_add(total_used_negative_operator); + self.time_spent.append(&mut time_spent); + + // sort + self.sort_with_geo_point |= sort_with_geo_point; + self.sort_sum_of_criteria_terms = + self.sort_sum_of_criteria_terms.saturating_add(sort_sum_of_criteria_terms); + self.sort_total_number_of_criteria = + self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria); + + // distinct + self.distinct |= distinct; + + // filter + self.filter_with_geo_radius |= filter_with_geo_radius; + self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box; + self.filter_sum_of_criteria_terms = + self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms); + self.filter_total_number_of_criteria = + self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria); + for (key, value) in used_syntax.into_iter() { + let used_syntax = self.used_syntax.entry(key).or_insert(0); + *used_syntax = used_syntax.saturating_add(value); + } + + // attributes_to_search_on + 
self.attributes_to_search_on_total_number_of_uses = self + .attributes_to_search_on_total_number_of_uses + .saturating_add(attributes_to_search_on_total_number_of_uses); + + // q + self.max_terms_number = self.max_terms_number.max(max_terms_number); + + // vector + self.max_vector_size = self.max_vector_size.max(max_vector_size); + self.retrieve_vectors |= retrieve_vectors; + self.semantic_ratio |= semantic_ratio; + self.hybrid |= hybrid; + + // pagination + self.max_limit = self.max_limit.max(max_limit); + self.max_offset = self.max_offset.max(max_offset); + self.finite_pagination += finite_pagination; + + // formatting + self.max_attributes_to_retrieve = + self.max_attributes_to_retrieve.max(max_attributes_to_retrieve); + self.max_attributes_to_highlight = + self.max_attributes_to_highlight.max(max_attributes_to_highlight); + self.highlight_pre_tag |= highlight_pre_tag; + self.highlight_post_tag |= highlight_post_tag; + self.max_attributes_to_crop = self.max_attributes_to_crop.max(max_attributes_to_crop); + self.crop_marker |= crop_marker; + self.show_matches_position |= show_matches_position; + self.crop_length |= crop_length; + + // facets + self.facets_sum_of_terms = self.facets_sum_of_terms.saturating_add(facets_sum_of_terms); + self.facets_total_number_of_facets = + self.facets_total_number_of_facets.saturating_add(facets_total_number_of_facets); + + // matching strategy + for (key, value) in matching_strategy.into_iter() { + let matching_strategy = self.matching_strategy.entry(key).or_insert(0); + *matching_strategy = matching_strategy.saturating_add(value); + } + + // scoring + self.show_ranking_score |= show_ranking_score; + self.show_ranking_score_details |= show_ranking_score_details; + self.ranking_score_threshold |= ranking_score_threshold; + + // locales + self.locales.append(&mut locales); + + self + } + + fn into_event(self: Box) -> serde_json::Value { + let Self { + total_received, + total_succeeded, + time_spent, + sort_with_geo_point, + sort_sum_of_criteria_terms, + sort_total_number_of_criteria, + distinct, + filter_with_geo_radius, + filter_with_geo_bounding_box, + filter_sum_of_criteria_terms, + filter_total_number_of_criteria, + used_syntax, + attributes_to_search_on_total_number_of_uses, + max_terms_number, + max_vector_size, + retrieve_vectors, + matching_strategy, + max_limit, + max_offset, + finite_pagination, + max_attributes_to_retrieve, + max_attributes_to_highlight, + highlight_pre_tag, + highlight_post_tag, + max_attributes_to_crop, + crop_marker, + show_matches_position, + crop_length, + facets_sum_of_terms, + facets_total_number_of_facets, + show_ranking_score, + show_ranking_score_details, + semantic_ratio, + hybrid, + total_degraded, + total_used_negative_operator, + ranking_score_threshold, + locales, + marker: _, + } = *self; + + // we get all the values in a sorted manner + let time_spent = time_spent.into_sorted_vec(); + // the index of the 99th percentage of value + let percentile_99th = time_spent.len() * 99 / 100; + // We are only interested by the slowest value of the 99th fastest results + let time_spent = time_spent.get(percentile_99th); + + json!({ + "requests": { + "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), + "total_succeeded": total_succeeded, + "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics + "total_received": total_received, + "total_degraded": total_degraded, + "total_used_negative_operator": total_used_negative_operator, + }, + "sort": { + "with_geoPoint": 
+                "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64),
+            },
+            "distinct": distinct,
+            "filter": {
+                "with_geoRadius": filter_with_geo_radius,
+                "with_geoBoundingBox": filter_with_geo_bounding_box,
+                "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
+                "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
+            },
+            "attributes_to_search_on": {
+                "total_number_of_uses": attributes_to_search_on_total_number_of_uses,
+            },
+            "q": {
+                "max_terms_number": max_terms_number,
+            },
+            "vector": {
+                "max_vector_size": max_vector_size,
+                "retrieve_vectors": retrieve_vectors,
+            },
+            "hybrid": {
+                "enabled": hybrid,
+                "semantic_ratio": semantic_ratio,
+            },
+            "pagination": {
+               "max_limit": max_limit,
+               "max_offset": max_offset,
+               "most_used_navigation": if finite_pagination > (total_received / 2) { "exhaustive" } else { "estimated" },
+            },
+            "formatting": {
+                "max_attributes_to_retrieve": max_attributes_to_retrieve,
+                "max_attributes_to_highlight": max_attributes_to_highlight,
+                "highlight_pre_tag": highlight_pre_tag,
+                "highlight_post_tag": highlight_post_tag,
+                "max_attributes_to_crop": max_attributes_to_crop,
+                "crop_marker": crop_marker,
+                "show_matches_position": show_matches_position,
+                "crop_length": crop_length,
+            },
+            "facets": {
+                "avg_facets_number": format!("{:.2}", facets_sum_of_terms as f64 / facets_total_number_of_facets as f64),
+            },
+            "matching_strategy": {
+                "most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
+            },
+            "locales": locales,
+            "scoring": {
+                "show_ranking_score": show_ranking_score,
+                "show_ranking_score_details": show_ranking_score_details,
+                "ranking_score_threshold": ranking_score_threshold,
+            },
+        })
+    }
+}
diff --git a/meilisearch/src/routes/indexes/similar.rs b/meilisearch/src/routes/indexes/similar.rs
index 33df6bdad..79f42f0aa 100644
--- a/meilisearch/src/routes/indexes/similar.rs
+++ b/meilisearch/src/routes/indexes/similar.rs
@@ -13,10 +13,10 @@ use serde_json::Value;
 use tracing::debug;
 
 use super::ActionPolicy;
-use crate::analytics::segment_analytics::{SimilarGET, SimilarPOST};
-use crate::analytics::{Analytics, SimilarAggregator};
+use crate::analytics::Analytics;
 use crate::extractors::authentication::GuardedData;
 use crate::extractors::sequential_extractor::SeqHandler;
+use crate::routes::indexes::similar_analytics::{SimilarAggregator, SimilarGET, SimilarPOST};
 use crate::search::{
     add_search_rules, perform_similar, RankingScoreThresholdSimilar, RetrieveVectors, SearchKind,
     SimilarQuery, SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
diff --git a/meilisearch/src/routes/indexes/similar_analytics.rs b/meilisearch/src/routes/indexes/similar_analytics.rs
new file mode 100644
index 000000000..69685a56c
--- /dev/null
+++ b/meilisearch/src/routes/indexes/similar_analytics.rs
@@ -0,0 +1,235 @@
+use std::collections::{BinaryHeap, HashMap};
+
+use once_cell::sync::Lazy;
+use regex::Regex;
+use serde_json::{json, Value};
+
+use crate::{
+    aggregate_methods,
+    analytics::{Aggregate, AggregateMethod},
+    search::{SimilarQuery, SimilarResult},
+};
+
+aggregate_methods!(
+    SimilarPOST => "Similar POST",
+    SimilarGET => "Similar GET",
+);
+
+#[derive(Default)]
+pub struct SimilarAggregator<Method: AggregateMethod> {
+    // requests
+    total_received: usize,
+    total_succeeded: usize,
+    time_spent: BinaryHeap<usize>,
+
+    // filter
+    filter_with_geo_radius: bool,
+    filter_with_geo_bounding_box: bool,
+    // every time a request has a filter, this field must be incremented by the number of terms it contains
+    filter_sum_of_criteria_terms: usize,
+    // every time a request has a filter, this field must be incremented by one
+    filter_total_number_of_criteria: usize,
+    used_syntax: HashMap<String, usize>,
+
+    // whether the request asked for the vectors to be returned in the response
+    retrieve_vectors: bool,
+
+    // pagination
+    max_limit: usize,
+    max_offset: usize,
+
+    // formatting
+    max_attributes_to_retrieve: usize,
+
+    // scoring
+    show_ranking_score: bool,
+    show_ranking_score_details: bool,
+    ranking_score_threshold: bool,
+
+    marker: std::marker::PhantomData<Method>,
+}
+
+impl<Method: AggregateMethod> SimilarAggregator<Method> {
+    #[allow(clippy::field_reassign_with_default)]
+    pub fn from_query(query: &SimilarQuery) -> Self {
+        let SimilarQuery {
+            id: _,
+            embedder: _,
+            offset,
+            limit,
+            attributes_to_retrieve: _,
+            retrieve_vectors,
+            show_ranking_score,
+            show_ranking_score_details,
+            filter,
+            ranking_score_threshold,
+        } = query;
+
+        let mut ret = Self::default();
+
+        ret.total_received = 1;
+
+        if let Some(ref filter) = filter {
+            static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
+            ret.filter_total_number_of_criteria = 1;
+
+            let syntax = match filter {
+                Value::String(_) => "string".to_string(),
+                Value::Array(values) => {
+                    if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) {
+                        "mixed".to_string()
+                    } else {
+                        "array".to_string()
+                    }
+                }
+                _ => "none".to_string(),
+            };
+            // record the detected syntax with an initial count of one
+            ret.used_syntax.insert(syntax, 1);
+
+            let stringified_filters = filter.to_string();
+            ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
+            ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox(");
+            ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
+        }
+
+        ret.max_limit = *limit;
+        ret.max_offset = *offset;
+
+        ret.show_ranking_score = *show_ranking_score;
+        ret.show_ranking_score_details = *show_ranking_score_details;
+        ret.ranking_score_threshold = ranking_score_threshold.is_some();
+
+        ret.retrieve_vectors = *retrieve_vectors;
+
+        ret
+    }
+
+    pub fn succeed(&mut self, result: &SimilarResult) {
+        let SimilarResult { id: _, hits: _, processing_time_ms, hits_info: _ } = result;
+
+        self.total_succeeded = self.total_succeeded.saturating_add(1);
+
+        self.time_spent.push(*processing_time_ms as usize);
+    }
+}
+
+impl<Method: AggregateMethod> Aggregate for SimilarAggregator<Method> {
+    fn event_name(&self) -> &'static str {
+        Method::event_name()
+    }
+
+    /// Aggregate one [SimilarAggregator] into another.
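+    /// Counters are summed, maxima are kept, and flags are OR-ed together, so
+    /// merging is commutative and the order of aggregation does not matter.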
+    fn aggregate(mut self: Box<Self>, new: Box<Self>) -> Box<Self> {
+        let Self {
+            total_received,
+            total_succeeded,
+            mut time_spent,
+            filter_with_geo_radius,
+            filter_with_geo_bounding_box,
+            filter_sum_of_criteria_terms,
+            filter_total_number_of_criteria,
+            used_syntax,
+            max_limit,
+            max_offset,
+            max_attributes_to_retrieve,
+            show_ranking_score,
+            show_ranking_score_details,
+            ranking_score_threshold,
+            retrieve_vectors,
+            marker: _,
+        } = *new;
+
+        // request
+        self.total_received = self.total_received.saturating_add(total_received);
+        self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
+        self.time_spent.append(&mut time_spent);
+
+        // filter
+        self.filter_with_geo_radius |= filter_with_geo_radius;
+        self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
+        self.filter_sum_of_criteria_terms =
+            self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms);
+        self.filter_total_number_of_criteria =
+            self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria);
+        for (key, value) in used_syntax.into_iter() {
+            let used_syntax = self.used_syntax.entry(key).or_insert(0);
+            *used_syntax = used_syntax.saturating_add(value);
+        }
+
+        self.retrieve_vectors |= retrieve_vectors;
+
+        // pagination
+        self.max_limit = self.max_limit.max(max_limit);
+        self.max_offset = self.max_offset.max(max_offset);
+
+        // formatting
+        self.max_attributes_to_retrieve =
+            self.max_attributes_to_retrieve.max(max_attributes_to_retrieve);
+
+        // scoring
+        self.show_ranking_score |= show_ranking_score;
+        self.show_ranking_score_details |= show_ranking_score_details;
+        self.ranking_score_threshold |= ranking_score_threshold;
+
+        self
+    }
+
+    fn into_event(self: Box<Self>) -> serde_json::Value {
+        let Self {
+            total_received,
+            total_succeeded,
+            time_spent,
+            filter_with_geo_radius,
+            filter_with_geo_bounding_box,
+            filter_sum_of_criteria_terms,
+            filter_total_number_of_criteria,
+            used_syntax,
+            max_limit,
+            max_offset,
+            max_attributes_to_retrieve,
+            show_ranking_score,
+            show_ranking_score_details,
+            ranking_score_threshold,
+            retrieve_vectors,
+            marker: _,
+        } = *self;
+
+        // sort all the recorded response times in ascending order
+        let time_spent = time_spent.into_sorted_vec();
+        // the index of the 99th percentile value
+        let percentile_99th = time_spent.len() * 99 / 100;
+        // we only keep the slowest value among the 99% fastest responses
+        let time_spent = time_spent.get(percentile_99th);
+
+        json!({
+            "requests": {
+                "99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
+                "total_succeeded": total_succeeded,
+                "total_failed": total_received.saturating_sub(total_succeeded), // saturating_sub so this can never underflow
+                "total_received": total_received,
+            },
+            "filter": {
+                "with_geoRadius": filter_with_geo_radius,
+                "with_geoBoundingBox": filter_with_geo_bounding_box,
+                "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
+                "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
+            },
+            "vector": {
+                "retrieve_vectors": retrieve_vectors,
+            },
+            "pagination": {
+                "max_limit": max_limit,
+                "max_offset": max_offset,
+            },
+            "formatting": {
+                "max_attributes_to_retrieve": max_attributes_to_retrieve,
+            },
+            "scoring": {
+                "show_ranking_score": show_ranking_score,
+                "show_ranking_score_details": show_ranking_score_details,
+                "ranking_score_threshold": ranking_score_threshold,
+            }
+        })
+    }
+}
diff --git a/meilisearch/src/routes/mod.rs
b/meilisearch/src/routes/mod.rs index c25aeee70..b7260ea08 100644 --- a/meilisearch/src/routes/mod.rs +++ b/meilisearch/src/routes/mod.rs @@ -25,6 +25,7 @@ pub mod indexes; mod logs; mod metrics; mod multi_search; +mod multi_search_analytics; mod snapshot; mod swap_indexes; pub mod tasks; diff --git a/meilisearch/src/routes/multi_search.rs b/meilisearch/src/routes/multi_search.rs index 13a39cb44..b7bd31716 100644 --- a/meilisearch/src/routes/multi_search.rs +++ b/meilisearch/src/routes/multi_search.rs @@ -9,7 +9,7 @@ use meilisearch_types::keys::actions; use serde::Serialize; use tracing::debug; -use crate::analytics::{Analytics, MultiSearchAggregator}; +use crate::analytics::Analytics; use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::ActionPolicy; use crate::extractors::authentication::{AuthenticationError, GuardedData}; @@ -21,6 +21,8 @@ use crate::search::{ }; use crate::search_queue::SearchQueue; +use super::multi_search_analytics::MultiSearchAggregator; + pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(multi_search_with_post)))); } diff --git a/meilisearch/src/routes/multi_search_analytics.rs b/meilisearch/src/routes/multi_search_analytics.rs new file mode 100644 index 000000000..be1218399 --- /dev/null +++ b/meilisearch/src/routes/multi_search_analytics.rs @@ -0,0 +1,170 @@ +use std::collections::HashSet; + +use serde_json::json; + +use crate::{ + analytics::Aggregate, + search::{FederatedSearch, SearchQueryWithIndex}, +}; + +#[derive(Default)] +pub struct MultiSearchAggregator { + // requests + total_received: usize, + total_succeeded: usize, + + // sum of the number of distinct indexes in each single request, use with total_received to compute an avg + total_distinct_index_count: usize, + // number of queries with a single index, use with total_received to compute a proportion + total_single_index: usize, + + // sum of the number of search queries in the requests, use with total_received to compute an average + total_search_count: usize, + + // scoring + show_ranking_score: bool, + show_ranking_score_details: bool, + + // federation + use_federation: bool, +} + +impl MultiSearchAggregator { + pub fn from_federated_search(federated_search: &FederatedSearch) -> Self { + let use_federation = federated_search.federation.is_some(); + + let distinct_indexes: HashSet<_> = federated_search + .queries + .iter() + .map(|query| { + let query = &query; + // make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex + let SearchQueryWithIndex { + index_uid, + federation_options: _, + q: _, + vector: _, + offset: _, + limit: _, + page: _, + hits_per_page: _, + attributes_to_retrieve: _, + retrieve_vectors: _, + attributes_to_crop: _, + crop_length: _, + attributes_to_highlight: _, + show_ranking_score: _, + show_ranking_score_details: _, + show_matches_position: _, + filter: _, + sort: _, + distinct: _, + facets: _, + highlight_pre_tag: _, + highlight_post_tag: _, + crop_marker: _, + matching_strategy: _, + attributes_to_search_on: _, + hybrid: _, + ranking_score_threshold: _, + locales: _, + } = query; + + index_uid.as_str() + }) + .collect(); + + let show_ranking_score = + federated_search.queries.iter().any(|query| query.show_ranking_score); + let show_ranking_score_details = + federated_search.queries.iter().any(|query| query.show_ranking_score_details); + + Self { + total_received: 1, + total_succeeded: 0, + total_distinct_index_count: 
distinct_indexes.len(),
+            total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 },
+            total_search_count: federated_search.queries.len(),
+            show_ranking_score,
+            show_ranking_score_details,
+            use_federation,
+        }
+    }
+
+    pub fn succeed(&mut self) {
+        self.total_succeeded = self.total_succeeded.saturating_add(1);
+    }
+}
+
+impl Aggregate for MultiSearchAggregator {
+    fn event_name(&self) -> &'static str {
+        "Documents Searched by Multi-Search POST"
+    }
+
+    /// Aggregate one [MultiSearchAggregator] into another.
+    fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
+        // write the aggregate in a way that will cause a compilation error if a field is added.
+
+        // move the current aggregate out of the box
+        let this = *self;
+
+        let total_received = this.total_received.saturating_add(new.total_received);
+        let total_succeeded = this.total_succeeded.saturating_add(new.total_succeeded);
+        let total_distinct_index_count =
+            this.total_distinct_index_count.saturating_add(new.total_distinct_index_count);
+        let total_single_index = this.total_single_index.saturating_add(new.total_single_index);
+        let total_search_count = this.total_search_count.saturating_add(new.total_search_count);
+        let show_ranking_score = this.show_ranking_score || new.show_ranking_score;
+        let show_ranking_score_details =
+            this.show_ranking_score_details || new.show_ranking_score_details;
+        let use_federation = this.use_federation || new.use_federation;
+
+        Box::new(Self {
+            total_received,
+            total_succeeded,
+            total_distinct_index_count,
+            total_single_index,
+            total_search_count,
+            show_ranking_score,
+            show_ranking_score_details,
+            use_federation,
+        })
+    }
+
+    fn into_event(self: Box<Self>) -> serde_json::Value {
+        let Self {
+            total_received,
+            total_succeeded,
+            total_distinct_index_count,
+            total_single_index,
+            total_search_count,
+            show_ranking_score,
+            show_ranking_score_details,
+            use_federation,
+        } = *self;
+
+        json!({
+            "requests": {
+                "total_succeeded": total_succeeded,
+                "total_failed": total_received.saturating_sub(total_succeeded), // saturating_sub so this can never underflow
+                "total_received": total_received,
+            },
+            "indexes": {
+                "total_single_index": total_single_index,
+                "total_distinct_index_count": total_distinct_index_count,
+                "avg_distinct_index_count": (total_distinct_index_count as f64) / (total_received as f64), // total_received is always at least 1, so this never divides by zero
+            },
+            "searches": {
+                "total_search_count": total_search_count,
+                "avg_search_count": (total_search_count as f64) / (total_received as f64),
+            },
+            "scoring": {
+                "show_ranking_score": show_ranking_score,
+                "show_ranking_score_details": show_ranking_score_details,
+            },
+            "federation": {
+                "use_federation": use_federation,
+            }
+        })
+    }
+}
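
A note for reviewers on the percentile logic shared by the aggregators above: each one
stores its response times in a BinaryHeap<usize> and derives the 99th-percentile response
time in into_event. A minimal standalone sketch of that computation follows; the helper
name response_time_99th and the test values are illustrative only, not part of this patch:

    use std::collections::BinaryHeap;

    fn response_time_99th(times: BinaryHeap<usize>) -> Option<usize> {
        // `into_sorted_vec` returns the values in ascending order,
        // so indexing from the front counts up from the fastest request
        let sorted = times.into_sorted_vec();
        // integer division floors, so the index is always in bounds for a
        // non-empty vector; `get` returns `None` for an empty one
        let percentile_99th = sorted.len() * 99 / 100;
        sorted.get(percentile_99th).copied()
    }

    fn main() {
        let times: BinaryHeap<usize> = (1..=200).collect();
        // 200 * 99 / 100 = 198, and the value at index 198 is 199
        assert_eq!(response_time_99th(times), Some(199));
        // an empty heap yields None rather than panicking
        assert_eq!(response_time_99th(BinaryHeap::new()), None);
    }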