diff --git a/meilisearch/src/analytics/mock_analytics.rs b/meilisearch/src/analytics/mock_analytics.rs deleted file mode 100644 index 54b8d4f1b..000000000 --- a/meilisearch/src/analytics/mock_analytics.rs +++ /dev/null @@ -1,109 +0,0 @@ -use std::any::Any; -use std::sync::Arc; - -use actix_web::HttpRequest; -use meilisearch_types::InstanceUid; -use serde_json::Value; - -use super::{find_user_id, Analytics, DocumentDeletionKind, DocumentFetchKind}; -use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery}; -use crate::Opt; - -pub struct MockAnalytics { - instance_uid: Option, -} - -#[derive(Default)] -pub struct SearchAggregator; - -#[allow(dead_code)] -impl SearchAggregator { - pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self { - Self - } - - pub fn succeed(&mut self, _: &dyn Any) {} -} - -#[derive(Default)] -pub struct SimilarAggregator; - -#[allow(dead_code)] -impl SimilarAggregator { - pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self { - Self - } - - pub fn succeed(&mut self, _: &dyn Any) {} -} - -#[derive(Default)] -pub struct MultiSearchAggregator; - -#[allow(dead_code)] -impl MultiSearchAggregator { - pub fn from_federated_search(_: &dyn Any, _: &dyn Any) -> Self { - Self - } - - pub fn succeed(&mut self) {} -} - -#[derive(Default)] -pub struct FacetSearchAggregator; - -#[allow(dead_code)] -impl FacetSearchAggregator { - pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self { - Self - } - - pub fn succeed(&mut self, _: &dyn Any) {} -} - -impl MockAnalytics { - #[allow(clippy::new_ret_no_self)] - pub fn new(opt: &Opt) -> Arc { - let instance_uid = find_user_id(&opt.db_path); - Arc::new(Self { instance_uid }) - } -} - -impl Analytics for MockAnalytics { - fn instance_uid(&self) -> Option<&meilisearch_types::InstanceUid> { - self.instance_uid.as_ref() - } - - // These methods are noop and should be optimized out - fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {} - fn get_search(&self, _aggregate: super::SearchAggregator) {} - fn post_search(&self, _aggregate: super::SearchAggregator) {} - fn get_similar(&self, _aggregate: super::SimilarAggregator) {} - fn post_similar(&self, _aggregate: super::SimilarAggregator) {} - fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {} - fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {} - fn add_documents( - &self, - _documents_query: &UpdateDocumentsQuery, - _index_creation: bool, - _request: &HttpRequest, - ) { - } - fn delete_documents(&self, _kind: DocumentDeletionKind, _request: &HttpRequest) {} - fn update_documents( - &self, - _documents_query: &UpdateDocumentsQuery, - _index_creation: bool, - _request: &HttpRequest, - ) { - } - fn update_documents_by_function( - &self, - _documents_query: &DocumentEditionByFunction, - _index_creation: bool, - _request: &HttpRequest, - ) { - } - fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {} - fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {} -} diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index 3c7ca0ed3..a8658d830 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -1,45 +1,51 @@ -mod mock_analytics; -#[cfg(feature = "analytics")] -mod segment_analytics; +pub mod segment_analytics; +use std::any::TypeId; +use std::collections::HashMap; use std::fs; use std::path::{Path, PathBuf}; use std::str::FromStr; use actix_web::HttpRequest; 
use meilisearch_types::InstanceUid; -pub use mock_analytics::MockAnalytics; use once_cell::sync::Lazy; use platform_dirs::AppDirs; -use serde_json::Value; - -use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery}; - -// if the analytics feature is disabled -// the `SegmentAnalytics` point to the mock instead of the real analytics -#[cfg(not(feature = "analytics"))] -pub type SegmentAnalytics = mock_analytics::MockAnalytics; -#[cfg(not(feature = "analytics"))] -pub type SearchAggregator = mock_analytics::SearchAggregator; -#[cfg(not(feature = "analytics"))] -pub type SimilarAggregator = mock_analytics::SimilarAggregator; -#[cfg(not(feature = "analytics"))] -pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator; -#[cfg(not(feature = "analytics"))] -pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator; +use segment::message::User; +use serde::Serialize; // if the feature analytics is enabled we use the real analytics -#[cfg(feature = "analytics")] pub type SegmentAnalytics = segment_analytics::SegmentAnalytics; -#[cfg(feature = "analytics")] -pub type SearchAggregator = segment_analytics::SearchAggregator; -#[cfg(feature = "analytics")] +pub use segment_analytics::SearchAggregator; pub type SimilarAggregator = segment_analytics::SimilarAggregator; -#[cfg(feature = "analytics")] pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator; -#[cfg(feature = "analytics")] pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator; +/// A macro used to quickly define events that don't aggregate or send anything besides an empty event with its name. +#[macro_export] +macro_rules! empty_analytics { + ($struct_name:ident, $event_name:literal) => { + #[derive(Default)] + struct $struct_name {} + + impl $crate::analytics::Aggregate for $struct_name { + fn event_name(&self) -> &'static str { + $event_name + } + + fn aggregate(self, _other: Self) -> Self + where + Self: Sized, + { + self + } + + fn into_event(self) -> serde_json::Value { + serde_json::json!({}) + } + } + }; +} + /// The Meilisearch config dir: /// `~/.config/Meilisearch` on *NIX or *BSD. /// `~/Library/ApplicationSupport` on macOS. @@ -78,60 +84,73 @@ pub enum DocumentFetchKind { Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool }, } -pub trait Analytics: Sync + Send { - fn instance_uid(&self) -> Option<&InstanceUid>; +pub trait Aggregate { + fn event_name(&self) -> &'static str; + + fn aggregate(self, other: Self) -> Self + where + Self: Sized; + + fn into_event(self) -> impl Serialize + where + Self: Sized; +} + +/// Helper trait to define multiple aggregates with the same content but a different name. +/// Commonly used when you must aggregate a search made with POST or with GET, for example. +pub trait AggregateMethod { + fn event_name() -> &'static str; +} + +/// A macro used to quickly define multiple aggregate methods along with their names +#[macro_export] +macro_rules! 
aggregate_methods { + ($method:ident => $event_name:literal) => { + pub enum $method {} + + impl $crate::analytics::AggregateMethod for $method { + fn event_name() -> &'static str { + $event_name + } + } + }; + ($($method:ident => $event_name:literal,)+) => { + $( + aggregate_methods!($method => $event_name); + )+ + + }; +} + +pub struct Analytics { + // TODO: TAMO: remove + inner: Option, + + instance_uid: Option, + user: Option, + events: HashMap>, +} + +impl Analytics { + fn no_analytics() -> Self { + Self { inner: None, events: HashMap::new(), instance_uid: None, user: None } + } + + fn segment_analytics(segment: SegmentAnalytics) -> Self { + Self { + instance_uid: Some(segment.instance_uid), + user: Some(segment.user), + inner: Some(segment), + events: HashMap::new(), + } + } + + pub fn instance_uid(&self) -> Option<&InstanceUid> { + self.instance_uid + } /// The method used to publish most analytics that do not need to be batched every hours - fn publish(&self, event_name: String, send: Value, request: Option<&HttpRequest>); - - /// This method should be called to aggregate a get search - fn get_search(&self, aggregate: SearchAggregator); - - /// This method should be called to aggregate a post search - fn post_search(&self, aggregate: SearchAggregator); - - /// This method should be called to aggregate a get similar request - fn get_similar(&self, aggregate: SimilarAggregator); - - /// This method should be called to aggregate a post similar request - fn post_similar(&self, aggregate: SimilarAggregator); - - /// This method should be called to aggregate a post array of searches - fn post_multi_search(&self, aggregate: MultiSearchAggregator); - - /// This method should be called to aggregate post facet values searches - fn post_facet_search(&self, aggregate: FacetSearchAggregator); - - // this method should be called to aggregate an add documents request - fn add_documents( - &self, - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ); - - // this method should be called to aggregate a fetch documents request - fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest); - - // this method should be called to aggregate a fetch documents request - fn post_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest); - - // this method should be called to aggregate a add documents request - fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest); - - // this method should be called to batch an update documents request - fn update_documents( - &self, - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ); - - // this method should be called to batch an update documents by function request - fn update_documents_by_function( - &self, - documents_query: &DocumentEditionByFunction, - index_creation: bool, - request: &HttpRequest, - ); + pub fn publish(&self, send: impl Aggregate, request: Option<&HttpRequest>) { + let Some(segment) = self.inner else { return }; + } } diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 476b3264e..8a6dfd780 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -25,7 +25,8 @@ use tokio::sync::mpsc::{self, Receiver, Sender}; use uuid::Uuid; use super::{ - config_user_id_path, DocumentDeletionKind, DocumentFetchKind, MEILISEARCH_CONFIG_PATH, + config_user_id_path, Aggregate, 
AggregateMethod, DocumentDeletionKind, DocumentFetchKind, + MEILISEARCH_CONFIG_PATH, }; use crate::analytics::Analytics; use crate::option::{ @@ -40,7 +41,7 @@ use crate::search::{ DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEMANTIC_RATIO, }; -use crate::Opt; +use crate::{aggregate_methods, Opt}; const ANALYTICS_HEADER: &str = "X-Meilisearch-Client"; @@ -87,9 +88,9 @@ pub enum AnalyticsMsg { } pub struct SegmentAnalytics { - instance_uid: InstanceUid, + pub instance_uid: InstanceUid, sender: Sender<AnalyticsMsg>, - user: User, + pub user: User, } impl SegmentAnalytics { @@ -98,7 +99,7 @@ impl SegmentAnalytics { opt: &Opt, index_scheduler: Arc<IndexScheduler>, auth_controller: Arc<AuthController>, - ) -> Arc<dyn Analytics> { + ) -> Arc<Analytics> { let instance_uid = super::find_user_id(&opt.db_path); let first_time_run = instance_uid.is_none(); let instance_uid = instance_uid.unwrap_or_else(Uuid::new_v4); @@ -108,7 +109,7 @@ impl SegmentAnalytics { // if reqwest throws an error we won't be able to send analytics if client.is_err() { - return super::MockAnalytics::new(opt); + return Arc::new(Analytics::no_analytics()); } let client = @@ -161,10 +162,11 @@ impl SegmentAnalytics { let this = Self { instance_uid, sender, user: user.clone() }; - Arc::new(this) + Arc::new(Analytics::segment_analytics(this)) } } +/* impl super::Analytics for SegmentAnalytics { fn instance_uid(&self) -> Option<&InstanceUid> { Some(&self.instance_uid) @@ -253,6 +255,7 @@ impl super::Analytics for SegmentAnalytics { let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFetchDocuments(aggregate)); } } +*/ /// This structure represents the `infos` field we send in the analytics. /// It's quite close to the `Opt` structure except all sensitive information @@ -607,12 +610,7 @@ impl Segment { } #[derive(Default)] -pub struct SearchAggregator { - timestamp: Option<OffsetDateTime>, - - // context - user_agents: HashSet<String>, - +pub struct SearchAggregator<Method: AggregateMethod> { // requests total_received: usize, total_succeeded: usize, @@ -684,9 +682,11 @@ pub struct SearchAggregator { show_ranking_score: bool, show_ranking_score_details: bool, ranking_score_threshold: bool, + + marker: std::marker::PhantomData<Method>, } -impl SearchAggregator { +impl<Method: AggregateMethod> SearchAggregator<Method> { #[allow(clippy::field_reassign_with_default)] pub fn from_query(query: &SearchQuery, request: &HttpRequest) -> Self { let SearchQuery { @@ -827,12 +827,21 @@ impl SearchAggregator { } self.time_spent.push(*processing_time_ms as usize); } +} - /// Aggregate one [SearchAggregator] into another. 
- pub fn aggregate(&mut self, mut other: Self) { +aggregate_methods!( + SearchGET => "Documents Searched GET", + SearchPOST => "Documents Searched POST", +); + +impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> { + fn event_name(&self) -> &'static str { + Method::event_name() + } + + fn aggregate(mut self, mut other: Self) -> Self { let Self { - timestamp, - user_agents, total_received, total_succeeded, ref mut time_spent, @@ -871,17 +880,9 @@ impl SearchAggregator { total_used_negative_operator, ranking_score_threshold, ref mut locales, + marker: _, } = other; - if self.timestamp.is_none() { - self.timestamp = timestamp; - } - - // context - for user_agent in user_agents.into_iter() { - self.user_agents.insert(user_agent); - } - // request self.total_received = self.total_received.saturating_add(total_received); self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); @@ -961,12 +962,12 @@ impl SearchAggregator { // locales self.locales.append(locales); + + self } - pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> { + fn into_event(self) -> serde_json::Value { let Self { - timestamp, - user_agents, total_received, total_succeeded, time_spent, @@ -1005,90 +1006,78 @@ impl SearchAggregator { total_used_negative_operator, ranking_score_threshold, locales, + marker: _, } = self; - if total_received == 0 { - None - } else { - // we get all the values in a sorted manner - let time_spent = time_spent.into_sorted_vec(); - // the index of the 99th percentage of value - let percentile_99th = time_spent.len() * 99 / 100; - // We are only interested by the slowest value of the 99th fastest results - let time_spent = time_spent.get(percentile_99th); + // we get all the values in a sorted manner + let time_spent = time_spent.into_sorted_vec(); + // the index of the 99th percentile value + let percentile_99th = time_spent.len() * 99 / 100; + // We are only interested in the slowest value of the 99th fastest results + let time_spent = time_spent.get(percentile_99th); - let properties = json!({ - "user-agent": user_agents, - "requests": { - "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), - "total_succeeded": total_succeeded, - "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics - "total_received": total_received, - "total_degraded": total_degraded, - "total_used_negative_operator": total_used_negative_operator, - }, - "sort": { - "with_geoPoint": sort_with_geo_point, - "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64), - }, - "distinct": distinct, - "filter": { - "with_geoRadius": filter_with_geo_radius, - "with_geoBoundingBox": filter_with_geo_bounding_box, - "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), - "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), - }, - "attributes_to_search_on": { - "total_number_of_uses": attributes_to_search_on_total_number_of_uses, - }, - "q": { - "max_terms_number": max_terms_number, - }, - "vector": { - "max_vector_size": max_vector_size, - "retrieve_vectors": retrieve_vectors, - }, - "hybrid": { - "enabled": hybrid, - "semantic_ratio": semantic_ratio, - }, - "pagination": { - "max_limit": max_limit, - "max_offset": max_offset, - "most_used_navigation": if finite_pagination > (total_received / 2) { "exhaustive" } else { "estimated" }, - }, - "formatting": { - "max_attributes_to_retrieve": 
max_attributes_to_retrieve, - "max_attributes_to_highlight": max_attributes_to_highlight, - "highlight_pre_tag": highlight_pre_tag, - "highlight_post_tag": highlight_post_tag, - "max_attributes_to_crop": max_attributes_to_crop, - "crop_marker": crop_marker, - "show_matches_position": show_matches_position, - "crop_length": crop_length, - }, - "facets": { - "avg_facets_number": format!("{:.2}", facets_sum_of_terms as f64 / facets_total_number_of_facets as f64), - }, - "matching_strategy": { - "most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), - }, - "locales": locales, - "scoring": { - "show_ranking_score": show_ranking_score, - "show_ranking_score_details": show_ranking_score_details, - "ranking_score_threshold": ranking_score_threshold, - }, - }); - - Some(Track { - timestamp, - user: user.clone(), - event: event_name.to_string(), - properties, - ..Default::default() - }) - } + json!({ + "requests": { + "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), + "total_succeeded": total_succeeded, + "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics + "total_received": total_received, + "total_degraded": total_degraded, + "total_used_negative_operator": total_used_negative_operator, + }, + "sort": { + "with_geoPoint": sort_with_geo_point, + "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64), + }, + "distinct": distinct, + "filter": { + "with_geoRadius": filter_with_geo_radius, + "with_geoBoundingBox": filter_with_geo_bounding_box, + "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), + "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), + }, + "attributes_to_search_on": { + "total_number_of_uses": attributes_to_search_on_total_number_of_uses, + }, + "q": { + "max_terms_number": max_terms_number, + }, + "vector": { + "max_vector_size": max_vector_size, + "retrieve_vectors": retrieve_vectors, + }, + "hybrid": { + "enabled": hybrid, + "semantic_ratio": semantic_ratio, + }, + "pagination": { + "max_limit": max_limit, + "max_offset": max_offset, + "most_used_navigation": if finite_pagination > (total_received / 2) { "exhaustive" } else { "estimated" }, + }, + "formatting": { + "max_attributes_to_retrieve": max_attributes_to_retrieve, + "max_attributes_to_highlight": max_attributes_to_highlight, + "highlight_pre_tag": highlight_pre_tag, + "highlight_post_tag": highlight_post_tag, + "max_attributes_to_crop": max_attributes_to_crop, + "crop_marker": crop_marker, + "show_matches_position": show_matches_position, + "crop_length": crop_length, + }, + "facets": { + "avg_facets_number": format!("{:.2}", facets_sum_of_terms as f64 / facets_total_number_of_facets as f64), + }, + "matching_strategy": { + "most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), + }, + "locales": locales, + "scoring": { + "show_ranking_score": show_ranking_score, + "show_ranking_score_details": show_ranking_score_details, + "ranking_score_threshold": ranking_score_threshold, + }, + }) } } diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index b24f18fae..80177876a 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -120,7 +120,7 @@ pub fn create_app( search_queue: Data, opt: Opt, logs: (LogRouteHandle, 
LogStderrHandle), - analytics: Arc, + analytics: Arc, enable_dashboard: bool, ) -> actix_web::App< impl ServiceFactory< @@ -473,7 +473,7 @@ pub fn configure_data( search_queue: Data, opt: &Opt, (logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle), - analytics: Arc, + analytics: Arc, ) { let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize; config diff --git a/meilisearch/src/routes/dump.rs b/meilisearch/src/routes/dump.rs index 7f3cd06a5..0fdeef5ed 100644 --- a/meilisearch/src/routes/dump.rs +++ b/meilisearch/src/routes/dump.rs @@ -4,7 +4,6 @@ use index_scheduler::IndexScheduler; use meilisearch_auth::AuthController; use meilisearch_types::error::ResponseError; use meilisearch_types::tasks::KindWithContent; -use serde_json::json; use tracing::debug; use crate::analytics::Analytics; @@ -18,14 +17,16 @@ pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump)))); } +crate::empty_analytics!(DumpAnalytics, "Dump Created"); + pub async fn create_dump( index_scheduler: GuardedData, Data>, auth_controller: GuardedData, Data>, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { - analytics.publish("Dump Created".to_string(), json!({}), Some(&req)); + analytics.publish(DumpAnalytics::default(), Some(&req)); let task = KindWithContent::DumpCreation { keys: auth_controller.list_keys()?, diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index bc656bdbb..24c89938d 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -6,10 +6,11 @@ use index_scheduler::IndexScheduler; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::keys::actions; +use serde::Serialize; use serde_json::json; use tracing::debug; -use crate::analytics::Analytics; +use crate::analytics::{Aggregate, Analytics}; use crate::extractors::authentication::policies::ActionPolicy; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; @@ -22,17 +23,19 @@ pub fn configure(cfg: &mut web::ServiceConfig) { ); } +crate::empty_analytics!(GetExperimentalFeatureAnalytics, "Experimental features Seen"); + async fn get_features( index_scheduler: GuardedData< ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_GET }>, Data, >, req: HttpRequest, - analytics: Data, + analytics: Data, ) -> HttpResponse { let features = index_scheduler.features(); - analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req)); + analytics.publish(GetExperimentalFeatureAnalytics::default(), Some(&req)); let features = features.runtime_features(); debug!(returns = ?features, "Get features"); HttpResponse::Ok().json(features) @@ -53,6 +56,38 @@ pub struct RuntimeTogglableFeatures { pub contains_filter: Option, } +#[derive(Serialize)] +pub struct PatchExperimentalFeatureAnalytics { + vector_store: bool, + metrics: bool, + logs_route: bool, + edit_documents_by_function: bool, + contains_filter: bool, +} + +impl Aggregate for PatchExperimentalFeatureAnalytics { + fn event_name(&self) -> &'static str { + "Experimental features Updated" + } + + fn aggregate(self, other: Self) -> Self + where + Self: Sized, + { + Self { + vector_store: other.vector_store, + metrics: other.metrics, + logs_route: other.logs_route, + edit_documents_by_function: other.edit_documents_by_function, + contains_filter: other.contains_filter, + } + } + + fn 
into_event(self) -> serde_json::Value { + serde_json::to_value(self).unwrap() + } +} + async fn patch_features( index_scheduler: GuardedData< ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_UPDATE }>, @@ -60,7 +95,7 @@ async fn patch_features( >, new_features: AwebJson, req: HttpRequest, - analytics: Data, + analytics: Data, ) -> Result { let features = index_scheduler.features(); debug!(parameters = ?new_features, "Patch features"); @@ -89,14 +124,13 @@ async fn patch_features( } = new_features; analytics.publish( - "Experimental features Updated".to_string(), - json!({ - "vector_store": vector_store, - "metrics": metrics, - "logs_route": logs_route, - "edit_documents_by_function": edit_documents_by_function, - "contains_filter": contains_filter, - }), + PatchExperimentalFeatureAnalytics { + vector_store, + metrics, + logs_route, + edit_documents_by_function, + contains_filter, + }, Some(&req), ); index_scheduler.put_runtime_features(new_features)?; diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 85cf33c54..8f4cd026d 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -1,4 +1,6 @@ +use std::collections::HashSet; use std::io::ErrorKind; +use std::marker::PhantomData; use actix_web::http::header::CONTENT_TYPE; use actix_web::web::Data; @@ -23,14 +25,14 @@ use meilisearch_types::tasks::KindWithContent; use meilisearch_types::{milli, Document, Index}; use mime::Mime; use once_cell::sync::Lazy; -use serde::Deserialize; +use serde::{Deserialize, Serialize}; use serde_json::Value; use tempfile::tempfile; use tokio::fs::File; use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter}; use tracing::debug; -use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind}; +use crate::analytics::{Aggregate, AggregateMethod, Analytics, DocumentDeletionKind}; use crate::error::MeilisearchHttpError; use crate::error::PayloadError::ReceivePayload; use crate::extractors::authentication::policies::*; @@ -41,7 +43,7 @@ use crate::routes::{ get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT, }; use crate::search::{parse_filter, RetrieveVectors}; -use crate::Opt; +use crate::{aggregate_methods, Opt}; static ACCEPTED_CONTENT_TYPE: Lazy> = Lazy::new(|| { vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()] @@ -100,12 +102,82 @@ pub struct GetDocument { retrieve_vectors: Param, } +#[derive(Default, Serialize)] +pub struct DocumentsFetchAggregator { + #[serde(rename = "requests.total_received")] + total_received: usize, + + // a call on ../documents/:doc_id + per_document_id: bool, + // if a filter was used + per_filter: bool, + + #[serde(rename = "vector.retrieve_vectors")] + retrieve_vectors: bool, + + // pagination + #[serde(rename = "pagination.max_limit")] + max_limit: usize, + #[serde(rename = "pagination.max_offset")] + max_offset: usize, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum DocumentFetchKind { + PerDocumentId { retrieve_vectors: bool }, + Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool }, +} + +impl DocumentsFetchAggregator { + pub fn from_query(query: &DocumentFetchKind) -> Self { + let (limit, offset, retrieve_vectors) = match query { + DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors), + DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. 
} => { + (*limit, *offset, *retrieve_vectors) + } + }; + Self { + total_received: 1, + per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }), + per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter), + max_limit: limit, + max_offset: offset, + retrieve_vectors, + } + } +} + +impl Aggregate for DocumentsFetchAggregator { + // TODO: TAMO: Should we do the same event for the GET requests + fn event_name(&self) -> &'static str { + "Documents Fetched POST" + } + + fn aggregate(self, other: Self) -> Self + where + Self: Sized, + { + Self { + total_received: self.total_received.saturating_add(other.total_received), + per_document_id: self.per_document_id | other.per_document_id, + per_filter: self.per_filter | other.per_filter, + retrieve_vectors: self.retrieve_vectors | other.retrieve_vectors, + max_limit: self.max_limit.max(other.max_limit), + max_offset: self.max_offset.max(other.max_offset), + } + } + + fn into_event(self) -> Value { + serde_json::to_value(self).unwrap() + } +} + pub async fn get_document( index_scheduler: GuardedData, Data>, document_param: web::Path, params: AwebQueryParameter, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { let DocumentParam { index_uid, document_id } = document_param.into_inner(); debug!(parameters = ?params, "Get document"); @@ -117,9 +189,12 @@ pub async fn get_document( let features = index_scheduler.features(); let retrieve_vectors = RetrieveVectors::new(param_retrieve_vectors.0, features)?; - analytics.get_fetch_documents( - &DocumentFetchKind::PerDocumentId { retrieve_vectors: param_retrieve_vectors.0 }, - &req, + analytics.publish( + DocumentsFetchAggregator { + retrieve_vectors: param_retrieve_vectors.0, + ..Default::default() + }, + Some(&req), ); let index = index_scheduler.index(&index_uid)?; @@ -129,17 +204,57 @@ pub async fn get_document( Ok(HttpResponse::Ok().json(document)) } +#[derive(Default, Serialize)] +pub struct DocumentsDeletionAggregator { + #[serde(rename = "requests.total_received")] + total_received: usize, + per_document_id: bool, + clear_all: bool, + per_batch: bool, + per_filter: bool, +} + +impl Aggregate for DocumentsDeletionAggregator { + fn event_name(&self) -> &'static str { + "Documents Deleted" + } + + fn aggregate(self, other: Self) -> Self + where + Self: Sized, + { + Self { + total_received: self.total_received.saturating_add(other.total_received), + per_document_id: self.per_document_id | other.per_document_id, + clear_all: self.clear_all | other.clear_all, + per_batch: self.per_batch | other.per_batch, + per_filter: self.per_filter | other.per_filter, + } + } + + fn into_event(self) -> Value { + serde_json::to_value(self).unwrap() + } +} + pub async fn delete_document( index_scheduler: GuardedData, Data>, path: web::Path, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { let DocumentParam { index_uid, document_id } = path.into_inner(); let index_uid = IndexUid::try_from(index_uid)?; - analytics.delete_documents(DocumentDeletionKind::PerDocumentId, &req); + analytics.publish( + DocumentsDeletionAggregator { + total_received: 1, + per_document_id: true, + ..Default::default() + }, + Some(&req), + ); let task = KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), @@ -190,19 +305,21 @@ pub async fn documents_by_query_post( index_uid: web::Path, body: AwebJson, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { let body = body.into_inner(); 
debug!(parameters = ?body, "Get documents POST"); - analytics.post_fetch_documents( - &DocumentFetchKind::Normal { - with_filter: body.filter.is_some(), - limit: body.limit, - offset: body.offset, + analytics.publish( + DocumentsFetchAggregator { + total_received: 1, + per_filter: body.filter.is_some(), retrieve_vectors: body.retrieve_vectors, + max_limit: body.limit, + max_offset: body.offset, + ..Default::default() }, - &req, + Some(&req), ); documents_by_query(&index_scheduler, index_uid, body) @@ -213,7 +330,7 @@ pub async fn get_documents( index_uid: web::Path<String>, params: AwebQueryParameter<BrowseQueryGet, DeserrQueryParamError>, req: HttpRequest, - analytics: web::Data<dyn Analytics>, + analytics: web::Data<Analytics>, ) -> Result<HttpResponse, ResponseError> { debug!(parameters = ?params, "Get documents GET"); @@ -235,14 +352,16 @@ pub async fn get_documents( filter, }; - analytics.get_fetch_documents( - &DocumentFetchKind::Normal { - with_filter: query.filter.is_some(), - limit: query.limit, - offset: query.offset, + analytics.publish( + DocumentsFetchAggregator { + total_received: 1, + per_filter: query.filter.is_some(), retrieve_vectors: query.retrieve_vectors, + max_limit: query.limit, + max_offset: query.offset, + ..Default::default() }, - &req, + Some(&req), ); documents_by_query(&index_scheduler, index_uid, query) @@ -298,6 +417,42 @@ fn from_char_csv_delimiter( } } +aggregate_methods!( + Replaced => "Documents Added", + Updated => "Documents Updated", +); + +#[derive(Default, Serialize)] +pub struct DocumentsAggregator<Method: AggregateMethod> { + payload_types: HashSet<String>, + primary_key: HashSet<String>, + index_creation: bool, + #[serde(skip)] + method: PhantomData<Method>, +} + +impl<Method: AggregateMethod> Aggregate for DocumentsAggregator<Method> { + fn event_name(&self) -> &'static str { + Method::event_name() + } + + fn aggregate(self, other: Self) -> Self + where + Self: Sized, + { + Self { + payload_types: self.payload_types.union(&other.payload_types).cloned().collect(), + primary_key: self.primary_key.union(&other.primary_key).cloned().collect(), + index_creation: self.index_creation | other.index_creation, + method: PhantomData, + } + } + + fn into_event(self) -> Value { + serde_json::to_value(self).unwrap() + } +} + pub async fn replace_documents( index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>, index_uid: web::Path<String>, @@ -305,17 +460,33 @@ pub async fn replace_documents( body: Payload, req: HttpRequest, opt: web::Data<Opt>, - analytics: web::Data<dyn Analytics>, + analytics: web::Data<Analytics>, ) -> Result<HttpResponse, ResponseError> { let index_uid = IndexUid::try_from(index_uid.into_inner())?; debug!(parameters = ?params, "Replace documents"); let params = params.into_inner(); - analytics.add_documents( - &params, - index_scheduler.index_exists(&index_uid).map_or(true, |x| !x), - &req, + let mut content_types = HashSet::new(); + let content_type = req + .headers() + .get(CONTENT_TYPE) + .and_then(|s| s.to_str().ok()) + .unwrap_or("unknown") + .to_string(); + content_types.insert(content_type); + let mut primary_keys = HashSet::new(); + if let Some(primary_key) = params.primary_key.clone() { + primary_keys.insert(primary_key); + } + analytics.publish( + DocumentsAggregator::<Replaced> { + payload_types: content_types, + primary_key: primary_keys, + index_creation: index_scheduler.index_exists(&index_uid).map_or(true, |x| !x), + method: PhantomData, + }, + Some(&req), ); let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); @@ -346,17 +517,33 @@ pub async fn update_documents( body: Payload, req: HttpRequest, opt: web::Data<Opt>, - analytics: web::Data<dyn Analytics>, + analytics: web::Data<Analytics>, ) -> Result<HttpResponse, ResponseError> { let index_uid = IndexUid::try_from(index_uid.into_inner())?; let params = params.into_inner(); debug!(parameters 
= ?params, "Update documents"); - analytics.add_documents( - ¶ms, - index_scheduler.index_exists(&index_uid).map_or(true, |x| !x), - &req, + let mut content_types = HashSet::new(); + let content_type = req + .headers() + .get(CONTENT_TYPE) + .and_then(|s| s.to_str().ok()) + .unwrap_or("unknown") + .to_string(); + content_types.insert(content_type); + let mut primary_keys = HashSet::new(); + if let Some(primary_key) = params.primary_key.clone() { + primary_keys.insert(primary_key); + } + analytics.publish( + DocumentsAggregator:: { + payload_types: content_types, + primary_key: primary_keys, + index_creation: index_scheduler.index_exists(&index_uid).map_or(true, |x| !x), + method: PhantomData, + }, + Some(&req), ); let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); @@ -524,12 +711,15 @@ pub async fn delete_documents_batch( body: web::Json>, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { debug!(parameters = ?body, "Delete documents by batch"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; - analytics.delete_documents(DocumentDeletionKind::PerBatch, &req); + analytics.publish( + DocumentsDeletionAggregator { total_received: 1, per_batch: true, ..Default::default() }, + Some(&req), + ); let ids = body .iter() @@ -562,14 +752,17 @@ pub async fn delete_documents_by_filter( body: AwebJson, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { debug!(parameters = ?body, "Delete documents by filter"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = index_uid.into_inner(); let filter = body.into_inner().filter; - analytics.delete_documents(DocumentDeletionKind::PerFilter, &req); + analytics.publish( + DocumentsDeletionAggregator { total_received: 1, per_filter: true, ..Default::default() }, + Some(&req), + ); // we ensure the filter is well formed before enqueuing it crate::search::parse_filter(&filter, Code::InvalidDocumentFilter, index_scheduler.features())? 
@@ -599,13 +792,44 @@ pub struct DocumentEditionByFunction { pub function: String, } +#[derive(Default, Serialize)] +struct EditDocumentsByFunctionAggregator { + // Set to true if at least one request was filtered + filtered: bool, + // Set to true if at least one request contained a context + with_context: bool, + + index_creation: bool, +} + +impl Aggregate for EditDocumentsByFunctionAggregator { + fn event_name(&self) -> &'static str { + "Documents Edited By Function" + } + + fn aggregate(self, other: Self) -> Self + where + Self: Sized, + { + Self { + filtered: self.filtered | other.filtered, + with_context: self.with_context | other.with_context, + index_creation: self.index_creation | other.index_creation, + } + } + + fn into_event(self) -> Value { + serde_json::to_value(self).unwrap() + } +} + pub async fn edit_documents_by_function( index_scheduler: GuardedData, Data>, index_uid: web::Path, params: AwebJson, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { debug!(parameters = ?params, "Edit documents by function"); @@ -617,10 +841,13 @@ pub async fn edit_documents_by_function( let index_uid = index_uid.into_inner(); let params = params.into_inner(); - analytics.update_documents_by_function( - ¶ms, - index_scheduler.index(&index_uid).is_err(), - &req, + analytics.publish( + EditDocumentsByFunctionAggregator { + filtered: params.filter.is_some(), + with_context: params.context.is_some(), + index_creation: index_scheduler.index(&index_uid).is_err(), + }, + Some(&req), ); let DocumentEditionByFunction { filter, context, function } = params; @@ -670,10 +897,13 @@ pub async fn clear_all_documents( index_uid: web::Path, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; - analytics.delete_documents(DocumentDeletionKind::ClearAll, &req); + analytics.publish( + DocumentsDeletionAggregator { total_received: 1, clear_all: true, ..Default::default() }, + Some(&req), + ); let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() }; let uid = get_task_id(&req, &opt)?; diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 1df80711d..1e9d0e15e 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -1,3 +1,5 @@ +use std::collections::{BinaryHeap, HashSet}; + use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; @@ -10,14 +12,15 @@ use meilisearch_types::locales::Locale; use serde_json::Value; use tracing::debug; -use crate::analytics::{Analytics, FacetSearchAggregator}; +use crate::analytics::{Aggregate, Analytics}; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::routes::indexes::search::search_kind; use crate::search::{ - add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, RankingScoreThreshold, - SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, - DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, + add_search_rules, perform_facet_search, FacetSearchResult, HybridQuery, MatchingStrategy, + RankingScoreThreshold, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, + DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, + DEFAULT_SEARCH_OFFSET, }; use crate::search_queue::SearchQueue; @@ -53,13 
+56,110 @@ pub struct FacetSearchQuery { pub locales: Option<Vec<Locale>>, } +#[derive(Default)] +pub struct FacetSearchAggregator { + // requests + total_received: usize, + total_succeeded: usize, + time_spent: BinaryHeap<usize>, + + // The set of all facetNames that were used + facet_names: HashSet<String>, + + // Has there been any other parameter than the facetName or facetQuery ones? + additional_search_parameters_provided: bool, +} + +impl FacetSearchAggregator { + #[allow(clippy::field_reassign_with_default)] + pub fn from_query(query: &FacetSearchQuery, request: &HttpRequest) -> Self { + let FacetSearchQuery { + facet_query: _, + facet_name, + vector, + q, + filter, + matching_strategy, + attributes_to_search_on, + hybrid, + ranking_score_threshold, + locales, + } = query; + + Self { + total_received: 1, + facet_names: Some(facet_name.clone()).into_iter().collect(), + additional_search_parameters_provided: q.is_some() + || vector.is_some() + || filter.is_some() + || *matching_strategy != MatchingStrategy::default() + || attributes_to_search_on.is_some() + || hybrid.is_some() + || ranking_score_threshold.is_some() + || locales.is_some(), + ..Default::default() + } + } + + pub fn succeed(&mut self, result: &FacetSearchResult) { + let FacetSearchResult { facet_hits: _, facet_query: _, processing_time_ms } = result; + self.total_succeeded = 1; + self.time_spent.push(*processing_time_ms as usize); + } +} + +impl Aggregate for FacetSearchAggregator { + fn event_name(&self) -> &'static str { + "Facet Searched POST" + } + + fn aggregate(mut self, mut other: Self) -> Self + where + Self: Sized, + { + self.time_spent.append(&mut other.time_spent); + + Self { + total_received: self.total_received.saturating_add(other.total_received), + total_succeeded: self.total_succeeded.saturating_add(other.total_succeeded), + time_spent: self.time_spent, + facet_names: self.facet_names.union(&other.facet_names).cloned().collect(), + additional_search_parameters_provided: self.additional_search_parameters_provided + | other.additional_search_parameters_provided, + } + } + + fn into_event(self) -> Value { + let Self { + total_received, + total_succeeded, + time_spent, + facet_names, + additional_search_parameters_provided, + } = self; + + // we get all the values in a sorted manner + let time_spent = time_spent.into_sorted_vec(); + // the index of the 99th percentile value + let percentile_99th = time_spent.len() * 99 / 100; + // We are only interested in the slowest value of the 99th fastest results + let time_spent = time_spent.get(percentile_99th); + + serde_json::json!({ + "requests": { + "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), + "total_succeeded": total_succeeded, + "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panic + "total_received": total_received, + }, + "facets": { + "total_distinct_facet_count": facet_names.len(), + "additional_search_parameters_provided": additional_search_parameters_provided, + }, + }) + } +} + pub async fn search( index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>, search_queue: Data<SearchQueue>, index_uid: web::Path<String>, params: AwebJson<FacetSearchQuery, DeserrJsonError>, req: HttpRequest, - analytics: web::Data<dyn Analytics>, + analytics: web::Data<Analytics>, ) -> Result<HttpResponse, ResponseError> { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -100,7 +200,7 @@ pub async fn search( if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); } - analytics.post_facet_search(aggregate); + analytics.publish(aggregate, Some(&req)); let search_result = search_result?; diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index 35b747ccf..483a48a16 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -1,3 +1,4 @@ +use std::collections::BTreeSet; use std::convert::Infallible; use actix_web::web::Data; @@ -18,7 +19,7 @@ use time::OffsetDateTime; use tracing::debug; use 
super::{get_task_id, Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; -use crate::analytics::Analytics; +use crate::analytics::{Aggregate, Analytics}; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::sequential_extractor::SeqHandler; @@ -123,12 +124,34 @@ pub struct IndexCreateRequest { primary_key: Option<String>, } +#[derive(Serialize)] +struct IndexCreatedAggregate { + primary_key: BTreeSet<String>, +} + +impl Aggregate for IndexCreatedAggregate { + fn event_name(&self) -> &'static str { + "Index Created" + } + + fn aggregate(self, other: Self) -> Self + where + Self: Sized, + { + Self { primary_key: self.primary_key.union(&other.primary_key).cloned().collect() } + } + + fn into_event(self) -> impl Serialize { + self + } +} + pub async fn create_index( index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_CREATE }>, Data<IndexScheduler>>, body: AwebJson<IndexCreateRequest, DeserrJsonError>, req: HttpRequest, opt: web::Data<Opt>, - analytics: web::Data<dyn Analytics>, + analytics: web::Data<Analytics>, ) -> Result<HttpResponse, ResponseError> { debug!(parameters = ?body, "Create index"); let IndexCreateRequest { primary_key, uid } = body.into_inner(); @@ -136,8 +159,7 @@ pub async fn create_index( let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid); if allow_index_creation { analytics.publish( - "Index Created".to_string(), - json!({ "primary_key": primary_key }), + IndexCreatedAggregate { primary_key: primary_key.iter().cloned().collect() }, Some(&req), ); @@ -194,20 +216,37 @@ pub async fn get_index( Ok(HttpResponse::Ok().json(index_view)) } +#[derive(Serialize)] +struct IndexUpdatedAggregate { + primary_key: BTreeSet<String>, +} + +impl Aggregate for IndexUpdatedAggregate { + fn event_name(&self) -> &'static str { + "Index Updated" + } + + fn aggregate(self, other: Self) -> Self { + Self { primary_key: self.primary_key.union(&other.primary_key).cloned().collect() } + } + + fn into_event(self) -> impl Serialize { + self + } +} pub async fn update_index( index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_UPDATE }>, Data<IndexScheduler>>, index_uid: web::Path<String>, body: AwebJson<UpdateIndexRequest, DeserrJsonError>, req: HttpRequest, opt: web::Data<Opt>, - analytics: web::Data<dyn Analytics>, + analytics: web::Data<Analytics>, ) -> Result<HttpResponse, ResponseError> { debug!(parameters = ?body, "Update index"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; let body = body.into_inner(); analytics.publish( - "Index Updated".to_string(), - json!({ "primary_key": body.primary_key }), + IndexUpdatedAggregate { primary_key: body.primary_key.iter().cloned().collect() }, Some(&req), ); diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index 6a8eee521..f833a57d2 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -13,6 +13,7 @@ use meilisearch_types::serde_cs::vec::CS; use serde_json::Value; use tracing::debug; +use crate::analytics::segment_analytics::{SearchGET, SearchPOST}; use crate::analytics::{Analytics, SearchAggregator}; use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::*; @@ -225,7 +226,7 @@ pub async fn search_with_url_query( index_uid: web::Path<String>, params: AwebQueryParameter<SearchQueryGet, DeserrQueryParamError>, req: HttpRequest, - analytics: web::Data<dyn Analytics>, + analytics: web::Data<Analytics>, ) -> Result<HttpResponse, ResponseError> { debug!(parameters = ?params, "Search get"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -237,7 +238,7 @@ pub async fn search_with_url_query( add_search_rules(&mut query.filter, search_rules); } - let mut aggregate = SearchAggregator::from_query(&query, &req); + let mut aggregate = SearchAggregator::<SearchGET>::from_query(&query, &req); let index = 
index_scheduler.index(&index_uid)?; let features = index_scheduler.features(); @@ -254,7 +255,7 @@ pub async fn search_with_url_query( if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); } - analytics.get_search(aggregate); + analytics.publish(aggregate, Some(&req)); let search_result = search_result?; @@ -268,7 +269,7 @@ pub async fn search_with_post( index_uid: web::Path<String>, params: AwebJson<SearchQuery, DeserrJsonError>, req: HttpRequest, - analytics: web::Data<dyn Analytics>, + analytics: web::Data<Analytics>, ) -> Result<HttpResponse, ResponseError> { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -280,7 +281,7 @@ pub async fn search_with_post( add_search_rules(&mut query.filter, search_rules); } - let mut aggregate = SearchAggregator::from_query(&query, &req); + let mut aggregate = SearchAggregator::<SearchPOST>::from_query(&query, &req); let index = index_scheduler.index(&index_uid)?; @@ -302,7 +303,7 @@ pub async fn search_with_post( MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc(); } } - analytics.post_search(aggregate); + analytics.publish(aggregate, Some(&req)); let search_result = search_result?; diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index aaf8673d0..112f8671b 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -1,3 +1,5 @@ +use std::collections::{BTreeSet, HashSet}; + use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; @@ -7,12 +9,15 @@ use meilisearch_types::error::ResponseError; use meilisearch_types::facet_values_sort::FacetValuesSort; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::update::Setting; -use meilisearch_types::settings::{settings, RankingRuleView, SecretPolicy, Settings, Unchecked}; +use meilisearch_types::settings::{ + settings, ProximityPrecisionView, RankingRuleView, SecretPolicy, Settings, Unchecked, +}; use meilisearch_types::tasks::KindWithContent; +use serde::Serialize; use serde_json::json; use tracing::debug; -use crate::analytics::Analytics; +use crate::analytics::{Aggregate, Analytics}; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView}; @@ -80,7 +85,7 @@ macro_rules! 
make_setting_route { body: deserr::actix_web::AwebJson, $err_ty>, req: HttpRequest, opt: web::Data, - $analytics_var: web::Data, + $analytics_var: web::Data, ) -> std::result::Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -162,16 +167,8 @@ make_setting_route!( "filterableAttributes", analytics, |setting: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "FilterableAttributes Updated".to_string(), - json!({ - "filterable_attributes": { - "total": setting.as_ref().map(|filter| filter.len()).unwrap_or(0), - "has_geo": setting.as_ref().map(|filter| filter.contains("_geo")).unwrap_or(false), - } - }), + crate::routes::indexes::settings::FilterableAttributesAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -188,16 +185,8 @@ make_setting_route!( "sortableAttributes", analytics, |setting: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "SortableAttributes Updated".to_string(), - json!({ - "sortable_attributes": { - "total": setting.as_ref().map(|sort| sort.len()), - "has_geo": setting.as_ref().map(|sort| sort.contains("_geo")), - }, - }), + crate::routes::indexes::settings::SortableAttributesAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -214,16 +203,8 @@ make_setting_route!( "displayedAttributes", analytics, |displayed: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "DisplayedAttributes Updated".to_string(), - json!({ - "displayed_attributes": { - "total": displayed.as_ref().map(|displayed| displayed.len()), - "with_wildcard": displayed.as_ref().map(|displayed| displayed.iter().any(|displayed| displayed == "*")), - }, - }), + crate::routes::indexes::settings::DisplayedAttributesAnalytics::new(displayed.as_ref()).to_settings(), Some(req), ); } @@ -240,35 +221,8 @@ make_setting_route!( "typoTolerance", analytics, |setting: &Option, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "TypoTolerance Updated".to_string(), - json!({ - "typo_tolerance": { - "enabled": setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))), - "disable_on_attributes": setting - .as_ref() - .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), - "disable_on_words": setting - .as_ref() - .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), - "min_word_size_for_one_typo": setting - .as_ref() - .and_then(|s| s.min_word_size_for_typos - .as_ref() - .set() - .map(|s| s.one_typo.set())) - .flatten(), - "min_word_size_for_two_typos": setting - .as_ref() - .and_then(|s| s.min_word_size_for_typos - .as_ref() - .set() - .map(|s| s.two_typos.set())) - .flatten(), - }, - }), + crate::routes::indexes::settings::TypoToleranceAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -285,16 +239,8 @@ make_setting_route!( "searchableAttributes", analytics, |setting: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "SearchableAttributes Updated".to_string(), - json!({ - "searchable_attributes": { - "total": setting.as_ref().map(|searchable| searchable.len()), - "with_wildcard": setting.as_ref().map(|searchable| searchable.iter().any(|searchable| searchable == "*")), - }, - }), + crate::routes::indexes::settings::SearchableAttributesAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -311,15 +257,8 @@ make_setting_route!( "stopWords", analytics, |stop_words: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "StopWords Updated".to_string(), 
- json!({ - "stop_words": { - "total": stop_words.as_ref().map(|stop_words| stop_words.len()), - }, - }), + crate::routes::indexes::settings::StopWordsAnalytics::new(stop_words.as_ref()).to_settings(), Some(req), ); } @@ -336,15 +275,8 @@ make_setting_route!( "nonSeparatorTokens", analytics, |non_separator_tokens: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "nonSeparatorTokens Updated".to_string(), - json!({ - "non_separator_tokens": { - "total": non_separator_tokens.as_ref().map(|non_separator_tokens| non_separator_tokens.len()), - }, - }), + crate::routes::indexes::settings::NonSeparatorTokensAnalytics::new(non_separator_tokens.as_ref()).to_settings(), Some(req), ); } @@ -361,15 +293,8 @@ make_setting_route!( "separatorTokens", analytics, |separator_tokens: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "separatorTokens Updated".to_string(), - json!({ - "separator_tokens": { - "total": separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()), - }, - }), + crate::routes::indexes::settings::SeparatorTokensAnalytics::new(separator_tokens.as_ref()).to_settings(), Some(req), ); } @@ -386,15 +311,8 @@ make_setting_route!( "dictionary", analytics, |dictionary: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "dictionary Updated".to_string(), - json!({ - "dictionary": { - "total": dictionary.as_ref().map(|dictionary| dictionary.len()), - }, - }), + crate::routes::indexes::settings::DictionaryAnalytics::new(dictionary.as_ref()).to_settings(), Some(req), ); } @@ -411,15 +329,8 @@ make_setting_route!( "synonyms", analytics, |synonyms: &Option>>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "Synonyms Updated".to_string(), - json!({ - "synonyms": { - "total": synonyms.as_ref().map(|synonyms| synonyms.len()), - }, - }), + crate::routes::indexes::settings::SynonymsAnalytics::new(synonyms.as_ref()).to_settings(), Some(req), ); } @@ -436,14 +347,8 @@ make_setting_route!( "distinctAttribute", analytics, |distinct: &Option, req: &HttpRequest| { - use serde_json::json; analytics.publish( - "DistinctAttribute Updated".to_string(), - json!({ - "distinct_attribute": { - "set": distinct.is_some(), - } - }), + crate::routes::indexes::settings::DistinctAttributeAnalytics::new(distinct.as_ref()).to_settings(), Some(req), ); } @@ -460,15 +365,8 @@ make_setting_route!( "proximityPrecision", analytics, |precision: &Option, req: &HttpRequest| { - use serde_json::json; analytics.publish( - "ProximityPrecision Updated".to_string(), - json!({ - "proximity_precision": { - "set": precision.is_some(), - "value": precision.unwrap_or_default(), - } - }), + crate::routes::indexes::settings::ProximityPrecisionAnalytics::new(precision.as_ref()).to_settings(), Some(req), ); } @@ -485,12 +383,8 @@ make_setting_route!( "localizedAttributes", analytics, |rules: &Option>, req: &HttpRequest| { - use serde_json::json; analytics.publish( - "LocalizedAttributesRules Updated".to_string(), - json!({ - "locales": rules.as_ref().map(|rules| rules.iter().flat_map(|rule| rule.locales.iter().cloned()).collect::>()) - }), + crate::routes::indexes::settings::LocalesAnalytics::new(rules.as_ref()).to_settings(), Some(req), ); } @@ -507,21 +401,8 @@ make_setting_route!( "rankingRules", analytics, |setting: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "RankingRules Updated".to_string(), - json!({ - "ranking_rules": { - "words_position": setting.as_ref().map(|rr| rr.iter().position(|s| 
matches!(s, meilisearch_types::settings::RankingRuleView::Words))), - "typo_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo))), - "proximity_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Proximity))), - "attribute_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Attribute))), - "sort_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort))), - "exactness_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Exactness))), - "values": setting.as_ref().map(|rr| rr.iter().filter(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Asc(_) | meilisearch_types::settings::RankingRuleView::Desc(_)) ).map(|x| x.to_string()).collect::>().join(", ")), - } - }), + crate::routes::indexes::settings::RankingRulesAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -538,20 +419,8 @@ make_setting_route!( "faceting", analytics, |setting: &Option, req: &HttpRequest| { - use serde_json::json; - use meilisearch_types::facet_values_sort::FacetValuesSort; - analytics.publish( - "Faceting Updated".to_string(), - json!({ - "faceting": { - "max_values_per_facet": setting.as_ref().and_then(|s| s.max_values_per_facet.set()), - "sort_facet_values_by_star_count": setting.as_ref().and_then(|s| { - s.sort_facet_values_by.as_ref().set().map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count)) - }), - "sort_facet_values_by_total": setting.as_ref().and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())), - }, - }), + crate::routes::indexes::settings::FacetingAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -568,15 +437,8 @@ make_setting_route!( "pagination", analytics, |setting: &Option, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "Pagination Updated".to_string(), - json!({ - "pagination": { - "max_total_hits": setting.as_ref().and_then(|s| s.max_total_hits.set()), - }, - }), + crate::routes::indexes::settings::PaginationAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -593,11 +455,8 @@ make_setting_route!( "embedders", analytics, |setting: &Option>>, req: &HttpRequest| { - - analytics.publish( - "Embedders Updated".to_string(), - serde_json::json!({"embedders": crate::routes::indexes::settings::embedder_analytics(setting.as_ref())}), + crate::routes::indexes::settings::EmbeddersAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -651,10 +510,15 @@ fn embedder_analytics( json!( { + // last "total": setting.as_ref().map(|s| s.len()), + // Merge the sources "sources": sources, + // |= "document_template_used": document_template_used, + // max "document_template_max_bytes": document_template_max_bytes, + // |= "binary_quantization_used": binary_quantization_used, } ) @@ -672,8 +536,7 @@ make_setting_route!( analytics, |setting: &Option, req: &HttpRequest| { analytics.publish( - "Search Cutoff Updated".to_string(), - serde_json::json!({"search_cutoff_ms": setting }), + crate::routes::indexes::settings::SearchCutoffMsAnalytics::new(setting.as_ref()).to_settings(), Some(req), ); } @@ -714,13 +577,639 @@ generate_configure!( search_cutoff_ms ); +#[derive(Serialize, Default)] +struct SettingsAnalytics { + ranking_rules: RankingRulesAnalytics, + searchable_attributes: 
+
+#[derive(Serialize, Default)]
+struct SettingsAnalytics {
+    ranking_rules: RankingRulesAnalytics,
+    searchable_attributes: SearchableAttributesAnalytics,
+    displayed_attributes: DisplayedAttributesAnalytics,
+    sortable_attributes: SortableAttributesAnalytics,
+    filterable_attributes: FilterableAttributesAnalytics,
+    distinct_attribute: DistinctAttributeAnalytics,
+    proximity_precision: ProximityPrecisionAnalytics,
+    typo_tolerance: TypoToleranceAnalytics,
+    faceting: FacetingAnalytics,
+    pagination: PaginationAnalytics,
+    stop_words: StopWordsAnalytics,
+    synonyms: SynonymsAnalytics,
+    embedders: EmbeddersAnalytics,
+    search_cutoff_ms: SearchCutoffMsAnalytics,
+    locales: LocalesAnalytics,
+    dictionary: DictionaryAnalytics,
+    separator_tokens: SeparatorTokensAnalytics,
+    non_separator_tokens: NonSeparatorTokensAnalytics,
+}
+
+impl Aggregate for SettingsAnalytics {
+    fn event_name(&self) -> &'static str {
+        "Settings Updated"
+    }
+
+    fn aggregate(self, other: Self) -> Self
+    where
+        Self: Sized,
+    {
+        Self {
+            ranking_rules: RankingRulesAnalytics {
+                words_position: self
+                    .ranking_rules
+                    .words_position
+                    .or(other.ranking_rules.words_position),
+                typo_position: self
+                    .ranking_rules
+                    .typo_position
+                    .or(other.ranking_rules.typo_position),
+                proximity_position: self
+                    .ranking_rules
+                    .proximity_position
+                    .or(other.ranking_rules.proximity_position),
+                attribute_position: self
+                    .ranking_rules
+                    .attribute_position
+                    .or(other.ranking_rules.attribute_position),
+                sort_position: self
+                    .ranking_rules
+                    .sort_position
+                    .or(other.ranking_rules.sort_position),
+                exactness_position: self
+                    .ranking_rules
+                    .exactness_position
+                    .or(other.ranking_rules.exactness_position),
+                values: self.ranking_rules.values.or(other.ranking_rules.values),
+            },
+            searchable_attributes: SearchableAttributesAnalytics {
+                total: self.searchable_attributes.total.or(other.searchable_attributes.total),
+                with_wildcard: self
+                    .searchable_attributes
+                    .with_wildcard
+                    .or(other.searchable_attributes.with_wildcard),
+            },
+            displayed_attributes: DisplayedAttributesAnalytics {
+                total: self.displayed_attributes.total.or(other.displayed_attributes.total),
+                with_wildcard: self
+                    .displayed_attributes
+                    .with_wildcard
+                    .or(other.displayed_attributes.with_wildcard),
+            },
+            sortable_attributes: SortableAttributesAnalytics {
+                total: self.sortable_attributes.total.or(other.sortable_attributes.total),
+                has_geo: self.sortable_attributes.has_geo.or(other.sortable_attributes.has_geo),
+            },
+            filterable_attributes: FilterableAttributesAnalytics {
+                total: self.filterable_attributes.total.or(other.filterable_attributes.total),
+                has_geo: self.filterable_attributes.has_geo.or(other.filterable_attributes.has_geo),
+            },
+            distinct_attribute: DistinctAttributeAnalytics {
+                set: self.distinct_attribute.set | other.distinct_attribute.set,
+            },
+            proximity_precision: ProximityPrecisionAnalytics {
+                set: self.proximity_precision.set | other.proximity_precision.set,
+                value: self.proximity_precision.value.or(other.proximity_precision.value),
+            },
+            typo_tolerance: TypoToleranceAnalytics {
+                enabled: self.typo_tolerance.enabled.or(other.typo_tolerance.enabled),
+                disable_on_attributes: self
+                    .typo_tolerance
+                    .disable_on_attributes
+                    .or(other.typo_tolerance.disable_on_attributes),
+                disable_on_words: self
+                    .typo_tolerance
+                    .disable_on_words
+                    .or(other.typo_tolerance.disable_on_words),
+                min_word_size_for_one_typo: self
+                    .typo_tolerance
+                    .min_word_size_for_one_typo
+                    .or(other.typo_tolerance.min_word_size_for_one_typo),
+                min_word_size_for_two_typos: self
+                    .typo_tolerance
+                    .min_word_size_for_two_typos
+                    .or(other.typo_tolerance.min_word_size_for_two_typos),
+            },
+            faceting: FacetingAnalytics {
+                max_values_per_facet: self
+                    .faceting
+                    .max_values_per_facet
+                    .or(other.faceting.max_values_per_facet),
+                sort_facet_values_by_star_count: self
+                    .faceting
+                    .sort_facet_values_by_star_count
+                    .or(other.faceting.sort_facet_values_by_star_count),
+                sort_facet_values_by_total: self
+                    .faceting
+                    .sort_facet_values_by_total
+                    .or(other.faceting.sort_facet_values_by_total),
+            },
+            pagination: PaginationAnalytics {
+                max_total_hits: self.pagination.max_total_hits.or(other.pagination.max_total_hits),
+            },
+            stop_words: StopWordsAnalytics {
+                total: self.stop_words.total.or(other.stop_words.total),
+            },
+            synonyms: SynonymsAnalytics { total: self.synonyms.total.or(other.synonyms.total) },
+            embedders: EmbeddersAnalytics {
+                total: self.embedders.total.or(other.embedders.total),
+                sources: match (self.embedders.sources, other.embedders.sources) {
+                    (None, None) => None,
+                    (Some(sources), None) | (None, Some(sources)) => Some(sources),
+                    (Some(this), Some(other)) => Some(this.union(&other).cloned().collect()),
+                },
+                document_template_used: match (
+                    self.embedders.document_template_used,
+                    other.embedders.document_template_used,
+                ) {
+                    (None, None) => None,
+                    (Some(used), None) | (None, Some(used)) => Some(used),
+                    (Some(this), Some(other)) => Some(this | other),
+                },
+                document_template_max_bytes: match (
+                    self.embedders.document_template_max_bytes,
+                    other.embedders.document_template_max_bytes,
+                ) {
+                    (None, None) => None,
+                    (Some(bytes), None) | (None, Some(bytes)) => Some(bytes),
+                    (Some(this), Some(other)) => Some(this.max(other)),
+                },
+                binary_quantization_used: match (
+                    self.embedders.binary_quantization_used,
+                    other.embedders.binary_quantization_used,
+                ) {
+                    (None, None) => None,
+                    (Some(bq), None) | (None, Some(bq)) => Some(bq),
+                    (Some(this), Some(other)) => Some(this | other),
+                },
+            },
+            search_cutoff_ms: SearchCutoffMsAnalytics {
+                search_cutoff_ms: self
+                    .search_cutoff_ms
+                    .search_cutoff_ms
+                    .or(other.search_cutoff_ms.search_cutoff_ms),
+            },
+            locales: LocalesAnalytics { locales: self.locales.locales.or(other.locales.locales) },
+            dictionary: DictionaryAnalytics {
+                total: self.dictionary.total.or(other.dictionary.total),
+            },
+            separator_tokens: SeparatorTokensAnalytics {
+                total: self.separator_tokens.total.or(other.separator_tokens.total),
+            },
+            non_separator_tokens: NonSeparatorTokensAnalytics {
+                total: self.non_separator_tokens.total.or(other.non_separator_tokens.total),
+            },
+        }
+    }
+
+    fn into_event(self) -> impl Serialize
+    where
+        Self: Sized,
+    {
+        self
+    }
+}
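Because every field is merged with `Option::or` (first `Some` wins) or a boolean `|`, several partial "Settings Updated" events collapse into a single event without losing any populated field. A hedged usage sketch with hypothetical values:

```rust
// Sketch only: two partial events built by hand to show the merge behavior.
let a = SettingsAnalytics {
    pagination: PaginationAnalytics { max_total_hits: Some(50) },
    ..Default::default()
};
let b = SettingsAnalytics {
    pagination: PaginationAnalytics { max_total_hits: Some(20) },
    stop_words: StopWordsAnalytics { total: Some(3) },
    ..Default::default()
};
let merged = a.aggregate(b);
// merged.pagination.max_total_hits == Some(50): `or` keeps the first Some.
// merged.stop_words.total == Some(3): filled in from the other side.
```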
+
+#[derive(Serialize, Default)]
+struct RankingRulesAnalytics {
+    words_position: Option<usize>,
+    typo_position: Option<usize>,
+    proximity_position: Option<usize>,
+    attribute_position: Option<usize>,
+    sort_position: Option<usize>,
+    exactness_position: Option<usize>,
+    values: Option<String>,
+}
+
+impl RankingRulesAnalytics {
+    pub fn new(rr: Option<&Vec<meilisearch_types::settings::RankingRuleView>>) -> Self {
+        RankingRulesAnalytics {
+            words_position: rr.as_ref().and_then(|rr| {
+                rr.iter()
+                    .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Words))
+            }),
+            typo_position: rr.as_ref().and_then(|rr| {
+                rr.iter()
+                    .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo))
+            }),
+            proximity_position: rr.as_ref().and_then(|rr| {
+                rr.iter().position(|s| {
+                    matches!(s, meilisearch_types::settings::RankingRuleView::Proximity)
+                })
+            }),
+            attribute_position: rr.as_ref().and_then(|rr| {
+                rr.iter().position(|s| {
+                    matches!(s, meilisearch_types::settings::RankingRuleView::Attribute)
+                })
+            }),
+            sort_position: rr.as_ref().and_then(|rr| {
+                rr.iter()
+                    .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort))
+            }),
+            exactness_position: rr.as_ref().and_then(|rr| {
+                rr.iter().position(|s| {
+                    matches!(s, meilisearch_types::settings::RankingRuleView::Exactness)
+                })
+            }),
+            values: rr.as_ref().map(|rr| {
+                rr.iter()
+                    .filter(|s| {
+                        matches!(
+                            s,
+                            meilisearch_types::settings::RankingRuleView::Asc(_)
+                                | meilisearch_types::settings::RankingRuleView::Desc(_)
+                        )
+                    })
+                    .map(|x| x.to_string())
+                    .collect::<Vec<_>>()
+                    .join(", ")
+            }),
+        }
+    }
+
+    pub fn to_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { ranking_rules: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+struct SearchableAttributesAnalytics {
+    total: Option<usize>,
+    with_wildcard: Option<bool>,
+}
+
+impl SearchableAttributesAnalytics {
+    pub fn new(setting: Option<&Vec<String>>) -> Self {
+        Self {
+            total: setting.as_ref().map(|searchable| searchable.len()),
+            with_wildcard: setting
+                .as_ref()
+                .map(|searchable| searchable.iter().any(|searchable| searchable == "*")),
+        }
+    }
+
+    pub fn to_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { searchable_attributes: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+struct DisplayedAttributesAnalytics {
+    total: Option<usize>,
+    with_wildcard: Option<bool>,
+}
+
+impl DisplayedAttributesAnalytics {
+    pub fn new(displayed: Option<&Vec<String>>) -> Self {
+        Self {
+            total: displayed.as_ref().map(|displayed| displayed.len()),
+            with_wildcard: displayed
+                .as_ref()
+                .map(|displayed| displayed.iter().any(|displayed| displayed == "*")),
+        }
+    }
+
+    pub fn to_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { displayed_attributes: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+struct SortableAttributesAnalytics {
+    total: Option<usize>,
+    has_geo: Option<bool>,
+}
+
+impl SortableAttributesAnalytics {
+    pub fn new(setting: Option<&std::collections::BTreeSet<String>>) -> Self {
+        Self {
+            total: setting.as_ref().map(|sort| sort.len()),
+            has_geo: setting.as_ref().map(|sort| sort.contains("_geo")),
+        }
+    }
+
+    pub fn to_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { sortable_attributes: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+struct FilterableAttributesAnalytics {
+    total: Option<usize>,
+    has_geo: Option<bool>,
+}
+
+impl FilterableAttributesAnalytics {
+    pub fn new(setting: Option<&std::collections::BTreeSet<String>>) -> Self {
+        Self {
+            total: setting.as_ref().map(|filter| filter.len()),
+            has_geo: setting.as_ref().map(|filter| filter.contains("_geo")),
+        }
+    }
+
+    pub fn to_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { filterable_attributes: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+struct DistinctAttributeAnalytics {
+    set: bool,
+}
+
+impl DistinctAttributeAnalytics {
+    pub fn new(distinct: Option<&String>) -> Self {
+        Self { set: distinct.is_some() }
+    }
+
+    pub fn to_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { distinct_attribute: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+struct ProximityPrecisionAnalytics {
+    set: bool,
+    value: Option<meilisearch_types::settings::ProximityPrecisionView>,
+}
+
+impl ProximityPrecisionAnalytics {
+    pub fn new(precision: Option<&meilisearch_types::settings::ProximityPrecisionView>) -> Self {
+        Self { set: precision.is_some(), value: precision.cloned() }
+    }
+
+    pub fn to_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { proximity_precision: self, ..Default::default() }
+    }
+}
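Every analytics struct above follows the same `to_settings()` pattern: embed one populated field into an otherwise-default `SettingsAnalytics` so all routes publish the same "Settings Updated" event. A hedged sketch of what a route handler does, where `setting`, `req`, and `analytics` stand in for the route's actual arguments:

```rust
// Sketch of the single-setting publish pattern used by the routes above.
let event = RankingRulesAnalytics::new(setting.as_ref()).to_settings();
// ...which is shorthand for:
let event = SettingsAnalytics {
    ranking_rules: RankingRulesAnalytics::new(setting.as_ref()),
    ..Default::default()
};
analytics.publish(event, Some(req));
```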
+
+#[derive(Serialize, Default)]
+struct TypoToleranceAnalytics {
+    enabled: Option<bool>,
+    disable_on_attributes: Option<bool>,
+    disable_on_words: Option<bool>,
+    min_word_size_for_one_typo: Option<u8>,
+    min_word_size_for_two_typos: Option<u8>,
+}
+
+impl TypoToleranceAnalytics {
+    pub fn new(setting: Option<&meilisearch_types::settings::TypoSettings>) -> Self {
+        Self {
+            enabled: setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))),
+            disable_on_attributes: setting
+                .as_ref()
+                .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())),
+            disable_on_words: setting
+                .as_ref()
+                .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())),
+            min_word_size_for_one_typo: setting
+                .as_ref()
+                .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.one_typo.set()))
+                .flatten(),
+            min_word_size_for_two_typos: setting
+                .as_ref()
+                .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.two_typos.set()))
+                .flatten(),
+        }
+    }
+
+    pub fn to_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { typo_tolerance: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+struct FacetingAnalytics {
+    max_values_per_facet: Option<usize>,
+    sort_facet_values_by_star_count: Option<bool>,
+    sort_facet_values_by_total: Option<usize>,
+}
+
+impl FacetingAnalytics {
+    pub fn new(setting: Option<&meilisearch_types::settings::FacetingSettings>) -> Self {
+        Self {
+            max_values_per_facet: setting.as_ref().and_then(|s| s.max_values_per_facet.set()),
+            sort_facet_values_by_star_count: setting.as_ref().and_then(|s| {
+                s.sort_facet_values_by
+                    .as_ref()
+                    .set()
+                    .map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count))
+            }),
+            sort_facet_values_by_total: setting
+                .as_ref()
+                .and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())),
+        }
+    }
+
+    pub fn to_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { faceting: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+struct PaginationAnalytics {
+    max_total_hits: Option<usize>,
+}
+
+impl PaginationAnalytics {
+    pub fn new(setting: Option<&meilisearch_types::settings::PaginationSettings>) -> Self {
+        Self { max_total_hits: setting.as_ref().and_then(|s| s.max_total_hits.set()) }
+    }
+
+    pub fn to_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { pagination: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+struct StopWordsAnalytics {
+    total: Option<usize>,
+}
+
+impl StopWordsAnalytics {
+    pub fn new(stop_words: Option<&BTreeSet<String>>) -> Self {
+        Self { total: stop_words.as_ref().map(|stop_words| stop_words.len()) }
+    }
+
+    pub fn to_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { stop_words: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+struct SynonymsAnalytics {
+    total: Option<usize>,
+}
+
+impl SynonymsAnalytics {
+    pub fn new(synonyms: Option<&std::collections::BTreeMap<String, Vec<String>>>) -> Self {
+        Self { total: synonyms.as_ref().map(|synonyms| synonyms.len()) }
+    }
+
+    pub fn to_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { synonyms: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+struct EmbeddersAnalytics {
+    // last
+    total: Option<usize>,
+    // Merge the sources
+    sources: Option<HashSet<String>>,
+    // |=
+    document_template_used: Option<bool>,
+    // max
+    document_template_max_bytes: Option<usize>,
+    // |=
+    binary_quantization_used: Option<bool>,
+}
+
+impl EmbeddersAnalytics {
+    pub fn new(
+        setting: Option<
+            &std::collections::BTreeMap<
+                String,
+                Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>,
+            >,
+        >,
+    ) -> Self {
+        let mut sources = std::collections::HashSet::new();
+
+        if let Some(s) = &setting {
+            for source in s
+                .values()
+                .filter_map(|config| config.clone().set())
+                .filter_map(|config| config.source.set())
+            {
+                use meilisearch_types::milli::vector::settings::EmbedderSource;
+                match source {
+                    EmbedderSource::OpenAi => sources.insert("openAi".to_string()),
+                    EmbedderSource::HuggingFace => sources.insert("huggingFace".to_string()),
+                    EmbedderSource::UserProvided => sources.insert("userProvided".to_string()),
+                    EmbedderSource::Ollama => sources.insert("ollama".to_string()),
+                    EmbedderSource::Rest => sources.insert("rest".to_string()),
+                };
+            }
+        };
+
+        Self {
+            total: setting.as_ref().map(|s| s.len()),
+            sources: Some(sources),
+            document_template_used: setting.as_ref().map(|map| {
+                map.values()
+                    .filter_map(|config| config.clone().set())
+                    .any(|config| config.document_template.set().is_some())
+            }),
+            document_template_max_bytes: setting.as_ref().and_then(|map| {
+                map.values()
+                    .filter_map(|config| config.clone().set())
+                    .filter_map(|config| config.document_template_max_bytes.set())
+                    .max()
+            }),
+            binary_quantization_used: setting.as_ref().map(|map| {
+                map.values()
+                    .filter_map(|config| config.clone().set())
+                    .any(|config| config.binary_quantized.set().is_some())
+            }),
+        }
+    }
+
+    pub fn to_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { embedders: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+#[serde(transparent)]
+struct SearchCutoffMsAnalytics {
+    search_cutoff_ms: Option<u64>,
+}
+
+impl SearchCutoffMsAnalytics {
+    pub fn new(setting: Option<&u64>) -> Self {
+        Self { search_cutoff_ms: setting.copied() }
+    }
+
+    pub fn to_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { search_cutoff_ms: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+#[serde(transparent)]
+struct LocalesAnalytics {
+    locales: Option<BTreeSet<meilisearch_types::locales::Locale>>,
+}
+
+impl LocalesAnalytics {
+    pub fn new(
+        rules: Option<&Vec<meilisearch_types::locales::LocalizedAttributesRuleView>>,
+    ) -> Self {
+        LocalesAnalytics {
+            locales: rules.as_ref().map(|rules| {
+                rules
+                    .iter()
+                    .flat_map(|rule| rule.locales.iter().cloned())
+                    .collect::<BTreeSet<_>>()
+            }),
+        }
+    }
+
+    pub fn to_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { locales: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+struct DictionaryAnalytics {
+    total: Option<usize>,
+}
+
+impl DictionaryAnalytics {
+    pub fn new(dictionary: Option<&std::collections::BTreeSet<String>>) -> Self {
+        Self { total: dictionary.as_ref().map(|dictionary| dictionary.len()) }
+    }
+
+    pub fn to_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { dictionary: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+struct SeparatorTokensAnalytics {
+    total: Option<usize>,
+}
+
+impl SeparatorTokensAnalytics {
+    pub fn new(separator_tokens: Option<&std::collections::BTreeSet<String>>) -> Self {
+        Self { total: separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()) }
+    }
+
+    pub fn to_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { separator_tokens: self, ..Default::default() }
+    }
+}
+
+#[derive(Serialize, Default)]
+struct NonSeparatorTokensAnalytics {
+    total: Option<usize>,
+}
+
+impl NonSeparatorTokensAnalytics {
+    pub fn new(non_separator_tokens: Option<&std::collections::BTreeSet<String>>) -> Self {
+        Self {
+            total: non_separator_tokens
+                .as_ref()
+                .map(|non_separator_tokens| non_separator_tokens.len()),
+        }
+    }
+
+    pub fn to_settings(self) -> SettingsAnalytics {
+        SettingsAnalytics { non_separator_tokens: self, ..Default::default() }
+    }
+}
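Since `into_event` simply serializes the struct, unset fields come out as `null` and `#[serde(transparent)]` collapses single-field wrappers to their inner value. A hedged sketch of the resulting JSON for a single-setting update:

```rust
// Sketch only: what `into_event` yields for one updated setting.
let event = SearchCutoffMsAnalytics { search_cutoff_ms: Some(150) }.to_settings();
let json = serde_json::to_value(event.into_event()).unwrap();
assert_eq!(json["search_cutoff_ms"], 150); // transparent: bare value, not nested
assert!(json["pagination"]["max_total_hits"].is_null()); // untouched settings stay null
```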
+
 pub async fn update_all(
     index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, Data<IndexScheduler>>,
     index_uid: web::Path<String>,
     body: AwebJson<Settings<Unchecked>, DeserrJsonError>,
     req: HttpRequest,
     opt: web::Data<Opt>,
-    analytics: web::Data<dyn Analytics>,
+    analytics: web::Data<Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;
@@ -729,103 +1218,44 @@ pub async fn update_all(
     let new_settings = validate_settings(new_settings, &index_scheduler)?;

     analytics.publish(
-        "Settings Updated".to_string(),
-        json!({
-            "ranking_rules": {
-                "words_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Words))),
-                "typo_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Typo))),
-                "proximity_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Proximity))),
-                "attribute_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Attribute))),
-                "sort_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Sort))),
-                "exactness_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Exactness))),
-                "values": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().filter(|s| !matches!(s, RankingRuleView::Asc(_) | RankingRuleView::Desc(_))).map(|x| x.to_string()).collect::<Vec<_>>().join(", ")),
-            },
-            "searchable_attributes": {
-                "total": new_settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()),
-                "with_wildcard": new_settings.searchable_attributes.as_ref().set().map(|searchable| searchable.iter().any(|searchable| searchable == "*")),
-            },
-            "displayed_attributes": {
-                "total": new_settings.displayed_attributes.as_ref().set().map(|displayed| displayed.len()),
-                "with_wildcard": new_settings.displayed_attributes.as_ref().set().map(|displayed| displayed.iter().any(|displayed| displayed == "*")),
-            },
-            "sortable_attributes": {
-                "total": new_settings.sortable_attributes.as_ref().set().map(|sort| sort.len()),
-                "has_geo": new_settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")),
-            },
-            "filterable_attributes": {
-                "total": new_settings.filterable_attributes.as_ref().set().map(|filter| filter.len()),
-                "has_geo": new_settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")),
-            },
-            "distinct_attribute": {
-                "set": new_settings.distinct_attribute.as_ref().set().is_some()
-            },
-            "proximity_precision": {
-                "set": new_settings.proximity_precision.as_ref().set().is_some(),
-                "value": new_settings.proximity_precision.as_ref().set().copied().unwrap_or_default()
-            },
-            "typo_tolerance": {
-                "enabled": new_settings.typo_tolerance
-                    .as_ref()
-                    .set()
-                    .and_then(|s| s.enabled.as_ref().set())
-                    .copied(),
-                "disable_on_attributes": new_settings.typo_tolerance
-                    .as_ref()
-                    .set()
-                    .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())),
-                "disable_on_words": new_settings.typo_tolerance
-                    .as_ref()
-                    .set()
-                    .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())),
-                "min_word_size_for_one_typo": new_settings.typo_tolerance
-                    .as_ref()
-                    .set()
-                    .and_then(|s| s.min_word_size_for_typos
-                        .as_ref()
-                        .set()
-                        .map(|s| s.one_typo.set()))
-                    .flatten(),
-                "min_word_size_for_two_typos": new_settings.typo_tolerance
-                    .as_ref()
-                    .set()
-                    .and_then(|s| s.min_word_size_for_typos
-                        .as_ref()
-                        .set()
-                        .map(|s| s.two_typos.set()))
-                    .flatten(),
-            },
-            "faceting": {
-                "max_values_per_facet": new_settings.faceting
-                    .as_ref()
-                    .set()
-                    .and_then(|s| s.max_values_per_facet.as_ref().set()),
-                "sort_facet_values_by_star_count": new_settings.faceting
-                    .as_ref()
-                    .set()
-                    .and_then(|s| {
-                        s.sort_facet_values_by.as_ref().set().map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count))
-                    }),
-                "sort_facet_values_by_total": new_settings.faceting
-                    .as_ref()
-                    .set()
-                    .and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())),
-            },
-            "pagination": {
-                "max_total_hits": new_settings.pagination
-                    .as_ref()
-                    .set()
-                    .and_then(|s| s.max_total_hits.as_ref().set()),
-            },
-            "stop_words": {
-                "total": new_settings.stop_words.as_ref().set().map(|stop_words| stop_words.len()),
-            },
-            "synonyms": {
-                "total": new_settings.synonyms.as_ref().set().map(|synonyms| synonyms.len()),
-            },
-            "embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set()),
-            "search_cutoff_ms": new_settings.search_cutoff_ms.as_ref().set(),
-            "locales": new_settings.localized_attributes.as_ref().set().map(|rules| rules.iter().flat_map(|rule| rule.locales.iter().cloned()).collect::<BTreeSet<_>>()),
-        }),
+        SettingsAnalytics {
+            ranking_rules: RankingRulesAnalytics::new(new_settings.ranking_rules.as_ref().set()),
+            searchable_attributes: SearchableAttributesAnalytics::new(
+                new_settings.searchable_attributes.as_ref().set(),
+            ),
+            displayed_attributes: DisplayedAttributesAnalytics::new(
+                new_settings.displayed_attributes.as_ref().set(),
+            ),
+            sortable_attributes: SortableAttributesAnalytics::new(
+                new_settings.sortable_attributes.as_ref().set(),
+            ),
+            filterable_attributes: FilterableAttributesAnalytics::new(
+                new_settings.filterable_attributes.as_ref().set(),
+            ),
+            distinct_attribute: DistinctAttributeAnalytics::new(
+                new_settings.distinct_attribute.as_ref().set(),
+            ),
+            proximity_precision: ProximityPrecisionAnalytics::new(
+                new_settings.proximity_precision.as_ref().set(),
+            ),
+            typo_tolerance: TypoToleranceAnalytics::new(new_settings.typo_tolerance.as_ref().set()),
+            faceting: FacetingAnalytics::new(new_settings.faceting.as_ref().set()),
+            pagination: PaginationAnalytics::new(new_settings.pagination.as_ref().set()),
+            stop_words: StopWordsAnalytics::new(new_settings.stop_words.as_ref().set()),
+            synonyms: SynonymsAnalytics::new(new_settings.synonyms.as_ref().set()),
+            embedders: EmbeddersAnalytics::new(new_settings.embedders.as_ref().set()),
+            search_cutoff_ms: SearchCutoffMsAnalytics::new(
+                new_settings.search_cutoff_ms.as_ref().set(),
+            ),
+            locales: LocalesAnalytics::new(new_settings.localized_attributes.as_ref().set()),
+            dictionary: DictionaryAnalytics::new(new_settings.dictionary.as_ref().set()),
+            separator_tokens: SeparatorTokensAnalytics::new(
+                new_settings.separator_tokens.as_ref().set(),
+            ),
+            non_separator_tokens: NonSeparatorTokensAnalytics::new(
+                new_settings.non_separator_tokens.as_ref().set(),
+            ),
+        },
         Some(&req),
     );

diff --git a/meilisearch/src/routes/swap_indexes.rs b/meilisearch/src/routes/swap_indexes.rs
index 51a7b0707..34e904230 100644
--- a/meilisearch/src/routes/swap_indexes.rs
+++ b/meilisearch/src/routes/swap_indexes.rs
@@ -40,7 +40,7 @@ pub async fn swap_indexes(
     analytics.publish(
         "Indexes Swapped".to_string(),
         json!({
-            "swap_operation_number": params.len(),
+            "swap_operation_number": params.len(), // Return the max ever encountered
         }),
         Some(&req),
     );
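The trailing comment suggests that when "Indexes Swapped" is moved onto the new `Aggregate` trait, merged events should keep the maximum `swap_operation_number` seen rather than the last one. A hedged sketch of what such an aggregate could look like; the struct name and field are assumptions, not code from this change:

```rust
use serde::Serialize;

// Hypothetical aggregate for the "Indexes Swapped" event (names assumed).
#[derive(Default)]
struct IndexSwappedAggregate {
    swap_operation_number: usize,
}

impl Aggregate for IndexSwappedAggregate {
    fn event_name(&self) -> &'static str {
        "Indexes Swapped"
    }

    fn aggregate(self, other: Self) -> Self {
        // Keep the max ever encountered, per the comment above.
        Self { swap_operation_number: self.swap_operation_number.max(other.swap_operation_number) }
    }

    fn into_event(self) -> impl Serialize {
        serde_json::json!({ "swap_operation_number": self.swap_operation_number })
    }
}
```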