//! All the structures used to make the analytics on the settings works. //! The signatures of the `new` functions are not very rust idiomatic because they must match the types received //! through the sub-settings route directly without any manipulation. //! This is why we often use a `Option<&Vec<_>>` instead of a `Option<&[_]>`. use std::collections::{BTreeMap, BTreeSet, HashSet}; use meilisearch_types::facet_values_sort::FacetValuesSort; use meilisearch_types::locales::{Locale, LocalizedAttributesRuleView}; use meilisearch_types::milli::update::Setting; use meilisearch_types::settings::{ FacetingSettings, PaginationSettings, PrefixSearchSettings, ProximityPrecisionView, RankingRuleView, SettingEmbeddingSettings, TypoSettings, }; use serde::Serialize; use crate::analytics::Aggregate; #[derive(Serialize, Default)] pub struct SettingsAnalytics { pub ranking_rules: RankingRulesAnalytics, pub searchable_attributes: SearchableAttributesAnalytics, pub displayed_attributes: DisplayedAttributesAnalytics, pub sortable_attributes: SortableAttributesAnalytics, pub filterable_attributes: FilterableAttributesAnalytics, pub distinct_attribute: DistinctAttributeAnalytics, pub proximity_precision: ProximityPrecisionAnalytics, pub typo_tolerance: TypoToleranceAnalytics, pub faceting: FacetingAnalytics, pub pagination: PaginationAnalytics, pub stop_words: StopWordsAnalytics, pub synonyms: SynonymsAnalytics, pub embedders: EmbeddersAnalytics, pub search_cutoff_ms: SearchCutoffMsAnalytics, pub locales: LocalesAnalytics, pub dictionary: DictionaryAnalytics, pub separator_tokens: SeparatorTokensAnalytics, pub non_separator_tokens: NonSeparatorTokensAnalytics, pub facet_search: FacetSearchAnalytics, pub prefix_search: PrefixSearchAnalytics, } impl Aggregate for SettingsAnalytics { fn event_name(&self) -> &'static str { "Settings Updated" } fn aggregate(self: Box, new: Box) -> Box { Box::new(Self { ranking_rules: RankingRulesAnalytics { words_position: new .ranking_rules .words_position .or(self.ranking_rules.words_position), typo_position: new.ranking_rules.typo_position.or(self.ranking_rules.typo_position), proximity_position: new .ranking_rules .proximity_position .or(self.ranking_rules.proximity_position), attribute_position: new .ranking_rules .attribute_position .or(self.ranking_rules.attribute_position), sort_position: new.ranking_rules.sort_position.or(self.ranking_rules.sort_position), exactness_position: new .ranking_rules .exactness_position .or(self.ranking_rules.exactness_position), values: new.ranking_rules.values.or(self.ranking_rules.values), }, searchable_attributes: SearchableAttributesAnalytics { total: new.searchable_attributes.total.or(self.searchable_attributes.total), with_wildcard: new .searchable_attributes .with_wildcard .or(self.searchable_attributes.with_wildcard), }, displayed_attributes: DisplayedAttributesAnalytics { total: new.displayed_attributes.total.or(self.displayed_attributes.total), with_wildcard: new .displayed_attributes .with_wildcard .or(self.displayed_attributes.with_wildcard), }, sortable_attributes: SortableAttributesAnalytics { total: new.sortable_attributes.total.or(self.sortable_attributes.total), has_geo: new.sortable_attributes.has_geo.or(self.sortable_attributes.has_geo), }, filterable_attributes: FilterableAttributesAnalytics { total: new.filterable_attributes.total.or(self.filterable_attributes.total), has_geo: new.filterable_attributes.has_geo.or(self.filterable_attributes.has_geo), }, distinct_attribute: DistinctAttributeAnalytics { set: self.distinct_attribute.set | new.distinct_attribute.set, }, proximity_precision: ProximityPrecisionAnalytics { set: self.proximity_precision.set | new.proximity_precision.set, value: new.proximity_precision.value.or(self.proximity_precision.value), }, typo_tolerance: TypoToleranceAnalytics { enabled: new.typo_tolerance.enabled.or(self.typo_tolerance.enabled), disable_on_attributes: new .typo_tolerance .disable_on_attributes .or(self.typo_tolerance.disable_on_attributes), disable_on_words: new .typo_tolerance .disable_on_words .or(self.typo_tolerance.disable_on_words), min_word_size_for_one_typo: new .typo_tolerance .min_word_size_for_one_typo .or(self.typo_tolerance.min_word_size_for_one_typo), min_word_size_for_two_typos: new .typo_tolerance .min_word_size_for_two_typos .or(self.typo_tolerance.min_word_size_for_two_typos), }, faceting: FacetingAnalytics { max_values_per_facet: new .faceting .max_values_per_facet .or(self.faceting.max_values_per_facet), sort_facet_values_by_star_count: new .faceting .sort_facet_values_by_star_count .or(self.faceting.sort_facet_values_by_star_count), sort_facet_values_by_total: new .faceting .sort_facet_values_by_total .or(self.faceting.sort_facet_values_by_total), }, pagination: PaginationAnalytics { max_total_hits: new.pagination.max_total_hits.or(self.pagination.max_total_hits), }, stop_words: StopWordsAnalytics { total: new.stop_words.total.or(self.stop_words.total), }, synonyms: SynonymsAnalytics { total: new.synonyms.total.or(self.synonyms.total) }, embedders: EmbeddersAnalytics { total: new.embedders.total.or(self.embedders.total), sources: match (self.embedders.sources, new.embedders.sources) { (None, None) => None, (Some(sources), None) | (None, Some(sources)) => Some(sources), (Some(this), Some(other)) => Some(this.union(&other).cloned().collect()), }, document_template_used: match ( self.embedders.document_template_used, new.embedders.document_template_used, ) { (None, None) => None, (Some(used), None) | (None, Some(used)) => Some(used), (Some(this), Some(other)) => Some(this | other), }, document_template_max_bytes: match ( self.embedders.document_template_max_bytes, new.embedders.document_template_max_bytes, ) { (None, None) => None, (Some(bytes), None) | (None, Some(bytes)) => Some(bytes), (Some(this), Some(other)) => Some(this.max(other)), }, binary_quantization_used: match ( self.embedders.binary_quantization_used, new.embedders.binary_quantization_used, ) { (None, None) => None, (Some(bq), None) | (None, Some(bq)) => Some(bq), (Some(this), Some(other)) => Some(this | other), }, }, search_cutoff_ms: SearchCutoffMsAnalytics { search_cutoff_ms: new .search_cutoff_ms .search_cutoff_ms .or(self.search_cutoff_ms.search_cutoff_ms), }, locales: LocalesAnalytics { locales: new.locales.locales.or(self.locales.locales) }, dictionary: DictionaryAnalytics { total: new.dictionary.total.or(self.dictionary.total), }, separator_tokens: SeparatorTokensAnalytics { total: new.non_separator_tokens.total.or(self.separator_tokens.total), }, non_separator_tokens: NonSeparatorTokensAnalytics { total: new.non_separator_tokens.total.or(self.non_separator_tokens.total), }, facet_search: FacetSearchAnalytics { set: new.facet_search.set | self.facet_search.set, value: new.facet_search.value.or(self.facet_search.value), }, prefix_search: PrefixSearchAnalytics { set: new.prefix_search.set | self.prefix_search.set, value: new.prefix_search.value.or(self.prefix_search.value), }, }) } fn into_event(self: Box) -> serde_json::Value { serde_json::to_value(*self).unwrap_or_default() } } #[derive(Serialize, Default)] pub struct RankingRulesAnalytics { pub words_position: Option, pub typo_position: Option, pub proximity_position: Option, pub attribute_position: Option, pub sort_position: Option, pub exactness_position: Option, pub values: Option, } impl RankingRulesAnalytics { pub fn new(rr: Option<&Vec>) -> Self { RankingRulesAnalytics { words_position: rr.as_ref().and_then(|rr| { rr.iter() .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Words)) }), typo_position: rr.as_ref().and_then(|rr| { rr.iter() .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo)) }), proximity_position: rr.as_ref().and_then(|rr| { rr.iter().position(|s| { matches!(s, meilisearch_types::settings::RankingRuleView::Proximity) }) }), attribute_position: rr.as_ref().and_then(|rr| { rr.iter().position(|s| { matches!(s, meilisearch_types::settings::RankingRuleView::Attribute) }) }), sort_position: rr.as_ref().and_then(|rr| { rr.iter() .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort)) }), exactness_position: rr.as_ref().and_then(|rr| { rr.iter().position(|s| { matches!(s, meilisearch_types::settings::RankingRuleView::Exactness) }) }), values: rr.as_ref().map(|rr| { rr.iter() .filter(|s| { matches!( s, meilisearch_types::settings::RankingRuleView::Asc(_) | meilisearch_types::settings::RankingRuleView::Desc(_) ) }) .map(|x| x.to_string()) .collect::>() .join(", ") }), } } pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { ranking_rules: self, ..Default::default() } } } #[derive(Serialize, Default)] pub struct SearchableAttributesAnalytics { pub total: Option, pub with_wildcard: Option, } impl SearchableAttributesAnalytics { pub fn new(setting: Option<&Vec>) -> Self { Self { total: setting.as_ref().map(|searchable| searchable.len()), with_wildcard: setting .as_ref() .map(|searchable| searchable.iter().any(|searchable| searchable == "*")), } } pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { searchable_attributes: self, ..Default::default() } } } #[derive(Serialize, Default)] pub struct DisplayedAttributesAnalytics { pub total: Option, pub with_wildcard: Option, } impl DisplayedAttributesAnalytics { pub fn new(displayed: Option<&Vec>) -> Self { Self { total: displayed.as_ref().map(|displayed| displayed.len()), with_wildcard: displayed .as_ref() .map(|displayed| displayed.iter().any(|displayed| displayed == "*")), } } pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { displayed_attributes: self, ..Default::default() } } } #[derive(Serialize, Default)] pub struct SortableAttributesAnalytics { pub total: Option, pub has_geo: Option, } impl SortableAttributesAnalytics { pub fn new(setting: Option<&BTreeSet>) -> Self { Self { total: setting.as_ref().map(|sort| sort.len()), has_geo: setting.as_ref().map(|sort| sort.contains("_geo")), } } pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { sortable_attributes: self, ..Default::default() } } } #[derive(Serialize, Default)] pub struct FilterableAttributesAnalytics { pub total: Option, pub has_geo: Option, } impl FilterableAttributesAnalytics { pub fn new(setting: Option<&BTreeSet>) -> Self { Self { total: setting.as_ref().map(|filter| filter.len()), has_geo: setting.as_ref().map(|filter| filter.contains("_geo")), } } pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { filterable_attributes: self, ..Default::default() } } } #[derive(Serialize, Default)] pub struct DistinctAttributeAnalytics { pub set: bool, } impl DistinctAttributeAnalytics { pub fn new(distinct: Option<&String>) -> Self { Self { set: distinct.is_some() } } pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { distinct_attribute: self, ..Default::default() } } } #[derive(Serialize, Default)] pub struct ProximityPrecisionAnalytics { pub set: bool, pub value: Option, } impl ProximityPrecisionAnalytics { pub fn new(precision: Option<&ProximityPrecisionView>) -> Self { Self { set: precision.is_some(), value: precision.cloned() } } pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { proximity_precision: self, ..Default::default() } } } #[derive(Serialize, Default)] pub struct TypoToleranceAnalytics { pub enabled: Option, pub disable_on_attributes: Option, pub disable_on_words: Option, pub min_word_size_for_one_typo: Option, pub min_word_size_for_two_typos: Option, } impl TypoToleranceAnalytics { pub fn new(setting: Option<&TypoSettings>) -> Self { Self { enabled: setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))), disable_on_attributes: setting .as_ref() .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), disable_on_words: setting .as_ref() .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), min_word_size_for_one_typo: setting .as_ref() .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.one_typo.set())) .flatten(), min_word_size_for_two_typos: setting .as_ref() .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.two_typos.set())) .flatten(), } } pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { typo_tolerance: self, ..Default::default() } } } #[derive(Serialize, Default)] pub struct FacetingAnalytics { pub max_values_per_facet: Option, pub sort_facet_values_by_star_count: Option, pub sort_facet_values_by_total: Option, } impl FacetingAnalytics { pub fn new(setting: Option<&FacetingSettings>) -> Self { Self { max_values_per_facet: setting.as_ref().and_then(|s| s.max_values_per_facet.set()), sort_facet_values_by_star_count: setting.as_ref().and_then(|s| { s.sort_facet_values_by .as_ref() .set() .map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count)) }), sort_facet_values_by_total: setting .as_ref() .and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())), } } pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { faceting: self, ..Default::default() } } } #[derive(Serialize, Default)] pub struct PaginationAnalytics { pub max_total_hits: Option, } impl PaginationAnalytics { pub fn new(setting: Option<&PaginationSettings>) -> Self { Self { max_total_hits: setting.as_ref().and_then(|s| s.max_total_hits.set()) } } pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { pagination: self, ..Default::default() } } } #[derive(Serialize, Default)] pub struct StopWordsAnalytics { pub total: Option, } impl StopWordsAnalytics { pub fn new(stop_words: Option<&BTreeSet>) -> Self { Self { total: stop_words.as_ref().map(|stop_words| stop_words.len()) } } pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { stop_words: self, ..Default::default() } } } #[derive(Serialize, Default)] pub struct SynonymsAnalytics { pub total: Option, } impl SynonymsAnalytics { pub fn new(synonyms: Option<&BTreeMap>>) -> Self { Self { total: synonyms.as_ref().map(|synonyms| synonyms.len()) } } pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { synonyms: self, ..Default::default() } } } #[derive(Serialize, Default)] pub struct EmbeddersAnalytics { // last pub total: Option, // Merge the sources pub sources: Option>, // |= pub document_template_used: Option, // max pub document_template_max_bytes: Option, // |= pub binary_quantization_used: Option, } impl EmbeddersAnalytics { pub fn new(setting: Option<&BTreeMap>) -> Self { let mut sources = std::collections::HashSet::new(); if let Some(s) = &setting { for source in s .values() .filter_map(|config| config.inner.clone().set()) .filter_map(|config| config.source.set()) { use meilisearch_types::milli::vector::settings::EmbedderSource; match source { EmbedderSource::OpenAi => sources.insert("openAi".to_string()), EmbedderSource::HuggingFace => sources.insert("huggingFace".to_string()), EmbedderSource::UserProvided => sources.insert("userProvided".to_string()), EmbedderSource::Ollama => sources.insert("ollama".to_string()), EmbedderSource::Rest => sources.insert("rest".to_string()), EmbedderSource::Composite => sources.insert("composite".to_string()), }; } }; Self { total: setting.as_ref().map(|s| s.len()), sources: Some(sources), document_template_used: setting.as_ref().map(|map| { map.values() .filter_map(|config| config.inner.clone().set()) .any(|config| config.document_template.set().is_some()) }), document_template_max_bytes: setting.as_ref().and_then(|map| { map.values() .filter_map(|config| config.inner.clone().set()) .filter_map(|config| config.document_template_max_bytes.set()) .max() }), binary_quantization_used: setting.as_ref().map(|map| { map.values() .filter_map(|config| config.inner.clone().set()) .any(|config| config.binary_quantized.set().is_some()) }), } } pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { embedders: self, ..Default::default() } } } #[derive(Serialize, Default)] #[serde(transparent)] pub struct SearchCutoffMsAnalytics { pub search_cutoff_ms: Option, } impl SearchCutoffMsAnalytics { pub fn new(setting: Option<&u64>) -> Self { Self { search_cutoff_ms: setting.copied() } } pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { search_cutoff_ms: self, ..Default::default() } } } #[derive(Serialize, Default)] #[serde(transparent)] pub struct LocalesAnalytics { pub locales: Option>, } impl LocalesAnalytics { pub fn new(rules: Option<&Vec>) -> Self { LocalesAnalytics { locales: rules.as_ref().map(|rules| { rules .iter() .flat_map(|rule| rule.locales.iter().cloned()) .collect::>() }), } } pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { locales: self, ..Default::default() } } } #[derive(Serialize, Default)] pub struct DictionaryAnalytics { pub total: Option, } impl DictionaryAnalytics { pub fn new(dictionary: Option<&BTreeSet>) -> Self { Self { total: dictionary.as_ref().map(|dictionary| dictionary.len()) } } pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { dictionary: self, ..Default::default() } } } #[derive(Serialize, Default)] pub struct SeparatorTokensAnalytics { pub total: Option, } impl SeparatorTokensAnalytics { pub fn new(separator_tokens: Option<&BTreeSet>) -> Self { Self { total: separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()) } } pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { separator_tokens: self, ..Default::default() } } } #[derive(Serialize, Default)] pub struct NonSeparatorTokensAnalytics { pub total: Option, } impl NonSeparatorTokensAnalytics { pub fn new(non_separator_tokens: Option<&BTreeSet>) -> Self { Self { total: non_separator_tokens .as_ref() .map(|non_separator_tokens| non_separator_tokens.len()), } } pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { non_separator_tokens: self, ..Default::default() } } } #[derive(Serialize, Default)] pub struct FacetSearchAnalytics { pub set: bool, pub value: Option, } impl FacetSearchAnalytics { pub fn new(settings: Option<&bool>) -> Self { Self { set: settings.is_some(), value: settings.copied() } } pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { facet_search: self, ..Default::default() } } } #[derive(Serialize, Default)] pub struct PrefixSearchAnalytics { pub set: bool, pub value: Option, } impl PrefixSearchAnalytics { pub fn new(settings: Option<&PrefixSearchSettings>) -> Self { Self { set: settings.is_some(), value: settings.cloned() } } pub fn into_settings(self) -> SettingsAnalytics { SettingsAnalytics { prefix_search: self, ..Default::default() } } }