diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index 8972119d7..65c81a57e 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -29,6 +29,7 @@ pub mod documents; pub mod facet_search; pub mod search; pub mod settings; +mod settings_analytics; pub mod similar; pub fn configure(cfg: &mut web::ServiceConfig) { diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index 745ad5c78..bca763a99 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -1,23 +1,17 @@ -use std::collections::{BTreeSet, HashSet}; - +use super::settings_analytics::*; use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; use index_scheduler::IndexScheduler; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; -use meilisearch_types::facet_values_sort::FacetValuesSort; use meilisearch_types::index_uid::IndexUid; -use meilisearch_types::locales::Locale; use meilisearch_types::milli::update::Setting; -use meilisearch_types::settings::{ - settings, ProximityPrecisionView, RankingRuleView, SecretPolicy, Settings, Unchecked, -}; +use meilisearch_types::settings::{settings, SecretPolicy, Settings, Unchecked}; use meilisearch_types::tasks::KindWithContent; -use serde::Serialize; use tracing::debug; -use crate::analytics::{Aggregate, Analytics}; +use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView}; @@ -94,7 +88,7 @@ macro_rules! 
make_setting_route { #[allow(clippy::redundant_closure_call)] analytics.publish( - $crate::routes::indexes::settings::$analytics::new(body.as_ref()).into_settings(), + $crate::routes::indexes::settings_analytics::$analytics::new(body.as_ref()).into_settings(), &req, ); @@ -410,626 +404,6 @@ generate_configure!( search_cutoff_ms ); -#[derive(Serialize, Default)] -struct SettingsAnalytics { - ranking_rules: RankingRulesAnalytics, - searchable_attributes: SearchableAttributesAnalytics, - displayed_attributes: DisplayedAttributesAnalytics, - sortable_attributes: SortableAttributesAnalytics, - filterable_attributes: FilterableAttributesAnalytics, - distinct_attribute: DistinctAttributeAnalytics, - proximity_precision: ProximityPrecisionAnalytics, - typo_tolerance: TypoToleranceAnalytics, - faceting: FacetingAnalytics, - pagination: PaginationAnalytics, - stop_words: StopWordsAnalytics, - synonyms: SynonymsAnalytics, - embedders: EmbeddersAnalytics, - search_cutoff_ms: SearchCutoffMsAnalytics, - locales: LocalesAnalytics, - dictionary: DictionaryAnalytics, - separator_tokens: SeparatorTokensAnalytics, - non_separator_tokens: NonSeparatorTokensAnalytics, -} - -impl Aggregate for SettingsAnalytics { - fn event_name(&self) -> &'static str { - "Settings Updated" - } - - fn aggregate(self: Box, other: Box) -> Box { - Box::new(Self { - ranking_rules: RankingRulesAnalytics { - words_position: self - .ranking_rules - .words_position - .or(other.ranking_rules.words_position), - typo_position: self - .ranking_rules - .typo_position - .or(other.ranking_rules.typo_position), - proximity_position: self - .ranking_rules - .proximity_position - .or(other.ranking_rules.proximity_position), - attribute_position: self - .ranking_rules - .attribute_position - .or(other.ranking_rules.attribute_position), - sort_position: self - .ranking_rules - .sort_position - .or(other.ranking_rules.sort_position), - exactness_position: self - .ranking_rules - .exactness_position - 
.or(other.ranking_rules.exactness_position), - values: self.ranking_rules.values.or(other.ranking_rules.values), - }, - searchable_attributes: SearchableAttributesAnalytics { - total: self.searchable_attributes.total.or(other.searchable_attributes.total), - with_wildcard: self - .searchable_attributes - .with_wildcard - .or(other.searchable_attributes.with_wildcard), - }, - displayed_attributes: DisplayedAttributesAnalytics { - total: self.displayed_attributes.total.or(other.displayed_attributes.total), - with_wildcard: self - .displayed_attributes - .with_wildcard - .or(other.displayed_attributes.with_wildcard), - }, - sortable_attributes: SortableAttributesAnalytics { - total: self.sortable_attributes.total.or(other.sortable_attributes.total), - has_geo: self.sortable_attributes.has_geo.or(other.sortable_attributes.has_geo), - }, - filterable_attributes: FilterableAttributesAnalytics { - total: self.filterable_attributes.total.or(other.filterable_attributes.total), - has_geo: self.filterable_attributes.has_geo.or(other.filterable_attributes.has_geo), - }, - distinct_attribute: DistinctAttributeAnalytics { - set: self.distinct_attribute.set | other.distinct_attribute.set, - }, - proximity_precision: ProximityPrecisionAnalytics { - set: self.proximity_precision.set | other.proximity_precision.set, - value: self.proximity_precision.value.or(other.proximity_precision.value), - }, - typo_tolerance: TypoToleranceAnalytics { - enabled: self.typo_tolerance.enabled.or(other.typo_tolerance.enabled), - disable_on_attributes: self - .typo_tolerance - .disable_on_attributes - .or(other.typo_tolerance.disable_on_attributes), - disable_on_words: self - .typo_tolerance - .disable_on_words - .or(other.typo_tolerance.disable_on_words), - min_word_size_for_one_typo: self - .typo_tolerance - .min_word_size_for_one_typo - .or(other.typo_tolerance.min_word_size_for_one_typo), - min_word_size_for_two_typos: self - .typo_tolerance - .min_word_size_for_two_typos - 
.or(other.typo_tolerance.min_word_size_for_two_typos), - }, - faceting: FacetingAnalytics { - max_values_per_facet: self - .faceting - .max_values_per_facet - .or(other.faceting.max_values_per_facet), - sort_facet_values_by_star_count: self - .faceting - .sort_facet_values_by_star_count - .or(other.faceting.sort_facet_values_by_star_count), - sort_facet_values_by_total: self - .faceting - .sort_facet_values_by_total - .or(other.faceting.sort_facet_values_by_total), - }, - pagination: PaginationAnalytics { - max_total_hits: self.pagination.max_total_hits.or(other.pagination.max_total_hits), - }, - stop_words: StopWordsAnalytics { - total: self.stop_words.total.or(other.stop_words.total), - }, - synonyms: SynonymsAnalytics { total: self.synonyms.total.or(other.synonyms.total) }, - embedders: EmbeddersAnalytics { - total: self.embedders.total.or(other.embedders.total), - sources: match (self.embedders.sources, other.embedders.sources) { - (None, None) => None, - (Some(sources), None) | (None, Some(sources)) => Some(sources), - (Some(this), Some(other)) => Some(this.union(&other).cloned().collect()), - }, - document_template_used: match ( - self.embedders.document_template_used, - other.embedders.document_template_used, - ) { - (None, None) => None, - (Some(used), None) | (None, Some(used)) => Some(used), - (Some(this), Some(other)) => Some(this | other), - }, - document_template_max_bytes: match ( - self.embedders.document_template_max_bytes, - other.embedders.document_template_max_bytes, - ) { - (None, None) => None, - (Some(bytes), None) | (None, Some(bytes)) => Some(bytes), - (Some(this), Some(other)) => Some(this.max(other)), - }, - binary_quantization_used: match ( - self.embedders.binary_quantization_used, - other.embedders.binary_quantization_used, - ) { - (None, None) => None, - (Some(bq), None) | (None, Some(bq)) => Some(bq), - (Some(this), Some(other)) => Some(this | other), - }, - }, - search_cutoff_ms: SearchCutoffMsAnalytics { - search_cutoff_ms: self - 
.search_cutoff_ms - .search_cutoff_ms - .or(other.search_cutoff_ms.search_cutoff_ms), - }, - locales: LocalesAnalytics { locales: self.locales.locales.or(other.locales.locales) }, - dictionary: DictionaryAnalytics { - total: self.dictionary.total.or(other.dictionary.total), - }, - separator_tokens: SeparatorTokensAnalytics { - total: self.separator_tokens.total.or(other.non_separator_tokens.total), - }, - non_separator_tokens: NonSeparatorTokensAnalytics { - total: self.non_separator_tokens.total.or(other.non_separator_tokens.total), - }, - }) - } - - fn into_event(self: Box) -> serde_json::Value { - serde_json::to_value(*self).unwrap_or_default() - } -} - -#[derive(Serialize, Default)] -struct RankingRulesAnalytics { - words_position: Option, - typo_position: Option, - proximity_position: Option, - attribute_position: Option, - sort_position: Option, - exactness_position: Option, - values: Option, -} - -impl RankingRulesAnalytics { - pub fn new(rr: Option<&Vec>) -> Self { - RankingRulesAnalytics { - words_position: rr.as_ref().and_then(|rr| { - rr.iter() - .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Words)) - }), - typo_position: rr.as_ref().and_then(|rr| { - rr.iter() - .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo)) - }), - proximity_position: rr.as_ref().and_then(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Proximity) - }) - }), - attribute_position: rr.as_ref().and_then(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Attribute) - }) - }), - sort_position: rr.as_ref().and_then(|rr| { - rr.iter() - .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort)) - }), - exactness_position: rr.as_ref().and_then(|rr| { - rr.iter().position(|s| { - matches!(s, meilisearch_types::settings::RankingRuleView::Exactness) - }) - }), - values: rr.as_ref().map(|rr| { - rr.iter() - .filter(|s| { - matches!( - 
s, - meilisearch_types::settings::RankingRuleView::Asc(_) - | meilisearch_types::settings::RankingRuleView::Desc(_) - ) - }) - .map(|x| x.to_string()) - .collect::>() - .join(", ") - }), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { ranking_rules: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct SearchableAttributesAnalytics { - total: Option, - with_wildcard: Option, -} - -impl SearchableAttributesAnalytics { - pub fn new(setting: Option<&Vec>) -> Self { - Self { - total: setting.as_ref().map(|searchable| searchable.len()), - with_wildcard: setting - .as_ref() - .map(|searchable| searchable.iter().any(|searchable| searchable == "*")), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { searchable_attributes: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct DisplayedAttributesAnalytics { - total: Option, - with_wildcard: Option, -} - -impl DisplayedAttributesAnalytics { - pub fn new(displayed: Option<&Vec>) -> Self { - Self { - total: displayed.as_ref().map(|displayed| displayed.len()), - with_wildcard: displayed - .as_ref() - .map(|displayed| displayed.iter().any(|displayed| displayed == "*")), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { displayed_attributes: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct SortableAttributesAnalytics { - total: Option, - has_geo: Option, -} - -impl SortableAttributesAnalytics { - pub fn new(setting: Option<&std::collections::BTreeSet>) -> Self { - Self { - total: setting.as_ref().map(|sort| sort.len()), - has_geo: setting.as_ref().map(|sort| sort.contains("_geo")), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { sortable_attributes: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct FilterableAttributesAnalytics { - total: Option, - has_geo: Option, -} - -impl 
FilterableAttributesAnalytics { - pub fn new(setting: Option<&std::collections::BTreeSet>) -> Self { - Self { - total: setting.as_ref().map(|filter| filter.len()), - has_geo: setting.as_ref().map(|filter| filter.contains("_geo")), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { filterable_attributes: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct DistinctAttributeAnalytics { - set: bool, -} - -impl DistinctAttributeAnalytics { - pub fn new(distinct: Option<&String>) -> Self { - Self { set: distinct.is_some() } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { distinct_attribute: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct ProximityPrecisionAnalytics { - set: bool, - value: Option, -} - -impl ProximityPrecisionAnalytics { - pub fn new(precision: Option<&meilisearch_types::settings::ProximityPrecisionView>) -> Self { - Self { set: precision.is_some(), value: precision.cloned() } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { proximity_precision: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct TypoToleranceAnalytics { - enabled: Option, - disable_on_attributes: Option, - disable_on_words: Option, - min_word_size_for_one_typo: Option, - min_word_size_for_two_typos: Option, -} - -impl TypoToleranceAnalytics { - pub fn new(setting: Option<&meilisearch_types::settings::TypoSettings>) -> Self { - Self { - enabled: setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))), - disable_on_attributes: setting - .as_ref() - .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), - disable_on_words: setting - .as_ref() - .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), - min_word_size_for_one_typo: setting - .as_ref() - .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.one_typo.set())) - .flatten(), - 
min_word_size_for_two_typos: setting - .as_ref() - .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.two_typos.set())) - .flatten(), - } - } - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { typo_tolerance: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct FacetingAnalytics { - max_values_per_facet: Option, - sort_facet_values_by_star_count: Option, - sort_facet_values_by_total: Option, -} - -impl FacetingAnalytics { - pub fn new(setting: Option<&meilisearch_types::settings::FacetingSettings>) -> Self { - Self { - max_values_per_facet: setting.as_ref().and_then(|s| s.max_values_per_facet.set()), - sort_facet_values_by_star_count: setting.as_ref().and_then(|s| { - s.sort_facet_values_by - .as_ref() - .set() - .map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count)) - }), - sort_facet_values_by_total: setting - .as_ref() - .and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { faceting: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct PaginationAnalytics { - max_total_hits: Option, -} - -impl PaginationAnalytics { - pub fn new(setting: Option<&meilisearch_types::settings::PaginationSettings>) -> Self { - Self { max_total_hits: setting.as_ref().and_then(|s| s.max_total_hits.set()) } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { pagination: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct StopWordsAnalytics { - total: Option, -} - -impl StopWordsAnalytics { - pub fn new(stop_words: Option<&BTreeSet>) -> Self { - Self { total: stop_words.as_ref().map(|stop_words| stop_words.len()) } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { stop_words: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct SynonymsAnalytics { - total: Option, -} - -impl 
SynonymsAnalytics { - pub fn new(synonyms: Option<&std::collections::BTreeMap>>) -> Self { - Self { total: synonyms.as_ref().map(|synonyms| synonyms.len()) } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { synonyms: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct EmbeddersAnalytics { - // last - total: Option, - // Merge the sources - sources: Option>, - // |= - document_template_used: Option, - // max - document_template_max_bytes: Option, - // |= - binary_quantization_used: Option, -} - -impl EmbeddersAnalytics { - pub fn new( - setting: Option< - &std::collections::BTreeMap< - String, - Setting, - >, - >, - ) -> Self { - let mut sources = std::collections::HashSet::new(); - - if let Some(s) = &setting { - for source in s - .values() - .filter_map(|config| config.clone().set()) - .filter_map(|config| config.source.set()) - { - use meilisearch_types::milli::vector::settings::EmbedderSource; - match source { - EmbedderSource::OpenAi => sources.insert("openAi".to_string()), - EmbedderSource::HuggingFace => sources.insert("huggingFace".to_string()), - EmbedderSource::UserProvided => sources.insert("userProvided".to_string()), - EmbedderSource::Ollama => sources.insert("ollama".to_string()), - EmbedderSource::Rest => sources.insert("rest".to_string()), - }; - } - }; - - Self { - total: setting.as_ref().map(|s| s.len()), - sources: Some(sources), - document_template_used: setting.as_ref().map(|map| { - map.values() - .filter_map(|config| config.clone().set()) - .any(|config| config.document_template.set().is_some()) - }), - document_template_max_bytes: setting.as_ref().and_then(|map| { - map.values() - .filter_map(|config| config.clone().set()) - .filter_map(|config| config.document_template_max_bytes.set()) - .max() - }), - binary_quantization_used: setting.as_ref().map(|map| { - map.values() - .filter_map(|config| config.clone().set()) - .any(|config| config.binary_quantized.set().is_some()) - }), - } - } - 
- pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { embedders: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -#[serde(transparent)] -struct SearchCutoffMsAnalytics { - search_cutoff_ms: Option, -} - -impl SearchCutoffMsAnalytics { - pub fn new(setting: Option<&u64>) -> Self { - Self { search_cutoff_ms: setting.copied() } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { search_cutoff_ms: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -#[serde(transparent)] -struct LocalesAnalytics { - locales: Option>, -} - -impl LocalesAnalytics { - pub fn new( - rules: Option<&Vec>, - ) -> Self { - LocalesAnalytics { - locales: rules.as_ref().map(|rules| { - rules - .iter() - .flat_map(|rule| rule.locales.iter().cloned()) - .collect::>() - }), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { locales: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct DictionaryAnalytics { - total: Option, -} - -impl DictionaryAnalytics { - pub fn new(dictionary: Option<&std::collections::BTreeSet>) -> Self { - Self { total: dictionary.as_ref().map(|dictionary| dictionary.len()) } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { dictionary: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct SeparatorTokensAnalytics { - total: Option, -} - -impl SeparatorTokensAnalytics { - pub fn new(separator_tokens: Option<&std::collections::BTreeSet>) -> Self { - Self { total: separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()) } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { separator_tokens: self, ..Default::default() } - } -} - -#[derive(Serialize, Default)] -struct NonSeparatorTokensAnalytics { - total: Option, -} - -impl NonSeparatorTokensAnalytics { - pub fn new(non_separator_tokens: Option<&std::collections::BTreeSet>) -> Self { - Self { - 
total: non_separator_tokens - .as_ref() - .map(|non_separator_tokens| non_separator_tokens.len()), - } - } - - pub fn into_settings(self) -> SettingsAnalytics { - SettingsAnalytics { non_separator_tokens: self, ..Default::default() } - } -} - pub async fn update_all( index_scheduler: GuardedData, Data>, index_uid: web::Path, diff --git a/meilisearch/src/routes/indexes/settings_analytics.rs b/meilisearch/src/routes/indexes/settings_analytics.rs new file mode 100644 index 000000000..636ef3c57 --- /dev/null +++ b/meilisearch/src/routes/indexes/settings_analytics.rs @@ -0,0 +1,627 @@ +//! All the structures used to make the analytics on the settings work. +//! The signatures of the `new` functions are not very idiomatic Rust because they must match the types received +//! through the sub-settings route directly without any manipulation. +//! This is why we often use an `Option<&Vec<_>>` instead of an `Option<&[_]>`. + +use meilisearch_types::locales::{Locale, LocalizedAttributesRuleView}; +use meilisearch_types::milli::update::Setting; +use meilisearch_types::milli::vector::settings::EmbeddingSettings; +use meilisearch_types::settings::{ + FacetingSettings, PaginationSettings, ProximityPrecisionView, TypoSettings, +}; +use meilisearch_types::{facet_values_sort::FacetValuesSort, settings::RankingRuleView}; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet, HashSet}; + +use crate::analytics::Aggregate; + +#[derive(Serialize, Default)] +pub struct SettingsAnalytics { + pub ranking_rules: RankingRulesAnalytics, + pub searchable_attributes: SearchableAttributesAnalytics, + pub displayed_attributes: DisplayedAttributesAnalytics, + pub sortable_attributes: SortableAttributesAnalytics, + pub filterable_attributes: FilterableAttributesAnalytics, + pub distinct_attribute: DistinctAttributeAnalytics, + pub proximity_precision: ProximityPrecisionAnalytics, + pub typo_tolerance: TypoToleranceAnalytics, + pub faceting: FacetingAnalytics, + pub pagination:
PaginationAnalytics, + pub stop_words: StopWordsAnalytics, + pub synonyms: SynonymsAnalytics, + pub embedders: EmbeddersAnalytics, + pub search_cutoff_ms: SearchCutoffMsAnalytics, + pub locales: LocalesAnalytics, + pub dictionary: DictionaryAnalytics, + pub separator_tokens: SeparatorTokensAnalytics, + pub non_separator_tokens: NonSeparatorTokensAnalytics, +} + +impl Aggregate for SettingsAnalytics { + fn event_name(&self) -> &'static str { + "Settings Updated" + } + + fn aggregate(self: Box, other: Box) -> Box { + Box::new(Self { + ranking_rules: RankingRulesAnalytics { + words_position: self + .ranking_rules + .words_position + .or(other.ranking_rules.words_position), + typo_position: self + .ranking_rules + .typo_position + .or(other.ranking_rules.typo_position), + proximity_position: self + .ranking_rules + .proximity_position + .or(other.ranking_rules.proximity_position), + attribute_position: self + .ranking_rules + .attribute_position + .or(other.ranking_rules.attribute_position), + sort_position: self + .ranking_rules + .sort_position + .or(other.ranking_rules.sort_position), + exactness_position: self + .ranking_rules + .exactness_position + .or(other.ranking_rules.exactness_position), + values: self.ranking_rules.values.or(other.ranking_rules.values), + }, + searchable_attributes: SearchableAttributesAnalytics { + total: self.searchable_attributes.total.or(other.searchable_attributes.total), + with_wildcard: self + .searchable_attributes + .with_wildcard + .or(other.searchable_attributes.with_wildcard), + }, + displayed_attributes: DisplayedAttributesAnalytics { + total: self.displayed_attributes.total.or(other.displayed_attributes.total), + with_wildcard: self + .displayed_attributes + .with_wildcard + .or(other.displayed_attributes.with_wildcard), + }, + sortable_attributes: SortableAttributesAnalytics { + total: self.sortable_attributes.total.or(other.sortable_attributes.total), + has_geo: 
self.sortable_attributes.has_geo.or(other.sortable_attributes.has_geo), + }, + filterable_attributes: FilterableAttributesAnalytics { + total: self.filterable_attributes.total.or(other.filterable_attributes.total), + has_geo: self.filterable_attributes.has_geo.or(other.filterable_attributes.has_geo), + }, + distinct_attribute: DistinctAttributeAnalytics { + set: self.distinct_attribute.set | other.distinct_attribute.set, + }, + proximity_precision: ProximityPrecisionAnalytics { + set: self.proximity_precision.set | other.proximity_precision.set, + value: self.proximity_precision.value.or(other.proximity_precision.value), + }, + typo_tolerance: TypoToleranceAnalytics { + enabled: self.typo_tolerance.enabled.or(other.typo_tolerance.enabled), + disable_on_attributes: self + .typo_tolerance + .disable_on_attributes + .or(other.typo_tolerance.disable_on_attributes), + disable_on_words: self + .typo_tolerance + .disable_on_words + .or(other.typo_tolerance.disable_on_words), + min_word_size_for_one_typo: self + .typo_tolerance + .min_word_size_for_one_typo + .or(other.typo_tolerance.min_word_size_for_one_typo), + min_word_size_for_two_typos: self + .typo_tolerance + .min_word_size_for_two_typos + .or(other.typo_tolerance.min_word_size_for_two_typos), + }, + faceting: FacetingAnalytics { + max_values_per_facet: self + .faceting + .max_values_per_facet + .or(other.faceting.max_values_per_facet), + sort_facet_values_by_star_count: self + .faceting + .sort_facet_values_by_star_count + .or(other.faceting.sort_facet_values_by_star_count), + sort_facet_values_by_total: self + .faceting + .sort_facet_values_by_total + .or(other.faceting.sort_facet_values_by_total), + }, + pagination: PaginationAnalytics { + max_total_hits: self.pagination.max_total_hits.or(other.pagination.max_total_hits), + }, + stop_words: StopWordsAnalytics { + total: self.stop_words.total.or(other.stop_words.total), + }, + synonyms: SynonymsAnalytics { total: self.synonyms.total.or(other.synonyms.total) }, + 
embedders: EmbeddersAnalytics { + total: self.embedders.total.or(other.embedders.total), + sources: match (self.embedders.sources, other.embedders.sources) { + (None, None) => None, + (Some(sources), None) | (None, Some(sources)) => Some(sources), + (Some(this), Some(other)) => Some(this.union(&other).cloned().collect()), + }, + document_template_used: match ( + self.embedders.document_template_used, + other.embedders.document_template_used, + ) { + (None, None) => None, + (Some(used), None) | (None, Some(used)) => Some(used), + (Some(this), Some(other)) => Some(this | other), + }, + document_template_max_bytes: match ( + self.embedders.document_template_max_bytes, + other.embedders.document_template_max_bytes, + ) { + (None, None) => None, + (Some(bytes), None) | (None, Some(bytes)) => Some(bytes), + (Some(this), Some(other)) => Some(this.max(other)), + }, + binary_quantization_used: match ( + self.embedders.binary_quantization_used, + other.embedders.binary_quantization_used, + ) { + (None, None) => None, + (Some(bq), None) | (None, Some(bq)) => Some(bq), + (Some(this), Some(other)) => Some(this | other), + }, + }, + search_cutoff_ms: SearchCutoffMsAnalytics { + search_cutoff_ms: self + .search_cutoff_ms + .search_cutoff_ms + .or(other.search_cutoff_ms.search_cutoff_ms), + }, + locales: LocalesAnalytics { locales: self.locales.locales.or(other.locales.locales) }, + dictionary: DictionaryAnalytics { + total: self.dictionary.total.or(other.dictionary.total), + }, + separator_tokens: SeparatorTokensAnalytics { + total: self.separator_tokens.total.or(other.separator_tokens.total), + }, + non_separator_tokens: NonSeparatorTokensAnalytics { + total: self.non_separator_tokens.total.or(other.non_separator_tokens.total), + }, + }) + } + + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() + } +} + +#[derive(Serialize, Default)] +pub struct RankingRulesAnalytics { + pub words_position: Option, + pub typo_position: Option,
+ pub proximity_position: Option, + pub attribute_position: Option, + pub sort_position: Option, + pub exactness_position: Option, + pub values: Option, +} + +impl RankingRulesAnalytics { + pub fn new(rr: Option<&Vec>) -> Self { + RankingRulesAnalytics { + words_position: rr.as_ref().and_then(|rr| { + rr.iter() + .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Words)) + }), + typo_position: rr.as_ref().and_then(|rr| { + rr.iter() + .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo)) + }), + proximity_position: rr.as_ref().and_then(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Proximity) + }) + }), + attribute_position: rr.as_ref().and_then(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Attribute) + }) + }), + sort_position: rr.as_ref().and_then(|rr| { + rr.iter() + .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort)) + }), + exactness_position: rr.as_ref().and_then(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Exactness) + }) + }), + values: rr.as_ref().map(|rr| { + rr.iter() + .filter(|s| { + matches!( + s, + meilisearch_types::settings::RankingRuleView::Asc(_) + | meilisearch_types::settings::RankingRuleView::Desc(_) + ) + }) + .map(|x| x.to_string()) + .collect::>() + .join(", ") + }), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { ranking_rules: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct SearchableAttributesAnalytics { + pub total: Option, + pub with_wildcard: Option, +} + +impl SearchableAttributesAnalytics { + pub fn new(setting: Option<&Vec>) -> Self { + Self { + total: setting.as_ref().map(|searchable| searchable.len()), + with_wildcard: setting + .as_ref() + .map(|searchable| searchable.iter().any(|searchable| searchable == "*")), + } + } + + pub fn 
into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { searchable_attributes: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct DisplayedAttributesAnalytics { + pub total: Option, + pub with_wildcard: Option, +} + +impl DisplayedAttributesAnalytics { + pub fn new(displayed: Option<&Vec>) -> Self { + Self { + total: displayed.as_ref().map(|displayed| displayed.len()), + with_wildcard: displayed + .as_ref() + .map(|displayed| displayed.iter().any(|displayed| displayed == "*")), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { displayed_attributes: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct SortableAttributesAnalytics { + pub total: Option, + pub has_geo: Option, +} + +impl SortableAttributesAnalytics { + pub fn new(setting: Option<&BTreeSet>) -> Self { + Self { + total: setting.as_ref().map(|sort| sort.len()), + has_geo: setting.as_ref().map(|sort| sort.contains("_geo")), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { sortable_attributes: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct FilterableAttributesAnalytics { + pub total: Option, + pub has_geo: Option, +} + +impl FilterableAttributesAnalytics { + pub fn new(setting: Option<&BTreeSet>) -> Self { + Self { + total: setting.as_ref().map(|filter| filter.len()), + has_geo: setting.as_ref().map(|filter| filter.contains("_geo")), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { filterable_attributes: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct DistinctAttributeAnalytics { + pub set: bool, +} + +impl DistinctAttributeAnalytics { + pub fn new(distinct: Option<&String>) -> Self { + Self { set: distinct.is_some() } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { distinct_attribute: self, ..Default::default() } + } +} + 
+#[derive(Serialize, Default)] +pub struct ProximityPrecisionAnalytics { + pub set: bool, + pub value: Option, +} + +impl ProximityPrecisionAnalytics { + pub fn new(precision: Option<&ProximityPrecisionView>) -> Self { + Self { set: precision.is_some(), value: precision.cloned() } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { proximity_precision: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct TypoToleranceAnalytics { + pub enabled: Option, + pub disable_on_attributes: Option, + pub disable_on_words: Option, + pub min_word_size_for_one_typo: Option, + pub min_word_size_for_two_typos: Option, +} + +impl TypoToleranceAnalytics { + pub fn new(setting: Option<&TypoSettings>) -> Self { + Self { + enabled: setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))), + disable_on_attributes: setting + .as_ref() + .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), + disable_on_words: setting + .as_ref() + .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), + min_word_size_for_one_typo: setting + .as_ref() + .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.one_typo.set())) + .flatten(), + min_word_size_for_two_typos: setting + .as_ref() + .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.two_typos.set())) + .flatten(), + } + } + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { typo_tolerance: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct FacetingAnalytics { + pub max_values_per_facet: Option, + pub sort_facet_values_by_star_count: Option, + pub sort_facet_values_by_total: Option, +} + +impl FacetingAnalytics { + pub fn new(setting: Option<&FacetingSettings>) -> Self { + Self { + max_values_per_facet: setting.as_ref().and_then(|s| s.max_values_per_facet.set()), + sort_facet_values_by_star_count: setting.as_ref().and_then(|s| { + s.sort_facet_values_by + .as_ref() + 
.set() + .map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count)) + }), + sort_facet_values_by_total: setting + .as_ref() + .and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { faceting: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct PaginationAnalytics { + pub max_total_hits: Option, +} + +impl PaginationAnalytics { + pub fn new(setting: Option<&PaginationSettings>) -> Self { + Self { max_total_hits: setting.as_ref().and_then(|s| s.max_total_hits.set()) } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { pagination: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct StopWordsAnalytics { + pub total: Option, +} + +impl StopWordsAnalytics { + pub fn new(stop_words: Option<&BTreeSet>) -> Self { + Self { total: stop_words.as_ref().map(|stop_words| stop_words.len()) } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { stop_words: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct SynonymsAnalytics { + pub total: Option, +} + +impl SynonymsAnalytics { + pub fn new(synonyms: Option<&BTreeMap>>) -> Self { + Self { total: synonyms.as_ref().map(|synonyms| synonyms.len()) } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { synonyms: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct EmbeddersAnalytics { + // last + pub total: Option, + // Merge the sources + pub sources: Option>, + // |= + pub document_template_used: Option, + // max + pub document_template_max_bytes: Option, + // |= + pub binary_quantization_used: Option, +} + +impl EmbeddersAnalytics { + pub fn new(setting: Option<&BTreeMap>>) -> Self { + let mut sources = std::collections::HashSet::new(); + + if let Some(s) = &setting { + for source in s + .values() + .filter_map(|config| 
config.clone().set()) + .filter_map(|config| config.source.set()) + { + use meilisearch_types::milli::vector::settings::EmbedderSource; + match source { + EmbedderSource::OpenAi => sources.insert("openAi".to_string()), + EmbedderSource::HuggingFace => sources.insert("huggingFace".to_string()), + EmbedderSource::UserProvided => sources.insert("userProvided".to_string()), + EmbedderSource::Ollama => sources.insert("ollama".to_string()), + EmbedderSource::Rest => sources.insert("rest".to_string()), + }; + } + }; + + Self { + total: setting.as_ref().map(|s| s.len()), + sources: Some(sources), + document_template_used: setting.as_ref().map(|map| { + map.values() + .filter_map(|config| config.clone().set()) + .any(|config| config.document_template.set().is_some()) + }), + document_template_max_bytes: setting.as_ref().and_then(|map| { + map.values() + .filter_map(|config| config.clone().set()) + .filter_map(|config| config.document_template_max_bytes.set()) + .max() + }), + binary_quantization_used: setting.as_ref().map(|map| { + map.values() + .filter_map(|config| config.clone().set()) + .any(|config| config.binary_quantized.set().is_some()) + }), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { embedders: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +#[serde(transparent)] +pub struct SearchCutoffMsAnalytics { + pub search_cutoff_ms: Option, +} + +impl SearchCutoffMsAnalytics { + pub fn new(setting: Option<&u64>) -> Self { + Self { search_cutoff_ms: setting.copied() } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { search_cutoff_ms: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +#[serde(transparent)] +pub struct LocalesAnalytics { + pub locales: Option>, +} + +impl LocalesAnalytics { + pub fn new(rules: Option<&Vec>) -> Self { + LocalesAnalytics { + locales: rules.as_ref().map(|rules| { + rules + .iter() + .flat_map(|rule| rule.locales.iter().cloned()) + 
.collect::>() + }), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { locales: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct DictionaryAnalytics { + pub total: Option, +} + +impl DictionaryAnalytics { + pub fn new(dictionary: Option<&BTreeSet>) -> Self { + Self { total: dictionary.as_ref().map(|dictionary| dictionary.len()) } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { dictionary: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct SeparatorTokensAnalytics { + pub total: Option, +} + +impl SeparatorTokensAnalytics { + pub fn new(separator_tokens: Option<&BTreeSet>) -> Self { + Self { total: separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()) } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { separator_tokens: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct NonSeparatorTokensAnalytics { + pub total: Option, +} + +impl NonSeparatorTokensAnalytics { + pub fn new(non_separator_tokens: Option<&BTreeSet>) -> Self { + Self { + total: non_separator_tokens + .as_ref() + .map(|non_separator_tokens| non_separator_tokens.len()), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { non_separator_tokens: self, ..Default::default() } + } +}