From 4c641b79a2aa72ebfd07baf69318bcc28cebe271 Mon Sep 17 00:00:00 2001 From: Vivek Kumar Date: Mon, 11 Sep 2023 19:49:09 +0530 Subject: [PATCH] use rust struct destructuring for SearchAggregator --- .../src/analytics/segment_analytics.rs | 268 ++++++++++++------ 1 file changed, 185 insertions(+), 83 deletions(-) diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 28e715852..45773603b 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -615,19 +615,43 @@ pub struct SearchAggregator { impl SearchAggregator { pub fn from_query(query: &SearchQuery, request: &HttpRequest) -> Self { + let SearchQuery { + q, + vector, + offset, + limit, + page, + hits_per_page, + attributes_to_retrieve: _, + attributes_to_crop: _, + crop_length, + attributes_to_highlight: _, + show_matches_position, + show_ranking_score, + show_ranking_score_details, + filter, + sort, + facets: _, + highlight_pre_tag, + highlight_post_tag, + crop_marker, + matching_strategy, + attributes_to_search_on, + } = query; + let mut ret = Self::default(); ret.timestamp = Some(OffsetDateTime::now_utc()); ret.total_received = 1; ret.user_agents = extract_user_agents(request).into_iter().collect(); - if let Some(ref sort) = query.sort { + if let Some(ref sort) = sort { ret.sort_total_number_of_criteria = 1; ret.sort_with_geo_point = sort.iter().any(|s| s.contains("_geoPoint(")); ret.sort_sum_of_criteria_terms = sort.len(); } - if let Some(ref filter) = query.filter { + if let Some(ref filter) = filter { static RE: Lazy = Lazy::new(|| Regex::new("AND | OR").unwrap()); ret.filter_total_number_of_criteria = 1; @@ -652,80 +676,124 @@ impl SearchAggregator { } // attributes_to_search_on - if let Some(_) = query.attributes_to_search_on { + if let Some(_) = attributes_to_search_on { ret.attributes_to_search_on_total_number_of_uses = 1; } - if let Some(ref q) = query.q { + if let Some(ref q) = q { ret.max_terms_number = q.split_whitespace().count(); } - if let Some(ref vector) = query.vector { + if let Some(ref vector) = vector { ret.max_vector_size = vector.len(); } if query.is_finite_pagination() { - let limit = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT); + let limit = hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT); ret.max_limit = limit; - ret.max_offset = query.page.unwrap_or(1).saturating_sub(1) * limit; + ret.max_offset = page.unwrap_or(1).saturating_sub(1) * limit; ret.finite_pagination = 1; } else { - ret.max_limit = query.limit; - ret.max_offset = query.offset; + ret.max_limit = *limit; + ret.max_offset = *offset; ret.finite_pagination = 0; } - ret.matching_strategy.insert(format!("{:?}", query.matching_strategy), 1); + ret.matching_strategy.insert(format!("{:?}", matching_strategy), 1); - ret.highlight_pre_tag = query.highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG(); - ret.highlight_post_tag = query.highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG(); - ret.crop_marker = query.crop_marker != DEFAULT_CROP_MARKER(); - ret.crop_length = query.crop_length != DEFAULT_CROP_LENGTH(); - ret.show_matches_position = query.show_matches_position; + ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG(); + ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG(); + ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER(); + ret.crop_length = *crop_length != DEFAULT_CROP_LENGTH(); + ret.show_matches_position = *show_matches_position; - ret.show_ranking_score = query.show_ranking_score; - ret.show_ranking_score_details = query.show_ranking_score_details; + ret.show_ranking_score = *show_ranking_score; + ret.show_ranking_score_details = *show_ranking_score_details; ret } pub fn succeed(&mut self, result: &SearchResult) { + let SearchResult { + hits: _, + query: _, + vector: _, + processing_time_ms, + hits_info: _, + facet_distribution: _, + facet_stats: _, + } = result; + self.total_succeeded = self.total_succeeded.saturating_add(1); - self.time_spent.push(result.processing_time_ms as usize); + self.time_spent.push(*processing_time_ms as usize); } /// Aggregate one [SearchAggregator] into another. pub fn aggregate(&mut self, mut other: Self) { + let Self { + timestamp, + user_agents, + total_received, + total_succeeded, + ref mut time_spent, + sort_with_geo_point, + sort_sum_of_criteria_terms, + sort_total_number_of_criteria, + filter_with_geo_radius, + filter_with_geo_bounding_box, + filter_sum_of_criteria_terms, + filter_total_number_of_criteria, + used_syntax, + attributes_to_search_on_total_number_of_uses, + max_terms_number, + max_vector_size, + matching_strategy, + max_limit, + max_offset, + finite_pagination, + max_attributes_to_retrieve, + max_attributes_to_highlight, + highlight_pre_tag, + highlight_post_tag, + max_attributes_to_crop, + crop_marker, + show_matches_position, + crop_length, + facets_sum_of_terms, + facets_total_number_of_facets, + show_ranking_score, + show_ranking_score_details, + } = other; + if self.timestamp.is_none() { - self.timestamp = other.timestamp; + self.timestamp = timestamp; } // context - for user_agent in other.user_agents.into_iter() { + for user_agent in user_agents.into_iter() { self.user_agents.insert(user_agent); } // request - self.total_received = self.total_received.saturating_add(other.total_received); - self.total_succeeded = self.total_succeeded.saturating_add(other.total_succeeded); - self.time_spent.append(&mut other.time_spent); + self.total_received = self.total_received.saturating_add(total_received); + self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); + self.time_spent.append(time_spent); // sort - self.sort_with_geo_point |= other.sort_with_geo_point; + self.sort_with_geo_point |= sort_with_geo_point; self.sort_sum_of_criteria_terms = - self.sort_sum_of_criteria_terms.saturating_add(other.sort_sum_of_criteria_terms); + self.sort_sum_of_criteria_terms.saturating_add(sort_sum_of_criteria_terms); self.sort_total_number_of_criteria = - self.sort_total_number_of_criteria.saturating_add(other.sort_total_number_of_criteria); + self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria); // filter - self.filter_with_geo_radius |= other.filter_with_geo_radius; - self.filter_with_geo_bounding_box |= other.filter_with_geo_bounding_box; + self.filter_with_geo_radius |= filter_with_geo_radius; + self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box; self.filter_sum_of_criteria_terms = - self.filter_sum_of_criteria_terms.saturating_add(other.filter_sum_of_criteria_terms); - self.filter_total_number_of_criteria = self - .filter_total_number_of_criteria - .saturating_add(other.filter_total_number_of_criteria); - for (key, value) in other.used_syntax.into_iter() { + self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms); + self.filter_total_number_of_criteria = + self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria); + for (key, value) in used_syntax.into_iter() { let used_syntax = self.used_syntax.entry(key).or_insert(0); *used_syntax = used_syntax.saturating_add(value); } @@ -733,115 +801,149 @@ impl SearchAggregator { // attributes_to_search_on self.attributes_to_search_on_total_number_of_uses = self .attributes_to_search_on_total_number_of_uses - .saturating_add(other.attributes_to_search_on_total_number_of_uses); + .saturating_add(attributes_to_search_on_total_number_of_uses); // q - self.max_terms_number = self.max_terms_number.max(other.max_terms_number); + self.max_terms_number = self.max_terms_number.max(max_terms_number); // vector - self.max_vector_size = self.max_vector_size.max(other.max_vector_size); + self.max_vector_size = self.max_vector_size.max(max_vector_size); // pagination - self.max_limit = self.max_limit.max(other.max_limit); - self.max_offset = self.max_offset.max(other.max_offset); - self.finite_pagination += other.finite_pagination; + self.max_limit = self.max_limit.max(max_limit); + self.max_offset = self.max_offset.max(max_offset); + self.finite_pagination += finite_pagination; // formatting self.max_attributes_to_retrieve = - self.max_attributes_to_retrieve.max(other.max_attributes_to_retrieve); + self.max_attributes_to_retrieve.max(max_attributes_to_retrieve); self.max_attributes_to_highlight = - self.max_attributes_to_highlight.max(other.max_attributes_to_highlight); - self.highlight_pre_tag |= other.highlight_pre_tag; - self.highlight_post_tag |= other.highlight_post_tag; - self.max_attributes_to_crop = self.max_attributes_to_crop.max(other.max_attributes_to_crop); - self.crop_marker |= other.crop_marker; - self.show_matches_position |= other.show_matches_position; - self.crop_length |= other.crop_length; + self.max_attributes_to_highlight.max(max_attributes_to_highlight); + self.highlight_pre_tag |= highlight_pre_tag; + self.highlight_post_tag |= highlight_post_tag; + self.max_attributes_to_crop = self.max_attributes_to_crop.max(max_attributes_to_crop); + self.crop_marker |= crop_marker; + self.show_matches_position |= show_matches_position; + self.crop_length |= crop_length; // facets - self.facets_sum_of_terms = - self.facets_sum_of_terms.saturating_add(other.facets_sum_of_terms); + self.facets_sum_of_terms = self.facets_sum_of_terms.saturating_add(facets_sum_of_terms); self.facets_total_number_of_facets = - self.facets_total_number_of_facets.saturating_add(other.facets_total_number_of_facets); + self.facets_total_number_of_facets.saturating_add(facets_total_number_of_facets); // matching strategy - for (key, value) in other.matching_strategy.into_iter() { + for (key, value) in matching_strategy.into_iter() { let matching_strategy = self.matching_strategy.entry(key).or_insert(0); *matching_strategy = matching_strategy.saturating_add(value); } // scoring - self.show_ranking_score |= other.show_ranking_score; - self.show_ranking_score_details |= other.show_ranking_score_details; + self.show_ranking_score |= show_ranking_score; + self.show_ranking_score_details |= show_ranking_score_details; } pub fn into_event(self, user: &User, event_name: &str) -> Option { + let Self { + timestamp, + user_agents, + total_received, + total_succeeded, + time_spent, + sort_with_geo_point, + sort_sum_of_criteria_terms, + sort_total_number_of_criteria, + filter_with_geo_radius, + filter_with_geo_bounding_box, + filter_sum_of_criteria_terms, + filter_total_number_of_criteria, + used_syntax, + attributes_to_search_on_total_number_of_uses, + max_terms_number, + max_vector_size, + matching_strategy, + max_limit, + max_offset, + finite_pagination, + max_attributes_to_retrieve, + max_attributes_to_highlight, + highlight_pre_tag, + highlight_post_tag, + max_attributes_to_crop, + crop_marker, + show_matches_position, + crop_length, + facets_sum_of_terms, + facets_total_number_of_facets, + show_ranking_score, + show_ranking_score_details, + } = self; + if self.total_received == 0 { None } else { // we get all the values in a sorted manner - let time_spent = self.time_spent.into_sorted_vec(); + let time_spent = time_spent.into_sorted_vec(); // the index of the 99th percentage of value let percentile_99th = time_spent.len() * 99 / 100; // We are only interested by the slowest value of the 99th fastest results let time_spent = time_spent.get(percentile_99th); let properties = json!({ - "user-agent": self.user_agents, + "user-agent": user_agents, "requests": { "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), - "total_succeeded": self.total_succeeded, - "total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panics - "total_received": self.total_received, + "total_succeeded": total_succeeded, + "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics + "total_received": total_received, }, "sort": { - "with_geoPoint": self.sort_with_geo_point, - "avg_criteria_number": format!("{:.2}", self.sort_sum_of_criteria_terms as f64 / self.sort_total_number_of_criteria as f64), + "with_geoPoint": sort_with_geo_point, + "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64), }, "filter": { - "with_geoRadius": self.filter_with_geo_radius, - "with_geoBoundingBox": self.filter_with_geo_bounding_box, - "avg_criteria_number": format!("{:.2}", self.filter_sum_of_criteria_terms as f64 / self.filter_total_number_of_criteria as f64), - "most_used_syntax": self.used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), + "with_geoRadius": filter_with_geo_radius, + "with_geoBoundingBox": filter_with_geo_bounding_box, + "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), + "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), }, "attributes_to_search_on": { - "total_number_of_uses": self.attributes_to_search_on_total_number_of_uses, + "total_number_of_uses": attributes_to_search_on_total_number_of_uses, }, "q": { - "max_terms_number": self.max_terms_number, + "max_terms_number": max_terms_number, }, "vector": { - "max_vector_size": self.max_vector_size, + "max_vector_size": max_vector_size, }, "pagination": { - "max_limit": self.max_limit, - "max_offset": self.max_offset, - "most_used_navigation": if self.finite_pagination > (self.total_received / 2) { "exhaustive" } else { "estimated" }, + "max_limit": max_limit, + "max_offset": max_offset, + "most_used_navigation": if finite_pagination > (total_received / 2) { "exhaustive" } else { "estimated" }, }, "formatting": { - "max_attributes_to_retrieve": self.max_attributes_to_retrieve, - "max_attributes_to_highlight": self.max_attributes_to_highlight, - "highlight_pre_tag": self.highlight_pre_tag, - "highlight_post_tag": self.highlight_post_tag, - "max_attributes_to_crop": self.max_attributes_to_crop, - "crop_marker": self.crop_marker, - "show_matches_position": self.show_matches_position, - "crop_length": self.crop_length, + "max_attributes_to_retrieve": max_attributes_to_retrieve, + "max_attributes_to_highlight": max_attributes_to_highlight, + "highlight_pre_tag": highlight_pre_tag, + "highlight_post_tag": highlight_post_tag, + "max_attributes_to_crop": max_attributes_to_crop, + "crop_marker": crop_marker, + "show_matches_position": show_matches_position, + "crop_length": crop_length, }, "facets": { - "avg_facets_number": format!("{:.2}", self.facets_sum_of_terms as f64 / self.facets_total_number_of_facets as f64), + "avg_facets_number": format!("{:.2}", facets_sum_of_terms as f64 / facets_total_number_of_facets as f64), }, "matching_strategy": { - "most_used_strategy": self.matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), + "most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), }, "scoring": { - "show_ranking_score": self.show_ranking_score, - "show_ranking_score_details": self.show_ranking_score_details, + "show_ranking_score": show_ranking_score, + "show_ranking_score_details": show_ranking_score_details, }, }); Some(Track { - timestamp: self.timestamp, + timestamp: timestamp, user: user.clone(), event: event_name.to_string(), properties,