mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
move all the searches structures to new modules
This commit is contained in:
parent
af589c85ec
commit
5675585fe8
@ -15,13 +15,9 @@ use platform_dirs::AppDirs;
|
|||||||
|
|
||||||
// if the feature analytics is enabled we use the real analytics
|
// if the feature analytics is enabled we use the real analytics
|
||||||
pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
|
pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
|
||||||
pub use segment_analytics::SearchAggregator;
|
|
||||||
pub use segment_analytics::SimilarAggregator;
|
|
||||||
|
|
||||||
use crate::Opt;
|
use crate::Opt;
|
||||||
|
|
||||||
pub use self::segment_analytics::MultiSearchAggregator;
|
|
||||||
|
|
||||||
/// A macro used to quickly define events that don't aggregate or send anything besides an empty event with its name.
|
/// A macro used to quickly define events that don't aggregate or send anything besides an empty event with its name.
|
||||||
#[macro_export]
|
#[macro_export]
|
||||||
macro_rules! empty_analytics {
|
macro_rules! empty_analytics {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use std::any::TypeId;
|
use std::any::TypeId;
|
||||||
use std::collections::{BTreeSet, BinaryHeap, HashMap, HashSet};
|
use std::collections::{HashMap, HashSet};
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
@ -11,10 +11,8 @@ use byte_unit::Byte;
|
|||||||
use index_scheduler::IndexScheduler;
|
use index_scheduler::IndexScheduler;
|
||||||
use meilisearch_auth::{AuthController, AuthFilter};
|
use meilisearch_auth::{AuthController, AuthFilter};
|
||||||
use meilisearch_types::features::RuntimeTogglableFeatures;
|
use meilisearch_types::features::RuntimeTogglableFeatures;
|
||||||
use meilisearch_types::locales::Locale;
|
|
||||||
use meilisearch_types::InstanceUid;
|
use meilisearch_types::InstanceUid;
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use regex::Regex;
|
|
||||||
use segment::message::{Identify, Track, User};
|
use segment::message::{Identify, Track, User};
|
||||||
use segment::{AutoBatcher, Batcher, HttpClient};
|
use segment::{AutoBatcher, Batcher, HttpClient};
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
@ -25,17 +23,12 @@ use tokio::select;
|
|||||||
use tokio::sync::mpsc::{self, Receiver, Sender};
|
use tokio::sync::mpsc::{self, Receiver, Sender};
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
use super::{config_user_id_path, Aggregate, AggregateMethod, MEILISEARCH_CONFIG_PATH};
|
use super::{config_user_id_path, Aggregate, MEILISEARCH_CONFIG_PATH};
|
||||||
use crate::option::{
|
use crate::option::{
|
||||||
default_http_addr, IndexerOpts, LogMode, MaxMemory, MaxThreads, ScheduleSnapshot,
|
default_http_addr, IndexerOpts, LogMode, MaxMemory, MaxThreads, ScheduleSnapshot,
|
||||||
};
|
};
|
||||||
use crate::routes::{create_all_stats, Stats};
|
use crate::routes::{create_all_stats, Stats};
|
||||||
use crate::search::{
|
use crate::Opt;
|
||||||
FederatedSearch, SearchQuery, SearchQueryWithIndex, SearchResult, SimilarQuery, SimilarResult,
|
|
||||||
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
|
|
||||||
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEMANTIC_RATIO,
|
|
||||||
};
|
|
||||||
use crate::{aggregate_methods, Opt};
|
|
||||||
|
|
||||||
const ANALYTICS_HEADER: &str = "X-Meilisearch-Client";
|
const ANALYTICS_HEADER: &str = "X-Meilisearch-Client";
|
||||||
|
|
||||||
@ -489,858 +482,3 @@ impl Segment {
|
|||||||
let _ = self.batcher.flush().await;
|
let _ = self.batcher.flush().await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default)]
|
|
||||||
pub struct SearchAggregator<Method: AggregateMethod> {
|
|
||||||
// requests
|
|
||||||
total_received: usize,
|
|
||||||
total_succeeded: usize,
|
|
||||||
total_degraded: usize,
|
|
||||||
total_used_negative_operator: usize,
|
|
||||||
time_spent: BinaryHeap<usize>,
|
|
||||||
|
|
||||||
// sort
|
|
||||||
sort_with_geo_point: bool,
|
|
||||||
// every time a request has a sort, this field must be incremented by the number of terms it contains
|
|
||||||
sort_sum_of_criteria_terms: usize,
|
|
||||||
// every time a request has a sort, this field must be incremented by one
|
|
||||||
sort_total_number_of_criteria: usize,
|
|
||||||
|
|
||||||
// distinct
|
|
||||||
distinct: bool,
|
|
||||||
|
|
||||||
// filter
|
|
||||||
filter_with_geo_radius: bool,
|
|
||||||
filter_with_geo_bounding_box: bool,
|
|
||||||
// every time a request has a filter, this field must be incremented by the number of terms it contains
|
|
||||||
filter_sum_of_criteria_terms: usize,
|
|
||||||
// every time a request has a filter, this field must be incremented by one
|
|
||||||
filter_total_number_of_criteria: usize,
|
|
||||||
used_syntax: HashMap<String, usize>,
|
|
||||||
|
|
||||||
// attributes_to_search_on
|
|
||||||
// every time a search is done using attributes_to_search_on
|
|
||||||
attributes_to_search_on_total_number_of_uses: usize,
|
|
||||||
|
|
||||||
// q
|
|
||||||
// The maximum number of terms in a q request
|
|
||||||
max_terms_number: usize,
|
|
||||||
|
|
||||||
// vector
|
|
||||||
// The maximum number of floats in a vector request
|
|
||||||
max_vector_size: usize,
|
|
||||||
// Whether the semantic ratio passed to a hybrid search differs from the default ratio.
|
|
||||||
semantic_ratio: bool,
|
|
||||||
hybrid: bool,
|
|
||||||
retrieve_vectors: bool,
|
|
||||||
|
|
||||||
// every time a search is done, we increment the counter linked to the used settings
|
|
||||||
matching_strategy: HashMap<String, usize>,
|
|
||||||
|
|
||||||
// List of the unique Locales passed as parameter
|
|
||||||
locales: BTreeSet<Locale>,
|
|
||||||
|
|
||||||
// pagination
|
|
||||||
max_limit: usize,
|
|
||||||
max_offset: usize,
|
|
||||||
finite_pagination: usize,
|
|
||||||
|
|
||||||
// formatting
|
|
||||||
max_attributes_to_retrieve: usize,
|
|
||||||
max_attributes_to_highlight: usize,
|
|
||||||
highlight_pre_tag: bool,
|
|
||||||
highlight_post_tag: bool,
|
|
||||||
max_attributes_to_crop: usize,
|
|
||||||
crop_marker: bool,
|
|
||||||
show_matches_position: bool,
|
|
||||||
crop_length: bool,
|
|
||||||
|
|
||||||
// facets
|
|
||||||
facets_sum_of_terms: usize,
|
|
||||||
facets_total_number_of_facets: usize,
|
|
||||||
|
|
||||||
// scoring
|
|
||||||
show_ranking_score: bool,
|
|
||||||
show_ranking_score_details: bool,
|
|
||||||
ranking_score_threshold: bool,
|
|
||||||
|
|
||||||
marker: std::marker::PhantomData<Method>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<Method: AggregateMethod> SearchAggregator<Method> {
|
|
||||||
#[allow(clippy::field_reassign_with_default)]
|
|
||||||
pub fn from_query(query: &SearchQuery) -> Self {
|
|
||||||
let SearchQuery {
|
|
||||||
q,
|
|
||||||
vector,
|
|
||||||
offset,
|
|
||||||
limit,
|
|
||||||
page,
|
|
||||||
hits_per_page,
|
|
||||||
attributes_to_retrieve: _,
|
|
||||||
retrieve_vectors,
|
|
||||||
attributes_to_crop: _,
|
|
||||||
crop_length,
|
|
||||||
attributes_to_highlight: _,
|
|
||||||
show_matches_position,
|
|
||||||
show_ranking_score,
|
|
||||||
show_ranking_score_details,
|
|
||||||
filter,
|
|
||||||
sort,
|
|
||||||
distinct,
|
|
||||||
facets: _,
|
|
||||||
highlight_pre_tag,
|
|
||||||
highlight_post_tag,
|
|
||||||
crop_marker,
|
|
||||||
matching_strategy,
|
|
||||||
attributes_to_search_on,
|
|
||||||
hybrid,
|
|
||||||
ranking_score_threshold,
|
|
||||||
locales,
|
|
||||||
} = query;
|
|
||||||
|
|
||||||
let mut ret = Self::default();
|
|
||||||
|
|
||||||
ret.total_received = 1;
|
|
||||||
|
|
||||||
if let Some(ref sort) = sort {
|
|
||||||
ret.sort_total_number_of_criteria = 1;
|
|
||||||
ret.sort_with_geo_point = sort.iter().any(|s| s.contains("_geoPoint("));
|
|
||||||
ret.sort_sum_of_criteria_terms = sort.len();
|
|
||||||
}
|
|
||||||
|
|
||||||
ret.distinct = distinct.is_some();
|
|
||||||
|
|
||||||
if let Some(ref filter) = filter {
|
|
||||||
static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
|
|
||||||
ret.filter_total_number_of_criteria = 1;
|
|
||||||
|
|
||||||
let syntax = match filter {
|
|
||||||
Value::String(_) => "string".to_string(),
|
|
||||||
Value::Array(values) => {
|
|
||||||
if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) {
|
|
||||||
"mixed".to_string()
|
|
||||||
} else {
|
|
||||||
"array".to_string()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => "none".to_string(),
|
|
||||||
};
|
|
||||||
// record the detected syntax with a count of one for this request
|
|
||||||
ret.used_syntax.insert(syntax, 1);
|
|
||||||
|
|
||||||
let stringified_filters = filter.to_string();
|
|
||||||
ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
|
|
||||||
ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox(");
|
|
||||||
ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
|
|
||||||
}
|
|
||||||
|
|
||||||
// attributes_to_search_on
|
|
||||||
if attributes_to_search_on.is_some() {
|
|
||||||
ret.attributes_to_search_on_total_number_of_uses = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(ref q) = q {
|
|
||||||
ret.max_terms_number = q.split_whitespace().count();
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(ref vector) = vector {
|
|
||||||
ret.max_vector_size = vector.len();
|
|
||||||
}
|
|
||||||
ret.retrieve_vectors |= retrieve_vectors;
|
|
||||||
|
|
||||||
if query.is_finite_pagination() {
|
|
||||||
let limit = hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
|
|
||||||
ret.max_limit = limit;
|
|
||||||
ret.max_offset = page.unwrap_or(1).saturating_sub(1) * limit;
|
|
||||||
ret.finite_pagination = 1;
|
|
||||||
} else {
|
|
||||||
ret.max_limit = *limit;
|
|
||||||
ret.max_offset = *offset;
|
|
||||||
ret.finite_pagination = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret.matching_strategy.insert(format!("{:?}", matching_strategy), 1);
|
|
||||||
|
|
||||||
if let Some(locales) = locales {
|
|
||||||
ret.locales = locales.iter().copied().collect();
|
|
||||||
}
|
|
||||||
|
|
||||||
ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
|
|
||||||
ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
|
|
||||||
ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER();
|
|
||||||
ret.crop_length = *crop_length != DEFAULT_CROP_LENGTH();
|
|
||||||
ret.show_matches_position = *show_matches_position;
|
|
||||||
|
|
||||||
ret.show_ranking_score = *show_ranking_score;
|
|
||||||
ret.show_ranking_score_details = *show_ranking_score_details;
|
|
||||||
ret.ranking_score_threshold = ranking_score_threshold.is_some();
|
|
||||||
|
|
||||||
if let Some(hybrid) = hybrid {
|
|
||||||
ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
|
|
||||||
ret.hybrid = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn succeed(&mut self, result: &SearchResult) {
|
|
||||||
let SearchResult {
|
|
||||||
hits: _,
|
|
||||||
query: _,
|
|
||||||
processing_time_ms,
|
|
||||||
hits_info: _,
|
|
||||||
semantic_hit_count: _,
|
|
||||||
facet_distribution: _,
|
|
||||||
facet_stats: _,
|
|
||||||
degraded,
|
|
||||||
used_negative_operator,
|
|
||||||
} = result;
|
|
||||||
|
|
||||||
self.total_succeeded = self.total_succeeded.saturating_add(1);
|
|
||||||
if *degraded {
|
|
||||||
self.total_degraded = self.total_degraded.saturating_add(1);
|
|
||||||
}
|
|
||||||
if *used_negative_operator {
|
|
||||||
self.total_used_negative_operator = self.total_used_negative_operator.saturating_add(1);
|
|
||||||
}
|
|
||||||
self.time_spent.push(*processing_time_ms as usize);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
aggregate_methods!(
|
|
||||||
SearchGET => "Documents Searched GET",
|
|
||||||
SearchPOST => "Documents Searched POST",
|
|
||||||
);
|
|
||||||
|
|
||||||
impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
|
||||||
fn event_name(&self) -> &'static str {
|
|
||||||
Method::event_name()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn aggregate(mut self: Box<Self>, new: Box<Self>) -> Box<Self> {
|
|
||||||
let Self {
|
|
||||||
total_received,
|
|
||||||
total_succeeded,
|
|
||||||
mut time_spent,
|
|
||||||
sort_with_geo_point,
|
|
||||||
sort_sum_of_criteria_terms,
|
|
||||||
sort_total_number_of_criteria,
|
|
||||||
distinct,
|
|
||||||
filter_with_geo_radius,
|
|
||||||
filter_with_geo_bounding_box,
|
|
||||||
filter_sum_of_criteria_terms,
|
|
||||||
filter_total_number_of_criteria,
|
|
||||||
used_syntax,
|
|
||||||
attributes_to_search_on_total_number_of_uses,
|
|
||||||
max_terms_number,
|
|
||||||
max_vector_size,
|
|
||||||
retrieve_vectors,
|
|
||||||
matching_strategy,
|
|
||||||
max_limit,
|
|
||||||
max_offset,
|
|
||||||
finite_pagination,
|
|
||||||
max_attributes_to_retrieve,
|
|
||||||
max_attributes_to_highlight,
|
|
||||||
highlight_pre_tag,
|
|
||||||
highlight_post_tag,
|
|
||||||
max_attributes_to_crop,
|
|
||||||
crop_marker,
|
|
||||||
show_matches_position,
|
|
||||||
crop_length,
|
|
||||||
facets_sum_of_terms,
|
|
||||||
facets_total_number_of_facets,
|
|
||||||
show_ranking_score,
|
|
||||||
show_ranking_score_details,
|
|
||||||
semantic_ratio,
|
|
||||||
hybrid,
|
|
||||||
total_degraded,
|
|
||||||
total_used_negative_operator,
|
|
||||||
ranking_score_threshold,
|
|
||||||
mut locales,
|
|
||||||
marker: _,
|
|
||||||
} = *new;
|
|
||||||
|
|
||||||
// request
|
|
||||||
self.total_received = self.total_received.saturating_add(total_received);
|
|
||||||
self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
|
|
||||||
self.total_degraded = self.total_degraded.saturating_add(total_degraded);
|
|
||||||
self.total_used_negative_operator =
|
|
||||||
self.total_used_negative_operator.saturating_add(total_used_negative_operator);
|
|
||||||
self.time_spent.append(&mut time_spent);
|
|
||||||
|
|
||||||
// sort
|
|
||||||
self.sort_with_geo_point |= sort_with_geo_point;
|
|
||||||
self.sort_sum_of_criteria_terms =
|
|
||||||
self.sort_sum_of_criteria_terms.saturating_add(sort_sum_of_criteria_terms);
|
|
||||||
self.sort_total_number_of_criteria =
|
|
||||||
self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria);
|
|
||||||
|
|
||||||
// distinct
|
|
||||||
self.distinct |= distinct;
|
|
||||||
|
|
||||||
// filter
|
|
||||||
self.filter_with_geo_radius |= filter_with_geo_radius;
|
|
||||||
self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
|
|
||||||
self.filter_sum_of_criteria_terms =
|
|
||||||
self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms);
|
|
||||||
self.filter_total_number_of_criteria =
|
|
||||||
self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria);
|
|
||||||
for (key, value) in used_syntax.into_iter() {
|
|
||||||
let used_syntax = self.used_syntax.entry(key).or_insert(0);
|
|
||||||
*used_syntax = used_syntax.saturating_add(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
// attributes_to_search_on
|
|
||||||
self.attributes_to_search_on_total_number_of_uses = self
|
|
||||||
.attributes_to_search_on_total_number_of_uses
|
|
||||||
.saturating_add(attributes_to_search_on_total_number_of_uses);
|
|
||||||
|
|
||||||
// q
|
|
||||||
self.max_terms_number = self.max_terms_number.max(max_terms_number);
|
|
||||||
|
|
||||||
// vector
|
|
||||||
self.max_vector_size = self.max_vector_size.max(max_vector_size);
|
|
||||||
self.retrieve_vectors |= retrieve_vectors;
|
|
||||||
self.semantic_ratio |= semantic_ratio;
|
|
||||||
self.hybrid |= hybrid;
|
|
||||||
|
|
||||||
// pagination
|
|
||||||
self.max_limit = self.max_limit.max(max_limit);
|
|
||||||
self.max_offset = self.max_offset.max(max_offset);
|
|
||||||
self.finite_pagination += finite_pagination;
|
|
||||||
|
|
||||||
// formatting
|
|
||||||
self.max_attributes_to_retrieve =
|
|
||||||
self.max_attributes_to_retrieve.max(max_attributes_to_retrieve);
|
|
||||||
self.max_attributes_to_highlight =
|
|
||||||
self.max_attributes_to_highlight.max(max_attributes_to_highlight);
|
|
||||||
self.highlight_pre_tag |= highlight_pre_tag;
|
|
||||||
self.highlight_post_tag |= highlight_post_tag;
|
|
||||||
self.max_attributes_to_crop = self.max_attributes_to_crop.max(max_attributes_to_crop);
|
|
||||||
self.crop_marker |= crop_marker;
|
|
||||||
self.show_matches_position |= show_matches_position;
|
|
||||||
self.crop_length |= crop_length;
|
|
||||||
|
|
||||||
// facets
|
|
||||||
self.facets_sum_of_terms = self.facets_sum_of_terms.saturating_add(facets_sum_of_terms);
|
|
||||||
self.facets_total_number_of_facets =
|
|
||||||
self.facets_total_number_of_facets.saturating_add(facets_total_number_of_facets);
|
|
||||||
|
|
||||||
// matching strategy
|
|
||||||
for (key, value) in matching_strategy.into_iter() {
|
|
||||||
let matching_strategy = self.matching_strategy.entry(key).or_insert(0);
|
|
||||||
*matching_strategy = matching_strategy.saturating_add(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
// scoring
|
|
||||||
self.show_ranking_score |= show_ranking_score;
|
|
||||||
self.show_ranking_score_details |= show_ranking_score_details;
|
|
||||||
self.ranking_score_threshold |= ranking_score_threshold;
|
|
||||||
|
|
||||||
// locales
|
|
||||||
self.locales.append(&mut locales);
|
|
||||||
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
fn into_event(self: Box<Self>) -> serde_json::Value {
|
|
||||||
let Self {
|
|
||||||
total_received,
|
|
||||||
total_succeeded,
|
|
||||||
time_spent,
|
|
||||||
sort_with_geo_point,
|
|
||||||
sort_sum_of_criteria_terms,
|
|
||||||
sort_total_number_of_criteria,
|
|
||||||
distinct,
|
|
||||||
filter_with_geo_radius,
|
|
||||||
filter_with_geo_bounding_box,
|
|
||||||
filter_sum_of_criteria_terms,
|
|
||||||
filter_total_number_of_criteria,
|
|
||||||
used_syntax,
|
|
||||||
attributes_to_search_on_total_number_of_uses,
|
|
||||||
max_terms_number,
|
|
||||||
max_vector_size,
|
|
||||||
retrieve_vectors,
|
|
||||||
matching_strategy,
|
|
||||||
max_limit,
|
|
||||||
max_offset,
|
|
||||||
finite_pagination,
|
|
||||||
max_attributes_to_retrieve,
|
|
||||||
max_attributes_to_highlight,
|
|
||||||
highlight_pre_tag,
|
|
||||||
highlight_post_tag,
|
|
||||||
max_attributes_to_crop,
|
|
||||||
crop_marker,
|
|
||||||
show_matches_position,
|
|
||||||
crop_length,
|
|
||||||
facets_sum_of_terms,
|
|
||||||
facets_total_number_of_facets,
|
|
||||||
show_ranking_score,
|
|
||||||
show_ranking_score_details,
|
|
||||||
semantic_ratio,
|
|
||||||
hybrid,
|
|
||||||
total_degraded,
|
|
||||||
total_used_negative_operator,
|
|
||||||
ranking_score_threshold,
|
|
||||||
locales,
|
|
||||||
marker: _,
|
|
||||||
} = *self;
|
|
||||||
|
|
||||||
// we get all the values in a sorted manner
|
|
||||||
let time_spent = time_spent.into_sorted_vec();
|
|
||||||
// the index of the 99th percentile in the sorted values
|
|
||||||
let percentile_99th = time_spent.len() * 99 / 100;
|
|
||||||
// we only report the response time found at that index of the sorted values
|
|
||||||
let time_spent = time_spent.get(percentile_99th);
|
|
||||||
|
|
||||||
json!({
|
|
||||||
"requests": {
|
|
||||||
"99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
|
|
||||||
"total_succeeded": total_succeeded,
|
|
||||||
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
|
|
||||||
"total_received": total_received,
|
|
||||||
"total_degraded": total_degraded,
|
|
||||||
"total_used_negative_operator": total_used_negative_operator,
|
|
||||||
},
|
|
||||||
"sort": {
|
|
||||||
"with_geoPoint": sort_with_geo_point,
|
|
||||||
"avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64),
|
|
||||||
},
|
|
||||||
"distinct": distinct,
|
|
||||||
"filter": {
|
|
||||||
"with_geoRadius": filter_with_geo_radius,
|
|
||||||
"with_geoBoundingBox": filter_with_geo_bounding_box,
|
|
||||||
"avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
|
|
||||||
"most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
|
|
||||||
},
|
|
||||||
"attributes_to_search_on": {
|
|
||||||
"total_number_of_uses": attributes_to_search_on_total_number_of_uses,
|
|
||||||
},
|
|
||||||
"q": {
|
|
||||||
"max_terms_number": max_terms_number,
|
|
||||||
},
|
|
||||||
"vector": {
|
|
||||||
"max_vector_size": max_vector_size,
|
|
||||||
"retrieve_vectors": retrieve_vectors,
|
|
||||||
},
|
|
||||||
"hybrid": {
|
|
||||||
"enabled": hybrid,
|
|
||||||
"semantic_ratio": semantic_ratio,
|
|
||||||
},
|
|
||||||
"pagination": {
|
|
||||||
"max_limit": max_limit,
|
|
||||||
"max_offset": max_offset,
|
|
||||||
"most_used_navigation": if finite_pagination > (total_received / 2) { "exhaustive" } else { "estimated" },
|
|
||||||
},
|
|
||||||
"formatting": {
|
|
||||||
"max_attributes_to_retrieve": max_attributes_to_retrieve,
|
|
||||||
"max_attributes_to_highlight": max_attributes_to_highlight,
|
|
||||||
"highlight_pre_tag": highlight_pre_tag,
|
|
||||||
"highlight_post_tag": highlight_post_tag,
|
|
||||||
"max_attributes_to_crop": max_attributes_to_crop,
|
|
||||||
"crop_marker": crop_marker,
|
|
||||||
"show_matches_position": show_matches_position,
|
|
||||||
"crop_length": crop_length,
|
|
||||||
},
|
|
||||||
"facets": {
|
|
||||||
"avg_facets_number": format!("{:.2}", facets_sum_of_terms as f64 / facets_total_number_of_facets as f64),
|
|
||||||
},
|
|
||||||
"matching_strategy": {
|
|
||||||
"most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
|
|
||||||
},
|
|
||||||
"locales": locales,
|
|
||||||
"scoring": {
|
|
||||||
"show_ranking_score": show_ranking_score,
|
|
||||||
"show_ranking_score_details": show_ranking_score_details,
|
|
||||||
"ranking_score_threshold": ranking_score_threshold,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Default)]
|
|
||||||
pub struct MultiSearchAggregator {
|
|
||||||
// requests
|
|
||||||
total_received: usize,
|
|
||||||
total_succeeded: usize,
|
|
||||||
|
|
||||||
// sum of the number of distinct indexes in each single request, use with total_received to compute an avg
|
|
||||||
total_distinct_index_count: usize,
|
|
||||||
// number of queries with a single index, use with total_received to compute a proportion
|
|
||||||
total_single_index: usize,
|
|
||||||
|
|
||||||
// sum of the number of search queries in the requests, use with total_received to compute an average
|
|
||||||
total_search_count: usize,
|
|
||||||
|
|
||||||
// scoring
|
|
||||||
show_ranking_score: bool,
|
|
||||||
show_ranking_score_details: bool,
|
|
||||||
|
|
||||||
// federation
|
|
||||||
use_federation: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl MultiSearchAggregator {
|
|
||||||
pub fn from_federated_search(federated_search: &FederatedSearch) -> Self {
|
|
||||||
let use_federation = federated_search.federation.is_some();
|
|
||||||
|
|
||||||
let distinct_indexes: HashSet<_> = federated_search
|
|
||||||
.queries
|
|
||||||
.iter()
|
|
||||||
.map(|query| {
|
|
||||||
let query = &query;
|
|
||||||
// make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex
|
|
||||||
let SearchQueryWithIndex {
|
|
||||||
index_uid,
|
|
||||||
federation_options: _,
|
|
||||||
q: _,
|
|
||||||
vector: _,
|
|
||||||
offset: _,
|
|
||||||
limit: _,
|
|
||||||
page: _,
|
|
||||||
hits_per_page: _,
|
|
||||||
attributes_to_retrieve: _,
|
|
||||||
retrieve_vectors: _,
|
|
||||||
attributes_to_crop: _,
|
|
||||||
crop_length: _,
|
|
||||||
attributes_to_highlight: _,
|
|
||||||
show_ranking_score: _,
|
|
||||||
show_ranking_score_details: _,
|
|
||||||
show_matches_position: _,
|
|
||||||
filter: _,
|
|
||||||
sort: _,
|
|
||||||
distinct: _,
|
|
||||||
facets: _,
|
|
||||||
highlight_pre_tag: _,
|
|
||||||
highlight_post_tag: _,
|
|
||||||
crop_marker: _,
|
|
||||||
matching_strategy: _,
|
|
||||||
attributes_to_search_on: _,
|
|
||||||
hybrid: _,
|
|
||||||
ranking_score_threshold: _,
|
|
||||||
locales: _,
|
|
||||||
} = query;
|
|
||||||
|
|
||||||
index_uid.as_str()
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let show_ranking_score =
|
|
||||||
federated_search.queries.iter().any(|query| query.show_ranking_score);
|
|
||||||
let show_ranking_score_details =
|
|
||||||
federated_search.queries.iter().any(|query| query.show_ranking_score_details);
|
|
||||||
|
|
||||||
Self {
|
|
||||||
total_received: 1,
|
|
||||||
total_succeeded: 0,
|
|
||||||
total_distinct_index_count: distinct_indexes.len(),
|
|
||||||
total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 },
|
|
||||||
total_search_count: federated_search.queries.len(),
|
|
||||||
show_ranking_score,
|
|
||||||
show_ranking_score_details,
|
|
||||||
use_federation,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn succeed(&mut self) {
|
|
||||||
self.total_succeeded = self.total_succeeded.saturating_add(1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Aggregate for MultiSearchAggregator {
|
|
||||||
fn event_name(&self) -> &'static str {
|
|
||||||
"Documents Searched by Multi-Search POST"
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Aggregate one [MultiSearchAggregator] into another.
|
|
||||||
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
|
|
||||||
// write the aggregate in a way that will cause a compilation error if a field is added.
|
|
||||||
|
|
||||||
// move the aggregator out of its box to get ownership of its fields.
|
|
||||||
let this = *self;
|
|
||||||
|
|
||||||
let total_received = this.total_received.saturating_add(new.total_received);
|
|
||||||
let total_succeeded = this.total_succeeded.saturating_add(new.total_succeeded);
|
|
||||||
let total_distinct_index_count =
|
|
||||||
this.total_distinct_index_count.saturating_add(new.total_distinct_index_count);
|
|
||||||
let total_single_index = this.total_single_index.saturating_add(new.total_single_index);
|
|
||||||
let total_search_count = this.total_search_count.saturating_add(new.total_search_count);
|
|
||||||
let show_ranking_score = this.show_ranking_score || new.show_ranking_score;
|
|
||||||
let show_ranking_score_details =
|
|
||||||
this.show_ranking_score_details || new.show_ranking_score_details;
|
|
||||||
let use_federation = this.use_federation || new.use_federation;
|
|
||||||
|
|
||||||
Box::new(Self {
|
|
||||||
total_received,
|
|
||||||
total_succeeded,
|
|
||||||
total_distinct_index_count,
|
|
||||||
total_single_index,
|
|
||||||
total_search_count,
|
|
||||||
show_ranking_score,
|
|
||||||
show_ranking_score_details,
|
|
||||||
use_federation,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn into_event(self: Box<Self>) -> serde_json::Value {
|
|
||||||
let Self {
|
|
||||||
total_received,
|
|
||||||
total_succeeded,
|
|
||||||
total_distinct_index_count,
|
|
||||||
total_single_index,
|
|
||||||
total_search_count,
|
|
||||||
show_ranking_score,
|
|
||||||
show_ranking_score_details,
|
|
||||||
use_federation,
|
|
||||||
} = *self;
|
|
||||||
|
|
||||||
json!({
|
|
||||||
"requests": {
|
|
||||||
"total_succeeded": total_succeeded,
|
|
||||||
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
|
|
||||||
"total_received": total_received,
|
|
||||||
},
|
|
||||||
"indexes": {
|
|
||||||
"total_single_index": total_single_index,
|
|
||||||
"total_distinct_index_count": total_distinct_index_count,
|
|
||||||
"avg_distinct_index_count": (total_distinct_index_count as f64) / (total_received as f64), // not 0 else returned early
|
|
||||||
},
|
|
||||||
"searches": {
|
|
||||||
"total_search_count": total_search_count,
|
|
||||||
"avg_search_count": (total_search_count as f64) / (total_received as f64),
|
|
||||||
},
|
|
||||||
"scoring": {
|
|
||||||
"show_ranking_score": show_ranking_score,
|
|
||||||
"show_ranking_score_details": show_ranking_score_details,
|
|
||||||
},
|
|
||||||
"federation": {
|
|
||||||
"use_federation": use_federation,
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
aggregate_methods!(
|
|
||||||
SimilarPOST => "Similar POST",
|
|
||||||
SimilarGET => "Similar GET",
|
|
||||||
);
|
|
||||||
|
|
||||||
#[derive(Default)]
|
|
||||||
pub struct SimilarAggregator<Method: AggregateMethod> {
|
|
||||||
// requests
|
|
||||||
total_received: usize,
|
|
||||||
total_succeeded: usize,
|
|
||||||
time_spent: BinaryHeap<usize>,
|
|
||||||
|
|
||||||
// filter
|
|
||||||
filter_with_geo_radius: bool,
|
|
||||||
filter_with_geo_bounding_box: bool,
|
|
||||||
// every time a request has a filter, this field must be incremented by the number of terms it contains
|
|
||||||
filter_sum_of_criteria_terms: usize,
|
|
||||||
// every time a request has a filter, this field must be incremented by one
|
|
||||||
filter_total_number_of_criteria: usize,
|
|
||||||
used_syntax: HashMap<String, usize>,
|
|
||||||
|
|
||||||
// Copied from the query's `retrieve_vectors` flag
|
|
||||||
retrieve_vectors: bool,
|
|
||||||
|
|
||||||
// pagination
|
|
||||||
max_limit: usize,
|
|
||||||
max_offset: usize,
|
|
||||||
|
|
||||||
// formatting
|
|
||||||
max_attributes_to_retrieve: usize,
|
|
||||||
|
|
||||||
// scoring
|
|
||||||
show_ranking_score: bool,
|
|
||||||
show_ranking_score_details: bool,
|
|
||||||
ranking_score_threshold: bool,
|
|
||||||
|
|
||||||
marker: std::marker::PhantomData<Method>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<Method: AggregateMethod> SimilarAggregator<Method> {
|
|
||||||
#[allow(clippy::field_reassign_with_default)]
|
|
||||||
pub fn from_query(query: &SimilarQuery) -> Self {
|
|
||||||
let SimilarQuery {
|
|
||||||
id: _,
|
|
||||||
embedder: _,
|
|
||||||
offset,
|
|
||||||
limit,
|
|
||||||
attributes_to_retrieve: _,
|
|
||||||
retrieve_vectors,
|
|
||||||
show_ranking_score,
|
|
||||||
show_ranking_score_details,
|
|
||||||
filter,
|
|
||||||
ranking_score_threshold,
|
|
||||||
} = query;
|
|
||||||
|
|
||||||
let mut ret = Self::default();
|
|
||||||
|
|
||||||
ret.total_received = 1;
|
|
||||||
|
|
||||||
if let Some(ref filter) = filter {
|
|
||||||
static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
|
|
||||||
ret.filter_total_number_of_criteria = 1;
|
|
||||||
|
|
||||||
let syntax = match filter {
|
|
||||||
Value::String(_) => "string".to_string(),
|
|
||||||
Value::Array(values) => {
|
|
||||||
if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) {
|
|
||||||
"mixed".to_string()
|
|
||||||
} else {
|
|
||||||
"array".to_string()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => "none".to_string(),
|
|
||||||
};
|
|
||||||
// record the detected syntax with a count of one for this request
|
|
||||||
ret.used_syntax.insert(syntax, 1);
|
|
||||||
|
|
||||||
let stringified_filters = filter.to_string();
|
|
||||||
ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
|
|
||||||
ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox(");
|
|
||||||
ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
|
|
||||||
}
|
|
||||||
|
|
||||||
ret.max_limit = *limit;
|
|
||||||
ret.max_offset = *offset;
|
|
||||||
|
|
||||||
ret.show_ranking_score = *show_ranking_score;
|
|
||||||
ret.show_ranking_score_details = *show_ranking_score_details;
|
|
||||||
ret.ranking_score_threshold = ranking_score_threshold.is_some();
|
|
||||||
|
|
||||||
ret.retrieve_vectors = *retrieve_vectors;
|
|
||||||
|
|
||||||
ret
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Records one successful request together with its processing time.
pub fn succeed(&mut self, result: &SimilarResult) {
    // Exhaustive pattern on purpose: a new `SimilarResult` field has to be handled
    // (or explicitly ignored) here.
    let SimilarResult { processing_time_ms: elapsed_ms, id: _, hits: _, hits_info: _ } = result;

    self.time_spent.push(*elapsed_ms as usize);
    self.total_succeeded = self.total_succeeded.saturating_add(1);
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<Method: AggregateMethod> Aggregate for SimilarAggregator<Method> {
|
|
||||||
fn event_name(&self) -> &'static str {
|
|
||||||
Method::event_name()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Aggregate one [SimilarAggregator] into another.
|
|
||||||
fn aggregate(mut self: Box<Self>, new: Box<Self>) -> Box<Self> {
|
|
||||||
let Self {
|
|
||||||
total_received,
|
|
||||||
total_succeeded,
|
|
||||||
mut time_spent,
|
|
||||||
filter_with_geo_radius,
|
|
||||||
filter_with_geo_bounding_box,
|
|
||||||
filter_sum_of_criteria_terms,
|
|
||||||
filter_total_number_of_criteria,
|
|
||||||
used_syntax,
|
|
||||||
max_limit,
|
|
||||||
max_offset,
|
|
||||||
max_attributes_to_retrieve,
|
|
||||||
show_ranking_score,
|
|
||||||
show_ranking_score_details,
|
|
||||||
ranking_score_threshold,
|
|
||||||
retrieve_vectors,
|
|
||||||
marker: _,
|
|
||||||
} = *new;
|
|
||||||
|
|
||||||
// request
|
|
||||||
self.total_received = self.total_received.saturating_add(total_received);
|
|
||||||
self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
|
|
||||||
self.time_spent.append(&mut time_spent);
|
|
||||||
|
|
||||||
// filter
|
|
||||||
self.filter_with_geo_radius |= filter_with_geo_radius;
|
|
||||||
self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
|
|
||||||
self.filter_sum_of_criteria_terms =
|
|
||||||
self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms);
|
|
||||||
self.filter_total_number_of_criteria =
|
|
||||||
self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria);
|
|
||||||
for (key, value) in used_syntax.into_iter() {
|
|
||||||
let used_syntax = self.used_syntax.entry(key).or_insert(0);
|
|
||||||
*used_syntax = used_syntax.saturating_add(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
self.retrieve_vectors |= retrieve_vectors;
|
|
||||||
|
|
||||||
// pagination
|
|
||||||
self.max_limit = self.max_limit.max(max_limit);
|
|
||||||
self.max_offset = self.max_offset.max(max_offset);
|
|
||||||
|
|
||||||
// formatting
|
|
||||||
self.max_attributes_to_retrieve =
|
|
||||||
self.max_attributes_to_retrieve.max(max_attributes_to_retrieve);
|
|
||||||
|
|
||||||
// scoring
|
|
||||||
self.show_ranking_score |= show_ranking_score;
|
|
||||||
self.show_ranking_score_details |= show_ranking_score_details;
|
|
||||||
self.ranking_score_threshold |= ranking_score_threshold;
|
|
||||||
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
fn into_event(self: Box<Self>) -> serde_json::Value {
|
|
||||||
let Self {
|
|
||||||
total_received,
|
|
||||||
total_succeeded,
|
|
||||||
time_spent,
|
|
||||||
filter_with_geo_radius,
|
|
||||||
filter_with_geo_bounding_box,
|
|
||||||
filter_sum_of_criteria_terms,
|
|
||||||
filter_total_number_of_criteria,
|
|
||||||
used_syntax,
|
|
||||||
max_limit,
|
|
||||||
max_offset,
|
|
||||||
max_attributes_to_retrieve,
|
|
||||||
show_ranking_score,
|
|
||||||
show_ranking_score_details,
|
|
||||||
ranking_score_threshold,
|
|
||||||
retrieve_vectors,
|
|
||||||
marker: _,
|
|
||||||
} = *self;
|
|
||||||
|
|
||||||
// we get all the values in a sorted manner
|
|
||||||
let time_spent = time_spent.into_sorted_vec();
|
|
||||||
// the index of the 99th percentage of value
|
|
||||||
let percentile_99th = time_spent.len() * 99 / 100;
|
|
||||||
// We are only interested by the slowest value of the 99th fastest results
|
|
||||||
let time_spent = time_spent.get(percentile_99th);
|
|
||||||
|
|
||||||
json!({
|
|
||||||
"requests": {
|
|
||||||
"99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
|
|
||||||
"total_succeeded": total_succeeded,
|
|
||||||
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
|
|
||||||
"total_received": total_received,
|
|
||||||
},
|
|
||||||
"filter": {
|
|
||||||
"with_geoRadius": filter_with_geo_radius,
|
|
||||||
"with_geoBoundingBox": filter_with_geo_bounding_box,
|
|
||||||
"avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
|
|
||||||
"most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
|
|
||||||
},
|
|
||||||
"vector": {
|
|
||||||
"retrieve_vectors": retrieve_vectors,
|
|
||||||
},
|
|
||||||
"pagination": {
|
|
||||||
"max_limit": max_limit,
|
|
||||||
"max_offset": max_offset,
|
|
||||||
},
|
|
||||||
"formatting": {
|
|
||||||
"max_attributes_to_retrieve": max_attributes_to_retrieve,
|
|
||||||
},
|
|
||||||
"scoring": {
|
|
||||||
"show_ranking_score": show_ranking_score,
|
|
||||||
"show_ranking_score_details": show_ranking_score_details,
|
|
||||||
"ranking_score_threshold": ranking_score_threshold,
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -28,9 +28,11 @@ use crate::Opt;
|
|||||||
pub mod documents;
|
pub mod documents;
|
||||||
pub mod facet_search;
|
pub mod facet_search;
|
||||||
pub mod search;
|
pub mod search;
|
||||||
|
mod search_analytics;
|
||||||
pub mod settings;
|
pub mod settings;
|
||||||
mod settings_analytics;
|
mod settings_analytics;
|
||||||
pub mod similar;
|
pub mod similar;
|
||||||
|
mod similar_analytics;
|
||||||
|
|
||||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||||
cfg.service(
|
cfg.service(
|
||||||
|
@ -13,13 +13,13 @@ use meilisearch_types::serde_cs::vec::CS;
|
|||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
|
|
||||||
use crate::analytics::segment_analytics::{SearchGET, SearchPOST};
|
use crate::analytics::Analytics;
|
||||||
use crate::analytics::{Analytics, SearchAggregator};
|
|
||||||
use crate::error::MeilisearchHttpError;
|
use crate::error::MeilisearchHttpError;
|
||||||
use crate::extractors::authentication::policies::*;
|
use crate::extractors::authentication::policies::*;
|
||||||
use crate::extractors::authentication::GuardedData;
|
use crate::extractors::authentication::GuardedData;
|
||||||
use crate::extractors::sequential_extractor::SeqHandler;
|
use crate::extractors::sequential_extractor::SeqHandler;
|
||||||
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
|
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
|
||||||
|
use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST};
|
||||||
use crate::search::{
|
use crate::search::{
|
||||||
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
|
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
|
||||||
RetrieveVectors, SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH,
|
RetrieveVectors, SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH,
|
||||||
|
485
meilisearch/src/routes/indexes/search_analytics.rs
Normal file
485
meilisearch/src/routes/indexes/search_analytics.rs
Normal file
@ -0,0 +1,485 @@
|
|||||||
|
use once_cell::sync::Lazy;
|
||||||
|
use regex::Regex;
|
||||||
|
use serde_json::{json, Value};
|
||||||
|
use std::collections::{BTreeSet, BinaryHeap, HashMap};
|
||||||
|
|
||||||
|
use meilisearch_types::locales::Locale;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
aggregate_methods,
|
||||||
|
analytics::{Aggregate, AggregateMethod},
|
||||||
|
search::{
|
||||||
|
SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
|
||||||
|
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
|
||||||
|
DEFAULT_SEMANTIC_RATIO,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
// Declares `SearchGET` and `SearchPOST`, the markers used as the `Method` parameter of
// `SearchAggregator`. Presumably the string on the right is what `Method::event_name()`
// returns for each marker -- confirm against the `aggregate_methods!` definition.
aggregate_methods!(
|
||||||
|
SearchGET => "Documents Searched GET",
|
||||||
|
SearchPOST => "Documents Searched POST",
|
||||||
|
);
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
pub struct SearchAggregator<Method: AggregateMethod> {
|
||||||
|
// requests
|
||||||
|
total_received: usize,
|
||||||
|
total_succeeded: usize,
|
||||||
|
total_degraded: usize,
|
||||||
|
total_used_negative_operator: usize,
|
||||||
|
time_spent: BinaryHeap<usize>,
|
||||||
|
|
||||||
|
// sort
|
||||||
|
sort_with_geo_point: bool,
|
||||||
|
// every time a request has a filter, this field must be incremented by the number of terms it contains
|
||||||
|
sort_sum_of_criteria_terms: usize,
|
||||||
|
// every time a request has a filter, this field must be incremented by one
|
||||||
|
sort_total_number_of_criteria: usize,
|
||||||
|
|
||||||
|
// distinct
|
||||||
|
distinct: bool,
|
||||||
|
|
||||||
|
// filter
|
||||||
|
filter_with_geo_radius: bool,
|
||||||
|
filter_with_geo_bounding_box: bool,
|
||||||
|
// every time a request has a filter, this field must be incremented by the number of terms it contains
|
||||||
|
filter_sum_of_criteria_terms: usize,
|
||||||
|
// every time a request has a filter, this field must be incremented by one
|
||||||
|
filter_total_number_of_criteria: usize,
|
||||||
|
used_syntax: HashMap<String, usize>,
|
||||||
|
|
||||||
|
// attributes_to_search_on
|
||||||
|
// every time a search is done using attributes_to_search_on
|
||||||
|
attributes_to_search_on_total_number_of_uses: usize,
|
||||||
|
|
||||||
|
// q
|
||||||
|
// The maximum number of terms in a q request
|
||||||
|
max_terms_number: usize,
|
||||||
|
|
||||||
|
// vector
|
||||||
|
// The maximum number of floats in a vector request
|
||||||
|
max_vector_size: usize,
|
||||||
|
// Whether the semantic ratio passed to a hybrid search equals the default ratio.
|
||||||
|
semantic_ratio: bool,
|
||||||
|
hybrid: bool,
|
||||||
|
retrieve_vectors: bool,
|
||||||
|
|
||||||
|
// every time a search is done, we increment the counter linked to the used settings
|
||||||
|
matching_strategy: HashMap<String, usize>,
|
||||||
|
|
||||||
|
// List of the unique Locales passed as parameter
|
||||||
|
locales: BTreeSet<Locale>,
|
||||||
|
|
||||||
|
// pagination
|
||||||
|
max_limit: usize,
|
||||||
|
max_offset: usize,
|
||||||
|
finite_pagination: usize,
|
||||||
|
|
||||||
|
// formatting
|
||||||
|
max_attributes_to_retrieve: usize,
|
||||||
|
max_attributes_to_highlight: usize,
|
||||||
|
highlight_pre_tag: bool,
|
||||||
|
highlight_post_tag: bool,
|
||||||
|
max_attributes_to_crop: usize,
|
||||||
|
crop_marker: bool,
|
||||||
|
show_matches_position: bool,
|
||||||
|
crop_length: bool,
|
||||||
|
|
||||||
|
// facets
|
||||||
|
facets_sum_of_terms: usize,
|
||||||
|
facets_total_number_of_facets: usize,
|
||||||
|
|
||||||
|
// scoring
|
||||||
|
show_ranking_score: bool,
|
||||||
|
show_ranking_score_details: bool,
|
||||||
|
ranking_score_threshold: bool,
|
||||||
|
|
||||||
|
marker: std::marker::PhantomData<Method>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<Method: AggregateMethod> SearchAggregator<Method> {
|
||||||
|
#[allow(clippy::field_reassign_with_default)]
|
||||||
|
pub fn from_query(query: &SearchQuery) -> Self {
|
||||||
|
let SearchQuery {
|
||||||
|
q,
|
||||||
|
vector,
|
||||||
|
offset,
|
||||||
|
limit,
|
||||||
|
page,
|
||||||
|
hits_per_page,
|
||||||
|
attributes_to_retrieve: _,
|
||||||
|
retrieve_vectors,
|
||||||
|
attributes_to_crop: _,
|
||||||
|
crop_length,
|
||||||
|
attributes_to_highlight: _,
|
||||||
|
show_matches_position,
|
||||||
|
show_ranking_score,
|
||||||
|
show_ranking_score_details,
|
||||||
|
filter,
|
||||||
|
sort,
|
||||||
|
distinct,
|
||||||
|
facets: _,
|
||||||
|
highlight_pre_tag,
|
||||||
|
highlight_post_tag,
|
||||||
|
crop_marker,
|
||||||
|
matching_strategy,
|
||||||
|
attributes_to_search_on,
|
||||||
|
hybrid,
|
||||||
|
ranking_score_threshold,
|
||||||
|
locales,
|
||||||
|
} = query;
|
||||||
|
|
||||||
|
let mut ret = Self::default();
|
||||||
|
|
||||||
|
ret.total_received = 1;
|
||||||
|
|
||||||
|
if let Some(ref sort) = sort {
|
||||||
|
ret.sort_total_number_of_criteria = 1;
|
||||||
|
ret.sort_with_geo_point = sort.iter().any(|s| s.contains("_geoPoint("));
|
||||||
|
ret.sort_sum_of_criteria_terms = sort.len();
|
||||||
|
}
|
||||||
|
|
||||||
|
ret.distinct = distinct.is_some();
|
||||||
|
|
||||||
|
if let Some(ref filter) = filter {
|
||||||
|
static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
|
||||||
|
ret.filter_total_number_of_criteria = 1;
|
||||||
|
|
||||||
|
let syntax = match filter {
|
||||||
|
Value::String(_) => "string".to_string(),
|
||||||
|
Value::Array(values) => {
|
||||||
|
if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) {
|
||||||
|
"mixed".to_string()
|
||||||
|
} else {
|
||||||
|
"array".to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => "none".to_string(),
|
||||||
|
};
|
||||||
|
// convert the string to a HashMap
|
||||||
|
ret.used_syntax.insert(syntax, 1);
|
||||||
|
|
||||||
|
let stringified_filters = filter.to_string();
|
||||||
|
ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
|
||||||
|
ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox(");
|
||||||
|
ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
|
||||||
|
}
|
||||||
|
|
||||||
|
// attributes_to_search_on
|
||||||
|
if attributes_to_search_on.is_some() {
|
||||||
|
ret.attributes_to_search_on_total_number_of_uses = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(ref q) = q {
|
||||||
|
ret.max_terms_number = q.split_whitespace().count();
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(ref vector) = vector {
|
||||||
|
ret.max_vector_size = vector.len();
|
||||||
|
}
|
||||||
|
ret.retrieve_vectors |= retrieve_vectors;
|
||||||
|
|
||||||
|
if query.is_finite_pagination() {
|
||||||
|
let limit = hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
|
||||||
|
ret.max_limit = limit;
|
||||||
|
ret.max_offset = page.unwrap_or(1).saturating_sub(1) * limit;
|
||||||
|
ret.finite_pagination = 1;
|
||||||
|
} else {
|
||||||
|
ret.max_limit = *limit;
|
||||||
|
ret.max_offset = *offset;
|
||||||
|
ret.finite_pagination = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret.matching_strategy.insert(format!("{:?}", matching_strategy), 1);
|
||||||
|
|
||||||
|
if let Some(locales) = locales {
|
||||||
|
ret.locales = locales.iter().copied().collect();
|
||||||
|
}
|
||||||
|
|
||||||
|
ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
|
||||||
|
ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
|
||||||
|
ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER();
|
||||||
|
ret.crop_length = *crop_length != DEFAULT_CROP_LENGTH();
|
||||||
|
ret.show_matches_position = *show_matches_position;
|
||||||
|
|
||||||
|
ret.show_ranking_score = *show_ranking_score;
|
||||||
|
ret.show_ranking_score_details = *show_ranking_score_details;
|
||||||
|
ret.ranking_score_threshold = ranking_score_threshold.is_some();
|
||||||
|
|
||||||
|
if let Some(hybrid) = hybrid {
|
||||||
|
ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
|
||||||
|
ret.hybrid = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn succeed(&mut self, result: &SearchResult) {
|
||||||
|
let SearchResult {
|
||||||
|
hits: _,
|
||||||
|
query: _,
|
||||||
|
processing_time_ms,
|
||||||
|
hits_info: _,
|
||||||
|
semantic_hit_count: _,
|
||||||
|
facet_distribution: _,
|
||||||
|
facet_stats: _,
|
||||||
|
degraded,
|
||||||
|
used_negative_operator,
|
||||||
|
} = result;
|
||||||
|
|
||||||
|
self.total_succeeded = self.total_succeeded.saturating_add(1);
|
||||||
|
if *degraded {
|
||||||
|
self.total_degraded = self.total_degraded.saturating_add(1);
|
||||||
|
}
|
||||||
|
if *used_negative_operator {
|
||||||
|
self.total_used_negative_operator = self.total_used_negative_operator.saturating_add(1);
|
||||||
|
}
|
||||||
|
self.time_spent.push(*processing_time_ms as usize);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
||||||
|
fn event_name(&self) -> &'static str {
|
||||||
|
Method::event_name()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn aggregate(mut self: Box<Self>, new: Box<Self>) -> Box<Self> {
|
||||||
|
let Self {
|
||||||
|
total_received,
|
||||||
|
total_succeeded,
|
||||||
|
mut time_spent,
|
||||||
|
sort_with_geo_point,
|
||||||
|
sort_sum_of_criteria_terms,
|
||||||
|
sort_total_number_of_criteria,
|
||||||
|
distinct,
|
||||||
|
filter_with_geo_radius,
|
||||||
|
filter_with_geo_bounding_box,
|
||||||
|
filter_sum_of_criteria_terms,
|
||||||
|
filter_total_number_of_criteria,
|
||||||
|
used_syntax,
|
||||||
|
attributes_to_search_on_total_number_of_uses,
|
||||||
|
max_terms_number,
|
||||||
|
max_vector_size,
|
||||||
|
retrieve_vectors,
|
||||||
|
matching_strategy,
|
||||||
|
max_limit,
|
||||||
|
max_offset,
|
||||||
|
finite_pagination,
|
||||||
|
max_attributes_to_retrieve,
|
||||||
|
max_attributes_to_highlight,
|
||||||
|
highlight_pre_tag,
|
||||||
|
highlight_post_tag,
|
||||||
|
max_attributes_to_crop,
|
||||||
|
crop_marker,
|
||||||
|
show_matches_position,
|
||||||
|
crop_length,
|
||||||
|
facets_sum_of_terms,
|
||||||
|
facets_total_number_of_facets,
|
||||||
|
show_ranking_score,
|
||||||
|
show_ranking_score_details,
|
||||||
|
semantic_ratio,
|
||||||
|
hybrid,
|
||||||
|
total_degraded,
|
||||||
|
total_used_negative_operator,
|
||||||
|
ranking_score_threshold,
|
||||||
|
mut locales,
|
||||||
|
marker: _,
|
||||||
|
} = *new;
|
||||||
|
|
||||||
|
// request
|
||||||
|
self.total_received = self.total_received.saturating_add(total_received);
|
||||||
|
self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
|
||||||
|
self.total_degraded = self.total_degraded.saturating_add(total_degraded);
|
||||||
|
self.total_used_negative_operator =
|
||||||
|
self.total_used_negative_operator.saturating_add(total_used_negative_operator);
|
||||||
|
self.time_spent.append(&mut time_spent);
|
||||||
|
|
||||||
|
// sort
|
||||||
|
self.sort_with_geo_point |= sort_with_geo_point;
|
||||||
|
self.sort_sum_of_criteria_terms =
|
||||||
|
self.sort_sum_of_criteria_terms.saturating_add(sort_sum_of_criteria_terms);
|
||||||
|
self.sort_total_number_of_criteria =
|
||||||
|
self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria);
|
||||||
|
|
||||||
|
// distinct
|
||||||
|
self.distinct |= distinct;
|
||||||
|
|
||||||
|
// filter
|
||||||
|
self.filter_with_geo_radius |= filter_with_geo_radius;
|
||||||
|
self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
|
||||||
|
self.filter_sum_of_criteria_terms =
|
||||||
|
self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms);
|
||||||
|
self.filter_total_number_of_criteria =
|
||||||
|
self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria);
|
||||||
|
for (key, value) in used_syntax.into_iter() {
|
||||||
|
let used_syntax = self.used_syntax.entry(key).or_insert(0);
|
||||||
|
*used_syntax = used_syntax.saturating_add(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
// attributes_to_search_on
|
||||||
|
self.attributes_to_search_on_total_number_of_uses = self
|
||||||
|
.attributes_to_search_on_total_number_of_uses
|
||||||
|
.saturating_add(attributes_to_search_on_total_number_of_uses);
|
||||||
|
|
||||||
|
// q
|
||||||
|
self.max_terms_number = self.max_terms_number.max(max_terms_number);
|
||||||
|
|
||||||
|
// vector
|
||||||
|
self.max_vector_size = self.max_vector_size.max(max_vector_size);
|
||||||
|
self.retrieve_vectors |= retrieve_vectors;
|
||||||
|
self.semantic_ratio |= semantic_ratio;
|
||||||
|
self.hybrid |= hybrid;
|
||||||
|
|
||||||
|
// pagination
|
||||||
|
self.max_limit = self.max_limit.max(max_limit);
|
||||||
|
self.max_offset = self.max_offset.max(max_offset);
|
||||||
|
self.finite_pagination += finite_pagination;
|
||||||
|
|
||||||
|
// formatting
|
||||||
|
self.max_attributes_to_retrieve =
|
||||||
|
self.max_attributes_to_retrieve.max(max_attributes_to_retrieve);
|
||||||
|
self.max_attributes_to_highlight =
|
||||||
|
self.max_attributes_to_highlight.max(max_attributes_to_highlight);
|
||||||
|
self.highlight_pre_tag |= highlight_pre_tag;
|
||||||
|
self.highlight_post_tag |= highlight_post_tag;
|
||||||
|
self.max_attributes_to_crop = self.max_attributes_to_crop.max(max_attributes_to_crop);
|
||||||
|
self.crop_marker |= crop_marker;
|
||||||
|
self.show_matches_position |= show_matches_position;
|
||||||
|
self.crop_length |= crop_length;
|
||||||
|
|
||||||
|
// facets
|
||||||
|
self.facets_sum_of_terms = self.facets_sum_of_terms.saturating_add(facets_sum_of_terms);
|
||||||
|
self.facets_total_number_of_facets =
|
||||||
|
self.facets_total_number_of_facets.saturating_add(facets_total_number_of_facets);
|
||||||
|
|
||||||
|
// matching strategy
|
||||||
|
for (key, value) in matching_strategy.into_iter() {
|
||||||
|
let matching_strategy = self.matching_strategy.entry(key).or_insert(0);
|
||||||
|
*matching_strategy = matching_strategy.saturating_add(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
// scoring
|
||||||
|
self.show_ranking_score |= show_ranking_score;
|
||||||
|
self.show_ranking_score_details |= show_ranking_score_details;
|
||||||
|
self.ranking_score_threshold |= ranking_score_threshold;
|
||||||
|
|
||||||
|
// locales
|
||||||
|
self.locales.append(&mut locales);
|
||||||
|
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
fn into_event(self: Box<Self>) -> serde_json::Value {
|
||||||
|
let Self {
|
||||||
|
total_received,
|
||||||
|
total_succeeded,
|
||||||
|
time_spent,
|
||||||
|
sort_with_geo_point,
|
||||||
|
sort_sum_of_criteria_terms,
|
||||||
|
sort_total_number_of_criteria,
|
||||||
|
distinct,
|
||||||
|
filter_with_geo_radius,
|
||||||
|
filter_with_geo_bounding_box,
|
||||||
|
filter_sum_of_criteria_terms,
|
||||||
|
filter_total_number_of_criteria,
|
||||||
|
used_syntax,
|
||||||
|
attributes_to_search_on_total_number_of_uses,
|
||||||
|
max_terms_number,
|
||||||
|
max_vector_size,
|
||||||
|
retrieve_vectors,
|
||||||
|
matching_strategy,
|
||||||
|
max_limit,
|
||||||
|
max_offset,
|
||||||
|
finite_pagination,
|
||||||
|
max_attributes_to_retrieve,
|
||||||
|
max_attributes_to_highlight,
|
||||||
|
highlight_pre_tag,
|
||||||
|
highlight_post_tag,
|
||||||
|
max_attributes_to_crop,
|
||||||
|
crop_marker,
|
||||||
|
show_matches_position,
|
||||||
|
crop_length,
|
||||||
|
facets_sum_of_terms,
|
||||||
|
facets_total_number_of_facets,
|
||||||
|
show_ranking_score,
|
||||||
|
show_ranking_score_details,
|
||||||
|
semantic_ratio,
|
||||||
|
hybrid,
|
||||||
|
total_degraded,
|
||||||
|
total_used_negative_operator,
|
||||||
|
ranking_score_threshold,
|
||||||
|
locales,
|
||||||
|
marker: _,
|
||||||
|
} = *self;
|
||||||
|
|
||||||
|
// we get all the values in a sorted manner
|
||||||
|
let time_spent = time_spent.into_sorted_vec();
|
||||||
|
// the index of the 99th percentage of value
|
||||||
|
let percentile_99th = time_spent.len() * 99 / 100;
|
||||||
|
// We are only interested by the slowest value of the 99th fastest results
|
||||||
|
let time_spent = time_spent.get(percentile_99th);
|
||||||
|
|
||||||
|
json!({
|
||||||
|
"requests": {
|
||||||
|
"99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
|
||||||
|
"total_succeeded": total_succeeded,
|
||||||
|
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
|
||||||
|
"total_received": total_received,
|
||||||
|
"total_degraded": total_degraded,
|
||||||
|
"total_used_negative_operator": total_used_negative_operator,
|
||||||
|
},
|
||||||
|
"sort": {
|
||||||
|
"with_geoPoint": sort_with_geo_point,
|
||||||
|
"avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64),
|
||||||
|
},
|
||||||
|
"distinct": distinct,
|
||||||
|
"filter": {
|
||||||
|
"with_geoRadius": filter_with_geo_radius,
|
||||||
|
"with_geoBoundingBox": filter_with_geo_bounding_box,
|
||||||
|
"avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
|
||||||
|
"most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
|
||||||
|
},
|
||||||
|
"attributes_to_search_on": {
|
||||||
|
"total_number_of_uses": attributes_to_search_on_total_number_of_uses,
|
||||||
|
},
|
||||||
|
"q": {
|
||||||
|
"max_terms_number": max_terms_number,
|
||||||
|
},
|
||||||
|
"vector": {
|
||||||
|
"max_vector_size": max_vector_size,
|
||||||
|
"retrieve_vectors": retrieve_vectors,
|
||||||
|
},
|
||||||
|
"hybrid": {
|
||||||
|
"enabled": hybrid,
|
||||||
|
"semantic_ratio": semantic_ratio,
|
||||||
|
},
|
||||||
|
"pagination": {
|
||||||
|
"max_limit": max_limit,
|
||||||
|
"max_offset": max_offset,
|
||||||
|
"most_used_navigation": if finite_pagination > (total_received / 2) { "exhaustive" } else { "estimated" },
|
||||||
|
},
|
||||||
|
"formatting": {
|
||||||
|
"max_attributes_to_retrieve": max_attributes_to_retrieve,
|
||||||
|
"max_attributes_to_highlight": max_attributes_to_highlight,
|
||||||
|
"highlight_pre_tag": highlight_pre_tag,
|
||||||
|
"highlight_post_tag": highlight_post_tag,
|
||||||
|
"max_attributes_to_crop": max_attributes_to_crop,
|
||||||
|
"crop_marker": crop_marker,
|
||||||
|
"show_matches_position": show_matches_position,
|
||||||
|
"crop_length": crop_length,
|
||||||
|
},
|
||||||
|
"facets": {
|
||||||
|
"avg_facets_number": format!("{:.2}", facets_sum_of_terms as f64 / facets_total_number_of_facets as f64),
|
||||||
|
},
|
||||||
|
"matching_strategy": {
|
||||||
|
"most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
|
||||||
|
},
|
||||||
|
"locales": locales,
|
||||||
|
"scoring": {
|
||||||
|
"show_ranking_score": show_ranking_score,
|
||||||
|
"show_ranking_score_details": show_ranking_score_details,
|
||||||
|
"ranking_score_threshold": ranking_score_threshold,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
@ -13,10 +13,10 @@ use serde_json::Value;
|
|||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
|
|
||||||
use super::ActionPolicy;
|
use super::ActionPolicy;
|
||||||
use crate::analytics::segment_analytics::{SimilarGET, SimilarPOST};
|
use crate::analytics::Analytics;
|
||||||
use crate::analytics::{Analytics, SimilarAggregator};
|
|
||||||
use crate::extractors::authentication::GuardedData;
|
use crate::extractors::authentication::GuardedData;
|
||||||
use crate::extractors::sequential_extractor::SeqHandler;
|
use crate::extractors::sequential_extractor::SeqHandler;
|
||||||
|
use crate::routes::indexes::similar_analytics::{SimilarAggregator, SimilarGET, SimilarPOST};
|
||||||
use crate::search::{
|
use crate::search::{
|
||||||
add_search_rules, perform_similar, RankingScoreThresholdSimilar, RetrieveVectors, SearchKind,
|
add_search_rules, perform_similar, RankingScoreThresholdSimilar, RetrieveVectors, SearchKind,
|
||||||
SimilarQuery, SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
|
SimilarQuery, SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
|
||||||
|
235
meilisearch/src/routes/indexes/similar_analytics.rs
Normal file
235
meilisearch/src/routes/indexes/similar_analytics.rs
Normal file
@ -0,0 +1,235 @@
|
|||||||
|
use std::collections::{BinaryHeap, HashMap};
|
||||||
|
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
|
use regex::Regex;
|
||||||
|
use serde_json::{json, Value};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
aggregate_methods,
|
||||||
|
analytics::{Aggregate, AggregateMethod},
|
||||||
|
search::{SimilarQuery, SimilarResult},
|
||||||
|
};
|
||||||
|
|
||||||
|
// Declares `SimilarPOST` and `SimilarGET`, the markers used as the `Method` parameter of
// `SimilarAggregator`. Presumably the string on the right is what `Method::event_name()`
// returns for each marker -- confirm against the `aggregate_methods!` definition.
aggregate_methods!(
|
||||||
|
SimilarPOST => "Similar POST",
|
||||||
|
SimilarGET => "Similar GET",
|
||||||
|
);
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
pub struct SimilarAggregator<Method: AggregateMethod> {
|
||||||
|
// requests
|
||||||
|
total_received: usize,
|
||||||
|
total_succeeded: usize,
|
||||||
|
time_spent: BinaryHeap<usize>,
|
||||||
|
|
||||||
|
// filter
|
||||||
|
filter_with_geo_radius: bool,
|
||||||
|
filter_with_geo_bounding_box: bool,
|
||||||
|
// every time a request has a filter, this field must be incremented by the number of terms it contains
|
||||||
|
filter_sum_of_criteria_terms: usize,
|
||||||
|
// every time a request has a filter, this field must be incremented by one
|
||||||
|
filter_total_number_of_criteria: usize,
|
||||||
|
used_syntax: HashMap<String, usize>,
|
||||||
|
|
||||||
|
// Whether a non-default embedder was specified
|
||||||
|
retrieve_vectors: bool,
|
||||||
|
|
||||||
|
// pagination
|
||||||
|
max_limit: usize,
|
||||||
|
max_offset: usize,
|
||||||
|
|
||||||
|
// formatting
|
||||||
|
max_attributes_to_retrieve: usize,
|
||||||
|
|
||||||
|
// scoring
|
||||||
|
show_ranking_score: bool,
|
||||||
|
show_ranking_score_details: bool,
|
||||||
|
ranking_score_threshold: bool,
|
||||||
|
|
||||||
|
marker: std::marker::PhantomData<Method>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<Method: AggregateMethod> SimilarAggregator<Method> {
|
||||||
|
#[allow(clippy::field_reassign_with_default)]
|
||||||
|
pub fn from_query(query: &SimilarQuery) -> Self {
|
||||||
|
let SimilarQuery {
|
||||||
|
id: _,
|
||||||
|
embedder: _,
|
||||||
|
offset,
|
||||||
|
limit,
|
||||||
|
attributes_to_retrieve: _,
|
||||||
|
retrieve_vectors,
|
||||||
|
show_ranking_score,
|
||||||
|
show_ranking_score_details,
|
||||||
|
filter,
|
||||||
|
ranking_score_threshold,
|
||||||
|
} = query;
|
||||||
|
|
||||||
|
let mut ret = Self::default();
|
||||||
|
|
||||||
|
ret.total_received = 1;
|
||||||
|
|
||||||
|
if let Some(ref filter) = filter {
|
||||||
|
static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
|
||||||
|
ret.filter_total_number_of_criteria = 1;
|
||||||
|
|
||||||
|
let syntax = match filter {
|
||||||
|
Value::String(_) => "string".to_string(),
|
||||||
|
Value::Array(values) => {
|
||||||
|
if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) {
|
||||||
|
"mixed".to_string()
|
||||||
|
} else {
|
||||||
|
"array".to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => "none".to_string(),
|
||||||
|
};
|
||||||
|
// convert the string to a HashMap
|
||||||
|
ret.used_syntax.insert(syntax, 1);
|
||||||
|
|
||||||
|
let stringified_filters = filter.to_string();
|
||||||
|
ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
|
||||||
|
ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox(");
|
||||||
|
ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
|
||||||
|
}
|
||||||
|
|
||||||
|
ret.max_limit = *limit;
|
||||||
|
ret.max_offset = *offset;
|
||||||
|
|
||||||
|
ret.show_ranking_score = *show_ranking_score;
|
||||||
|
ret.show_ranking_score_details = *show_ranking_score_details;
|
||||||
|
ret.ranking_score_threshold = ranking_score_threshold.is_some();
|
||||||
|
|
||||||
|
ret.retrieve_vectors = *retrieve_vectors;
|
||||||
|
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn succeed(&mut self, result: &SimilarResult) {
|
||||||
|
let SimilarResult { id: _, hits: _, processing_time_ms, hits_info: _ } = result;
|
||||||
|
|
||||||
|
self.total_succeeded = self.total_succeeded.saturating_add(1);
|
||||||
|
|
||||||
|
self.time_spent.push(*processing_time_ms as usize);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<Method: AggregateMethod> Aggregate for SimilarAggregator<Method> {
|
||||||
|
fn event_name(&self) -> &'static str {
|
||||||
|
Method::event_name()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Aggregate one [SimilarAggregator] into another.
|
||||||
|
fn aggregate(mut self: Box<Self>, new: Box<Self>) -> Box<Self> {
|
||||||
|
let Self {
|
||||||
|
total_received,
|
||||||
|
total_succeeded,
|
||||||
|
mut time_spent,
|
||||||
|
filter_with_geo_radius,
|
||||||
|
filter_with_geo_bounding_box,
|
||||||
|
filter_sum_of_criteria_terms,
|
||||||
|
filter_total_number_of_criteria,
|
||||||
|
used_syntax,
|
||||||
|
max_limit,
|
||||||
|
max_offset,
|
||||||
|
max_attributes_to_retrieve,
|
||||||
|
show_ranking_score,
|
||||||
|
show_ranking_score_details,
|
||||||
|
ranking_score_threshold,
|
||||||
|
retrieve_vectors,
|
||||||
|
marker: _,
|
||||||
|
} = *new;
|
||||||
|
|
||||||
|
// request
|
||||||
|
self.total_received = self.total_received.saturating_add(total_received);
|
||||||
|
self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
|
||||||
|
self.time_spent.append(&mut time_spent);
|
||||||
|
|
||||||
|
// filter
|
||||||
|
self.filter_with_geo_radius |= filter_with_geo_radius;
|
||||||
|
self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
|
||||||
|
self.filter_sum_of_criteria_terms =
|
||||||
|
self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms);
|
||||||
|
self.filter_total_number_of_criteria =
|
||||||
|
self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria);
|
||||||
|
for (key, value) in used_syntax.into_iter() {
|
||||||
|
let used_syntax = self.used_syntax.entry(key).or_insert(0);
|
||||||
|
*used_syntax = used_syntax.saturating_add(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
self.retrieve_vectors |= retrieve_vectors;
|
||||||
|
|
||||||
|
// pagination
|
||||||
|
self.max_limit = self.max_limit.max(max_limit);
|
||||||
|
self.max_offset = self.max_offset.max(max_offset);
|
||||||
|
|
||||||
|
// formatting
|
||||||
|
self.max_attributes_to_retrieve =
|
||||||
|
self.max_attributes_to_retrieve.max(max_attributes_to_retrieve);
|
||||||
|
|
||||||
|
// scoring
|
||||||
|
self.show_ranking_score |= show_ranking_score;
|
||||||
|
self.show_ranking_score_details |= show_ranking_score_details;
|
||||||
|
self.ranking_score_threshold |= ranking_score_threshold;
|
||||||
|
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
fn into_event(self: Box<Self>) -> serde_json::Value {
|
||||||
|
let Self {
|
||||||
|
total_received,
|
||||||
|
total_succeeded,
|
||||||
|
time_spent,
|
||||||
|
filter_with_geo_radius,
|
||||||
|
filter_with_geo_bounding_box,
|
||||||
|
filter_sum_of_criteria_terms,
|
||||||
|
filter_total_number_of_criteria,
|
||||||
|
used_syntax,
|
||||||
|
max_limit,
|
||||||
|
max_offset,
|
||||||
|
max_attributes_to_retrieve,
|
||||||
|
show_ranking_score,
|
||||||
|
show_ranking_score_details,
|
||||||
|
ranking_score_threshold,
|
||||||
|
retrieve_vectors,
|
||||||
|
marker: _,
|
||||||
|
} = *self;
|
||||||
|
|
||||||
|
// we get all the values in a sorted manner
|
||||||
|
let time_spent = time_spent.into_sorted_vec();
|
||||||
|
// the index of the 99th percentage of value
|
||||||
|
let percentile_99th = time_spent.len() * 99 / 100;
|
||||||
|
// We are only interested by the slowest value of the 99th fastest results
|
||||||
|
let time_spent = time_spent.get(percentile_99th);
|
||||||
|
|
||||||
|
json!({
|
||||||
|
"requests": {
|
||||||
|
"99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
|
||||||
|
"total_succeeded": total_succeeded,
|
||||||
|
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
|
||||||
|
"total_received": total_received,
|
||||||
|
},
|
||||||
|
"filter": {
|
||||||
|
"with_geoRadius": filter_with_geo_radius,
|
||||||
|
"with_geoBoundingBox": filter_with_geo_bounding_box,
|
||||||
|
"avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
|
||||||
|
"most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
|
||||||
|
},
|
||||||
|
"vector": {
|
||||||
|
"retrieve_vectors": retrieve_vectors,
|
||||||
|
},
|
||||||
|
"pagination": {
|
||||||
|
"max_limit": max_limit,
|
||||||
|
"max_offset": max_offset,
|
||||||
|
},
|
||||||
|
"formatting": {
|
||||||
|
"max_attributes_to_retrieve": max_attributes_to_retrieve,
|
||||||
|
},
|
||||||
|
"scoring": {
|
||||||
|
"show_ranking_score": show_ranking_score,
|
||||||
|
"show_ranking_score_details": show_ranking_score_details,
|
||||||
|
"ranking_score_threshold": ranking_score_threshold,
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
@ -25,6 +25,7 @@ pub mod indexes;
|
|||||||
mod logs;
|
mod logs;
|
||||||
mod metrics;
|
mod metrics;
|
||||||
mod multi_search;
|
mod multi_search;
|
||||||
|
mod multi_search_analytics;
|
||||||
mod snapshot;
|
mod snapshot;
|
||||||
mod swap_indexes;
|
mod swap_indexes;
|
||||||
pub mod tasks;
|
pub mod tasks;
|
||||||
|
@ -9,7 +9,7 @@ use meilisearch_types::keys::actions;
|
|||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
|
|
||||||
use crate::analytics::{Analytics, MultiSearchAggregator};
|
use crate::analytics::Analytics;
|
||||||
use crate::error::MeilisearchHttpError;
|
use crate::error::MeilisearchHttpError;
|
||||||
use crate::extractors::authentication::policies::ActionPolicy;
|
use crate::extractors::authentication::policies::ActionPolicy;
|
||||||
use crate::extractors::authentication::{AuthenticationError, GuardedData};
|
use crate::extractors::authentication::{AuthenticationError, GuardedData};
|
||||||
@ -21,6 +21,8 @@ use crate::search::{
|
|||||||
};
|
};
|
||||||
use crate::search_queue::SearchQueue;
|
use crate::search_queue::SearchQueue;
|
||||||
|
|
||||||
|
use super::multi_search_analytics::MultiSearchAggregator;
|
||||||
|
|
||||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||||
cfg.service(web::resource("").route(web::post().to(SeqHandler(multi_search_with_post))));
|
cfg.service(web::resource("").route(web::post().to(SeqHandler(multi_search_with_post))));
|
||||||
}
|
}
|
||||||
|
170
meilisearch/src/routes/multi_search_analytics.rs
Normal file
170
meilisearch/src/routes/multi_search_analytics.rs
Normal file
@ -0,0 +1,170 @@
|
|||||||
|
use std::collections::HashSet;
|
||||||
|
|
||||||
|
use serde_json::json;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
analytics::Aggregate,
|
||||||
|
search::{FederatedSearch, SearchQueryWithIndex},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Analytics counters collected for multi-search requests.
#[derive(Default)]
pub struct MultiSearchAggregator {
    // requests
    /// Number of requests received.
    total_received: usize,
    /// Number of requests that succeeded.
    total_succeeded: usize,

    /// Sum of the number of distinct indexes in each single request; divide by
    /// `total_received` to get an average.
    total_distinct_index_count: usize,
    /// Number of requests whose queries all target one single index; divide by
    /// `total_received` to get a proportion.
    total_single_index: usize,

    /// Sum of the number of search queries in the requests; divide by `total_received`
    /// to get an average.
    total_search_count: usize,

    // scoring
    /// Whether any query asked for the ranking score.
    show_ranking_score: bool,
    /// Whether any query asked for the ranking score details.
    show_ranking_score_details: bool,

    // federation
    /// Whether any request carried federation options.
    use_federation: bool,
}
|
||||||
|
|
||||||
|
impl MultiSearchAggregator {
|
||||||
|
pub fn from_federated_search(federated_search: &FederatedSearch) -> Self {
|
||||||
|
let use_federation = federated_search.federation.is_some();
|
||||||
|
|
||||||
|
let distinct_indexes: HashSet<_> = federated_search
|
||||||
|
.queries
|
||||||
|
.iter()
|
||||||
|
.map(|query| {
|
||||||
|
let query = &query;
|
||||||
|
// make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex
|
||||||
|
let SearchQueryWithIndex {
|
||||||
|
index_uid,
|
||||||
|
federation_options: _,
|
||||||
|
q: _,
|
||||||
|
vector: _,
|
||||||
|
offset: _,
|
||||||
|
limit: _,
|
||||||
|
page: _,
|
||||||
|
hits_per_page: _,
|
||||||
|
attributes_to_retrieve: _,
|
||||||
|
retrieve_vectors: _,
|
||||||
|
attributes_to_crop: _,
|
||||||
|
crop_length: _,
|
||||||
|
attributes_to_highlight: _,
|
||||||
|
show_ranking_score: _,
|
||||||
|
show_ranking_score_details: _,
|
||||||
|
show_matches_position: _,
|
||||||
|
filter: _,
|
||||||
|
sort: _,
|
||||||
|
distinct: _,
|
||||||
|
facets: _,
|
||||||
|
highlight_pre_tag: _,
|
||||||
|
highlight_post_tag: _,
|
||||||
|
crop_marker: _,
|
||||||
|
matching_strategy: _,
|
||||||
|
attributes_to_search_on: _,
|
||||||
|
hybrid: _,
|
||||||
|
ranking_score_threshold: _,
|
||||||
|
locales: _,
|
||||||
|
} = query;
|
||||||
|
|
||||||
|
index_uid.as_str()
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let show_ranking_score =
|
||||||
|
federated_search.queries.iter().any(|query| query.show_ranking_score);
|
||||||
|
let show_ranking_score_details =
|
||||||
|
federated_search.queries.iter().any(|query| query.show_ranking_score_details);
|
||||||
|
|
||||||
|
Self {
|
||||||
|
total_received: 1,
|
||||||
|
total_succeeded: 0,
|
||||||
|
total_distinct_index_count: distinct_indexes.len(),
|
||||||
|
total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 },
|
||||||
|
total_search_count: federated_search.queries.len(),
|
||||||
|
show_ranking_score,
|
||||||
|
show_ranking_score_details,
|
||||||
|
use_federation,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn succeed(&mut self) {
|
||||||
|
self.total_succeeded = self.total_succeeded.saturating_add(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Aggregate for MultiSearchAggregator {
|
||||||
|
fn event_name(&self) -> &'static str {
|
||||||
|
"Documents Searched by Multi-Search POST"
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Aggregate one [MultiSearchAggregator] into another.
|
||||||
|
fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
|
||||||
|
// write the aggregate in a way that will cause a compilation error if a field is added.
|
||||||
|
|
||||||
|
// get ownership of self, replacing it by a default value.
|
||||||
|
let this = *self;
|
||||||
|
|
||||||
|
let total_received = this.total_received.saturating_add(new.total_received);
|
||||||
|
let total_succeeded = this.total_succeeded.saturating_add(new.total_succeeded);
|
||||||
|
let total_distinct_index_count =
|
||||||
|
this.total_distinct_index_count.saturating_add(new.total_distinct_index_count);
|
||||||
|
let total_single_index = this.total_single_index.saturating_add(new.total_single_index);
|
||||||
|
let total_search_count = this.total_search_count.saturating_add(new.total_search_count);
|
||||||
|
let show_ranking_score = this.show_ranking_score || new.show_ranking_score;
|
||||||
|
let show_ranking_score_details =
|
||||||
|
this.show_ranking_score_details || new.show_ranking_score_details;
|
||||||
|
let use_federation = this.use_federation || new.use_federation;
|
||||||
|
|
||||||
|
Box::new(Self {
|
||||||
|
total_received,
|
||||||
|
total_succeeded,
|
||||||
|
total_distinct_index_count,
|
||||||
|
total_single_index,
|
||||||
|
total_search_count,
|
||||||
|
show_ranking_score,
|
||||||
|
show_ranking_score_details,
|
||||||
|
use_federation,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn into_event(self: Box<Self>) -> serde_json::Value {
|
||||||
|
let Self {
|
||||||
|
total_received,
|
||||||
|
total_succeeded,
|
||||||
|
total_distinct_index_count,
|
||||||
|
total_single_index,
|
||||||
|
total_search_count,
|
||||||
|
show_ranking_score,
|
||||||
|
show_ranking_score_details,
|
||||||
|
use_federation,
|
||||||
|
} = *self;
|
||||||
|
|
||||||
|
json!({
|
||||||
|
"requests": {
|
||||||
|
"total_succeeded": total_succeeded,
|
||||||
|
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
|
||||||
|
"total_received": total_received,
|
||||||
|
},
|
||||||
|
"indexes": {
|
||||||
|
"total_single_index": total_single_index,
|
||||||
|
"total_distinct_index_count": total_distinct_index_count,
|
||||||
|
"avg_distinct_index_count": (total_distinct_index_count as f64) / (total_received as f64), // not 0 else returned early
|
||||||
|
},
|
||||||
|
"searches": {
|
||||||
|
"total_search_count": total_search_count,
|
||||||
|
"avg_search_count": (total_search_count as f64) / (total_received as f64),
|
||||||
|
},
|
||||||
|
"scoring": {
|
||||||
|
"show_ranking_score": show_ranking_score,
|
||||||
|
"show_ranking_score_details": show_ranking_score_details,
|
||||||
|
},
|
||||||
|
"federation": {
|
||||||
|
"use_federation": use_federation,
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user