Mirror of https://github.com/meilisearch/MeiliSearch (synced 2024-11-22 21:04:27 +01:00)
Merge #3789

3789: Improve the metrics r=dureuill a=irevoire

# Pull Request

## Related issue

Implements https://github.com/meilisearch/meilisearch/issues/3790
Associated specification: https://github.com/meilisearch/specifications/pull/242

## Be cautious; it's DB-breaking 😱

While reviewing and after merging this PR, be cautious: if you already have a `data.ms` and run Meilisearch with this code on it, it won't work, because we need to cache new information in the index stats (which are backed up on disk). You'll get internal errors.

### About the breaking-change label

We only break the API of the metrics route, which does not pose any problem since it's experimental.

## What does this PR do?

- Create a method to get the « facet distribution » of the task queue.
- Prefix all the metrics with `meilisearch_`.
- Add the real database size used by Meilisearch.
- Add metrics on the task queue.
- Update the Grafana dashboard to reflect these changes.
- Move the dashboard to the `assets` directory.
- Provide a new Prometheus file to scrape Meilisearch easily.

Co-authored-by: Tamo <tamo@meilisearch.com>
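As the diffs below show, after the renames and additions the `/metrics` route exposes, among others, `meilisearch_http_requests_total`, `meilisearch_db_size_bytes`, `meilisearch_used_db_size_bytes`, `meilisearch_index_count`, and the new `meilisearch_nb_tasks` gauge.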
Commit d96d8bb0dd
assets/grafana-dashboard.json: new file, 1376 lines (file diff suppressed because it is too large).
assets/prometheus-basic-scraper.yml: new file, 19 lines:
```diff
@@ -0,0 +1,19 @@
+global:
+  scrape_interval: 15s # By default, scrape targets every 15 seconds.
+
+  # Attach these labels to any time series or alerts when communicating with
+  # external systems (federation, remote storage, Alertmanager).
+  external_labels:
+    monitor: 'codelab-monitor'
+
+# A scrape configuration containing exactly one endpoint to scrape:
+# Here it's Prometheus itself.
+scrape_configs:
+  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
+  - job_name: 'meilisearch'
+
+    # Override the global default and scrape targets from this job every 5 seconds.
+    scrape_interval: 5s
+
+    static_configs:
+      - targets: ['localhost:7700']
```
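To use it, point a stock Prometheus binary at the file with `prometheus --config.file=assets/prometheus-basic-scraper.yml`. The per-job `scrape_interval: 5s` overrides the 15-second global default for the `localhost:7700` target.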
File diff suppressed because it is too large.
`IndexStats` in the index scheduler gains a `used_database_size` field:

```diff
@@ -88,8 +88,17 @@ pub enum IndexStatus {
 pub struct IndexStats {
     /// Number of documents in the index.
     pub number_of_documents: u64,
-    /// Size of the index' DB, in bytes.
+    /// Size taken up by the index' DB, in bytes.
+    ///
+    /// This includes the size taken by both the used and free pages of the DB, and as the free pages
+    /// are not returned to the disk after a deletion, this number is typically larger than
+    /// `used_database_size` that only includes the size of the used pages.
     pub database_size: u64,
+    /// Size taken by the used pages of the index' DB, in bytes.
+    ///
+    /// As the DB backend does not return to the disk the pages that are not currently used by the DB,
+    /// this value is typically smaller than `database_size`.
+    pub used_database_size: u64,
     /// Association of every field name with the number of times it occurs in the documents.
     pub field_distribution: FieldDistribution,
     /// Creation date of the index.
@@ -105,10 +114,10 @@ impl IndexStats {
     ///
     /// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
     pub fn new(index: &Index, rtxn: &RoTxn) -> Result<Self> {
-        let database_size = index.on_disk_size()?;
         Ok(IndexStats {
             number_of_documents: index.number_of_documents(rtxn)?,
-            database_size,
+            database_size: index.on_disk_size()?,
+            used_database_size: index.used_size()?,
             field_distribution: index.field_distribution(rtxn)?,
             created_at: index.created_at(rtxn)?,
             updated_at: index.updated_at(rtxn)?,
```
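The two fields differ exactly by the LMDB free list: pages freed by deletions stay in the data file until they are reused. A minimal standalone sketch of the invariant a consumer can rely on (the helper name is hypothetical, not part of the PR):

```rust
/// Bytes held by free pages: space the DB can reuse for future writes
/// but has not returned to the filesystem. (Hypothetical helper.)
fn reclaimable_bytes(database_size: u64, used_database_size: u64) -> u64 {
    // `database_size` counts used + free pages, so it can never be smaller
    // than `used_database_size`; saturating_sub guards the invariant anyway.
    database_size.saturating_sub(used_database_size)
}

fn main() {
    // E.g. a 10 MiB data file of which 7 MiB of pages are in use.
    assert_eq!(reclaimable_bytes(10 << 20, 7 << 20), 3 << 20);
}
```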
The index scheduler imports `BTreeMap` for the new task-queue stats:

```diff
@@ -31,7 +31,7 @@ mod uuid_codec;
 pub type Result<T> = std::result::Result<T, Error>;
 pub type TaskId = u32;

-use std::collections::HashMap;
+use std::collections::{BTreeMap, HashMap};
 use std::ops::{Bound, RangeBounds};
 use std::path::{Path, PathBuf};
 use std::sync::atomic::AtomicBool;
```
`IndexScheduler` gets a `used_size()` counterpart to `size()`:

```diff
@@ -554,10 +554,16 @@ impl IndexScheduler {
         &self.index_mapper.indexer_config
     }

+    /// Return the real database size (i.e.: The size **with** the free pages)
     pub fn size(&self) -> Result<u64> {
         Ok(self.env.real_disk_size()?)
     }

+    /// Return the used database size (i.e.: The size **without** the free pages)
+    pub fn used_size(&self) -> Result<u64> {
+        Ok(self.env.non_free_pages_size()?)
+    }
+
     /// Return the index corresponding to the name.
     ///
     /// * If the index wasn't opened before, the index will be opened.
```
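Both methods read counters from the scheduler's LMDB environment (exposed through `heed`): `real_disk_size` reports the size of the data file including the free list, while `non_free_pages_size` counts only the pages currently in use, so `used_size() <= size()` always holds.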
A new `get_stats` method computes the « facet distribution » of the task queue mentioned in the PR description:

```diff
@@ -737,6 +743,38 @@ impl IndexScheduler {
         Ok(tasks)
     }

+    /// The returned structure contains:
+    /// 1. The name of the property being observed can be `statuses`, `types`, or `indexes`.
+    /// 2. The name of the specific data related to the property can be `enqueued` for the `statuses`, `settingsUpdate` for the `types`, or the name of the index for the `indexes`, for example.
+    /// 3. The number of times the properties appeared.
+    pub fn get_stats(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
+        let rtxn = self.read_txn()?;
+
+        let mut res = BTreeMap::new();
+
+        res.insert(
+            "statuses".to_string(),
+            enum_iterator::all::<Status>()
+                .map(|s| Ok((s.to_string(), self.get_status(&rtxn, s)?.len())))
+                .collect::<Result<BTreeMap<String, u64>>>()?,
+        );
+        res.insert(
+            "types".to_string(),
+            enum_iterator::all::<Kind>()
+                .map(|s| Ok((s.to_string(), self.get_kind(&rtxn, s)?.len())))
+                .collect::<Result<BTreeMap<String, u64>>>()?,
+        );
+        res.insert(
+            "indexes".to_string(),
+            self.index_tasks
+                .iter(&rtxn)?
+                .map(|res| Ok(res.map(|(name, bitmap)| (name.to_string(), bitmap.len()))?))
+                .collect::<Result<BTreeMap<String, u64>>>()?,
+        );
+
+        Ok(res)
+    }
+
     /// Return true iff there is at least one task associated with this index
     /// that is processing.
     pub fn is_index_processing(&self, index: &str) -> Result<bool> {
```
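A sketch of what the returned structure looks like to a consumer (standalone, with a hand-built map instead of a real scheduler; the counts are made up for illustration):

```rust
use std::collections::BTreeMap;

fn main() {
    // Shape returned by `IndexScheduler::get_stats`: property -> value -> count.
    let mut stats: BTreeMap<String, BTreeMap<String, u64>> = BTreeMap::new();
    stats.insert(
        "statuses".into(),
        BTreeMap::from([("enqueued".into(), 2), ("succeeded".into(), 14)]),
    );
    stats.insert("types".into(), BTreeMap::from([("settingsUpdate".into(), 1)]));
    stats.insert("indexes".into(), BTreeMap::from([("movies".into(), 17)]));

    // The /metrics route iterates it exactly like this to feed the
    // `meilisearch_nb_tasks{kind, value}` gauge (see the route diff below).
    for (kind, distribution) in &stats {
        for (value, count) in distribution {
            println!("{kind}/{value} = {count}");
        }
    }
}
```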
`AuthController` delegates the new measurement to its store:

```diff
@@ -45,6 +45,11 @@ impl AuthController {
         self.store.size()
     }

+    /// Return the used size of the `AuthController` database in bytes.
+    pub fn used_size(&self) -> Result<u64> {
+        self.store.used_size()
+    }
+
     pub fn create_key(&self, create_key: CreateApiKey) -> Result<Key> {
         match self.store.get_api_key(create_key.uid)? {
             Some(_) => Err(AuthControllerError::ApiKeyAlreadyExists(create_key.uid.to_string())),
```
`HeedAuthStore` reads the same LMDB counter:

```diff
@@ -73,6 +73,11 @@ impl HeedAuthStore {
         Ok(self.env.real_disk_size()?)
     }

+    /// Return the number of bytes actually used in the database
+    pub fn used_size(&self) -> Result<u64> {
+        Ok(self.env.non_free_pages_size()?)
+    }
+
     pub fn set_drop_on_close(&mut self, v: bool) {
         self.should_close_on_drop = v;
     }
```
In the metrics definitions, the hand-written histogram buckets are replaced by a generated, evenly distributed set, and the request counter is prefixed:

```diff
@@ -4,19 +4,31 @@ use prometheus::{
     register_int_gauge_vec, HistogramVec, IntCounterVec, IntGauge, IntGaugeVec,
 };

-const HTTP_RESPONSE_TIME_CUSTOM_BUCKETS: &[f64; 14] = &[
-    0.0005, 0.0008, 0.00085, 0.0009, 0.00095, 0.001, 0.00105, 0.0011, 0.00115, 0.0012, 0.0015,
-    0.002, 0.003, 1.0,
-];
+/// Create evenly distributed buckets
+fn create_buckets() -> [f64; 29] {
+    (0..10)
+        .chain((10..100).step_by(10))
+        .chain((100..=1000).step_by(100))
+        .map(|i| i as f64 / 1000.)
+        .collect::<Vec<_>>()
+        .try_into()
+        .unwrap()
+}

 lazy_static! {
-    pub static ref HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
-        opts!("http_requests_total", "HTTP requests total"),
+    pub static ref HTTP_RESPONSE_TIME_CUSTOM_BUCKETS: [f64; 29] = create_buckets();
+    pub static ref MEILISEARCH_HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
+        opts!("meilisearch_http_requests_total", "Meilisearch HTTP requests total"),
         &["method", "path"]
     )
     .expect("Can't create a metric");
     pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge =
-        register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch Db Size In Bytes"))
+        register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch DB Size In Bytes"))
+            .expect("Can't create a metric");
+    pub static ref MEILISEARCH_USED_DB_SIZE_BYTES: IntGauge = register_int_gauge!(opts!(
+        "meilisearch_used_db_size_bytes",
+        "Meilisearch Used DB Size In Bytes"
+    ))
     .expect("Can't create a metric");
     pub static ref MEILISEARCH_INDEX_COUNT: IntGauge =
         register_int_gauge!(opts!("meilisearch_index_count", "Meilisearch Index Count"))
```
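The iterator chain yields 0 to 9 ms in 1 ms steps, 10 to 90 ms in 10 ms steps, and 100 to 1000 ms in 100 ms steps: 10 + 9 + 10 = 29 boundaries, expressed in seconds. A quick standalone check (a hypothetical test, not part of the PR):

```rust
/// Same chain as `create_buckets` in the diff above.
fn create_buckets() -> [f64; 29] {
    (0..10)
        .chain((10..100).step_by(10))
        .chain((100..=1000).step_by(100))
        .map(|i| i as f64 / 1000.)
        .collect::<Vec<_>>()
        .try_into()
        .unwrap()
}

#[test]
fn buckets_are_evenly_distributed() {
    let buckets = create_buckets();
    assert_eq!(buckets[0], 0.0); // 0 ms
    assert_eq!(buckets[9], 0.009); // last 1 ms step
    assert_eq!(buckets[10], 0.01); // first 10 ms step
    assert_eq!(buckets[19], 0.1); // first 100 ms step
    assert_eq!(buckets[28], 1.0); // top boundary
}
```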
Further down the same `lazy_static!` block, the histogram static is prefixed and the task gauge appears:

```diff
@@ -26,11 +38,16 @@ lazy_static! {
         &["index"]
     )
     .expect("Can't create a metric");
-    pub static ref HTTP_RESPONSE_TIME_SECONDS: HistogramVec = register_histogram_vec!(
+    pub static ref MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS: HistogramVec = register_histogram_vec!(
         "http_response_time_seconds",
         "HTTP response times",
         &["method", "path"],
         HTTP_RESPONSE_TIME_CUSTOM_BUCKETS.to_vec()
     )
     .expect("Can't create a metric");
+    pub static ref MEILISEARCH_NB_TASKS: IntGaugeVec = register_int_gauge_vec!(
+        opts!("meilisearch_nb_tasks", "Meilisearch Number of tasks"),
+        &["kind", "value"]
+    )
+    .expect("Can't create a metric");
 }
```
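`meilisearch_nb_tasks` is the gauge that carries the task-queue distribution: `kind` holds the observed property (`statuses`, `types`, or `indexes`) and `value` the concrete status, task type, or index name, mirroring the two levels of the map returned by `get_stats`.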
The HTTP middleware follows the renames:

```diff
@@ -52,11 +52,11 @@ where
         if is_registered_resource {
             let request_method = req.method().to_string();
             histogram_timer = Some(
-                crate::metrics::HTTP_RESPONSE_TIME_SECONDS
+                crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
                     .with_label_values(&[&request_method, request_path])
                     .start_timer(),
             );
-            crate::metrics::HTTP_REQUESTS_TOTAL
+            crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
                 .with_label_values(&[&request_method, request_path])
                 .inc();
         }
```
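No behavioral change here: `start_timer()` still returns a prometheus `HistogramTimer` that observes the elapsed request time into the histogram when it is stopped or dropped.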
On the `/metrics` route, the `AuthController` no longer needs its own permission guard:

```diff
@@ -17,7 +17,7 @@ pub fn configure(config: &mut web::ServiceConfig) {

 pub async fn get_metrics(
     index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
-    auth_controller: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<AuthController>>,
+    auth_controller: Data<AuthController>,
 ) -> Result<HttpResponse, ResponseError> {
     let auth_filters = index_scheduler.filters();
     if !auth_filters.all_indexes_authorized() {
```
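The route stays protected: the `GuardedData<ActionPolicy<{ actions::METRICS_GET }>, _>` wrapper on `index_scheduler` already enforces the `METRICS_GET` action for the whole handler, so guarding the second extractor as well was redundant.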
Further down the handler, the stats call is simplified and the new gauge is set:

```diff
@@ -28,10 +28,10 @@ pub async fn get_metrics(
         return Err(error);
     }

-    let response =
-        create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), auth_filters)?;
+    let response = create_all_stats((*index_scheduler).clone(), auth_controller, auth_filters)?;

     crate::metrics::MEILISEARCH_DB_SIZE_BYTES.set(response.database_size as i64);
+    crate::metrics::MEILISEARCH_USED_DB_SIZE_BYTES.set(response.used_database_size as i64);
     crate::metrics::MEILISEARCH_INDEX_COUNT.set(response.indexes.len() as i64);

     for (index, value) in response.indexes.iter() {
```
And the task-queue counts are pushed just before encoding:

```diff
@@ -40,6 +40,14 @@ pub async fn get_metrics(
             .set(value.number_of_documents as i64);
     }

+    for (kind, value) in index_scheduler.get_stats()? {
+        for (value, count) in value {
+            crate::metrics::MEILISEARCH_NB_TASKS
+                .with_label_values(&[&kind, &value])
+                .set(count as i64);
+        }
+    }
+
     let encoder = TextEncoder::new();
     let mut buffer = vec![];
     encoder.encode(&prometheus::gather(), &mut buffer).expect("Failed to encode metrics");
```
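Each `(kind, value)` pair from `get_stats` becomes one gauge sample, so the route ends up exposing series such as `meilisearch_nb_tasks{kind="statuses",value="enqueued"} 2` (an illustrative value), presumably what the updated Grafana dashboard panels build on.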
`Stats` (the payload of the `/stats` route) caches the new number:

```diff
@@ -231,6 +231,8 @@ pub async fn running() -> HttpResponse {
 #[serde(rename_all = "camelCase")]
 pub struct Stats {
     pub database_size: u64,
+    #[serde(skip)]
+    pub used_database_size: u64,
     #[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
     pub last_update: Option<OffsetDateTime>,
     pub indexes: BTreeMap<String, indexes::IndexStats>,
```
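Thanks to `#[serde(skip)]`, `used_database_size` never appears in the JSON body of the `/stats` route; it only feeds the `meilisearch_used_db_size_bytes` gauge above, which keeps the breaking change confined to the experimental metrics route, as the description promises.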
`create_all_stats` accumulates the new total:

```diff
@@ -259,6 +261,7 @@ pub fn create_all_stats(
     let mut last_task: Option<OffsetDateTime> = None;
     let mut indexes = BTreeMap::new();
     let mut database_size = 0;
+    let mut used_database_size = 0;

     for index_uid in index_scheduler.index_names()? {
         // Accumulate the size of all indexes, even unauthorized ones, so
```
The per-index used size joins the loop:

```diff
@@ -266,6 +269,7 @@ pub fn create_all_stats(
         // See <https://github.com/meilisearch/meilisearch/pull/3541#discussion_r1126747643> for context.
         let stats = index_scheduler.index_stats(&index_uid)?;
         database_size += stats.inner_stats.database_size;
+        used_database_size += stats.inner_stats.used_database_size;

         if !filters.is_index_authorized(&index_uid) {
             continue;
```
And the totals are finalized:

```diff
@@ -278,10 +282,14 @@ pub fn create_all_stats(
     }

     database_size += index_scheduler.size()?;
+    used_database_size += index_scheduler.used_size()?;
     database_size += auth_controller.size()?;
-    database_size += index_scheduler.compute_update_file_size()?;
+    used_database_size += auth_controller.used_size()?;
+    let update_file_size = index_scheduler.compute_update_file_size()?;
+    database_size += update_file_size;
+    used_database_size += update_file_size;

-    let stats = Stats { database_size, last_update: last_task, indexes };
+    let stats = Stats { database_size, used_database_size, last_update: last_task, indexes };
     Ok(stats)
 }
```
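Note the asymmetry resolved at the end: the update files' size is now added to both totals, presumably because update files are plain files with no free-page overhead, so every byte of them counts as used. The final `used_database_size` is therefore the sum of the used pages of every index, the scheduler, and the auth store, plus the update files.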