5178: Add Prometheus metrics to measure task queue latency r=irevoire a=takaebato

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5046

## What does this PR do?

- Added Prometheus metrics to measure task queue latency

(Confirmed locally that latency is measured during parallel task execution in the benchmark.)

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Takahiro Ebato <takahiro.ebato@gmail.com>
This commit is contained in:
meili-bors[bot] 2024-12-30 14:47:15 +00:00 committed by GitHub
commit 1a0d8810e5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 131 additions and 11 deletions

View File

@ -1403,6 +1403,104 @@
"title": "Number of tasks by indexes", "title": "Number of tasks by indexes",
"type": "timeseries" "type": "timeseries"
}, },
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 15,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 11,
"w": 12,
"x": 12,
"y": 51
},
"id": 29,
"interval": "5s",
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.1.4",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "builder",
"exemplar": true,
"expr": "meilisearch_task_queue_latency_seconds{instance=\"$instance\", job=\"$job\"}",
"interval": "",
"legendFormat": "{{value}} ",
"range": true,
"refId": "A"
}
],
"title": "Task queue latency",
"type": "timeseries"
},
{ {
"collapsed": true, "collapsed": true,
"datasource": { "datasource": {

View File

@ -1,7 +1,7 @@
use lazy_static::lazy_static; use lazy_static::lazy_static;
use prometheus::{ use prometheus::{
opts, register_histogram_vec, register_int_counter_vec, register_int_gauge, opts, register_gauge, register_histogram_vec, register_int_counter_vec, register_int_gauge,
register_int_gauge_vec, HistogramVec, IntCounterVec, IntGauge, IntGaugeVec, register_int_gauge_vec, Gauge, HistogramVec, IntCounterVec, IntGauge, IntGaugeVec,
}; };
lazy_static! { lazy_static! {
@ -63,4 +63,9 @@ lazy_static! {
"Meilisearch Searches Being Processed" "Meilisearch Searches Being Processed"
)) ))
.expect("Can't create a metric"); .expect("Can't create a metric");
pub static ref MEILISEARCH_TASK_QUEUE_LATENCY_SECONDS: Gauge = register_gauge!(
"meilisearch_task_queue_latency_seconds",
"Meilisearch Task Queue Latency in Seconds",
)
.expect("Can't create a metric");
} }

View File

@ -1,16 +1,17 @@
use actix_web::http::header;
use actix_web::web::{self, Data};
use actix_web::HttpResponse;
use index_scheduler::IndexScheduler;
use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use prometheus::{Encoder, TextEncoder};
use crate::extractors::authentication::policies::ActionPolicy; use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::routes::create_all_stats; use crate::routes::create_all_stats;
use crate::search_queue::SearchQueue; use crate::search_queue::SearchQueue;
use actix_web::http::header;
use actix_web::web::{self, Data};
use actix_web::HttpResponse;
use index_scheduler::{IndexScheduler, Query};
use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use meilisearch_types::tasks::Status;
use prometheus::{Encoder, TextEncoder};
use time::OffsetDateTime;
pub fn configure(config: &mut web::ServiceConfig) { pub fn configure(config: &mut web::ServiceConfig) {
config.service(web::resource("").route(web::get().to(get_metrics))); config.service(web::resource("").route(web::get().to(get_metrics)));
@ -61,6 +62,22 @@ pub async fn get_metrics(
} }
crate::metrics::MEILISEARCH_IS_INDEXING.set(index_scheduler.is_task_processing()? as i64); crate::metrics::MEILISEARCH_IS_INDEXING.set(index_scheduler.is_task_processing()? as i64);
let task_queue_latency_seconds = index_scheduler
.get_tasks_from_authorized_indexes(
Query {
limit: Some(1),
reverse: Some(true),
statuses: Some(vec![Status::Enqueued, Status::Processing]),
..Query::default()
},
auth_filters,
)?
.0
.first()
.map(|task| (OffsetDateTime::now_utc() - task.enqueued_at).as_seconds_f64())
.unwrap_or(0.0);
crate::metrics::MEILISEARCH_TASK_QUEUE_LATENCY_SECONDS.set(task_queue_latency_seconds);
let encoder = TextEncoder::new(); let encoder = TextEncoder::new();
let mut buffer = vec![]; let mut buffer = vec![];
encoder.encode(&prometheus::gather(), &mut buffer).expect("Failed to encode metrics"); encoder.encode(&prometheus::gather(), &mut buffer).expect("Failed to encode metrics");