From 9f0d33ec999920dfdec917aff14604df9f30e6b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 3 Jul 2025 15:05:15 +0200 Subject: [PATCH] Expose the number of tokens on the chat completions routes --- crates/meilisearch/src/metrics.rs | 5 +++ .../src/routes/chats/chat_completions.rs | 35 ++++++++++++++++--- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/crates/meilisearch/src/metrics.rs b/crates/meilisearch/src/metrics.rs index 1c7d0c3a4..9941bacae 100644 --- a/crates/meilisearch/src/metrics.rs +++ b/crates/meilisearch/src/metrics.rs @@ -21,6 +21,11 @@ lazy_static! { "Meilisearch number of search requests performed by the chat route itself" )) .expect("Can't create a metric"); + pub static ref MEILISEARCH_CHAT_TOKENS_USAGE: IntCounterVec = register_int_counter_vec!( + opts!("meilisearch_chat_tokens_usage", "Meilisearch Chat Tokens Usage"), + &["chat", "model", "type"] + ) + .expect("Can't create a metric"); pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge = register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch DB Size In Bytes")) .expect("Can't create a metric"); diff --git a/crates/meilisearch/src/routes/chats/chat_completions.rs b/crates/meilisearch/src/routes/chats/chat_completions.rs index f6030f2bc..a7d878c6e 100644 --- a/crates/meilisearch/src/routes/chats/chat_completions.rs +++ b/crates/meilisearch/src/routes/chats/chat_completions.rs @@ -13,9 +13,9 @@ use async_openai::types::{ ChatCompletionRequestDeveloperMessageContent, ChatCompletionRequestMessage, ChatCompletionRequestSystemMessage, ChatCompletionRequestSystemMessageContent, ChatCompletionRequestToolMessage, ChatCompletionRequestToolMessageContent, - ChatCompletionStreamResponseDelta, ChatCompletionToolArgs, ChatCompletionToolType, - CreateChatCompletionRequest, CreateChatCompletionStreamResponse, FinishReason, FunctionCall, - FunctionCallStream, FunctionObjectArgs, + ChatCompletionStreamOptions, ChatCompletionStreamResponseDelta, ChatCompletionToolArgs, + ChatCompletionToolType, CreateChatCompletionRequest, CreateChatCompletionStreamResponse, + FinishReason, FunctionCall, FunctionCallStream, FunctionObjectArgs, }; use async_openai::Client; use bumpalo::Bump; @@ -49,7 +49,8 @@ use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::ActionPolicy; use crate::extractors::authentication::{extract_token_from_request, GuardedData, Policy as _}; use crate::metrics::{ - MEILISEARCH_CHAT_INTERNAL_SEARCH_REQUESTS, MEILISEARCH_DEGRADED_SEARCH_REQUESTS, + MEILISEARCH_CHAT_INTERNAL_SEARCH_REQUESTS, MEILISEARCH_CHAT_TOKENS_USAGE, + MEILISEARCH_DEGRADED_SEARCH_REQUESTS, }; use crate::routes::chats::utils::SseEventSender; use crate::routes::indexes::search::search_kind; @@ -490,6 +491,7 @@ async fn streamed_chat( let (tx, rx) = tokio::sync::mpsc::channel(10); let tx = SseEventSender::new(tx); + let workspace_uid = workspace_uid.to_string(); let _join_handle = Handle::current().spawn(async move { let client = Client::with_config(config.clone()); let mut global_tool_calls = HashMap::::new(); @@ -499,6 +501,7 @@ async fn streamed_chat( let output = run_conversation( &index_scheduler, &auth_ctrl, + &workspace_uid, &search_queue, &auth_token, &client, @@ -536,6 +539,7 @@ async fn run_conversation( Data, >, auth_ctrl: &web::Data, + workspace_uid: &str, search_queue: &web::Data, auth_token: &str, client: &Client, @@ -546,12 +550,33 @@ async fn run_conversation( function_support: FunctionSupport, ) -> Result, ()>, SendError> { let mut finish_reason = None; + chat_completion.stream_options = Some(ChatCompletionStreamOptions { include_usage: true }); // safety: unwrap: can only happens if `stream` was set to `false` let mut response = client.chat().create_stream(chat_completion.clone()).await.unwrap(); while let Some(result) = response.next().await { match result { Ok(resp) => { - let choice = &resp.choices[0]; + let choice = match resp.choices.get(0) { + Some(choice) => choice, + None => { + if let Some(usage) = resp.usage.as_ref() { + for (r#type, value) in &[ + ("prompt", usage.prompt_tokens), + ("completion", usage.completion_tokens), + ("total", usage.total_tokens), + ] { + MEILISEARCH_CHAT_TOKENS_USAGE + .with_label_values(&[ + workspace_uid, + &chat_completion.model, + r#type, + ]) + .inc_by(*value as u64); + } + } + break; + } + }; finish_reason = choice.finish_reason; let ChatCompletionStreamResponseDelta { ref tool_calls, .. } = &choice.delta;