dynamo_runtime/metrics/prometheus_names.rs
1// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//! Prometheus metric name constants and sanitization utilities
5//!
6//! This module provides centralized Prometheus metric name constants and sanitization functions
7//! for various components to ensure consistency and avoid duplication across the codebase.
8//!
9//! ⚠️ **CRITICAL: REGENERATE PYTHON FILE AFTER CHANGES** ⚠️
10//! When modifying constants in this file, regenerate the Python module:
11//! cargo run -p dynamo-codegen --bin gen-python-prometheus-names
12//!
13//! This generates `lib/bindings/python/src/dynamo/prometheus_names.py`
14//! with pure Python constants (no Rust bindings needed).
15//!
16//! ## Naming Conventions
17//!
18//! All metric names should follow: `{prefix}_{name}_{suffix}`
19//!
20//! **Prefix**: Component identifier (`dynamo_component_`, `dynamo_frontend_`, etc.)
21//! **Name**: Descriptive snake_case name indicating what is measured
22//! **Suffix**:
23//! - Units: `_seconds`, `_bytes`, `_ms`, `_percent`, `_messages`, `_connections`
24//! - Counters: `_total` (not `total_` prefix) - for cumulative metrics that only increase
25//! - Gauges: No `_total` suffix - for current state metrics that can go up and down
26//! - Note: Do not use `_counter`, `_gauge`, `_time`, or `_size` in Prometheus names (too vague)
27//!
28//! **Common Transformations**:
29//! - ❌ `_counter` → ✅ `_total`
30//! - ❌ `_sum` → ✅ `_total`
31//! - ❌ `_gauge` → ✅ (no suffix needed for current values)
32//! - ❌ `_time` → ✅ `_seconds`, `_ms`, `_hours`, `_duration_seconds`
33//! - ❌ `_time_total` → ✅ `_seconds_total`, `_ms_total`, `_hours_total`
34//! - ❌ `_total_time` → ✅ `_seconds_total`, `_ms_total`, `_hours_total`
35//! - ❌ `_total_time_seconds` → ✅ `_seconds_total`
36//! - ❌ `_average_time` → ✅ `_seconds_avg`, `_ms_avg`
37//! - ❌ `_size` → ✅ `_bytes`, `_total`, `_length`
38//! - ❌ `_some_request_size` → ✅ `_some_request_bytes_avg`
39//! - ❌ `_rate` → ✅ `_per_second`, `_per_minute`
40//! - ❌ `disconnected_clients_total` → ✅ `disconnected_clients` (gauge, not counter)
41//! - ❌ `inflight_requests_total` → ✅ `inflight_requests` (gauge, not counter)
42//! - ❌ `connections_total` → ✅ `current_connections` (gauge, not counter)
43//!
44//! **Examples**:
45//! - ✅ `dynamo_frontend_requests_total` - Total request counter (not `incoming_requests`)
46//! - ✅ `dynamo_frontend_request_duration_seconds` - Request duration histogram (not `response_time`)
47//! - ✅ `dynamo_component_errors_total` - Total error counter (not `total_errors`)
48//! - ✅ `dynamo_component_memory_usage_bytes` - Memory usage gauge
49//! - ✅ `dynamo_frontend_inflight_requests` - Current inflight requests gauge
50//! - ✅ `dynamo_component_cpu_usage_percent` - CPU usage percentage
51//! - ✅ `dynamo_frontend_tokens_per_second` - Token generation rate
52//! - ✅ `dynamo_messaging_client_connection_duration_ms` - Connection time in milliseconds
53//! - ✅ `dynamo_messaging_client_current_connections` - Current active connections gauge
54//! - ✅ `dynamo_messaging_client_in_messages_total` - Total messages received counter
55//!
56//! ## Key Differences: Prometheus Metric Names vs Prometheus Label Names
57//!
58//! **Metric names**: Allow colons and `__` anywhere. **Label names**: No colons, no `__` prefix.
59//! Label names starting with `__` are reserved for Prometheus internal use.
60
61use once_cell::sync::Lazy;
62use regex::Regex;
63
/// Metric name prefixes used across the metrics system.
///
/// Every constant here is the leading segment of a full metric name and is written
/// without a trailing underscore (for example `"dynamo_frontend"`).
pub mod name_prefix {
    /// Prefix for component-scoped metrics, auto-labeled with namespace/endpoint.
    ///
    /// Note: this is the same string as the label name `labels::COMPONENT`.
    pub const COMPONENT: &str = "dynamo_component";

    /// Prefix for frontend HTTP service metrics (requests, TTFT, ITL, disconnects).
    pub const FRONTEND: &str = "dynamo_frontend";

    /// Prefix for KV router instance metrics (carries `router_id` label).
    pub const ROUTER: &str = "dynamo_router";

    /// Prefix for standalone KV indexer metrics.
    pub const KVINDEXER: &str = "dynamo_kvindexer";

    // Note: REQUEST_PLANE vs TRANSPORT: REQUEST_PLANE measures *what requests do* (latency,
    // concurrency) and is transport-agnostic. TRANSPORT measures *how the wire behaves*
    // (bytes transferred, protocol errors) and is protocol-specific (TCP/NATS).

    /// Prefix for request-plane metrics at AddressedPushRouter.
    /// Transport-agnostic: measures request lifecycle latency and concurrency
    /// (queue → send → roundtrip TTFT, inflight gauge).
    pub const REQUEST_PLANE: &str = "dynamo_request_plane";

    /// Prefix for transport-layer metrics (TCP / NATS).
    /// Protocol-specific: measures wire-level health (bytes sent/received, error counts).
    pub const TRANSPORT: &str = "dynamo_transport";

    /// Prefix for work-handler transport breakdown metrics (backend side).
    pub const WORK_HANDLER: &str = "dynamo_work_handler";

    /// Prefix for tokio runtime metrics (poll times, queue depths, stalls).
    pub const TOKIO: &str = "dynamo_tokio";

    /// Prefix for per-phase routing overhead latency (hashing, scheduling).
    /// Raw Prometheus, not component-scoped.
    ///
    /// NOTE(review): the `routing_overhead` module docs describe its suffixes as being
    /// combined with [`ROUTER`] (`dynamo_router`), not with this prefix — confirm which
    /// one is actually used and whether this constant is still needed.
    pub const ROUTING_OVERHEAD: &str = "dynamo_routing_overhead";
}
101
/// Prometheus label names used across the metrics system.
///
/// Most of these labels are auto-injected into metrics by the hierarchy system:
/// - Rust: lib/runtime/src/metrics.rs create_metric() function
/// - Python: components/src/dynamo/common/utils/prometheus.py register_engine_metrics_callback()
///
/// Not every constant is auto-injected; see the note on [`DP_RANK`].
///
/// Python codegen: These constants are exported to lib/bindings/python/src/dynamo/prometheus_names.py
pub mod labels {
    /// Label for component identification.
    ///
    /// Note: this is the same string as the metric prefix `name_prefix::COMPONENT`.
    pub const COMPONENT: &str = "dynamo_component";

    /// Label for namespace identification.
    pub const NAMESPACE: &str = "dynamo_namespace";

    /// Label for endpoint identification.
    pub const ENDPOINT: &str = "dynamo_endpoint";

    /// Label for worker data-parallel rank.
    ///
    /// Note: this is not an auto-inserted label like `dynamo_namespace`/`dynamo_component`.
    /// It is used by worker/load-style metrics that need to disambiguate per-worker series.
    pub const DP_RANK: &str = "dp_rank";

    /// Label for worker instance ID (etcd lease ID).
    pub const WORKER_ID: &str = "worker_id";

    /// Label for model name/path (OpenAI API standard, injected by Dynamo).
    /// This is the standard label name injected by all backends in metrics_labels=[("model", ...)].
    /// Ensures compatibility with OpenAI-compatible tooling.
    pub const MODEL: &str = "model";

    /// Label for model name/path (alternative/native engine label, injected by Dynamo).
    /// Some engines natively use model_name, so we inject both model and model_name
    /// to ensure maximum compatibility with both OpenAI standard and engine-native tooling.
    /// When a metric already has a label, injection does not overwrite it (original is preserved).
    pub const MODEL_NAME: &str = "model_name";

    /// Label for worker type (e.g., "aggregated", "prefill", "decode", "encoder", etc.).
    pub const WORKER_TYPE: &str = "worker_type";

    /// Label for router instance (discovery.instance_id() of the frontend).
    pub const ROUTER_ID: &str = "router_id";
}
145
/// Well-known component names used as values for the `dynamo_component` label.
///
/// These are the canonical names passed to `namespace.component(name)` to create
/// `Component` instances whose metrics carry `dynamo_component=<name>`.
///
/// Only the router has a constant so far; see the TODO inside the module.
///
/// Python codegen: These constants are exported to lib/bindings/python/src/dynamo/prometheus_names.py
pub mod component_names {
    /// Component name for the KV router (frontend-side request routing).
    pub const ROUTER: &str = "router";

    // TODO: add PREFILL = "prefill" and DECODE = "decode" component names
    // and migrate backend worker component creation to use these constants.
}
159
/// Frontend service metrics (LLM HTTP service)
///
/// The metric constants are name suffixes without a prefix (e.g. `"requests_total"`).
/// The `router_request` module documents that the same suffixes are reused with a
/// `router_` prefix for router metrics, so renaming one here affects both.
///
/// The module also holds label names (`*_LABEL`) and, in nested modules, the allowed
/// label values.
///
/// ⚠️ Python codegen: Run gen-python-prometheus-names after changes
pub mod frontend_service {
    // TODO: Remove DYN_METRICS_PREFIX — the custom prefix override was added for NIM
    // compatibility (PR #2432) but is no longer needed. All frontend metrics should
    // use the fixed `dynamo_frontend_` prefix from `name_prefix::FRONTEND`.
    /// Environment variable that overrides the default metric prefix.
    /// This is an environment variable name, not a metric name.
    pub const METRICS_PREFIX_ENV: &str = "DYN_METRICS_PREFIX";

    /// Total number of LLM requests processed
    pub const REQUESTS_TOTAL: &str = "requests_total";

    /// Number of requests waiting in HTTP queue before receiving the first response (gauge)
    pub const QUEUED_REQUESTS: &str = "queued_requests";

    /// Number of inflight/concurrent requests going to the engine (vLLM, SGLang, ...)
    /// Note: This is a gauge metric (current state) that can go up and down, so no _total suffix
    pub const INFLIGHT_REQUESTS: &str = "inflight_requests";

    /// Number of disconnected clients (gauge that can go up and down)
    pub const DISCONNECTED_CLIENTS: &str = "disconnected_clients";

    /// Duration of LLM requests (seconds, per the name suffix)
    pub const REQUEST_DURATION_SECONDS: &str = "request_duration_seconds";

    /// Input sequence length in tokens
    pub const INPUT_SEQUENCE_TOKENS: &str = "input_sequence_tokens";

    /// Output sequence length in tokens
    pub const OUTPUT_SEQUENCE_TOKENS: &str = "output_sequence_tokens";

    /// Predicted KV cache hit rate at routing time (0.0-1.0)
    pub const KV_HIT_RATE: &str = "kv_hit_rate";

    /// Upper-bound estimation of KV cache transfer latency in disaggregated serving (seconds)
    pub const KV_TRANSFER_ESTIMATED_LATENCY_SECONDS: &str = "kv_transfer_estimated_latency_seconds";

    /// Number of cached tokens (prefix cache hits) per request
    pub const CACHED_TOKENS: &str = "cached_tokens";

    /// Tokenizer latency in milliseconds.
    /// The operation being timed is distinguished by [`OPERATION_LABEL`].
    pub const TOKENIZER_LATENCY_MS: &str = "tokenizer_latency_ms";

    /// Total number of output tokens generated (counter that updates in real-time)
    pub const OUTPUT_TOKENS_TOTAL: &str = "output_tokens_total";

    /// Time to first token in seconds
    pub const TIME_TO_FIRST_TOKEN_SECONDS: &str = "time_to_first_token_seconds";

    /// Inter-token latency in seconds
    pub const INTER_TOKEN_LATENCY_SECONDS: &str = "inter_token_latency_seconds";

    // ---- Model configuration metrics ----
    // Runtime config metrics (from ModelRuntimeConfig):

    /// Total KV blocks available for a worker serving the model (runtime config)
    pub const MODEL_TOTAL_KV_BLOCKS: &str = "model_total_kv_blocks";

    /// Maximum number of sequences for a worker serving the model (runtime config)
    pub const MODEL_MAX_NUM_SEQS: &str = "model_max_num_seqs";

    /// Maximum number of batched tokens for a worker serving the model (runtime config)
    pub const MODEL_MAX_NUM_BATCHED_TOKENS: &str = "model_max_num_batched_tokens";

    // MDC metrics (from ModelDeploymentCard):

    /// Maximum context length for a worker serving the model (MDC)
    pub const MODEL_CONTEXT_LENGTH: &str = "model_context_length";

    /// KV cache block size for a worker serving the model (MDC)
    pub const MODEL_KV_CACHE_BLOCK_SIZE: &str = "model_kv_cache_block_size";

    /// Request migration limit for a worker serving the model (MDC)
    pub const MODEL_MIGRATION_LIMIT: &str = "model_migration_limit";

    /// Total number of request migrations due to worker unavailability.
    /// The kind of migration is distinguished by [`MIGRATION_TYPE_LABEL`].
    pub const MODEL_MIGRATION_TOTAL: &str = "model_migration_total";

    /// Total number of times migration was disabled because the sequence length
    /// exceeded the configured max_seq_len limit
    pub const MODEL_MIGRATION_MAX_SEQ_LEN_EXCEEDED_TOTAL: &str =
        "model_migration_max_seq_len_exceeded_total";

    /// Total number of request cancellations
    pub const MODEL_CANCELLATION_TOTAL: &str = "model_cancellation_total";

    /// Total number of requests rejected due to resource exhaustion
    pub const MODEL_REJECTION_TOTAL: &str = "model_rejection_total";

    /// Active decode blocks (KV cache blocks) per worker
    /// Gauge metric tracking current KV cache block utilization for each worker
    pub const WORKER_ACTIVE_DECODE_BLOCKS: &str = "worker_active_decode_blocks";

    /// Active prefill tokens per worker
    /// Gauge metric tracking current queued prefill tokens for each worker
    pub const WORKER_ACTIVE_PREFILL_TOKENS: &str = "worker_active_prefill_tokens";

    /// Last observed time to first token per worker (in seconds)
    /// Gauge metric tracking the most recent TTFT for each worker
    pub const WORKER_LAST_TIME_TO_FIRST_TOKEN_SECONDS: &str =
        "worker_last_time_to_first_token_seconds";

    /// Last observed input sequence tokens per worker
    /// Gauge metric tracking the input token count from the same request as WORKER_LAST_TIME_TO_FIRST_TOKEN_SECONDS
    /// Updated atomically with TTFT to correlate latency with input size
    pub const WORKER_LAST_INPUT_SEQUENCE_TOKENS: &str = "worker_last_input_sequence_tokens";

    /// Last observed inter-token latency per worker (in seconds)
    /// Gauge metric tracking the most recent ITL for each worker
    pub const WORKER_LAST_INTER_TOKEN_LATENCY_SECONDS: &str =
        "worker_last_inter_token_latency_seconds";

    /// Number of requests pending in the router's scheduler queue (gauge per worker_type)
    pub const ROUTER_QUEUE_PENDING_REQUESTS: &str = "router_queue_pending_requests";

    /// Label name for the type of migration; values live in [`migration_type`]
    pub const MIGRATION_TYPE_LABEL: &str = "migration_type";

    /// Label name for tokenizer operation; values live in [`operation`]
    pub const OPERATION_LABEL: &str = "operation";

    /// Operation label values for tokenizer latency metric
    pub mod operation {
        /// Tokenization operation
        pub const TOKENIZE: &str = "tokenize";

        /// Detokenization operation
        pub const DETOKENIZE: &str = "detokenize";
    }

    /// Migration type label values
    pub mod migration_type {
        /// Migration during initial stream creation (NoResponders error)
        pub const NEW_REQUEST: &str = "new_request";

        /// Migration during ongoing request (stream disconnected)
        pub const ONGOING_REQUEST: &str = "ongoing_request";
    }

    /// Status label values
    pub mod status {
        /// Value for successful requests
        pub const SUCCESS: &str = "success";

        /// Value for failed requests
        pub const ERROR: &str = "error";
    }

    /// Request type label values
    pub mod request_type {
        /// Value for streaming requests
        pub const STREAM: &str = "stream";

        /// Value for unary requests
        pub const UNARY: &str = "unary";
    }

    /// Error type label values for fine-grained error classification
    pub mod error_type {
        /// No error (used for successful requests); the value is the empty string
        pub const NONE: &str = "";

        /// Client validation error (4xx with "Validation:" prefix)
        pub const VALIDATION: &str = "validation";

        /// Model or resource not found (404)
        pub const NOT_FOUND: &str = "not_found";

        /// Service overloaded, too many requests (503)
        pub const OVERLOAD: &str = "overload";

        /// Request cancelled by client or timeout
        pub const CANCELLED: &str = "cancelled";

        /// Backend accepted the request but stopped responding (response inactivity timeout)
        pub const RESPONSE_TIMEOUT: &str = "response_timeout";

        /// Internal server error (500 and other unexpected errors)
        pub const INTERNAL: &str = "internal";

        /// Feature not implemented (501)
        pub const NOT_IMPLEMENTED: &str = "not_implemented";
    }
}
344
/// Work handler Prometheus metric names
///
/// The constants are unprefixed names (e.g. `"requests_total"`), plus one label name
/// ([`ERROR_TYPE_LABEL`]) and its allowed values in [`error_types`].
/// Several strings are identical to constants in other modules (for example
/// `REQUESTS_TOTAL` and `INFLIGHT_REQUESTS`), so the prefix applied at registration is
/// what keeps the full names distinct — presumably `name_prefix::WORK_HANDLER` or
/// `name_prefix::COMPONENT`; verify against the registering code.
pub mod work_handler {
    /// Total number of requests processed by work handler
    pub const REQUESTS_TOTAL: &str = "requests_total";

    /// Total number of bytes received in requests by work handler
    pub const REQUEST_BYTES_TOTAL: &str = "request_bytes_total";

    /// Total number of bytes sent in responses by work handler
    pub const RESPONSE_BYTES_TOTAL: &str = "response_bytes_total";

    /// Number of requests currently being processed by work handler
    /// Note: This is a gauge metric (current state) that can go up and down, so no _total suffix
    pub const INFLIGHT_REQUESTS: &str = "inflight_requests";

    /// Time spent processing requests by work handler (histogram)
    pub const REQUEST_DURATION_SECONDS: &str = "request_duration_seconds";

    /// Total number of errors in work handler processing.
    /// The kind of error is distinguished by [`ERROR_TYPE_LABEL`].
    pub const ERRORS_TOTAL: &str = "errors_total";

    /// Total number of requests cancelled by work handler (client stop/kill or disconnect)
    pub const CANCELLATION_TOTAL: &str = "cancellation_total";

    /// Network transit: frontend send to backend receive (wall-clock, cross-process)
    pub const NETWORK_TRANSIT_SECONDS: &str = "network_transit_seconds";

    /// Backend processing: handle_payload entry to first response sent
    pub const TIME_TO_FIRST_RESPONSE_SECONDS: &str = "time_to_first_response_seconds";

    /// Label name for error type classification; values live in [`error_types`]
    pub const ERROR_TYPE_LABEL: &str = "error_type";

    /// Error type values for work handler metrics
    pub mod error_types {
        /// Deserialization error
        pub const DESERIALIZATION: &str = "deserialization";

        /// Invalid message format error
        pub const INVALID_MESSAGE: &str = "invalid_message";

        /// Response stream creation error
        pub const RESPONSE_STREAM: &str = "response_stream";

        /// Generation error
        pub const GENERATE: &str = "generate";

        /// Response publishing error
        pub const PUBLISH_RESPONSE: &str = "publish_response";

        /// Final message publishing error
        pub const PUBLISH_FINAL: &str = "publish_final";
    }
}
399
/// Task tracker Prometheus metric name suffixes
///
/// All six constants end in `_total`, which the naming conventions at the top of this
/// file reserve for counters that only increase.
pub mod task_tracker {
    /// Total number of tasks issued/submitted
    pub const TASKS_ISSUED_TOTAL: &str = "tasks_issued_total";

    /// Total number of tasks started
    pub const TASKS_STARTED_TOTAL: &str = "tasks_started_total";

    /// Total number of successfully completed tasks
    pub const TASKS_SUCCESS_TOTAL: &str = "tasks_success_total";

    /// Total number of cancelled tasks
    pub const TASKS_CANCELLED_TOTAL: &str = "tasks_cancelled_total";

    /// Total number of failed tasks
    pub const TASKS_FAILED_TOTAL: &str = "tasks_failed_total";

    /// Total number of rejected tasks
    pub const TASKS_REJECTED_TOTAL: &str = "tasks_rejected_total";
}
420
/// DistributedRuntime core metrics
///
/// Holds unprefixed metric names for the runtime itself; currently only uptime.
pub mod distributed_runtime {
    /// Total uptime of the DistributedRuntime in seconds
    pub const UPTIME_SECONDS: &str = "uptime_seconds";
}
426
/// KVBM metric names (block offload/onboard counts, cache hit rates, object storage I/O).
///
/// Abbreviation caveat, going by the per-constant descriptions below: the letter `d`
/// in the `x2y` suffixes is overloaded. In the *offload* constants `h2d` means
/// host → disk and `d2d` means device → disk, while in the *onboard* constants `h2d`
/// means host → device and `d2d` means disk → device. `o` stands for object storage.
/// Read the description, not just the suffix, before using one of these names.
pub mod kvbm {
    /// The number of offload blocks from device to host
    pub const OFFLOAD_BLOCKS_D2H: &str = "offload_blocks_d2h";

    /// The number of offload blocks from host to disk
    pub const OFFLOAD_BLOCKS_H2D: &str = "offload_blocks_h2d";

    /// The number of offload blocks from device to disk (bypassing host memory)
    pub const OFFLOAD_BLOCKS_D2D: &str = "offload_blocks_d2d";

    /// The number of onboard blocks from host to device
    pub const ONBOARD_BLOCKS_H2D: &str = "onboard_blocks_h2d";

    /// The number of onboard blocks from disk to device
    pub const ONBOARD_BLOCKS_D2D: &str = "onboard_blocks_d2d";

    /// The number of matched tokens
    pub const MATCHED_TOKENS: &str = "matched_tokens";

    /// Host cache hit rate (0.0-1.0) from the sliding window
    pub const HOST_CACHE_HIT_RATE: &str = "host_cache_hit_rate";

    /// Disk cache hit rate (0.0-1.0) from the sliding window
    pub const DISK_CACHE_HIT_RATE: &str = "disk_cache_hit_rate";

    /// Object storage cache hit rate (0.0-1.0) from the sliding window
    pub const OBJECT_CACHE_HIT_RATE: &str = "object_cache_hit_rate";

    /// Number of blocks offloaded from device to object storage
    pub const OFFLOAD_BLOCKS_D2O: &str = "offload_blocks_d2o";

    /// Number of blocks onboarded from object storage to device
    pub const ONBOARD_BLOCKS_O2D: &str = "onboard_blocks_o2d";

    /// Bytes transferred to object storage (offload)
    pub const OFFLOAD_BYTES_OBJECT: &str = "offload_bytes_object";

    /// Bytes transferred from object storage (onboard)
    pub const ONBOARD_BYTES_OBJECT: &str = "onboard_bytes_object";

    /// Number of failed object storage read operations (blocks)
    pub const OBJECT_READ_FAILURES: &str = "object_read_failures";

    /// Number of failed object storage write operations (blocks)
    pub const OBJECT_WRITE_FAILURES: &str = "object_write_failures";
}
474
/// Router per-request metrics (component-scoped via `MetricsHierarchy`).
///
/// Metric names are composed as `"{METRIC_PREFIX}{frontend_service::*}"` at init time,
/// then passed to `component.metrics().create_*()` which auto-prepends `dynamo_component_`,
/// yielding e.g. `dynamo_component_router_requests_total`.
/// See `lib/llm/src/kv_router/metrics.rs` `RouterRequestMetrics::from_component()`.
pub mod router_request {
    /// Prefix prepended to `frontend_service::*` names to form router metric names.
    /// e.g. `"router_"` + `frontend_service::REQUESTS_TOTAL` → `"router_requests_total"`.
    ///
    /// Unlike the constants in `name_prefix`, this one includes the trailing underscore,
    /// so it is concatenated directly with no separator.
    pub const METRIC_PREFIX: &str = "router_";
}
486
/// Routing overhead phase latency histogram suffixes.
///
/// Combined with `name_prefix::ROUTER` ("dynamo_router") in `RoutingOverheadMetrics::register()`,
/// yielding e.g. `dynamo_router_overhead_block_hashing_ms{router_id="..."}`.
/// See `lib/llm/src/kv_router/metrics.rs`.
///
/// Every suffix ends in `_ms`, i.e. the values are in milliseconds.
pub mod routing_overhead {
    /// Time spent computing block hashes
    pub const BLOCK_HASHING_MS: &str = "overhead_block_hashing_ms";

    /// Time spent in indexer find_matches
    pub const INDEXER_FIND_MATCHES_MS: &str = "overhead_indexer_find_matches_ms";

    /// Time spent computing sequence hashes
    pub const SEQ_HASHING_MS: &str = "overhead_seq_hashing_ms";

    /// Time spent in scheduler worker selection
    pub const SCHEDULING_MS: &str = "overhead_scheduling_ms";

    /// Total routing overhead per request
    pub const TOTAL_MS: &str = "overhead_total_ms";
}
508
/// Router request metrics (component-scoped aggregate histograms + counter)
///
/// These constants are the suffix portions of full metric names, combined with
/// [`name_prefix::COMPONENT`] to form the complete name, e.g.
/// `dynamo_component_router_requests_total`.
///
/// Several values here equal `router_request::METRIC_PREFIX` concatenated with the
/// matching `frontend_service` constant (e.g. `"router_"` + `"requests_total"`), so the
/// two spellings must be kept in sync if either side is renamed.
///
/// ⚠️ Python codegen: Run gen-python-prometheus-names after changes
pub mod router {
    /// Total number of requests processed by the router
    pub const REQUESTS_TOTAL: &str = "router_requests_total";

    /// Total number of remote indexer overlap queries that failed
    pub const REMOTE_INDEXER_QUERY_FAILURES_TOTAL: &str =
        "router_remote_indexer_query_failures_total";

    /// Total number of remote indexer routing-decision writes that failed
    pub const REMOTE_INDEXER_WRITE_FAILURES_TOTAL: &str =
        "router_remote_indexer_write_failures_total";

    /// Time to first token observed at the router (seconds)
    pub const TIME_TO_FIRST_TOKEN_SECONDS: &str = "router_time_to_first_token_seconds";

    /// Average inter-token latency observed at the router (seconds)
    pub const INTER_TOKEN_LATENCY_SECONDS: &str = "router_inter_token_latency_seconds";

    /// Input sequence length in tokens observed at the router
    pub const INPUT_SEQUENCE_TOKENS: &str = "router_input_sequence_tokens";

    /// Output sequence length in tokens observed at the router
    pub const OUTPUT_SEQUENCE_TOKENS: &str = "router_output_sequence_tokens";
}
540
/// Frontend pipeline stage and event-loop metrics
///
/// Units vary by constant: most are seconds, but the detokenization accumulator is in
/// microseconds (`_us`). Check the suffix before combining values in a query.
pub mod frontend_perf {
    /// Per-stage latency histogram (label: stage = preprocess|route|transport_roundtrip|postprocess)
    pub const STAGE_DURATION_SECONDS: &str = "stage_duration_seconds";
    /// Tokenization time in preprocessor
    pub const TOKENIZE_SECONDS: &str = "tokenize_seconds";
    /// Template application time in preprocessor
    pub const TEMPLATE_SECONDS: &str = "template_seconds";
    /// Cumulative detokenization time (microseconds); pair with DETOKENIZE_TOKEN_COUNT
    pub const DETOKENIZE_TOTAL_US: &str = "detokenize_total_us";
    /// Total tokens detokenized; use rate(total_us)/rate(count) for per-token average
    pub const DETOKENIZE_TOKEN_COUNT: &str = "detokenize_token_count";
    /// Event loop delay canary (sleep 10ms, measure drift)
    pub const EVENT_LOOP_DELAY_SECONDS: &str = "event_loop_delay_seconds";
    /// Count of event loop stalls (delay > 5ms)
    pub const EVENT_LOOP_STALL_TOTAL: &str = "event_loop_stall_total";
}
558
/// Tokio runtime metrics
///
/// Unprefixed metric names; presumably combined with `name_prefix::TOKIO`
/// (`dynamo_tokio`), which is documented as the prefix for tokio runtime metrics —
/// verify against the registering code.
///
/// NOTE(review): the per-constant descriptions below are inferred from the names only
/// (the original constants were undocumented); confirm them against the collector.
pub mod tokio_perf {
    /// Presumably the mean task poll time per worker, in nanoseconds (`_ns` suffix).
    pub const WORKER_MEAN_POLL_TIME_NS: &str = "worker_mean_poll_time_ns";
    /// Presumably the depth of the runtime's global task queue (gauge; no `_total`).
    pub const GLOBAL_QUEUE_DEPTH: &str = "global_queue_depth";
    /// Presumably a counter of yields forced by the task budget (`_total` suffix).
    pub const BUDGET_FORCED_YIELD_TOTAL: &str = "budget_forced_yield_total";
    /// Presumably the busy fraction of a worker thread — TODO confirm the range.
    pub const WORKER_BUSY_RATIO: &str = "worker_busy_ratio";
    /// Presumably a counter of worker park events (`_total` suffix).
    pub const WORKER_PARK_COUNT_TOTAL: &str = "worker_park_count_total";
    /// Presumably the depth of a worker's local task queue (gauge; no `_total`).
    pub const WORKER_LOCAL_QUEUE_DEPTH: &str = "worker_local_queue_depth";
    /// Presumably a counter of tasks stolen by a worker (`_total` suffix).
    pub const WORKER_STEAL_COUNT_TOTAL: &str = "worker_steal_count_total";
    /// Presumably a counter of local-queue overflows for a worker (`_total` suffix).
    pub const WORKER_OVERFLOW_COUNT_TOTAL: &str = "worker_overflow_count_total";
    /// Presumably the number of blocking-pool threads (gauge; no `_total`).
    pub const BLOCKING_THREADS: &str = "blocking_threads";
    /// Presumably the number of idle blocking-pool threads (gauge; no `_total`).
    pub const BLOCKING_IDLE_THREADS: &str = "blocking_idle_threads";
    /// Presumably the depth of the blocking-pool queue (gauge; no `_total`).
    pub const BLOCKING_QUEUE_DEPTH: &str = "blocking_queue_depth";
    /// Presumably the number of currently alive tasks (gauge; no `_total`).
    pub const ALIVE_TASKS: &str = "alive_tasks";
}
574
/// Standalone KV indexer HTTP service metrics
///
/// Unprefixed metric names; `name_prefix::KVINDEXER` (`dynamo_kvindexer`) is documented
/// as the prefix for standalone KV indexer metrics, so these are presumably combined
/// with it — verify against the registering code.
pub mod kvindexer {
    /// HTTP request latency (seconds, per the name suffix)
    pub const REQUEST_DURATION_SECONDS: &str = "request_duration_seconds";

    /// Total HTTP requests
    pub const REQUESTS_TOTAL: &str = "requests_total";

    /// HTTP error responses (4xx/5xx)
    pub const ERRORS_TOTAL: &str = "errors_total";

    /// Number of active model+tenant indexers
    pub const MODELS: &str = "models";

    /// Number of registered worker instances
    pub const WORKERS: &str = "workers";
}
592
/// Request plane metrics at AddressedPushRouter
///
/// Unprefixed metric names; `name_prefix::REQUEST_PLANE` (`dynamo_request_plane`) is
/// documented as the prefix for request-plane metrics at AddressedPushRouter, so these
/// are presumably combined with it — verify against the registering code.
pub mod request_plane {
    /// Time from generate() entry to send_request() (serialization + encoding)
    pub const QUEUE_SECONDS: &str = "queue_seconds";
    /// Time for send_request() to complete (frontend view: network + queue + ack)
    pub const SEND_SECONDS: &str = "send_seconds";
    /// Time from send_request() to first response item (transport roundtrip TTFT)
    pub const ROUNDTRIP_TTFT_SECONDS: &str = "roundtrip_ttft_seconds";
    /// Currently in-flight requests (gauge)
    pub const INFLIGHT_REQUESTS: &str = "inflight_requests";
}
604
/// Transport-specific metrics (TCP / NATS)
///
/// Each name already embeds its protocol (`tcp_…` / `nats_…`). `name_prefix::TRANSPORT`
/// (`dynamo_transport`) is documented as the prefix for transport-layer metrics, so
/// these are presumably combined with it — verify against the registering code.
///
/// NOTE(review): the per-constant descriptions below are inferred from the names only
/// (the original constants were undocumented); confirm them against the emitting code.
pub mod transport {
    /// TCP transport metric names.
    pub mod tcp {
        /// Presumably the number of active pooled TCP connections (gauge; no `_total`).
        pub const POOL_ACTIVE: &str = "tcp_pool_active";
        /// Presumably the number of idle pooled TCP connections (gauge; no `_total`).
        pub const POOL_IDLE: &str = "tcp_pool_idle";
        /// Presumably a counter of bytes sent over TCP (`_total` suffix).
        pub const BYTES_SENT_TOTAL: &str = "tcp_bytes_sent_total";
        /// Presumably a counter of bytes received over TCP (`_total` suffix).
        pub const BYTES_RECEIVED_TOTAL: &str = "tcp_bytes_received_total";
        /// Presumably a counter of TCP transport errors (`_total` suffix).
        pub const ERRORS_TOTAL: &str = "tcp_errors_total";
        /// Presumably the depth of the TCP server's queue (gauge; no `_total`).
        pub const SERVER_QUEUE_DEPTH: &str = "tcp_server_queue_depth";
    }
    /// NATS transport metric names.
    pub mod nats {
        /// Presumably a counter of NATS transport errors (`_total` suffix).
        pub const ERRORS_TOTAL: &str = "nats_errors_total";
    }
}
619
/// KvRouter (including KvIndexer) Prometheus metric names
pub mod kvrouter {
    /// Number of KV cache events applied to the index (including status)
    ///
    /// NOTE(review): the name has no `_total` suffix; if this is registered as a
    /// counter, the naming conventions at the top of this file would call for one.
    pub const KV_CACHE_EVENTS_APPLIED: &str = "kv_cache_events_applied";
}
625
/// KV Publisher metrics
///
/// The name already embeds the `kv_publisher_` segment, so the module name should not
/// be prepended again when building the full metric name.
pub mod kv_publisher {
    /// Total number of raw events dropped by engines before reaching publisher (detected via event_id gaps)
    pub const ENGINES_DROPPED_EVENTS_TOTAL: &str = "kv_publisher_engines_dropped_events_total";
}
631
/// Additional TRT-LLM worker metrics beyond what the engine natively provides.
///
/// These metrics are Python-only (registered via `prometheus_client`) and share the
/// `trtllm_` prefix so they are captured by the same prefix filter as engine metrics.
/// Each constant below is therefore a complete name that already starts with `trtllm_`.
///
/// ⚠️ Python codegen: Run gen-python-prometheus-names after changes
pub mod trtllm_additional {
    /// Total number of aborted/cancelled requests
    pub const NUM_ABORTED_REQUESTS_TOTAL: &str = "trtllm_num_aborted_requests_total";

    /// Total number of requests containing image content
    pub const REQUEST_TYPE_IMAGE_TOTAL: &str = "trtllm_request_type_image_total";

    /// Total number of requests using guided/structured decoding
    pub const REQUEST_TYPE_STRUCTURED_OUTPUT_TOTAL: &str =
        "trtllm_request_type_structured_output_total";

    /// Total number of successful KV cache transfers
    pub const KV_TRANSFER_SUCCESS_TOTAL: &str = "trtllm_kv_transfer_success_total";

    /// KV cache transfer latency per request in seconds
    pub const KV_TRANSFER_LATENCY_SECONDS: &str = "trtllm_kv_transfer_latency_seconds";

    /// KV cache transfer size per request in bytes
    pub const KV_TRANSFER_BYTES: &str = "trtllm_kv_transfer_bytes";

    /// KV cache transfer speed per request in GB/s
    pub const KV_TRANSFER_SPEED_GB_S: &str = "trtllm_kv_transfer_speed_gb_s";
}
661
/// KV cache statistics metrics
pub mod kvstats {
    /// Total number of KV cache blocks available on the worker.
    /// Note: `total` here describes capacity; it is a prefix, not the `_total` counter suffix.
    pub const TOTAL_BLOCKS: &str = "total_blocks";

    /// GPU cache usage, documented as a value in the range 0.0-1.0.
    ///
    /// NOTE(review): a 0.0-1.0 range is a fraction, which does not match the `_percent`
    /// suffix (that suggests 0-100) — confirm which scale the emitter actually uses.
    pub const GPU_CACHE_USAGE_PERCENT: &str = "gpu_cache_usage_percent";
}
670
/// Model information metrics
pub mod model_info {
    /// Model load time in seconds
    pub const LOAD_TIME_SECONDS: &str = "model_load_time_seconds";
}
676
// Shared regex patterns for Prometheus sanitization.
// Each pattern is compiled once on first use; the `unwrap()` calls only guard the
// hard-coded pattern literals below.

/// Matches any single character that is not allowed in a metric name
/// (anything other than ASCII letters, digits, `_` and `:`).
static METRIC_INVALID_CHARS_PATTERN: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"[^a-zA-Z0-9_:]").unwrap());
/// Matches any single character that is not allowed in a label name
/// (anything other than ASCII letters, digits and `_`; colons are rejected).
static LABEL_INVALID_CHARS_PATTERN: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"[^a-zA-Z0-9_]").unwrap());
/// Matches when the first character is neither an ASCII letter nor `_`.
/// Note that `:` is not in the accepted set, so a leading colon also matches.
static INVALID_FIRST_CHAR_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[^a-zA-Z_]").unwrap());
683
684/// Sanitizes a Prometheus metric name by converting invalid characters to underscores
685/// and ensuring the first character is valid. Uses regex for clear validation.
686/// Returns an error if the input cannot be sanitized into a valid name.
687///
688/// **Rules**: Pattern `[a-zA-Z_:][a-zA-Z0-9_:]*`. Allows colons and `__` anywhere.
689pub fn sanitize_prometheus_name(raw: &str) -> anyhow::Result<String> {
690 if raw.is_empty() {
691 return Err(anyhow::anyhow!(
692 "Cannot sanitize empty string into valid Prometheus name"
693 ));
694 }
695
696 // Replace all invalid characters with underscores
697 let mut sanitized = METRIC_INVALID_CHARS_PATTERN
698 .replace_all(raw, "_")
699 .to_string();
700
701 // Ensure first character is valid (letter, underscore, or colon)
702 if INVALID_FIRST_CHAR_PATTERN.is_match(&sanitized) {
703 sanitized = format!("_{}", sanitized);
704 }
705
706 // Check if the result is all underscores (invalid input)
707 if sanitized.chars().all(|c| c == '_') {
708 return Err(anyhow::anyhow!(
709 "Input '{}' contains only invalid characters and cannot be sanitized into a valid Prometheus name",
710 raw
711 ));
712 }
713
714 Ok(sanitized)
715}
716
717/// Sanitizes a Prometheus label name by converting invalid characters to underscores
718/// and ensuring the first character is valid. Uses regex for clear validation.
719/// Label names have stricter rules than metric names (no colons allowed).
720/// Returns an error if the input cannot be sanitized into a valid label name.
721///
722/// **Rules**: Pattern `[a-zA-Z_][a-zA-Z0-9_]*`. No colons, no `__` prefix (reserved).
723pub fn sanitize_prometheus_label(raw: &str) -> anyhow::Result<String> {
724 if raw.is_empty() {
725 return Err(anyhow::anyhow!(
726 "Cannot sanitize empty string into valid Prometheus label"
727 ));
728 }
729
730 // Replace all invalid characters with underscores (no colons allowed in labels)
731 let mut sanitized = LABEL_INVALID_CHARS_PATTERN
732 .replace_all(raw, "_")
733 .to_string();
734
735 // Ensure first character is valid (letter or underscore only)
736 if INVALID_FIRST_CHAR_PATTERN.is_match(&sanitized) {
737 sanitized = format!("_{}", sanitized);
738 }
739
740 // Prevent __ prefix (reserved for Prometheus internal use) but allow __ elsewhere
741 if sanitized.starts_with("__") {
742 sanitized = sanitized
743 .strip_prefix("__")
744 .unwrap_or(&sanitized)
745 .to_string();
746 if sanitized.is_empty() || !sanitized.chars().next().unwrap().is_ascii_alphabetic() {
747 sanitized = format!("_{}", sanitized);
748 }
749 }
750
751 // Check if the result is all underscores (invalid input)
752 if sanitized.chars().all(|c| c == '_') {
753 return Err(anyhow::anyhow!(
754 "Input '{}' contains only invalid characters and cannot be sanitized into a valid Prometheus label",
755 raw
756 ));
757 }
758
759 Ok(sanitized)
760}
761
762/// Sanitizes a Prometheus frontend metric prefix by converting invalid characters to underscores
763/// and ensuring the first character is valid. Uses the general prometheus name sanitization
764/// but with frontend-specific fallback behavior.
765pub fn sanitize_frontend_prometheus_prefix(raw: &str) -> String {
766 if raw.is_empty() {
767 return name_prefix::FRONTEND.to_string();
768 }
769
770 // Reuse the general prometheus name sanitization logic, fallback to frontend prefix on error
771 sanitize_prometheus_name(raw).unwrap_or_else(|_| name_prefix::FRONTEND.to_string())
772}
773
774/// Builds a full component metric name by prepending the component prefix
775/// Sanitizes the metric name to ensure it's valid for Prometheus
776pub fn build_component_metric_name(metric_name: &str) -> String {
777 let sanitized_name =
778 sanitize_prometheus_name(metric_name).expect("metric name should be valid or sanitizable");
779 format!("{}_{}", name_prefix::COMPONENT, sanitized_name)
780}
781
/// Converts a `u64` into an `i64`, saturating at `i64::MAX`.
///
/// Prometheus `IntGaugeVec` stores `i64` values while our data types use `u64`. Inputs that
/// do not fit are clamped to `i64::MAX` instead of wrapping around to a negative number.
///
/// # Arguments
/// * `value` - The u64 value to convert
///
/// # Returns
/// `value` as an `i64`, or `i64::MAX` when `value` exceeds `i64::MAX`
///
/// # Examples
/// ```
/// use dynamo_runtime::metrics::prometheus_names::clamp_u64_to_i64;
///
/// assert_eq!(clamp_u64_to_i64(100), 100);
/// assert_eq!(clamp_u64_to_i64(u64::MAX), i64::MAX);
/// ```
pub fn clamp_u64_to_i64(value: u64) -> i64 {
    // Cap in the unsigned domain first; the cast below is then lossless.
    let capped = value.min(i64::MAX as u64);
    capped as i64
}
808
#[cfg(test)]
mod tests {
    use super::*;

    /// Frontend prefix sanitization: valid input is untouched, invalid characters become
    /// underscores, and empty input falls back to the default frontend prefix.
    #[test]
    fn test_sanitize_frontend_prometheus_prefix() {
        // (input, expected output)
        let cases = [
            // Valid prefixes remain unchanged
            ("dynamo_frontend", "dynamo_frontend"),
            ("custom_prefix", "custom_prefix"),
            ("test123", "test123"),
            // Invalid characters are converted to underscores
            ("test prefix", "test_prefix"),
            ("test.prefix", "test_prefix"),
            ("test@prefix", "test_prefix"),
            ("test-prefix", "test_prefix"),
            // Invalid first characters are fixed
            ("123test", "_123test"),
            ("@test", "_test"),
        ];
        for (input, expected) in cases {
            assert_eq!(
                sanitize_frontend_prometheus_prefix(input),
                expected,
                "input: {input:?}"
            );
        }

        // Empty string falls back to the default frontend prefix
        assert_eq!(
            sanitize_frontend_prometheus_prefix(""),
            name_prefix::FRONTEND
        );
    }

    /// Metric name sanitization: colons and double underscores are allowed.
    #[test]
    fn test_sanitize_prometheus_name() {
        // (input, expected output)
        let accepted = [
            // Valid names remain unchanged
            ("valid_name", "valid_name"),
            ("test123", "test123"),
            ("test_name_123", "test_name_123"),
            ("test:name", "test:name"), // colons allowed
            // Invalid characters are converted to underscores
            ("test name", "test_name"),
            ("test.name", "test_name"),
            ("test@name", "test_name"),
            ("test-name", "test_name"),
            ("test$name#123", "test_name_123"),
            // Double underscores are ALLOWED in metric names (unlike labels)
            ("test__name", "test__name"),
            ("test___name", "test___name"),
            ("__test", "__test"), // leading double underscore OK
            // Invalid first characters are fixed without producing a double underscore
            ("123test", "_123test"),
            ("@test", "_test"),
            ("-test", "_test"),
            (".test", "_test"),
            // Complex case
            ("123.test-name@domain", "_123_test_name_domain"),
        ];
        for (input, expected) in accepted {
            assert_eq!(
                sanitize_prometheus_name(input).unwrap(),
                expected,
                "input: {input:?}"
            );
        }

        // Empty input and input made only of invalid characters are rejected
        for input in ["", "@#$%", "!!!!"] {
            assert!(
                sanitize_prometheus_name(input).is_err(),
                "input: {input:?}"
            );
        }
    }

    /// Label sanitization: stricter than metric names (no colons, no leading `__`).
    #[test]
    fn test_sanitize_prometheus_label() {
        // (input, expected output)
        let accepted = [
            // Valid labels remain unchanged
            ("valid_label", "valid_label"),
            ("test123", "test123"),
            ("test_label_123", "test_label_123"),
            // Colons are NOT allowed in labels (stricter than names)
            ("test:label", "test_label"),
            // Invalid characters are converted to underscores
            ("test label", "test_label"),
            ("test.label", "test_label"),
            ("test@label", "test_label"),
            ("test-label", "test_label"),
            ("test$label#123", "test_label_123"),
            // Double underscores are ALLOWED in the middle but NOT at the start
            ("test__label", "test__label"),
            ("test___label", "test___label"),
            ("test____label", "test____label"),
            ("__test", "test"), // leading __ removed
            // Invalid first characters are fixed (no colons allowed)
            ("123test", "_123test"),
            ("@test", "_test"),
            (":test", "_test"),
            ("-test", "_test"),
            // Complex case
            ("123:test-label@domain", "_123_test_label_domain"),
        ];
        for (input, expected) in accepted {
            assert_eq!(
                sanitize_prometheus_label(input).unwrap(),
                expected,
                "input: {input:?}"
            );
        }

        // Empty input, all-underscore input, and input that sanitizes to nothing but
        // underscores are rejected
        for input in ["", "____", "@#$%", "!!!!"] {
            assert!(
                sanitize_prometheus_label(input).is_err(),
                "input: {input:?}"
            );
        }
    }

    /// Component metric names are the sanitized name behind the component prefix.
    #[test]
    fn test_build_component_metric_name() {
        // (input, expected output)
        let cases = [
            // Valid names work correctly
            ("test_metric", "dynamo_component_test_metric"),
            ("requests_total", "dynamo_component_requests_total"),
            // Invalid characters are sanitized
            ("test metric", "dynamo_component_test_metric"),
            ("test.metric", "dynamo_component_test_metric"),
            ("test@metric", "dynamo_component_test_metric"),
            // Invalid first characters are fixed
            ("123metric", "dynamo_component__123metric"),
        ];
        for (input, expected) in cases {
            assert_eq!(
                build_component_metric_name(input),
                expected,
                "input: {input:?}"
            );
        }
    }

    /// Input without any usable character must panic with a clear message.
    #[test]
    #[should_panic(expected = "metric name should be valid or sanitizable")]
    fn test_build_component_metric_name_panics_on_invalid_input() {
        build_component_metric_name("@#$%");
    }

    /// Empty input must panic with a clear message.
    #[test]
    #[should_panic(expected = "metric name should be valid or sanitizable")]
    fn test_build_component_metric_name_panics_on_empty_input() {
        build_component_metric_name("");
    }

    /// Values up to `i64::MAX` pass through; anything larger saturates at `i64::MAX`.
    #[test]
    fn test_clamp_u64_to_i64() {
        let max = i64::MAX as u64;
        // (input, expected output)
        let cases: [(u64, i64); 7] = [
            // Normal values within i64 range
            (0, 0),
            (100, 100),
            (1000000, 1000000),
            // Maximum i64 value
            (max, i64::MAX),
            // Values that exceed i64::MAX
            (u64::MAX, i64::MAX),
            (max + 1, i64::MAX),
            (max + 1000, i64::MAX),
        ];
        for (input, expected) in cases {
            assert_eq!(clamp_u64_to_i64(input), expected, "input: {input}");
        }
    }
}