dynamo_runtime/metrics/
prometheus_names.rs

// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

//! Prometheus metric name constants and sanitization utilities
//!
//! This module provides centralized Prometheus metric name constants and sanitization functions
//! for various components to ensure consistency and avoid duplication across the codebase.
//!
//! ⚠️  **CRITICAL: REGENERATE PYTHON FILE AFTER CHANGES** ⚠️
//! When modifying constants in this file, regenerate the Python module:
//!     cargo run -p dynamo-codegen --bin gen-python-prometheus-names
//!
//! This generates `lib/bindings/python/src/dynamo/prometheus_names.py`
//! with pure Python constants (no Rust bindings needed).
//!
//! ## Naming Conventions
//!
//! All metric names should follow: `{prefix}_{name}_{suffix}`
//!
//! **Prefix**: Component identifier (`dynamo_component_`, `dynamo_frontend_`, etc.)
//! **Name**: Descriptive snake_case name indicating what is measured
//! **Suffix**:
//!   - Units: `_seconds`, `_bytes`, `_ms`, `_percent`, `_messages`, `_connections`
//!   - Counters: `_total` (not `total_` prefix) - for cumulative metrics that only increase
//!   - Gauges: No `_total` suffix - for current state metrics that can go up and down
//!   - Note: Do not use `_counter`, `_gauge`, `_time`, or `_size` in Prometheus names (too vague)
//!
//! **Common Transformations**:
//! - ❌ `_counter` → ✅ `_total`
//! - ❌ `_sum` → ✅ `_total`
//! - ❌ `_gauge` → ✅ (no suffix needed for current values)
//! - ❌ `_time` → ✅ `_seconds`, `_ms`, `_hours`, `_duration_seconds`
//! - ❌ `_time_total` → ✅ `_seconds_total`, `_ms_total`, `_hours_total`
//! - ❌ `_total_time` → ✅ `_seconds_total`, `_ms_total`, `_hours_total`
//! - ❌ `_total_time_seconds` → ✅ `_seconds_total`
//! - ❌ `_average_time` → ✅ `_seconds_avg`, `_ms_avg`
//! - ❌ `_size` → ✅ `_bytes`, `_total`, `_length`
//! - ❌ `_some_request_size` → ✅ `_some_request_bytes_avg`
//! - ❌ `_rate` → ✅ `_per_second`, `_per_minute`
//! - ❌ `disconnected_clients_total` → ✅ `disconnected_clients` (gauge, not counter)
//! - ❌ `inflight_requests_total` → ✅ `inflight_requests` (gauge, not counter)
//! - ❌ `connections_total` → ✅ `current_connections` (gauge, not counter)
//!
//! **Examples**:
//! - ✅ `dynamo_frontend_requests_total` - Total request counter (not `incoming_requests`)
//! - ✅ `dynamo_frontend_request_duration_seconds` - Request duration histogram (not `response_time`)
//! - ✅ `dynamo_component_errors_total` - Total error counter (not `total_errors`)
//! - ✅ `dynamo_component_memory_usage_bytes` - Memory usage gauge
//! - ✅ `dynamo_frontend_inflight_requests` - Current inflight requests gauge
//! - ✅ `dynamo_component_cpu_usage_percent` - CPU usage percentage
//! - ✅ `dynamo_frontend_tokens_per_second` - Token generation rate
//! - ✅ `dynamo_messaging_client_connection_duration_ms` - Connection time in milliseconds
//! - ✅ `dynamo_messaging_client_current_connections` - Current active connections gauge
//! - ✅ `dynamo_messaging_client_in_messages_total` - Total messages received counter
//!
//! ## Key Differences: Prometheus Metric Names vs Prometheus Label Names
//!
//! **Metric names**: Allow colons and `__` anywhere. **Label names**: No colons, no `__` prefix.
//! Label names starting with `__` are reserved for Prometheus internal use.
60
61use once_cell::sync::Lazy;
62use regex::Regex;
63
/// Metric name prefixes used across the metrics system.
///
/// Each constant is the leading `{prefix}` segment of a full metric name
/// (`{prefix}_{name}_{suffix}`). The values carry no trailing underscore.
pub mod name_prefix {
    /// Prefix for component-scoped metrics, auto-labeled with namespace/endpoint.
    pub const COMPONENT: &str = "dynamo_component";

    /// Prefix for frontend HTTP service metrics (requests, TTFT, ITL, disconnects).
    pub const FRONTEND: &str = "dynamo_frontend";

    /// Prefix for KV router instance metrics (carries `router_id` label).
    pub const ROUTER: &str = "dynamo_router";

    /// Prefix for standalone KV indexer metrics.
    pub const KVINDEXER: &str = "dynamo_kvindexer";

    // Note: REQUEST_PLANE vs TRANSPORT: REQUEST_PLANE measures *what requests do* (latency,
    // concurrency) and is transport-agnostic. TRANSPORT measures *how the wire behaves*
    // (bytes transferred, protocol errors) and is protocol-specific (TCP/NATS).

    /// Prefix for request-plane metrics at AddressedPushRouter.
    /// Transport-agnostic: measures request lifecycle latency and concurrency
    /// (queue → send → roundtrip TTFT, inflight gauge).
    pub const REQUEST_PLANE: &str = "dynamo_request_plane";

    /// Prefix for transport-layer metrics (TCP / NATS).
    /// Protocol-specific: measures wire-level health (bytes sent/received, error counts).
    pub const TRANSPORT: &str = "dynamo_transport";

    /// Prefix for work-handler transport breakdown metrics (backend side).
    pub const WORK_HANDLER: &str = "dynamo_work_handler";

    /// Prefix for tokio runtime metrics (poll times, queue depths, stalls).
    pub const TOKIO: &str = "dynamo_tokio";

    /// Prefix for per-phase routing overhead latency (hashing, scheduling).
    /// Raw Prometheus, not component-scoped.
    ///
    /// NOTE(review): the `routing_overhead` module docs in this file say its suffixes are
    /// combined with [`ROUTER`] (`dynamo_router`), not with this prefix. Confirm whether
    /// this constant is still in use.
    pub const ROUTING_OVERHEAD: &str = "dynamo_routing_overhead";
}
101
/// Prometheus label names used across the metrics system.
///
/// `COMPONENT`, `NAMESPACE` and `ENDPOINT` are auto-injected into metrics by the
/// hierarchy system:
/// - Rust: lib/runtime/src/metrics.rs create_metric() function
/// - Python: components/src/dynamo/common/utils/prometheus.py register_engine_metrics_callback()
///
/// Other labels in this module (e.g. `DP_RANK`) are set explicitly by the metrics
/// that need them; see the per-constant notes.
///
/// Python codegen: These constants are exported to lib/bindings/python/src/dynamo/prometheus_names.py
pub mod labels {
    /// Label for component identification
    pub const COMPONENT: &str = "dynamo_component";

    /// Label for namespace identification
    pub const NAMESPACE: &str = "dynamo_namespace";

    /// Label for endpoint identification
    pub const ENDPOINT: &str = "dynamo_endpoint";

    /// Label for worker data-parallel rank.
    ///
    /// Note: this is not an auto-inserted label like `dynamo_namespace`/`dynamo_component`.
    /// It is used by worker/load-style metrics that need to disambiguate per-worker series.
    pub const DP_RANK: &str = "dp_rank";

    /// Label for worker instance ID (etcd lease ID).
    pub const WORKER_ID: &str = "worker_id";

    /// Label for model name/path (OpenAI API standard, injected by Dynamo)
    /// This is the standard label name injected by all backends in metrics_labels=[("model", ...)].
    /// Ensures compatibility with OpenAI-compatible tooling.
    pub const MODEL: &str = "model";

    /// Label for model name/path (alternative/native engine label, injected by Dynamo)
    /// Some engines natively use model_name, so we inject both model and model_name
    /// to ensure maximum compatibility with both OpenAI standard and engine-native tooling.
    /// When a metric already has a label, injection does not overwrite it (original is preserved).
    pub const MODEL_NAME: &str = "model_name";

    /// Label for worker type (e.g., "aggregated", "prefill", "decode", "encoder", etc.)
    pub const WORKER_TYPE: &str = "worker_type";

    /// Label for router instance (discovery.instance_id() of the frontend)
    pub const ROUTER_ID: &str = "router_id";
}
145
/// Well-known component names used as values for the `dynamo_component` label.
///
/// These are the canonical names passed to `namespace.component(name)` to create
/// `Component` instances whose metrics carry `dynamo_component=<name>`.
///
/// Python codegen: These constants are exported to lib/bindings/python/src/dynamo/prometheus_names.py
pub mod component_names {
    /// Component name for the KV router (frontend-side request routing).
    pub const ROUTER: &str = "router";

    // TODO: add PREFILL = "prefill" and DECODE = "decode" component names
    // and migrate backend worker component creation to use these constants.
}
159
/// Frontend service metrics (LLM HTTP service)
///
/// Holds metric name suffixes, label names, and the allowed label values
/// (nested modules) used by the frontend.
///
/// ⚠️  Python codegen: Run gen-python-prometheus-names after changes
pub mod frontend_service {
    // TODO: Remove DYN_METRICS_PREFIX — the custom prefix override was added for NIM
    // compatibility (PR #2432) but is no longer needed. All frontend metrics should
    // use the fixed `dynamo_frontend_` prefix from `name_prefix::FRONTEND`.
    /// Environment variable that overrides the default metric prefix
    pub const METRICS_PREFIX_ENV: &str = "DYN_METRICS_PREFIX";

    /// Total number of LLM requests processed
    pub const REQUESTS_TOTAL: &str = "requests_total";

    /// Number of requests waiting in HTTP queue before receiving the first response (gauge)
    pub const QUEUED_REQUESTS: &str = "queued_requests";

    /// Number of inflight/concurrent requests going to the engine (vLLM, SGLang, ...)
    /// Note: This is a gauge metric (current state) that can go up and down, so no _total suffix
    pub const INFLIGHT_REQUESTS: &str = "inflight_requests";

    /// Number of disconnected clients (gauge that can go up and down)
    pub const DISCONNECTED_CLIENTS: &str = "disconnected_clients";

    /// Duration of LLM requests
    pub const REQUEST_DURATION_SECONDS: &str = "request_duration_seconds";

    /// Input sequence length in tokens
    pub const INPUT_SEQUENCE_TOKENS: &str = "input_sequence_tokens";

    /// Output sequence length in tokens
    pub const OUTPUT_SEQUENCE_TOKENS: &str = "output_sequence_tokens";

    /// Predicted KV cache hit rate at routing time (0.0-1.0)
    pub const KV_HIT_RATE: &str = "kv_hit_rate";

    /// Upper-bound estimation of KV cache transfer latency in disaggregated serving (seconds)
    pub const KV_TRANSFER_ESTIMATED_LATENCY_SECONDS: &str = "kv_transfer_estimated_latency_seconds";

    /// Number of cached tokens (prefix cache hits) per request
    pub const CACHED_TOKENS: &str = "cached_tokens";

    /// Tokenizer latency in milliseconds
    pub const TOKENIZER_LATENCY_MS: &str = "tokenizer_latency_ms";

    /// Total number of output tokens generated (counter that updates in real-time)
    pub const OUTPUT_TOKENS_TOTAL: &str = "output_tokens_total";

    /// Time to first token in seconds
    pub const TIME_TO_FIRST_TOKEN_SECONDS: &str = "time_to_first_token_seconds";

    /// Inter-token latency in seconds
    pub const INTER_TOKEN_LATENCY_SECONDS: &str = "inter_token_latency_seconds";

    // ---- Model configuration metrics ----
    // Runtime config metrics (from ModelRuntimeConfig):

    /// Total KV blocks available for a worker serving the model (runtime config)
    pub const MODEL_TOTAL_KV_BLOCKS: &str = "model_total_kv_blocks";

    /// Maximum number of sequences for a worker serving the model (runtime config)
    pub const MODEL_MAX_NUM_SEQS: &str = "model_max_num_seqs";

    /// Maximum number of batched tokens for a worker serving the model (runtime config)
    pub const MODEL_MAX_NUM_BATCHED_TOKENS: &str = "model_max_num_batched_tokens";

    // MDC metrics (from ModelDeploymentCard):

    /// Maximum context length for a worker serving the model (MDC)
    pub const MODEL_CONTEXT_LENGTH: &str = "model_context_length";

    /// KV cache block size for a worker serving the model (MDC)
    pub const MODEL_KV_CACHE_BLOCK_SIZE: &str = "model_kv_cache_block_size";

    /// Request migration limit for a worker serving the model (MDC)
    pub const MODEL_MIGRATION_LIMIT: &str = "model_migration_limit";

    /// Total number of request migrations due to worker unavailability
    pub const MODEL_MIGRATION_TOTAL: &str = "model_migration_total";

    /// Total number of times migration was disabled because the sequence length
    /// exceeded the configured max_seq_len limit
    pub const MODEL_MIGRATION_MAX_SEQ_LEN_EXCEEDED_TOTAL: &str =
        "model_migration_max_seq_len_exceeded_total";

    /// Total number of request cancellations
    pub const MODEL_CANCELLATION_TOTAL: &str = "model_cancellation_total";

    /// Total number of requests rejected due to resource exhaustion
    pub const MODEL_REJECTION_TOTAL: &str = "model_rejection_total";

    /// Active decode blocks (KV cache blocks) per worker
    /// Gauge metric tracking current KV cache block utilization for each worker
    pub const WORKER_ACTIVE_DECODE_BLOCKS: &str = "worker_active_decode_blocks";

    /// Active prefill tokens per worker
    /// Gauge metric tracking current queued prefill tokens for each worker
    pub const WORKER_ACTIVE_PREFILL_TOKENS: &str = "worker_active_prefill_tokens";

    /// Last observed time to first token per worker (in seconds)
    /// Gauge metric tracking the most recent TTFT for each worker
    pub const WORKER_LAST_TIME_TO_FIRST_TOKEN_SECONDS: &str =
        "worker_last_time_to_first_token_seconds";

    /// Last observed input sequence tokens per worker
    /// Gauge metric tracking the input token count from the same request as WORKER_LAST_TIME_TO_FIRST_TOKEN_SECONDS
    /// Updated atomically with TTFT to correlate latency with input size
    pub const WORKER_LAST_INPUT_SEQUENCE_TOKENS: &str = "worker_last_input_sequence_tokens";

    /// Last observed inter-token latency per worker (in seconds)
    /// Gauge metric tracking the most recent ITL for each worker
    pub const WORKER_LAST_INTER_TOKEN_LATENCY_SECONDS: &str =
        "worker_last_inter_token_latency_seconds";

    /// Number of requests pending in the router's scheduler queue (gauge per worker_type)
    pub const ROUTER_QUEUE_PENDING_REQUESTS: &str = "router_queue_pending_requests";

    /// Label name for the type of migration (values: see [`migration_type`])
    pub const MIGRATION_TYPE_LABEL: &str = "migration_type";

    /// Label name for tokenizer operation (values: see [`operation`])
    pub const OPERATION_LABEL: &str = "operation";

    /// Operation label values for tokenizer latency metric
    pub mod operation {
        /// Tokenization operation
        pub const TOKENIZE: &str = "tokenize";

        /// Detokenization operation
        pub const DETOKENIZE: &str = "detokenize";
    }

    /// Migration type label values
    pub mod migration_type {
        /// Migration during initial stream creation (NoResponders error)
        pub const NEW_REQUEST: &str = "new_request";

        /// Migration during ongoing request (stream disconnected)
        pub const ONGOING_REQUEST: &str = "ongoing_request";
    }

    /// Status label values
    pub mod status {
        /// Value for successful requests
        pub const SUCCESS: &str = "success";

        /// Value for failed requests
        pub const ERROR: &str = "error";
    }

    /// Request type label values
    pub mod request_type {
        /// Value for streaming requests
        pub const STREAM: &str = "stream";

        /// Value for unary requests
        pub const UNARY: &str = "unary";
    }

    /// Error type label values for fine-grained error classification
    pub mod error_type {
        /// No error (used for successful requests); deliberately the empty string
        pub const NONE: &str = "";

        /// Client validation error (4xx with "Validation:" prefix)
        pub const VALIDATION: &str = "validation";

        /// Model or resource not found (404)
        pub const NOT_FOUND: &str = "not_found";

        /// Service overloaded, too many requests (503)
        pub const OVERLOAD: &str = "overload";

        /// Request cancelled by client or timeout
        pub const CANCELLED: &str = "cancelled";

        /// Backend accepted the request but stopped responding (response inactivity timeout)
        pub const RESPONSE_TIMEOUT: &str = "response_timeout";

        /// Internal server error (500 and other unexpected errors)
        pub const INTERNAL: &str = "internal";

        /// Feature not implemented (501)
        pub const NOT_IMPLEMENTED: &str = "not_implemented";
    }
}
344
/// Work handler Prometheus metric names
///
/// Contains metric name suffixes, the error-type label name, and (in
/// [`error_types`]) the values that label may take.
pub mod work_handler {
    /// Total number of requests processed by work handler
    pub const REQUESTS_TOTAL: &str = "requests_total";

    /// Total number of bytes received in requests by work handler
    pub const REQUEST_BYTES_TOTAL: &str = "request_bytes_total";

    /// Total number of bytes sent in responses by work handler
    pub const RESPONSE_BYTES_TOTAL: &str = "response_bytes_total";

    /// Number of requests currently being processed by work handler
    /// Note: This is a gauge metric (current state) that can go up and down, so no _total suffix
    pub const INFLIGHT_REQUESTS: &str = "inflight_requests";

    /// Time spent processing requests by work handler (histogram)
    pub const REQUEST_DURATION_SECONDS: &str = "request_duration_seconds";

    /// Total number of errors in work handler processing
    pub const ERRORS_TOTAL: &str = "errors_total";

    /// Total number of requests cancelled by work handler (client stop/kill or disconnect)
    pub const CANCELLATION_TOTAL: &str = "cancellation_total";

    /// Network transit: frontend send to backend receive (wall-clock, cross-process)
    pub const NETWORK_TRANSIT_SECONDS: &str = "network_transit_seconds";

    /// Backend processing: handle_payload entry to first response sent
    pub const TIME_TO_FIRST_RESPONSE_SECONDS: &str = "time_to_first_response_seconds";

    /// Label name for error type classification (values: see [`error_types`])
    pub const ERROR_TYPE_LABEL: &str = "error_type";

    /// Error type values for work handler metrics
    pub mod error_types {
        /// Deserialization error
        pub const DESERIALIZATION: &str = "deserialization";

        /// Invalid message format error
        pub const INVALID_MESSAGE: &str = "invalid_message";

        /// Response stream creation error
        pub const RESPONSE_STREAM: &str = "response_stream";

        /// Generation error
        pub const GENERATE: &str = "generate";

        /// Response publishing error
        pub const PUBLISH_RESPONSE: &str = "publish_response";

        /// Final message publishing error
        pub const PUBLISH_FINAL: &str = "publish_final";
    }
}
399
/// Task tracker Prometheus metric name suffixes
///
/// All six are counters (note the `_total` suffix), one per task lifecycle outcome.
pub mod task_tracker {
    /// Total number of tasks issued/submitted
    pub const TASKS_ISSUED_TOTAL: &str = "tasks_issued_total";

    /// Total number of tasks started
    pub const TASKS_STARTED_TOTAL: &str = "tasks_started_total";

    /// Total number of successfully completed tasks
    pub const TASKS_SUCCESS_TOTAL: &str = "tasks_success_total";

    /// Total number of cancelled tasks
    pub const TASKS_CANCELLED_TOTAL: &str = "tasks_cancelled_total";

    /// Total number of failed tasks
    pub const TASKS_FAILED_TOTAL: &str = "tasks_failed_total";

    /// Total number of rejected tasks
    pub const TASKS_REJECTED_TOTAL: &str = "tasks_rejected_total";
}
420
/// DistributedRuntime core metrics
pub mod distributed_runtime {
    /// Total uptime of the DistributedRuntime in seconds
    pub const UPTIME_SECONDS: &str = "uptime_seconds";
}
426
/// KVBM metric names
///
/// Abbreviation legend — note that the letter `d` is overloaded and its meaning
/// depends on the direction of the transfer:
/// - `offload_*`: `d2h` = device → host, `h2d` = host → **disk**, `d2d` = device → **disk**,
///   `d2o` = device → object storage
/// - `onboard_*`: `h2d` = host → **device**, `d2d` = **disk** → device,
///   `o2d` = object storage → device
pub mod kvbm {
    /// The number of offload blocks from device to host
    pub const OFFLOAD_BLOCKS_D2H: &str = "offload_blocks_d2h";

    /// The number of offload blocks from host to disk
    /// (here the trailing `d` stands for disk, not device)
    pub const OFFLOAD_BLOCKS_H2D: &str = "offload_blocks_h2d";

    /// The number of offload blocks from device to disk (bypassing host memory)
    pub const OFFLOAD_BLOCKS_D2D: &str = "offload_blocks_d2d";

    /// The number of onboard blocks from host to device
    pub const ONBOARD_BLOCKS_H2D: &str = "onboard_blocks_h2d";

    /// The number of onboard blocks from disk to device
    pub const ONBOARD_BLOCKS_D2D: &str = "onboard_blocks_d2d";

    /// The number of matched tokens
    pub const MATCHED_TOKENS: &str = "matched_tokens";

    /// Host cache hit rate (0.0-1.0) from the sliding window
    pub const HOST_CACHE_HIT_RATE: &str = "host_cache_hit_rate";

    /// Disk cache hit rate (0.0-1.0) from the sliding window
    pub const DISK_CACHE_HIT_RATE: &str = "disk_cache_hit_rate";

    /// Object storage cache hit rate (0.0-1.0) from the sliding window
    pub const OBJECT_CACHE_HIT_RATE: &str = "object_cache_hit_rate";

    /// Number of blocks offloaded from device to object storage
    pub const OFFLOAD_BLOCKS_D2O: &str = "offload_blocks_d2o";

    /// Number of blocks onboarded from object storage to device
    pub const ONBOARD_BLOCKS_O2D: &str = "onboard_blocks_o2d";

    /// Bytes transferred to object storage (offload)
    pub const OFFLOAD_BYTES_OBJECT: &str = "offload_bytes_object";

    /// Bytes transferred from object storage (onboard)
    pub const ONBOARD_BYTES_OBJECT: &str = "onboard_bytes_object";

    /// Number of failed object storage read operations (blocks)
    pub const OBJECT_READ_FAILURES: &str = "object_read_failures";

    /// Number of failed object storage write operations (blocks)
    pub const OBJECT_WRITE_FAILURES: &str = "object_write_failures";
}
474
/// Router per-request metrics (component-scoped via `MetricsHierarchy`).
///
/// Metric names are composed as `"{METRIC_PREFIX}{frontend_service::*}"` at init time,
/// then passed to `component.metrics().create_*()` which auto-prepends `dynamo_component_`,
/// yielding e.g. `dynamo_component_router_requests_total`.
/// See `lib/llm/src/kv_router/metrics.rs` `RouterRequestMetrics::from_component()`.
pub mod router_request {
    /// Prefix prepended to `frontend_service::*` names to form router metric names.
    /// e.g. `"router_"` + `frontend_service::REQUESTS_TOTAL` → `"router_requests_total"`.
    ///
    /// Unlike the `name_prefix::*` constants, this value already ends with `_`.
    pub const METRIC_PREFIX: &str = "router_";
}
486
/// Routing overhead phase latency histogram suffixes.
///
/// Combined with `name_prefix::ROUTER` ("dynamo_router") in `RoutingOverheadMetrics::register()`,
/// yielding e.g. `dynamo_router_overhead_block_hashing_ms{router_id="..."}`.
/// See `lib/llm/src/kv_router/metrics.rs`.
///
/// NOTE(review): `name_prefix::ROUTING_OVERHEAD` ("dynamo_routing_overhead") also exists in
/// this file; per the description above it is not the prefix used here — confirm.
///
/// All values are in milliseconds (`_ms` suffix).
pub mod routing_overhead {
    /// Time spent computing block hashes
    pub const BLOCK_HASHING_MS: &str = "overhead_block_hashing_ms";

    /// Time spent in indexer find_matches
    pub const INDEXER_FIND_MATCHES_MS: &str = "overhead_indexer_find_matches_ms";

    /// Time spent computing sequence hashes
    pub const SEQ_HASHING_MS: &str = "overhead_seq_hashing_ms";

    /// Time spent in scheduler worker selection
    pub const SCHEDULING_MS: &str = "overhead_scheduling_ms";

    /// Total routing overhead per request
    pub const TOTAL_MS: &str = "overhead_total_ms";
}
508
/// Router request metrics (component-scoped aggregate histograms + counter)
///
/// These constants are the suffix portions of full metric names, combined with
/// [`name_prefix::COMPONENT`] to form the complete name, e.g.
/// `dynamo_component_router_requests_total`.
///
/// Every value already includes the `router_` segment.
///
/// ⚠️  Python codegen: Run gen-python-prometheus-names after changes
pub mod router {
    /// Total number of requests processed by the router
    pub const REQUESTS_TOTAL: &str = "router_requests_total";

    /// Total number of remote indexer overlap queries that failed
    pub const REMOTE_INDEXER_QUERY_FAILURES_TOTAL: &str =
        "router_remote_indexer_query_failures_total";

    /// Total number of remote indexer routing-decision writes that failed
    pub const REMOTE_INDEXER_WRITE_FAILURES_TOTAL: &str =
        "router_remote_indexer_write_failures_total";

    /// Time to first token observed at the router (seconds)
    pub const TIME_TO_FIRST_TOKEN_SECONDS: &str = "router_time_to_first_token_seconds";

    /// Average inter-token latency observed at the router (seconds)
    pub const INTER_TOKEN_LATENCY_SECONDS: &str = "router_inter_token_latency_seconds";

    /// Input sequence length in tokens observed at the router
    pub const INPUT_SEQUENCE_TOKENS: &str = "router_input_sequence_tokens";

    /// Output sequence length in tokens observed at the router
    pub const OUTPUT_SEQUENCE_TOKENS: &str = "router_output_sequence_tokens";
}
540
/// Frontend pipeline stage and event-loop metrics
pub mod frontend_perf {
    /// Per-stage latency histogram (label: stage = preprocess|route|transport_roundtrip|postprocess)
    pub const STAGE_DURATION_SECONDS: &str = "stage_duration_seconds";

    /// Tokenization time in preprocessor
    pub const TOKENIZE_SECONDS: &str = "tokenize_seconds";

    /// Template application time in preprocessor
    pub const TEMPLATE_SECONDS: &str = "template_seconds";

    /// Cumulative detokenization time (microseconds); pair with DETOKENIZE_TOKEN_COUNT
    pub const DETOKENIZE_TOTAL_US: &str = "detokenize_total_us";

    /// Total tokens detokenized; use rate(total_us)/rate(count) for per-token average
    pub const DETOKENIZE_TOKEN_COUNT: &str = "detokenize_token_count";

    /// Event loop delay canary (sleep 10ms, measure drift)
    pub const EVENT_LOOP_DELAY_SECONDS: &str = "event_loop_delay_seconds";

    /// Count of event loop stalls (delay > 5ms)
    pub const EVENT_LOOP_STALL_TOTAL: &str = "event_loop_stall_total";
}
558
/// Tokio runtime metrics
///
/// NOTE(review): the per-constant descriptions below are inferred from the metric names
/// and this file's suffix conventions (`_total` = counter, `_ns` = nanoseconds); confirm
/// them against the code that registers these metrics.
pub mod tokio_perf {
    /// Mean poll time per worker, in nanoseconds
    pub const WORKER_MEAN_POLL_TIME_NS: &str = "worker_mean_poll_time_ns";

    /// Depth of the runtime's global queue (gauge)
    pub const GLOBAL_QUEUE_DEPTH: &str = "global_queue_depth";

    /// Count of budget-forced yields (counter)
    pub const BUDGET_FORCED_YIELD_TOTAL: &str = "budget_forced_yield_total";

    /// Busy ratio per worker
    pub const WORKER_BUSY_RATIO: &str = "worker_busy_ratio";

    /// Count of worker parks (counter)
    pub const WORKER_PARK_COUNT_TOTAL: &str = "worker_park_count_total";

    /// Depth of a worker's local queue (gauge)
    pub const WORKER_LOCAL_QUEUE_DEPTH: &str = "worker_local_queue_depth";

    /// Count of worker steals (counter)
    pub const WORKER_STEAL_COUNT_TOTAL: &str = "worker_steal_count_total";

    /// Count of worker local-queue overflows (counter)
    pub const WORKER_OVERFLOW_COUNT_TOTAL: &str = "worker_overflow_count_total";

    /// Number of blocking threads (gauge)
    pub const BLOCKING_THREADS: &str = "blocking_threads";

    /// Number of idle blocking threads (gauge)
    pub const BLOCKING_IDLE_THREADS: &str = "blocking_idle_threads";

    /// Depth of the blocking-task queue (gauge)
    pub const BLOCKING_QUEUE_DEPTH: &str = "blocking_queue_depth";

    /// Number of alive tasks (gauge)
    pub const ALIVE_TASKS: &str = "alive_tasks";
}
574
/// Standalone KV indexer HTTP service metrics
pub mod kvindexer {
    /// HTTP request latency
    pub const REQUEST_DURATION_SECONDS: &str = "request_duration_seconds";

    /// Total HTTP requests
    pub const REQUESTS_TOTAL: &str = "requests_total";

    /// HTTP error responses (4xx/5xx)
    pub const ERRORS_TOTAL: &str = "errors_total";

    /// Number of active model+tenant indexers
    pub const MODELS: &str = "models";

    /// Number of registered worker instances
    pub const WORKERS: &str = "workers";
}
592
/// Request plane metrics at AddressedPushRouter
pub mod request_plane {
    /// Time from generate() entry to send_request() (serialization + encoding)
    pub const QUEUE_SECONDS: &str = "queue_seconds";

    /// Time for send_request() to complete (frontend view: network + queue + ack)
    pub const SEND_SECONDS: &str = "send_seconds";

    /// Time from send_request() to first response item (transport roundtrip TTFT)
    pub const ROUNDTRIP_TTFT_SECONDS: &str = "roundtrip_ttft_seconds";

    /// Currently in-flight requests (gauge)
    pub const INFLIGHT_REQUESTS: &str = "inflight_requests";
}
604
/// Transport-specific metrics (TCP / NATS)
///
/// NOTE(review): the per-constant descriptions below are inferred from the metric names
/// and this file's suffix conventions (`_total` = counter); confirm against the
/// registration code.
pub mod transport {
    /// TCP transport metric name suffixes (each value carries a `tcp_` segment).
    pub mod tcp {
        /// Active entries in the TCP pool (gauge)
        pub const POOL_ACTIVE: &str = "tcp_pool_active";

        /// Idle entries in the TCP pool (gauge)
        pub const POOL_IDLE: &str = "tcp_pool_idle";

        /// Total bytes sent over TCP (counter)
        pub const BYTES_SENT_TOTAL: &str = "tcp_bytes_sent_total";

        /// Total bytes received over TCP (counter)
        pub const BYTES_RECEIVED_TOTAL: &str = "tcp_bytes_received_total";

        /// Total TCP errors (counter)
        pub const ERRORS_TOTAL: &str = "tcp_errors_total";

        /// Depth of the TCP server queue (gauge)
        pub const SERVER_QUEUE_DEPTH: &str = "tcp_server_queue_depth";
    }

    /// NATS transport metric name suffixes (each value carries a `nats_` segment).
    pub mod nats {
        /// Total NATS errors (counter)
        pub const ERRORS_TOTAL: &str = "nats_errors_total";
    }
}
619
/// KvRouter (including KvIndexer) Prometheus metric names
pub mod kvrouter {
    /// Number of KV cache events applied to the index (including status)
    pub const KV_CACHE_EVENTS_APPLIED: &str = "kv_cache_events_applied";
}
625
/// KV Publisher metrics
pub mod kv_publisher {
    /// Total number of raw events dropped by engines before reaching publisher (detected via event_id gaps)
    pub const ENGINES_DROPPED_EVENTS_TOTAL: &str = "kv_publisher_engines_dropped_events_total";
}
631
/// Additional TRT-LLM worker metrics beyond what the engine natively provides.
///
/// These metrics are Python-only (registered via `prometheus_client`) and share the
/// `trtllm_` prefix so they are captured by the same prefix filter as engine metrics.
/// Each value here is therefore a full metric name, prefix included.
///
/// ⚠️  Python codegen: Run gen-python-prometheus-names after changes
pub mod trtllm_additional {
    /// Total number of aborted/cancelled requests
    pub const NUM_ABORTED_REQUESTS_TOTAL: &str = "trtllm_num_aborted_requests_total";

    /// Total number of requests containing image content
    pub const REQUEST_TYPE_IMAGE_TOTAL: &str = "trtllm_request_type_image_total";

    /// Total number of requests using guided/structured decoding
    pub const REQUEST_TYPE_STRUCTURED_OUTPUT_TOTAL: &str =
        "trtllm_request_type_structured_output_total";

    /// Total number of successful KV cache transfers
    pub const KV_TRANSFER_SUCCESS_TOTAL: &str = "trtllm_kv_transfer_success_total";

    /// KV cache transfer latency per request in seconds
    pub const KV_TRANSFER_LATENCY_SECONDS: &str = "trtllm_kv_transfer_latency_seconds";

    /// KV cache transfer size per request in bytes
    pub const KV_TRANSFER_BYTES: &str = "trtllm_kv_transfer_bytes";

    /// KV cache transfer speed per request in GB/s
    pub const KV_TRANSFER_SPEED_GB_S: &str = "trtllm_kv_transfer_speed_gb_s";
}
661
/// KV cache statistics metrics
pub mod kvstats {
    /// Total number of KV cache blocks available on the worker
    pub const TOTAL_BLOCKS: &str = "total_blocks";

    /// GPU cache usage.
    ///
    /// The stated value range is 0.0-1.0, i.e. a fraction rather than a 0-100 percentage,
    /// despite the `_percent` suffix in the name.
    /// NOTE(review): confirm the range against the code that sets this gauge.
    pub const GPU_CACHE_USAGE_PERCENT: &str = "gpu_cache_usage_percent";
}
670
/// Model information metrics
pub mod model_info {
    /// Model load time in seconds
    pub const LOAD_TIME_SECONDS: &str = "model_load_time_seconds";
}
676
677// Shared regex patterns for Prometheus sanitization
678static METRIC_INVALID_CHARS_PATTERN: Lazy<Regex> =
679    Lazy::new(|| Regex::new(r"[^a-zA-Z0-9_:]").unwrap());
680static LABEL_INVALID_CHARS_PATTERN: Lazy<Regex> =
681    Lazy::new(|| Regex::new(r"[^a-zA-Z0-9_]").unwrap());
682static INVALID_FIRST_CHAR_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[^a-zA-Z_]").unwrap());
683
684/// Sanitizes a Prometheus metric name by converting invalid characters to underscores
685/// and ensuring the first character is valid. Uses regex for clear validation.
686/// Returns an error if the input cannot be sanitized into a valid name.
687///
688/// **Rules**: Pattern `[a-zA-Z_:][a-zA-Z0-9_:]*`. Allows colons and `__` anywhere.
689pub fn sanitize_prometheus_name(raw: &str) -> anyhow::Result<String> {
690    if raw.is_empty() {
691        return Err(anyhow::anyhow!(
692            "Cannot sanitize empty string into valid Prometheus name"
693        ));
694    }
695
696    // Replace all invalid characters with underscores
697    let mut sanitized = METRIC_INVALID_CHARS_PATTERN
698        .replace_all(raw, "_")
699        .to_string();
700
701    // Ensure first character is valid (letter, underscore, or colon)
702    if INVALID_FIRST_CHAR_PATTERN.is_match(&sanitized) {
703        sanitized = format!("_{}", sanitized);
704    }
705
706    // Check if the result is all underscores (invalid input)
707    if sanitized.chars().all(|c| c == '_') {
708        return Err(anyhow::anyhow!(
709            "Input '{}' contains only invalid characters and cannot be sanitized into a valid Prometheus name",
710            raw
711        ));
712    }
713
714    Ok(sanitized)
715}
716
717/// Sanitizes a Prometheus label name by converting invalid characters to underscores
718/// and ensuring the first character is valid. Uses regex for clear validation.
719/// Label names have stricter rules than metric names (no colons allowed).
720/// Returns an error if the input cannot be sanitized into a valid label name.
721///
722/// **Rules**: Pattern `[a-zA-Z_][a-zA-Z0-9_]*`. No colons, no `__` prefix (reserved).
723pub fn sanitize_prometheus_label(raw: &str) -> anyhow::Result<String> {
724    if raw.is_empty() {
725        return Err(anyhow::anyhow!(
726            "Cannot sanitize empty string into valid Prometheus label"
727        ));
728    }
729
730    // Replace all invalid characters with underscores (no colons allowed in labels)
731    let mut sanitized = LABEL_INVALID_CHARS_PATTERN
732        .replace_all(raw, "_")
733        .to_string();
734
735    // Ensure first character is valid (letter or underscore only)
736    if INVALID_FIRST_CHAR_PATTERN.is_match(&sanitized) {
737        sanitized = format!("_{}", sanitized);
738    }
739
740    // Prevent __ prefix (reserved for Prometheus internal use) but allow __ elsewhere
741    if sanitized.starts_with("__") {
742        sanitized = sanitized
743            .strip_prefix("__")
744            .unwrap_or(&sanitized)
745            .to_string();
746        if sanitized.is_empty() || !sanitized.chars().next().unwrap().is_ascii_alphabetic() {
747            sanitized = format!("_{}", sanitized);
748        }
749    }
750
751    // Check if the result is all underscores (invalid input)
752    if sanitized.chars().all(|c| c == '_') {
753        return Err(anyhow::anyhow!(
754            "Input '{}' contains only invalid characters and cannot be sanitized into a valid Prometheus label",
755            raw
756        ));
757    }
758
759    Ok(sanitized)
760}
761
762/// Sanitizes a Prometheus frontend metric prefix by converting invalid characters to underscores
763/// and ensuring the first character is valid. Uses the general prometheus name sanitization
764/// but with frontend-specific fallback behavior.
765pub fn sanitize_frontend_prometheus_prefix(raw: &str) -> String {
766    if raw.is_empty() {
767        return name_prefix::FRONTEND.to_string();
768    }
769
770    // Reuse the general prometheus name sanitization logic, fallback to frontend prefix on error
771    sanitize_prometheus_name(raw).unwrap_or_else(|_| name_prefix::FRONTEND.to_string())
772}
773
774/// Builds a full component metric name by prepending the component prefix
775/// Sanitizes the metric name to ensure it's valid for Prometheus
776pub fn build_component_metric_name(metric_name: &str) -> String {
777    let sanitized_name =
778        sanitize_prometheus_name(metric_name).expect("metric name should be valid or sanitizable");
779    format!("{}_{}", name_prefix::COMPONENT, sanitized_name)
780}
781
/// Converts a `u64` into an `i64`, saturating at `i64::MAX`
///
/// Prometheus `IntGaugeVec` stores `i64` values while our data types use `u64`.
/// Inputs above `i64::MAX` are clamped to `i64::MAX` instead of wrapping around,
/// so the resulting metric value is never negative.
///
/// # Arguments
/// * `value` - The u64 value to convert
///
/// # Returns
/// `value` as an i64 when it fits, otherwise `i64::MAX`
///
/// # Examples
/// ```
/// use dynamo_runtime::metrics::prometheus_names::clamp_u64_to_i64;
///
/// assert_eq!(clamp_u64_to_i64(100), 100);
/// assert_eq!(clamp_u64_to_i64(u64::MAX), i64::MAX);
/// ```
pub fn clamp_u64_to_i64(value: u64) -> i64 {
    // Cap first; after that the cast cannot truncate or change sign
    let capped = value.min(i64::MAX as u64);
    capped as i64
}
808
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `sanitize` over every `(input, expected)` pair and checks the successful result.
    fn assert_sanitized(sanitize: fn(&str) -> anyhow::Result<String>, cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            let actual = sanitize(input)
                .unwrap_or_else(|err| panic!("input {:?} should sanitize: {}", input, err));
            assert_eq!(actual, expected, "input: {:?}", input);
        }
    }

    /// Checks that `sanitize` returns an error for every input in `inputs`.
    fn assert_rejected(sanitize: fn(&str) -> anyhow::Result<String>, inputs: &[&str]) {
        for &input in inputs {
            assert!(sanitize(input).is_err(), "input: {:?}", input);
        }
    }

    #[test]
    fn test_sanitize_frontend_prometheus_prefix() {
        let cases: &[(&str, &str)] = &[
            // Valid prefixes remain unchanged
            ("dynamo_frontend", "dynamo_frontend"),
            ("custom_prefix", "custom_prefix"),
            ("test123", "test123"),
            // Invalid characters are converted to underscores
            ("test prefix", "test_prefix"),
            ("test.prefix", "test_prefix"),
            ("test@prefix", "test_prefix"),
            ("test-prefix", "test_prefix"),
            // Invalid first characters are fixed
            ("123test", "_123test"),
            ("@test", "_test"),
        ];
        for &(input, expected) in cases {
            assert_eq!(
                sanitize_frontend_prometheus_prefix(input),
                expected,
                "input: {:?}",
                input
            );
        }

        // Empty string falls back to the default frontend prefix
        assert_eq!(
            sanitize_frontend_prometheus_prefix(""),
            name_prefix::FRONTEND
        );
    }

    #[test]
    fn test_sanitize_prometheus_name() {
        assert_sanitized(
            sanitize_prometheus_name,
            &[
                // Valid names remain unchanged
                ("valid_name", "valid_name"),
                ("test123", "test123"),
                ("test_name_123", "test_name_123"),
                ("test:name", "test:name"), // colons allowed
                // Invalid characters are converted to underscores
                ("test name", "test_name"),
                ("test.name", "test_name"),
                ("test@name", "test_name"),
                ("test-name", "test_name"),
                ("test$name#123", "test_name_123"),
                // Double underscores are ALLOWED in metric names (unlike labels)
                ("test__name", "test__name"),
                ("test___name", "test___name"),
                ("__test", "__test"), // leading double underscore OK
                // Invalid first characters are fixed
                ("123test", "_123test"),
                ("@test", "_test"), // @ becomes _, no double underscore
                ("-test", "_test"), // - becomes _, no double underscore
                (".test", "_test"), // . becomes _, no double underscore
                // Complex case
                ("123.test-name@domain", "_123_test_name_domain"),
            ],
        );

        // Empty input and inputs made only of invalid characters are errors
        assert_rejected(sanitize_prometheus_name, &["", "@#$%", "!!!!"]);
    }

    #[test]
    fn test_sanitize_prometheus_label() {
        assert_sanitized(
            sanitize_prometheus_label,
            &[
                // Valid labels remain unchanged
                ("valid_label", "valid_label"),
                ("test123", "test123"),
                ("test_label_123", "test_label_123"),
                // Colons are NOT allowed in labels (stricter than names)
                ("test:label", "test_label"),
                // Invalid characters are converted to underscores
                ("test label", "test_label"),
                ("test.label", "test_label"),
                ("test@label", "test_label"),
                ("test-label", "test_label"),
                ("test$label#123", "test_label_123"),
                // Double underscores are ALLOWED in the middle but NOT at the start
                ("test__label", "test__label"),
                ("test___label", "test___label"),
                ("test____label", "test____label"),
                ("__test", "test"), // leading __ removed
                // Invalid first characters are fixed (no colons allowed)
                ("123test", "_123test"),
                ("@test", "_test"),
                (":test", "_test"), // colon not allowed
                ("-test", "_test"),
                // Complex case
                ("123:test-label@domain", "_123_test_label_domain"),
            ],
        );

        // All-underscore input, empty input, and inputs made only of invalid characters
        // (which sanitize to nothing but underscores) are errors
        assert_rejected(sanitize_prometheus_label, &["____", "", "@#$%", "!!!!"]);
    }

    #[test]
    fn test_build_component_metric_name() {
        let cases: &[(&str, &str)] = &[
            // Valid names work correctly
            ("test_metric", "dynamo_component_test_metric"),
            ("requests_total", "dynamo_component_requests_total"),
            // Invalid characters are sanitized
            ("test metric", "dynamo_component_test_metric"),
            ("test.metric", "dynamo_component_test_metric"),
            ("test@metric", "dynamo_component_test_metric"),
            // Invalid first characters are fixed
            ("123metric", "dynamo_component__123metric"),
        ];
        for &(input, expected) in cases {
            assert_eq!(
                build_component_metric_name(input),
                expected,
                "input: {:?}",
                input
            );
        }
    }

    #[test]
    #[should_panic(expected = "metric name should be valid or sanitizable")]
    fn test_build_component_metric_name_panics_on_invalid_input() {
        // Completely invalid input panics with a clear message
        build_component_metric_name("@#$%");
    }

    #[test]
    #[should_panic(expected = "metric name should be valid or sanitizable")]
    fn test_build_component_metric_name_panics_on_empty_input() {
        // Empty input panics with a clear message
        build_component_metric_name("");
    }

    #[test]
    fn test_clamp_u64_to_i64() {
        // Values within the i64 range (including i64::MAX itself) pass through unchanged
        let in_range: &[(u64, i64)] = &[
            (0, 0),
            (100, 100),
            (1000000, 1000000),
            (i64::MAX as u64, i64::MAX),
        ];
        for &(input, expected) in in_range {
            assert_eq!(clamp_u64_to_i64(input), expected, "input: {}", input);
        }

        // Values that exceed i64::MAX saturate
        let too_large: &[u64] = &[
            u64::MAX,
            (i64::MAX as u64) + 1,
            (i64::MAX as u64) + 1000,
        ];
        for &input in too_large {
            assert_eq!(clamp_u64_to_i64(input), i64::MAX, "input: {}", input);
        }
    }
}
dynamo_runtime/metrics/prometheus_names.rs

dynamo_runtime/metrics/
prometheus_names.rs