dynamo-runtime 1.0.2

Dynamo Runtime Library
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

//! Prometheus metric name constants and sanitization utilities
//!
//! This module provides centralized Prometheus metric name constants and sanitization functions
//! for various components to ensure consistency and avoid duplication across the codebase.
//!
//! ⚠️  **CRITICAL: REGENERATE PYTHON FILE AFTER CHANGES** ⚠️
//! When modifying constants in this file, regenerate the Python module:
//!     cargo run -p dynamo-codegen --bin gen-python-prometheus-names
//!
//! This generates `lib/bindings/python/src/dynamo/prometheus_names.py`
//! with pure Python constants (no Rust bindings needed).
//!
//! ## Naming Conventions
//!
//! All metric names should follow: `{prefix}_{name}_{suffix}`
//!
//! **Prefix**: Component identifier (`dynamo_component_`, `dynamo_frontend_`, etc.)
//! **Name**: Descriptive snake_case name indicating what is measured
//! **Suffix**:
//!   - Units: `_seconds`, `_bytes`, `_ms`, `_percent`, `_messages`, `_connections`
//!   - Counters: `_total` (not `total_` prefix) - for cumulative metrics that only increase
//!   - Gauges: No `_total` suffix - for current state metrics that can go up and down
//!   - Note: Do not use `_counter`, `_gauge`, `_time`, or `_size` in Prometheus names (too vague)
//!
//! **Common Transformations**:
//! - ❌ `_counter` → ✅ `_total`
//! - ❌ `_sum` → ✅ `_total`
//! - ❌ `_gauge` → ✅ (no suffix needed for current values)
//! - ❌ `_time` → ✅ `_seconds`, `_ms`, `_hours`, `_duration_seconds`
//! - ❌ `_time_total` → ✅ `_seconds_total`, `_ms_total`, `_hours_total`
//! - ❌ `_total_time` → ✅ `_seconds_total`, `_ms_total`, `_hours_total`
//! - ❌ `_total_time_seconds` → ✅ `_seconds_total`
//! - ❌ `_average_time` → ✅ `_seconds_avg`, `_ms_avg`
//! - ❌ `_size` → ✅ `_bytes`, `_total`, `_length`
//! - ❌ `_some_request_size` → ✅ `_some_request_bytes_avg`
//! - ❌ `_rate` → ✅ `_per_second`, `_per_minute`
//! - ❌ `disconnected_clients_total` → ✅ `disconnected_clients` (gauge, not counter)
//! - ❌ `inflight_requests_total` → ✅ `inflight_requests` (gauge, not counter)
//! - ❌ `connections_total` → ✅ `current_connections` (gauge, not counter)
//!
//! **Examples**:
//! - ✅ `dynamo_frontend_requests_total` - Total request counter (not `incoming_requests`)
//! - ✅ `dynamo_frontend_request_duration_seconds` - Request duration histogram (not `response_time`)
//! - ✅ `dynamo_component_errors_total` - Total error counter (not `total_errors`)
//! - ✅ `dynamo_component_memory_usage_bytes` - Memory usage gauge
//! - ✅ `dynamo_frontend_inflight_requests` - Current inflight requests gauge
//! - ✅ `dynamo_component_cpu_usage_percent` - CPU usage percentage
//! - ✅ `dynamo_frontend_tokens_per_second` - Token generation rate
//! - ✅ `dynamo_messaging_client_connection_duration_ms` - Connection time in milliseconds
//! - ✅ `dynamo_messaging_client_current_connections` - Current active connections gauge
//! - ✅ `dynamo_messaging_client_in_messages_total` - Total messages received counter
//!
//! ## Key Differences: Prometheus Metric Names vs Prometheus Label Names
//!
//! **Metric names**: Allow colons and `__` anywhere. **Label names**: No colons, no `__` prefix.
//! Label names starting with `__` are reserved for Prometheus internal use.

use once_cell::sync::Lazy;
use regex::Regex;

/// Metric name prefixes used across the metrics system
///
/// Each constant is the leading segment of a full metric name and carries no
/// trailing underscore; the separator is added by whoever joins prefix and name.
pub mod name_prefix {
    /// Prefix for component-scoped Prometheus metric names.
    ///
    /// `build_component_metric_name` in this file joins it to the sanitized
    /// metric name with a single `_`.
    pub const COMPONENT: &str = "dynamo_component";

    /// Prefix for frontend service metrics
    ///
    /// Also the fallback returned by `sanitize_frontend_prometheus_prefix` when a
    /// custom prefix is empty or cannot be sanitized.
    pub const FRONTEND: &str = "dynamo_frontend";

    /// Prefix for KV router metrics (used with router_id label)
    pub const ROUTER: &str = "dynamo_router";
}

/// Automatically inserted Prometheus label names used across the metrics system
///
/// These labels are auto-injected into metrics by the hierarchy system:
/// - Rust: lib/runtime/src/metrics.rs create_metric() function
/// - Python: components/src/dynamo/common/utils/prometheus.py register_engine_metrics_callback()
///
/// Not every constant here is auto-injected; exceptions are called out on the
/// individual items (see `DP_RANK`).
///
/// Python codegen: These constants are exported to lib/bindings/python/src/dynamo/prometheus_names.py
pub mod labels {
    /// Label for component identification
    ///
    /// Note: the label name is the same string as `name_prefix::COMPONENT`.
    pub const COMPONENT: &str = "dynamo_component";

    /// Label for namespace identification
    pub const NAMESPACE: &str = "dynamo_namespace";

    /// Label for endpoint identification
    pub const ENDPOINT: &str = "dynamo_endpoint";

    /// Label for worker data-parallel rank.
    ///
    /// Note: this is not an auto-inserted label like `dynamo_namespace`/`dynamo_component`.
    /// It is used by worker/load-style metrics that need to disambiguate per-worker series.
    pub const DP_RANK: &str = "dp_rank";

    /// Label for worker instance ID (etcd lease ID).
    pub const WORKER_ID: &str = "worker_id";

    /// Label for model name/path (OpenAI API standard, injected by Dynamo)
    /// This is the standard label name injected by all backends in metrics_labels=[("model", ...)].
    /// Ensures compatibility with OpenAI-compatible tooling.
    pub const MODEL: &str = "model";

    /// Label for model name/path (alternative/native engine label, injected by Dynamo)
    /// Some engines natively use model_name, so we inject both model and model_name
    /// to ensure maximum compatibility with both OpenAI standard and engine-native tooling.
    /// When a metric already has a label, injection does not overwrite it (original is preserved).
    pub const MODEL_NAME: &str = "model_name";

    /// Label for worker type (e.g., "aggregated", "prefill", "decode", "encoder", etc.)
    pub const WORKER_TYPE: &str = "worker_type";

    /// Label for router instance (discovery.instance_id() of the frontend)
    pub const ROUTER_ID: &str = "router_id";
}

/// Well-known component names used as values for the `dynamo_component` label.
///
/// These are the canonical names passed to `namespace.component(name)` to create
/// `Component` instances whose metrics carry `dynamo_component=<name>`.
///
/// Only the router name is defined so far; see the TODO inside the module for the
/// planned worker names.
///
/// Python codegen: These constants are exported to lib/bindings/python/src/dynamo/prometheus_names.py
pub mod component_names {
    /// Component name for the KV router (frontend-side request routing).
    pub const ROUTER: &str = "router";

    // TODO: add PREFILL = "prefill" and DECODE = "decode" component names
    // and migrate backend worker component creation to use these constants.
}

/// Frontend service metrics (LLM HTTP service)
///
/// The metric constants here are name suffixes without a prefix. The module also
/// holds label names (`*_LABEL`) and, in nested modules, the allowed values for
/// those labels.
///
/// ⚠️  Python codegen: Run gen-python-prometheus-names after changes
pub mod frontend_service {
    // TODO: Remove DYN_METRICS_PREFIX — the custom prefix override was added for NIM
    // compatibility (PR #2432) but is no longer needed. All frontend metrics should
    // use the fixed `dynamo_frontend_` prefix from `name_prefix::FRONTEND`.
    /// Environment variable that overrides the default metric prefix
    pub const METRICS_PREFIX_ENV: &str = "DYN_METRICS_PREFIX";

    /// Total number of LLM requests processed
    pub const REQUESTS_TOTAL: &str = "requests_total";

    /// Number of requests waiting in HTTP queue before receiving the first response (gauge)
    pub const QUEUED_REQUESTS: &str = "queued_requests";

    /// Number of inflight/concurrent requests going to the engine (vLLM, SGLang, ...)
    /// Note: This is a gauge metric (current state) that can go up and down, so no _total suffix
    pub const INFLIGHT_REQUESTS: &str = "inflight_requests";

    /// Number of disconnected clients (gauge that can go up and down)
    pub const DISCONNECTED_CLIENTS: &str = "disconnected_clients";

    /// Duration of LLM requests
    pub const REQUEST_DURATION_SECONDS: &str = "request_duration_seconds";

    /// Input sequence length in tokens
    pub const INPUT_SEQUENCE_TOKENS: &str = "input_sequence_tokens";

    /// Output sequence length in tokens
    pub const OUTPUT_SEQUENCE_TOKENS: &str = "output_sequence_tokens";

    /// Predicted KV cache hit rate at routing time (0.0-1.0)
    pub const KV_HIT_RATE: &str = "kv_hit_rate";

    /// Number of cached tokens (prefix cache hits) per request
    pub const CACHED_TOKENS: &str = "cached_tokens";

    /// Tokenizer latency in milliseconds
    ///
    /// See `OPERATION_LABEL` and the `operation` module for the label that
    /// distinguishes tokenize from detokenize.
    pub const TOKENIZER_LATENCY_MS: &str = "tokenizer_latency_ms";

    /// Total number of output tokens generated (counter that updates in real-time)
    pub const OUTPUT_TOKENS_TOTAL: &str = "output_tokens_total";

    /// Time to first token in seconds
    pub const TIME_TO_FIRST_TOKEN_SECONDS: &str = "time_to_first_token_seconds";

    /// Inter-token latency in seconds
    pub const INTER_TOKEN_LATENCY_SECONDS: &str = "inter_token_latency_seconds";

    // ---- Model configuration metrics ----
    // Runtime config metrics (from ModelRuntimeConfig):

    /// Total KV blocks available for a worker serving the model (runtime config)
    pub const MODEL_TOTAL_KV_BLOCKS: &str = "model_total_kv_blocks";

    /// Maximum number of sequences for a worker serving the model (runtime config)
    pub const MODEL_MAX_NUM_SEQS: &str = "model_max_num_seqs";

    /// Maximum number of batched tokens for a worker serving the model (runtime config)
    pub const MODEL_MAX_NUM_BATCHED_TOKENS: &str = "model_max_num_batched_tokens";

    // MDC metrics (from ModelDeploymentCard):

    /// Maximum context length for a worker serving the model (MDC)
    pub const MODEL_CONTEXT_LENGTH: &str = "model_context_length";

    /// KV cache block size for a worker serving the model (MDC)
    pub const MODEL_KV_CACHE_BLOCK_SIZE: &str = "model_kv_cache_block_size";

    /// Request migration limit for a worker serving the model (MDC)
    pub const MODEL_MIGRATION_LIMIT: &str = "model_migration_limit";

    /// Total number of request migrations due to worker unavailability
    ///
    /// See `MIGRATION_TYPE_LABEL` and the `migration_type` module for the label
    /// that classifies each migration.
    pub const MODEL_MIGRATION_TOTAL: &str = "model_migration_total";

    /// Active decode blocks (KV cache blocks) per worker
    /// Gauge metric tracking current KV cache block utilization for each worker
    pub const WORKER_ACTIVE_DECODE_BLOCKS: &str = "worker_active_decode_blocks";

    /// Active prefill tokens per worker
    /// Gauge metric tracking current queued prefill tokens for each worker
    pub const WORKER_ACTIVE_PREFILL_TOKENS: &str = "worker_active_prefill_tokens";

    /// Last observed time to first token per worker (in seconds)
    /// Gauge metric tracking the most recent TTFT for each worker
    pub const WORKER_LAST_TIME_TO_FIRST_TOKEN_SECONDS: &str =
        "worker_last_time_to_first_token_seconds";

    /// Last observed input sequence tokens per worker
    /// Gauge metric tracking the input token count from the same request as WORKER_LAST_TIME_TO_FIRST_TOKEN_SECONDS
    /// Updated atomically with TTFT to correlate latency with input size
    pub const WORKER_LAST_INPUT_SEQUENCE_TOKENS: &str = "worker_last_input_sequence_tokens";

    /// Last observed inter-token latency per worker (in seconds)
    /// Gauge metric tracking the most recent ITL for each worker
    pub const WORKER_LAST_INTER_TOKEN_LATENCY_SECONDS: &str =
        "worker_last_inter_token_latency_seconds";

    /// Label name for the type of migration (values: see `migration_type`)
    pub const MIGRATION_TYPE_LABEL: &str = "migration_type";

    /// Label name for tokenizer operation (values: see `operation`)
    pub const OPERATION_LABEL: &str = "operation";

    /// Operation label values for tokenizer latency metric
    pub mod operation {
        /// Tokenization operation
        pub const TOKENIZE: &str = "tokenize";

        /// Detokenization operation
        pub const DETOKENIZE: &str = "detokenize";
    }

    /// Migration type label values
    pub mod migration_type {
        /// Migration during initial stream creation (NoResponders error)
        pub const NEW_REQUEST: &str = "new_request";

        /// Migration during ongoing request (stream disconnected)
        pub const ONGOING_REQUEST: &str = "ongoing_request";
    }

    /// Status label values
    pub mod status {
        /// Value for successful requests
        pub const SUCCESS: &str = "success";

        /// Value for failed requests
        pub const ERROR: &str = "error";
    }

    /// Request type label values
    pub mod request_type {
        /// Value for streaming requests
        pub const STREAM: &str = "stream";

        /// Value for unary requests
        pub const UNARY: &str = "unary";
    }

    /// Error type label values for fine-grained error classification
    pub mod error_type {
        /// No error (used for successful requests); this is the empty string
        pub const NONE: &str = "";

        /// Client validation error (4xx with "Validation:" prefix)
        pub const VALIDATION: &str = "validation";

        /// Model or resource not found (404)
        pub const NOT_FOUND: &str = "not_found";

        /// Service overloaded, too many requests (503)
        pub const OVERLOAD: &str = "overload";

        /// Request cancelled by client or timeout
        pub const CANCELLED: &str = "cancelled";

        /// Internal server error (500 and other unexpected errors)
        pub const INTERNAL: &str = "internal";

        /// Feature not implemented (501)
        pub const NOT_IMPLEMENTED: &str = "not_implemented";
    }
}

/// Work handler Prometheus metric names
///
/// Metric name suffixes plus the `error_type` label name and its allowed values
/// (nested `error_types` module).
pub mod work_handler {
    /// Total number of requests processed by work handler
    pub const REQUESTS_TOTAL: &str = "requests_total";

    /// Total number of bytes received in requests by work handler
    pub const REQUEST_BYTES_TOTAL: &str = "request_bytes_total";

    /// Total number of bytes sent in responses by work handler
    pub const RESPONSE_BYTES_TOTAL: &str = "response_bytes_total";

    /// Number of requests currently being processed by work handler
    /// Note: This is a gauge metric (current state) that can go up and down, so no _total suffix
    pub const INFLIGHT_REQUESTS: &str = "inflight_requests";

    /// Time spent processing requests by work handler (histogram)
    pub const REQUEST_DURATION_SECONDS: &str = "request_duration_seconds";

    /// Total number of errors in work handler processing
    pub const ERRORS_TOTAL: &str = "errors_total";

    /// Label name for error type classification (values: see `error_types`)
    pub const ERROR_TYPE_LABEL: &str = "error_type";

    /// Error type values for work handler metrics
    pub mod error_types {
        /// Deserialization error
        pub const DESERIALIZATION: &str = "deserialization";

        /// Invalid message format error
        pub const INVALID_MESSAGE: &str = "invalid_message";

        /// Response stream creation error
        pub const RESPONSE_STREAM: &str = "response_stream";

        /// Generation error
        pub const GENERATE: &str = "generate";

        /// Response publishing error
        pub const PUBLISH_RESPONSE: &str = "publish_response";

        /// Final message publishing error
        pub const PUBLISH_FINAL: &str = "publish_final";
    }
}

/// Task tracker Prometheus metric name suffixes
///
/// All entries end in `_total`, i.e. they follow the counter naming convention
/// described in the module-level docs.
pub mod task_tracker {
    /// Total number of tasks issued/submitted
    pub const TASKS_ISSUED_TOTAL: &str = "tasks_issued_total";

    /// Total number of tasks started
    pub const TASKS_STARTED_TOTAL: &str = "tasks_started_total";

    /// Total number of successfully completed tasks
    pub const TASKS_SUCCESS_TOTAL: &str = "tasks_success_total";

    /// Total number of cancelled tasks
    pub const TASKS_CANCELLED_TOTAL: &str = "tasks_cancelled_total";

    /// Total number of failed tasks
    pub const TASKS_FAILED_TOTAL: &str = "tasks_failed_total";

    /// Total number of rejected tasks
    pub const TASKS_REJECTED_TOTAL: &str = "tasks_rejected_total";
}

/// DistributedRuntime core metrics (metric name suffixes)
pub mod distributed_runtime {
    /// Total uptime of the DistributedRuntime in seconds
    pub const UPTIME_SECONDS: &str = "uptime_seconds";
}

/// KVBM metric name suffixes
///
/// Transfer metrics use a `<src>2<dst>` suffix. Be careful with the letter `d`:
/// per the item docs below it stands for *device* or *disk* depending on the
/// direction (e.g. offload `h2d` is host→disk, while onboard `h2d` is host→device).
/// `o` stands for object storage.
pub mod kvbm {
    /// The number of offload blocks from device to host
    pub const OFFLOAD_BLOCKS_D2H: &str = "offload_blocks_d2h";

    /// The number of offload blocks from host to disk
    pub const OFFLOAD_BLOCKS_H2D: &str = "offload_blocks_h2d";

    /// The number of offload blocks from device to disk (bypassing host memory)
    pub const OFFLOAD_BLOCKS_D2D: &str = "offload_blocks_d2d";

    /// The number of onboard blocks from host to device
    pub const ONBOARD_BLOCKS_H2D: &str = "onboard_blocks_h2d";

    /// The number of onboard blocks from disk to device
    pub const ONBOARD_BLOCKS_D2D: &str = "onboard_blocks_d2d";

    /// The number of matched tokens
    pub const MATCHED_TOKENS: &str = "matched_tokens";

    /// Host cache hit rate (0.0-1.0) from the sliding window
    pub const HOST_CACHE_HIT_RATE: &str = "host_cache_hit_rate";

    /// Disk cache hit rate (0.0-1.0) from the sliding window
    pub const DISK_CACHE_HIT_RATE: &str = "disk_cache_hit_rate";

    /// Object storage cache hit rate (0.0-1.0) from the sliding window
    pub const OBJECT_CACHE_HIT_RATE: &str = "object_cache_hit_rate";

    /// Number of blocks offloaded from device to object storage
    pub const OFFLOAD_BLOCKS_D2O: &str = "offload_blocks_d2o";

    /// Number of blocks onboarded from object storage to device
    pub const ONBOARD_BLOCKS_O2D: &str = "onboard_blocks_o2d";

    /// Bytes transferred to object storage (offload)
    pub const OFFLOAD_BYTES_OBJECT: &str = "offload_bytes_object";

    /// Bytes transferred from object storage (onboard)
    pub const ONBOARD_BYTES_OBJECT: &str = "onboard_bytes_object";

    /// Number of failed object storage read operations (blocks)
    pub const OBJECT_READ_FAILURES: &str = "object_read_failures";

    /// Number of failed object storage write operations (blocks)
    pub const OBJECT_WRITE_FAILURES: &str = "object_write_failures";
}

/// Router per-request metrics (component-scoped via `MetricsHierarchy`).
///
/// Metric names are composed as `"{METRIC_PREFIX}{frontend_service::*}"` at init time,
/// then passed to `component.metrics().create_*()` which auto-prepends `dynamo_component_`,
/// yielding e.g. `dynamo_component_router_requests_total`.
/// See `lib/llm/src/kv_router/metrics.rs` `RouterRequestMetrics::from_component()`.
pub mod router_request {
    /// Prefix prepended to `frontend_service::*` names to form router metric names.
    /// e.g. `"router_"` + `frontend_service::REQUESTS_TOTAL` → `"router_requests_total"`.
    ///
    /// Unlike the `name_prefix` constants, this one already includes the trailing
    /// underscore, so it is concatenated directly with no extra separator.
    pub const METRIC_PREFIX: &str = "router_";
}

/// Routing overhead phase latency histogram suffixes.
///
/// Combined with `name_prefix::ROUTER` ("dynamo_router") in `RoutingOverheadMetrics::register()`,
/// yielding e.g. `dynamo_router_overhead_block_hashing_ms{router_id="..."}`.
/// See `lib/llm/src/kv_router/metrics.rs`.
///
/// Every name carries the `_ms` unit suffix (milliseconds, per the module-level
/// naming conventions).
pub mod routing_overhead {
    /// Time spent computing block hashes
    pub const BLOCK_HASHING_MS: &str = "overhead_block_hashing_ms";

    /// Time spent in indexer find_matches
    pub const INDEXER_FIND_MATCHES_MS: &str = "overhead_indexer_find_matches_ms";

    /// Time spent computing sequence hashes
    pub const SEQ_HASHING_MS: &str = "overhead_seq_hashing_ms";

    /// Time spent in scheduler worker selection
    pub const SCHEDULING_MS: &str = "overhead_scheduling_ms";

    /// Total routing overhead per request
    pub const TOTAL_MS: &str = "overhead_total_ms";
}

/// Router request metrics (component-scoped aggregate histograms + counter)
///
/// These constants are the suffix portions of full metric names, combined with
/// [`name_prefix::COMPONENT`] to form the complete name, e.g.
/// `dynamo_component_router_requests_total`.
///
/// Each value equals `router_request::METRIC_PREFIX` followed by the
/// correspondingly named `frontend_service` constant, spelled out in full here.
///
/// ⚠️  Python codegen: Run gen-python-prometheus-names after changes
pub mod router {
    /// Total number of requests processed by the router
    pub const REQUESTS_TOTAL: &str = "router_requests_total";

    /// Time to first token observed at the router (seconds)
    pub const TIME_TO_FIRST_TOKEN_SECONDS: &str = "router_time_to_first_token_seconds";

    /// Average inter-token latency observed at the router (seconds)
    pub const INTER_TOKEN_LATENCY_SECONDS: &str = "router_inter_token_latency_seconds";

    /// Input sequence length in tokens observed at the router
    pub const INPUT_SEQUENCE_TOKENS: &str = "router_input_sequence_tokens";

    /// Output sequence length in tokens observed at the router
    pub const OUTPUT_SEQUENCE_TOKENS: &str = "router_output_sequence_tokens";
}

/// KvRouter (including KvIndexer) Prometheus metric names
pub mod kvrouter {
    /// Number of KV cache events applied to the index (including status)
    pub const KV_CACHE_EVENTS_APPLIED: &str = "kv_cache_events_applied";
}

/// KV cache statistics metrics (metric name suffixes)
pub mod kvstats {
    /// Total number of KV cache blocks available on the worker
    pub const TOTAL_BLOCKS: &str = "total_blocks";

    /// GPU cache usage, documented range 0.0-1.0
    ///
    /// NOTE(review): the name says `_percent` while the documented range is a
    /// 0.0-1.0 fraction — confirm against the code that sets this gauge.
    pub const GPU_CACHE_USAGE_PERCENT: &str = "gpu_cache_usage_percent";
}

/// Model information metrics (metric name suffixes)
pub mod model_info {
    /// Model load time in seconds
    pub const LOAD_TIME_SECONDS: &str = "model_load_time_seconds";
}

// Shared regex patterns for Prometheus sanitization.
// Each pattern is compiled once, on first use, and then reused.

/// Matches any single character that is not an ASCII letter, digit, `_` or `:`
/// (i.e. not allowed in a metric name).
static METRIC_INVALID_CHARS_PATTERN: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"[^a-zA-Z0-9_:]").unwrap());
/// Matches any single character that is not an ASCII letter, digit or `_`
/// (i.e. not allowed in a label name; colons are rejected here).
static LABEL_INVALID_CHARS_PATTERN: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"[^a-zA-Z0-9_]").unwrap());
/// Matches a string whose first character is neither an ASCII letter nor `_`.
/// Note that this also matches a leading `:` (and a leading digit), and that it
/// is shared by both the metric-name and the label-name sanitizers.
static INVALID_FIRST_CHAR_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[^a-zA-Z_]").unwrap());

/// Turns an arbitrary string into a usable Prometheus metric name.
///
/// Steps, in order:
/// 1. Every character outside `[a-zA-Z0-9_:]` is replaced by `_`.
/// 2. If the result does not start with an ASCII letter or `_`, a single `_` is
///    prepended. The shared first-character pattern does not accept `:`, so a
///    leading colon (like a leading digit) also gets the `_` prefix.
/// 3. A result made up solely of underscores is rejected.
///
/// Colons and `__` are left untouched wherever they appear.
///
/// # Errors
/// Returns an error when `raw` is empty, or when sanitization leaves nothing but
/// underscores.
pub fn sanitize_prometheus_name(raw: &str) -> anyhow::Result<String> {
    if raw.is_empty() {
        anyhow::bail!("Cannot sanitize empty string into valid Prometheus name");
    }

    // Step 1: swap out every character a metric name may not contain.
    let replaced = METRIC_INVALID_CHARS_PATTERN.replace_all(raw, "_");

    // Step 2: guard the first character with a leading underscore when needed.
    let sanitized = if INVALID_FIRST_CHAR_PATTERN.is_match(&replaced) {
        format!("_{}", replaced)
    } else {
        replaced.into_owned()
    };

    // Step 3: nothing meaningful survived if only underscores remain.
    if sanitized.bytes().all(|b| b == b'_') {
        anyhow::bail!(
            "Input '{}' contains only invalid characters and cannot be sanitized into a valid Prometheus name",
            raw
        );
    }

    Ok(sanitized)
}

/// Sanitizes a Prometheus label name by converting invalid characters to underscores
/// and ensuring the first character is valid. Uses regex for clear validation.
/// Label names have stricter rules than metric names (no colons allowed).
///
/// **Rules**: Pattern `[a-zA-Z_][a-zA-Z0-9_]*`. No colons, no `__` prefix (reserved).
///
/// Steps, in order:
/// 1. Every character outside `[a-zA-Z0-9_]` is replaced by `_`.
/// 2. If the result does not start with an ASCII letter or `_`, a single `_` is
///    prepended.
/// 3. If the result starts with the reserved `__` prefix, *all* leading
///    underscores are removed; a single `_` is put back only when what remains
///    does not start with a letter (e.g. `__1abc` → `_1abc`, `___foo` → `foo`).
/// 4. A result made up solely of underscores is rejected.
///
/// # Errors
/// Returns an error when `raw` is empty, or when sanitization leaves nothing but
/// underscores.
pub fn sanitize_prometheus_label(raw: &str) -> anyhow::Result<String> {
    if raw.is_empty() {
        return Err(anyhow::anyhow!(
            "Cannot sanitize empty string into valid Prometheus label"
        ));
    }

    // Replace all invalid characters with underscores (no colons allowed in labels)
    let mut sanitized = LABEL_INVALID_CHARS_PATTERN
        .replace_all(raw, "_")
        .to_string();

    // Ensure first character is valid (letter or underscore only)
    if INVALID_FIRST_CHAR_PATTERN.is_match(&sanitized) {
        sanitized.insert(0, '_');
    }

    // Prevent __ prefix (reserved for Prometheus internal use) but allow __ elsewhere.
    // All leading underscores are trimmed, not just two: stripping exactly two and
    // then re-adding one would turn `___foo` back into the reserved `__foo`.
    if sanitized.starts_with("__") {
        let rest = sanitized.trim_start_matches('_');
        sanitized = if rest.starts_with(|c: char| c.is_ascii_alphabetic()) {
            rest.to_string()
        } else {
            // Empty or digit-led remainder: keep it valid with one underscore.
            format!("_{}", rest)
        };
    }

    // Check if the result is all underscores (invalid input)
    if sanitized.chars().all(|c| c == '_') {
        return Err(anyhow::anyhow!(
            "Input '{}' contains only invalid characters and cannot be sanitized into a valid Prometheus label",
            raw
        ));
    }

    Ok(sanitized)
}

/// Produces the metric prefix to use for frontend metrics from a user-supplied
/// string.
///
/// The input goes through the same sanitization as any metric name
/// (`sanitize_prometheus_name`). Unlike that function this one never fails:
/// when the input is empty or cannot be sanitized, the default
/// `name_prefix::FRONTEND` is returned instead.
pub fn sanitize_frontend_prometheus_prefix(raw: &str) -> String {
    // `None` means "use the default": either nothing was supplied or the general
    // sanitizer rejected it.
    let custom = if raw.is_empty() {
        None
    } else {
        sanitize_prometheus_name(raw).ok()
    };

    match custom {
        Some(prefix) => prefix,
        None => name_prefix::FRONTEND.to_string(),
    }
}

/// Builds the full name of a component metric: `name_prefix::COMPONENT`, an
/// underscore, then the sanitized `metric_name`.
///
/// # Panics
/// Panics if `sanitize_prometheus_name` rejects `metric_name` (empty input, or
/// input that sanitizes to underscores only).
pub fn build_component_metric_name(metric_name: &str) -> String {
    let suffix = sanitize_prometheus_name(metric_name)
        .expect("metric name should be valid or sanitizable");
    [name_prefix::COMPONENT, "_", suffix.as_str()].concat()
}

/// Converts a `u64` to `i64` for Prometheus metrics, saturating at `i64::MAX`.
///
/// Prometheus `IntGaugeVec` works with `i64` while our data types use `u64`.
/// Values that do not fit are clamped to `i64::MAX`, so the result never wraps
/// around to a negative number.
///
/// # Arguments
/// * `value` - The u64 value to convert
///
/// # Returns
/// `value` as an `i64` when it fits, otherwise `i64::MAX`
///
/// # Examples
/// ```
/// use dynamo_runtime::metrics::prometheus_names::clamp_u64_to_i64;
///
/// assert_eq!(clamp_u64_to_i64(100), 100);
/// assert_eq!(clamp_u64_to_i64(u64::MAX), i64::MAX);
/// ```
pub fn clamp_u64_to_i64(value: u64) -> i64 {
    // Cap in the unsigned domain first; after that the cast cannot change the value.
    let capped = value.min(i64::MAX as u64);
    capped as i64
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Frontend prefixes: valid input passes through, invalid characters become
    /// underscores, a bad leading character is repaired, empty input falls back.
    #[test]
    fn test_sanitize_frontend_prometheus_prefix() {
        let cases: &[(&str, &str)] = &[
            // Already-valid prefixes are returned unchanged
            ("dynamo_frontend", "dynamo_frontend"),
            ("custom_prefix", "custom_prefix"),
            ("test123", "test123"),
            // Invalid characters are replaced by underscores
            ("test prefix", "test_prefix"),
            ("test.prefix", "test_prefix"),
            ("test@prefix", "test_prefix"),
            ("test-prefix", "test_prefix"),
            // Invalid leading characters are repaired
            ("123test", "_123test"),
            ("@test", "_test"),
        ];
        for &(raw, expected) in cases {
            assert_eq!(
                sanitize_frontend_prometheus_prefix(raw),
                expected,
                "unexpected prefix for input {:?}",
                raw
            );
        }

        // Empty input falls back to the default frontend prefix
        assert_eq!(
            sanitize_frontend_prometheus_prefix(""),
            name_prefix::FRONTEND
        );
    }

    /// Metric names: colons and repeated underscores are preserved, other invalid
    /// characters become underscores, and unsalvageable input is an error.
    #[test]
    fn test_sanitize_prometheus_name() {
        let accepted: &[(&str, &str)] = &[
            // Valid names are returned unchanged
            ("valid_name", "valid_name"),
            ("test123", "test123"),
            ("test_name_123", "test_name_123"),
            ("test:name", "test:name"), // colons allowed
            // Invalid characters are replaced by underscores
            ("test name", "test_name"),
            ("test.name", "test_name"),
            ("test@name", "test_name"),
            ("test-name", "test_name"),
            ("test$name#123", "test_name_123"),
            // Double underscores are ALLOWED in metric names (unlike labels)
            ("test__name", "test__name"),
            ("test___name", "test___name"),
            ("__test", "__test"), // leading double underscore OK
            // Invalid leading characters are repaired
            ("123test", "_123test"),
            ("@test", "_test"), // @ becomes _, no double underscore
            ("-test", "_test"), // - becomes _, no double underscore
            (".test", "_test"), // . becomes _, no double underscore
            // Mixed case combining several rules
            ("123.test-name@domain", "_123_test_name_domain"),
        ];
        for &(raw, expected) in accepted {
            assert_eq!(
                sanitize_prometheus_name(raw).unwrap(),
                expected,
                "unexpected name for input {:?}",
                raw
            );
        }

        // Empty input and input made only of invalid characters must be rejected
        let rejected: &[&str] = &["", "@#$%", "!!!!"];
        for &raw in rejected {
            assert!(
                sanitize_prometheus_name(raw).is_err(),
                "expected an error for input {:?}",
                raw
            );
        }
    }

    /// Labels: stricter than metric names (no colons, no leading double
    /// underscore); unsalvageable input is an error.
    #[test]
    fn test_sanitize_prometheus_label() {
        let accepted: &[(&str, &str)] = &[
            // Valid labels are returned unchanged
            ("valid_label", "valid_label"),
            ("test123", "test123"),
            ("test_label_123", "test_label_123"),
            // Colons are NOT allowed in labels (stricter than names)
            ("test:label", "test_label"),
            // Invalid characters are replaced by underscores
            ("test label", "test_label"),
            ("test.label", "test_label"),
            ("test@label", "test_label"),
            ("test-label", "test_label"),
            ("test$label#123", "test_label_123"),
            // Repeated underscores are ALLOWED in the middle...
            ("test__label", "test__label"),
            ("test___label", "test___label"),
            ("test____label", "test____label"),
            // ...but a leading double underscore is removed
            ("__test", "test"),
            // Invalid leading characters are repaired (colon included)
            ("123test", "_123test"),
            ("@test", "_test"),
            (":test", "_test"), // colon not allowed
            ("-test", "_test"),
            // Mixed case combining several rules
            ("123:test-label@domain", "_123_test_label_domain"),
        ];
        for &(raw, expected) in accepted {
            assert_eq!(
                sanitize_prometheus_label(raw).unwrap(),
                expected,
                "unexpected label for input {:?}",
                raw
            );
        }

        // All-underscore input, empty input, and input made only of invalid
        // characters must be rejected
        let rejected: &[&str] = &["____", "", "@#$%", "!!!!"];
        for &raw in rejected {
            assert!(
                sanitize_prometheus_label(raw).is_err(),
                "expected an error for input {:?}",
                raw
            );
        }
    }

    /// Component metric names are the sanitized input behind the component prefix.
    #[test]
    fn test_build_component_metric_name() {
        let cases: &[(&str, &str)] = &[
            // Valid names only gain the prefix
            ("test_metric", "dynamo_component_test_metric"),
            ("requests_total", "dynamo_component_requests_total"),
            // Invalid characters are sanitized before prefixing
            ("test metric", "dynamo_component_test_metric"),
            ("test.metric", "dynamo_component_test_metric"),
            ("test@metric", "dynamo_component_test_metric"),
            // A repaired leading character yields a double underscore after the prefix
            ("123metric", "dynamo_component__123metric"),
        ];
        for &(raw, expected) in cases {
            assert_eq!(
                build_component_metric_name(raw),
                expected,
                "unexpected metric name for input {:?}",
                raw
            );
        }
    }

    /// Input consisting only of invalid characters must panic with a clear message.
    #[test]
    #[should_panic(expected = "metric name should be valid or sanitizable")]
    fn test_build_component_metric_name_panics_on_invalid_input() {
        build_component_metric_name("@#$%");
    }

    /// Empty input must panic with a clear message.
    #[test]
    #[should_panic(expected = "metric name should be valid or sanitizable")]
    fn test_build_component_metric_name_panics_on_empty_input() {
        build_component_metric_name("");
    }

    /// Values up to `i64::MAX` convert exactly; anything larger saturates.
    #[test]
    fn test_clamp_u64_to_i64() {
        let limit = i64::MAX as u64;
        let cases: &[(u64, i64)] = &[
            // Normal values within the i64 range
            (0, 0),
            (100, 100),
            (1000000, 1000000),
            // Exactly the maximum i64 value
            (limit, i64::MAX),
            // Values that exceed i64::MAX are clamped
            (u64::MAX, i64::MAX),
            (limit + 1, i64::MAX),
            (limit + 1000, i64::MAX),
        ];
        for &(input, expected) in cases {
            assert_eq!(
                clamp_u64_to_i64(input),
                expected,
                "unexpected clamp result for input {}",
                input
            );
        }
    }
}