use once_cell::sync::Lazy;
use regex::Regex;
/// String prefixes for metric names, one per subsystem.
pub mod name_prefix {
/// Prefix that `build_component_metric_name` prepends (followed by `_`) to a sanitized metric name.
pub const COMPONENT: &str = "dynamo_component";
/// Default frontend prefix; `sanitize_frontend_prometheus_prefix` falls back to this value.
pub const FRONTEND: &str = "dynamo_frontend";
/// Router prefix. NOTE(review): no use of this constant is visible in this file.
pub const ROUTER: &str = "dynamo_router";
}
/// Label name string constants.
///
/// NOTE(review): which metrics each label is attached to is not visible in this file.
pub mod labels {
// Same string as `name_prefix::COMPONENT`, but declared here as a label name.
pub const COMPONENT: &str = "dynamo_component";
pub const NAMESPACE: &str = "dynamo_namespace";
pub const ENDPOINT: &str = "dynamo_endpoint";
pub const DP_RANK: &str = "dp_rank";
pub const WORKER_ID: &str = "worker_id";
// `MODEL` and `MODEL_NAME` are two distinct label strings ("model" vs "model_name").
pub const MODEL: &str = "model";
pub const MODEL_NAME: &str = "model_name";
pub const WORKER_TYPE: &str = "worker_type";
pub const ROUTER_ID: &str = "router_id";
}
/// Component name string constants.
pub mod component_names {
/// The string "router". NOTE(review): presumably the name of the router component — confirm at the use site.
pub const ROUTER: &str = "router";
}
/// Constants for the frontend service: metric name strings, label names and label values.
///
/// NOTE(review): the metric names below carry no prefix; presumably a prefix such as
/// `name_prefix::FRONTEND` is prepended where the metrics are registered — confirm at the call site.
pub mod frontend_service {
/// Name of an environment variable (going by the `_ENV` suffix). Presumably it overrides the
/// frontend metric prefix — confirm where it is read.
pub const METRICS_PREFIX_ENV: &str = "DYN_METRICS_PREFIX";
// Metric name strings.
pub const REQUESTS_TOTAL: &str = "requests_total";
pub const QUEUED_REQUESTS: &str = "queued_requests";
pub const INFLIGHT_REQUESTS: &str = "inflight_requests";
pub const DISCONNECTED_CLIENTS: &str = "disconnected_clients";
pub const REQUEST_DURATION_SECONDS: &str = "request_duration_seconds";
pub const INPUT_SEQUENCE_TOKENS: &str = "input_sequence_tokens";
pub const OUTPUT_SEQUENCE_TOKENS: &str = "output_sequence_tokens";
pub const KV_HIT_RATE: &str = "kv_hit_rate";
pub const CACHED_TOKENS: &str = "cached_tokens";
pub const TOKENIZER_LATENCY_MS: &str = "tokenizer_latency_ms";
pub const OUTPUT_TOKENS_TOTAL: &str = "output_tokens_total";
pub const TIME_TO_FIRST_TOKEN_SECONDS: &str = "time_to_first_token_seconds";
pub const INTER_TOKEN_LATENCY_SECONDS: &str = "inter_token_latency_seconds";
// Metric names that start with `model_`.
pub const MODEL_TOTAL_KV_BLOCKS: &str = "model_total_kv_blocks";
pub const MODEL_MAX_NUM_SEQS: &str = "model_max_num_seqs";
pub const MODEL_MAX_NUM_BATCHED_TOKENS: &str = "model_max_num_batched_tokens";
pub const MODEL_CONTEXT_LENGTH: &str = "model_context_length";
pub const MODEL_KV_CACHE_BLOCK_SIZE: &str = "model_kv_cache_block_size";
pub const MODEL_MIGRATION_LIMIT: &str = "model_migration_limit";
pub const MODEL_MIGRATION_TOTAL: &str = "model_migration_total";
// Metric names that start with `worker_`.
pub const WORKER_ACTIVE_DECODE_BLOCKS: &str = "worker_active_decode_blocks";
pub const WORKER_ACTIVE_PREFILL_TOKENS: &str = "worker_active_prefill_tokens";
pub const WORKER_LAST_TIME_TO_FIRST_TOKEN_SECONDS: &str =
"worker_last_time_to_first_token_seconds";
pub const WORKER_LAST_INPUT_SEQUENCE_TOKENS: &str = "worker_last_input_sequence_tokens";
pub const WORKER_LAST_INTER_TOKEN_LATENCY_SECONDS: &str =
"worker_last_inter_token_latency_seconds";
// Label names. The `migration_type` and `operation` submodules below share these names and
// presumably hold the corresponding label values — confirm at the use site.
pub const MIGRATION_TYPE_LABEL: &str = "migration_type";
pub const OPERATION_LABEL: &str = "operation";
/// Operation strings; presumably values for `OPERATION_LABEL`.
pub mod operation {
pub const TOKENIZE: &str = "tokenize";
pub const DETOKENIZE: &str = "detokenize";
}
/// Migration type strings; presumably values for `MIGRATION_TYPE_LABEL`.
pub mod migration_type {
pub const NEW_REQUEST: &str = "new_request";
pub const ONGOING_REQUEST: &str = "ongoing_request";
}
/// Status strings. NOTE(review): the label they are used with is not visible in this file.
pub mod status {
pub const SUCCESS: &str = "success";
pub const ERROR: &str = "error";
}
/// Request type strings. NOTE(review): the label they are used with is not visible in this file.
pub mod request_type {
pub const STREAM: &str = "stream";
pub const UNARY: &str = "unary";
}
/// Error type strings. NOTE(review): the label they are used with is not visible in this file.
pub mod error_type {
// The "no error" value is the empty string.
pub const NONE: &str = "";
pub const VALIDATION: &str = "validation";
pub const NOT_FOUND: &str = "not_found";
pub const OVERLOAD: &str = "overload";
pub const CANCELLED: &str = "cancelled";
pub const INTERNAL: &str = "internal";
pub const NOT_IMPLEMENTED: &str = "not_implemented";
}
}
/// Constants for the work handler: metric name strings, one label name and its candidate values.
///
/// Several strings (`requests_total`, `inflight_requests`, `request_duration_seconds`) are the
/// same as in `frontend_service`. NOTE(review): presumably a different prefix keeps the final
/// metric names distinct — confirm where the metrics are registered.
pub mod work_handler {
// Metric name strings.
pub const REQUESTS_TOTAL: &str = "requests_total";
pub const REQUEST_BYTES_TOTAL: &str = "request_bytes_total";
pub const RESPONSE_BYTES_TOTAL: &str = "response_bytes_total";
pub const INFLIGHT_REQUESTS: &str = "inflight_requests";
pub const REQUEST_DURATION_SECONDS: &str = "request_duration_seconds";
pub const ERRORS_TOTAL: &str = "errors_total";
// Label name; the `error_types` submodule presumably lists its values.
pub const ERROR_TYPE_LABEL: &str = "error_type";
/// Error type strings; presumably values for `ERROR_TYPE_LABEL`.
pub mod error_types {
pub const DESERIALIZATION: &str = "deserialization";
pub const INVALID_MESSAGE: &str = "invalid_message";
pub const RESPONSE_STREAM: &str = "response_stream";
pub const GENERATE: &str = "generate";
pub const PUBLISH_RESPONSE: &str = "publish_response";
pub const PUBLISH_FINAL: &str = "publish_final";
}
}
/// Metric name strings for the task tracker; each name ends in `_total`.
pub mod task_tracker {
pub const TASKS_ISSUED_TOTAL: &str = "tasks_issued_total";
pub const TASKS_STARTED_TOTAL: &str = "tasks_started_total";
pub const TASKS_SUCCESS_TOTAL: &str = "tasks_success_total";
pub const TASKS_CANCELLED_TOTAL: &str = "tasks_cancelled_total";
pub const TASKS_FAILED_TOTAL: &str = "tasks_failed_total";
pub const TASKS_REJECTED_TOTAL: &str = "tasks_rejected_total";
}
/// Metric name strings for the distributed runtime.
pub mod distributed_runtime {
pub const UPTIME_SECONDS: &str = "uptime_seconds";
}
/// Metric name strings for `kvbm`.
///
/// NOTE(review): the `d2h` / `h2d` / `d2d` / `d2o` / `o2d` suffixes presumably abbreviate
/// source-to-destination storage tiers (e.g. device, host, disk, object). The letter `d` may
/// stand for different tiers in different names — confirm against the code that records them.
pub mod kvbm {
// Block counts for offload and onboard transfers.
pub const OFFLOAD_BLOCKS_D2H: &str = "offload_blocks_d2h";
pub const OFFLOAD_BLOCKS_H2D: &str = "offload_blocks_h2d";
pub const OFFLOAD_BLOCKS_D2D: &str = "offload_blocks_d2d";
pub const ONBOARD_BLOCKS_H2D: &str = "onboard_blocks_h2d";
pub const ONBOARD_BLOCKS_D2D: &str = "onboard_blocks_d2d";
pub const MATCHED_TOKENS: &str = "matched_tokens";
// Cache hit rate names, one each for host, disk and object.
pub const HOST_CACHE_HIT_RATE: &str = "host_cache_hit_rate";
pub const DISK_CACHE_HIT_RATE: &str = "disk_cache_hit_rate";
pub const OBJECT_CACHE_HIT_RATE: &str = "object_cache_hit_rate";
// Names involving the `o` / `object` tier.
pub const OFFLOAD_BLOCKS_D2O: &str = "offload_blocks_d2o";
pub const ONBOARD_BLOCKS_O2D: &str = "onboard_blocks_o2d";
pub const OFFLOAD_BYTES_OBJECT: &str = "offload_bytes_object";
pub const ONBOARD_BYTES_OBJECT: &str = "onboard_bytes_object";
pub const OBJECT_READ_FAILURES: &str = "object_read_failures";
pub const OBJECT_WRITE_FAILURES: &str = "object_write_failures";
}
/// Constants for router request metrics.
pub mod router_request {
/// The string "router_"; every constant in the sibling `router` module starts with it.
pub const METRIC_PREFIX: &str = "router_";
}
/// Metric name strings for routing overhead; each starts with `overhead_` and ends with `_ms`.
pub mod routing_overhead {
pub const BLOCK_HASHING_MS: &str = "overhead_block_hashing_ms";
pub const INDEXER_FIND_MATCHES_MS: &str = "overhead_indexer_find_matches_ms";
pub const SEQ_HASHING_MS: &str = "overhead_seq_hashing_ms";
pub const SCHEDULING_MS: &str = "overhead_scheduling_ms";
pub const TOTAL_MS: &str = "overhead_total_ms";
}
/// Metric name strings for the router.
///
/// Each value is a `frontend_service` metric name of the same constant name with `router_`
/// (the value of `router_request::METRIC_PREFIX`) in front.
pub mod router {
pub const REQUESTS_TOTAL: &str = "router_requests_total";
pub const TIME_TO_FIRST_TOKEN_SECONDS: &str = "router_time_to_first_token_seconds";
pub const INTER_TOKEN_LATENCY_SECONDS: &str = "router_inter_token_latency_seconds";
pub const INPUT_SEQUENCE_TOKENS: &str = "router_input_sequence_tokens";
pub const OUTPUT_SEQUENCE_TOKENS: &str = "router_output_sequence_tokens";
}
/// Metric name strings for `kvrouter`.
pub mod kvrouter {
pub const KV_CACHE_EVENTS_APPLIED: &str = "kv_cache_events_applied";
}
/// Metric name strings for `kvstats`.
pub mod kvstats {
pub const TOTAL_BLOCKS: &str = "total_blocks";
// NOTE(review): the name says "percent"; whether the recorded value is 0–100 or 0–1 is not visible here.
pub const GPU_CACHE_USAGE_PERCENT: &str = "gpu_cache_usage_percent";
}
/// Metric name strings for model information.
pub mod model_info {
pub const LOAD_TIME_SECONDS: &str = "model_load_time_seconds";
}
/// Matches any single character that is NOT in `[a-zA-Z0-9_:]`.
/// Used by `sanitize_prometheus_name`; compiled on first use via `Lazy`.
static METRIC_INVALID_CHARS_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new(r"[^a-zA-Z0-9_:]").unwrap());
/// Matches any single character that is NOT in `[a-zA-Z0-9_]`.
/// Used by `sanitize_prometheus_label`; unlike the metric pattern, `:` counts as invalid here.
static LABEL_INVALID_CHARS_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new(r"[^a-zA-Z0-9_]").unwrap());
/// Matches when the first character is not an ASCII letter or `_` (so a leading digit or `:` matches).
static INVALID_FIRST_CHAR_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[^a-zA-Z_]").unwrap());
/// Converts `raw` into a string usable as a Prometheus metric name.
///
/// Every character outside `[a-zA-Z0-9_:]` is replaced with `_`. If the result does not start
/// with an ASCII letter or `_` (for example a leading digit or `:`), a single `_` is prepended.
///
/// # Errors
///
/// Returns an error when `raw` is empty, or when the sanitized text consists solely of
/// underscores, i.e. no usable character survived.
pub fn sanitize_prometheus_name(raw: &str) -> anyhow::Result<String> {
    if raw.is_empty() {
        anyhow::bail!("Cannot sanitize empty string into valid Prometheus name");
    }

    let replaced = METRIC_INVALID_CHARS_PATTERN.replace_all(raw, "_");
    let candidate = if INVALID_FIRST_CHAR_PATTERN.is_match(&replaced) {
        format!("_{}", replaced)
    } else {
        replaced.into_owned()
    };

    // Nothing but underscores left means the input carried no usable characters.
    if candidate.trim_start_matches('_').is_empty() {
        anyhow::bail!(
            "Input '{}' contains only invalid characters and cannot be sanitized into a valid Prometheus name",
            raw
        );
    }

    Ok(candidate)
}
/// Converts `raw` into a string usable as a Prometheus label name.
///
/// Steps:
/// 1. Every character outside `[a-zA-Z0-9_]` is replaced with `_` (note that `:` is not
///    allowed in label names).
/// 2. Label names starting with `__` are reserved by Prometheus, so if the text starts with
///    two or more underscores, *all* leading underscores are removed. Removing only one `__`
///    pair is not enough: `___test` would otherwise come out as `__test`, which is still
///    reserved.
/// 3. If what remains is empty or does not start with an ASCII letter or `_` (for example a
///    digit), a single `_` is prepended.
///
/// The returned name therefore never starts with `__`.
///
/// # Errors
///
/// Returns an error when `raw` is empty, or when the sanitized text consists solely of
/// underscores, i.e. no usable character survived.
pub fn sanitize_prometheus_label(raw: &str) -> anyhow::Result<String> {
    if raw.is_empty() {
        return Err(anyhow::anyhow!(
            "Cannot sanitize empty string into valid Prometheus label"
        ));
    }

    let replaced = LABEL_INVALID_CHARS_PATTERN.replace_all(raw, "_");

    // Drop the whole run of leading underscores when it would form the reserved `__` prefix;
    // a single leading underscore is legal and is kept as-is.
    let body: &str = if replaced.starts_with("__") {
        replaced.trim_start_matches('_')
    } else {
        &*replaced
    };

    // An empty remainder or a leading digit needs one `_` to become a valid first character.
    let starts_validly = body.starts_with(|c: char| c.is_ascii_alphabetic() || c == '_');
    let sanitized = if starts_validly {
        body.to_string()
    } else {
        format!("_{}", body)
    };

    if sanitized.chars().all(|c| c == '_') {
        return Err(anyhow::anyhow!(
            "Input '{}' contains only invalid characters and cannot be sanitized into a valid Prometheus label",
            raw
        ));
    }

    Ok(sanitized)
}
/// Sanitizes a frontend metric prefix, never failing.
///
/// Returns the result of `sanitize_prometheus_name(raw)` when that succeeds. For an empty
/// `raw`, or when sanitization fails, returns `name_prefix::FRONTEND` instead.
pub fn sanitize_frontend_prometheus_prefix(raw: &str) -> String {
    let sanitized = if raw.is_empty() {
        None
    } else {
        sanitize_prometheus_name(raw).ok()
    };
    sanitized.unwrap_or_else(|| name_prefix::FRONTEND.to_string())
}
/// Builds a component metric name: `name_prefix::COMPONENT`, an underscore, then the
/// sanitized `metric_name`.
///
/// # Panics
///
/// Panics when `sanitize_prometheus_name` rejects `metric_name`, i.e. when it is empty or
/// sanitizes to nothing but underscores.
pub fn build_component_metric_name(metric_name: &str) -> String {
    let suffix =
        sanitize_prometheus_name(metric_name).expect("metric name should be valid or sanitizable");
    [name_prefix::COMPONENT, suffix.as_str()].join("_")
}
/// Converts a `u64` to `i64`, saturating at `i64::MAX` instead of wrapping.
///
/// Values up to and including `i64::MAX` are returned unchanged; anything larger yields
/// `i64::MAX`.
pub fn clamp_u64_to_i64(value: u64) -> i64 {
    // After capping at `i64::MAX` the cast can no longer wrap into negative values.
    let capped = value.min(i64::MAX as u64);
    capped as i64
}
// Unit tests for the sanitizers, the component metric name builder and the u64 -> i64 clamp.
#[cfg(test)]
mod tests {
use super::*;
// Frontend prefix: valid input passes through, invalid characters become `_`, and empty
// input falls back to the default prefix.
#[test]
fn test_sanitize_frontend_prometheus_prefix() {
// Already-valid prefixes are returned unchanged.
assert_eq!(
sanitize_frontend_prometheus_prefix("dynamo_frontend"),
"dynamo_frontend"
);
assert_eq!(
sanitize_frontend_prometheus_prefix("custom_prefix"),
"custom_prefix"
);
assert_eq!(sanitize_frontend_prometheus_prefix("test123"), "test123");
// Space, dot, `@` and `-` are each replaced with an underscore.
assert_eq!(
sanitize_frontend_prometheus_prefix("test prefix"),
"test_prefix"
);
assert_eq!(
sanitize_frontend_prometheus_prefix("test.prefix"),
"test_prefix"
);
assert_eq!(
sanitize_frontend_prometheus_prefix("test@prefix"),
"test_prefix"
);
assert_eq!(
sanitize_frontend_prometheus_prefix("test-prefix"),
"test_prefix"
);
// A leading digit gets an underscore prepended; a leading invalid character becomes one.
assert_eq!(sanitize_frontend_prometheus_prefix("123test"), "_123test");
assert_eq!(sanitize_frontend_prometheus_prefix("@test"), "_test");
// Empty input yields the default frontend prefix.
assert_eq!(
sanitize_frontend_prometheus_prefix(""),
name_prefix::FRONTEND
);
}
// Metric names: `:` is allowed, other invalid characters become `_`.
#[test]
fn test_sanitize_prometheus_name() {
// Valid names, including ones containing `:`, are returned unchanged.
assert_eq!(
sanitize_prometheus_name("valid_name").unwrap(),
"valid_name"
);
assert_eq!(sanitize_prometheus_name("test123").unwrap(), "test123");
assert_eq!(
sanitize_prometheus_name("test_name_123").unwrap(),
"test_name_123"
);
assert_eq!(sanitize_prometheus_name("test:name").unwrap(), "test:name");
// Each invalid character is replaced with one underscore.
assert_eq!(sanitize_prometheus_name("test name").unwrap(), "test_name");
assert_eq!(sanitize_prometheus_name("test.name").unwrap(), "test_name");
assert_eq!(sanitize_prometheus_name("test@name").unwrap(), "test_name");
assert_eq!(sanitize_prometheus_name("test-name").unwrap(), "test_name");
assert_eq!(
sanitize_prometheus_name("test$name#123").unwrap(),
"test_name_123"
);
// Consecutive underscores are preserved for metric names, even at the start.
assert_eq!(
sanitize_prometheus_name("test__name").unwrap(),
"test__name"
);
assert_eq!(
sanitize_prometheus_name("test___name").unwrap(),
"test___name"
);
assert_eq!(sanitize_prometheus_name("__test").unwrap(), "__test");
// Invalid first characters: a digit is prefixed with `_`, a symbol is replaced by `_`.
assert_eq!(sanitize_prometheus_name("123test").unwrap(), "_123test");
assert_eq!(sanitize_prometheus_name("@test").unwrap(), "_test"); assert_eq!(sanitize_prometheus_name("-test").unwrap(), "_test"); assert_eq!(sanitize_prometheus_name(".test").unwrap(), "_test");
// Empty input is an error.
assert!(sanitize_prometheus_name("").is_err());
assert_eq!(
sanitize_prometheus_name("123.test-name@domain").unwrap(),
"_123_test_name_domain"
);
// Input made only of invalid characters is an error.
assert!(sanitize_prometheus_name("@#$%").is_err());
assert!(sanitize_prometheus_name("!!!!").is_err());
}
// Label names: `:` is NOT allowed, and a leading `__` is stripped.
#[test]
fn test_sanitize_prometheus_label() {
// Valid labels are returned unchanged.
assert_eq!(
sanitize_prometheus_label("valid_label").unwrap(),
"valid_label"
);
assert_eq!(sanitize_prometheus_label("test123").unwrap(), "test123");
assert_eq!(
sanitize_prometheus_label("test_label_123").unwrap(),
"test_label_123"
);
// Unlike metric names, `:` is replaced in labels.
assert_eq!(
sanitize_prometheus_label("test:label").unwrap(),
"test_label"
);
assert_eq!(
sanitize_prometheus_label("test label").unwrap(),
"test_label"
);
assert_eq!(
sanitize_prometheus_label("test.label").unwrap(),
"test_label"
);
assert_eq!(
sanitize_prometheus_label("test@label").unwrap(),
"test_label"
);
assert_eq!(
sanitize_prometheus_label("test-label").unwrap(),
"test_label"
);
assert_eq!(
sanitize_prometheus_label("test$label#123").unwrap(),
"test_label_123"
);
// Interior runs of underscores are preserved; a leading `__` is stripped ("__test" -> "test"),
// and a label made only of underscores ("____") is an error.
assert_eq!(
sanitize_prometheus_label("test__label").unwrap(),
"test__label"
); assert_eq!(
sanitize_prometheus_label("test___label").unwrap(),
"test___label"
); assert_eq!(
sanitize_prometheus_label("test____label").unwrap(),
"test____label"
); assert_eq!(sanitize_prometheus_label("__test").unwrap(), "test"); assert!(sanitize_prometheus_label("____").is_err());
// Invalid first characters: a digit is prefixed with `_`, a symbol (including `:`) becomes `_`.
assert_eq!(sanitize_prometheus_label("123test").unwrap(), "_123test");
assert_eq!(sanitize_prometheus_label("@test").unwrap(), "_test");
assert_eq!(sanitize_prometheus_label(":test").unwrap(), "_test"); assert_eq!(sanitize_prometheus_label("-test").unwrap(), "_test");
// Empty input is an error.
assert!(sanitize_prometheus_label("").is_err());
assert_eq!(
sanitize_prometheus_label("123:test-label@domain").unwrap(),
"_123_test_label_domain"
);
// Input made only of invalid characters is an error.
assert!(sanitize_prometheus_label("@#$%").is_err()); assert!(sanitize_prometheus_label("!!!!").is_err()); }
// The component prefix and an underscore are prepended to the sanitized metric name.
#[test]
fn test_build_component_metric_name() {
assert_eq!(
build_component_metric_name("test_metric"),
"dynamo_component_test_metric"
);
assert_eq!(
build_component_metric_name("requests_total"),
"dynamo_component_requests_total"
);
assert_eq!(
build_component_metric_name("test metric"),
"dynamo_component_test_metric"
);
assert_eq!(
build_component_metric_name("test.metric"),
"dynamo_component_test_metric"
);
assert_eq!(
build_component_metric_name("test@metric"),
"dynamo_component_test_metric"
);
// The `_` added for the leading digit follows the separator, hence the double underscore.
assert_eq!(
build_component_metric_name("123metric"),
"dynamo_component__123metric"
);
}
// Unsanitizable input makes the builder panic with its `expect` message.
#[test]
#[should_panic(expected = "metric name should be valid or sanitizable")]
fn test_build_component_metric_name_panics_on_invalid_input() {
build_component_metric_name("@#$%");
}
// Empty input also makes the builder panic.
#[test]
#[should_panic(expected = "metric name should be valid or sanitizable")]
fn test_build_component_metric_name_panics_on_empty_input() {
build_component_metric_name("");
}
// Values up to `i64::MAX` pass through; anything larger saturates at `i64::MAX`.
#[test]
fn test_clamp_u64_to_i64() {
assert_eq!(clamp_u64_to_i64(0), 0);
assert_eq!(clamp_u64_to_i64(100), 100);
assert_eq!(clamp_u64_to_i64(1000000), 1000000);
assert_eq!(clamp_u64_to_i64(i64::MAX as u64), i64::MAX);
assert_eq!(clamp_u64_to_i64(u64::MAX), i64::MAX);
assert_eq!(clamp_u64_to_i64((i64::MAX as u64) + 1), i64::MAX);
assert_eq!(clamp_u64_to_i64((i64::MAX as u64) + 1000), i64::MAX);
}
}