use std::sync::OnceLock;
use std::sync::atomic::{AtomicU64, Ordering};
use serde::{Deserialize, Serialize};
/// Lazily-initialised storage for the shared [`Metrics`] instance handed out by [`global`].
static METRICS: OnceLock<Metrics> = OnceLock::new();
/// Returns the shared [`Metrics`] instance.
///
/// The instance lives in the `METRICS` static and is built with `Metrics::new`
/// the first time this function is called.
pub fn global() -> &'static Metrics {
// `get_or_init` only invokes the constructor while the cell is still empty.
METRICS.get_or_init(Metrics::new)
}
/// A set of `AtomicU64` metrics that can be updated through a shared reference.
///
/// A shared instance is available via [`global`]. Values can be copied out with
/// `Metrics::snapshot` or rendered as text with `Metrics::prometheus_exposition`.
/// The per-field descriptions below mirror the help strings used in that text output.
pub struct Metrics {
// --- Request metrics ---
/// Total QUIC API requests received.
pub requests_total: AtomicU64,
/// Reconcile requests.
pub requests_reconcile: AtomicU64,
/// NodeInfo requests.
pub requests_node_info: AtomicU64,
/// NodeStats requests.
pub requests_node_stats: AtomicU64,
/// TenantList requests.
pub requests_tenant_list: AtomicU64,
/// InstanceList requests.
pub requests_instance_list: AtomicU64,
/// WakeInstance requests.
pub requests_wake: AtomicU64,
/// Rate-limited requests.
pub requests_rate_limited: AtomicU64,
/// Failed requests.
pub requests_failed: AtomicU64,
// --- Reconcile loop metrics ---
/// Reconcile loop executions.
pub reconcile_runs: AtomicU64,
/// Reconcile errors.
pub reconcile_errors: AtomicU64,
/// Last reconcile duration, in milliseconds.
pub reconcile_duration_ms: AtomicU64,
// --- Instance lifecycle metrics ---
/// Instances created.
pub instances_created: AtomicU64,
/// Instances started.
pub instances_started: AtomicU64,
/// Instances stopped.
pub instances_stopped: AtomicU64,
/// Instances slept.
pub instances_slept: AtomicU64,
/// Instances woken.
pub instances_woken: AtomicU64,
/// Instances destroyed.
pub instances_destroyed: AtomicU64,
/// Instances deferred by min-runtime policy.
pub instances_deferred: AtomicU64,
// --- Connection metrics ---
/// Connections accepted.
pub connections_accepted: AtomicU64,
/// Connections rejected.
pub connections_rejected: AtomicU64,
// --- Timing metrics (exposed with the `gauge` type) ---
/// Last `build_image()` duration, in milliseconds.
pub build_image_duration_ms: AtomicU64,
/// Last VM start duration, in milliseconds.
pub vm_start_duration_ms: AtomicU64,
/// Last vsock auth handshake RTT, in milliseconds.
pub vsock_handshake_rtt_ms: AtomicU64,
}
impl Metrics {
    /// Builds a `Metrics` value with every field initialised to zero.
    fn new() -> Self {
        Self {
            requests_total: AtomicU64::new(0),
            requests_reconcile: AtomicU64::new(0),
            requests_node_info: AtomicU64::new(0),
            requests_node_stats: AtomicU64::new(0),
            requests_tenant_list: AtomicU64::new(0),
            requests_instance_list: AtomicU64::new(0),
            requests_wake: AtomicU64::new(0),
            requests_rate_limited: AtomicU64::new(0),
            requests_failed: AtomicU64::new(0),
            reconcile_runs: AtomicU64::new(0),
            reconcile_errors: AtomicU64::new(0),
            reconcile_duration_ms: AtomicU64::new(0),
            instances_created: AtomicU64::new(0),
            instances_started: AtomicU64::new(0),
            instances_stopped: AtomicU64::new(0),
            instances_slept: AtomicU64::new(0),
            instances_woken: AtomicU64::new(0),
            instances_destroyed: AtomicU64::new(0),
            instances_deferred: AtomicU64::new(0),
            connections_accepted: AtomicU64::new(0),
            connections_rejected: AtomicU64::new(0),
            build_image_duration_ms: AtomicU64::new(0),
            vm_start_duration_ms: AtomicU64::new(0),
            vsock_handshake_rtt_ms: AtomicU64::new(0),
        }
    }

    /// Copies the current value of every field into a plain [`MetricsSnapshot`].
    ///
    /// Each field is read with its own `Ordering::Relaxed` load, so the result is
    /// not a single atomic view across all fields: updates that happen while the
    /// snapshot is being taken may be reflected in some fields and not others.
    pub fn snapshot(&self) -> MetricsSnapshot {
        MetricsSnapshot {
            requests_total: self.requests_total.load(Ordering::Relaxed),
            requests_reconcile: self.requests_reconcile.load(Ordering::Relaxed),
            requests_node_info: self.requests_node_info.load(Ordering::Relaxed),
            requests_node_stats: self.requests_node_stats.load(Ordering::Relaxed),
            requests_tenant_list: self.requests_tenant_list.load(Ordering::Relaxed),
            requests_instance_list: self.requests_instance_list.load(Ordering::Relaxed),
            requests_wake: self.requests_wake.load(Ordering::Relaxed),
            requests_rate_limited: self.requests_rate_limited.load(Ordering::Relaxed),
            requests_failed: self.requests_failed.load(Ordering::Relaxed),
            reconcile_runs: self.reconcile_runs.load(Ordering::Relaxed),
            reconcile_errors: self.reconcile_errors.load(Ordering::Relaxed),
            reconcile_duration_ms: self.reconcile_duration_ms.load(Ordering::Relaxed),
            instances_created: self.instances_created.load(Ordering::Relaxed),
            instances_started: self.instances_started.load(Ordering::Relaxed),
            instances_stopped: self.instances_stopped.load(Ordering::Relaxed),
            instances_slept: self.instances_slept.load(Ordering::Relaxed),
            instances_woken: self.instances_woken.load(Ordering::Relaxed),
            instances_destroyed: self.instances_destroyed.load(Ordering::Relaxed),
            instances_deferred: self.instances_deferred.load(Ordering::Relaxed),
            connections_accepted: self.connections_accepted.load(Ordering::Relaxed),
            connections_rejected: self.connections_rejected.load(Ordering::Relaxed),
            build_image_duration_ms: self.build_image_duration_ms.load(Ordering::Relaxed),
            vm_start_duration_ms: self.vm_start_duration_ms.load(Ordering::Relaxed),
            vsock_handshake_rtt_ms: self.vsock_handshake_rtt_ms.load(Ordering::Relaxed),
        }
    }

    /// Renders all metrics as Prometheus-style exposition text.
    ///
    /// A snapshot is taken first, then every metric is written as three lines:
    /// `# HELP <name> <help>`, `# TYPE <name> <counter|gauge>` and `<name> <value>`.
    /// Metrics whose names end in `_total` are typed `counter`; the four
    /// `*_milliseconds` metrics, whose help text describes the *last* observed
    /// value, are typed `gauge`.
    pub fn prometheus_exposition(&self) -> String {
        let s = self.snapshot();
        // The rendered text is a little over 3 KB even when every value is zero,
        // so reserve enough up front to avoid regrowing the buffer.
        let mut out = String::with_capacity(4096);

        // Request counters.
        write_metric(
            &mut out,
            "mvm_requests_total",
            s.requests_total,
            "Total QUIC API requests received",
        );
        write_metric(
            &mut out,
            "mvm_requests_reconcile_total",
            s.requests_reconcile,
            "Reconcile requests",
        );
        write_metric(
            &mut out,
            "mvm_requests_node_info_total",
            s.requests_node_info,
            "NodeInfo requests",
        );
        write_metric(
            &mut out,
            "mvm_requests_node_stats_total",
            s.requests_node_stats,
            "NodeStats requests",
        );
        write_metric(
            &mut out,
            "mvm_requests_tenant_list_total",
            s.requests_tenant_list,
            "TenantList requests",
        );
        write_metric(
            &mut out,
            "mvm_requests_instance_list_total",
            s.requests_instance_list,
            "InstanceList requests",
        );
        write_metric(
            &mut out,
            "mvm_requests_wake_total",
            s.requests_wake,
            "WakeInstance requests",
        );
        write_metric(
            &mut out,
            "mvm_requests_rate_limited_total",
            s.requests_rate_limited,
            "Rate-limited requests",
        );
        write_metric(
            &mut out,
            "mvm_requests_failed_total",
            s.requests_failed,
            "Failed requests",
        );

        // Reconcile loop.
        write_metric(
            &mut out,
            "mvm_reconcile_runs_total",
            s.reconcile_runs,
            "Reconcile loop executions",
        );
        write_metric(
            &mut out,
            "mvm_reconcile_errors_total",
            s.reconcile_errors,
            "Reconcile errors",
        );
        // Described as the *last* duration, i.e. a value that can go down, so it
        // is declared as a gauge like the other duration metrics below.
        write_gauge(
            &mut out,
            "mvm_reconcile_duration_milliseconds",
            s.reconcile_duration_ms,
            "Last reconcile duration in ms",
        );

        // Instance lifecycle counters.
        write_metric(
            &mut out,
            "mvm_instances_created_total",
            s.instances_created,
            "Instances created",
        );
        write_metric(
            &mut out,
            "mvm_instances_started_total",
            s.instances_started,
            "Instances started",
        );
        write_metric(
            &mut out,
            "mvm_instances_stopped_total",
            s.instances_stopped,
            "Instances stopped",
        );
        write_metric(
            &mut out,
            "mvm_instances_slept_total",
            s.instances_slept,
            "Instances slept",
        );
        write_metric(
            &mut out,
            "mvm_instances_woken_total",
            s.instances_woken,
            "Instances woken",
        );
        write_metric(
            &mut out,
            "mvm_instances_destroyed_total",
            s.instances_destroyed,
            "Instances destroyed",
        );
        write_metric(
            &mut out,
            "mvm_instances_deferred_total",
            s.instances_deferred,
            "Instances deferred by min-runtime policy",
        );

        // Connection counters.
        write_metric(
            &mut out,
            "mvm_connections_accepted_total",
            s.connections_accepted,
            "Connections accepted",
        );
        write_metric(
            &mut out,
            "mvm_connections_rejected_total",
            s.connections_rejected,
            "Connections rejected",
        );

        // Timing gauges.
        write_gauge(
            &mut out,
            "mvm_build_image_duration_milliseconds",
            s.build_image_duration_ms,
            "Last build_image() duration in milliseconds",
        );
        write_gauge(
            &mut out,
            "mvm_vm_start_duration_milliseconds",
            s.vm_start_duration_ms,
            "Last VM start duration in milliseconds",
        );
        write_gauge(
            &mut out,
            "mvm_vsock_handshake_rtt_milliseconds",
            s.vsock_handshake_rtt_ms,
            "Last vsock auth handshake RTT in milliseconds",
        );
        out
    }
}
/// Appends one `counter` metric to `out` in Prometheus text form.
///
/// Three newline-terminated lines are written: `# HELP <name> <help>`,
/// `# TYPE <name> counter` and `<name> <value>`. `name` and `help` are
/// inserted verbatim, without any escaping.
fn write_metric(out: &mut String, name: &str, value: u64, help: &str) {
    use std::fmt::Write;
    // Formatting into a `String` does not fail, so the `fmt::Result` is discarded.
    let _ = write!(
        out,
        "# HELP {name} {help}\n# TYPE {name} counter\n{name} {value}\n",
        name = name,
        help = help,
        value = value,
    );
}
/// Appends one `gauge` metric to `out` in Prometheus text form.
///
/// Three newline-terminated lines are written: `# HELP <name> <help>`,
/// `# TYPE <name> gauge` and `<name> <value>`. `name` and `help` are
/// inserted verbatim, without any escaping.
fn write_gauge(out: &mut String, name: &str, value: u64, help: &str) {
    use std::fmt::Write;
    // Formatting into a `String` does not fail, so the `fmt::Result` is discarded.
    let _ = write!(
        out,
        "# HELP {name} {help}\n# TYPE {name} gauge\n{name} {value}\n",
        name = name,
        help = help,
        value = value,
    );
}
/// Plain-value copy of every [`Metrics`] field, as produced by `Metrics::snapshot`.
///
/// Each field holds the `u64` loaded from the `Metrics` field of the same name.
/// The type derives serde's `Serialize` and `Deserialize`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetricsSnapshot {
// --- Request metrics ---
/// Total QUIC API requests received.
pub requests_total: u64,
/// Reconcile requests.
pub requests_reconcile: u64,
/// NodeInfo requests.
pub requests_node_info: u64,
/// NodeStats requests.
pub requests_node_stats: u64,
/// TenantList requests.
pub requests_tenant_list: u64,
/// InstanceList requests.
pub requests_instance_list: u64,
/// WakeInstance requests.
pub requests_wake: u64,
/// Rate-limited requests.
pub requests_rate_limited: u64,
/// Failed requests.
pub requests_failed: u64,
// --- Reconcile loop metrics ---
/// Reconcile loop executions.
pub reconcile_runs: u64,
/// Reconcile errors.
pub reconcile_errors: u64,
/// Last reconcile duration, in milliseconds.
pub reconcile_duration_ms: u64,
// --- Instance lifecycle metrics ---
/// Instances created.
pub instances_created: u64,
/// Instances started.
pub instances_started: u64,
/// Instances stopped.
pub instances_stopped: u64,
/// Instances slept.
pub instances_slept: u64,
/// Instances woken.
pub instances_woken: u64,
/// Instances destroyed.
pub instances_destroyed: u64,
/// Instances deferred by min-runtime policy.
pub instances_deferred: u64,
// --- Connection metrics ---
/// Connections accepted.
pub connections_accepted: u64,
/// Connections rejected.
pub connections_rejected: u64,
// --- Timing metrics ---
/// Last `build_image()` duration, in milliseconds.
pub build_image_duration_ms: u64,
/// Last VM start duration, in milliseconds.
pub vm_start_duration_ms: u64,
/// Last vsock auth handshake RTT, in milliseconds.
pub vsock_handshake_rtt_ms: u64,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Repeated `fetch_add` calls accumulate on a single field.
    #[test]
    fn test_metrics_increment() {
        let m = Metrics::new();
        m.requests_total.fetch_add(1, Ordering::Relaxed);
        m.requests_total.fetch_add(1, Ordering::Relaxed);
        assert_eq!(m.requests_total.load(Ordering::Relaxed), 2);
    }

    /// A snapshot reflects updated fields and leaves untouched fields at zero.
    #[test]
    fn test_metrics_snapshot() {
        let m = Metrics::new();
        m.instances_created.fetch_add(5, Ordering::Relaxed);
        m.reconcile_runs.fetch_add(3, Ordering::Relaxed);
        let snap = m.snapshot();
        assert_eq!(snap.instances_created, 5);
        assert_eq!(snap.reconcile_runs, 3);
        assert_eq!(snap.requests_total, 0);
    }

    /// A snapshot survives a JSON serialize/deserialize round trip unchanged.
    #[test]
    fn test_metrics_snapshot_roundtrip() {
        let m = Metrics::new();
        m.requests_total.fetch_add(10, Ordering::Relaxed);
        let snap = m.snapshot();
        let json = serde_json::to_string(&snap).unwrap();
        assert!(json.contains("\"requests_total\":10"));

        // Decode the JSON again so the test actually covers both directions.
        let decoded: MetricsSnapshot = serde_json::from_str(&json).unwrap();
        assert_eq!(decoded.requests_total, 10);
        // `MetricsSnapshot` has no `PartialEq`, so compare every field by
        // re-encoding the decoded value.
        assert_eq!(serde_json::to_string(&decoded).unwrap(), json);
    }

    /// Counter metrics carry HELP/TYPE lines and the current value.
    #[test]
    fn test_prometheus_exposition_format() {
        let m = Metrics::new();
        m.requests_total.fetch_add(42, Ordering::Relaxed);
        m.connections_accepted.fetch_add(7, Ordering::Relaxed);
        let prom = m.prometheus_exposition();
        assert!(prom.contains("# HELP mvm_requests_total"));
        assert!(prom.contains("# TYPE mvm_requests_total counter"));
        assert!(prom.contains("mvm_requests_total 42"));
        assert!(prom.contains("mvm_connections_accepted_total 7"));
    }

    /// Values written with `store` show up in the snapshot.
    #[test]
    fn test_timing_gauges_store_and_snapshot() {
        let m = Metrics::new();
        m.build_image_duration_ms.store(1234, Ordering::Relaxed);
        m.vm_start_duration_ms.store(567, Ordering::Relaxed);
        m.vsock_handshake_rtt_ms.store(89, Ordering::Relaxed);
        let snap = m.snapshot();
        assert_eq!(snap.build_image_duration_ms, 1234);
        assert_eq!(snap.vm_start_duration_ms, 567);
        assert_eq!(snap.vsock_handshake_rtt_ms, 89);
    }

    /// The three timing metrics are declared with the `gauge` type.
    #[test]
    fn test_timing_gauges_prometheus_type_is_gauge() {
        let m = Metrics::new();
        m.build_image_duration_ms.store(100, Ordering::Relaxed);
        let prom = m.prometheus_exposition();
        assert!(prom.contains("# TYPE mvm_build_image_duration_milliseconds gauge"));
        assert!(prom.contains("# TYPE mvm_vm_start_duration_milliseconds gauge"));
        assert!(prom.contains("# TYPE mvm_vsock_handshake_rtt_milliseconds gauge"));
    }

    /// A stored timing value appears on the metric's sample line.
    #[test]
    fn test_timing_gauges_prometheus_values() {
        let m = Metrics::new();
        m.vm_start_duration_ms.store(42, Ordering::Relaxed);
        let prom = m.prometheus_exposition();
        assert!(prom.contains("mvm_vm_start_duration_milliseconds 42"));
    }
}