trueno/monitor/metrics.rs
1//! GPU metrics structs (TRUENO-SPEC-010 Section 4)
2
3use std::time::{Duration, Instant};
4
5use super::GpuMemoryMetrics;
6
7// ============================================================================
8// GPU Utilization Metrics (TRUENO-SPEC-010 Section 4.1.1)
9// ============================================================================
10
/// Utilization readings for a single GPU.
///
/// All values are percentages in the range 0-100. The encoder and decoder
/// readings are optional and are `None` when not available.
/// `Default` yields zero utilization with both optional readings absent.
#[derive(Clone, Copy, Debug, Default)]
pub struct GpuUtilization {
    /// Compute utilization of the GPU, as a percentage (0-100).
    pub gpu_percent: u32,
    /// Memory controller utilization, as a percentage (0-100).
    pub memory_percent: u32,
    /// Video encoder utilization (0-100%), or `None` if unavailable.
    pub encoder_percent: Option<u32>,
    /// Video decoder utilization (0-100%), or `None` if unavailable.
    pub decoder_percent: Option<u32>,
}
23
24// ============================================================================
25// GPU Thermal Metrics (TRUENO-SPEC-010 Section 4.1.3)
26// ============================================================================
27
/// Thermal readings for a single GPU.
///
/// `Default` yields a temperature of 0 with no shutdown threshold and no
/// fan speed.
#[derive(Clone, Copy, Debug, Default)]
pub struct GpuThermalMetrics {
    /// Current temperature, in degrees Celsius.
    pub temperature_celsius: u32,
    /// Temperature at which the device shuts down, or `None` if unavailable.
    pub temperature_threshold_shutdown: Option<u32>,
    /// Fan speed as a percentage (0-100), or `None` if unavailable.
    pub fan_speed_percent: Option<u32>,
}

impl GpuThermalMetrics {
    /// Temperatures strictly below this value are considered safe.
    const SAFE_BELOW_CELSIUS: u32 = 80;
    /// Temperatures at or above this value are considered critical.
    const CRITICAL_FROM_CELSIUS: u32 = 90;

    /// Inclusive upper bound of the "COOL" status band.
    const COOL_MAX_CELSIUS: u32 = 50;
    /// Inclusive upper bound of the "WARM" status band.
    const WARM_MAX_CELSIUS: u32 = 70;
    /// Inclusive upper bound of the "HOT" status band.
    const HOT_MAX_CELSIUS: u32 = 85;

    /// Returns `true` while the temperature is below 80°C.
    #[must_use]
    pub const fn is_safe(&self) -> bool {
        self.temperature_celsius < Self::SAFE_BELOW_CELSIUS
    }

    /// Returns `true` once the temperature has reached 90°C or more.
    #[must_use]
    pub const fn is_critical(&self) -> bool {
        self.temperature_celsius >= Self::CRITICAL_FROM_CELSIUS
    }

    /// Returns a coarse, human-readable thermal status label.
    ///
    /// Bands (inclusive, in °C): 0-50 `"COOL"`, 51-70 `"WARM"`,
    /// 71-85 `"HOT"`, 86 and above `"CRITICAL"`.
    ///
    /// These bands are independent of [`Self::is_safe`] and
    /// [`Self::is_critical`]: the label becomes `"CRITICAL"` at 86°C, while
    /// `is_critical` only reports `true` from 90°C.
    #[must_use]
    pub const fn status(&self) -> &'static str {
        let celsius = self.temperature_celsius;
        if celsius <= Self::COOL_MAX_CELSIUS {
            "COOL"
        } else if celsius <= Self::WARM_MAX_CELSIUS {
            "WARM"
        } else if celsius <= Self::HOT_MAX_CELSIUS {
            "HOT"
        } else {
            "CRITICAL"
        }
    }
}
63
64// ============================================================================
65// GPU Power Metrics (TRUENO-SPEC-010 Section 4.1.3)
66// ============================================================================
67
/// GPU power metrics
///
/// `Default` yields zero draw, zero limit and power state 0.
#[derive(Debug, Clone, Copy, Default)]
pub struct GpuPowerMetrics {
    /// Current power draw in watts
    pub power_draw_watts: f32,
    /// Power limit (TDP) in watts
    pub power_limit_watts: f32,
    /// Power state (P-state, 0 = highest performance)
    pub power_state: u32,
}

impl GpuPowerMetrics {
    /// Calculate power usage as a percentage of the power limit.
    ///
    /// Returns `power_draw_watts / power_limit_watts * 100`, computed in
    /// `f64`. The result is not clamped, so a draw above the limit yields a
    /// value greater than 100.
    ///
    /// Returns `0.0` when the power limit is unusable as a divisor: zero,
    /// negative, or NaN.
    #[must_use]
    pub fn usage_percent(&self) -> f64 {
        let limit = self.power_limit_watts;
        // `NaN <= 0.0` is false, so NaN must be rejected explicitly or it
        // would slip past the guard and poison the result.
        if limit.is_nan() || limit <= 0.0 {
            return 0.0;
        }
        // f32 -> f64 is lossless; `From` makes that explicit (no `as` cast).
        f64::from(self.power_draw_watts) / f64::from(limit) * 100.0
    }
}
90
91// ============================================================================
92// GPU Clock Metrics (TRUENO-SPEC-010 Section 4.1.4)
93// ============================================================================
94
/// Clock frequency readings for a single GPU, in MHz.
///
/// `Default` yields zero for both mandatory clocks and `None` for the
/// optional SM clock.
#[derive(Clone, Copy, Debug, Default)]
pub struct GpuClockMetrics {
    /// Graphics (shader) clock frequency, in MHz.
    pub graphics_mhz: u32,
    /// Memory clock frequency, in MHz.
    pub memory_mhz: u32,
    /// SM clock frequency in MHz (NVIDIA), or `None` if unavailable.
    pub sm_mhz: Option<u32>,
}
105
106// ============================================================================
107// GPU PCIe Metrics (TRUENO-SPEC-010 Section 4.1.5)
108// ============================================================================
109
/// PCIe link readings for a single GPU.
///
/// `Default` yields zero for every field.
#[derive(Clone, Copy, Debug, Default)]
pub struct GpuPcieMetrics {
    /// Transmit (TX) throughput over PCIe, in bytes per second.
    pub tx_bytes_per_sec: u64,
    /// Receive (RX) throughput over PCIe, in bytes per second.
    pub rx_bytes_per_sec: u64,
    /// Generation of the PCIe link (1-5).
    pub link_gen: u32,
    /// Width of the PCIe link, in lanes.
    pub link_width: u32,
}
122
123// ============================================================================
124// Combined GPU Metrics Snapshot (TRUENO-SPEC-010 Section 4.2)
125// ============================================================================
126
127/// Complete GPU metrics snapshot
128///
129/// Contains all available metrics at a point in time.
130#[derive(Debug, Clone)]
131pub struct GpuMetrics {
132 /// Timestamp of measurement
133 pub timestamp: Instant,
134 /// Device index
135 pub device_index: u32,
136 /// Memory metrics
137 pub memory: GpuMemoryMetrics,
138 /// Utilization metrics
139 pub utilization: GpuUtilization,
140 /// Thermal metrics (if available)
141 pub thermal: Option<GpuThermalMetrics>,
142 /// Power metrics (if available)
143 pub power: Option<GpuPowerMetrics>,
144 /// Clock metrics (if available)
145 pub clocks: Option<GpuClockMetrics>,
146 /// PCIe metrics (if available)
147 pub pcie: Option<GpuPcieMetrics>,
148}
149
150impl GpuMetrics {
151 /// Create a new metrics snapshot with only memory info
152 #[must_use]
153 pub fn new(device_index: u32, memory: GpuMemoryMetrics) -> Self {
154 Self {
155 timestamp: Instant::now(),
156 device_index,
157 memory,
158 utilization: GpuUtilization::default(),
159 thermal: None,
160 power: None,
161 clocks: None,
162 pcie: None,
163 }
164 }
165
166 /// Age of this snapshot
167 #[must_use]
168 pub fn age(&self) -> Duration {
169 self.timestamp.elapsed()
170 }
171}