// Source: trueno/monitor/metrics.rs

1//! GPU metrics structs (TRUENO-SPEC-010 Section 4)
2
3use std::time::{Duration, Instant};
4
5use super::GpuMemoryMetrics;
6
7// ============================================================================
8// GPU Utilization Metrics (TRUENO-SPEC-010 Section 4.1.1)
9// ============================================================================
10
/// GPU utilization metrics.
///
/// Every value is a percentage expected to lie in `0..=100`; the `u32` field
/// type does not enforce that range. `Default` produces zero for the two
/// mandatory counters and `None` for the optional encoder/decoder counters.
#[derive(Debug, Clone, Copy, Default)]
pub struct GpuUtilization {
    /// GPU compute utilization (0-100%)
    pub gpu_percent: u32,
    /// Memory controller utilization (0-100%)
    pub memory_percent: u32,
    /// Video encoder utilization (0-100%), `None` when not available
    pub encoder_percent: Option<u32>,
    /// Video decoder utilization (0-100%), `None` when not available
    pub decoder_percent: Option<u32>,
}
23
24// ============================================================================
25// GPU Thermal Metrics (TRUENO-SPEC-010 Section 4.1.3)
26// ============================================================================
27
/// GPU thermal metrics.
///
/// `Default` produces a temperature of `0` and `None` for both optional
/// readings.
#[derive(Debug, Clone, Copy, Default)]
pub struct GpuThermalMetrics {
    /// Current temperature in Celsius
    pub temperature_celsius: u32,
    /// Shutdown threshold temperature, `None` when not available
    pub temperature_threshold_shutdown: Option<u32>,
    /// Fan speed percentage (0-100), `None` when not available
    pub fan_speed_percent: Option<u32>,
}

impl GpuThermalMetrics {
    /// Returns `true` while the temperature is below 80°C.
    ///
    /// Only `temperature_celsius` is consulted; the optional shutdown
    /// threshold is not taken into account.
    #[must_use]
    pub const fn is_safe(&self) -> bool {
        self.temperature_celsius < 80
    }

    /// Returns `true` once the temperature reaches 90°C or more.
    ///
    /// Readings in `80..=89` are therefore neither "safe" nor "critical".
    #[must_use]
    pub const fn is_critical(&self) -> bool {
        self.temperature_celsius >= 90
    }

    /// Returns a coarse, human-readable thermal status label.
    ///
    /// Bands (inclusive, in °C): `0..=50` "COOL", `51..=70` "WARM",
    /// `71..=85` "HOT", `86` and above "CRITICAL".
    ///
    /// Note that these bands are independent of [`Self::is_safe`] and
    /// [`Self::is_critical`]: the label is already "CRITICAL" at 86°C while
    /// `is_critical()` only becomes `true` at 90°C.
    #[must_use]
    pub const fn status(&self) -> &'static str {
        match self.temperature_celsius {
            0..=50 => "COOL",
            51..=70 => "WARM",
            71..=85 => "HOT",
            86.. => "CRITICAL",
        }
    }
}
63
64// ============================================================================
65// GPU Power Metrics (TRUENO-SPEC-010 Section 4.1.3)
66// ============================================================================
67
/// GPU power metrics.
///
/// `Default` produces `0.0` watts for both power values and power state `0`.
#[derive(Debug, Clone, Copy, Default)]
pub struct GpuPowerMetrics {
    /// Current power draw in watts
    pub power_draw_watts: f32,
    /// Power limit (TDP) in watts
    pub power_limit_watts: f32,
    /// Power state (P-state, 0 = highest performance)
    pub power_state: u32,
}

impl GpuPowerMetrics {
    /// Power draw expressed as a percentage of the power limit.
    ///
    /// Returns `0.0` when the limit is zero, negative or NaN, so an unusable
    /// limit never turns into a division by zero or a NaN percentage. The
    /// result is not clamped: a draw above the limit yields more than `100.0`.
    /// A NaN `power_draw_watts` still propagates as NaN.
    #[must_use]
    pub fn usage_percent(&self) -> f64 {
        // f32 -> f64 is lossless, so `f64::from` is preferred over `as`.
        let limit = f64::from(self.power_limit_watts);
        // `limit > 0.0` is false for NaN as well as for zero and negative
        // values, so all unusable limits take the `0.0` branch.
        if limit > 0.0 {
            (f64::from(self.power_draw_watts) / limit) * 100.0
        } else {
            0.0
        }
    }
}
90
91// ============================================================================
92// GPU Clock Metrics (TRUENO-SPEC-010 Section 4.1.4)
93// ============================================================================
94
/// GPU clock metrics.
///
/// All frequencies are expressed in MHz. `Default` produces `0` for the
/// graphics and memory clocks and `None` for the SM clock.
#[derive(Debug, Clone, Copy, Default)]
pub struct GpuClockMetrics {
    /// Graphics/shader clock in MHz
    pub graphics_mhz: u32,
    /// Memory clock in MHz
    pub memory_mhz: u32,
    /// SM clock in MHz (NVIDIA), `None` when not available
    pub sm_mhz: Option<u32>,
}
105
106// ============================================================================
107// GPU PCIe Metrics (TRUENO-SPEC-010 Section 4.1.5)
108// ============================================================================
109
/// GPU PCIe metrics.
///
/// Throughput is reported in bytes per second for each direction, alongside
/// the link generation and lane count. `Default` zeroes every field.
#[derive(Debug, Clone, Copy, Default)]
pub struct GpuPcieMetrics {
    /// PCIe TX throughput in bytes/sec
    pub tx_bytes_per_sec: u64,
    /// PCIe RX throughput in bytes/sec
    pub rx_bytes_per_sec: u64,
    /// PCIe link generation (1-5)
    pub link_gen: u32,
    /// PCIe link width (lanes)
    pub link_width: u32,
}
122
123// ============================================================================
124// Combined GPU Metrics Snapshot (TRUENO-SPEC-010 Section 4.2)
125// ============================================================================
126
127/// Complete GPU metrics snapshot
128///
129/// Contains all available metrics at a point in time.
130#[derive(Debug, Clone)]
131pub struct GpuMetrics {
132    /// Timestamp of measurement
133    pub timestamp: Instant,
134    /// Device index
135    pub device_index: u32,
136    /// Memory metrics
137    pub memory: GpuMemoryMetrics,
138    /// Utilization metrics
139    pub utilization: GpuUtilization,
140    /// Thermal metrics (if available)
141    pub thermal: Option<GpuThermalMetrics>,
142    /// Power metrics (if available)
143    pub power: Option<GpuPowerMetrics>,
144    /// Clock metrics (if available)
145    pub clocks: Option<GpuClockMetrics>,
146    /// PCIe metrics (if available)
147    pub pcie: Option<GpuPcieMetrics>,
148}
149
150impl GpuMetrics {
151    /// Create a new metrics snapshot with only memory info
152    #[must_use]
153    pub fn new(device_index: u32, memory: GpuMemoryMetrics) -> Self {
154        Self {
155            timestamp: Instant::now(),
156            device_index,
157            memory,
158            utilization: GpuUtilization::default(),
159            thermal: None,
160            power: None,
161            clocks: None,
162            pcie: None,
163        }
164    }
165
166    /// Age of this snapshot
167    #[must_use]
168    pub fn age(&self) -> Duration {
169        self.timestamp.elapsed()
170    }
171}