Skip to main content

oximedia_gpu/
gpu_stats.rs

1#![allow(dead_code)]
2//! GPU statistics collection and monitoring.
3
/// The kinds of GPU measurement this module can describe.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GpuStat {
    /// Core utilization (0–100 %).
    Utilization,
    /// VRAM currently in use (bytes).
    MemoryUsed,
    /// Die temperature (degrees Celsius).
    Temperature,
    /// Board power draw (milliwatts).
    PowerDraw,
}

impl GpuStat {
    /// Returns the unit label to display next to a value of this statistic.
    ///
    /// The label is one of `"%"`, `"bytes"`, `"°C"` or `"mW"`.
    #[must_use]
    pub fn unit(&self) -> &'static str {
        match *self {
            Self::PowerDraw => "mW",
            Self::Temperature => "°C",
            Self::MemoryUsed => "bytes",
            Self::Utilization => "%",
        }
    }

    /// Reports whether this statistic is expressed as a percentage.
    ///
    /// Only [`GpuStat::Utilization`] is.
    #[must_use]
    pub fn is_percentage(&self) -> bool {
        *self == Self::Utilization
    }

    /// Reports whether this statistic is a temperature reading.
    ///
    /// Only [`GpuStat::Temperature`] is.
    #[must_use]
    pub fn is_thermal(&self) -> bool {
        *self == Self::Temperature
    }
}
41
42/// A single sample of a GPU statistic at a point in time.
43#[derive(Debug, Clone)]
44pub struct GpuStatSample {
45    /// Which statistic was measured.
46    pub stat: GpuStat,
47    /// The measured value.
48    pub value: f64,
49    /// Threshold above which the value is considered critical.
50    pub critical_threshold: f64,
51}
52
53impl GpuStatSample {
54    /// Create a new sample.
55    #[must_use]
56    pub fn new(stat: GpuStat, value: f64, critical_threshold: f64) -> Self {
57        Self {
58            stat,
59            value,
60            critical_threshold,
61        }
62    }
63
64    /// Returns `true` when the value exceeds the critical threshold.
65    #[must_use]
66    pub fn is_critical(&self) -> bool {
67        self.value >= self.critical_threshold
68    }
69
70    /// Returns how far the value is from the critical threshold (negative = safe).
71    #[must_use]
72    pub fn headroom(&self) -> f64 {
73        self.critical_threshold - self.value
74    }
75}
76
77/// Accumulated GPU statistics over a recording period.
78#[derive(Debug, Default)]
79pub struct GpuStats {
80    utilization_samples: Vec<f64>,
81    memory_used_samples: Vec<u64>,
82    temperature_samples: Vec<f64>,
83    power_draw_samples: Vec<f64>,
84    total_memory_bytes: u64,
85}
86
87impl GpuStats {
88    /// Create a new collector knowing total VRAM.
89    #[must_use]
90    pub fn new(total_memory_bytes: u64) -> Self {
91        Self {
92            total_memory_bytes,
93            ..Default::default()
94        }
95    }
96
97    /// Record a [`GpuStatSample`].
98    pub fn record(&mut self, sample: &GpuStatSample) {
99        match sample.stat {
100            GpuStat::Utilization => self.utilization_samples.push(sample.value),
101            GpuStat::MemoryUsed => self.memory_used_samples.push(sample.value as u64),
102            GpuStat::Temperature => self.temperature_samples.push(sample.value),
103            GpuStat::PowerDraw => self.power_draw_samples.push(sample.value),
104        }
105    }
106
107    /// Average utilization percentage over all recorded samples.
108    #[allow(clippy::cast_precision_loss)]
109    #[must_use]
110    pub fn utilization_pct(&self) -> f64 {
111        if self.utilization_samples.is_empty() {
112            return 0.0;
113        }
114        let sum: f64 = self.utilization_samples.iter().sum();
115        sum / self.utilization_samples.len() as f64
116    }
117
118    /// Average memory usage as a percentage of total VRAM.
119    #[allow(clippy::cast_precision_loss)]
120    #[must_use]
121    pub fn memory_pct(&self) -> f64 {
122        if self.memory_used_samples.is_empty() || self.total_memory_bytes == 0 {
123            return 0.0;
124        }
125        let sum: u64 = self.memory_used_samples.iter().sum();
126        let avg = sum as f64 / self.memory_used_samples.len() as f64;
127        (avg / self.total_memory_bytes as f64) * 100.0
128    }
129
130    /// Peak temperature recorded.
131    #[must_use]
132    pub fn peak_temperature(&self) -> Option<f64> {
133        self.temperature_samples.iter().copied().reduce(f64::max)
134    }
135
136    /// Average power draw in milliwatts.
137    #[allow(clippy::cast_precision_loss)]
138    #[must_use]
139    pub fn avg_power_draw_mw(&self) -> f64 {
140        if self.power_draw_samples.is_empty() {
141            return 0.0;
142        }
143        let sum: f64 = self.power_draw_samples.iter().sum();
144        sum / self.power_draw_samples.len() as f64
145    }
146
147    /// Total number of recorded samples across all stat types.
148    #[must_use]
149    pub fn sample_count(&self) -> usize {
150        self.utilization_samples.len()
151            + self.memory_used_samples.len()
152            + self.temperature_samples.len()
153            + self.power_draw_samples.len()
154    }
155}
156
#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance for floating-point comparisons unless a test says otherwise.
    const EPSILON: f64 = 1e-9;

    /// 8 GiB, used as the VRAM size by tests that do not exercise memory math.
    const EIGHT_GIB: u64 = 8 * 1024 * 1024 * 1024;

    /// Returns `true` when `actual` lies strictly within `tolerance` of `expected`.
    fn is_close(actual: f64, expected: f64, tolerance: f64) -> bool {
        (actual - expected).abs() < tolerance
    }

    /// Builds a collector and records the `(stat, value, threshold)` readings in order.
    fn stats_with(total_memory_bytes: u64, readings: &[(GpuStat, f64, f64)]) -> GpuStats {
        let mut collector = GpuStats::new(total_memory_bytes);
        for &(stat, value, threshold) in readings {
            collector.record(&GpuStatSample::new(stat, value, threshold));
        }
        collector
    }

    #[test]
    fn test_gpu_stat_unit_utilization() {
        let unit = GpuStat::Utilization.unit();
        assert_eq!(unit, "%");
    }

    #[test]
    fn test_gpu_stat_unit_memory() {
        let unit = GpuStat::MemoryUsed.unit();
        assert_eq!(unit, "bytes");
    }

    #[test]
    fn test_gpu_stat_unit_temperature() {
        let unit = GpuStat::Temperature.unit();
        assert_eq!(unit, "°C");
    }

    #[test]
    fn test_gpu_stat_unit_power() {
        let unit = GpuStat::PowerDraw.unit();
        assert_eq!(unit, "mW");
    }

    #[test]
    fn test_gpu_stat_is_percentage() {
        assert!(GpuStat::Utilization.is_percentage());
        for stat in [GpuStat::MemoryUsed, GpuStat::Temperature] {
            assert!(!stat.is_percentage());
        }
    }

    #[test]
    fn test_gpu_stat_is_thermal() {
        assert!(GpuStat::Temperature.is_thermal());
        assert!(!GpuStat::Utilization.is_thermal());
    }

    #[test]
    fn test_sample_is_critical_true() {
        let reading = GpuStatSample::new(GpuStat::Temperature, 95.0, 90.0);
        assert!(reading.is_critical());
    }

    #[test]
    fn test_sample_is_critical_false() {
        let reading = GpuStatSample::new(GpuStat::Temperature, 75.0, 90.0);
        assert!(!reading.is_critical());
    }

    #[test]
    fn test_sample_is_critical_at_threshold() {
        // The threshold comparison is inclusive.
        let reading = GpuStatSample::new(GpuStat::Utilization, 90.0, 90.0);
        assert!(reading.is_critical());
    }

    #[test]
    fn test_sample_headroom() {
        let reading = GpuStatSample::new(GpuStat::PowerDraw, 200.0, 250.0);
        assert!(is_close(reading.headroom(), 50.0, EPSILON));
    }

    #[test]
    fn test_stats_utilization_pct_empty() {
        let collector = stats_with(EIGHT_GIB, &[]);
        assert!(is_close(collector.utilization_pct(), 0.0, EPSILON));
    }

    #[test]
    fn test_stats_record_and_utilization_pct() {
        let collector = stats_with(
            EIGHT_GIB,
            &[
                (GpuStat::Utilization, 80.0, 100.0),
                (GpuStat::Utilization, 60.0, 100.0),
            ],
        );
        assert!(is_close(collector.utilization_pct(), 70.0, EPSILON));
    }

    #[test]
    fn test_stats_memory_pct() {
        let total_vram = 8_000_000_000u64;
        let collector = stats_with(
            total_vram,
            &[(GpuStat::MemoryUsed, 4_000_000_000.0, f64::MAX)],
        );
        assert!(is_close(collector.memory_pct(), 50.0, 0.01));
    }

    #[test]
    fn test_stats_peak_temperature() {
        let mut collector = GpuStats::new(0);
        assert!(collector.peak_temperature().is_none());

        for celsius in [60.0, 85.0] {
            collector.record(&GpuStatSample::new(GpuStat::Temperature, celsius, 100.0));
        }
        assert_eq!(collector.peak_temperature(), Some(85.0));
    }

    #[test]
    fn test_stats_avg_power_draw() {
        let collector = stats_with(
            0,
            &[
                (GpuStat::PowerDraw, 100.0, 300.0),
                (GpuStat::PowerDraw, 200.0, 300.0),
            ],
        );
        assert!(is_close(collector.avg_power_draw_mw(), 150.0, EPSILON));
    }

    #[test]
    fn test_stats_sample_count() {
        let collector = stats_with(
            0,
            &[
                (GpuStat::Utilization, 50.0, 100.0),
                (GpuStat::Temperature, 70.0, 90.0),
            ],
        );
        assert_eq!(collector.sample_count(), 2);
    }
}