Skip to main content

codec/gpu/
utilization.rs

1//! `GpuUtilizationReader` — live per-GPU utilisation snapshots via NVML / sysfs.
2
3use super::types::{GpuDevice, GpuVendor, GpuUtilization};
4
/// Reader for per-GPU utilisation snapshots, queried once per load
/// tick. It holds an optional NVML handle across reads so the
/// library is initialised once at construction rather than on every
/// tick.
pub struct GpuUtilizationReader {
    /// NVML handle; `None` when NVML initialisation failed, in which
    /// case NVIDIA devices report the zero-initialised default.
    nvml: Option<nvml_wrapper::Nvml>,
}
12
13impl GpuUtilizationReader {
14    /// Build a reader. NVML init failure is non-fatal — the reader
15    /// folds to "all zeroes" on every NVIDIA device and the rest of
16    /// the load-tick path stays alive. Logged once at startup so
17    /// operators can tell "no NVIDIA card" from "NVIDIA card but
18    /// driver missing".
19    pub fn new() -> Self {
20        let nvml = match super::nvidia::init_nvml_with_fallback() {
21            Ok(n) => Some(n),
22            Err(e) => {
23                // info-level: many production hosts are AMD/Intel-only
24                // and this isn't a problem. Operators looking at the
25                // dev box logs see this once at boot.
26                tracing::info!(error = %e, "nvml not available; NVIDIA GPU utilisation will be 0");
27                None
28            }
29        };
30        Self { nvml }
31    }
32
33    /// Read the per-tick snapshot for one device. Cheap when NVML is
34    /// available (handful of FFI calls); free when it's not (returns
35    /// the zero-initialised default).
36    pub fn read(&self, device: &GpuDevice) -> GpuUtilization {
37        match device.vendor {
38            GpuVendor::Nvidia => self.read_nvidia(device).unwrap_or_default(),
39            GpuVendor::Intel => self.read_intel(device).unwrap_or_default(),
40            GpuVendor::Amd => GpuUtilization::default(),
41        }
42    }
43
44    fn read_nvidia(&self, device: &GpuDevice) -> Option<GpuUtilization> {
45        let nvml = self.nvml.as_ref()?;
46        let dev = nvml.device_by_index(device.index).ok()?;
47        let util = dev.utilization_rates().ok();
48        // EncoderUtilizationInfo / DecoderUtilizationInfo have a
49        // `utilization` field (0..=100) plus a sampling period; we
50        // surface only the percentage.
51        let enc = dev.encoder_utilization().ok();
52        let dec = dev.decoder_utilization().ok();
53        let mem = dev.memory_info().ok();
54        let temp = dev
55            .temperature(nvml_wrapper::enum_wrappers::device::TemperatureSensor::Gpu)
56            .ok()
57            .and_then(|t| u8::try_from(t).ok());
58        Some(GpuUtilization {
59            util_percent: util.as_ref().map(|u| u.gpu.min(100) as u8).unwrap_or(0),
60            encoder_percent: enc
61                .as_ref()
62                .map(|e| e.utilization.min(100) as u8)
63                .unwrap_or(0),
64            decoder_percent: dec
65                .as_ref()
66                .map(|d| d.utilization.min(100) as u8)
67                .unwrap_or(0),
68            mem_used_mib: mem
69                .as_ref()
70                .map(|m| (m.used / 1024 / 1024) as u32)
71                .unwrap_or(0),
72            mem_total_mib: mem
73                .as_ref()
74                .map(|m| (m.total / 1024 / 1024) as u32)
75                .unwrap_or(device.vram_mib as u32),
76            temperature_c: temp,
77        })
78    }
79
80    /// Intel stand-in via sysfs `gt_cur_freq_mhz` / `gt_max_freq_mhz`
81    /// for a coarse "busy" proxy and `mem_info_vram_used` for memory.
82    /// The i915 driver doesn't expose per-engine busy% via sysfs
83    /// cleanly — `intel_gpu_top -J` is the proper source but the
84    /// fork+capture cost on every 5 s tick is heavy. Phase 1: leave
85    /// encoder/decoder at 0 and let `util_percent` be the freq-ratio
86    /// proxy; real fix is the perf event interface (`i915_pmu`)
87    /// which deserves its own task.
88    #[cfg(target_os = "linux")]
89    fn read_intel(&self, _device: &GpuDevice) -> Option<GpuUtilization> {
90        // We don't have the bdf here, so walk /sys/class/drm/cardN
91        // for an Intel card. Index 0 returns the first one that
92        // matches; multi-Intel hosts (rare today) get the same
93        // utilisation reported across both — acceptable until the
94        // proper i915_pmu integration lands.
95        let mut out = GpuUtilization::default();
96        if let Ok(entries) = std::fs::read_dir("/sys/class/drm") {
97            for entry in entries.flatten() {
98                let name = entry.file_name();
99                let Some(name_str) = name.to_str() else {
100                    continue;
101                };
102                if !name_str.starts_with("card") || name_str.contains('-') {
103                    continue;
104                }
105                // Confirm Intel via vendor file under device link.
106                let device_link = entry.path().join("device").join("vendor");
107                let vendor = std::fs::read_to_string(&device_link).unwrap_or_default();
108                if vendor.trim() != "0x8086" {
109                    continue;
110                }
111                let cur = std::fs::read_to_string(entry.path().join("gt_cur_freq_mhz"))
112                    .ok()
113                    .and_then(|s| s.trim().parse::<u32>().ok());
114                let max = std::fs::read_to_string(entry.path().join("gt_max_freq_mhz"))
115                    .ok()
116                    .and_then(|s| s.trim().parse::<u32>().ok());
117                if let (Some(cur), Some(max)) = (cur, max) {
118                    if max > 0 {
119                        out.util_percent = ((cur as u64 * 100 / max as u64).min(100)) as u8;
120                    }
121                }
122                let used = std::fs::read_to_string(
123                    entry.path().join("device").join("mem_info_vram_used"),
124                )
125                .ok()
126                .and_then(|s| s.trim().parse::<u64>().ok());
127                let total = std::fs::read_to_string(
128                    entry.path().join("device").join("mem_info_vram_total"),
129                )
130                .ok()
131                .and_then(|s| s.trim().parse::<u64>().ok());
132                if let Some(u) = used {
133                    out.mem_used_mib = (u / 1024 / 1024) as u32;
134                }
135                if let Some(t) = total {
136                    out.mem_total_mib = (t / 1024 / 1024) as u32;
137                }
138                // Fall back to the catalog VRAM total stored on the
139                // device record when sysfs didn't expose it. The dev
140                // box's kernel doesn't have mem_info_vram_total, so
141                // without this Intel cards report 0 / 0 forever.
142                if out.mem_total_mib == 0 && _device.vram_mib > 0 {
143                    out.mem_total_mib = _device.vram_mib as u32;
144                }
145                // Fall back to DRM fdinfo aggregation when sysfs didn't
146                // expose `mem_info_vram_used` (older kernels). Filtered
147                // to this card's PCI BDF so multi-Intel hosts report
148                // per-device used memory, not the cross-card total.
149                // This is the same source `intel_gpu_top -J` and `nvtop`
150                // use, available since kernel ~5.19 (i915) / ~6.8 (xe).
151                if out.mem_used_mib == 0 {
152                    let bdf = super::sysfs::read_pci_bdf_from_drm_card(&entry.path());
153                    if let Some(bytes) =
154                        super::sysfs::read_intel_vram_resident_bytes(bdf.as_deref())
155                    {
156                        out.mem_used_mib = (bytes / 1024 / 1024) as u32;
157                    }
158                }
159                return Some(out);
160            }
161        }
162        if out.mem_total_mib == 0 && _device.vram_mib > 0 {
163            out.mem_total_mib = _device.vram_mib as u32;
164        }
165        Some(out)
166    }
167
168    #[cfg(not(target_os = "linux"))]
169    fn read_intel(&self, _device: &GpuDevice) -> Option<GpuUtilization> {
170        // Windows path for Intel hosts is performance-counter via
171        // the WMI `Win32_PerfFormattedData_GPUPerformanceCounters_GPUEngine`
172        // surface — same fork-cost concern as `intel_gpu_top` on
173        // Linux, deferred. Returns all zeroes.
174        Some(GpuUtilization::default())
175    }
176}
177
178impl Default for GpuUtilizationReader {
179    fn default() -> Self {
180        Self::new()
181    }
182}