codec/gpu/utilization.rs
1//! `GpuUtilizationReader` — live per-GPU utilisation snapshots via NVML / sysfs.
2
3use super::types::{GpuDevice, GpuVendor, GpuUtilization};
4
/// Long-lived reader that produces per-GPU utilisation snapshots on
/// each load tick.
///
/// The NVML handle is obtained once, in `new`, and reused by every
/// NVIDIA read, so NVML initialisation is not repeated on each tick.
pub struct GpuUtilizationReader {
    /// NVML handle; `None` when initialisation failed, in which case
    /// NVIDIA reads fall back to the default (all-zero) snapshot.
    nvml: Option<nvml_wrapper::Nvml>,
}
12
13impl GpuUtilizationReader {
14 /// Build a reader. NVML init failure is non-fatal — the reader
15 /// folds to "all zeroes" on every NVIDIA device and the rest of
16 /// the load-tick path stays alive. Logged once at startup so
17 /// operators can tell "no NVIDIA card" from "NVIDIA card but
18 /// driver missing".
19 pub fn new() -> Self {
20 let nvml = match super::nvidia::init_nvml_with_fallback() {
21 Ok(n) => Some(n),
22 Err(e) => {
23 // info-level: many production hosts are AMD/Intel-only
24 // and this isn't a problem. Operators looking at the
25 // dev box logs see this once at boot.
26 tracing::info!(error = %e, "nvml not available; NVIDIA GPU utilisation will be 0");
27 None
28 }
29 };
30 Self { nvml }
31 }
32
33 /// Read the per-tick snapshot for one device. Cheap when NVML is
34 /// available (handful of FFI calls); free when it's not (returns
35 /// the zero-initialised default).
36 pub fn read(&self, device: &GpuDevice) -> GpuUtilization {
37 match device.vendor {
38 GpuVendor::Nvidia => self.read_nvidia(device).unwrap_or_default(),
39 GpuVendor::Intel => self.read_intel(device).unwrap_or_default(),
40 GpuVendor::Amd => GpuUtilization::default(),
41 }
42 }
43
44 fn read_nvidia(&self, device: &GpuDevice) -> Option<GpuUtilization> {
45 let nvml = self.nvml.as_ref()?;
46 let dev = nvml.device_by_index(device.index).ok()?;
47 let util = dev.utilization_rates().ok();
48 // EncoderUtilizationInfo / DecoderUtilizationInfo have a
49 // `utilization` field (0..=100) plus a sampling period; we
50 // surface only the percentage.
51 let enc = dev.encoder_utilization().ok();
52 let dec = dev.decoder_utilization().ok();
53 let mem = dev.memory_info().ok();
54 let temp = dev
55 .temperature(nvml_wrapper::enum_wrappers::device::TemperatureSensor::Gpu)
56 .ok()
57 .and_then(|t| u8::try_from(t).ok());
58 Some(GpuUtilization {
59 util_percent: util.as_ref().map(|u| u.gpu.min(100) as u8).unwrap_or(0),
60 encoder_percent: enc
61 .as_ref()
62 .map(|e| e.utilization.min(100) as u8)
63 .unwrap_or(0),
64 decoder_percent: dec
65 .as_ref()
66 .map(|d| d.utilization.min(100) as u8)
67 .unwrap_or(0),
68 mem_used_mib: mem
69 .as_ref()
70 .map(|m| (m.used / 1024 / 1024) as u32)
71 .unwrap_or(0),
72 mem_total_mib: mem
73 .as_ref()
74 .map(|m| (m.total / 1024 / 1024) as u32)
75 .unwrap_or(device.vram_mib as u32),
76 temperature_c: temp,
77 })
78 }
79
80 /// Intel stand-in via sysfs `gt_cur_freq_mhz` / `gt_max_freq_mhz`
81 /// for a coarse "busy" proxy and `mem_info_vram_used` for memory.
82 /// The i915 driver doesn't expose per-engine busy% via sysfs
83 /// cleanly — `intel_gpu_top -J` is the proper source but the
84 /// fork+capture cost on every 5 s tick is heavy. Phase 1: leave
85 /// encoder/decoder at 0 and let `util_percent` be the freq-ratio
86 /// proxy; real fix is the perf event interface (`i915_pmu`)
87 /// which deserves its own task.
88 #[cfg(target_os = "linux")]
89 fn read_intel(&self, _device: &GpuDevice) -> Option<GpuUtilization> {
90 // We don't have the bdf here, so walk /sys/class/drm/cardN
91 // for an Intel card. Index 0 returns the first one that
92 // matches; multi-Intel hosts (rare today) get the same
93 // utilisation reported across both — acceptable until the
94 // proper i915_pmu integration lands.
95 let mut out = GpuUtilization::default();
96 if let Ok(entries) = std::fs::read_dir("/sys/class/drm") {
97 for entry in entries.flatten() {
98 let name = entry.file_name();
99 let Some(name_str) = name.to_str() else {
100 continue;
101 };
102 if !name_str.starts_with("card") || name_str.contains('-') {
103 continue;
104 }
105 // Confirm Intel via vendor file under device link.
106 let device_link = entry.path().join("device").join("vendor");
107 let vendor = std::fs::read_to_string(&device_link).unwrap_or_default();
108 if vendor.trim() != "0x8086" {
109 continue;
110 }
111 let cur = std::fs::read_to_string(entry.path().join("gt_cur_freq_mhz"))
112 .ok()
113 .and_then(|s| s.trim().parse::<u32>().ok());
114 let max = std::fs::read_to_string(entry.path().join("gt_max_freq_mhz"))
115 .ok()
116 .and_then(|s| s.trim().parse::<u32>().ok());
117 if let (Some(cur), Some(max)) = (cur, max) {
118 if max > 0 {
119 out.util_percent = ((cur as u64 * 100 / max as u64).min(100)) as u8;
120 }
121 }
122 let used = std::fs::read_to_string(
123 entry.path().join("device").join("mem_info_vram_used"),
124 )
125 .ok()
126 .and_then(|s| s.trim().parse::<u64>().ok());
127 let total = std::fs::read_to_string(
128 entry.path().join("device").join("mem_info_vram_total"),
129 )
130 .ok()
131 .and_then(|s| s.trim().parse::<u64>().ok());
132 if let Some(u) = used {
133 out.mem_used_mib = (u / 1024 / 1024) as u32;
134 }
135 if let Some(t) = total {
136 out.mem_total_mib = (t / 1024 / 1024) as u32;
137 }
138 // Fall back to the catalog VRAM total stored on the
139 // device record when sysfs didn't expose it. The dev
140 // box's kernel doesn't have mem_info_vram_total, so
141 // without this Intel cards report 0 / 0 forever.
142 if out.mem_total_mib == 0 && _device.vram_mib > 0 {
143 out.mem_total_mib = _device.vram_mib as u32;
144 }
145 // Fall back to DRM fdinfo aggregation when sysfs didn't
146 // expose `mem_info_vram_used` (older kernels). Filtered
147 // to this card's PCI BDF so multi-Intel hosts report
148 // per-device used memory, not the cross-card total.
149 // This is the same source `intel_gpu_top -J` and `nvtop`
150 // use, available since kernel ~5.19 (i915) / ~6.8 (xe).
151 if out.mem_used_mib == 0 {
152 let bdf = super::sysfs::read_pci_bdf_from_drm_card(&entry.path());
153 if let Some(bytes) =
154 super::sysfs::read_intel_vram_resident_bytes(bdf.as_deref())
155 {
156 out.mem_used_mib = (bytes / 1024 / 1024) as u32;
157 }
158 }
159 return Some(out);
160 }
161 }
162 if out.mem_total_mib == 0 && _device.vram_mib > 0 {
163 out.mem_total_mib = _device.vram_mib as u32;
164 }
165 Some(out)
166 }
167
168 #[cfg(not(target_os = "linux"))]
169 fn read_intel(&self, _device: &GpuDevice) -> Option<GpuUtilization> {
170 // Windows path for Intel hosts is performance-counter via
171 // the WMI `Win32_PerfFormattedData_GPUPerformanceCounters_GPUEngine`
172 // surface — same fork-cost concern as `intel_gpu_top` on
173 // Linux, deferred. Returns all zeroes.
174 Some(GpuUtilization::default())
175 }
176}
177
178impl Default for GpuUtilizationReader {
179 fn default() -> Self {
180 Self::new()
181 }
182}