codec/gpu.rs
1//! GPU device enumeration for NVDEC/NVENC scheduling.
2//!
3//! NVIDIA detection loads libcuda via dlopen, calls cuInit +
4//! cuDeviceGetCount + cuDeviceGetName. This works on minimal container
5//! images where the `nvidia-smi` binary may be absent but the driver's
6//! user-mode libraries are bind-mounted by the NVIDIA Container Toolkit.
7//! AMD/Intel detection scans /sys/bus/pci/devices on Linux.
8
9use std::ffi::{CStr, c_char, c_int, c_uint, c_void};
10use std::ptr;
11
12#[derive(Debug, Clone)]
13pub struct GpuDevice {
14 pub vendor: GpuVendor,
15 pub name: String,
16 pub index: u32,
17 /// Architecture / generation label, e.g. "Blackwell" (RTX 5060),
18 /// "Ada Lovelace" (RTX 4000-series), "Ampere" (RTX 3000), "Alchemist DG2"
19 /// (Arc A-series), "Battlemage BMG" (Arc B-series), "RDNA3" (RX 7000).
20 /// Phase 2 (2026-05-07) inventory page surface — derived from the PCI
21 /// device id at detect time so the inventory aggregations don't have
22 /// to re-derive it. "Unknown" when the device id falls outside the
23 /// per-vendor known-id table; preserved verbatim to the admin UI so
24 /// operators can spot fleet rows that need a label update.
25 pub generation: String,
26 /// Lowercase `vendor:device` PCI tuple, e.g. `"0x10de:0x2d05"`. Stable
27 /// identifier across driver / kernel versions. Empty string when the
28 /// platform path doesn't expose a device id (NVIDIA via CUDA on
29 /// Windows: cuda doesn't surface PCI; the field stays empty rather
30 /// than synthesise something misleading).
31 pub pci_id: String,
32 /// Total VRAM in MiB. NVIDIA via NVML `memory_info().total`; Intel via
33 /// `/sys/class/drm/cardN/device/mem_info_vram_total` when present;
34 /// AMD same. 0 when the platform path can't read it — admin UI shows
35 /// "—" for that case rather than "0 MiB".
36 pub vram_mib: u64,
37 /// Vendor-reported serial number of the physical card. NVIDIA via
38 /// NVML `Device::serial()` (returns the manufacturer's serial sticker
39 /// for cards that have one — datacenter Tesla / A10G / consumer Pro
40 /// cards expose it; consumer GeForce typically doesn't). Intel /
41 /// AMD: try `/sys/class/drm/cardN/device/serial[_number]` paths;
42 /// usually `None`. Stable identifier for warranty tracking + the
43 /// `transcoder_gpus` asset table — when present, the same card
44 /// across host moves dedups to a single row.
45 pub serial: Option<String>,
46 /// PCI host slot address, e.g. `"04:00.0"`. Used as the dedupe
47 /// fallback when `serial` is absent — assumes the card stays in
48 /// the same slot of the same host (the dev-box reality).
49 /// Empty when the platform path doesn't expose it.
50 pub host_pci_address: String,
51 /// Vendor portion of the PCI tuple as a standalone hex string,
52 /// e.g. `"0x10de"`. Already implicit in `pci_id` but exposed
53 /// separately so the SQL inventory query can index on it
54 /// without parsing.
55 pub vendor_id_hex: String,
56}
57
/// GPU manufacturers this module knows how to detect.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum GpuVendor {
    /// Detected through the CUDA driver library.
    Nvidia,
    /// Detected through the Linux PCI sysfs tree (vendor id `0x1002`).
    Amd,
    /// Detected through the Linux PCI sysfs tree (vendor id `0x8086`).
    Intel,
}
64
65pub fn detect_gpus() -> Vec<GpuDevice> {
66 let mut devices = Vec::new();
67 devices.extend(detect_nvidia());
68 devices.extend(detect_amd());
69 devices.extend(detect_intel());
70 devices
71}
72
73/// Human-readable manufacturer label. Used by the WS hello frame's
74/// `WsGpuInfo.manufacturer` field and by the admin inventory page's
75/// "by manufacturer" rollup. Stays in lockstep with `vendor_label` in
76/// `transcoder/src/capabilities.rs` so the registration POST + the
77/// hello frame agree on the spelling.
78pub fn manufacturer_label(v: GpuVendor) -> &'static str {
79 match v {
80 GpuVendor::Nvidia => "NVIDIA",
81 GpuVendor::Amd => "AMD",
82 GpuVendor::Intel => "Intel",
83 }
84}
85
86pub fn has_nvidia() -> bool {
87 !detect_nvidia().is_empty()
88}
89
90// ─── NVIDIA via libcuda dlopen ─────────────────────────────────────
/// Status code returned by the CUDA driver entry points below; this
/// module treats `0` as success and anything else as failure.
type CUresult = c_int;
/// Device handle filled in by `cuDeviceGet`.
type CUdevice = c_int;

/// Signature used for the `cuInit` symbol: `(flags) -> status`.
type FnCuInit = unsafe extern "C" fn(c_uint) -> CUresult;
/// Signature used for `cuDeviceGetCount`: `(out count) -> status`.
type FnCuDeviceGetCount = unsafe extern "C" fn(*mut c_int) -> CUresult;
/// Signature used for `cuDeviceGet`: `(out device, ordinal) -> status`.
type FnCuDeviceGet = unsafe extern "C" fn(*mut CUdevice, c_int) -> CUresult;
/// Signature used for `cuDeviceGetName`: `(out buffer, buffer length, device) -> status`.
type FnCuDeviceGetName = unsafe extern "C" fn(*mut c_char, c_int, CUdevice) -> CUresult;
98
99fn detect_nvidia() -> Vec<GpuDevice> {
100 // Try the usual driver library names across Linux / Windows.
101 let lib = unsafe { libloading::Library::new("libcuda.so") }
102 .or_else(|_| unsafe { libloading::Library::new("libcuda.so.1") })
103 .or_else(|_| unsafe { libloading::Library::new("nvcuda.dll") });
104
105 let Ok(lib) = lib else { return Vec::new() };
106
107 unsafe {
108 let cu_init: libloading::Symbol<FnCuInit> = match lib.get(b"cuInit") {
109 Ok(f) => f,
110 Err(_) => return Vec::new(),
111 };
112 // Initialization flag is reserved — must be zero.
113 if cu_init(0) != 0 {
114 return Vec::new();
115 }
116
117 let cu_device_get_count: libloading::Symbol<FnCuDeviceGetCount> =
118 match lib.get(b"cuDeviceGetCount") {
119 Ok(f) => f,
120 Err(_) => return Vec::new(),
121 };
122 let mut count: c_int = 0;
123 if cu_device_get_count(&mut count) != 0 || count <= 0 {
124 return Vec::new();
125 }
126
127 let cu_device_get: libloading::Symbol<FnCuDeviceGet> = match lib.get(b"cuDeviceGet") {
128 Ok(f) => f,
129 Err(_) => return Vec::new(),
130 };
131 let cu_device_get_name: libloading::Symbol<FnCuDeviceGetName> =
132 match lib.get(b"cuDeviceGetName") {
133 Ok(f) => f,
134 Err(_) => return Vec::new(),
135 };
136
137 let mut devices = Vec::with_capacity(count as usize);
138 for ordinal in 0..count {
139 let mut dev: CUdevice = 0;
140 if cu_device_get(&mut dev, ordinal) != 0 {
141 continue;
142 }
143 let mut name_buf = [0i8; 256];
144 let name = if cu_device_get_name(
145 name_buf.as_mut_ptr() as *mut c_char,
146 name_buf.len() as c_int,
147 dev,
148 ) == 0
149 {
150 CStr::from_ptr(name_buf.as_ptr() as *const c_char)
151 .to_string_lossy()
152 .into_owned()
153 } else {
154 format!("NVIDIA GPU {ordinal}")
155 };
156 // Phase 2 (2026-05-07) richer inventory: try to enrich
157 // via NVML for VRAM total + PCI id + serial + bus address +
158 // generation. NVML failure (driver missing, NVML so/dll
159 // absent) leaves those fields empty/zero; the
160 // cuda-reported `name` is still authoritative for the
161 // substring-based AV1 dispatch in supports_av1_encode.
162 let nvml_lookup = nvidia_nvml_lookup(ordinal as u32);
163 let generation = nvidia_generation_from_name(&name);
164 devices.push(GpuDevice {
165 vendor: GpuVendor::Nvidia,
166 name,
167 index: ordinal as u32,
168 generation,
169 pci_id: nvml_lookup.pci_id,
170 vram_mib: nvml_lookup.vram_mib,
171 serial: nvml_lookup.serial,
172 host_pci_address: nvml_lookup.host_pci_address,
173 vendor_id_hex: "0x10de".into(),
174 });
175 }
176 // Silence unused-import warnings from the libloading bounds checks
177 let _ = ptr::null::<c_void>();
178 devices
179 }
180}
181
182/// Initialize NVML, trying both the unversioned and SONAME-versioned
183/// library names. The default `Nvml::init()` dlopens `libnvidia-ml.so`
184/// (no suffix) — but the NVIDIA Container Toolkit only mounts
185/// `libnvidia-ml.so.1` into containers, with no unversioned alias.
186/// On the dev box we observed the bare `init()` failing with
187/// "cannot open shared object file" while the `.so.1` was present.
188/// Fall back to the explicit SONAME path; if both fail, the caller
189/// folds to "no NVML available" same as before.
190fn init_nvml_with_fallback() -> Result<nvml_wrapper::Nvml, nvml_wrapper::error::NvmlError> {
191 match nvml_wrapper::Nvml::init() {
192 Ok(n) => Ok(n),
193 Err(_) => nvml_wrapper::Nvml::builder()
194 .lib_path(std::ffi::OsStr::new("libnvidia-ml.so.1"))
195 .init(),
196 }
197}
198
/// Enrichment fields obtained from NVML for one NVIDIA device.
///
/// Bundled into a struct so the call site stays self-documenting as
/// fields are added. `Default` (all empty / zero / `None`) doubles as
/// the "NVML unavailable" value.
#[derive(Clone, Debug, Default)]
struct NvmlLookup {
    /// PCI id tuple string; empty when NVML could not report PCI info.
    pci_id: String,
    /// Total VRAM in MiB; `0` when NVML could not report memory info.
    vram_mib: u64,
    /// Board serial number, when NVML reports a usable one.
    serial: Option<String>,
    /// Abbreviated PCI bus address; empty when unavailable.
    host_pci_address: String,
}
209
210/// NVML lookup helper. Returns enrichment fields for the given CUDA
211/// ordinal. NVML's device handle indexing matches CUDA's ordinal in
212/// the common case (`CUDA_VISIBLE_DEVICES` empty / unset); mismatches
213/// are tolerated by returning all defaults on `device_by_index`
214/// errors, which the caller folds to empty/None.
215///
216/// NVML init is performed inside this function and torn down on return —
217/// repeated lookups during device enumeration share the same NVML
218/// process across the loop body via the `Nvml::init` call cost
219/// (microseconds) rather than holding a long-lived handle in static
220/// storage. Cross-platform: the `nvml-wrapper` crate dlopens
221/// `libnvidia-ml.so.1` on Linux and `nvml.dll` on Windows, same shape
222/// as our existing `libcuda` libloading path.
223fn nvidia_nvml_lookup(ordinal: u32) -> NvmlLookup {
224 let nvml = match init_nvml_with_fallback() {
225 Ok(n) => n,
226 Err(_) => return NvmlLookup::default(),
227 };
228 let device = match nvml.device_by_index(ordinal) {
229 Ok(d) => d,
230 Err(_) => return NvmlLookup::default(),
231 };
232 let (pci_id, host_pci_address) = match device.pci_info() {
233 Ok(p) => {
234 let id = format!(
235 "0x{:04x}:0x{:04x}",
236 p.pci_device_id >> 16,
237 p.pci_device_id & 0xFFFF
238 );
239 // bus_id format: "00000000:04:00.0". Strip the leading
240 // 0000-domain so the abbreviation matches the lspci /
241 // /sys/bus/pci/devices/<bdf> form admins recognise.
242 let bus = p
243 .bus_id
244 .trim_start_matches('0')
245 .trim_start_matches(':')
246 .to_string();
247 // If trimming above ate too much (single-domain "0000:..."),
248 // fall back to the raw bus_id; defensive against leading-zero
249 // pathological cases.
250 let host_pci = if bus.is_empty() {
251 p.bus_id.clone()
252 } else {
253 bus
254 };
255 (id, host_pci)
256 }
257 Err(_) => (String::new(), String::new()),
258 };
259 let vram_mib = match device.memory_info() {
260 Ok(m) => m.total / 1024 / 1024,
261 Err(_) => 0,
262 };
263 // `serial()` returns Err for cards without a serial sticker
264 // (consumer GeForce typically; datacenter Tesla / A10G expose it).
265 // Don't fail the worker — debug-log + None per coordinator's
266 // "graceful failure" guidance.
267 //
268 // Per NVML docs: "0 is not a valid serial for a nvidia card."
269 // Some consumer cards / driver-fallback paths return literal "0"
270 // instead of erroring. Treat that as None too so we don't
271 // mistakenly create asset rows keyed on a sentinel value.
272 let serial = match device.serial() {
273 Ok(s) => {
274 let trimmed = s.trim();
275 if trimmed.is_empty() || trimmed == "0" {
276 None
277 } else {
278 Some(trimmed.to_string())
279 }
280 }
281 Err(e) => {
282 tracing::debug!(error = %e, ordinal, "nvml serial unavailable");
283 None
284 }
285 };
286 NvmlLookup {
287 pci_id,
288 vram_mib,
289 serial,
290 host_pci_address,
291 }
292}
293
/// NVIDIA generation lookup by marketing-name substring.
///
/// The name is lowercased and matched in two passes; the first hit wins:
///
/// 1. **Explicit architecture words** ("ada", "blackwell", "quadro rtx",
///    …). These take precedence because several workstation SKUs reuse
///    model numbers across generations: "RTX 5000 Ada Generation" is Ada
///    Lovelace and "Quadro RTX 5000" is Turing, yet both contain the
///    Blackwell-looking token `rtx 50`.
/// 2. **Model-number tokens**, newest family first.
///
/// Returns `"Unknown"` when nothing matches. Uses the same
/// lowercase-substring convention as `supports_av1_encode`.
fn nvidia_generation_from_name(name: &str) -> String {
    // (substring, generation) — checked before any model-number token.
    const ARCHITECTURE_WORDS: &[(&str, &str)] = &[
        ("blackwell", "Blackwell"),
        ("hopper", "Hopper"),
        ("ada", "Ada Lovelace"),
        ("ampere", "Ampere"),
        ("turing", "Turing"),
        // The "Quadro RTX" brand was only used for Turing boards.
        ("quadro rtx", "Turing"),
        ("pascal", "Pascal"),
    ];

    // (generation, model tokens) — order matters, first family to match
    // wins. Tokens that are substrings of another token in the same
    // family ("gb200" ⊂ "b200", "l40" ⊂ "l4", "a100" ⊂ "a10") are
    // covered by the shorter one and therefore not listed twice.
    const MODEL_TOKENS: &[(&str, &[&str])] = &[
        // Blackwell consumer (RTX 50xx) + datacenter (B100/B200/GB200).
        (
            "Blackwell",
            &["rtx 50", "5050", "5060", "5070", "5080", "5090", "b100", "b200"],
        ),
        // Hopper datacenter (H100/H200).
        ("Hopper", &["h100", "h200"]),
        // Ada Lovelace: RTX 40xx + L4/L40 datacenter.
        ("Ada Lovelace", &["rtx 40", "4060", "4070", "4080", "4090", "l4"]),
        // Ampere: RTX 30xx + A10/A10G/A100.
        ("Ampere", &["rtx 30", "3050", "3060", "3070", "3080", "3090", "a10"]),
        // Turing: RTX 20xx + T4.
        ("Turing", &["rtx 20", "2060", "2070", "2080", " t4"]),
        // Pascal: GTX 10xx + P100/P40/P4.
        (
            "Pascal",
            &["gtx 10", "1050", "1060", "1070", "1080", "p100", "p40", " p4"],
        ),
    ];

    let lowered = name.to_lowercase();

    let by_word = ARCHITECTURE_WORDS
        .iter()
        .find(|entry| lowered.contains(entry.0))
        .map(|entry| entry.1);

    let generation = by_word.or_else(|| {
        MODEL_TOKENS
            .iter()
            .find(|entry| entry.1.iter().any(|token| lowered.contains(*token)))
            .map(|entry| entry.0)
    });

    generation.unwrap_or("Unknown").to_string()
}
369
370fn detect_amd() -> Vec<GpuDevice> {
371 // Linux: check /sys/bus/pci/devices for AMD GPU (vendor 1002)
372 #[cfg(target_os = "linux")]
373 {
374 if let Ok(entries) = std::fs::read_dir("/sys/bus/pci/devices") {
375 let mut idx = 0u32;
376 return entries
377 .filter_map(|e| e.ok())
378 .filter_map(|entry| {
379 let vendor_path = entry.path().join("vendor");
380 let class_path = entry.path().join("class");
381 let vendor = std::fs::read_to_string(&vendor_path).ok()?;
382 let class = std::fs::read_to_string(&class_path).ok()?;
383 // VGA (0x030000) or 3D controller (0x030200)
384 if vendor.trim() == "0x1002" && class.trim().starts_with("0x0302") {
385 let device_path = entry.path().join("device");
386 let device = std::fs::read_to_string(&device_path)
387 .unwrap_or_default()
388 .trim()
389 .to_string();
390 let after = device.trim_start_matches("0x");
391 let pci_id = format!("0x1002:0x{after}");
392 let vram_mib = read_drm_vram_mib(&entry.path());
393 let generation = amd_generation_from_device_id(&device);
394 let host_pci_address = host_pci_address_from_sysfs(&entry.path());
395 let serial = read_drm_serial(&entry.path());
396 let dev = GpuDevice {
397 vendor: GpuVendor::Amd,
398 name: format!("AMD GPU {device}"),
399 index: idx,
400 generation,
401 pci_id,
402 vram_mib,
403 serial,
404 host_pci_address,
405 vendor_id_hex: "0x1002".into(),
406 };
407 idx += 1;
408 Some(dev)
409 } else {
410 None
411 }
412 })
413 .collect();
414 }
415 }
416 Vec::new()
417}
418
419fn detect_intel() -> Vec<GpuDevice> {
420 #[cfg(target_os = "linux")]
421 {
422 if let Ok(entries) = std::fs::read_dir("/sys/bus/pci/devices") {
423 let mut idx = 0u32;
424 return entries
425 .filter_map(|e| e.ok())
426 .filter_map(|entry| {
427 let vendor_path = entry.path().join("vendor");
428 let class_path = entry.path().join("class");
429 let device_path = entry.path().join("device");
430 let vendor = std::fs::read_to_string(&vendor_path).ok()?;
431 let class = std::fs::read_to_string(&class_path).ok()?;
432 if vendor.trim() == "0x8086" && class.trim().starts_with("0x0300") {
433 // Read the PCI device ID so we can label the GPU
434 // by family. Without this every Intel device was
435 // tagged "Intel Integrated GPU" — which made
436 // `supports_av1_encode`'s `contains("arc")`
437 // substring match miss the discrete Arc cards
438 // and silently route every job to rav1e CPU.
439 let device_id_str = std::fs::read_to_string(&device_path)
440 .ok()
441 .map(|s| s.trim().to_string())
442 .unwrap_or_default();
443 let name = intel_label_from_device_id(&device_id_str);
444 let pci_id = if device_id_str.starts_with("0x") {
445 format!("0x8086:{device_id_str}")
446 } else {
447 String::new()
448 };
449 // Prefer the live sysfs read (newer i915
450 // exposes total VRAM via mem_info_vram_total).
451 // Fall back to the static SKU catalog when the
452 // sysfs path is missing — the dev box's kernel
453 // is one of the older versions that doesn't
454 // export the field.
455 let live_vram = read_drm_vram_mib(&entry.path());
456 let vram_mib = if live_vram > 0 {
457 live_vram
458 } else {
459 intel_vram_mib_from_device_id(&device_id_str)
460 .map(u64::from)
461 .unwrap_or(0)
462 };
463 let generation = intel_generation_from_device_id(&device_id_str);
464 let host_pci_address = host_pci_address_from_sysfs(&entry.path());
465 let serial = read_drm_serial(&entry.path());
466 let dev = GpuDevice {
467 vendor: GpuVendor::Intel,
468 name,
469 index: idx,
470 generation,
471 pci_id,
472 vram_mib,
473 serial,
474 host_pci_address,
475 vendor_id_hex: "0x8086".into(),
476 };
477 idx += 1;
478 Some(dev)
479 } else {
480 None
481 }
482 })
483 .collect();
484 }
485 }
486 Vec::new()
487}
488
/// Read a device's total VRAM, in MiB, from its sysfs directory.
///
/// Looks for a `mem_info_vram_total` file holding a byte count, first
/// directly inside `device_path`, then under each
/// `device_path/drm/<entry>/device/`. The first file that exists and
/// parses as an unsigned integer wins; its value is converted from
/// bytes to MiB (integer division).
///
/// Best-effort: returns `0` when no candidate can be read or parsed
/// (e.g. integrated GPUs that share system memory), which the inventory
/// page renders as "—".
#[cfg(target_os = "linux")]
fn read_drm_vram_mib(device_path: &std::path::Path) -> u64 {
    // Reads one candidate file and converts bytes -> MiB.
    let read_mib = |file: std::path::PathBuf| -> Option<u64> {
        let text = std::fs::read_to_string(file).ok()?;
        let bytes = text.trim().parse::<u64>().ok()?;
        Some(bytes / 1024 / 1024)
    };

    // Candidate 1: <device>/mem_info_vram_total
    if let Some(mib) = read_mib(device_path.join("mem_info_vram_total")) {
        return mib;
    }

    // Candidate 2: <device>/drm/<entry>/device/mem_info_vram_total
    // (one extra hop on some kernel versions).
    std::fs::read_dir(device_path.join("drm"))
        .ok()
        .and_then(|cards| {
            cards
                .flatten()
                .find_map(|card| read_mib(card.path().join("device").join("mem_info_vram_total")))
        })
        .unwrap_or(0)
}

/// Non-Linux stand-in: there is no sysfs to consult, so the VRAM total
/// is always reported as unknown (`0`).
#[cfg(not(target_os = "linux"))]
fn read_drm_vram_mib(_device_path: &std::path::Path) -> u64 {
    0
}
527
/// Derive the `lspci`-style PCI address (e.g. `04:00.0`) from a sysfs
/// device path such as `/sys/bus/pci/devices/0000:04:00.0`.
///
/// Takes the last path component (`<domain>:<bus>:<device>.<function>`)
/// and drops a leading `0000:` domain so the result matches the
/// conventional 7-character form. Components without that exact prefix
/// (e.g. a non-zero domain) are returned unchanged. Returns an empty
/// string when the path has no final component or it is not valid UTF-8.
#[cfg(target_os = "linux")]
fn host_pci_address_from_sysfs(device_path: &std::path::Path) -> String {
    device_path
        .file_name()
        .and_then(|component| component.to_str())
        .map(|bdf| bdf.strip_prefix("0000:").unwrap_or(bdf).to_string())
        .unwrap_or_default()
}

/// Non-Linux stand-in: no sysfs, so no PCI address is available.
#[cfg(not(target_os = "linux"))]
fn host_pci_address_from_sysfs(_device_path: &std::path::Path) -> String {
    String::new()
}
552
/// Best-effort serial-number read from a device's sysfs directory.
///
/// Tries the files `serial_number` and then `serial` inside
/// `device_path` and returns the first one whose trimmed content is
/// usable. A value is rejected when it is empty or the literal `"0"`:
/// NVML documents "0 is not a valid serial for a nvidia card", and the
/// same sentinel is filtered here so no asset row is keyed on it.
/// Returns `None` when neither file yields a usable value.
#[cfg(target_os = "linux")]
fn read_drm_serial(device_path: &std::path::Path) -> Option<String> {
    const CANDIDATE_FILES: [&str; 2] = ["serial_number", "serial"];

    CANDIDATE_FILES.iter().find_map(|file_name| {
        let raw = std::fs::read_to_string(device_path.join(file_name)).ok()?;
        let value = raw.trim();
        let usable = !value.is_empty() && value != "0";
        usable.then(|| value.to_string())
    })
}

/// Non-Linux stand-in: no sysfs, so no serial can be read.
#[cfg(not(target_os = "linux"))]
fn read_drm_serial(_device_path: &std::path::Path) -> Option<String> {
    None
}
580
/// AMD generation lookup from a sysfs-style PCI device id (`"0xNNNN"`).
///
/// RDNA3 (RX 7000) is the only generation with AV1 encode silicon
/// today; the other families are labelled so operators can see the lay
/// of the fleet on the inventory page. Id ranges follow the upstream
/// amdgpu driver's PCI table (`drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c`).
///
/// Returns `"Unknown"` when the id lacks the `0x` prefix, is not valid
/// 16-bit hex, or falls outside every known range.
///
/// Arms are ordered most-specific first because some ranges nest inside
/// broader ones; the first matching arm wins.
fn amd_generation_from_device_id(device_id: &str) -> String {
    let parsed = device_id
        .strip_prefix("0x")
        .and_then(|hex| u16::from_str_radix(hex, 16).ok());

    let label = match parsed {
        // Navi 24 (RDNA2) — RX 6400 / 6500 XT. Lives inside the 0x74xx
        // block, so it must be tested before the RDNA3 range.
        Some(0x7420..=0x743f) => "RDNA2",
        // Navi 31 / 32 / 33 (RDNA3) — RX 7000 series.
        Some(0x7400..=0x74ff) => "RDNA3",
        // Navi 21 / 22 / 23 (RDNA2) — RX 6000 series.
        Some(0x73a0..=0x73ff) => "RDNA2",
        // Navi 10 / 14 (RDNA1) — RX 5000 series. Must precede the broad
        // 0x73xx range below, which would otherwise swallow it.
        Some(0x7310..=0x7350) => "RDNA1",
        // Remainder of 0x7300..=0x73a0, kept as RDNA2 for compatibility.
        // NOTE(review): this span looks too broad (it may include
        // non-RDNA2 parts) — verify against the amdgpu PCI table.
        Some(0x7300..=0x73a0) => "RDNA2",
        // Vega 10 / 20 (GCN5) — Vega 56/64, MI50/60.
        Some(0x6860..=0x687f) => "Vega",
        // Polaris 10/11/12 (GCN4) — RX 400 / 500.
        Some(0x67c0..=0x67ff) | Some(0x6980..=0x69ff) => "Polaris",
        _ => "Unknown",
    };
    label.to_string()
}
607
/// Intel generation lookup from a sysfs-style PCI device id (`"0xNNNN"`).
///
/// Uses the same id ranges as `intel_label_from_device_id` so the
/// inventory page's generation rollup agrees with the per-row name.
/// Returns `"Unknown"` when the id lacks the `0x` prefix, is not valid
/// 16-bit hex, or falls outside every listed range.
fn intel_generation_from_device_id(device_id: &str) -> String {
    // (first id, last id, generation label) — inclusive, non-overlapping.
    const FAMILIES: &[(u16, u16, &str)] = &[
        // Alchemist DG2 — 0x5690..=0x56af.
        (0x5690, 0x56af, "Alchemist DG2"),
        // Battlemage BMG-G21.
        (0xe200, 0xe21f, "Battlemage BMG"),
        // Lunar Lake Xe2 iGPU.
        (0x6420, 0x643f, "Lunar Lake"),
        // Meteor Lake Xe-LP iGPU.
        (0x7d40, 0x7d6f, "Meteor Lake"),
        // Older iGPU families: no AV1 encode silicon, but labelled by
        // family for fleet visibility.
        (0xa780, 0xa7ff, "Raptor Lake"),
        (0x4680, 0x46ff, "Alder Lake"),
        (0x9a00, 0x9aff, "Tiger Lake"),
    ];

    let parsed = device_id
        .strip_prefix("0x")
        .and_then(|hex| u16::from_str_radix(hex, 16).ok());
    let Some(id) = parsed else {
        return "Unknown".into();
    };

    FAMILIES
        .iter()
        .find(|family| (family.0..=family.1).contains(&id))
        .map_or_else(|| "Unknown".to_string(), |family| family.2.to_string())
}
632
// NOTE: the following paragraph documents `intel_label_from_device_id`
// (defined further down in this file). It is kept here as a plain
// comment so rustdoc no longer attaches it to the VRAM catalog function.
//
// Map an Intel PCI device id (`0xNNNN`) to a human-readable label.
// Discrete Arc GPUs (Alchemist DG2, Battlemage BMG) are SKU-specific
// where the device id is well-known so admins can tell A310 from A750
// in the inventory log; family-level for unknown variants. Meteor Lake
// / Lunar Lake / Arrow Lake iGPUs are family-level only (the AV1 QSV
// silicon is a property of the family, not the SKU).
//
// Device-id table cross-checked against
// `i915_pci_ids.h` / `xe_pci.c` in upstream kernel
// (`drivers/gpu/drm/i915/i915_pciids.h` for DG2 + BMG entries).

/// Catalog VRAM total, in MiB, for known Intel discrete SKUs.
///
/// Static fallback for hosts whose i915 driver doesn't expose
/// `mem_info_vram_total` in sysfs (that file was added in later
/// kernels), so the inventory page can still tell a 4 GB card from an
/// 8 GB card without kernel introspection.
///
/// `device_id` is the sysfs-style id (`"0xNNNN"`). Returns `None` when
/// the id lacks the `0x` prefix, is not valid 16-bit hex, or is not in
/// the catalog.
///
/// Known limitation: the A770 ships in 8 GB and 16 GB (Limited Edition)
/// variants under the same device id (`0x56a0`). Telling them apart
/// needs the subsystem device id; the catalog reports the more common
/// 8 GB figure and accepts the undercount for LE cards.
fn intel_vram_mib_from_device_id(device_id: &str) -> Option<u32> {
    const GIB: u32 = 1024; // MiB per GiB

    // (PCI device id, VRAM in MiB)
    const CATALOG: &[(u16, u32)] = &[
        // Alchemist DG2-128 (small die)
        (0x56a5, 6 * GIB), // A380
        (0x56a6, 4 * GIB), // A310
        (0x5693, 4 * GIB), // A350M
        // Alchemist DG2-512 (full die)
        (0x56a0, 8 * GIB),  // A770 (8 GB; 16 GB LE shares this id)
        (0x56a1, 8 * GIB),  // A750
        (0x56a2, 8 * GIB),  // A580
        (0x5690, 16 * GIB), // A770M (16 GB common spec)
        (0x5691, 12 * GIB), // A730M
        (0x5692, 8 * GIB),  // A550M
        // Battlemage
        (0xe20b, 12 * GIB), // B580
        (0xe20c, 10 * GIB), // B570
    ];

    let hex = device_id.strip_prefix("0x")?;
    let id = u16::from_str_radix(hex, 16).ok()?;
    // SKUs missing from the catalog yield None.
    CATALOG
        .iter()
        .find(|entry| entry.0 == id)
        .map(|entry| entry.1)
}
679
/// Map an Intel PCI device id (`"0xNNNN"`) to a human-readable label.
///
/// Resolution order:
/// 1. exact per-SKU names for well-known discrete Arc cards, so admins
///    can tell e.g. an A310 from an A750;
/// 2. family-level labels for other ids inside the Alchemist (DG2),
///    Battlemage (BMG), Lunar Lake and Meteor Lake ranges;
/// 3. `"Intel iGPU 0xNNNN"` for any other parseable id;
/// 4. `"Intel GPU"` when the id lacks the `0x` prefix or is not valid
///    16-bit hex.
fn intel_label_from_device_id(device_id: &str) -> String {
    // Per-SKU names for discrete cards.
    const KNOWN_SKUS: &[(u16, &str)] = &[
        // Alchemist DG2-128 (small die): A310 / A380 / A350M.
        (0x56a5, "Intel Arc A380"),
        (0x56a6, "Intel Arc A310"),
        (0x5693, "Intel Arc A350M"),
        // Alchemist DG2-512 (full die): A580 / A750 / A770 + mobile.
        (0x56a0, "Intel Arc A770"),
        (0x56a1, "Intel Arc A750"),
        (0x56a2, "Intel Arc A580"),
        (0x5690, "Intel Arc A770M"),
        (0x5691, "Intel Arc A730M"),
        (0x5692, "Intel Arc A550M"),
        // Battlemage BMG-G21.
        (0xe20b, "Intel Arc B580"),
        (0xe20c, "Intel Arc B570"),
    ];

    let parsed = device_id
        .strip_prefix("0x")
        .and_then(|hex| u16::from_str_radix(hex, 16).ok());
    let Some(id) = parsed else {
        return "Intel GPU".into();
    };

    if let Some(sku) = KNOWN_SKUS.iter().find(|sku| sku.0 == id) {
        return sku.1.to_string();
    }

    if (0x5690..=0x56af).contains(&id) {
        // Untagged id in the DG2 range — likely a SKU not yet in the
        // table; fall back to a family-level Arc label.
        format!("Intel Arc Alchemist (DG2 0x{id:04x})")
    } else if (0xe200..=0xe21f).contains(&id) {
        format!("Intel Arc Battlemage (BMG 0x{id:04x})")
    } else if (0x6420..=0x643f).contains(&id) {
        // Lunar Lake Xe2 iGPU — has AV1 encode.
        "Intel Lunar Lake iGPU".into()
    } else if (0x7d40..=0x7d6f).contains(&id) {
        // Meteor Lake Xe-LP iGPU — has AV1 encode.
        "Intel Meteor Lake iGPU".into()
    } else {
        // Anything else is treated as an older iGPU without AV1 QSV.
        format!("Intel iGPU 0x{id:04x}")
    }
}
721
/// Per-GPU live utilisation snapshot.
///
/// Read on every load tick (5 s cadence) by the `worker_load` reporter
/// and folded into the `WsGpuLeaseEntry` for the wire. NVIDIA values
/// come from NVML. Intel values are a coarse stand-in read from sysfs
/// (`gt_cur_freq_mhz` / `gt_max_freq_mhz` as a "busy" proxy,
/// `mem_info_vram_*` for memory). AMD is currently a no-op and reports
/// the all-zero default; a radeontop / `amdsmi` integration is deferred.
///
/// `Default` is the "nothing could be read" value: all counters `0`
/// and no temperature.
#[derive(Clone, Debug, Default)]
pub struct GpuUtilization {
    /// Overall GPU busy percentage, 0..=100.
    pub util_percent: u8,
    /// Encoder pipeline (NVENC) busy percentage, 0..=100.
    pub encoder_percent: u8,
    /// Decoder pipeline (NVDEC) busy percentage, 0..=100.
    pub decoder_percent: u8,
    /// VRAM currently in use, in MiB.
    pub mem_used_mib: u32,
    /// Total VRAM, in MiB. Duplicated from the static device record so
    /// the wire entry is self-contained for the front-end bar render.
    pub mem_total_mib: u32,
    /// Core temperature in °C; `None` when the platform path doesn't
    /// expose it.
    pub temperature_c: Option<u8>,
}
747
748/// One-shot accumulator that opens NVML once and reads per-GPU
749/// utilisation for every NVIDIA device on each load tick. Holding
750/// the NVML handle across reads avoids the init cost
751/// (microseconds) on every tick and is the documented pattern.
752pub struct GpuUtilizationReader {
753 nvml: Option<nvml_wrapper::Nvml>,
754}
755
756impl GpuUtilizationReader {
757 /// Build a reader. NVML init failure is non-fatal — the reader
758 /// folds to "all zeroes" on every NVIDIA device and the rest of
759 /// the load-tick path stays alive. Logged once at startup so
760 /// operators can tell "no NVIDIA card" from "NVIDIA card but
761 /// driver missing".
762 pub fn new() -> Self {
763 let nvml = match init_nvml_with_fallback() {
764 Ok(n) => Some(n),
765 Err(e) => {
766 // info-level: many production hosts are AMD/Intel-only
767 // and this isn't a problem. Operators looking at the
768 // dev box logs see this once at boot.
769 tracing::info!(error = %e, "nvml not available; NVIDIA GPU utilisation will be 0");
770 None
771 }
772 };
773 Self { nvml }
774 }
775
776 /// Read the per-tick snapshot for one device. Cheap when NVML is
777 /// available (handful of FFI calls); free when it's not (returns
778 /// the zero-initialised default).
779 pub fn read(&self, device: &GpuDevice) -> GpuUtilization {
780 match device.vendor {
781 GpuVendor::Nvidia => self.read_nvidia(device).unwrap_or_default(),
782 GpuVendor::Intel => self.read_intel(device).unwrap_or_default(),
783 GpuVendor::Amd => GpuUtilization::default(),
784 }
785 }
786
787 fn read_nvidia(&self, device: &GpuDevice) -> Option<GpuUtilization> {
788 let nvml = self.nvml.as_ref()?;
789 let dev = nvml.device_by_index(device.index).ok()?;
790 let util = dev.utilization_rates().ok();
791 // EncoderUtilizationInfo / DecoderUtilizationInfo have a
792 // `utilization` field (0..=100) plus a sampling period; we
793 // surface only the percentage.
794 let enc = dev.encoder_utilization().ok();
795 let dec = dev.decoder_utilization().ok();
796 let mem = dev.memory_info().ok();
797 let temp = dev
798 .temperature(nvml_wrapper::enum_wrappers::device::TemperatureSensor::Gpu)
799 .ok()
800 .and_then(|t| u8::try_from(t).ok());
801 Some(GpuUtilization {
802 util_percent: util.as_ref().map(|u| u.gpu.min(100) as u8).unwrap_or(0),
803 encoder_percent: enc
804 .as_ref()
805 .map(|e| e.utilization.min(100) as u8)
806 .unwrap_or(0),
807 decoder_percent: dec
808 .as_ref()
809 .map(|d| d.utilization.min(100) as u8)
810 .unwrap_or(0),
811 mem_used_mib: mem
812 .as_ref()
813 .map(|m| (m.used / 1024 / 1024) as u32)
814 .unwrap_or(0),
815 mem_total_mib: mem
816 .as_ref()
817 .map(|m| (m.total / 1024 / 1024) as u32)
818 .unwrap_or(device.vram_mib as u32),
819 temperature_c: temp,
820 })
821 }
822
823 /// Intel stand-in via sysfs `gt_cur_freq_mhz` / `gt_max_freq_mhz`
824 /// for a coarse "busy" proxy and `mem_info_vram_used` for memory.
825 /// The i915 driver doesn't expose per-engine busy% via sysfs
826 /// cleanly — `intel_gpu_top -J` is the proper source but the
827 /// fork+capture cost on every 5 s tick is heavy. Phase 1: leave
828 /// encoder/decoder at 0 and let `util_percent` be the freq-ratio
829 /// proxy; real fix is the perf event interface (`i915_pmu`)
830 /// which deserves its own task.
831 #[cfg(target_os = "linux")]
832 fn read_intel(&self, _device: &GpuDevice) -> Option<GpuUtilization> {
833 // We don't have the bdf here, so walk /sys/class/drm/cardN
834 // for an Intel card. Index 0 returns the first one that
835 // matches; multi-Intel hosts (rare today) get the same
836 // utilisation reported across both — acceptable until the
837 // proper i915_pmu integration lands.
838 let mut out = GpuUtilization::default();
839 if let Ok(entries) = std::fs::read_dir("/sys/class/drm") {
840 for entry in entries.flatten() {
841 let name = entry.file_name();
842 let Some(name_str) = name.to_str() else {
843 continue;
844 };
845 if !name_str.starts_with("card") || name_str.contains('-') {
846 continue;
847 }
848 // Confirm Intel via vendor file under device link.
849 let device_link = entry.path().join("device").join("vendor");
850 let vendor = std::fs::read_to_string(&device_link).unwrap_or_default();
851 if vendor.trim() != "0x8086" {
852 continue;
853 }
854 let cur = std::fs::read_to_string(entry.path().join("gt_cur_freq_mhz"))
855 .ok()
856 .and_then(|s| s.trim().parse::<u32>().ok());
857 let max = std::fs::read_to_string(entry.path().join("gt_max_freq_mhz"))
858 .ok()
859 .and_then(|s| s.trim().parse::<u32>().ok());
860 if let (Some(cur), Some(max)) = (cur, max) {
861 if max > 0 {
862 out.util_percent = ((cur as u64 * 100 / max as u64).min(100)) as u8;
863 }
864 }
865 let used =
866 std::fs::read_to_string(entry.path().join("device").join("mem_info_vram_used"))
867 .ok()
868 .and_then(|s| s.trim().parse::<u64>().ok());
869 let total = std::fs::read_to_string(
870 entry.path().join("device").join("mem_info_vram_total"),
871 )
872 .ok()
873 .and_then(|s| s.trim().parse::<u64>().ok());
874 if let Some(u) = used {
875 out.mem_used_mib = (u / 1024 / 1024) as u32;
876 }
877 if let Some(t) = total {
878 out.mem_total_mib = (t / 1024 / 1024) as u32;
879 }
880 // Fall back to the catalog VRAM total stored on the
881 // device record when sysfs didn't expose it. The dev
882 // box's kernel doesn't have mem_info_vram_total, so
883 // without this Intel cards report 0 / 0 forever.
884 if out.mem_total_mib == 0 && _device.vram_mib > 0 {
885 out.mem_total_mib = _device.vram_mib as u32;
886 }
887 // Fall back to DRM fdinfo aggregation when sysfs didn't
888 // expose `mem_info_vram_used` (older kernels). Filtered
889 // to this card's PCI BDF so multi-Intel hosts report
890 // per-device used memory, not the cross-card total.
891 // This is the same source `intel_gpu_top -J` and `nvtop`
892 // use, available since kernel ~5.19 (i915) / ~6.8 (xe).
893 if out.mem_used_mib == 0 {
894 let bdf = read_pci_bdf_from_drm_card(&entry.path());
895 if let Some(bytes) = read_intel_vram_resident_bytes(bdf.as_deref()) {
896 out.mem_used_mib = (bytes / 1024 / 1024) as u32;
897 }
898 }
899 return Some(out);
900 }
901 }
902 if out.mem_total_mib == 0 && _device.vram_mib > 0 {
903 out.mem_total_mib = _device.vram_mib as u32;
904 }
905 Some(out)
906 }
907
/// Placeholder Intel sampler for non-Linux targets.
///
/// Always yields a default-valued sample. The Windows source would be
/// the WMI `Win32_PerfFormattedData_GPUPerformanceCounters_GPUEngine`
/// performance counters, deferred for the same per-tick cost concern
/// that rules out `intel_gpu_top` on Linux.
#[cfg(not(target_os = "linux"))]
fn read_intel(&self, _device: &GpuDevice) -> Option<GpuUtilization> {
    let all_zero = GpuUtilization::default();
    Some(all_zero)
}
916}
917
/// Look up the PCI BDF (e.g. `0000:03:00.0`) behind a
/// `/sys/class/drm/cardN` directory.
///
/// Reads the `device` symlink inside `card_dir` and returns the final
/// path component of the link target, which for a PCI-backed card is
/// the BDF. Yields `None` when the link cannot be read (non-PCI virtual
/// GPUs etc.), when the target has no final component, or when that
/// component is not valid UTF-8.
#[cfg(target_os = "linux")]
fn read_pci_bdf_from_drm_card(card_dir: &std::path::Path) -> Option<String> {
    let device_link = card_dir.join("device");
    match std::fs::read_link(device_link) {
        Ok(link_target) => {
            let bdf = link_target.file_name()?.to_str()?;
            Some(bdf.to_owned())
        }
        Err(_) => None,
    }
}
931
932/// Aggregate Intel VRAM bytes resident across every DRM client by
933/// walking `/proc/*/fdinfo/*`. The kernel exposes per-fd accounting
934/// in DRM fdinfo (i915 since ~5.19, xe driver since ~6.8); summing
935/// `drm-resident-local0` across all clients gives the same number
936/// `intel_gpu_top -J` reports for "VRAM used".
937///
938/// When `bdf_filter` is `Some(...)`, only fdinfo entries whose
939/// `drm-pdev:` matches that BDF are counted — the multi-Intel case
940/// (the dev box has Arc A750 + Arc A310 today) gets per-card
941/// accounting instead of a cross-card total. When `None`, every
942/// Intel client is summed.
943///
944/// Returns `None` when no Intel DRM clients are visible (rather than
945/// `Some(0)`) so the caller can distinguish "no usage right now"
946/// from "fdinfo path not available on this kernel" — the former
947/// shouldn't trigger a different fallback, the latter could.
948#[cfg(target_os = "linux")]
949fn read_intel_vram_resident_bytes(bdf_filter: Option<&str>) -> Option<u64> {
950 let proc_dir = std::fs::read_dir("/proc").ok()?;
951 let mut total_bytes: u64 = 0;
952 let mut found_any_intel_client = false;
953
954 for proc_entry in proc_dir.flatten() {
955 let pid_name = proc_entry.file_name();
956 let Some(pid_str) = pid_name.to_str() else {
957 continue;
958 };
959 if !pid_str.bytes().all(|b| b.is_ascii_digit()) {
960 continue;
961 }
962 let fdinfo_dir = proc_entry.path().join("fdinfo");
963 let Ok(fd_entries) = std::fs::read_dir(&fdinfo_dir) else {
964 continue;
965 };
966
967 for fd_entry in fd_entries.flatten() {
968 // fdinfo files for non-DRM fds are short and have no
969 // "drm-driver" key — read_to_string is cheap on those.
970 // Permission errors on other-user processes also fall
971 // through silently (the transcoder runs as root in our
972 // production container, so this is rare in practice).
973 let Ok(content) = std::fs::read_to_string(fd_entry.path()) else {
974 continue;
975 };
976 if !content.contains("drm-driver:") {
977 continue;
978 }
979 // Match either i915 (mainline Intel driver) or xe (newer
980 // Intel driver shipping with kernel 6.8+; takes over Arc
981 // discrete cards). Whitespace between key and value is a
982 // single tab in i915's emitter and a single space in xe's
983 // — accept both.
984 let is_intel = content
985 .lines()
986 .filter_map(|l| l.strip_prefix("drm-driver:"))
987 .any(|v| {
988 let v = v.trim();
989 v == "i915" || v == "xe"
990 });
991 if !is_intel {
992 continue;
993 }
994 // Optional BDF filter — only count clients on the card we
995 // care about. drm-pdev format is `drm-pdev: 0000:03:00.0`.
996 if let Some(want_bdf) = bdf_filter {
997 let matches = content
998 .lines()
999 .filter_map(|l| l.strip_prefix("drm-pdev:"))
1000 .any(|v| v.trim() == want_bdf);
1001 if !matches {
1002 continue;
1003 }
1004 }
1005 found_any_intel_client = true;
1006 // Sum drm-resident-local0 across the client. "local0" is
1007 // the i915/xe naming for the on-card VRAM region; values
1008 // are formatted as "<num> <unit>" with unit ∈ {B, KiB,
1009 // MiB, GiB} per drm-fdinfo.rst.
1010 for line in content.lines() {
1011 if let Some(rest) = line.strip_prefix("drm-resident-local0:") {
1012 if let Some(bytes) = parse_drm_size(rest) {
1013 total_bytes = total_bytes.saturating_add(bytes);
1014 }
1015 }
1016 }
1017 }
1018 }
1019
1020 if found_any_intel_client {
1021 Some(total_bytes)
1022 } else {
1023 None
1024 }
1025}
1026
/// Convert a DRM fdinfo size value into bytes.
///
/// The expected shape is `<number> [<unit>]`, surrounded by any amount
/// of whitespace. Recognised units are `B`, `KiB`, `MiB` and `GiB`
/// (case-sensitive); a missing unit means bytes. Anything after the
/// unit token is ignored. The multiplication saturates at `u64::MAX`.
///
/// Returns `None` when the number is absent or is not a valid `u64`,
/// or when the unit is not one of the recognised ones.
#[cfg(target_os = "linux")]
fn parse_drm_size(s: &str) -> Option<u64> {
    let mut tokens = s.split_whitespace();
    let magnitude = tokens.next()?.parse::<u64>().ok()?;
    // Each binary unit is a power of two; express it as a shift.
    let shift = match tokens.next() {
        None | Some("B") => 0,
        Some("KiB") => 10,
        Some("MiB") => 20,
        Some("GiB") => 30,
        Some(_) => return None,
    };
    Some(magnitude.saturating_mul(1u64 << shift))
}
1045
1046impl Default for GpuUtilizationReader {
1047 fn default() -> Self {
1048 Self::new()
1049 }
1050}
1051
1052pub fn supports_av1_encode(device: &GpuDevice) -> bool {
1053 match device.vendor {
1054 // NVIDIA: defer to the **real driver capability query**, not a
1055 // board-name list. The substring list this used to carry was brittle —
1056 // every new SKU had to be added by hand, and a missed one (e.g. the
1057 // RTX 5060 once was) now *hard-fails* the job since there's no CPU
1058 // fallback. NVENC AV1 support is authoritatively validated by
1059 // `nvEncGetEncodeCaps` / `GetEncodeGUIDs` in `NvencEncoder::new`, which
1060 // enumerates the GPU's actual encode codecs and bails cleanly if AV1
1061 // isn't among them (verified on an RTX 3090: "2 codec(s), none AV1").
1062 // So admit every NVIDIA GPU here and let the real query be the gate.
1063 GpuVendor::Nvidia => true,
1064 // AMD: defer to the real path. AV1 VCN encode is RDNA3+ (RX 7000+), but
1065 // rather than a brittle SKU list, `AmfEncoder::new` is authoritative —
1066 // AMF `CreateComponent(AMFVideoEncoderVCN_AV1)` fails on a pre-RDNA3 GPU
1067 // and we bail cleanly ("RDNA3+ GPU required"). Admit every AMD GPU here
1068 // and let that decide (matches the NVIDIA policy above).
1069 GpuVendor::Amd => true,
1070 // Intel: defer to the real path. AV1 QSV is Arc / Meteor Lake+, but
1071 // rather than a brittle family-name list, `QsvEncoder::new` is
1072 // authoritative — `MFXVideoENCODE_Query` (+ Init) reports whether the
1073 // GPU's oneVPL implementation supports AV1, and we bail cleanly if not.
1074 // Admit every Intel GPU here and let that decide (matches NVIDIA/AMD).
1075 GpuVendor::Intel => true,
1076 }
1077}
1078