Skip to main content

codec/
gpu.rs

1//! GPU device enumeration for NVDEC/NVENC scheduling.
2//!
3//! NVIDIA detection loads libcuda via dlopen, calls cuInit +
4//! cuDeviceGetCount + cuDeviceGetName. This works on minimal container
5//! images where the `nvidia-smi` binary may be absent but the driver's
6//! user-mode libraries are bind-mounted by the NVIDIA Container Toolkit.
7//! AMD/Intel detection scans /sys/bus/pci/devices on Linux.
8
9use std::ffi::{CStr, c_char, c_int, c_uint, c_void};
10use std::ptr;
11
/// One GPU discovered by [`detect_gpus`], together with the static
/// inventory attributes gathered at detect time.
#[derive(Debug, Clone)]
pub struct GpuDevice {
    /// Vendor whose detection path produced this record.
    pub vendor: GpuVendor,
    /// Display name. NVIDIA: the string returned by `cuDeviceGetName`
    /// (or `"NVIDIA GPU <ordinal>"` when that call fails). Intel: a
    /// label derived from the PCI device id. AMD: `"AMD GPU <device id>"`.
    pub name: String,
    /// Zero-based index within the vendor's own enumeration. NVIDIA
    /// uses the CUDA ordinal; AMD and Intel each count matching sysfs
    /// entries from 0, so the value is not unique across vendors.
    pub index: u32,
    /// Architecture / generation label, e.g. "Blackwell" (RTX 5060),
    /// "Ada Lovelace" (RTX 4000-series), "Ampere" (RTX 3000), "Alchemist DG2"
    /// (Arc A-series), "Battlemage BMG" (Arc B-series), "RDNA3" (RX 7000).
    /// Phase 2 (2026-05-07) inventory page surface — computed at detect
    /// time so the inventory aggregations don't have to re-derive it.
    /// AMD and Intel derive it from the PCI device id; NVIDIA derives it
    /// from the marketing name. "Unknown" when the device falls outside
    /// the per-vendor lookup table; preserved verbatim to the admin UI so
    /// operators can spot fleet rows that need a label update.
    pub generation: String,
    /// Lowercase `vendor:device` PCI tuple, e.g. `"0x10de:0x2d05"`. Stable
    /// identifier across driver / kernel versions. Empty string when the
    /// platform path doesn't expose a device id (for NVIDIA the tuple
    /// comes from NVML, so it is empty whenever NVML is unavailable; the
    /// field stays empty rather than synthesise something misleading).
    pub pci_id: String,
    /// Total VRAM in MiB. NVIDIA via NVML `memory_info().total`; AMD and
    /// Intel via `mem_info_vram_total` under the PCI device's sysfs
    /// directory (directly, or one hop down in `drm/<card>/device/`).
    /// Intel additionally falls back to a static per-SKU catalog.
    /// 0 when no source can provide it — admin UI shows "—" for that
    /// case rather than "0 MiB".
    pub vram_mib: u64,
    /// Vendor-reported serial number of the physical card. NVIDIA via
    /// NVML `Device::serial()` (returns the manufacturer's serial sticker
    /// for cards that have one — datacenter Tesla / A10G / consumer Pro
    /// cards expose it; consumer GeForce typically doesn't). Intel /
    /// AMD: the `serial_number` or `serial` file in the PCI device's
    /// sysfs directory; usually `None`. Empty values and the literal
    /// `"0"` are treated as absent. Stable identifier for warranty
    /// tracking + the `transcoder_gpus` asset table — when present, the
    /// same card across host moves dedups to a single row.
    pub serial: Option<String>,
    /// PCI host slot address, e.g. `"04:00.0"`. Used as the dedupe
    /// fallback when `serial` is absent — assumes the card stays in
    /// the same slot of the same host (the dev-box reality).
    /// Empty when the platform path doesn't expose it.
    pub host_pci_address: String,
    /// Vendor portion of the PCI tuple as a standalone hex string,
    /// e.g. `"0x10de"`. Already implicit in `pci_id` but exposed
    /// separately so the SQL inventory query can index on it
    /// without parsing. Always populated, even when `pci_id` is empty.
    pub vendor_id_hex: String,
}
57
/// GPU hardware vendor, one variant per detection path in this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GpuVendor {
    /// Detected through the CUDA driver library (PCI vendor `0x10de`).
    Nvidia,
    /// Detected through sysfs, PCI vendor `0x1002`.
    Amd,
    /// Detected through sysfs, PCI vendor `0x8086`.
    Intel,
}
64
65pub fn detect_gpus() -> Vec<GpuDevice> {
66    let mut devices = Vec::new();
67    devices.extend(detect_nvidia());
68    devices.extend(detect_amd());
69    devices.extend(detect_intel());
70    devices
71}
72
73/// Human-readable manufacturer label. Used by the WS hello frame's
74/// `WsGpuInfo.manufacturer` field and by the admin inventory page's
75/// "by manufacturer" rollup. Stays in lockstep with `vendor_label` in
76/// `transcoder/src/capabilities.rs` so the registration POST + the
77/// hello frame agree on the spelling.
78pub fn manufacturer_label(v: GpuVendor) -> &'static str {
79    match v {
80        GpuVendor::Nvidia => "NVIDIA",
81        GpuVendor::Amd => "AMD",
82        GpuVendor::Intel => "Intel",
83    }
84}
85
86pub fn has_nvidia() -> bool {
87    !detect_nvidia().is_empty()
88}
89
90// ─── NVIDIA via libcuda dlopen ─────────────────────────────────────
/// Status code returned by the CUDA driver entry points below; this
/// module treats `0` as success and anything else as failure.
type CUresult = c_int;
/// Device handle filled in by `cuDeviceGet` and passed to `cuDeviceGetName`.
type CUdevice = c_int;

/// Signature used for the `cuInit` symbol (single flags argument).
type FnCuInit = unsafe extern "C" fn(c_uint) -> CUresult;
/// Signature used for the `cuDeviceGetCount` symbol (out: device count).
type FnCuDeviceGetCount = unsafe extern "C" fn(*mut c_int) -> CUresult;
/// Signature used for the `cuDeviceGet` symbol (out: handle, in: ordinal).
type FnCuDeviceGet = unsafe extern "C" fn(*mut CUdevice, c_int) -> CUresult;
/// Signature used for the `cuDeviceGetName` symbol
/// (out: name buffer, in: buffer length, in: device handle).
type FnCuDeviceGetName = unsafe extern "C" fn(*mut c_char, c_int, CUdevice) -> CUresult;
98
99fn detect_nvidia() -> Vec<GpuDevice> {
100    // Try the usual driver library names across Linux / Windows.
101    let lib = unsafe { libloading::Library::new("libcuda.so") }
102        .or_else(|_| unsafe { libloading::Library::new("libcuda.so.1") })
103        .or_else(|_| unsafe { libloading::Library::new("nvcuda.dll") });
104
105    let Ok(lib) = lib else { return Vec::new() };
106
107    unsafe {
108        let cu_init: libloading::Symbol<FnCuInit> = match lib.get(b"cuInit") {
109            Ok(f) => f,
110            Err(_) => return Vec::new(),
111        };
112        // Initialization flag is reserved — must be zero.
113        if cu_init(0) != 0 {
114            return Vec::new();
115        }
116
117        let cu_device_get_count: libloading::Symbol<FnCuDeviceGetCount> =
118            match lib.get(b"cuDeviceGetCount") {
119                Ok(f) => f,
120                Err(_) => return Vec::new(),
121            };
122        let mut count: c_int = 0;
123        if cu_device_get_count(&mut count) != 0 || count <= 0 {
124            return Vec::new();
125        }
126
127        let cu_device_get: libloading::Symbol<FnCuDeviceGet> = match lib.get(b"cuDeviceGet") {
128            Ok(f) => f,
129            Err(_) => return Vec::new(),
130        };
131        let cu_device_get_name: libloading::Symbol<FnCuDeviceGetName> =
132            match lib.get(b"cuDeviceGetName") {
133                Ok(f) => f,
134                Err(_) => return Vec::new(),
135            };
136
137        let mut devices = Vec::with_capacity(count as usize);
138        for ordinal in 0..count {
139            let mut dev: CUdevice = 0;
140            if cu_device_get(&mut dev, ordinal) != 0 {
141                continue;
142            }
143            let mut name_buf = [0i8; 256];
144            let name = if cu_device_get_name(
145                name_buf.as_mut_ptr() as *mut c_char,
146                name_buf.len() as c_int,
147                dev,
148            ) == 0
149            {
150                CStr::from_ptr(name_buf.as_ptr() as *const c_char)
151                    .to_string_lossy()
152                    .into_owned()
153            } else {
154                format!("NVIDIA GPU {ordinal}")
155            };
156            // Phase 2 (2026-05-07) richer inventory: try to enrich
157            // via NVML for VRAM total + PCI id + serial + bus address +
158            // generation. NVML failure (driver missing, NVML so/dll
159            // absent) leaves those fields empty/zero; the
160            // cuda-reported `name` is still authoritative for the
161            // substring-based AV1 dispatch in supports_av1_encode.
162            let nvml_lookup = nvidia_nvml_lookup(ordinal as u32);
163            let generation = nvidia_generation_from_name(&name);
164            devices.push(GpuDevice {
165                vendor: GpuVendor::Nvidia,
166                name,
167                index: ordinal as u32,
168                generation,
169                pci_id: nvml_lookup.pci_id,
170                vram_mib: nvml_lookup.vram_mib,
171                serial: nvml_lookup.serial,
172                host_pci_address: nvml_lookup.host_pci_address,
173                vendor_id_hex: "0x10de".into(),
174            });
175        }
176        // Silence unused-import warnings from the libloading bounds checks
177        let _ = ptr::null::<c_void>();
178        devices
179    }
180}
181
182/// Initialize NVML, trying both the unversioned and SONAME-versioned
183/// library names. The default `Nvml::init()` dlopens `libnvidia-ml.so`
184/// (no suffix) — but the NVIDIA Container Toolkit only mounts
185/// `libnvidia-ml.so.1` into containers, with no unversioned alias.
186/// On the dev box we observed the bare `init()` failing with
187/// "cannot open shared object file" while the `.so.1` was present.
188/// Fall back to the explicit SONAME path; if both fail, the caller
189/// folds to "no NVML available" same as before.
190fn init_nvml_with_fallback() -> Result<nvml_wrapper::Nvml, nvml_wrapper::error::NvmlError> {
191    match nvml_wrapper::Nvml::init() {
192        Ok(n) => Ok(n),
193        Err(_) => nvml_wrapper::Nvml::builder()
194            .lib_path(std::ffi::OsStr::new("libnvidia-ml.so.1"))
195            .init(),
196    }
197}
198
/// NVML lookup result. Bundled into a struct so the call site stays
/// self-documenting as we add fields for the Phase 2 (2026-05-07)
/// inventory + asset-table extension.
///
/// `Default` (empty strings, 0, `None`) is what callers receive when
/// NVML cannot be initialised or the device cannot be opened.
#[derive(Debug, Clone, Default)]
struct NvmlLookup {
    /// PCI id tuple string; empty when NVML's PCI info is unavailable.
    pci_id: String,
    /// Total device memory in MiB; 0 when NVML's memory info is unavailable.
    vram_mib: u64,
    /// Board serial; `None` when NVML reports an error, an empty
    /// string, or the literal `"0"`.
    serial: Option<String>,
    /// PCI bus address with the leading zero domain stripped; empty
    /// when NVML's PCI info is unavailable.
    host_pci_address: String,
}
209
210/// NVML lookup helper. Returns enrichment fields for the given CUDA
211/// ordinal. NVML's device handle indexing matches CUDA's ordinal in
212/// the common case (`CUDA_VISIBLE_DEVICES` empty / unset); mismatches
213/// are tolerated by returning all defaults on `device_by_index`
214/// errors, which the caller folds to empty/None.
215///
216/// NVML init is performed inside this function and torn down on return —
217/// repeated lookups during device enumeration share the same NVML
218/// process across the loop body via the `Nvml::init` call cost
219/// (microseconds) rather than holding a long-lived handle in static
220/// storage. Cross-platform: the `nvml-wrapper` crate dlopens
221/// `libnvidia-ml.so.1` on Linux and `nvml.dll` on Windows, same shape
222/// as our existing `libcuda` libloading path.
223fn nvidia_nvml_lookup(ordinal: u32) -> NvmlLookup {
224    let nvml = match init_nvml_with_fallback() {
225        Ok(n) => n,
226        Err(_) => return NvmlLookup::default(),
227    };
228    let device = match nvml.device_by_index(ordinal) {
229        Ok(d) => d,
230        Err(_) => return NvmlLookup::default(),
231    };
232    let (pci_id, host_pci_address) = match device.pci_info() {
233        Ok(p) => {
234            let id = format!(
235                "0x{:04x}:0x{:04x}",
236                p.pci_device_id >> 16,
237                p.pci_device_id & 0xFFFF
238            );
239            // bus_id format: "00000000:04:00.0". Strip the leading
240            // 0000-domain so the abbreviation matches the lspci /
241            // /sys/bus/pci/devices/<bdf> form admins recognise.
242            let bus = p
243                .bus_id
244                .trim_start_matches('0')
245                .trim_start_matches(':')
246                .to_string();
247            // If trimming above ate too much (single-domain "0000:..."),
248            // fall back to the raw bus_id; defensive against leading-zero
249            // pathological cases.
250            let host_pci = if bus.is_empty() {
251                p.bus_id.clone()
252            } else {
253                bus
254            };
255            (id, host_pci)
256        }
257        Err(_) => (String::new(), String::new()),
258    };
259    let vram_mib = match device.memory_info() {
260        Ok(m) => m.total / 1024 / 1024,
261        Err(_) => 0,
262    };
263    // `serial()` returns Err for cards without a serial sticker
264    // (consumer GeForce typically; datacenter Tesla / A10G expose it).
265    // Don't fail the worker — debug-log + None per coordinator's
266    // "graceful failure" guidance.
267    //
268    // Per NVML docs: "0 is not a valid serial for a nvidia card."
269    // Some consumer cards / driver-fallback paths return literal "0"
270    // instead of erroring. Treat that as None too so we don't
271    // mistakenly create asset rows keyed on a sentinel value.
272    let serial = match device.serial() {
273        Ok(s) => {
274            let trimmed = s.trim();
275            if trimmed.is_empty() || trimmed == "0" {
276                None
277            } else {
278                Some(trimmed.to_string())
279            }
280        }
281        Err(e) => {
282            tracing::debug!(error = %e, ordinal, "nvml serial unavailable");
283            None
284        }
285    };
286    NvmlLookup {
287        pci_id,
288        vram_mib,
289        serial,
290        host_pci_address,
291    }
292}
293
/// NVIDIA generation lookup by marketing-name substring.
///
/// Matching is done on the lowercased name, in three passes whose
/// order matters:
///
/// 1. An explicit architecture word in the name ("Ada", "Blackwell",
///    ...) wins outright. Without this, "RTX 5000 Ada Generation"
///    would hit the `"rtx 50"` Blackwell needle.
/// 2. "Quadro RTX" boards are Turing regardless of their model number
///    ("Quadro RTX 4000" / "5000" would otherwise hit the `"rtx 40"` /
///    `"rtx 50"` needles).
/// 3. Model-number needles per generation, newest first, so the
///    datacenter SKUs (B100/B200) are considered before looser
///    consumer numbers.
///
/// Returns `"Unknown"` when nothing matches.
fn nvidia_generation_from_name(name: &str) -> String {
    /// True when `haystack` contains at least one of `needles`.
    fn contains_any(haystack: &str, needles: &[&str]) -> bool {
        needles.iter().any(|needle| haystack.contains(*needle))
    }

    // (architecture keyword, label) pairs checked before anything else.
    const ARCH_KEYWORDS: [(&str, &str); 6] = [
        ("blackwell", "Blackwell"),
        ("hopper", "Hopper"),
        ("ada", "Ada Lovelace"),
        ("ampere", "Ampere"),
        ("turing", "Turing"),
        ("pascal", "Pascal"),
    ];

    let n = name.to_lowercase();

    for (keyword, label) in ARCH_KEYWORDS.iter() {
        if n.contains(*keyword) {
            return (*label).into();
        }
    }

    // The Turing workstation line reuses 3000/4000/5000/6000/8000 model
    // numbers that collide with the consumer "rtx 30/40/50" needles.
    if n.contains("quadro rtx") {
        return "Turing".into();
    }

    // Blackwell consumer (RTX 50xx) + datacenter (B100/B200/GB200).
    if contains_any(
        &n,
        &[
            "rtx 50", "5050", "5060", "5070", "5080", "5090", "b100", "b200", "gb200",
        ],
    ) {
        return "Blackwell".into();
    }
    // Hopper datacenter (H100/H200). No NVENC silicon — surfaces in
    // the inventory page so operators don't try to schedule encodes.
    if contains_any(&n, &["h100", "h200"]) {
        return "Hopper".into();
    }
    // Ada Lovelace: RTX 40xx + L4/L40 datacenter.
    if contains_any(&n, &["rtx 40", "4060", "4070", "4080", "4090", "l4", "l40"]) {
        return "Ada Lovelace".into();
    }
    // Ampere: RTX 30xx + A10/A10G/A100.
    if contains_any(
        &n,
        &["rtx 30", "3050", "3060", "3070", "3080", "3090", "a10", "a100"],
    ) {
        return "Ampere".into();
    }
    // Turing: RTX 20xx, GTX 16xx + T4.
    if contains_any(&n, &["rtx 20", "2060", "2070", "2080", "gtx 16", " t4"]) {
        return "Turing".into();
    }
    // Pascal: GTX 10xx + P100/P40/P4. The leading space in " p4" keeps
    // it from matching inside unrelated tokens.
    if contains_any(
        &n,
        &["gtx 10", "1050", "1060", "1070", "1080", "p100", "p40", " p4"],
    ) {
        return "Pascal".into();
    }
    "Unknown".into()
}
369
370fn detect_amd() -> Vec<GpuDevice> {
371    // Linux: check /sys/bus/pci/devices for AMD GPU (vendor 1002)
372    #[cfg(target_os = "linux")]
373    {
374        if let Ok(entries) = std::fs::read_dir("/sys/bus/pci/devices") {
375            let mut idx = 0u32;
376            return entries
377                .filter_map(|e| e.ok())
378                .filter_map(|entry| {
379                    let vendor_path = entry.path().join("vendor");
380                    let class_path = entry.path().join("class");
381                    let vendor = std::fs::read_to_string(&vendor_path).ok()?;
382                    let class = std::fs::read_to_string(&class_path).ok()?;
383                    // VGA (0x030000) or 3D controller (0x030200)
384                    if vendor.trim() == "0x1002" && class.trim().starts_with("0x0302") {
385                        let device_path = entry.path().join("device");
386                        let device = std::fs::read_to_string(&device_path)
387                            .unwrap_or_default()
388                            .trim()
389                            .to_string();
390                        let after = device.trim_start_matches("0x");
391                        let pci_id = format!("0x1002:0x{after}");
392                        let vram_mib = read_drm_vram_mib(&entry.path());
393                        let generation = amd_generation_from_device_id(&device);
394                        let host_pci_address = host_pci_address_from_sysfs(&entry.path());
395                        let serial = read_drm_serial(&entry.path());
396                        let dev = GpuDevice {
397                            vendor: GpuVendor::Amd,
398                            name: format!("AMD GPU {device}"),
399                            index: idx,
400                            generation,
401                            pci_id,
402                            vram_mib,
403                            serial,
404                            host_pci_address,
405                            vendor_id_hex: "0x1002".into(),
406                        };
407                        idx += 1;
408                        Some(dev)
409                    } else {
410                        None
411                    }
412                })
413                .collect();
414        }
415    }
416    Vec::new()
417}
418
419fn detect_intel() -> Vec<GpuDevice> {
420    #[cfg(target_os = "linux")]
421    {
422        if let Ok(entries) = std::fs::read_dir("/sys/bus/pci/devices") {
423            let mut idx = 0u32;
424            return entries
425                .filter_map(|e| e.ok())
426                .filter_map(|entry| {
427                    let vendor_path = entry.path().join("vendor");
428                    let class_path = entry.path().join("class");
429                    let device_path = entry.path().join("device");
430                    let vendor = std::fs::read_to_string(&vendor_path).ok()?;
431                    let class = std::fs::read_to_string(&class_path).ok()?;
432                    if vendor.trim() == "0x8086" && class.trim().starts_with("0x0300") {
433                        // Read the PCI device ID so we can label the GPU
434                        // by family. Without this every Intel device was
435                        // tagged "Intel Integrated GPU" — which made
436                        // `supports_av1_encode`'s `contains("arc")`
437                        // substring match miss the discrete Arc cards
438                        // and silently route every job to rav1e CPU.
439                        let device_id_str = std::fs::read_to_string(&device_path)
440                            .ok()
441                            .map(|s| s.trim().to_string())
442                            .unwrap_or_default();
443                        let name = intel_label_from_device_id(&device_id_str);
444                        let pci_id = if device_id_str.starts_with("0x") {
445                            format!("0x8086:{device_id_str}")
446                        } else {
447                            String::new()
448                        };
449                        // Prefer the live sysfs read (newer i915
450                        // exposes total VRAM via mem_info_vram_total).
451                        // Fall back to the static SKU catalog when the
452                        // sysfs path is missing — the dev box's kernel
453                        // is one of the older versions that doesn't
454                        // export the field.
455                        let live_vram = read_drm_vram_mib(&entry.path());
456                        let vram_mib = if live_vram > 0 {
457                            live_vram
458                        } else {
459                            intel_vram_mib_from_device_id(&device_id_str)
460                                .map(u64::from)
461                                .unwrap_or(0)
462                        };
463                        let generation = intel_generation_from_device_id(&device_id_str);
464                        let host_pci_address = host_pci_address_from_sysfs(&entry.path());
465                        let serial = read_drm_serial(&entry.path());
466                        let dev = GpuDevice {
467                            vendor: GpuVendor::Intel,
468                            name,
469                            index: idx,
470                            generation,
471                            pci_id,
472                            vram_mib,
473                            serial,
474                            host_pci_address,
475                            vendor_id_hex: "0x8086".into(),
476                        };
477                        idx += 1;
478                        Some(dev)
479                    } else {
480                        None
481                    }
482                })
483                .collect();
484        }
485    }
486    Vec::new()
487}
488
/// Total VRAM in MiB for the PCI device whose sysfs directory is
/// `device_path`, or 0 when it can't be determined.
///
/// Two locations are consulted, in this order, and the first file
/// that reads and parses as an unsigned byte count wins:
///
/// 1. `<device_path>/mem_info_vram_total`
/// 2. `<device_path>/drm/<entry>/device/mem_info_vram_total`, for each
///    entry of the `drm` directory in iteration order
///
/// Integrated parts that share system memory generally don't expose
/// the file, in which case the inventory page renders "—". Every
/// failure (missing file, unreadable, not a number) is silent.
#[cfg(target_os = "linux")]
fn read_drm_vram_mib(device_path: &std::path::Path) -> u64 {
    const BYTES_PER_MIB: u64 = 1024 * 1024;

    // Read one candidate file and convert its byte count to MiB.
    let read_mib = |file: std::path::PathBuf| -> Option<u64> {
        let text = std::fs::read_to_string(file).ok()?;
        let bytes = text.trim().parse::<u64>().ok()?;
        Some(bytes / BYTES_PER_MIB)
    };

    if let Some(mib) = read_mib(device_path.join("mem_info_vram_total")) {
        return mib;
    }

    // Some kernel versions put the file one hop further down, under
    // the DRM card node.
    let Ok(cards) = std::fs::read_dir(device_path.join("drm")) else {
        return 0;
    };
    cards
        .flatten()
        .find_map(|card| read_mib(card.path().join("device").join("mem_info_vram_total")))
        .unwrap_or(0)
}

/// Non-Linux stand-in: there is no sysfs to read, so VRAM is reported
/// as unknown (0).
#[cfg(not(target_os = "linux"))]
fn read_drm_vram_mib(_device_path: &std::path::Path) -> u64 {
    0
}
527
/// Short PCI bus address (e.g. `04:00.0`) for a sysfs device path.
///
/// The last path component of `/sys/bus/pci/devices/0000:04:00.0` is
/// the full `<domain>:<bus>:<device>.<function>` address. A leading
/// `0000:` domain is dropped so the result matches what `lspci`
/// prints; any other component (including a non-zero domain) is
/// returned unchanged. Yields an empty string when the path has no
/// final component or it isn't valid UTF-8.
#[cfg(target_os = "linux")]
fn host_pci_address_from_sysfs(device_path: &std::path::Path) -> String {
    device_path
        .file_name()
        .and_then(|leaf| leaf.to_str())
        .map(|bdf| bdf.strip_prefix("0000:").unwrap_or(bdf).to_string())
        .unwrap_or_default()
}

/// Non-Linux stand-in: no sysfs, so no bus address is reported.
#[cfg(not(target_os = "linux"))]
fn host_pci_address_from_sysfs(_device_path: &std::path::Path) -> String {
    String::new()
}
552
/// Best-effort card serial from the PCI device's sysfs directory.
///
/// Looks for `serial_number`, then `serial`, directly under
/// `device_path` and returns the first whose trimmed content is
/// usable. Consumer cards usually expose neither file.
///
/// An empty value or the literal `"0"` is not usable: `"0"` is the
/// placeholder reported when there is no real serial (NVML documents
/// the same convention: "0 is not a valid serial for a nvidia card"),
/// and asset rows must not be keyed on it.
#[cfg(target_os = "linux")]
fn read_drm_serial(device_path: &std::path::Path) -> Option<String> {
    ["serial_number", "serial"].iter().find_map(|fname| {
        let raw = std::fs::read_to_string(device_path.join(fname)).ok()?;
        let value = raw.trim();
        (!value.is_empty() && value != "0").then(|| value.to_string())
    })
}

/// Non-Linux stand-in: no sysfs, so no serial is ever reported.
#[cfg(not(target_os = "linux"))]
fn read_drm_serial(_device_path: &std::path::Path) -> Option<String> {
    None
}
580
/// AMD generation lookup by PCI device id.
///
/// `device_id` is the sysfs `device` value, i.e. `0x` followed by up
/// to four hex digits. Anything that doesn't parse that way, or falls
/// outside the table, yields `"Unknown"`.
///
/// RDNA3 (RX 7000) is the only generation we have AV1 encode silicon
/// on today; the older families are labelled so operators can see the
/// lay of the fleet on the inventory page. The ranges are coarse
/// family windows, cross-checked against the upstream amdgpu driver's
/// PCI table (`drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c`).
///
/// The windows are written without overlap so that arm order can't
/// shadow a family: the RDNA1 ids sit inside the wider 0x73xx block
/// and Navi 24 (RDNA2) sits inside the 0x74xx block.
fn amd_generation_from_device_id(device_id: &str) -> String {
    let Some(id) = device_id
        .strip_prefix("0x")
        .and_then(|hex| u16::from_str_radix(hex, 16).ok())
    else {
        return "Unknown".into();
    };

    let label = match id {
        // Navi 24 (RDNA2) — RX 6400 / 6500 XT; carved out of 0x74xx.
        0x7420..=0x743f => "RDNA2",
        // Navi 31 / 32 / 33 (RDNA3) — RX 7000 series.
        0x7400..=0x741f | 0x7440..=0x74ff => "RDNA3",
        // Navi 10 / 14 (RDNA1) — RX 5000 series.
        0x7310..=0x7350 => "RDNA1",
        // Navi 21 / 22 / 23 (RDNA2) — RX 6000 series, plus the rest of
        // the 0x73xx block as in the original table.
        0x7300..=0x730f | 0x7351..=0x73ff => "RDNA2",
        // Vega 10 / 20 (GCN5) — Vega 56/64, MI50/60.
        0x6860..=0x687f => "Vega",
        // Polaris 10/11/12 (GCN4) — RX 400 / 500.
        0x67c0..=0x67ff | 0x6980..=0x69ff => "Polaris",
        _ => "Unknown",
    };
    label.into()
}
607
/// Intel generation lookup by PCI device id.
///
/// `device_id` is the sysfs `device` value (`0x` + up to four hex
/// digits). The id is matched against a table of inclusive id
/// windows; the first window containing it supplies the label, and
/// anything unparseable or outside every window is `"Unknown"`.
///
/// The windows mirror the family ranges in
/// `intel_label_from_device_id`, so the generation rollup and the
/// per-row name agree for the families both functions know about.
fn intel_generation_from_device_id(device_id: &str) -> String {
    // (first id, last id, label) — windows do not overlap.
    const FAMILIES: [(u16, u16, &str); 7] = [
        // Alchemist DG2 — 0x5690..=0x56af.
        (0x5690, 0x56af, "Alchemist DG2"),
        // Battlemage BMG-G21.
        (0xe200, 0xe21f, "Battlemage BMG"),
        // Lunar Lake Xe2 iGPU.
        (0x6420, 0x643f, "Lunar Lake"),
        // Meteor Lake Xe-LP iGPU.
        (0x7d40, 0x7d6f, "Meteor Lake"),
        // Older iGPU families: no AV1 encode silicon, labelled by
        // family purely for fleet visibility.
        (0xa780, 0xa7ff, "Raptor Lake"),
        (0x4680, 0x46ff, "Alder Lake"),
        (0x9a00, 0x9aff, "Tiger Lake"),
    ];

    device_id
        .strip_prefix("0x")
        .and_then(|hex| u16::from_str_radix(hex, 16).ok())
        .and_then(|id| {
            FAMILIES
                .iter()
                .find(|window| window.0 <= id && id <= window.1)
                .map(|window| window.2)
        })
        .unwrap_or("Unknown")
        .to_string()
}
632
/// Catalog VRAM total in MiB for known Intel discrete SKUs, keyed by
/// PCI device id (`0xNNNN`). `None` for ids that don't parse or
/// aren't in the catalog.
///
/// The i915 driver on the dev box's kernel doesn't expose
/// `mem_info_vram_total` — that file was added later — so the live
/// read returns zero for both Arc cards. This static table lets the
/// inventory page at least distinguish "4 GB card" from "8 GB card"
/// without depending on kernel introspection. Live `mem_used_mib`
/// stays 0 until i915_pmu / intel_gpu_top wiring lands; that's a
/// separate task.
///
/// A770 ships in 8 GB and 16 GB Limited Edition variants under the
/// same device id (0x56a0). Telling them apart needs the subsystem
/// device id; the catalog reports the more common 8 GB SKU and
/// accepts the LE undercount as a known limitation.
fn intel_vram_mib_from_device_id(device_id: &str) -> Option<u32> {
    let hex = device_id.strip_prefix("0x")?;
    let id = u16::from_str_radix(hex, 16).ok()?;
    // Capacities grouped by size, in GiB.
    let gib: u32 = match id {
        // A310, A350M
        0x56a6 | 0x5693 => 4,
        // A380
        0x56a5 => 6,
        // A770 (8 GB; 16 GB LE shares this id), A750, A580, A550M
        0x56a0 | 0x56a1 | 0x56a2 | 0x5692 => 8,
        // B570
        0xe20c => 10,
        // A730M, B580
        0x5691 | 0xe20b => 12,
        // A770M (16 GB common spec)
        0x5690 => 16,
        // Unknown DG2 / BMG SKUs and everything else: not in the catalog.
        _ => return None,
    };
    Some(gib * 1024)
}

/// Map an Intel PCI device id (`0xNNNN`) to a human-readable label.
///
/// Discrete Arc GPUs (Alchemist DG2, Battlemage BMG) get a per-SKU
/// name where the device id is well known, so admins can tell an A310
/// from an A750 in the inventory log, and a family-level name for
/// other ids in the same range. Lunar Lake and Meteor Lake iGPUs are
/// family-level only (AV1 QSV silicon is a property of the family,
/// not the SKU). Every other parseable id becomes
/// `"Intel iGPU 0xNNNN"`, and an id that doesn't parse becomes
/// `"Intel GPU"`.
///
/// Device-id table cross-checked against the upstream kernel's Intel
/// PCI id lists (`drivers/gpu/drm/i915/i915_pciids.h` / `xe_pci.c`)
/// for the DG2 + BMG entries.
fn intel_label_from_device_id(device_id: &str) -> String {
    let Some(id) = device_id
        .strip_prefix("0x")
        .and_then(|hex| u16::from_str_radix(hex, 16).ok())
    else {
        return "Intel GPU".into();
    };

    // Pass 1: exact SKUs.
    let known_sku = match id {
        // Alchemist DG2-128 (small die).
        0x56a5 => Some("Intel Arc A380"),
        0x56a6 => Some("Intel Arc A310"),
        0x5693 => Some("Intel Arc A350M"),
        // Alchemist DG2-512 (full die), desktop then mobile.
        0x56a0 => Some("Intel Arc A770"),
        0x56a1 => Some("Intel Arc A750"),
        0x56a2 => Some("Intel Arc A580"),
        0x5690 => Some("Intel Arc A770M"),
        0x5691 => Some("Intel Arc A730M"),
        0x5692 => Some("Intel Arc A550M"),
        // Battlemage BMG-G21.
        0xe20b => Some("Intel Arc B580"),
        0xe20c => Some("Intel Arc B570"),
        _ => None,
    };
    if let Some(label) = known_sku {
        return label.into();
    }

    // Pass 2: family windows for ids without a per-SKU entry.
    match id {
        // Untagged DG2 id — likely a future SKU or a variant we
        // haven't named. The family name is kept in the label so
        // name-based dispatch elsewhere can still recognise it
        // (NOTE(review): the consumer of that substring is outside
        // this view — confirm).
        0x5690..=0x56af => format!("Intel Arc Alchemist (DG2 0x{id:04x})"),
        0xe200..=0xe21f => format!("Intel Arc Battlemage (BMG 0x{id:04x})"),
        // Lunar Lake Xe2 iGPU (Core Ultra 2xx mobile) — has AV1 encode.
        0x6420..=0x643f => "Intel Lunar Lake iGPU".into(),
        // Meteor Lake Xe-LP iGPU (Core Ultra 1xx mobile) — has AV1 encode.
        0x7d40..=0x7d6f => "Intel Meteor Lake iGPU".into(),
        // Everything else is treated as an older iGPU (Coffee Lake →
        // DG1 → Tiger Lake → Alder Lake → Raptor Lake): plenty of
        // decode formats, no AV1 QSV.
        other => format!("Intel iGPU 0x{other:04x}"),
    }
}
721
/// Live utilisation snapshot for a single GPU.
///
/// Sampled on every load tick (5 s cadence) by the Phase 2 (2026-05-07)
/// `worker_load` reporter and folded into the `WsGpuLeaseEntry` sent on
/// the wire. Where the numbers come from depends on the vendor:
///
/// * NVIDIA — NVML.
/// * Intel — sysfs: the `gt_cur_freq_mhz` / `gt_max_freq_mhz` ratio as a
///   coarse "busy" proxy, plus `mem_info_vram_*` for memory.
/// * AMD — currently a no-op (every field stays at its default);
///   radeontop / `amdsmi` integration is the proper fix and is deferred
///   per the brief's "Phase 1 stand-in for Intel; AMD skipped" guidance.
///
/// `Default` yields the all-zero / `None` snapshot that the readers fall
/// back to when a value cannot be obtained.
#[derive(Debug, Clone, Default)]
pub struct GpuUtilization {
    /// Overall / compute busy percentage, `0..=100`.
    pub util_percent: u8,
    /// Encoder pipeline (NVENC ASIC) busy percentage, `0..=100`.
    pub encoder_percent: u8,
    /// Decoder pipeline (NVDEC ASIC) busy percentage, `0..=100`.
    pub decoder_percent: u8,
    /// VRAM currently in use, in MiB.
    pub mem_used_mib: u32,
    /// Total VRAM, in MiB. Repeats the value on the static device record
    /// so a wire entry carries everything the FE needs to draw its bar.
    pub mem_total_mib: u32,
    /// Core temperature in °C, or `None` when the platform path does not
    /// expose one.
    pub temperature_c: Option<u8>,
}
747
748/// One-shot accumulator that opens NVML once and reads per-GPU
749/// utilisation for every NVIDIA device on each load tick. Holding
750/// the NVML handle across reads avoids the init cost
751/// (microseconds) on every tick and is the documented pattern.
752pub struct GpuUtilizationReader {
753    nvml: Option<nvml_wrapper::Nvml>,
754}
755
756impl GpuUtilizationReader {
757    /// Build a reader. NVML init failure is non-fatal — the reader
758    /// folds to "all zeroes" on every NVIDIA device and the rest of
759    /// the load-tick path stays alive. Logged once at startup so
760    /// operators can tell "no NVIDIA card" from "NVIDIA card but
761    /// driver missing".
762    pub fn new() -> Self {
763        let nvml = match init_nvml_with_fallback() {
764            Ok(n) => Some(n),
765            Err(e) => {
766                // info-level: many production hosts are AMD/Intel-only
767                // and this isn't a problem. Operators looking at the
768                // dev box logs see this once at boot.
769                tracing::info!(error = %e, "nvml not available; NVIDIA GPU utilisation will be 0");
770                None
771            }
772        };
773        Self { nvml }
774    }
775
776    /// Read the per-tick snapshot for one device. Cheap when NVML is
777    /// available (handful of FFI calls); free when it's not (returns
778    /// the zero-initialised default).
779    pub fn read(&self, device: &GpuDevice) -> GpuUtilization {
780        match device.vendor {
781            GpuVendor::Nvidia => self.read_nvidia(device).unwrap_or_default(),
782            GpuVendor::Intel => self.read_intel(device).unwrap_or_default(),
783            GpuVendor::Amd => GpuUtilization::default(),
784        }
785    }
786
787    fn read_nvidia(&self, device: &GpuDevice) -> Option<GpuUtilization> {
788        let nvml = self.nvml.as_ref()?;
789        let dev = nvml.device_by_index(device.index).ok()?;
790        let util = dev.utilization_rates().ok();
791        // EncoderUtilizationInfo / DecoderUtilizationInfo have a
792        // `utilization` field (0..=100) plus a sampling period; we
793        // surface only the percentage.
794        let enc = dev.encoder_utilization().ok();
795        let dec = dev.decoder_utilization().ok();
796        let mem = dev.memory_info().ok();
797        let temp = dev
798            .temperature(nvml_wrapper::enum_wrappers::device::TemperatureSensor::Gpu)
799            .ok()
800            .and_then(|t| u8::try_from(t).ok());
801        Some(GpuUtilization {
802            util_percent: util.as_ref().map(|u| u.gpu.min(100) as u8).unwrap_or(0),
803            encoder_percent: enc
804                .as_ref()
805                .map(|e| e.utilization.min(100) as u8)
806                .unwrap_or(0),
807            decoder_percent: dec
808                .as_ref()
809                .map(|d| d.utilization.min(100) as u8)
810                .unwrap_or(0),
811            mem_used_mib: mem
812                .as_ref()
813                .map(|m| (m.used / 1024 / 1024) as u32)
814                .unwrap_or(0),
815            mem_total_mib: mem
816                .as_ref()
817                .map(|m| (m.total / 1024 / 1024) as u32)
818                .unwrap_or(device.vram_mib as u32),
819            temperature_c: temp,
820        })
821    }
822
823    /// Intel stand-in via sysfs `gt_cur_freq_mhz` / `gt_max_freq_mhz`
824    /// for a coarse "busy" proxy and `mem_info_vram_used` for memory.
825    /// The i915 driver doesn't expose per-engine busy% via sysfs
826    /// cleanly — `intel_gpu_top -J` is the proper source but the
827    /// fork+capture cost on every 5 s tick is heavy. Phase 1: leave
828    /// encoder/decoder at 0 and let `util_percent` be the freq-ratio
829    /// proxy; real fix is the perf event interface (`i915_pmu`)
830    /// which deserves its own task.
831    #[cfg(target_os = "linux")]
832    fn read_intel(&self, _device: &GpuDevice) -> Option<GpuUtilization> {
833        // We don't have the bdf here, so walk /sys/class/drm/cardN
834        // for an Intel card. Index 0 returns the first one that
835        // matches; multi-Intel hosts (rare today) get the same
836        // utilisation reported across both — acceptable until the
837        // proper i915_pmu integration lands.
838        let mut out = GpuUtilization::default();
839        if let Ok(entries) = std::fs::read_dir("/sys/class/drm") {
840            for entry in entries.flatten() {
841                let name = entry.file_name();
842                let Some(name_str) = name.to_str() else {
843                    continue;
844                };
845                if !name_str.starts_with("card") || name_str.contains('-') {
846                    continue;
847                }
848                // Confirm Intel via vendor file under device link.
849                let device_link = entry.path().join("device").join("vendor");
850                let vendor = std::fs::read_to_string(&device_link).unwrap_or_default();
851                if vendor.trim() != "0x8086" {
852                    continue;
853                }
854                let cur = std::fs::read_to_string(entry.path().join("gt_cur_freq_mhz"))
855                    .ok()
856                    .and_then(|s| s.trim().parse::<u32>().ok());
857                let max = std::fs::read_to_string(entry.path().join("gt_max_freq_mhz"))
858                    .ok()
859                    .and_then(|s| s.trim().parse::<u32>().ok());
860                if let (Some(cur), Some(max)) = (cur, max) {
861                    if max > 0 {
862                        out.util_percent = ((cur as u64 * 100 / max as u64).min(100)) as u8;
863                    }
864                }
865                let used =
866                    std::fs::read_to_string(entry.path().join("device").join("mem_info_vram_used"))
867                        .ok()
868                        .and_then(|s| s.trim().parse::<u64>().ok());
869                let total = std::fs::read_to_string(
870                    entry.path().join("device").join("mem_info_vram_total"),
871                )
872                .ok()
873                .and_then(|s| s.trim().parse::<u64>().ok());
874                if let Some(u) = used {
875                    out.mem_used_mib = (u / 1024 / 1024) as u32;
876                }
877                if let Some(t) = total {
878                    out.mem_total_mib = (t / 1024 / 1024) as u32;
879                }
880                // Fall back to the catalog VRAM total stored on the
881                // device record when sysfs didn't expose it. The dev
882                // box's kernel doesn't have mem_info_vram_total, so
883                // without this Intel cards report 0 / 0 forever.
884                if out.mem_total_mib == 0 && _device.vram_mib > 0 {
885                    out.mem_total_mib = _device.vram_mib as u32;
886                }
887                // Fall back to DRM fdinfo aggregation when sysfs didn't
888                // expose `mem_info_vram_used` (older kernels). Filtered
889                // to this card's PCI BDF so multi-Intel hosts report
890                // per-device used memory, not the cross-card total.
891                // This is the same source `intel_gpu_top -J` and `nvtop`
892                // use, available since kernel ~5.19 (i915) / ~6.8 (xe).
893                if out.mem_used_mib == 0 {
894                    let bdf = read_pci_bdf_from_drm_card(&entry.path());
895                    if let Some(bytes) = read_intel_vram_resident_bytes(bdf.as_deref()) {
896                        out.mem_used_mib = (bytes / 1024 / 1024) as u32;
897                    }
898                }
899                return Some(out);
900            }
901        }
902        if out.mem_total_mib == 0 && _device.vram_mib > 0 {
903            out.mem_total_mib = _device.vram_mib as u32;
904        }
905        Some(out)
906    }
907
908    #[cfg(not(target_os = "linux"))]
909    fn read_intel(&self, _device: &GpuDevice) -> Option<GpuUtilization> {
910        // Windows path for Intel hosts is performance-counter via
911        // the WMI `Win32_PerfFormattedData_GPUPerformanceCounters_GPUEngine`
912        // surface — same fork-cost concern as `intel_gpu_top` on
913        // Linux, deferred. Returns all zeroes.
914        Some(GpuUtilization::default())
915    }
916}
917
/// Look up the PCI BDF (e.g. `0000:03:00.0`) behind a
/// `/sys/class/drm/cardN` directory.
///
/// Reads the `device` symlink inside `card_dir` and returns the final
/// path component of its target, which for PCI GPUs is the BDF of the
/// device node. Returns `None` when the link cannot be read (non-PCI
/// virtual GPUs etc.) or its last component is missing / not valid
/// UTF-8.
#[cfg(target_os = "linux")]
fn read_pci_bdf_from_drm_card(card_dir: &std::path::Path) -> Option<String> {
    let pci_node = std::fs::read_link(card_dir.join("device")).ok()?;
    let bdf = pci_node.file_name()?.to_str()?;
    Some(bdf.to_owned())
}
931
932/// Aggregate Intel VRAM bytes resident across every DRM client by
933/// walking `/proc/*/fdinfo/*`. The kernel exposes per-fd accounting
934/// in DRM fdinfo (i915 since ~5.19, xe driver since ~6.8); summing
935/// `drm-resident-local0` across all clients gives the same number
936/// `intel_gpu_top -J` reports for "VRAM used".
937///
938/// When `bdf_filter` is `Some(...)`, only fdinfo entries whose
939/// `drm-pdev:` matches that BDF are counted — the multi-Intel case
940/// (the dev box has Arc A750 + Arc A310 today) gets per-card
941/// accounting instead of a cross-card total. When `None`, every
942/// Intel client is summed.
943///
944/// Returns `None` when no Intel DRM clients are visible (rather than
945/// `Some(0)`) so the caller can distinguish "no usage right now"
946/// from "fdinfo path not available on this kernel" — the former
947/// shouldn't trigger a different fallback, the latter could.
948#[cfg(target_os = "linux")]
949fn read_intel_vram_resident_bytes(bdf_filter: Option<&str>) -> Option<u64> {
950    let proc_dir = std::fs::read_dir("/proc").ok()?;
951    let mut total_bytes: u64 = 0;
952    let mut found_any_intel_client = false;
953
954    for proc_entry in proc_dir.flatten() {
955        let pid_name = proc_entry.file_name();
956        let Some(pid_str) = pid_name.to_str() else {
957            continue;
958        };
959        if !pid_str.bytes().all(|b| b.is_ascii_digit()) {
960            continue;
961        }
962        let fdinfo_dir = proc_entry.path().join("fdinfo");
963        let Ok(fd_entries) = std::fs::read_dir(&fdinfo_dir) else {
964            continue;
965        };
966
967        for fd_entry in fd_entries.flatten() {
968            // fdinfo files for non-DRM fds are short and have no
969            // "drm-driver" key — read_to_string is cheap on those.
970            // Permission errors on other-user processes also fall
971            // through silently (the transcoder runs as root in our
972            // production container, so this is rare in practice).
973            let Ok(content) = std::fs::read_to_string(fd_entry.path()) else {
974                continue;
975            };
976            if !content.contains("drm-driver:") {
977                continue;
978            }
979            // Match either i915 (mainline Intel driver) or xe (newer
980            // Intel driver shipping with kernel 6.8+; takes over Arc
981            // discrete cards). Whitespace between key and value is a
982            // single tab in i915's emitter and a single space in xe's
983            // — accept both.
984            let is_intel = content
985                .lines()
986                .filter_map(|l| l.strip_prefix("drm-driver:"))
987                .any(|v| {
988                    let v = v.trim();
989                    v == "i915" || v == "xe"
990                });
991            if !is_intel {
992                continue;
993            }
994            // Optional BDF filter — only count clients on the card we
995            // care about. drm-pdev format is `drm-pdev: 0000:03:00.0`.
996            if let Some(want_bdf) = bdf_filter {
997                let matches = content
998                    .lines()
999                    .filter_map(|l| l.strip_prefix("drm-pdev:"))
1000                    .any(|v| v.trim() == want_bdf);
1001                if !matches {
1002                    continue;
1003                }
1004            }
1005            found_any_intel_client = true;
1006            // Sum drm-resident-local0 across the client. "local0" is
1007            // the i915/xe naming for the on-card VRAM region; values
1008            // are formatted as "<num> <unit>" with unit ∈ {B, KiB,
1009            // MiB, GiB} per drm-fdinfo.rst.
1010            for line in content.lines() {
1011                if let Some(rest) = line.strip_prefix("drm-resident-local0:") {
1012                    if let Some(bytes) = parse_drm_size(rest) {
1013                        total_bytes = total_bytes.saturating_add(bytes);
1014                    }
1015                }
1016            }
1017        }
1018    }
1019
1020    if found_any_intel_client {
1021        Some(total_bytes)
1022    } else {
1023        None
1024    }
1025}
1026
/// Convert a DRM fdinfo size value into bytes.
///
/// Accepts `<number>` optionally followed by a unit of `B`, `KiB`, `MiB`
/// or `GiB`; a number with no unit is taken as bytes. Surrounding
/// whitespace is ignored, and so are any tokens after the unit. The
/// multiplication saturates at `u64::MAX` instead of overflowing.
///
/// Returns `None` when the input is empty, the number does not parse as
/// a `u64`, or the unit is not one of the recognised spellings.
#[cfg(target_os = "linux")]
fn parse_drm_size(s: &str) -> Option<u64> {
    let mut tokens = s.split_whitespace();
    let count = tokens.next()?.parse::<u64>().ok()?;
    let scale: u64 = match tokens.next() {
        None | Some("B") => 1,
        Some("KiB") => 1 << 10,
        Some("MiB") => 1 << 20,
        Some("GiB") => 1 << 30,
        Some(_) => return None,
    };
    Some(count.saturating_mul(scale))
}
1045
1046impl Default for GpuUtilizationReader {
1047    fn default() -> Self {
1048        Self::new()
1049    }
1050}
1051
1052pub fn supports_av1_encode(device: &GpuDevice) -> bool {
1053    match device.vendor {
1054        // NVIDIA: defer to the **real driver capability query**, not a
1055        // board-name list. The substring list this used to carry was brittle —
1056        // every new SKU had to be added by hand, and a missed one (e.g. the
1057        // RTX 5060 once was) now *hard-fails* the job since there's no CPU
1058        // fallback. NVENC AV1 support is authoritatively validated by
1059        // `nvEncGetEncodeCaps` / `GetEncodeGUIDs` in `NvencEncoder::new`, which
1060        // enumerates the GPU's actual encode codecs and bails cleanly if AV1
1061        // isn't among them (verified on an RTX 3090: "2 codec(s), none AV1").
1062        // So admit every NVIDIA GPU here and let the real query be the gate.
1063        GpuVendor::Nvidia => true,
1064        // AMD: defer to the real path. AV1 VCN encode is RDNA3+ (RX 7000+), but
1065        // rather than a brittle SKU list, `AmfEncoder::new` is authoritative —
1066        // AMF `CreateComponent(AMFVideoEncoderVCN_AV1)` fails on a pre-RDNA3 GPU
1067        // and we bail cleanly ("RDNA3+ GPU required"). Admit every AMD GPU here
1068        // and let that decide (matches the NVIDIA policy above).
1069        GpuVendor::Amd => true,
1070        // Intel: defer to the real path. AV1 QSV is Arc / Meteor Lake+, but
1071        // rather than a brittle family-name list, `QsvEncoder::new` is
1072        // authoritative — `MFXVideoENCODE_Query` (+ Init) reports whether the
1073        // GPU's oneVPL implementation supports AV1, and we bail cleanly if not.
1074        // Admit every Intel GPU here and let that decide (matches NVIDIA/AMD).
1075        GpuVendor::Intel => true,
1076    }
1077}
1078