Skip to main content

codec/gpu/
mod.rs

//! GPU device enumeration for NVDEC/NVENC scheduling.
//!
//! NVIDIA detection loads libcuda via dlopen, then calls `cuInit`,
//! `cuDeviceGetCount`, and `cuDeviceGetName`. This works on minimal container
//! images where the `nvidia-smi` binary may be absent but the driver's
//! user-mode libraries are bind-mounted by the NVIDIA Container Toolkit.
//! AMD/Intel detection scans `/sys/bus/pci/devices` on Linux.
8
9mod amd;
10mod intel;
11mod nvidia;
12mod sysfs;
13mod types;
14mod utilization;
15
16pub use types::{GpuDevice, GpuUtilization, GpuVendor};
17pub use utilization::GpuUtilizationReader;
18
19pub fn detect_gpus() -> Vec<GpuDevice> {
20    let mut devices = Vec::new();
21    devices.extend(nvidia::detect_nvidia());
22    devices.extend(amd::detect_amd());
23    devices.extend(intel::detect_intel());
24    // Each detect_* numbers its own vendor from 0 (kept as `vendor_index`).
25    // Assign the GLOBAL `index` here so a mixed host (e.g. NVIDIA + AMD iGPU)
26    // gets unique, user-addressable indices instead of colliding 0s.
27    for (i, d) in devices.iter_mut().enumerate() {
28        d.index = i as u32;
29    }
30    devices
31}
32
33/// Human-readable manufacturer label. Used by the WS hello frame's
34/// `WsGpuInfo.manufacturer` field and by the admin inventory page's
35/// "by manufacturer" rollup. Stays in lockstep with `vendor_label` in
36/// `transcoder/src/capabilities.rs` so the registration POST + the
37/// hello frame agree on the spelling.
38pub fn manufacturer_label(v: GpuVendor) -> &'static str {
39    match v {
40        GpuVendor::Nvidia => "NVIDIA",
41        GpuVendor::Amd => "AMD",
42        GpuVendor::Intel => "Intel",
43    }
44}
45
46pub fn has_nvidia() -> bool {
47    !nvidia::detect_nvidia().is_empty()
48}
49
50pub fn supports_av1_encode(device: &GpuDevice) -> bool {
51    match device.vendor {
52        // NVIDIA: defer to the **real driver capability query**, not a
53        // board-name list. The substring list this used to carry was brittle —
54        // every new SKU had to be added by hand, and a missed one (e.g. the
55        // RTX 5060 once was) now *hard-fails* the job since there's no CPU
56        // fallback. NVENC AV1 support is authoritatively validated by
57        // `nvEncGetEncodeCaps` / `GetEncodeGUIDs` in `NvencEncoder::new`, which
58        // enumerates the GPU's actual encode codecs and bails cleanly if AV1
59        // isn't among them (verified on an RTX 3090: "2 codec(s), none AV1").
60        // So admit every NVIDIA GPU here and let the real query be the gate.
61        GpuVendor::Nvidia => true,
62        // AMD: defer to the real path. AV1 VCN encode is RDNA3+ (RX 7000+), but
63        // rather than a brittle SKU list, `AmfEncoder::new` is authoritative —
64        // AMF `CreateComponent(AMFVideoEncoderVCN_AV1)` fails on a pre-RDNA3 GPU
65        // and we bail cleanly ("RDNA3+ GPU required"). Admit every AMD GPU here
66        // and let that decide (matches the NVIDIA policy above).
67        GpuVendor::Amd => true,
68        // Intel: defer to the real path. AV1 QSV is Arc / Meteor Lake+, but
69        // rather than a brittle family-name list, `QsvEncoder::new` is
70        // authoritative — `MFXVideoENCODE_Query` (+ Init) reports whether the
71        // GPU's oneVPL implementation supports AV1, and we bail cleanly if not.
72        // Admit every Intel GPU here and let that decide (matches NVIDIA/AMD).
73        GpuVendor::Intel => true,
74    }
75}
76
// ─── Windows helpers ─────────────────────────────────────────────────────────
78
/// List the host's video controllers on Windows, as reported by WMI
/// (`Get-CimInstance Win32_VideoController`).
///
/// Each entry is `(name, vendor_id, device_id)`. The result is computed once
/// and cached for the life of the process: the query spawns PowerShell
/// (~hundreds of ms) and the GPU set is stable per run. If PowerShell cannot
/// be launched the list is empty. The Linux paths read `/sys` directly and
/// don't use this.
#[cfg(windows)]
fn windows_video_controllers() -> &'static [(String, u16, u16)] {
    use std::process::Command;
    use std::sync::OnceLock;

    static CONTROLLERS: OnceLock<Vec<(String, u16, u16)>> = OnceLock::new();

    CONTROLLERS.get_or_init(|| {
        // Emits one `<Name>|<PNPDeviceID>` line per controller.
        const QUERY: &str = "Get-CimInstance Win32_VideoController | \
                             ForEach-Object { \"$($_.Name)|$($_.PNPDeviceID)\" }";

        let stdout = match Command::new("powershell")
            .args(["-NoProfile", "-NonInteractive", "-Command", QUERY])
            .output()
        {
            Ok(finished) => finished.stdout,
            Err(_) => return Vec::new(),
        };

        let text = String::from_utf8_lossy(&stdout);
        let mut controllers = Vec::new();
        for line in text.lines() {
            let Some((name, pnp)) = line.split_once('|') else {
                continue;
            };
            // A line without a parsable vendor id is skipped entirely.
            let Some(vendor_id) = win_hex_after(pnp, "VEN_") else {
                continue;
            };
            // A missing/unparsable device id is tolerated and recorded as 0.
            let device_id = win_hex_after(pnp, "DEV_").unwrap_or(0);
            controllers.push((name.trim().to_string(), vendor_id, device_id));
        }
        controllers
    })
}
112
/// Extract the 4 hex digits following `marker` (e.g. `VEN_1002`) from a Windows
/// PNPDeviceID, as a `u16`.
///
/// Only the first occurrence of `marker` is considered. Returns `None` when
/// `marker` is absent, or when it is not followed by exactly four ASCII hex
/// digits (a truncated id such as `VEN_10`, or a signed form such as
/// `VEN_+1AB`, is rejected rather than parsed as a shorter number).
#[cfg(windows)]
fn win_hex_after(s: &str, marker: &str) -> Option<u16> {
    let start = s.find(marker)? + marker.len();
    // `get` instead of indexing: a tail shorter than four bytes (or a range
    // ending inside a multi-byte character) yields `None` instead of a panic.
    let digits = s.get(start..start + 4)?;
    // `from_str_radix` on its own would also accept a leading `+`, so the
    // digits are validated explicitly first.
    if !digits.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }
    u16::from_str_radix(digits, 16).ok()
}
121
/// Turn the Windows video controllers belonging to one PCI vendor into
/// `GpuDevice` entries.
///
/// Controllers are numbered from 0 in enumeration order, and that number is
/// written to both `vendor_index` and `index`. The `index` is therefore only
/// vendor-local at this point; `detect_gpus()` reassigns the global `index`.
#[cfg(windows)]
fn detect_windows_vendor(vendor: GpuVendor, vendor_id: u16) -> Vec<GpuDevice> {
    let vendor_id_hex = format!("0x{vendor_id:04x}");
    let matching = windows_video_controllers()
        .iter()
        .filter(|controller| controller.1 == vendor_id);

    let mut gpus = Vec::new();
    for (slot, (name, _, device_id)) in matching.enumerate() {
        let device_hex = format!("0x{device_id:04x}");
        let generation = match vendor {
            GpuVendor::Amd => amd::amd_generation_from_device_id(&device_hex),
            GpuVendor::Intel => intel::intel_generation_from_device_id(&device_hex),
            GpuVendor::Nvidia => "Unknown".into(),
        };
        let slot = slot as u32;

        gpus.push(GpuDevice {
            vendor,
            name: name.clone(),
            index: slot,
            vendor_index: slot,
            generation,
            pci_id: format!("{vendor_id_hex}:{device_hex}"),
            // Left at 0: WMI AdapterRAM is u32-capped + unreliable.
            vram_mib: 0,
            serial: None,
            host_pci_address: String::new(),
            vendor_id_hex: vendor_id_hex.clone(),
        });
    }
    gpus
}