use super::GpuBackend;
/// Probes the host for AMD GPUs and returns a backend when any are found.
///
/// Only Linux has an implementation; on every other target this always
/// returns `None`.
pub fn probe() -> Option<Box<dyn GpuBackend>> {
    #[cfg(target_os = "linux")]
    {
        linux_impl::probe()
    }
    #[cfg(not(target_os = "linux"))]
    {
        None
    }
}
#[cfg(target_os = "linux")]
mod linux_impl {
use crate::backend::linux::{
self, card_name, cards_with_vendor, first_dir, hwmon_u64, pdev_of, read_trim, read_u64,
};
use crate::backend::{GpuBackend, GpuProcess, GpuSnapshot, clamp_pct};
use anyhow::Result;
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
use std::time::Instant;
/// Vendor ID string handed to `cards_with_vendor` so that `scan` only picks up AMD cards.
const AMD_VENDOR: &str = "0x1002";
/// Scans `/sys/class/drm` for AMD cards and wraps them in a backend.
///
/// Returns `None` when the scan finds no devices. Otherwise the backend starts
/// with no remembered per-client engine counters and an empty process list.
pub fn probe() -> Option<Box<dyn GpuBackend>> {
    let cards = scan("/sys/class/drm");
    if cards.is_empty() {
        None
    } else {
        let backend: Box<dyn GpuBackend> = Box::new(AmdBackend {
            devices: cards,
            engine_state: HashMap::new(),
            last_procs: Vec::new(),
        });
        Some(backend)
    }
}
/// One AMD card found by `scan`, together with the locations and channel
/// numbers that `sample` needs on every poll.
struct AmdDevice {
/// Display name produced by `card_name`.
name: String,
/// Device directory; attributes such as `gpu_busy_percent` and
/// `mem_info_vram_used` are read relative to it.
dev: PathBuf,
/// First directory found under `<dev>/hwmon`, if any.
hwmon: Option<PathBuf>,
/// Value returned by `pdev_of` for this device; DRM clients are matched to
/// a device index through it.
pdev: Option<String>,
/// `temp1_crit` divided by 1000, read once at scan time.
temp_crit_c: Option<f64>,
/// hwmon temperature channel (2..=4) whose label is `junction`.
temp_junction_ch: Option<u8>,
/// hwmon temperature channel (2..=4) whose label is `mem`.
temp_mem_ch: Option<u8>,
}
/// Backend state: the scanned devices plus what `sweep_clients` carries over
/// between polls.
struct AmdBackend {
/// Devices in scan order; the position in this vector is the GPU index.
devices: Vec<AmdDevice>,
/// Last observation per `(pid, DRM client id)`:
/// `(total engine ns, video engine ns, time of the observation)`.
engine_state: HashMap<(u32, u64), (u64, u64, Instant)>,
/// Process list built by the most recent sweep; `processes` returns a copy.
last_procs: Vec<GpuProcess>,
}
/// Reports whether an engine name belongs to the video group.
///
/// An engine counts as video when its name begins with `dec`, `enc`, `jpeg`,
/// `vcn` or `vpe`.
fn is_video_engine(name: &str) -> bool {
    const VIDEO_PREFIXES: [&str; 5] = ["dec", "enc", "jpeg", "vcn", "vpe"];
    VIDEO_PREFIXES
        .iter()
        .any(|prefix| name.starts_with(*prefix))
}
/// `GpuBackend` implementation that reads device attributes from sysfs and
/// per-process figures from DRM client statistics.
impl GpuBackend for AmdBackend {
    /// Static identifier of this backend.
    fn name(&self) -> &'static str {
        "amdgpu"
    }

    /// Refreshes the per-process statistics, then samples every device.
    ///
    /// The client sweep runs first so that each snapshot can carry the video
    /// utilisation accumulated for its device index.
    fn poll(&mut self) -> Result<Vec<GpuSnapshot>> {
        let video_by_gpu = self.sweep_clients();
        let mut snapshots = Vec::with_capacity(self.devices.len());
        for (index, device) in self.devices.iter().enumerate() {
            snapshots.push(sample(device, video_by_gpu.get(&index).copied()));
        }
        Ok(snapshots)
    }

    /// Returns a copy of the process list built by the most recent sweep.
    fn processes(&mut self) -> Vec<GpuProcess> {
        self.last_procs.to_vec()
    }

    /// Driver description including the kernel release, when that is known.
    fn driver_info(&self) -> Option<String> {
        let k = kernel_release()?;
        Some(format!("amdgpu · kernel {k}"))
    }
}
impl AmdBackend {
    /// Walks the amdgpu DRM clients of every process, rebuilds `last_procs`
    /// and returns the summed video utilisation per device index.
    ///
    /// For each client that maps to a known device (via its `pdev`), the
    /// current engine counters are compared with the ones remembered from the
    /// previous sweep by `linux::ns_delta_util`. Results are accumulated per
    /// `(pid, gpu)`, and state for clients that were not seen this time is
    /// dropped.
    fn sweep_clients(&mut self) -> HashMap<usize, f64> {
        // Device lookup table: `pdev` string -> index into `self.devices`.
        let mut gpu_by_pdev: HashMap<&str, usize> = HashMap::new();
        for (index, device) in self.devices.iter().enumerate() {
            if let Some(pdev) = device.pdev.as_deref() {
                gpu_by_pdev.insert(pdev, index);
            }
        }

        // Per-(pid, gpu) totals: (utilisation, VRAM bytes, used the gfx engine).
        let mut per_proc: HashMap<(u32, usize), (f64, u64, bool)> = HashMap::new();
        let mut video_by_gpu: HashMap<usize, f64> = HashMap::new();
        let mut live: HashSet<(u32, u64)> = HashSet::new();
        let sampled_at = Instant::now();

        for pid in linux::proc_pids() {
            for client in linux::drm_clients(pid, "amdgpu") {
                let gpu = match client.pdev.as_deref().and_then(|p| gpu_by_pdev.get(p)) {
                    Some(&index) => index,
                    None => continue,
                };

                // The same client can show up more than once for a pid; count it once.
                let key = (pid, client.id);
                if !live.insert(key) {
                    continue;
                }

                let total_ns = client.total_engine_ns();
                let video_ns = client.engine_ns_where(is_video_engine);

                // Look up the previous observation before overwriting it.
                let previous = self.engine_state.get(&key);
                let (util, video) =
                    linux::ns_delta_util(previous, total_ns, video_ns, sampled_at);
                self.engine_state
                    .insert(key, (total_ns, video_ns, sampled_at));

                *video_by_gpu.entry(gpu).or_default() += video;

                let (util_sum, vram_sum, uses_gfx) =
                    per_proc.entry((pid, gpu)).or_insert((0.0, 0, false));
                *util_sum += util;
                *vram_sum += client.memory.get("vram").copied().unwrap_or(0);
                *uses_gfx |= client.engine_ns.get("gfx").copied().unwrap_or(0) > 0;
            }
        }

        // Forget clients that have gone away since the previous sweep.
        self.engine_state.retain(|key, _| live.contains(key));

        let mut procs = Vec::with_capacity(per_proc.len());
        for ((pid, gpu_index), (util, vram, graphics)) in per_proc {
            procs.push(linux::build_proc(pid, gpu_index, util, vram, graphics));
        }
        self.last_procs = procs;

        video_by_gpu
    }
}
/// Builds an [`AmdDevice`] for every card under `drm` that `cards_with_vendor`
/// returns for the AMD vendor ID.
///
/// Besides the name and paths, the critical temperature and the hwmon channel
/// numbers labelled `junction` and `mem` (searched in channels 2 to 4) are
/// resolved once here so that sampling does not have to look them up again.
fn scan(drm: &str) -> Vec<AmdDevice> {
    let mut devices = Vec::new();
    for (idx, dev) in cards_with_vendor(drm, AMD_VENDOR) {
        let name = card_name(&dev, idx, "1002", "AMD");
        let hwmon = first_dir(&dev.join("hwmon"));
        let pdev = pdev_of(&dev);

        let mut temp_crit_c = None;
        let mut temp_junction_ch = None;
        let mut temp_mem_ch = None;
        if let Some(h) = hwmon.as_deref() {
            temp_crit_c = read_u64(&h.join("temp1_crit")).map(|v| v as f64 / 1000.0);
            for ch in 2u8..=4 {
                let label = read_trim(&h.join(format!("temp{ch}_label")));
                match label.as_deref() {
                    Some("junction") => temp_junction_ch = Some(ch),
                    Some("mem") => temp_mem_ch = Some(ch),
                    _ => {}
                }
            }
        }

        devices.push(AmdDevice {
            name,
            dev,
            hwmon,
            pdev,
            temp_crit_c,
            temp_junction_ch,
            temp_mem_ch,
        });
    }
    devices
}
/// Reads the current state of one device into a [`GpuSnapshot`].
///
/// Every attribute is optional on the sysfs side. Utilisation and VRAM figures
/// fall back to 0 when unreadable; the remaining fields are `None`.
/// `video_util` is the per-device value computed by the client sweep and is
/// passed through `clamp_pct`.
fn sample(d: &AmdDevice, video_util: Option<f64>) -> GpuSnapshot {
    let h = d.hwmon.as_deref();

    // Small helpers for the unit conversions and path joins repeated below.
    let milli_to_unit = |v: u64| v as f64 / 1000.0;
    let micro_to_unit = |v: u64| v as f64 / 1e6;
    let hz_to_mhz = |v: u64| v / 1_000_000;
    let nonzero = |v: &u64| *v > 0;
    let attr = |file: &str| d.dev.join(file);
    let channel_temp = |channel: Option<u8>| {
        channel
            .and_then(|ch| hwmon_u64(h, &format!("temp{ch}_input")))
            .map(milli_to_unit)
    };
    let link_gen = |file: &str| read_trim(&attr(file)).as_deref().and_then(gts_to_gen);

    let temperature_c = hwmon_u64(h, "temp1_input").map(milli_to_unit);
    let power_w = hwmon_u64(h, "power1_average")
        .or_else(|| hwmon_u64(h, "power1_input"))
        .map(micro_to_unit);
    // A cap of 0 is treated as "not set"; fall back to the default cap.
    let power_limit_w = hwmon_u64(h, "power1_cap")
        .filter(nonzero)
        .or_else(|| hwmon_u64(h, "power1_cap_default").filter(nonzero))
        .map(micro_to_unit);

    // Heuristic throttle reasons: within 3 degrees of the critical
    // temperature, or drawing at least 99% of the power limit.
    let near_critical = temperature_c
        .zip(d.temp_crit_c)
        .is_some_and(|(t, crit)| t >= crit - 3.0);
    let at_power_cap = power_w
        .zip(power_limit_w)
        .is_some_and(|(w, cap)| w >= cap * 0.99);
    let mut throttle_parts: Vec<&str> = Vec::new();
    if near_critical {
        throttle_parts.push("thermal");
    }
    if at_power_cap {
        throttle_parts.push("power-limit");
    }
    let throttle = crate::backend::join_throttle(&throttle_parts);

    GpuSnapshot {
        name: d.name.clone(),
        integrated: is_apu(&d.dev),
        utilization_pct: read_u64(&attr("gpu_busy_percent")).unwrap_or(0) as f64,
        mem_util_pct: read_u64(&attr("mem_busy_percent")).map(|v| v as f64),
        video_util_pct: video_util.map(clamp_pct),
        enc_util_pct: None,
        dec_util_pct: None,
        throttle,
        vram_used_bytes: read_u64(&attr("mem_info_vram_used")).unwrap_or(0),
        vram_total_bytes: read_u64(&attr("mem_info_vram_total")).unwrap_or(0),
        temperature_c,
        temp_junction_c: channel_temp(d.temp_junction_ch),
        temp_mem_c: channel_temp(d.temp_mem_ch),
        power_w,
        power_limit_w,
        fan_pct: fan_pct(h),
        fan_rpm: hwmon_u64(h, "fan1_input"),
        // Prefer the hwmon frequency; otherwise use the active DPM table entry.
        clock_mhz: hwmon_u64(h, "freq1_input")
            .map(hz_to_mhz)
            .or_else(|| dpm_active_mhz(&attr("pp_dpm_sclk"))),
        mem_clock_mhz: hwmon_u64(h, "freq2_input")
            .map(hz_to_mhz)
            .or_else(|| dpm_active_mhz(&attr("pp_dpm_mclk"))),
        pcie_gen: link_gen("current_link_speed"),
        pcie_width: read_trim(&attr("current_link_width")).and_then(|w| w.parse().ok()),
        pcie_max_gen: link_gen("max_link_speed"),
        pcie_max_width: read_trim(&attr("max_link_width")).and_then(|w| w.parse().ok()),
        pcie_rx_kbs: None,
        pcie_tx_kbs: None,
        gtt_used_bytes: read_u64(&attr("mem_info_gtt_used")),
        gtt_total_bytes: read_u64(&attr("mem_info_gtt_total")),
        volt_mv: hwmon_u64(h, "in0_input"),
        // "auto" is treated as the uninteresting default and is not reported.
        perf_level: read_trim(&attr("power_dpm_force_performance_level"))
            .filter(|l| l != "auto"),
    }
}
/// Classifies a device as integrated from its `gpu_metrics` blob.
///
/// Returns `true` when the third byte of `<dev>/gpu_metrics` is at least 2,
/// and `false` when the file is missing, unreadable or shorter than three
/// bytes.
// NOTE(review): the third byte is presumably the metrics header's format
// revision - confirm against the amdgpu `gpu_metrics` documentation.
fn is_apu(dev: &Path) -> bool {
    let Ok(blob) = fs::read(dev.join("gpu_metrics")) else {
        return false;
    };
    matches!(blob.get(2), Some(&rev) if rev >= 2)
}
/// Converts a link speed string such as `"16.0 GT/s PCIe"` into a PCIe
/// generation number.
///
/// Only the first whitespace-separated token is parsed. The rate is mapped by
/// threshold: 128 and up is Gen 7, 64 is Gen 6, 32 is Gen 5, 16 is Gen 4,
/// 8 is Gen 3, 5 is Gen 2, and any smaller positive rate is Gen 1.
///
/// Returns `None` when the string is empty, the first token is not a number,
/// or the number is not a finite positive rate.
fn gts_to_gen(speed: &str) -> Option<u8> {
    let gts: f64 = speed.split_whitespace().next()?.parse().ok()?;
    // `f64::from_str` also accepts "nan", "inf" and signed values. None of
    // those is a link rate, so they must not be reported as a generation.
    if !gts.is_finite() || gts <= 0.0 {
        return None;
    }
    let generation = match gts {
        s if s >= 128.0 => 7,
        s if s >= 64.0 => 6,
        s if s >= 32.0 => 5,
        s if s >= 16.0 => 4,
        s if s >= 8.0 => 3,
        s if s >= 5.0 => 2,
        _ => 1,
    };
    Some(generation)
}
/// Extracts the frequency of the active level from a DPM table file.
///
/// The first line ending in `*` (ignoring trailing whitespace) is taken as
/// the active level. The text after its first `:` is trimmed and its leading
/// run of ASCII digits is parsed as the value. Returns `None` when the file
/// cannot be read, no line is starred, the starred line has no `:`, or no
/// digits follow it.
fn dpm_active_mhz(path: &Path) -> Option<u64> {
    let table = read_trim(path)?;
    for line in table.lines() {
        if !line.trim_end().ends_with('*') {
            continue;
        }
        // Only the first starred line is considered, even if it is malformed.
        let value = line.split(':').nth(1)?.trim();
        let digits_end = value
            .find(|c: char| !c.is_ascii_digit())
            .unwrap_or(value.len());
        return value[..digits_end].parse().ok();
    }
    None
}
/// Computes the fan duty cycle as a percentage of its maximum PWM value.
///
/// Returns `None` when `pwm1` cannot be read. When `pwm1_max` is missing or
/// zero, 255 is used as the maximum.
fn fan_pct(h: Option<&Path>) -> Option<f64> {
    let duty = hwmon_u64(h, "pwm1")?;
    let full_scale = match hwmon_u64(h, "pwm1_max") {
        Some(max) if max > 0 => max,
        _ => 255,
    };
    Some(duty as f64 / full_scale as f64 * 100.0)
}
fn kernel_release() -> Option<String> {
sysinfo::System::kernel_version()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Link speed strings map to the expected generation; unparseable text yields `None`.
    #[test]
    fn pcie_gen_from_gts_string() {
        assert_eq!(gts_to_gen("2.5 GT/s PCIe"), Some(1));
        assert_eq!(gts_to_gen("5.0 GT/s PCIe"), Some(2));
        assert_eq!(gts_to_gen("8.0 GT/s PCIe"), Some(3));
        assert_eq!(gts_to_gen("16.0 GT/s PCIe"), Some(4));
        assert_eq!(gts_to_gen("32.0 GT/s PCIe"), Some(5));
        assert_eq!(gts_to_gen("64.0 GT/s PCIe"), Some(6));
        assert_eq!(gts_to_gen("garbage"), None);
    }

    /// The starred line of a DPM table is the one whose frequency is returned.
    #[test]
    fn dpm_table_active_level_parses() {
        // Per-process directory: a fixed name in the shared temp dir can collide
        // with concurrent runs or with leftovers owned by another user.
        let dir = std::env::temp_dir().join(format!("gpur-dpm-test-{}", std::process::id()));
        std::fs::create_dir_all(&dir).unwrap();
        let f = dir.join("pp_dpm_mclk");

        std::fs::write(&f, "0: 96Mhz\n1: 3000Mhz *\n2: 1249Mhz\n").unwrap();
        let multi_level = dpm_active_mhz(&f);
        std::fs::write(&f, "S: 0Mhz *\n").unwrap();
        let single_level = dpm_active_mhz(&f);

        // Clean up before asserting so a failure does not leave files behind.
        let _ = std::fs::remove_dir_all(&dir);

        assert_eq!(multi_level, Some(3000));
        assert_eq!(single_level, Some(0));
    }

    /// Smoke test against real hardware: polls twice and prints what it sees.
    #[test]
    #[ignore = "requires AMD hardware; run with --ignored --nocapture"]
    fn live_poll_reports_devices() {
        let mut backend = probe().expect("no amdgpu devices visible in /sys/class/drm");
        let gpus = backend.poll().unwrap();
        assert!(!gpus.is_empty());
        for g in &gpus {
            println!(
                "{}: util={}% vram={}/{}MiB temp={:?}C power={:?}W fan={:?}% core={:?}MHz mem={:?}MHz",
                g.name,
                g.utilization_pct,
                g.vram_used_bytes / 1024 / 1024,
                g.vram_total_bytes / 1024 / 1024,
                g.temperature_c,
                g.power_w,
                g.fan_pct,
                g.clock_mhz,
                g.mem_clock_mhz,
            );
            assert!(g.vram_total_bytes > 0, "vram total should be nonzero");
        }
        // Per-process utilisation is computed from the difference between two
        // sweeps, so poll a second time after a short pause.
        let _ = backend.processes();
        std::thread::sleep(std::time::Duration::from_millis(300));
        backend.poll().unwrap();
        let procs = backend.processes();
        for p in &procs {
            println!(
                "pid={} gpu={} kind={:?} util={:?}% vram={}MiB",
                p.pid,
                p.gpu_index,
                p.kind,
                p.gpu_util_pct,
                p.gpu_mem_bytes / 1024 / 1024,
            );
        }
        println!("{} gpu processes", procs.len());
    }
}
}