use std::collections::HashMap;
/// GPU usage attributed to a single process in one sample.
///
/// Both fields are optional because a given backend may only be able to
/// provide one of them (or neither) for a particular process.
#[derive(Clone, Copy, Debug, Default)]
pub struct ProcGpu {
    /// GPU utilisation as a percentage in the range `0.0..=100.0`, or `None`
    /// when no utilisation figure is available for this sample.
    pub gpu_pct: Option<f32>,
    /// GPU memory attributed to the process, in bytes, or `None` when no
    /// memory figure is available.
    pub gpu_mem_bytes: Option<u64>,
}
/// Stateful sampler producing per-process GPU usage snapshots.
///
/// The collector keeps the previous reading of each process so that a
/// utilisation percentage can be derived from the change between two samples.
#[derive(Debug, Default)]
pub struct ProcGpuCollector {
    /// Per-PID record of the cumulative engine time (in nanoseconds) seen at
    /// the previous sample, together with the instant it was taken.
    /// Only present on Linux.
    #[cfg(target_os = "linux")]
    prev_engine_ns: HashMap<u32, (u64, std::time::Instant)>,
}
impl ProcGpuCollector {
    /// Creates a collector with no sampling history.
    pub fn new() -> Self {
        Self::default()
    }

    /// Takes one snapshot of per-process GPU usage, keyed by PID.
    ///
    /// On Linux the snapshot is built from DRM `fdinfo` data and, when the
    /// `gpu-nvidia` feature is enabled, augmented with NVML data. On every
    /// other target an empty map is returned.
    ///
    /// Utilisation is derived from the change since the previous call, so the
    /// first sample in which a process appears has `gpu_pct == None` for the
    /// fdinfo-based path.
    pub fn sample(&mut self) -> HashMap<u32, ProcGpu> {
        #[cfg(target_os = "linux")]
        {
            // `out` is only mutated when the NVML backend is compiled in.
            #[cfg_attr(not(feature = "gpu-nvidia"), allow(unused_mut))]
            let mut out = self.sample_linux_fdinfo();
            #[cfg(feature = "gpu-nvidia")]
            sample_nvml_into(&mut out);
            return out;
        }
        #[cfg(not(target_os = "linux"))]
        {
            HashMap::new()
        }
    }

    /// Scans `/proc/<pid>/fd` for descriptors pointing into `/dev/dri/` and
    /// aggregates the matching `/proc/<pid>/fdinfo/<fd>` counters per process.
    ///
    /// A DRM client that is reachable through several descriptors of the same
    /// process (for example after `dup`) reports identical counters on each of
    /// them. Such duplicates are recognised by their `drm-client-id` and are
    /// counted only once. Descriptors without a `drm-client-id` line are
    /// always counted.
    ///
    /// Processes or descriptors that cannot be read (permissions, races with
    /// process exit) are silently skipped: sampling is best-effort.
    #[cfg(target_os = "linux")]
    fn sample_linux_fdinfo(&mut self) -> HashMap<u32, ProcGpu> {
        use std::collections::HashSet;
        use std::fs;
        use std::path::PathBuf;

        /// Extracts the numeric `drm-client-id` value from an fdinfo dump.
        fn drm_client_id(text: &str) -> Option<u64> {
            text.lines().find_map(|line| {
                let (key, value) = line.split_once(':')?;
                if key.trim() != "drm-client-id" {
                    return None;
                }
                value.trim().parse::<u64>().ok()
            })
        }

        let mut out: HashMap<u32, ProcGpu> = HashMap::new();
        let Ok(proc_iter) = fs::read_dir("/proc") else {
            return out;
        };

        // Per-PID totals: (cumulative engine time in ns, memory in bytes).
        let mut totals: HashMap<u32, (u64, u64)> = HashMap::new();
        for proc_entry in proc_iter.flatten() {
            // Only numeric directory names are processes.
            let Some(pid) = proc_entry
                .file_name()
                .to_str()
                .and_then(|name| name.parse::<u32>().ok())
            else {
                continue;
            };
            let proc_path = proc_entry.path();
            let Ok(fd_iter) = fs::read_dir(proc_path.join("fd")) else {
                continue;
            };
            let fdinfo_dir = proc_path.join("fdinfo");

            // DRM clients already accounted for in this process. The device
            // node path is part of the key because a client id is only
            // guaranteed to be unique within one device.
            let mut seen_clients: HashSet<(PathBuf, u64)> = HashSet::new();

            for fd_entry in fd_iter.flatten() {
                let Ok(target) = fs::read_link(fd_entry.path()) else {
                    continue;
                };
                if !target.starts_with("/dev/dri/") {
                    continue;
                }
                let Ok(text) = fs::read_to_string(fdinfo_dir.join(fd_entry.file_name())) else {
                    continue;
                };
                if let Some(client_id) = drm_client_id(&text) {
                    if !seen_clients.insert((target, client_id)) {
                        // Same client seen through another descriptor.
                        continue;
                    }
                }
                let parsed = parse_fdinfo_drm(&text);
                let total = totals.entry(pid).or_insert((0, 0));
                total.0 = total.0.saturating_add(parsed.engine_ns);
                total.1 = total.1.saturating_add(parsed.mem_bytes);
            }
        }

        let now = std::time::Instant::now();
        // History for the next call; building it afresh drops every PID that
        // no longer holds a DRM descriptor.
        let mut next_prev = HashMap::with_capacity(totals.len());
        for (pid, (cur_ns, mem_bytes)) in totals {
            let gpu_pct = self
                .prev_engine_ns
                .get(&pid)
                .and_then(|&(prev_ns, prev_t)| {
                    let dt = now.duration_since(prev_t).as_secs_f64();
                    (dt > 0.0).then(|| {
                        // Busy seconds accumulated since the last sample,
                        // relative to the wall-clock time that elapsed.
                        let busy_secs = cur_ns.saturating_sub(prev_ns) as f64 / 1_000_000_000.0;
                        (busy_secs / dt * 100.0).clamp(0.0, 100.0) as f32
                    })
                });
            next_prev.insert(pid, (cur_ns, now));
            out.insert(
                pid,
                ProcGpu {
                    gpu_pct,
                    gpu_mem_bytes: (mem_bytes > 0).then_some(mem_bytes),
                },
            );
        }
        self.prev_engine_ns = next_prev;
        out
    }
}
/// Counters aggregated from one DRM `fdinfo` dump.
#[cfg(any(target_os = "linux", test))]
#[derive(Debug, Default, PartialEq)]
pub(crate) struct FdinfoDrm {
    /// Sum of every `drm-engine-<name>` busy-time value, in nanoseconds.
    pub engine_ns: u64,
    /// Sum of every `drm-memory-<region>` value, converted to bytes.
    pub mem_bytes: u64,
}

/// Parses the text of a `/proc/<pid>/fdinfo/<fd>` file for a DRM descriptor.
///
/// Each line is expected to look like `key: <number> [unit]`. Lines without a
/// colon, without a value, or whose value is not an unsigned integer are
/// ignored, as are keys other than the ones listed below.
///
/// * `drm-engine-<name>` values are summed into [`FdinfoDrm::engine_ns`].
///   `drm-engine-capacity-<name>` lines are skipped: they carry an engine
///   count, not a time value, and must not be added to the busy time.
/// * `drm-memory-<region>` values are summed into [`FdinfoDrm::mem_bytes`]
///   after scaling by the unit (`KiB`, `MiB`, `GiB`). A missing or
///   unrecognised unit is treated as bytes.
///
/// All sums saturate instead of overflowing.
#[cfg(any(target_os = "linux", test))]
pub(crate) fn parse_fdinfo_drm(text: &str) -> FdinfoDrm {
    let mut out = FdinfoDrm::default();
    for line in text.lines() {
        let Some((key, rest)) = line.split_once(':') else {
            continue;
        };
        let key = key.trim();
        let mut fields = rest.split_whitespace();
        let Some(n) = fields.next().and_then(|value| value.parse::<u64>().ok()) else {
            continue;
        };
        if let Some(engine) = key.strip_prefix("drm-engine-") {
            if engine.starts_with("capacity-") {
                // Number of engine instances, not nanoseconds.
                continue;
            }
            out.engine_ns = out.engine_ns.saturating_add(n);
        } else if key.starts_with("drm-memory-") {
            let bytes = match fields.next().unwrap_or("") {
                "KiB" => n.saturating_mul(1024),
                "MiB" => n.saturating_mul(1024 * 1024),
                "GiB" => n.saturating_mul(1024 * 1024 * 1024),
                _ => n,
            };
            out.mem_bytes = out.mem_bytes.saturating_add(bytes);
        }
    }
    out
}
/// Merges per-process NVIDIA statistics obtained through NVML into `out`.
///
/// For every device that can be opened:
/// * memory reported for running compute and graphics processes is added to
///   the process's `gpu_mem_bytes` (an entry is created if the PID is new);
/// * the `sm_util` value of the newest utilisation sample of each process
///   replaces that process's `gpu_pct`.
///
/// Any NVML failure (initialisation, device lookup, individual queries) is
/// ignored, leaving `out` untouched for the affected part.
#[cfg(all(target_os = "linux", feature = "gpu-nvidia"))]
fn sample_nvml_into(out: &mut HashMap<u32, ProcGpu>) {
    use nvml_wrapper::enums::device::UsedGpuMemory;
    use nvml_wrapper::Nvml;
    use std::sync::OnceLock;

    // Initialisation is attempted once; a failure is remembered as `None` so
    // later calls return immediately instead of retrying.
    static NVML: OnceLock<Option<Nvml>> = OnceLock::new();
    let Some(nvml) = NVML.get_or_init(|| Nvml::init().ok()).as_ref() else {
        return;
    };

    let device_count = nvml.device_count().unwrap_or(0);
    for index in 0..device_count {
        let Ok(device) = nvml.device_by_index(index) else {
            continue;
        };

        // Compute processes first, then graphics processes; a failed query
        // simply contributes no processes.
        let process_lists = device
            .running_compute_processes()
            .into_iter()
            .chain(device.running_graphics_processes());
        for info in process_lists.flatten() {
            let slot = out.entry(info.pid as u32).or_default();
            if let UsedGpuMemory::Used(bytes) = info.used_gpu_memory {
                let so_far = slot.gpu_mem_bytes.unwrap_or(0);
                slot.gpu_mem_bytes = Some(so_far.saturating_add(bytes));
            }
        }

        // NOTE(review): passing 0 presumably requests every buffered sample —
        // confirm against the nvml-wrapper documentation.
        let Ok(samples) = device.process_utilization_stats(0) else {
            continue;
        };

        // Newest (timestamp, sm_util) pair per PID; on equal timestamps the
        // sample encountered later wins.
        let mut newest: HashMap<u32, (u64, u32)> = HashMap::new();
        for sample in samples {
            let best = newest.entry(sample.pid as u32).or_insert((0, 0));
            if sample.timestamp >= best.0 {
                *best = (sample.timestamp, sample.sm_util);
            }
        }
        for (pid, (_, sm_util)) in newest {
            out.entry(pid).or_default().gpu_pct = Some((sm_util as f32).clamp(0.0, 100.0));
        }
    }
}
/// Unit tests for the DRM fdinfo parser.
#[cfg(test)]
mod tests {
    use super::*;

    /// An amdgpu-style dump: two engines and two memory regions are summed.
    #[test]
    fn parses_amdgpu_fdinfo() {
        let fdinfo = "\
pos: 0
flags: 02100002
mnt_id: 30
ino: 1234
drm-driver: amdgpu
drm-client-id: 42
drm-engine-gfx: 1234567890 ns
drm-engine-compute: 9876543210 ns
drm-memory-vram: 524288 KiB
drm-memory-gtt: 65536 KiB
";
        let stats = parse_fdinfo_drm(fdinfo);
        assert_eq!(stats.engine_ns, 1_234_567_890 + 9_876_543_210);
        assert_eq!(stats.mem_bytes, (524_288 + 65_536) * 1024);
    }

    /// An i915-style dump, including an engine name that contains a hyphen.
    #[test]
    fn parses_intel_fdinfo() {
        let fdinfo = "\
drm-driver: i915
drm-engine-render: 4500000000 ns
drm-engine-blitter: 100000 ns
drm-engine-video: 0 ns
drm-engine-video-enhance: 0 ns
drm-memory-system: 16384 KiB
";
        let stats = parse_fdinfo_drm(fdinfo);
        assert_eq!(stats.engine_ns, 4_500_000_000 + 100_000);
        assert_eq!(stats.mem_bytes, 16_384 * 1024);
    }

    /// Generic fdinfo keys must not contribute to any counter.
    #[test]
    fn ignores_non_drm_lines() {
        let fdinfo = "\
pos: 0
flags: 02100002
mnt_id: 30
size: 12345
";
        assert_eq!(parse_fdinfo_drm(fdinfo), FdinfoDrm::default());
    }

    /// A memory value without a unit suffix is taken as a byte count.
    #[test]
    fn handles_missing_unit_as_bytes() {
        let stats = parse_fdinfo_drm("drm-memory-vram: 4096\n");
        assert_eq!(stats.mem_bytes, 4096);
    }

    /// A non-numeric value is skipped rather than causing a failure.
    #[test]
    fn handles_garbled_value() {
        let stats = parse_fdinfo_drm("drm-engine-gfx: not-a-number ns\n");
        assert_eq!(stats.engine_ns, 0);
    }
}