gpu-histop 0.1.0

High-resolution GPU history monitor for NVIDIA, AMDGPU, and Apple Silicon
Documentation
use std::collections::HashMap;
use std::fs;
use std::process::Command;
use std::time::Instant;

use anyhow::{Context, Result};
use nvml_wrapper::enum_wrappers::device::{Clock, TemperatureSensor};
use nvml_wrapper::enums::device::UsedGpuMemory;
use nvml_wrapper::struct_wrappers::device::ProcessInfo;
use nvml_wrapper::{Device, Nvml};

use crate::backend::{GpuBackend, require_devices};
use crate::model::{GpuInfo, GpuProcess, GpuProcessKind, GpuSample};

/// GPU backend that reads device metrics and process lists through NVML.
pub struct NvmlBackend {
    /// NVML library handle; devices are re-opened from it by index on every sample.
    nvml: Nvml,
    /// One entry per NVML device index, built once in `new`.
    devices: Vec<GpuInfo>,
    /// Fan read strategy per device, looked up by `GpuInfo::id` while sampling.
    fan_readouts: Vec<FanReadout>,
    /// Memoized per-PID identities and UID-to-username lookups.
    proc_cache: ProcInfoCache,
}

/// How a device's fan speed is obtained while sampling; chosen once per device
/// by `detect_fan_readout`.
#[derive(Debug, Clone, Copy)]
enum FanReadout {
    /// No fan value is reported for the device (`sample_fan_percent` yields `None`).
    Unavailable,
    /// Query the speed of the fan with index `fan_index` on every sample.
    Percent { fan_index: u32 },
}

/// Cached description of one process, stored per PID in `ProcInfoCache`.
#[derive(Debug, Clone)]
struct ProcIdentity {
    /// Value parsed from `/proc/<pid>/stat` when the entry was built. A cached
    /// entry is reused only while a fresh read yields the same value.
    start_time_ticks: Option<u64>,
    /// User name of the process owner, or the numeric UID as text when no name
    /// could be resolved; `None` when the UID itself could not be read.
    user: Option<String>,
    /// Command line from `/proc/<pid>/cmdline`, falling back to `/proc/<pid>/comm`.
    command: Option<String>,
}

/// Memoizes process identities and user names across samples.
///
/// NOTE(review): nothing in the visible code removes entries, so both maps
/// only grow — confirm that is acceptable for long-running sessions.
#[derive(Debug, Default)]
struct ProcInfoCache {
    /// Last resolved identity per PID.
    identities: HashMap<u32, ProcIdentity>,
    /// Resolved user name (or UID rendered as text) per UID.
    usernames: HashMap<u32, String>,
}

impl NvmlBackend {
    /// Initializes NVML and records every device it reports.
    ///
    /// For each device the name (with a generic fallback), the UUID if
    /// available, and the fan read strategy are captured up front.
    ///
    /// # Errors
    ///
    /// Fails when NVML cannot be initialized, the device count cannot be read,
    /// a device cannot be opened by index, or `require_devices` rejects the
    /// resulting list.
    pub fn new() -> Result<Self> {
        let nvml = Nvml::init().context("NVML initialization failed")?;
        let count = nvml.device_count().context("NVML device_count failed")?;

        let capacity = count as usize;
        let mut devices = Vec::with_capacity(capacity);
        let mut fan_readouts = Vec::with_capacity(capacity);

        for index in 0..count {
            let device = nvml
                .device_by_index(index)
                .with_context(|| format!("failed to open NVML device {index}"))?;

            // A missing name is not fatal; fall back to a generic label.
            let name = match device.name() {
                Ok(reported) => reported,
                Err(_) => format!("NVIDIA GPU {index}"),
            };
            let uuid = device.uuid().ok();
            // Decide the fan strategy while `name` is still borrowable.
            let fan_readout = detect_fan_readout(&device, &name);

            devices.push(GpuInfo {
                id: index as usize,
                backend_index: index,
                name,
                uuid,
            });
            fan_readouts.push(fan_readout);
        }

        require_devices(&devices, "NVML")?;

        let proc_cache = ProcInfoCache::default();
        Ok(Self {
            nvml,
            devices,
            fan_readouts,
            proc_cache,
        })
    }
}

impl GpuBackend for NvmlBackend {
    /// Name of this backend.
    fn label(&self) -> &str {
        "NVML"
    }

    /// Devices recorded when the backend was constructed.
    fn devices(&self) -> &[GpuInfo] {
        self.devices.as_slice()
    }

    /// Takes one reading from every known device.
    ///
    /// All samples in the returned vector share a single timestamp taken
    /// before the first device is queried. A metric that NVML fails to report
    /// becomes `None` rather than aborting the sample.
    ///
    /// # Errors
    ///
    /// Fails only when a device cannot be re-opened by its NVML index.
    fn sample(&mut self) -> Result<Vec<GpuSample>> {
        let taken_at = Instant::now();
        let mut samples = Vec::with_capacity(self.devices.len());

        for info in &self.devices {
            let device = self
                .nvml
                .device_by_index(info.backend_index)
                .with_context(|| format!("failed to open NVML device {}", info.backend_index))?;

            let (gpu_util_percent, mem_util_percent) = match device.utilization_rates() {
                Ok(rates) => (Some(rates.gpu as f64), Some(rates.memory as f64)),
                Err(_) => (None, None),
            };
            let (vram_used_bytes, vram_total_bytes) = match device.memory_info() {
                Ok(memory) => (Some(memory.used), Some(memory.total)),
                Err(_) => (None, None),
            };

            let processes = collect_processes(&device, &mut self.proc_cache);
            // Taken before `processes` is moved into the sample below.
            let process_count = processes.len() as u32;

            // NVML reports power in milliwatts.
            let power_watts = device.power_usage().ok().map(|mw| mw as f64 / 1000.0);
            let power_limit_watts = device
                .enforced_power_limit()
                .ok()
                .map(|mw| mw as f64 / 1000.0);
            let temperature_celsius = device
                .temperature(TemperatureSensor::Gpu)
                .ok()
                .map(|c| c as f64);
            let fan_percent = match self.fan_readouts.get(info.id) {
                Some(readout) => sample_fan_percent(&device, *readout),
                None => None,
            };
            let graphics_clock_mhz = device
                .clock_info(Clock::Graphics)
                .ok()
                .map(|mhz| mhz as f64);
            let memory_clock_mhz = device.clock_info(Clock::Memory).ok().map(|mhz| mhz as f64);

            samples.push(GpuSample {
                gpu_id: info.id,
                at: taken_at,
                gpu_util_percent,
                mem_util_percent,
                vram_used_bytes,
                vram_total_bytes,
                power_watts,
                power_limit_watts,
                temperature_celsius,
                fan_percent,
                graphics_clock_mhz,
                memory_clock_mhz,
                compute_processes: Some(process_count),
                processes,
            });
        }

        Ok(samples)
    }
}

/// Decides how (or whether) fan speed will be read for `device`.
///
/// Returns [`FanReadout::Unavailable`] when the fan count cannot be queried or
/// is zero, and also when `name` is on the enclosure-cooled list while fan 0
/// reports both 0 % and 0 RPM. Otherwise fan 0 is read as a percentage.
fn detect_fan_readout(device: &Device<'_>, name: &str) -> FanReadout {
    match device.num_fans() {
        Ok(count) if count != 0 => {}
        _ => return FanReadout::Unavailable,
    }

    let first_fan_percent = device.fan_speed(0).ok();
    let first_fan_rpm = device.fan_speed_rpm(0).ok();
    let fan_is_idle = first_fan_percent == Some(0) && first_fan_rpm == Some(0);

    if is_enclosure_cooled_nvidia(name) && fan_is_idle {
        FanReadout::Unavailable
    } else {
        FanReadout::Percent { fan_index: 0 }
    }
}

/// Reads the current fan speed according to `readout`.
///
/// Yields `None` when the readout is unavailable or the NVML query fails.
fn sample_fan_percent(device: &Device<'_>, readout: FanReadout) -> Option<f64> {
    let fan_index = match readout {
        FanReadout::Percent { fan_index } => fan_index,
        FanReadout::Unavailable => return None,
    };

    let percent = device.fan_speed(fan_index).ok()?;
    Some(percent as f64)
}

/// Returns `true` when `name` is on the enclosure-cooled device list, which
/// currently holds only `"NVIDIA A40"`.
///
/// The whole name must match; the comparison ignores ASCII case and does not
/// allocate.
fn is_enclosure_cooled_nvidia(name: &str) -> bool {
    name.eq_ignore_ascii_case("NVIDIA A40")
}

/// Gathers the compute, graphics and MPS process lists of `device` into one
/// list with a single entry per PID.
///
/// A list that NVML fails to return is skipped. The result is ordered by GPU
/// memory use, largest first (unknown counts as 0), then by ascending PID.
fn collect_processes(device: &Device<'_>, proc_cache: &mut ProcInfoCache) -> Vec<GpuProcess> {
    let mut merged = HashMap::new();

    if let Ok(compute) = device.running_compute_processes() {
        merge_processes(&mut merged, compute, GpuProcessKind::Compute, proc_cache);
    }
    if let Ok(graphics) = device.running_graphics_processes() {
        merge_processes(&mut merged, graphics, GpuProcessKind::Graphics, proc_cache);
    }
    if let Ok(mps) = device.mps_running_compute_processes() {
        merge_processes(&mut merged, mps, GpuProcessKind::Mps, proc_cache);
    }

    let mut ranked: Vec<GpuProcess> = merged.into_values().collect();
    // PIDs are unique map keys, so this key is a total order and an unstable
    // sort produces the same sequence as a stable one.
    ranked.sort_unstable_by_key(|process| {
        (
            std::cmp::Reverse(process.used_gpu_memory_bytes.unwrap_or(0)),
            process.pid,
        )
    });
    ranked
}

fn merge_processes(
    by_pid: &mut HashMap<u32, GpuProcess>,
    processes: Vec<ProcessInfo>,
    kind: GpuProcessKind,
    proc_cache: &mut ProcInfoCache,
) {
    for process in processes {
        let identity = proc_cache.identity(process.pid);
        let memory = used_gpu_memory_bytes(&process.used_gpu_memory);

        by_pid
            .entry(process.pid)
            .and_modify(|existing| {
                if !existing.kinds.contains(&kind) {
                    existing.kinds.push(kind);
                }
                existing.used_gpu_memory_bytes =
                    max_optional(existing.used_gpu_memory_bytes, memory);
                existing.gpu_instance_id = existing.gpu_instance_id.or(process.gpu_instance_id);
                existing.compute_instance_id =
                    existing.compute_instance_id.or(process.compute_instance_id);
            })
            .or_insert_with(|| GpuProcess {
                pid: process.pid,
                user: identity.user.clone(),
                command: identity.command.clone(),
                kinds: vec![kind],
                used_gpu_memory_bytes: memory,
                gpu_instance_id: process.gpu_instance_id,
                compute_instance_id: process.compute_instance_id,
            });
    }
}

/// Converts NVML's memory-usage enum into an optional byte count.
fn used_gpu_memory_bytes(memory: &UsedGpuMemory) -> Option<u64> {
    match *memory {
        UsedGpuMemory::Used(bytes) => Some(bytes),
        UsedGpuMemory::Unavailable => None,
    }
}

/// Returns the larger of two optional values, treating `None` as "no value".
///
/// If only one side is `Some`, that side is returned; if both are `None`, so
/// is the result.
fn max_optional(left: Option<u64>, right: Option<u64>) -> Option<u64> {
    // `Option` orders `None` below every `Some`, so the ordinary maximum
    // already prefers whichever side carries a value.
    left.max(right)
}

impl ProcInfoCache {
    /// Returns the identity of `pid`, reusing the cached entry when the value
    /// freshly read from `/proc/<pid>/stat` equals the one stored with it.
    ///
    /// On a miss or mismatch the owner and command are read again and the
    /// cache entry is replaced.
    fn identity(&mut self, pid: u32) -> ProcIdentity {
        let start_time_ticks = process_start_time_ticks(pid);

        match self.identities.get(&pid) {
            Some(cached) if cached.start_time_ticks == start_time_ticks => {
                return cached.clone();
            }
            _ => {}
        }

        let user = match process_uid(pid) {
            Some(uid) => Some(self.username(uid)),
            None => None,
        };
        let command = match process_cmdline(pid) {
            Some(cmdline) => Some(cmdline),
            None => process_comm(pid),
        };

        let fresh = ProcIdentity {
            start_time_ticks,
            user,
            command,
        };
        self.identities.insert(pid, fresh.clone());
        fresh
    }

    /// Returns the user name for `uid`, resolving it at most once.
    ///
    /// When no name can be found the UID itself, rendered as text, is stored
    /// and returned.
    fn username(&mut self, uid: u32) -> String {
        self.usernames
            .entry(uid)
            .or_insert_with(|| match username_from_system(uid) {
                Some(name) => name,
                None => uid.to_string(),
            })
            .clone()
    }
}

/// Reads the first numeric value on the `Uid:` line of `/proc/<pid>/status`.
///
/// Returns `None` when the file cannot be read or no `Uid:` line carries a
/// parsable number.
fn process_uid(pid: u32) -> Option<u32> {
    let status = fs::read_to_string(format!("/proc/{pid}/status")).ok()?;

    for line in status.lines() {
        let mut fields = line.split_whitespace();
        if fields.next() != Some("Uid:") {
            continue;
        }
        // Keep scanning if this line's value is missing or malformed.
        if let Some(uid) = fields.next().and_then(|raw| raw.parse::<u32>().ok()) {
            return Some(uid);
        }
    }

    None
}

/// Reads `/proc/<pid>/cmdline` and joins its NUL-separated arguments with
/// single spaces.
///
/// Empty arguments are dropped and invalid UTF-8 is replaced lossily. Returns
/// `None` when the file cannot be read or holds no non-empty argument.
fn process_cmdline(pid: u32) -> Option<String> {
    let raw = fs::read(format!("/proc/{pid}/cmdline")).ok()?;

    let mut joined = String::new();
    for arg in raw.split(|&byte| byte == 0) {
        if arg.is_empty() {
            continue;
        }
        // Every kept argument is non-empty, so `joined` is non-empty exactly
        // when at least one argument has already been appended.
        if !joined.is_empty() {
            joined.push(' ');
        }
        joined.push_str(&String::from_utf8_lossy(arg));
    }

    if joined.is_empty() { None } else { Some(joined) }
}

/// Reads the trimmed contents of `/proc/<pid>/comm`.
///
/// Returns `None` when the file cannot be read or is blank.
fn process_comm(pid: u32) -> Option<String> {
    let raw = fs::read_to_string(format!("/proc/{pid}/comm")).ok()?;
    match raw.trim() {
        "" => None,
        name => Some(name.to_owned()),
    }
}

/// Reads the process start time from `/proc/<pid>/stat`.
///
/// The line is split at the last `") "` so that a command name containing
/// parentheses or spaces cannot shift the fields; the value returned is the
/// 20th whitespace-separated field after that point (field 22 of the line).
/// Returns `None` when the file cannot be read or the field is missing or
/// not a number.
fn process_start_time_ticks(pid: u32) -> Option<u64> {
    let stat = fs::read_to_string(format!("/proc/{pid}/stat")).ok()?;
    let (_, after_comm) = stat.rsplit_once(") ")?;
    let start_time = after_comm.split_whitespace().nth(19)?;
    start_time.parse().ok()
}

/// Resolves `uid` to a user name, trying `/etc/passwd` first and the `getent`
/// command only when that finds nothing.
fn username_from_system(uid: u32) -> Option<String> {
    if let Some(name) = username_from_passwd(uid) {
        return Some(name);
    }
    username_from_getent(uid)
}

/// Looks up `uid` in `/etc/passwd`; `None` when the file is unreadable or has
/// no matching entry.
fn username_from_passwd(uid: u32) -> Option<String> {
    let contents = fs::read_to_string("/etc/passwd").ok()?;
    passwd_name(&contents, uid)
}

/// Looks up `uid` by running `getent passwd <uid>` and parsing its output as
/// passwd lines.
///
/// Returns `None` when the command cannot be run, exits unsuccessfully, prints
/// invalid UTF-8, or prints no entry for `uid`.
fn username_from_getent(uid: u32) -> Option<String> {
    let uid_arg = uid.to_string();
    let output = Command::new("getent")
        .args(["passwd", uid_arg.as_str()])
        .output()
        .ok()?;

    if output.status.success() {
        let stdout = String::from_utf8(output.stdout).ok()?;
        passwd_name(&stdout, uid)
    } else {
        None
    }
}

/// Finds the login name of the first passwd-format line in `passwd` whose
/// third colon-separated field parses to `uid`.
///
/// Lines with fewer than three fields or a non-numeric UID field are skipped.
fn passwd_name(passwd: &str, uid: u32) -> Option<String> {
    for entry in passwd.lines() {
        let mut columns = entry.split(':');
        let (Some(name), Some(_password), Some(raw_uid)) =
            (columns.next(), columns.next(), columns.next())
        else {
            continue;
        };

        if raw_uid.parse::<u32>() == Ok(uid) {
            return Some(name.to_owned());
        }
    }

    None
}