/// File name handed to `libloading::Library::new` when opening NVML.
///
/// Windows builds use the DLL name; every other target (Linux included) uses
/// the versioned shared-object name.
#[cfg(target_os = "windows")]
const NVML_LIB_PATH: &str = "nvml.dll";
#[cfg(not(target_os = "windows"))]
const NVML_LIB_PATH: &str = "libnvidia-ml.so.1";

/// Status code the NVML calls in this file treat as success.
const NVML_SUCCESS: u32 = 0;

/// Status code that the process-list call is allowed to return besides success.
const NVML_ERROR_INSUFFICIENT_SIZE: u32 = 7;

/// Baseline capacity, in entries, of the process-info buffers handed to NVML.
const NVML_MAX_PROCESSES: usize = 64;

/// Size in bytes of the buffer that receives the device name.
const NVML_DEVICE_NAME_BUFFER_SIZE: usize = 96;
/// One per-process record written by `nvmlDeviceGetComputeRunningProcesses_v3`.
///
/// NVML fills an array of these through a raw pointer, so `#[repr(C)]` keeps
/// the field order and padding C-compatible.
///
/// NOTE(review): presumably mirrors `nvmlProcessInfo_t` from `nvml.h` — confirm
/// the layout against the header for the driver versions being targeted.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
struct NvmlProcessInfo {
/// Process id; compared against `std::process::id()` to locate this process.
pid: u32,
/// Memory attributed to the process. `u64::MAX` and values larger than the
/// device total are treated as "not available" by the readers in this file.
used_gpu_memory: u64,
/// Present for layout only; not read by the code visible in this file.
gpu_instance_id: u32,
/// Present for layout only; not read by the code visible in this file.
compute_instance_id: u32,
}
/// Device memory figures written by `nvmlDeviceGetMemoryInfo`.
///
/// NVML writes into this struct through a raw pointer, so `#[repr(C)]` keeps
/// the layout C-compatible.
///
/// NOTE(review): presumably mirrors `nvmlMemory_t`, with all values in bytes —
/// confirm against `nvml.h`.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
struct NvmlMemoryInfo {
/// Total device memory; also used as the upper bound when sanity-checking
/// per-process usage.
total: u64,
/// Memory reported as free.
free: u64,
/// Memory reported as in use.
used: u64,
}
/// Opaque device handle written by the `nvmlDeviceGetHandleByIndex_v2` call.
type NvmlDevice = *mut std::ffi::c_void;

/// Signature used for the `nvmlInit_v2` symbol.
type NvmlInitFn = unsafe extern "C" fn() -> u32;

/// Signature used for the `nvmlShutdown` symbol.
type NvmlShutdownFn = unsafe extern "C" fn() -> u32;

/// Signature used for the `nvmlDeviceGetHandleByIndex_v2` symbol.
type NvmlDeviceGetHandleByIndexFn =
    unsafe extern "C" fn(index: u32, device: *mut NvmlDevice) -> u32;

/// Signature used for the `nvmlDeviceGetMemoryInfo` symbol.
type NvmlDeviceGetMemoryInfoFn =
    unsafe extern "C" fn(device: NvmlDevice, memory: *mut NvmlMemoryInfo) -> u32;

/// Signature used for the `nvmlDeviceGetComputeRunningProcesses_v3` symbol.
///
/// `info_count` is in/out: callers set it to the buffer capacity and read it
/// back after the call.
type NvmlDeviceGetComputeRunningProcessesFn = unsafe extern "C" fn(
    device: NvmlDevice,
    info_count: *mut u32,
    infos: *mut NvmlProcessInfo,
) -> u32;

/// Signature used for the `nvmlDeviceGetCount_v2` symbol.
type NvmlDeviceGetCountFn = unsafe extern "C" fn(device_count: *mut u32) -> u32;

/// Signature used for the `nvmlDeviceGetName` symbol.
type NvmlDeviceGetNameFn =
    unsafe extern "C" fn(device: NvmlDevice, name: *mut std::ffi::c_char, length: u32) -> u32;
/// Snapshot of one device as gathered by a single NVML query.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so callers can log, copy and
/// compare results (for example in tests) without hand-written impls.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(super) struct NvmlQueryResult {
    /// Memory NVML attributes to the current process, or `None` when the
    /// per-process figure is unavailable or failed the sanity checks.
    pub process_used_bytes: Option<u64>,
    /// Total device memory as reported by NVML.
    pub device_total: u64,
    /// Free device memory as reported by NVML.
    pub device_free: u64,
    /// Used device memory as reported by NVML.
    pub device_used: u64,
    /// Device name, or `None` when NVML returned an error or an empty string.
    pub device_name: Option<String>,
}
/// Gathers memory totals, the device name and this process's usage for GPU `idx`.
///
/// The NVML library is opened and every required entry point is resolved up
/// front; the device queries are then bracketed by `nvmlInit_v2` and
/// `nvmlShutdown`.
///
/// Returns `None` when the library cannot be opened, any symbol is missing,
/// initialisation fails, the device handle cannot be obtained, or the memory
/// query fails. Within a successful result the name and the per-process figure
/// are individually optional.
#[allow(unsafe_code)]
pub(super) fn query(idx: u32) -> Option<NvmlQueryResult> {
    // SAFETY: opening a shared library runs its initialisers; this module
    // trusts the library named by NVML_LIB_PATH.
    let nvml = unsafe { libloading::Library::new(NVML_LIB_PATH) }.ok()?;

    // SAFETY (every lookup below): each symbol is bound to the alias this file
    // declares for it; the aliases are assumed to match the NVML C prototypes.
    let init: libloading::Symbol<'_, NvmlInitFn> = unsafe { nvml.get(b"nvmlInit_v2\0") }.ok()?;
    let shutdown: libloading::Symbol<'_, NvmlShutdownFn> =
        unsafe { nvml.get(b"nvmlShutdown\0") }.ok()?;
    let get_handle: libloading::Symbol<'_, NvmlDeviceGetHandleByIndexFn> =
        unsafe { nvml.get(b"nvmlDeviceGetHandleByIndex_v2\0") }.ok()?;
    let get_memory: libloading::Symbol<'_, NvmlDeviceGetMemoryInfoFn> =
        unsafe { nvml.get(b"nvmlDeviceGetMemoryInfo\0") }.ok()?;
    let get_processes: libloading::Symbol<'_, NvmlDeviceGetComputeRunningProcessesFn> =
        unsafe { nvml.get(b"nvmlDeviceGetComputeRunningProcesses_v3\0") }.ok()?;
    let get_name: libloading::Symbol<'_, NvmlDeviceGetNameFn> =
        unsafe { nvml.get(b"nvmlDeviceGetName\0") }.ok()?;

    // SAFETY: the function takes no arguments.
    let ret = unsafe { init() };
    if ret != NVML_SUCCESS {
        // Initialisation failed, so there is nothing to shut down.
        #[cfg(feature = "debug-output")]
        eprintln!("[NVML debug] nvmlInit_v2 returned {ret}");
        return None;
    }

    // Everything between init and shutdown lives in one block so that the
    // single shutdown call below covers every exit path.
    let gathered = 'session: {
        let mut handle: NvmlDevice = std::ptr::null_mut();
        // SAFETY: `handle` is a live local that the call may overwrite.
        let ret = unsafe { get_handle(idx, &raw mut handle) };
        if ret != NVML_SUCCESS {
            #[cfg(feature = "debug-output")]
            eprintln!("[NVML debug] nvmlDeviceGetHandleByIndex_v2(idx={idx}) returned {ret}");
            break 'session None;
        }

        let mut memory = NvmlMemoryInfo {
            total: 0,
            free: 0,
            used: 0,
        };
        // SAFETY: `memory` is a live, fully initialised `#[repr(C)]` value.
        let ret = unsafe { get_memory(handle, &raw mut memory) };
        if ret != NVML_SUCCESS {
            #[cfg(feature = "debug-output")]
            eprintln!("[NVML debug] nvmlDeviceGetMemoryInfo returned {ret}");
            break 'session None;
        }

        let name = read_device_name(&get_name, handle);
        let own_usage = read_process_used(&get_processes, handle, memory.total);
        Some((memory, name, own_usage))
    };

    // SAFETY: the function takes no arguments; init succeeded above.
    unsafe { shutdown() };

    let (memory, device_name, process_used_bytes) = gathered?;

    #[cfg(feature = "debug-output")]
    eprintln!(
        "[NVML debug] device {idx}: total={} free={} used={} per_process={:?} name={:?}",
        memory.total, memory.free, memory.used, process_used_bytes, device_name
    );

    Some(NvmlQueryResult {
        process_used_bytes,
        device_total: memory.total,
        device_free: memory.free,
        device_used: memory.used,
        device_name,
    })
}
/// Asks NVML for the name of `device` and converts it to an owned `String`.
///
/// The name is read into a zero-initialised buffer of
/// `NVML_DEVICE_NAME_BUFFER_SIZE` bytes and cut at the first NUL byte (or taken
/// whole if no NUL is present). Invalid UTF-8 is replaced lossily.
///
/// Returns `None` when the NVML call does not report success or the resulting
/// name is empty.
#[allow(unsafe_code)]
fn read_device_name(
    get_name: &libloading::Symbol<'_, NvmlDeviceGetNameFn>,
    device: NvmlDevice,
) -> Option<String> {
    let mut name_bytes = [0_u8; NVML_DEVICE_NAME_BUFFER_SIZE];
    #[allow(clippy::as_conversions, clippy::cast_possible_truncation)]
    let capacity = NVML_DEVICE_NAME_BUFFER_SIZE as u32;
    let out_ptr = name_bytes.as_mut_ptr().cast::<std::ffi::c_char>();

    // SAFETY: `out_ptr` points at `capacity` writable bytes owned by this
    // frame; the callee is assumed to honour the length it is given.
    let ret = unsafe { get_name(device, out_ptr, capacity) };
    if ret != NVML_SUCCESS {
        #[cfg(feature = "debug-output")]
        eprintln!("[NVML debug] nvmlDeviceGetName returned {ret}");
        return None;
    }

    // `split` always yields at least one piece: the bytes before the first NUL,
    // or the whole buffer when there is none.
    name_bytes
        .split(|&byte| byte == 0)
        .next()
        .map(|prefix| String::from_utf8_lossy(prefix).into_owned())
        .filter(|name| !name.is_empty())
}
#[allow(unsafe_code)]
fn read_process_used(
get_processes: &libloading::Symbol<'_, NvmlDeviceGetComputeRunningProcessesFn>,
device: NvmlDevice,
device_total: u64,
) -> Option<u64> {
#[allow(clippy::as_conversions, clippy::cast_possible_truncation)]
let mut count = NVML_MAX_PROCESSES as u32;
let mut infos = [NvmlProcessInfo {
pid: 0,
used_gpu_memory: 0,
gpu_instance_id: 0,
compute_instance_id: 0,
}; NVML_MAX_PROCESSES];
let ret = unsafe { get_processes(device, &raw mut count, infos.as_mut_ptr()) };
if ret != NVML_SUCCESS && ret != NVML_ERROR_INSUFFICIENT_SIZE {
#[cfg(feature = "debug-output")]
eprintln!(
"[NVML debug] nvmlDeviceGetComputeRunningProcesses_v3 returned {ret} \
(likely WDDM NVML_VALUE_NOT_AVAILABLE)"
);
return None;
}
let my_pid = std::process::id();
#[allow(clippy::as_conversions)]
let actual_count = (count as usize).min(NVML_MAX_PROCESSES);
let my_vram = infos
.get(..actual_count)
.and_then(|s| s.iter().find(|info| info.pid == my_pid))
.map(|info| info.used_gpu_memory);
match my_vram {
Some(u64::MAX) => {
#[cfg(feature = "debug-output")]
eprintln!(
"[NVML debug] process used_gpu_memory == u64::MAX (R570 sentinel); falling back"
);
None
}
Some(used) if used > device_total => {
#[cfg(feature = "debug-output")]
eprintln!(
"[NVML debug] process used_gpu_memory ({used}) > device total ({device_total}); \
falling back"
);
None
}
other => other,
}
}
#[allow(unsafe_code)]
#[must_use]
pub(super) fn list_compute_processes(idx: u32) -> Option<Vec<(u32, u64)>> {
let lib = unsafe { libloading::Library::new(NVML_LIB_PATH) }.ok()?;
let init: libloading::Symbol<'_, NvmlInitFn> = unsafe { lib.get(b"nvmlInit_v2\0") }.ok()?;
let shutdown: libloading::Symbol<'_, NvmlShutdownFn> =
unsafe { lib.get(b"nvmlShutdown\0") }.ok()?;
let get_handle: libloading::Symbol<'_, NvmlDeviceGetHandleByIndexFn> =
unsafe { lib.get(b"nvmlDeviceGetHandleByIndex_v2\0") }.ok()?;
let get_memory: libloading::Symbol<'_, NvmlDeviceGetMemoryInfoFn> =
unsafe { lib.get(b"nvmlDeviceGetMemoryInfo\0") }.ok()?;
let get_processes: libloading::Symbol<'_, NvmlDeviceGetComputeRunningProcessesFn> =
unsafe { lib.get(b"nvmlDeviceGetComputeRunningProcesses_v3\0") }.ok()?;
let ret = unsafe { init() };
if ret != NVML_SUCCESS {
#[cfg(feature = "debug-output")]
eprintln!("[NVML debug] nvmlInit_v2 returned {ret} in list_compute_processes");
return None;
}
let mut device: NvmlDevice = std::ptr::null_mut();
let ret = unsafe { get_handle(idx, &raw mut device) };
if ret != NVML_SUCCESS {
#[cfg(feature = "debug-output")]
eprintln!(
"[NVML debug] nvmlDeviceGetHandleByIndex_v2(idx={idx}) returned {ret} \
in list_compute_processes"
);
unsafe { shutdown() };
return None;
}
let mut mem_info = NvmlMemoryInfo {
total: 0,
free: 0,
used: 0,
};
let ret = unsafe { get_memory(device, &raw mut mem_info) };
if ret != NVML_SUCCESS {
#[cfg(feature = "debug-output")]
eprintln!("[NVML debug] nvmlDeviceGetMemoryInfo returned {ret} in list_compute_processes");
unsafe { shutdown() };
return None;
}
let device_total = mem_info.total;
#[allow(clippy::as_conversions, clippy::cast_possible_truncation)]
let mut count = NVML_MAX_PROCESSES as u32;
let mut infos = [NvmlProcessInfo {
pid: 0,
used_gpu_memory: 0,
gpu_instance_id: 0,
compute_instance_id: 0,
}; NVML_MAX_PROCESSES];
let ret = unsafe { get_processes(device, &raw mut count, infos.as_mut_ptr()) };
unsafe { shutdown() };
if ret != NVML_SUCCESS && ret != NVML_ERROR_INSUFFICIENT_SIZE {
#[cfg(feature = "debug-output")]
eprintln!(
"[NVML debug] nvmlDeviceGetComputeRunningProcesses_v3 returned {ret} \
in list_compute_processes (likely WDDM NVML_VALUE_NOT_AVAILABLE)"
);
return None;
}
#[allow(clippy::as_conversions)]
let actual_count = (count as usize).min(NVML_MAX_PROCESSES);
let rows: Vec<(u32, u64)> = infos
.get(..actual_count)
.map(|s| {
s.iter()
.filter_map(|info| {
if info.used_gpu_memory == u64::MAX {
#[cfg(feature = "debug-output")]
eprintln!(
"[NVML debug] list_compute_processes: pid {} used_gpu_memory == u64::MAX \
(R570 sentinel); dropping row",
info.pid
);
None
} else if info.used_gpu_memory > device_total {
#[cfg(feature = "debug-output")]
eprintln!(
"[NVML debug] list_compute_processes: pid {} used_gpu_memory ({}) > \
device total ({device_total}); dropping row",
info.pid, info.used_gpu_memory
);
None
} else {
Some((info.pid, info.used_gpu_memory))
}
})
.collect()
})
.unwrap_or_default();
#[cfg(feature = "debug-output")]
eprintln!(
"[NVML debug] list_compute_processes(idx={idx}): {} row(s) after filtering \
({} reported by NVML, {} buffer cap)",
rows.len(),
count,
NVML_MAX_PROCESSES
);
Some(rows)
}
/// Returns the number of devices NVML reports.
///
/// Opens the NVML library, initialises it, asks for the device count and shuts
/// NVML down again before looking at the result.
///
/// Returns `None` when the library cannot be opened, a symbol is missing,
/// initialisation fails, or the count call does not report success.
#[allow(unsafe_code)]
pub(super) fn device_count() -> Option<u32> {
    // SAFETY: opening a shared library runs its initialisers; this module
    // trusts the library named by NVML_LIB_PATH.
    let nvml = unsafe { libloading::Library::new(NVML_LIB_PATH) }.ok()?;

    // SAFETY (every lookup below): each symbol is bound to the alias this file
    // declares for it; the aliases are assumed to match the NVML C prototypes.
    let init: libloading::Symbol<'_, NvmlInitFn> = unsafe { nvml.get(b"nvmlInit_v2\0") }.ok()?;
    let shutdown: libloading::Symbol<'_, NvmlShutdownFn> =
        unsafe { nvml.get(b"nvmlShutdown\0") }.ok()?;
    let get_count: libloading::Symbol<'_, NvmlDeviceGetCountFn> =
        unsafe { nvml.get(b"nvmlDeviceGetCount_v2\0") }.ok()?;

    // SAFETY: the function takes no arguments.
    let ret = unsafe { init() };
    if ret != NVML_SUCCESS {
        // Initialisation failed, so there is nothing to shut down.
        #[cfg(feature = "debug-output")]
        eprintln!("[NVML debug] nvmlInit_v2 returned {ret} in device_count");
        return None;
    }

    let mut count: u32 = 0;
    // SAFETY: `count` is a live local that the call may overwrite.
    let ret = unsafe { get_count(&raw mut count) };
    // SAFETY: the function takes no arguments; init succeeded above.
    unsafe { shutdown() };

    if ret != NVML_SUCCESS {
        #[cfg(feature = "debug-output")]
        eprintln!("[NVML debug] nvmlDeviceGetCount_v2 returned {ret}");
        return None;
    }

    #[cfg(feature = "debug-output")]
    eprintln!("[NVML debug] device_count = {count}");
    Some(count)
}