#![cfg(feature = "gpu-topology")]
use crate::misc::read_from_file;
use crate::{Cpumask, NR_CPU_IDS};
use nvml_wrapper::bitmasks::InitFlags;
use nvml_wrapper::enum_wrappers::device::{Clock, PerformanceState, TopologyLevel};
use nvml_wrapper::Nvml;
use nvml_wrapper_sys::bindings::NVML_AFFINITY_SCOPE_NODE;
use std::collections::BTreeMap;
use std::path::Path;
/// Identifies a GPU in a vendor-specific way.
///
/// Only NVIDIA devices are represented here; they are keyed by the index
/// that NVML reports for the device.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum GpuIndex {
    /// An NVIDIA GPU, identified by its NVML device index.
    Nvidia {
        /// Index of the device as returned by NVML.
        nvml_id: u32,
    },
}
/// Description of a single GPU and its placement in the host topology.
///
/// Values are populated by [`create_gpus`] from NVML queries and sysfs.
#[derive(Clone, Debug)]
pub struct Gpu {
    /// Vendor-specific identifier of this GPU.
    pub index: GpuIndex,
    /// NUMA node read from the device's sysfs `numa_node` file; `0` when the
    /// file cannot be read or parsed.
    pub node_id: usize,
    /// Maximum customer boost clock for the graphics domain as reported by
    /// NVML; `0` when the query fails.
    pub max_graphics_clock: usize,
    /// Maximum customer boost clock for the SM domain as reported by NVML;
    /// `0` when the query fails.
    pub max_sm_clock: usize,
    /// Maximum customer boost clock for the memory domain as reported by
    /// NVML; `0` when the query fails.
    pub max_mem_clock: usize,
    /// Multiprocessor count from the NVML device attributes; `0` when the
    /// attributes cannot be queried.
    pub multiproc_count: usize,
    /// Total device memory taken from NVML's memory info.
    pub memory: u64,
    /// CPU affinity of the GPU at NUMA-node scope; `Cpumask::new()` when the
    /// affinity query fails.
    pub cpu_mask: Cpumask,
    /// GPUs that NVML reports as nearest to this one.
    pub nearest: Vec<GpuIndex>,
    /// Performance state reported by NVML; `PerformanceState::Unknown` when
    /// the query fails.
    pub perf_state: PerformanceState,
}
pub fn create_gpus() -> BTreeMap<usize, Vec<Gpu>> {
let mut gpus: BTreeMap<usize, Vec<Gpu>> = BTreeMap::new();
let Ok(nvml) = Nvml::init_with_flags(InitFlags::NO_GPUS) else {
return BTreeMap::new();
};
if let Ok(nvidia_gpu_count) = nvml.device_count() {
for i in 0..nvidia_gpu_count {
let Ok(nvidia_gpu) = nvml.device_by_index(i) else {
continue;
};
let graphics_boost_clock = nvidia_gpu
.max_customer_boost_clock(Clock::Graphics)
.unwrap_or(0);
let sm_boost_clock = nvidia_gpu.max_customer_boost_clock(Clock::SM).unwrap_or(0);
let mem_boost_clock = nvidia_gpu
.max_customer_boost_clock(Clock::Memory)
.unwrap_or(0);
let Ok(memory_info) = nvidia_gpu.memory_info() else {
continue;
};
let Ok(pci_info) = nvidia_gpu.pci_info() else {
continue;
};
let Ok(index) = nvidia_gpu.index() else {
continue;
};
let cpu_mask = if let Ok(cpu_affinity) =
nvidia_gpu.cpu_affinity_within_scope(*NR_CPU_IDS, NVML_AFFINITY_SCOPE_NODE)
{
#[cfg(target_pointer_width = "32")]
let cpu_affinity: Vec<u64> = cpu_affinity
.chunks_exact(2)
.map(|pair| (pair[1] as u64) << 32 | pair[0] as u64)
.collect();
Cpumask::from_vec(cpu_affinity)
} else {
Cpumask::new()
};
let multiproc_count = if let Ok(attributes) = nvidia_gpu.attributes() {
attributes.multiprocessor_count
} else {
0
};
let nearest_gpu_topology_level = if nvidia_gpu.is_multi_gpu_board().unwrap_or(false) {
TopologyLevel::Internal
} else {
TopologyLevel::HostBridge
};
let nearest = if let Ok(nearest_gpus) =
nvidia_gpu.topology_nearest_gpus(nearest_gpu_topology_level)
{
nearest_gpus
.iter()
.filter_map(|d| {
if let Ok(idx) = d.index() {
Some(GpuIndex::Nvidia { nvml_id: idx })
} else {
None
}
})
.collect()
} else {
Vec::new()
};
let perf_state = nvidia_gpu
.performance_state()
.unwrap_or(PerformanceState::Unknown);
let bus_id = pci_info.bus_id.to_lowercase();
let fixed_bus_id = bus_id.strip_prefix("0000").unwrap_or("");
let numa_path = format!("/sys/bus/pci/devices/{}/numa_node", fixed_bus_id);
let numa_node = read_from_file(Path::new(&numa_path)).unwrap_or(0_usize);
let gpu = Gpu {
index: GpuIndex::Nvidia { nvml_id: index },
node_id: numa_node as usize,
max_graphics_clock: graphics_boost_clock as usize,
max_sm_clock: sm_boost_clock as usize,
max_mem_clock: mem_boost_clock as usize,
multiproc_count: multiproc_count as usize,
memory: memory_info.total,
cpu_mask,
nearest,
perf_state,
};
gpus.entry(gpu.node_id).or_default().push(gpu);
}
}
gpus
}