// Submodules of the detection layer.
//
// Modules preceded by `#[cfg(feature = "...")]` are vendor/platform backends that
// are compiled only when the matching cargo feature is enabled. Modules without
// a `cfg` gate (bandwidth, command, disk, environment, interconnect, numa, pcie,
// platform) are always compiled.
#[cfg(feature = "amd-xdna")]
pub(crate) mod amd_xdna;
#[cfg(feature = "apple")]
pub(crate) mod apple;
pub mod bandwidth;
#[cfg(feature = "cerebras")]
pub(crate) mod cerebras;
pub(crate) mod command;
#[cfg(feature = "cuda")]
pub mod cuda;
pub(crate) mod disk;
pub(crate) mod environment;
#[cfg(feature = "gaudi")]
pub mod gaudi;
#[cfg(feature = "graphcore")]
pub(crate) mod graphcore;
#[cfg(feature = "groq")]
pub(crate) mod groq;
#[cfg(feature = "intel-npu")]
pub(crate) mod intel_npu;
#[cfg(feature = "intel-oneapi")]
pub(crate) mod intel_oneapi;
pub mod interconnect;
#[cfg(feature = "mediatek-apu")]
pub(crate) mod mediatek_apu;
// Note: the cargo feature is `aws-neuron` but the module is named `neuron`.
#[cfg(feature = "aws-neuron")]
pub(crate) mod neuron;
pub(crate) mod numa;
pub mod pcie;
#[cfg(feature = "qualcomm")]
pub(crate) mod qualcomm;
#[cfg(feature = "rocm")]
pub(crate) mod rocm;
#[cfg(feature = "samsung-npu")]
pub(crate) mod samsung_npu;
#[cfg(feature = "tpu")]
pub(crate) mod tpu;
#[cfg(feature = "vulkan")]
pub mod vulkan;
// Note: the cargo feature is `windows-wmi` but the module is named `windows`.
#[cfg(feature = "windows-wmi")]
pub(crate) mod windows;
pub mod platform;
use std::collections::HashMap;
use std::path::Path;
use std::time::{Duration, Instant};
use tracing::debug;
use crate::error::DetectionError;
use crate::hardware::AcceleratorType;
use crate::profile::AcceleratorProfile;
use crate::registry::{AcceleratorRegistry, Backend, DetectBuilder};
use crate::system_io::SystemIo;
/// Output of one backend detection run: the profiles it found and the
/// detection errors it reported (these end up in the registry's `warnings`).
type DetectResult = (Vec<AcceleratorProfile>, Vec<DetectionError>);
/// Same as [`DetectResult`], plus how long the backend's detection took.
type TimedDetectResult = (Vec<AcceleratorProfile>, Vec<DetectionError>, Duration);
// Master table of synchronous detection backends.
//
// Expands to one `$callback!(...)` invocation per backend, in the order listed
// here. Each row passes four arguments:
//   1. the cargo feature name (the callbacks in this file use it in
//      `#[cfg(feature = ...)]`, so rows for disabled features compile to nothing),
//   2. the `Backend` variant checked with `builder.backend_enabled(..)`,
//   3. a short backend name (used as the key of the timing map in
//      `detect_with_builder_timed`; note it uses `_` where the feature uses `-`),
//   4. the detection function, called as `f(&mut profiles, &mut warnings)`.
//
// The row order determines the order in which threads are spawned and joined,
// and therefore the order of profiles in the resulting registry.
macro_rules! backend_table {
($callback:ident) => {
$callback!("cuda", Backend::Cuda, "cuda", cuda::detect_cuda);
$callback!("rocm", Backend::Rocm, "rocm", rocm::detect_rocm);
$callback!(
"apple",
Backend::Apple,
"apple",
apple::detect_metal_and_ane
);
$callback!("vulkan", Backend::Vulkan, "vulkan", vulkan::detect_vulkan);
$callback!(
"intel-npu",
Backend::IntelNpu,
"intel_npu",
intel_npu::detect_intel_npu
);
$callback!(
"amd-xdna",
Backend::AmdXdna,
"amd_xdna",
amd_xdna::detect_amd_xdna
);
$callback!("tpu", Backend::Tpu, "tpu", tpu::detect_tpu);
$callback!("gaudi", Backend::Gaudi, "gaudi", gaudi::detect_gaudi);
$callback!(
"aws-neuron",
Backend::AwsNeuron,
"aws_neuron",
neuron::detect_aws_neuron
);
$callback!(
"intel-oneapi",
Backend::IntelOneApi,
"intel_oneapi",
intel_oneapi::detect_intel_oneapi
);
$callback!(
"qualcomm",
Backend::Qualcomm,
"qualcomm",
qualcomm::detect_qualcomm_ai100
);
$callback!(
"cerebras",
Backend::Cerebras,
"cerebras",
cerebras::detect_cerebras_wse
);
$callback!(
"graphcore",
Backend::Graphcore,
"graphcore",
graphcore::detect_graphcore_ipu
);
$callback!("groq", Backend::Groq, "groq", groq::detect_groq_lpu);
$callback!(
"samsung-npu",
Backend::SamsungNpu,
"samsung_npu",
samsung_npu::detect_samsung_npu
);
$callback!(
"mediatek-apu",
Backend::MediaTekApu,
"mediatek_apu",
mediatek_apu::detect_mediatek_apu
);
$callback!(
"windows-wmi",
Backend::WindowsWmi,
"windows_wmi",
windows::detect_windows_gpu
);
};
}
// Backends that provide a dedicated `*_async` detection function.
//
// Expands to one `$callback!(feature, backend, async_fn)` invocation per row.
// `detect_with_builder_async` calls each `async_fn` with no arguments and hands
// the returned future to `tokio::spawn`.
//
// Together with `sysfs_backends!` this covers the same backends as
// `backend_table!`; the tables must be kept in sync by hand.
// NOTE(review): the name suggests these backends shell out to vendor CLI
// tools — confirm against the backend modules.
#[cfg(feature = "async-detect")]
macro_rules! async_cli_backends {
($callback:ident) => {
$callback!("cuda", Backend::Cuda, cuda::detect_cuda_async);
$callback!("vulkan", Backend::Vulkan, vulkan::detect_vulkan_async);
$callback!("gaudi", Backend::Gaudi, gaudi::detect_gaudi_async);
$callback!(
"aws-neuron",
Backend::AwsNeuron,
neuron::detect_aws_neuron_async
);
$callback!("apple", Backend::Apple, apple::detect_metal_and_ane_async);
$callback!(
"intel-oneapi",
Backend::IntelOneApi,
intel_oneapi::detect_intel_oneapi_async
);
};
}
// Backends that are detected with their synchronous function in async mode.
//
// Expands to one `$callback!(feature, backend, detect_fn)` invocation per row.
// `detect_with_builder_async` runs these functions one after another, in the
// order listed, inside a single `tokio::task::spawn_blocking` task, calling
// each as `detect_fn(&mut profiles, &mut warnings)`.
//
// Together with `async_cli_backends!` this covers the same backends as
// `backend_table!`; the tables must be kept in sync by hand.
// NOTE(review): the name suggests these backends only read sysfs, yet
// `windows-wmi` is listed here too — confirm the intended grouping.
#[cfg(feature = "async-detect")]
macro_rules! sysfs_backends {
($callback:ident) => {
$callback!("rocm", Backend::Rocm, rocm::detect_rocm);
$callback!("intel-npu", Backend::IntelNpu, intel_npu::detect_intel_npu);
$callback!("amd-xdna", Backend::AmdXdna, amd_xdna::detect_amd_xdna);
$callback!("tpu", Backend::Tpu, tpu::detect_tpu);
$callback!(
"qualcomm",
Backend::Qualcomm,
qualcomm::detect_qualcomm_ai100
);
$callback!("cerebras", Backend::Cerebras, cerebras::detect_cerebras_wse);
$callback!(
"graphcore",
Backend::Graphcore,
graphcore::detect_graphcore_ipu
);
$callback!("groq", Backend::Groq, groq::detect_groq_lpu);
$callback!(
"samsung-npu",
Backend::SamsungNpu,
samsung_npu::detect_samsung_npu
);
$callback!(
"mediatek-apu",
Backend::MediaTekApu,
mediatek_apu::detect_mediatek_apu
);
$callback!(
"windows-wmi",
Backend::WindowsWmi,
windows::detect_windows_gpu
);
};
}
/// Result of a detection run together with timing information, as produced by
/// `detect_with_builder_timed`.
#[derive(Debug, Clone)]
pub struct TimedDetection {
/// The registry assembled by the detection run.
pub registry: AcceleratorRegistry,
/// Elapsed time per stage. Backend entries are keyed by the short backend
/// name from `backend_table!` (e.g. `"cuda"`, `"intel_npu"`); the extra keys
/// `"vulkan_sysfs"`, `"_enrich"` and `"_system_io"` cover the Vulkan sysfs
/// fallback, the bandwidth/PCIe/NUMA enrichment and the system I/O probes.
pub timings: HashMap<String, Duration>,
/// Wall-clock time of the whole run, measured from the start of detection
/// until the result is assembled.
pub total: Duration,
}
impl AcceleratorRegistry {
    /// Runs hardware detection with a freshly constructed [`DetectBuilder`]
    /// and returns the resulting registry.
    pub fn detect() -> Self {
        let builder = DetectBuilder::new();
        detect_with_builder(builder)
    }

    /// Like [`AcceleratorRegistry::detect`], but also reports how long each
    /// detection stage took.
    pub fn detect_with_timing() -> TimedDetection {
        let builder = DetectBuilder::new();
        detect_with_builder_timed(builder)
    }
}
pub(crate) fn detect_with_builder(builder: DetectBuilder) -> AcceleratorRegistry {
let mut all_profiles = Vec::with_capacity(8);
all_profiles.push(cpu_profile());
let mut all_warnings: Vec<DetectionError> = Vec::new();
let use_threads = builder.enabled_count() >= 2;
if use_threads {
std::thread::scope(|s| {
let mut handles: Vec<std::thread::ScopedJoinHandle<'_, DetectResult>> = Vec::new();
macro_rules! do_spawn {
($feature:literal, $backend:expr, $name:literal, $detect_fn:expr) => {
#[cfg(feature = $feature)]
if builder.backend_enabled($backend) {
handles.push(s.spawn(|| {
let mut p = Vec::new();
let mut w = Vec::new();
$detect_fn(&mut p, &mut w);
(p, w)
}));
}
};
}
backend_table!(do_spawn);
for handle in handles {
if let Ok((profiles, warnings)) = handle.join() {
all_profiles.extend(profiles);
all_warnings.extend(warnings);
}
}
});
} else {
macro_rules! do_run {
($feature:literal, $backend:expr, $name:literal, $detect_fn:expr) => {
#[cfg(feature = $feature)]
if builder.backend_enabled($backend) {
$detect_fn(&mut all_profiles, &mut all_warnings);
}
};
}
backend_table!(do_run);
}
#[cfg(feature = "vulkan")]
{
let has_vulkan = all_profiles
.iter()
.any(|p| matches!(p.accelerator, AcceleratorType::VulkanGpu { .. }));
let has_dedicated = all_profiles.iter().any(|p| {
matches!(
p.accelerator,
AcceleratorType::CudaGpu { .. } | AcceleratorType::RocmGpu { .. }
)
});
if !has_vulkan && !has_dedicated && builder.backend_enabled(Backend::Vulkan) {
vulkan::detect_vulkan_sysfs(&mut all_profiles, &mut all_warnings);
}
}
let has_dedicated = all_profiles.iter().any(|p| {
matches!(
p.accelerator,
AcceleratorType::CudaGpu { .. } | AcceleratorType::RocmGpu { .. }
)
});
if has_dedicated {
all_profiles.retain(|p| !matches!(p.accelerator, AcceleratorType::VulkanGpu { .. }));
}
bandwidth::enrich_bandwidth(&mut all_profiles, &mut all_warnings);
let nvidia_pci = list_driver_pci_addrs("nvidia");
let amdgpu_pci = list_driver_pci_addrs("amdgpu");
pcie::enrich_pcie(&mut all_profiles, &nvidia_pci, &amdgpu_pci);
numa::enrich_numa(&mut all_profiles, &nvidia_pci, &amdgpu_pci);
let system_interconnects = interconnect::detect_interconnects(&mut all_warnings);
let system_storage = disk::detect_storage();
let system_environment = environment::detect_environment();
let system_io = SystemIo {
interconnects: system_interconnects,
storage: system_storage,
environment: Some(system_environment),
};
debug!(
count = all_profiles.len(),
warnings = all_warnings.len(),
interconnects = system_io.interconnects.len(),
storage_devices = system_io.storage.len(),
"accelerator detection complete"
);
AcceleratorRegistry {
schema_version: crate::registry::SCHEMA_VERSION,
profiles: all_profiles,
warnings: all_warnings,
system_io,
}
}
pub(crate) fn detect_with_builder_timed(builder: DetectBuilder) -> TimedDetection {
let wall_start = Instant::now();
let mut all_profiles = Vec::with_capacity(8);
all_profiles.push(cpu_profile());
let mut all_warnings: Vec<DetectionError> = Vec::new();
let mut timings: HashMap<String, Duration> = HashMap::new();
let use_threads = builder.enabled_count() >= 2;
if use_threads {
std::thread::scope(|s| {
let mut handles: Vec<(&str, std::thread::ScopedJoinHandle<'_, TimedDetectResult>)> =
Vec::new();
macro_rules! do_spawn_timed {
($feature:literal, $backend:expr, $name:literal, $detect_fn:expr) => {
#[cfg(feature = $feature)]
if builder.backend_enabled($backend) {
handles.push((
$name,
s.spawn(|| {
let start = Instant::now();
let mut p = Vec::new();
let mut w = Vec::new();
$detect_fn(&mut p, &mut w);
(p, w, start.elapsed())
}),
));
}
};
}
backend_table!(do_spawn_timed);
for (name, handle) in handles {
if let Ok((profiles, warnings, duration)) = handle.join() {
all_profiles.extend(profiles);
all_warnings.extend(warnings);
timings.insert(name.into(), duration);
}
}
});
} else {
macro_rules! do_run_timed {
($feature:literal, $backend:expr, $name:literal, $detect_fn:expr) => {
#[cfg(feature = $feature)]
if builder.backend_enabled($backend) {
let start = Instant::now();
$detect_fn(&mut all_profiles, &mut all_warnings);
timings.insert($name.into(), start.elapsed());
}
};
}
backend_table!(do_run_timed);
}
#[cfg(feature = "vulkan")]
{
let has_vulkan = all_profiles
.iter()
.any(|p| matches!(p.accelerator, AcceleratorType::VulkanGpu { .. }));
let has_dedicated = all_profiles.iter().any(|p| {
matches!(
p.accelerator,
AcceleratorType::CudaGpu { .. } | AcceleratorType::RocmGpu { .. }
)
});
if !has_vulkan && !has_dedicated && builder.backend_enabled(Backend::Vulkan) {
let start = Instant::now();
vulkan::detect_vulkan_sysfs(&mut all_profiles, &mut all_warnings);
timings.insert("vulkan_sysfs".into(), start.elapsed());
}
}
let has_dedicated = all_profiles.iter().any(|p| {
matches!(
p.accelerator,
AcceleratorType::CudaGpu { .. } | AcceleratorType::RocmGpu { .. }
)
});
if has_dedicated {
all_profiles.retain(|p| !matches!(p.accelerator, AcceleratorType::VulkanGpu { .. }));
}
let enrich_start = Instant::now();
bandwidth::enrich_bandwidth(&mut all_profiles, &mut all_warnings);
let nvidia_pci = list_driver_pci_addrs("nvidia");
let amdgpu_pci = list_driver_pci_addrs("amdgpu");
pcie::enrich_pcie(&mut all_profiles, &nvidia_pci, &amdgpu_pci);
numa::enrich_numa(&mut all_profiles, &nvidia_pci, &amdgpu_pci);
timings.insert("_enrich".into(), enrich_start.elapsed());
let sysio_start = Instant::now();
let system_interconnects = interconnect::detect_interconnects(&mut all_warnings);
let system_storage = disk::detect_storage();
let system_environment = environment::detect_environment();
let system_io = SystemIo {
interconnects: system_interconnects,
storage: system_storage,
environment: Some(system_environment),
};
timings.insert("_system_io".into(), sysio_start.elapsed());
let registry = AcceleratorRegistry {
schema_version: crate::registry::SCHEMA_VERSION,
profiles: all_profiles,
warnings: all_warnings,
system_io,
};
TimedDetection {
registry,
timings,
total: wall_start.elapsed(),
}
}
/// Returns the sorted names of the entries in `/sys/bus/pci/drivers/<driver>`
/// that look like PCI addresses.
///
/// An entry qualifies when its name contains at least one `:` and one `.` and
/// otherwise consists only of ASCII hex digits (for example `0000:01:00.0`).
/// A missing or unreadable driver directory yields an empty list.
pub(super) fn list_driver_pci_addrs(driver: &str) -> Vec<String> {
    /// Single-pass check of the address shape described above.
    fn is_pci_addr(name: &[u8]) -> bool {
        let mut saw_colon = false;
        let mut saw_dot = false;
        for &byte in name {
            match byte {
                b':' => saw_colon = true,
                b'.' => saw_dot = true,
                _ if byte.is_ascii_hexdigit() => {}
                _ => return false,
            }
        }
        saw_colon && saw_dot
    }

    let driver_dir = Path::new("/sys/bus/pci/drivers").join(driver);
    let Ok(entries) = std::fs::read_dir(&driver_dir) else {
        return Vec::new();
    };

    let mut addrs = Vec::new();
    // Entries that cannot be read are skipped rather than aborting the scan.
    for entry in entries.flatten() {
        let file_name = entry.file_name();
        if is_pci_addr(file_name.as_encoded_bytes()) {
            addrs.push(file_name.to_string_lossy().into_owned());
        }
    }
    addrs.sort();
    addrs
}
/// Iterates over the numeric suffixes of `/dev` entries named `<prefix><digits>`.
///
/// Only names whose remainder after `prefix` is a non-empty run of ASCII
/// digits that fits in a `u32` are yielded; everything else is skipped. The
/// iterator is empty when `/dev` cannot be read. Order follows the directory
/// listing and is therefore unspecified.
pub(super) fn iter_dev_devices(prefix: &str) -> impl Iterator<Item = u32> + '_ {
    let dev_entries = std::fs::read_dir("/dev").into_iter().flatten();
    dev_entries.filter_map(move |entry| {
        let entry = entry.ok()?;
        let file_name = entry.file_name();
        let lossy = file_name.to_string_lossy();
        match lossy.strip_prefix(prefix) {
            Some(digits) if !digits.is_empty() && digits.bytes().all(|b| b.is_ascii_digit()) => {
                digits.parse::<u32>().ok()
            }
            _ => None,
        }
    })
}
/// Reports whether any entry in `/dev` has a name starting with `prefix`.
///
/// Unlike `iter_dev_devices`, the part after the prefix is not inspected.
/// Returns `false` when `/dev` cannot be read.
pub(super) fn has_dev_device(prefix: &str) -> bool {
    let Ok(dev_entries) = std::fs::read_dir("/dev") else {
        return false;
    };
    for entry in dev_entries.flatten() {
        if entry.file_name().to_string_lossy().starts_with(prefix) {
            return true;
        }
    }
    false
}
pub(crate) fn cpu_profile() -> AcceleratorProfile {
AcceleratorProfile {
accelerator: AcceleratorType::Cpu,
available: true,
memory_bytes: detect_cpu_memory(),
..Default::default()
}
}
/// Best-effort lookup of the total system memory in bytes.
///
/// Sources are tried in this order:
/// 1. the `MemTotal:` line of `/proc/meminfo` (value in kB, multiplied by
///    1024 with saturation),
/// 2. the output of `sysctl -n hw.memsize` (already in bytes),
/// 3. a hard-coded fallback of 16 GiB.
pub(crate) fn detect_cpu_memory() -> u64 {
    const FALLBACK_BYTES: u64 = 16 * 1024 * 1024 * 1024;

    let meminfo_kb = read_sysfs_string(std::path::Path::new("/proc/meminfo"), 64 * 1024)
        .and_then(|info| {
            info.lines()
                .filter(|line| line.starts_with("MemTotal:"))
                // Second whitespace-separated field is the amount in kB.
                .find_map(|line| line.split_whitespace().nth(1)?.parse::<u64>().ok())
        });
    if let Some(kb) = meminfo_kb {
        return kb.saturating_mul(1024);
    }

    let sysctl_bytes =
        command::run_tool("sysctl", &["-n", "hw.memsize"], command::DEFAULT_TIMEOUT)
            .ok()
            .and_then(|output| output.stdout.trim().parse::<u64>().ok());
    match sysctl_bytes {
        Some(bytes) => bytes,
        None => {
            debug!("could not read system memory, defaulting to 16 GiB");
            FALLBACK_BYTES
        }
    }
}
/// Reads a small text file (at most 64 bytes) and parses its trimmed content
/// as a `u64`.
///
/// Returns `None` when the file cannot be read, is too large, is not valid
/// UTF-8, or does not contain a decimal `u64`.
pub(super) fn read_sysfs_u64(path: &Path) -> Option<u64> {
    let raw = read_sysfs_string(path, 64)?;
    raw.trim().parse().ok()
}
/// Reads the whole file at `path` as UTF-8, refusing files larger than
/// `max_bytes`.
///
/// Returns `None` when the file cannot be opened or read, when it holds more
/// than `max_bytes` bytes, or when its content is not valid UTF-8. The content
/// is returned untrimmed.
///
/// The file is read until end-of-file rather than with a single `read` call:
/// one `read` may legally return fewer bytes than are available (common for
/// multi-record procfs files and after signal interruption), which would
/// silently truncate the result.
pub(super) fn read_sysfs_string(path: &Path, max_bytes: usize) -> Option<String> {
    use std::io::Read;

    let file = std::fs::File::open(path).ok()?;
    // Allow one byte beyond the limit so an oversized file can be told apart
    // from one that is exactly `max_bytes` long, without reading it all.
    let limit = u64::try_from(max_bytes)
        .unwrap_or(u64::MAX)
        .saturating_add(1);
    let mut buf = Vec::new();
    let read = file.take(limit).read_to_end(&mut buf).ok()?;
    if read > max_bytes {
        return None;
    }
    String::from_utf8(buf).ok()
}
/// Async counterpart of `detect_with_builder`.
///
/// Backends listed in `async_cli_backends!` each run as their own
/// `tokio::spawn` task; backends listed in `sysfs_backends!` run sequentially
/// inside one `spawn_blocking` task. Results are merged in that order (async
/// tasks first, then the blocking batch), after which the same Vulkan
/// fallback, Vulkan de-duplication, enrichment and system I/O steps as the
/// synchronous path are applied.
///
/// A task that fails to join (panic or cancellation) contributes no profiles
/// or warnings; the failure is logged instead of being dropped silently.
#[cfg(feature = "async-detect")]
pub(crate) async fn detect_with_builder_async(builder: DetectBuilder) -> AcceleratorRegistry {
    let mut all_profiles = vec![cpu_profile()];
    let mut all_warnings: Vec<DetectionError> = Vec::new();
    debug!(
        backends = builder.enabled_count(),
        "starting async detection"
    );
    // Each handle is paired with the backend's feature name so a failed join
    // can be attributed to a backend.
    let mut handles: Vec<(&'static str, tokio::task::JoinHandle<DetectResult>)> = Vec::new();
    macro_rules! do_spawn_async {
        ($feature:literal, $backend:expr, $detect_fn:path) => {
            #[cfg(feature = $feature)]
            if builder.backend_enabled($backend) {
                handles.push(($feature, tokio::spawn($detect_fn())));
            }
        };
    }
    async_cli_backends!(do_spawn_async);
    // The blocking closure must own its data, hence the clone.
    let sysfs_builder = builder.clone();
    let sysfs_handle = tokio::task::spawn_blocking(move || {
        let mut profiles = Vec::new();
        let mut warnings: Vec<DetectionError> = Vec::new();
        macro_rules! do_run_sysfs {
            ($feature:literal, $backend:expr, $detect_fn:expr) => {
                #[cfg(feature = $feature)]
                if sysfs_builder.backend_enabled($backend) {
                    $detect_fn(&mut profiles, &mut warnings);
                }
            };
        }
        sysfs_backends!(do_run_sysfs);
        (profiles, warnings)
    });
    for (backend, handle) in handles {
        match handle.await {
            Ok((profiles, warnings)) => {
                all_profiles.extend(profiles);
                all_warnings.extend(warnings);
            }
            Err(err) => {
                tracing::warn!(
                    backend,
                    error = %err,
                    "async detection task failed; its results were discarded"
                );
            }
        }
    }
    match sysfs_handle.await {
        Ok((profiles, warnings)) => {
            all_profiles.extend(profiles);
            all_warnings.extend(warnings);
        }
        Err(err) => {
            tracing::warn!(
                error = %err,
                "blocking detection task failed; results of its backends were discarded"
            );
        }
    }
    #[cfg(feature = "vulkan")]
    {
        let has_vulkan = all_profiles
            .iter()
            .any(|p| matches!(p.accelerator, AcceleratorType::VulkanGpu { .. }));
        let has_dedicated = all_profiles.iter().any(|p| {
            matches!(
                p.accelerator,
                AcceleratorType::CudaGpu { .. } | AcceleratorType::RocmGpu { .. }
            )
        });
        // Fallback: only consulted when neither Vulkan nor a CUDA/ROCm device
        // was reported by the regular backends.
        if !has_vulkan && !has_dedicated && builder.backend_enabled(Backend::Vulkan) {
            vulkan::detect_vulkan_sysfs(&mut all_profiles, &mut all_warnings);
        }
    }
    // Re-evaluated after the fallback so the de-duplication below sees the
    // final profile list.
    let has_dedicated = all_profiles.iter().any(|p| {
        matches!(
            p.accelerator,
            AcceleratorType::CudaGpu { .. } | AcceleratorType::RocmGpu { .. }
        )
    });
    if has_dedicated {
        // CUDA/ROCm profiles take precedence: drop every Vulkan profile.
        all_profiles.retain(|p| !matches!(p.accelerator, AcceleratorType::VulkanGpu { .. }));
    }
    bandwidth::enrich_bandwidth_async(&mut all_profiles, &mut all_warnings).await;
    let nvidia_pci = list_driver_pci_addrs("nvidia");
    let amdgpu_pci = list_driver_pci_addrs("amdgpu");
    pcie::enrich_pcie(&mut all_profiles, &nvidia_pci, &amdgpu_pci);
    numa::enrich_numa(&mut all_profiles, &nvidia_pci, &amdgpu_pci);
    let (system_interconnects, ic_warnings) = interconnect::detect_interconnects_async().await;
    all_warnings.extend(ic_warnings);
    // Storage detection runs on the blocking pool; a failed task falls back to
    // the default (empty) value.
    let system_storage = tokio::task::spawn_blocking(disk::detect_storage)
        .await
        .unwrap_or_default();
    let system_environment = environment::detect_environment();
    let system_io = SystemIo {
        interconnects: system_interconnects,
        storage: system_storage,
        environment: Some(system_environment),
    };
    debug!(
        count = all_profiles.len(),
        warnings = all_warnings.len(),
        interconnects = system_io.interconnects.len(),
        storage_devices = system_io.storage.len(),
        "async accelerator detection complete"
    );
    AcceleratorRegistry {
        schema_version: crate::registry::SCHEMA_VERSION,
        profiles: all_profiles,
        warnings: all_warnings,
        system_io,
    }
}