//! GPU device detection and capability reporting.

use crate::error::{CoreError, CoreResult};

/// Information about a single detected GPU device.
#[derive(Debug, Clone)]
pub struct GpuInfo {
pub name: String,
pub vendor: GpuVendor,
    /// Total device memory in bytes.
    pub memory_total: usize,
    /// Currently available device memory in bytes.
    pub memory_available: usize,
    /// Peak memory bandwidth in GB/s.
    pub memory_bandwidth_gbps: f64,
pub compute_units: usize,
pub base_clock_mhz: usize,
pub memory_clock_mhz: usize,
pub compute_capability: ComputeCapability,
pub features: GpuFeatures,
pub performance: GpuPerformance,
}
impl GpuInfo {
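    /// Attempts to detect the primary GPU, preferring vendor compute APIs
    /// (CUDA, OpenCL, Vulkan; behind the `gpu` feature) and falling back
    /// to OS-specific probes. Returns an error when no GPU is found.
    ///
    /// # Examples
    ///
    /// A minimal usage sketch, marked `ignore` because the outcome depends
    /// on the host machine:
    ///
    /// ```ignore
    /// match GpuInfo::detect() {
    ///     Ok(gpu) => println!("found {} ({} compute units)", gpu.name, gpu.compute_units),
    ///     Err(_) => println!("no GPU detected"),
    /// }
    /// ```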
pub fn detect() -> CoreResult<Self> {
#[cfg(feature = "gpu")]
{
if let Ok(gpu) = Self::detect_cuda() {
return Ok(gpu);
}
if let Ok(gpu) = Self::detect_opencl() {
return Ok(gpu);
}
if let Ok(gpu) = Self::detect_vulkan() {
return Ok(gpu);
}
}
#[cfg(target_os = "linux")]
if let Ok(gpu) = Self::detect_linux() {
return Ok(gpu);
}
#[cfg(target_os = "windows")]
if let Ok(gpu) = Self::detect_windows() {
return Ok(gpu);
}
#[cfg(target_os = "macos")]
if let Ok(gpu) = Self::detect_macos() {
return Ok(gpu);
}
Err(CoreError::ComputationError(
crate::error::ErrorContext::new("No GPU detected"),
))
}
#[cfg(feature = "gpu")]
fn detect_cuda() -> CoreResult<Self> {
Err(CoreError::ComputationError(
crate::error::ErrorContext::new("CUDA detection not implemented"),
))
}
#[cfg(feature = "gpu")]
fn detect_opencl() -> CoreResult<Self> {
Err(CoreError::ComputationError(
crate::error::ErrorContext::new("OpenCL detection not implemented"),
))
}
#[cfg(feature = "gpu")]
fn detect_vulkan() -> CoreResult<Self> {
Err(CoreError::ComputationError(
crate::error::ErrorContext::new("Vulkan detection not implemented"),
))
}
#[cfg(target_os = "linux")]
fn detect_linux() -> CoreResult<Self> {
use std::fs;
        // Scan the DRM subsystem; GPU nodes appear as /sys/class/drm/card0, card1, ...
        if let Ok(entries) = fs::read_dir("/sys/class/drm") {
for entry in entries.flatten() {
let path = entry.path();
if let Some(name) = path.file_name() {
if name.to_string_lossy().starts_with("card") {
let device_path = path.join("device");
                        // sysfs exposes PCI IDs as hex strings such as "0x10de".
                        if let Ok(vendor) = fs::read_to_string(device_path.join("vendor")) {
if let Ok(device) = fs::read_to_string(device_path.join("device")) {
let vendor_id = vendor.trim();
let device_id = device.trim();
return Ok(Self::create_from_pci_ids(vendor_id, device_id));
}
}
}
}
}
}
Err(CoreError::ComputationError(
crate::error::ErrorContext::new("No GPU detected on Linux"),
))
}
#[cfg(target_os = "windows")]
fn detect_windows() -> CoreResult<Self> {
Err(CoreError::ComputationError(
crate::error::ErrorContext::new("Windows GPU detection not implemented"),
))
}
#[cfg(target_os = "macos")]
fn detect_macos() -> CoreResult<Self> {
#[cfg(target_arch = "aarch64")]
{
Ok(Self {
name: "Apple GPU".to_string(),
vendor: GpuVendor::Apple,
memory_total: 8 * 1024 * 1024 * 1024, memory_available: 6 * 1024 * 1024 * 1024,
memorybandwidth_gbps: 200.0,
compute_units: 8,
base_clock_mhz: 1000,
memory_clock_mhz: 2000,
compute_capability: ComputeCapability::Metal,
features: GpuFeatures {
unified_memory: true,
double_precision: true,
half_precision: true,
tensor_cores: false,
ray_tracing: false,
},
performance: GpuPerformance {
fp32_gflops: 2600.0,
fp16_gflops: 5200.0,
                    memory_bandwidth_gbps: 200.0,
efficiency_score: 0.9,
},
})
}
#[cfg(not(target_arch = "aarch64"))]
{
Err(CoreError::ComputationError(
crate::error::ErrorContext::new("macOS GPU detection not implemented"),
))
}
}
    /// Builds a [`GpuInfo`] from a numeric PCI vendor ID, filling the
    /// remaining fields with vendor-typical defaults.
    #[allow(dead_code)]
    fn from_pci_ids(vendor_id: u16, _device_id: &str) -> Self {
let vendor = match vendor_id {
0x10de => GpuVendor::Nvidia,
0x1002 => GpuVendor::Amd,
0x8086 => GpuVendor::Intel,
_ => GpuVendor::Unknown,
};
let (name, memory_gb, compute_units) = match vendor_id {
0x10de => ("NVIDIA GPU".to_string(), 8, 2048),
0x1002 => ("AMD GPU".to_string(), 8, 64),
0x8086 => ("Intel GPU".to_string(), 4, 96),
_ => ("Unknown GPU".to_string(), 4, 32),
};
Self {
name,
vendor,
memory_total: memory_gb * 1024 * 1024 * 1024,
            memory_available: (memory_gb * 1024 * 1024 * 1024 * 3) / 4,
            memory_bandwidth_gbps: 500.0,
compute_units,
base_clock_mhz: 1500,
memory_clock_mhz: 7000,
compute_capability: ComputeCapability::Unknown,
features: GpuFeatures::default(),
performance: GpuPerformance::default(),
}
}
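    /// Composite performance score in `[0.0, 1.0]`: the average of memory,
    /// compute-unit, bandwidth, and efficiency components, each normalized
    /// against a high-end reference and clamped to 1.0.
    ///
    /// # Examples
    ///
    /// A minimal sketch, marked `ignore` because it is illustrative only:
    ///
    /// ```ignore
    /// let score = GpuInfo::default().performance_score();
    /// assert!((0.0..=1.0).contains(&score));
    /// ```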
    pub fn performance_score(&self) -> f64 {
        // Normalize each component against a high-end reference card
        // (24 GiB memory, 4096 compute units, 1000 GB/s bandwidth).
        let memory_score =
            (self.memory_total as f64 / (24.0 * 1024.0 * 1024.0 * 1024.0)).min(1.0);
        let compute_score = (self.compute_units as f64 / 4096.0).min(1.0);
        let bandwidth_score = (self.memory_bandwidth_gbps / 1000.0).min(1.0);
        let efficiency_score = self.performance.efficiency_score;
        (memory_score + compute_score + bandwidth_score + efficiency_score) / 4.0
}
    /// Suggested workgroup size for this vendor's architecture (multiples
    /// of NVIDIA's 32-lane warp, AMD's 64-lane wavefront, and so on).
    pub fn optimal_workgroup_size(&self) -> usize {
        match self.vendor {
            GpuVendor::Nvidia => 256,
            GpuVendor::Amd => 64,
            GpuVendor::Intel => 128,
            GpuVendor::Apple => 32,
            GpuVendor::Unknown => 64,
        }
}
    /// Whether the GPU meets a minimal bar for general compute work
    /// (at least 2 GiB of device memory and 32 compute units).
    pub fn is_compute_capable(&self) -> bool {
        self.memory_total >= 2 * 1024 * 1024 * 1024 && self.compute_units >= 32
    }
    /// Whether the GPU is suitable for ML workloads: compute-capable plus
    /// tensor cores or half-precision support.
    pub fn is_ml_capable(&self) -> bool {
self.is_compute_capable() && (self.features.tensor_cores || self.features.half_precision)
}
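    /// Builds a [`GpuInfo`] from the PCI vendor/device ID strings exposed
    /// by sysfs (e.g. `"0x10de"`), filling the remaining fields with
    /// placeholder defaults.
    ///
    /// # Examples
    ///
    /// A minimal sketch; `"0x1234"` is a made-up device ID:
    ///
    /// ```ignore
    /// let gpu = GpuInfo::create_from_pci_ids("0x10de", "0x1234");
    /// assert_eq!(gpu.vendor, GpuVendor::Nvidia);
    /// ```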
    pub fn create_from_pci_ids(vendor_id: &str, device_id: &str) -> Self {
        // sysfs vendor IDs look like "0x10de"; accept them with or without the prefix.
        let vendor_id = vendor_id.strip_prefix("0x").unwrap_or(vendor_id);
let vendor = match vendor_id {
"10de" => GpuVendor::Nvidia,
"1002" => GpuVendor::Amd,
"8086" => GpuVendor::Intel,
_ => GpuVendor::Unknown,
};
let name = match vendor {
GpuVendor::Nvidia => format!("NVIDIA GPU {}", device_id),
GpuVendor::Amd => format!("AMD GPU {}", device_id),
GpuVendor::Intel => format!("Intel GPU {}", device_id),
GpuVendor::Apple => format!("Apple GPU {}", device_id),
GpuVendor::Unknown => format!("Unknown GPU {}", device_id),
};
Self {
name,
vendor,
            // Placeholder figures; querying the driver for exact values is not implemented.
            memory_total: (8u64 * 1024 * 1024 * 1024) as usize,
            memory_available: (8u64 * 1024 * 1024 * 1024) as usize,
            memory_bandwidth_gbps: 400.0,
            // Only assume a CUDA capability for NVIDIA devices; everything else is unknown.
            compute_capability: match vendor {
                GpuVendor::Nvidia => ComputeCapability::Cuda(7, 0),
                _ => ComputeCapability::Unknown,
            },
            compute_units: 128,
            base_clock_mhz: 1500,
            memory_clock_mhz: 1750,
            features: GpuFeatures::default(),
            performance: GpuPerformance::default(),
}
}
}
/// GPU hardware vendor.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GpuVendor {
Nvidia,
Amd,
Intel,
Apple,
Unknown,
}
/// The compute API (and version, where applicable) a GPU supports.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ComputeCapability {
    /// CUDA compute capability as (major, minor), e.g. `Cuda(7, 5)`.
    Cuda(u32, u32),
    /// OpenCL version as (major, minor).
    OpenCL(u32, u32),
    /// Vulkan version as (major, minor).
    Vulkan(u32, u32),
    Metal,
    DirectCompute,
    Unknown,
}
/// Optional hardware capabilities of a GPU.
#[derive(Debug, Clone)]
pub struct GpuFeatures {
pub unified_memory: bool,
pub double_precision: bool,
pub half_precision: bool,
pub tensor_cores: bool,
pub ray_tracing: bool,
}
impl Default for GpuFeatures {
fn default() -> Self {
Self {
unified_memory: false,
double_precision: true,
half_precision: false,
tensor_cores: false,
ray_tracing: false,
}
}
}
/// Rough throughput estimates for a GPU.
#[derive(Debug, Clone)]
pub struct GpuPerformance {
    pub fp32_gflops: f64,
    pub fp16_gflops: f64,
    pub memory_bandwidth_gbps: f64,
    /// Normalized efficiency in `[0.0, 1.0]`.
    pub efficiency_score: f64,
}
impl Default for GpuPerformance {
fn default() -> Self {
Self {
fp32_gflops: 1000.0,
fp16_gflops: 2000.0,
            memory_bandwidth_gbps: 500.0,
efficiency_score: 0.7,
}
}
}
/// Aggregate information about all detected GPUs.
#[derive(Debug, Clone)]
pub struct MultiGpuInfo {
    pub gpus: Vec<GpuInfo>,
    /// Combined device memory across all GPUs, in bytes.
    pub total_memory: usize,
    /// Whether peer-to-peer (GPU-to-GPU) transfers are available.
    pub p2p_capable: bool,
    pub multi_gpu_config: MultiGpuConfig,
}
impl MultiGpuInfo {
    pub fn detect() -> CoreResult<Self> {
        // Only single-GPU detection is implemented; multi-GPU enumeration,
        // P2P probing, and link topology are left as future work.
        let mut gpus = Vec::new();
        if let Ok(gpu) = GpuInfo::detect() {
            gpus.push(gpu);
        }
        let total_memory = gpus.iter().map(|gpu| gpu.memory_total).sum();
        Ok(Self {
            gpus,
            total_memory,
            p2p_capable: false,
            multi_gpu_config: MultiGpuConfig::Single,
        })
}
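    /// Returns the compute-capable GPU with the highest
    /// [`performance_score`](GpuInfo::performance_score), if any.
    ///
    /// # Examples
    ///
    /// A minimal sketch, marked `ignore` because it depends on detection
    /// succeeding on the host machine:
    ///
    /// ```ignore
    /// if let Some(best) = MultiGpuInfo::detect()?.best_compute_gpu() {
    ///     println!("best GPU: {}", best.name);
    /// }
    /// ```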
    pub fn best_compute_gpu(&self) -> Option<&GpuInfo> {
        self.gpus
            .iter()
            .filter(|gpu| gpu.is_compute_capable())
            // `f64::total_cmp` is a total order, so this cannot panic on NaN.
            .max_by(|a, b| a.performance_score().total_cmp(&b.performance_score()))
    }
pub fn total_compute_units(&self) -> usize {
self.gpus.iter().map(|gpu| gpu.compute_units).sum()
}
}
/// How multiple GPUs are linked together.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MultiGpuConfig {
Single,
Sli,
CrossFire,
NvLink,
Independent,
}
impl Default for GpuInfo {
fn default() -> Self {
Self {
name: "Default GPU".to_string(),
vendor: GpuVendor::Unknown,
            memory_total: (4u64 * 1024 * 1024 * 1024) as usize,
            memory_available: (3u64 * 1024 * 1024 * 1024) as usize,
            memory_bandwidth_gbps: 200.0,
compute_units: 512,
base_clock_mhz: 1000,
memory_clock_mhz: 4000,
compute_capability: ComputeCapability::Unknown,
features: GpuFeatures::default(),
performance: GpuPerformance::default(),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_gpu_vendor() {
assert_eq!(GpuVendor::Nvidia, GpuVendor::Nvidia);
assert_ne!(GpuVendor::Nvidia, GpuVendor::Amd);
}
#[test]
fn test_compute_capability() {
let cuda_cap = ComputeCapability::Cuda(7, 5);
assert_eq!(cuda_cap, ComputeCapability::Cuda(7, 5));
assert_ne!(cuda_cap, ComputeCapability::Metal);
}
#[test]
fn test_gpu_features() {
let features = GpuFeatures {
unified_memory: true,
tensor_cores: true,
..Default::default()
};
assert!(features.unified_memory);
assert!(features.tensor_cores);
assert!(!features.ray_tracing);
}
#[test]
fn test_gpu_performance() {
let perf = GpuPerformance::default();
assert!(perf.fp32_gflops > 0.0);
assert!(perf.efficiency_score >= 0.0 && perf.efficiency_score <= 1.0);
}
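    // Additional check (not in the original suite): the composite
    // performance score stays within [0, 1] because each component is
    // clamped before averaging.
    #[test]
    fn test_performance_score_bounds() {
        let gpu = GpuInfo::default();
        let score = gpu.performance_score();
        assert!((0.0..=1.0).contains(&score));
    }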
#[test]
fn test_pci_id_parsing() {
let gpu = GpuInfo::create_from_pci_ids("0x10de", "0x1234");
assert_eq!(gpu.vendor, GpuVendor::Nvidia);
assert!(gpu.name.contains("NVIDIA"));
}
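    // Additional check: the "0x" prefix is optional, and non-NVIDIA vendor
    // IDs map to their vendors ("0x9999" here is a made-up device ID).
    #[test]
    fn test_pci_id_parsing_amd_and_unprefixed() {
        let amd = GpuInfo::create_from_pci_ids("1002", "0x9999");
        assert_eq!(amd.vendor, GpuVendor::Amd);
        let unknown = GpuInfo::create_from_pci_ids("0xabcd", "0x9999");
        assert_eq!(unknown.vendor, GpuVendor::Unknown);
    }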
#[test]
fn test_multi_gpu_config() {
assert_eq!(MultiGpuConfig::Single, MultiGpuConfig::Single);
assert_ne!(MultiGpuConfig::Single, MultiGpuConfig::Sli);
}
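    // Additional check: capability predicates on hand-built devices. The
    // default GPU (4 GiB, 512 compute units) clears the compute bar but
    // lacks tensor cores and half precision, so it is not ML-capable.
    #[test]
    fn test_capability_predicates() {
        let gpu = GpuInfo::default();
        assert!(gpu.is_compute_capable());
        assert!(!gpu.is_ml_capable());
        let ml_gpu = GpuInfo {
            features: GpuFeatures {
                half_precision: true,
                ..Default::default()
            },
            ..Default::default()
        };
        assert!(ml_gpu.is_ml_capable());
    }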
#[test]
fn test_optimal_workgroup_size() {
let nvidia_gpu = GpuInfo {
vendor: GpuVendor::Nvidia,
..Default::default()
};
assert_eq!(nvidia_gpu.optimal_workgroup_size(), 256);
let amd_gpu = GpuInfo {
vendor: GpuVendor::Amd,
..Default::default()
};
assert_eq!(amd_gpu.optimal_workgroup_size(), 64);
}
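    // Additional check: aggregate totals over a hand-built GPU list, so
    // the test does not depend on detect() succeeding on the host machine.
    #[test]
    fn test_multi_gpu_aggregates() {
        let multi = MultiGpuInfo {
            gpus: vec![GpuInfo::default(), GpuInfo::default()],
            total_memory: 2 * GpuInfo::default().memory_total,
            p2p_capable: false,
            multi_gpu_config: MultiGpuConfig::Independent,
        };
        assert_eq!(multi.total_compute_units(), 1024);
        assert!(multi.best_compute_gpu().is_some());
    }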
}