#[cfg(feature = "gpu")]
use scirs2_core::gpu::backends::{
check_backend_installation, detect_gpu_backends, get_device_info,
};
#[cfg(feature = "gpu")]
use scirs2_core::gpu::GpuBackend;
#[allow(dead_code)]
fn main() -> Result<(), Box<dyn std::error::Error>> {
println!("=== GPU Detection Example ===\n");
#[cfg(feature = "gpu")]
run_detection_demo()?;
#[cfg(not(feature = "gpu"))]
println!("GPU feature not enabled. Run with --features=\"gpu\" to see the detection example.");
Ok(())
}
#[cfg(feature = "gpu")]
#[allow(dead_code)]
fn run_detection_demo() -> Result<(), Box<dyn std::error::Error>> {
println!("1. Backend Installation Check");
println!("-----------------------------");
let backends = [
GpuBackend::Cuda,
GpuBackend::Metal,
GpuBackend::OpenCL,
GpuBackend::Wgpu,
GpuBackend::Cpu,
];
for backend in backends.iter() {
match check_backend_installation(*backend) {
Ok(installed) => {
let status = if installed {
"✓ Installed"
} else {
"✗ Not installed"
};
println!("{}: {}", backend, status);
}
Err(e) => {
println!("{}: ✗ Error checking - {}", backend, e);
}
}
}
println!("\n2. GPU Device Detection");
println!("-----------------------");
let detection_result = detect_gpu_backends();
println!(
"Recommended backend: {}",
detection_result.recommended_backend
);
println!("Detected {} GPU device(s):", detection_result.devices.len());
for (i, device) in detection_result.devices.iter().enumerate() {
println!("\nDevice {}: {}", i, device.device_name);
println!(" Backend: {}", device.backend);
if let Some(memory) = device.memory_bytes {
let memory_gb = memory as f64 / (1024.0 * 1024.0 * 1024.0);
println!(" Memory: {:.1} GB", memory_gb);
} else {
println!(" Memory: Unknown");
}
if let Some(ref capability) = device.compute_capability {
println!(" Compute capability: {}", capability);
}
println!(
" Tensor support: {}",
if device.supports_tensors { "Yes" } else { "No" }
);
}
println!("\n3. Specific Device Information");
println!("------------------------------");
match get_device_info(GpuBackend::Cuda, 0) {
Ok(device) => {
println!("CUDA Device 0 details:");
println!(" Name: {}", device.device_name);
if let Some(memory) = device.memory_bytes {
println!(" Memory: {} bytes", memory);
}
if let Some(ref capability) = device.compute_capability {
println!(" Compute capability: {}", capability);
}
}
Err(e) => {
println!("No CUDA device found: {}", e);
}
}
println!("\n4. Backend Recommendations");
println!("---------------------------");
let cuda_devices = detection_result
.devices
.iter()
.filter(|d| d.backend == GpuBackend::Cuda)
.count();
let metal_devices = detection_result
.devices
.iter()
.filter(|d| d.backend == GpuBackend::Metal)
.count();
let opencl_devices = detection_result
.devices
.iter()
.filter(|d| d.backend == GpuBackend::OpenCL)
.count();
println!("Backend suitability for scientific computing:");
if cuda_devices > 0 {
println!("✓ CUDA: Excellent (found {} device(s))", cuda_devices);
println!(" - Best performance for deep learning and HPC");
println!(" - Extensive library ecosystem (cuBLAS, cuFFT, etc.)");
println!(" - Mature compute platform");
} else {
println!("✗ CUDA: Not available");
}
if metal_devices > 0 {
println!("✓ Metal: Good (found {} device(s))", metal_devices);
println!(" - Excellent integration with Apple hardware");
println!(" - Unified memory architecture benefits");
println!(" - Good for machine learning on Apple Silicon");
} else {
println!("✗ Metal: Not available (requires macOS)");
}
if opencl_devices > 0 {
println!("○ OpenCL: Fair (found {} device(s))", opencl_devices);
println!(" - Cross-platform compatibility");
println!(" - Good for basic parallel computing");
println!(" - Limited ecosystem compared to CUDA");
} else {
println!("✗ OpenCL: Not available");
}
println!("○ WebGPU: Fair (browser-compatible)");
println!(" - Excellent for web deployment");
println!(" - Growing ecosystem");
println!(" - Good for lightweight computations");
println!("✓ CPU: Always available (fallback)");
println!(" - Guaranteed compatibility");
println!(" - Good for development and testing");
println!(" - SIMD acceleration available");
println!("\n5. Performance Characteristics");
println!("------------------------------");
println!("Expected relative performance for matrix operations:");
if cuda_devices > 0 {
let has_tensor_cores = detection_result
.devices
.iter()
.any(|d| d.backend == GpuBackend::Cuda && d.supports_tensors);
if has_tensor_cores {
println!(" CUDA with Tensor Cores: 100x (excellent for FP16/BF16)");
} else {
println!(" CUDA without Tensor Cores: 50x");
}
}
if metal_devices > 0 {
println!(" Metal: 30-60x (varies by Apple Silicon generation)");
}
if opencl_devices > 0 {
println!(" OpenCL: 10-40x (varies greatly by hardware)");
}
println!(" WebGPU: 5-20x (good for web applications)");
println!(" CPU (SIMD): 1x (baseline)");
println!("\nNote: Performance ratios are approximate and vary by:");
println!("- Problem size and complexity");
println!("- Memory access patterns");
println!("- Specific hardware configuration");
println!("- Driver and software optimization");
Ok(())
}