//! GPU backend detection and selection.

use crate::error::{HiveGpuError, Result};
/// The compute backends this crate can target.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GpuBackendType {
    /// Apple Metal (macOS).
    Metal,
    /// NVIDIA CUDA (Linux/Windows).
    Cuda,
    /// AMD ROCm (Linux).
    Rocm,
    /// Intel GPUs via Vulkan (Linux/Windows).
    Intel,
    /// Portable CPU fallback; always available.
    Cpu,
}
impl std::fmt::Display for GpuBackendType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            GpuBackendType::Metal => write!(f, "Metal"),
            GpuBackendType::Cuda => write!(f, "CUDA"),
            GpuBackendType::Rocm => write!(f, "ROCm"),
            GpuBackendType::Intel => write!(f, "Intel"),
            GpuBackendType::Cpu => write!(f, "CPU"),
        }
    }
}
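/// Returns every backend that is both compiled in (via feature flags) and
/// usable on the current machine. The CPU fallback is always appended last,
/// so the result is never empty.
///
/// # Example
///
/// A minimal sketch (marked `ignore` because the crate name and enabled
/// features are build-specific):
///
/// ```ignore
/// let backends = detect_available_backends();
/// // Whatever GPUs are present, the CPU fallback is always listed.
/// assert!(backends.contains(&GpuBackendType::Cpu));
/// ```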
#[allow(clippy::vec_init_then_push)]
pub fn detect_available_backends() -> Vec<GpuBackendType> {
    let mut backends = Vec::new();

    #[cfg(all(target_os = "macos", feature = "metal-native"))]
    {
        if is_metal_available() {
            backends.push(GpuBackendType::Metal);
        }
    }

    #[cfg(all(feature = "cuda", any(target_os = "linux", target_os = "windows")))]
    {
        if is_cuda_available() {
            backends.push(GpuBackendType::Cuda);
        }
    }

    #[cfg(all(feature = "rocm", target_os = "linux"))]
    {
        if is_rocm_available() {
            backends.push(GpuBackendType::Rocm);
        }
    }

    #[cfg(all(feature = "intel", any(target_os = "linux", target_os = "windows")))]
    {
        if is_intel_available() {
            backends.push(GpuBackendType::Intel);
        }
    }

    // The CPU backend needs no probing and is always available.
    backends.push(GpuBackendType::Cpu);
    backends
}
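/// Picks the preferred available backend in fixed priority order:
/// Metal > CUDA > ROCm > Intel > CPU.
///
/// # Example
///
/// A minimal sketch (`ignore`d for the same build-specific reasons as above):
///
/// ```ignore
/// let backend = select_best_backend()?;
/// println!("using {backend}");
/// ```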
pub fn select_best_backend() -> Result<GpuBackendType> {
    let available = detect_available_backends();
    if available.contains(&GpuBackendType::Metal) {
        Ok(GpuBackendType::Metal)
    } else if available.contains(&GpuBackendType::Cuda) {
        Ok(GpuBackendType::Cuda)
    } else if available.contains(&GpuBackendType::Rocm) {
        Ok(GpuBackendType::Rocm)
    } else if available.contains(&GpuBackendType::Intel) {
        Ok(GpuBackendType::Intel)
    } else if available.contains(&GpuBackendType::Cpu) {
        Ok(GpuBackendType::Cpu)
    } else {
        // Defensive: detect_available_backends() always includes Cpu,
        // so this branch is unreachable in practice.
        Err(HiveGpuError::NoDeviceAvailable)
    }
}
#[cfg(all(target_os = "macos", feature = "metal-native"))]
fn is_metal_available() -> bool {
    use objc2_metal::MTLCreateSystemDefaultDevice;
    MTLCreateSystemDefaultDevice().is_some()
}
#[cfg(all(feature = "cuda", any(target_os = "linux", target_os = "windows")))]
fn is_cuda_available() -> bool {
std::panic::catch_unwind(|| {
use cudarc::driver::result;
match result::init() {
Ok(()) => result::device::get_count()
.map(|count| count > 0)
.unwrap_or(false),
Err(_) => false,
}
})
.unwrap_or(false)
}
#[cfg(all(feature = "rocm", target_os = "linux"))]
fn is_rocm_available() -> bool {
crate::rocm::context::RocmContext::is_available()
}
#[cfg(all(feature = "intel", any(target_os = "linux", target_os = "windows")))]
fn is_intel_available() -> bool {
crate::intel::context::IntelContext::is_available()
}
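/// Returns a short human-readable description of the given backend, or
/// `HiveGpuError::NoDeviceAvailable` if it was not compiled in or no device
/// is present.
///
/// ```ignore
/// if let Ok(info) = get_backend_info(GpuBackendType::Cpu) {
///     println!("{info}"); // "CPU fallback"
/// }
/// ```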
pub fn get_backend_info(backend: GpuBackendType) -> Result<String> {
    match backend {
        GpuBackendType::Metal => {
            #[cfg(all(target_os = "macos", feature = "metal-native"))]
            {
                use objc2_metal::{MTLCreateSystemDefaultDevice, MTLDevice};
                if let Some(device) = MTLCreateSystemDefaultDevice() {
                    Ok(format!("Metal device: {}", device.name()))
                } else {
                    Err(HiveGpuError::NoDeviceAvailable)
                }
            }
            #[cfg(not(all(target_os = "macos", feature = "metal-native")))]
            {
                Err(HiveGpuError::NoDeviceAvailable)
            }
        }
        GpuBackendType::Cuda => {
            // Gate on the same cfg as detection so the two stay consistent.
            #[cfg(all(feature = "cuda", any(target_os = "linux", target_os = "windows")))]
            {
                Ok("CUDA device available".to_string())
            }
            #[cfg(not(all(feature = "cuda", any(target_os = "linux", target_os = "windows"))))]
            {
                Err(HiveGpuError::NoDeviceAvailable)
            }
        }
        GpuBackendType::Rocm => {
            #[cfg(all(feature = "rocm", target_os = "linux"))]
            {
                Ok("ROCm device available".to_string())
            }
            #[cfg(not(all(feature = "rocm", target_os = "linux")))]
            {
                Err(HiveGpuError::NoDeviceAvailable)
            }
        }
        GpuBackendType::Intel => {
            #[cfg(all(feature = "intel", any(target_os = "linux", target_os = "windows")))]
            {
                Ok("Intel (Vulkan) device available".to_string())
            }
            #[cfg(not(all(feature = "intel", any(target_os = "linux", target_os = "windows"))))]
            {
                Err(HiveGpuError::NoDeviceAvailable)
            }
        }
        GpuBackendType::Cpu => Ok("CPU fallback".to_string()),
    }
}
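/// Returns indicative performance characteristics for `backend`.
///
/// Note that these figures are hard-coded ballpark numbers for a typical
/// device of each class, not values queried from the actual hardware.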
pub fn get_backend_performance_info(backend: GpuBackendType) -> BackendPerformanceInfo {
    match backend {
        GpuBackendType::Metal => BackendPerformanceInfo {
            name: "Metal".to_string(),
            memory_bandwidth_gbps: 400.0,
            compute_units: 8,
            memory_size_gb: 16,
            supports_hnsw: true,
            supports_batch: true,
        },
        GpuBackendType::Cuda => BackendPerformanceInfo {
            name: "CUDA".to_string(),
            memory_bandwidth_gbps: 900.0,
            compute_units: 128,
            memory_size_gb: 24,
            supports_hnsw: true,
            supports_batch: true,
        },
        GpuBackendType::Rocm => BackendPerformanceInfo {
            name: "ROCm".to_string(),
            memory_bandwidth_gbps: 960.0,
            compute_units: 96,
            memory_size_gb: 24,
            supports_hnsw: false,
            supports_batch: true,
        },
        GpuBackendType::Intel => BackendPerformanceInfo {
            name: "Intel".to_string(),
            memory_bandwidth_gbps: 560.0,
            compute_units: 20,
            memory_size_gb: 12,
            supports_hnsw: false,
            supports_batch: true,
        },
        GpuBackendType::Cpu => BackendPerformanceInfo {
            name: "CPU".to_string(),
            memory_bandwidth_gbps: 50.0,
            compute_units: 16,
            memory_size_gb: 32,
            supports_hnsw: false,
            supports_batch: true,
        },
    }
}
/// Nominal capability summary for a backend; see
/// [`get_backend_performance_info`] for the hard-coded values.
#[derive(Debug, Clone)]
pub struct BackendPerformanceInfo {
    /// Human-readable backend name.
    pub name: String,
    /// Approximate peak memory bandwidth, in GB/s.
    pub memory_bandwidth_gbps: f32,
    /// Approximate number of compute units (cores, SMs, CUs, or EUs).
    pub compute_units: usize,
    /// Approximate device memory, in GB.
    pub memory_size_gb: usize,
    /// Whether HNSW search is supported on this backend.
    pub supports_hnsw: bool,
    /// Whether batched operations are supported on this backend.
    pub supports_batch: bool,
}
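
// A minimal smoke-test sketch. These tests exercise only the
// feature-independent guarantees above (the CPU fallback is always pushed
// last, so selection cannot fail); they make no assumptions about which
// GPU features are compiled in.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn cpu_fallback_is_always_detected() {
        let backends = detect_available_backends();
        // The CPU backend is unconditionally pushed last.
        assert_eq!(backends.last(), Some(&GpuBackendType::Cpu));
    }

    #[test]
    fn selection_never_fails_with_cpu_fallback() {
        // Because detect_available_backends() always includes Cpu,
        // select_best_backend() always returns Ok.
        let backend = select_best_backend().expect("CPU fallback should always be available");
        let info = get_backend_performance_info(backend);
        // Every backend in this module advertises batch support.
        assert!(info.supports_batch);
    }
}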