hive_gpu/backends/detector.rs

//! GPU Backend Detection
//!
//! This module provides detection of available GPU backends and selection
//! of the best backend for the current system.
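//!
//! # Example
//!
//! A minimal usage sketch (hypothetical; it assumes this module is exported as
//! `hive_gpu::backends::detector`, so the snippet is marked `ignore`):
//!
//! ```ignore
//! use hive_gpu::backends::detector::{detect_available_backends, select_best_backend};
//!
//! // List every backend the current machine can use; CPU is always included.
//! let available = detect_available_backends();
//! println!("available backends: {available:?}");
//!
//! // Pick the highest-priority backend (Metal > CUDA > ROCm > Intel > CPU).
//! let best = select_best_backend().expect("the CPU fallback is always available");
//! println!("selected backend: {best}");
//! ```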

use crate::error::{HiveGpuError, Result};

/// Available GPU backend types
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GpuBackendType {
    /// Metal (Apple Silicon)
    Metal,
    /// CUDA (NVIDIA)
    Cuda,
    /// ROCm / HIP (AMD)
    Rocm,
    /// Intel / Vulkan Compute (also covers the universal-Vulkan fallback)
    Intel,
    /// CPU fallback
    Cpu,
}

impl std::fmt::Display for GpuBackendType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            GpuBackendType::Metal => write!(f, "Metal"),
            GpuBackendType::Cuda => write!(f, "CUDA"),
            GpuBackendType::Rocm => write!(f, "ROCm"),
            GpuBackendType::Intel => write!(f, "Intel"),
            GpuBackendType::Cpu => write!(f, "CPU"),
        }
    }
}

/// Detect all available GPU backends on the current system
#[allow(clippy::vec_init_then_push)] // pushes live inside cfg-gated blocks
pub fn detect_available_backends() -> Vec<GpuBackendType> {
    let mut backends = Vec::new();

    // Check Metal availability (macOS only)
    #[cfg(all(target_os = "macos", feature = "metal-native"))]
    {
        if is_metal_available() {
            backends.push(GpuBackendType::Metal);
        }
    }

    // Check CUDA availability
    #[cfg(all(feature = "cuda", any(target_os = "linux", target_os = "windows")))]
    {
        if is_cuda_available() {
            backends.push(GpuBackendType::Cuda);
        }
    }

    // Check ROCm availability (Linux-only)
    #[cfg(all(feature = "rocm", target_os = "linux"))]
    {
        if is_rocm_available() {
            backends.push(GpuBackendType::Rocm);
        }
    }

    // Check Intel / Vulkan availability (Linux + Windows)
    #[cfg(all(feature = "intel", any(target_os = "linux", target_os = "windows")))]
    {
        if is_intel_available() {
            backends.push(GpuBackendType::Intel);
        }
    }

    // CPU is always available as fallback
    backends.push(GpuBackendType::Cpu);

    backends
}

/// Select the best available backend based on performance priority
pub fn select_best_backend() -> Result<GpuBackendType> {
    let available = detect_available_backends();

    // Priority order: Metal > CUDA > ROCm > Intel > CPU
    if available.contains(&GpuBackendType::Metal) {
        Ok(GpuBackendType::Metal)
    } else if available.contains(&GpuBackendType::Cuda) {
        Ok(GpuBackendType::Cuda)
    } else if available.contains(&GpuBackendType::Rocm) {
        Ok(GpuBackendType::Rocm)
    } else if available.contains(&GpuBackendType::Intel) {
        Ok(GpuBackendType::Intel)
    } else if available.contains(&GpuBackendType::Cpu) {
        Ok(GpuBackendType::Cpu)
    } else {
        Err(HiveGpuError::NoDeviceAvailable)
    }
}

/// Check if Metal is available on the current system
#[cfg(all(target_os = "macos", feature = "metal-native"))]
fn is_metal_available() -> bool {
    use objc2_metal::MTLCreateSystemDefaultDevice;

    MTLCreateSystemDefaultDevice().is_some()
}

/// Check if CUDA is available on the current system.
///
/// Uses cudarc's driver-API probe, which dynamically loads the CUDA driver at
/// call time. Returns `false` when the driver is missing or no devices are
/// enumerable; it never panics.
#[cfg(all(feature = "cuda", any(target_os = "linux", target_os = "windows")))]
fn is_cuda_available() -> bool {
    // cudarc's dynamic loading panics when libcuda.so is missing (hosts with
    // the toolkit installed but no driver). Wrap the probe in catch_unwind so
    // detection on such hosts returns false cleanly instead of aborting the
    // process.
    std::panic::catch_unwind(|| {
        use cudarc::driver::result;
        match result::init() {
            Ok(()) => result::device::get_count()
                .map(|count| count > 0)
                .unwrap_or(false),
            Err(_) => false,
        }
    })
    .unwrap_or(false)
}

/// Check if ROCm / HIP is available on the current system.
///
/// Uses the libloading-based probe in `crate::rocm::context::RocmContext`
/// which dlopens `libamdhip64` / `librocblas` at call time. Returns
/// `false` when neither library is reachable.
#[cfg(all(feature = "rocm", target_os = "linux"))]
fn is_rocm_available() -> bool {
    crate::rocm::context::RocmContext::is_available()
}

/// Check if a Vulkan-capable device (Intel by default, any device when
/// `HIVE_GPU_VULKAN_UNIVERSAL=1` is set) is reachable on the current
/// system. Ash loads the Vulkan loader dynamically, so this check returns
/// `false` gracefully when no loader is present.
#[cfg(all(feature = "intel", any(target_os = "linux", target_os = "windows")))]
fn is_intel_available() -> bool {
    crate::intel::context::IntelContext::is_available()
}

/// Get backend-specific device information
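///
/// # Example
///
/// A minimal sketch of looking up info for whichever backend was selected
/// (hypothetical module path, hence `ignore`):
///
/// ```ignore
/// use hive_gpu::backends::detector::{get_backend_info, select_best_backend};
///
/// let backend = select_best_backend().expect("the CPU fallback is always available");
/// if let Ok(info) = get_backend_info(backend) {
///     println!("{info}");
/// }
/// ```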
pub fn get_backend_info(backend: GpuBackendType) -> Result<String> {
    match backend {
        GpuBackendType::Metal => {
            #[cfg(all(target_os = "macos", feature = "metal-native"))]
            {
                use objc2_metal::{MTLCreateSystemDefaultDevice, MTLDevice};
                if let Some(device) = MTLCreateSystemDefaultDevice() {
                    Ok(format!("Metal device: {}", device.name()))
                } else {
                    Err(HiveGpuError::NoDeviceAvailable)
                }
            }
            #[cfg(not(all(target_os = "macos", feature = "metal-native")))]
            {
                Err(HiveGpuError::NoDeviceAvailable)
            }
        }
        GpuBackendType::Cuda => {
            #[cfg(feature = "cuda")]
            {
                // In practice, you'd query CUDA devices here
                Ok("CUDA device available".to_string())
            }
            #[cfg(not(feature = "cuda"))]
            {
                Err(HiveGpuError::NoDeviceAvailable)
            }
        }
        GpuBackendType::Rocm => {
            #[cfg(all(feature = "rocm", target_os = "linux"))]
            {
                Ok("ROCm device available".to_string())
            }
            #[cfg(not(all(feature = "rocm", target_os = "linux")))]
            {
                Err(HiveGpuError::NoDeviceAvailable)
            }
        }
        GpuBackendType::Intel => {
            #[cfg(all(feature = "intel", any(target_os = "linux", target_os = "windows")))]
            {
                Ok("Intel (Vulkan) device available".to_string())
            }
            #[cfg(not(all(feature = "intel", any(target_os = "linux", target_os = "windows"))))]
            {
                Err(HiveGpuError::NoDeviceAvailable)
            }
        }
        GpuBackendType::Cpu => Ok("CPU fallback".to_string()),
    }
}

/// Get performance characteristics for each backend
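///
/// # Example
///
/// A rough sketch of using the reported characteristics, e.g. to check whether
/// a dataset of a given size is likely to fit in device memory (illustrative
/// arithmetic only; hypothetical module path, hence `ignore`):
///
/// ```ignore
/// use hive_gpu::backends::detector::{get_backend_performance_info, GpuBackendType};
///
/// let info = get_backend_performance_info(GpuBackendType::Cuda);
/// let dataset_gb = 10;
/// if info.memory_size_gb >= dataset_gb {
///     println!("{} likely fits a {} GB dataset", info.name, dataset_gb);
/// }
/// ```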
pub fn get_backend_performance_info(backend: GpuBackendType) -> BackendPerformanceInfo {
    match backend {
        GpuBackendType::Metal => BackendPerformanceInfo {
            name: "Metal".to_string(),
            memory_bandwidth_gbps: 400.0, // M1 Max-class bandwidth; lower Apple Silicon tiers are well below this
            compute_units: 8,             // base-M1-class GPU core count
            memory_size_gb: 16,
            supports_hnsw: true,
            supports_batch: true,
        },
        GpuBackendType::Cuda => BackendPerformanceInfo {
            name: "CUDA".to_string(),
            memory_bandwidth_gbps: 900.0, // rough high-end NVIDIA figure (an RTX 4090 is ~1008 GB/s)
            compute_units: 128,           // RTX 4090 SM count
            memory_size_gb: 24,
            supports_hnsw: true,
            supports_batch: true,
        },
        GpuBackendType::Rocm => BackendPerformanceInfo {
            name: "ROCm".to_string(),
            memory_bandwidth_gbps: 960.0, // RX 7900 XTX example
            compute_units: 96,            // RX 7900 XTX example
            memory_size_gb: 24,
            supports_hnsw: false,
            supports_batch: true,
        },
        GpuBackendType::Intel => BackendPerformanceInfo {
            name: "Intel".to_string(),
            memory_bandwidth_gbps: 560.0, // roughly Arc A770-class; an Arc B580 is ~456 GB/s
            compute_units: 20,            // Arc B580 Xe-cores
            memory_size_gb: 12,
            supports_hnsw: false,
            supports_batch: true,
        },
        GpuBackendType::Cpu => BackendPerformanceInfo {
            name: "CPU".to_string(),
            memory_bandwidth_gbps: 50.0, // DDR4-3200 example
            compute_units: 16,           // 16-core example
            memory_size_gb: 32,
            supports_hnsw: false,
            supports_batch: true,
        },
    }
}

/// Backend performance characteristics
#[derive(Debug, Clone)]
pub struct BackendPerformanceInfo {
    pub name: String,
    pub memory_bandwidth_gbps: f32,
    pub compute_units: usize,
    pub memory_size_gb: usize,
    pub supports_hnsw: bool,
    pub supports_batch: bool,
}