phop_core/accel.rs
1//! Accelerator selection for the tensorized EML forward pass.
2//!
3//! phop has four forward-eval backends: NVIDIA CUDA (`gpu-cuda`, exact `f64`, via oxicuda), native
4//! Apple Metal (`gpu-metal`, `f32`, via oxicuda-metal, macOS only), a portable
5//! WebGPU/Metal/Vulkan/DX12 path (`gpu-wgpu`, `f32`, via `crate::wgpu_forward`), and the
6//! always-available CPU path (`f64`). [`gpu_backend`] picks the best one present at runtime, in the
7//! order **CUDA → Metal → wgpu → CPU** — CUDA first for its `f64` precision and existing tuned
8//! kernels, native Metal next on Apple hardware, wgpu for portability elsewhere, CPU as the
9//! universal fallback.
10
/// A compute backend for the EML forward pass.
///
/// Variants are declared in the same order of preference that [`gpu_backend`] probes them:
/// CUDA, then Metal, then wgpu, then CPU. The type is a plain fieldless enum, so it is `Copy`,
/// comparable with `==`, and hashable (usable as a `HashMap`/`HashSet` key, e.g. for per-backend
/// caches or statistics).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GpuBackend {
    /// NVIDIA CUDA via oxicuda (`f64`). Requires the `gpu-cuda` feature **and** a device at
    /// runtime.
    Cuda,
    /// Native Apple Metal via oxicuda-metal (`f32`). Requires the `gpu-metal` feature **and** a
    /// Metal device at runtime (macOS only).
    Metal,
    /// Portable WebGPU/Metal/Vulkan/DX12 via wgpu (`f32`). Requires the `gpu-wgpu` feature
    /// **and** an adapter at runtime.
    Wgpu,
    /// CPU fallback — always available, exact `f64`.
    Cpu,
}
25
26/// Select the best available forward-eval backend at runtime: **CUDA → Metal → wgpu → CPU**.
27///
28/// Each accelerator branch only exists when its feature is compiled in, and is taken only if the
29/// corresponding device/adapter is actually present (so a build with both GPU features still degrades
30/// cleanly to CPU on a machine with neither).
31#[must_use]
32pub fn gpu_backend() -> GpuBackend {
33 #[cfg(feature = "gpu-cuda")]
34 {
35 if crate::gpu::cuda_available() {
36 return GpuBackend::Cuda;
37 }
38 }
39 #[cfg(feature = "gpu-metal")]
40 {
41 if crate::metal::metal_available() {
42 return GpuBackend::Metal;
43 }
44 }
45 #[cfg(feature = "gpu-wgpu")]
46 {
47 if crate::wgpu_forward::wgpu_available() {
48 return GpuBackend::Wgpu;
49 }
50 }
51 GpuBackend::Cpu
52}
53
#[cfg(test)]
mod tests {
    use super::*;

    /// With no GPU feature compiled in, the only possible choice is CPU.
    ///
    /// The whole test is gated (rather than just the assertion) so that a GPU-enabled build does
    /// not report an empty test body as a pass. We deliberately do NOT call `gpu_backend()` when
    /// a GPU feature is enabled: the wgpu adapter probe can crash on broken software drivers, and
    /// the meaningful invariant — CPU when no accelerator exists — is exactly this configuration.
    #[cfg(not(any(feature = "gpu-cuda", feature = "gpu-metal", feature = "gpu-wgpu")))]
    #[test]
    fn selects_cpu_without_gpu_features() {
        assert_eq!(gpu_backend(), GpuBackend::Cpu);
    }

    /// Every unordered pair of variants must compare unequal (including `Cuda` vs `Cpu`).
    #[test]
    fn backend_variants_are_distinct() {
        let all = [
            GpuBackend::Cuda,
            GpuBackend::Metal,
            GpuBackend::Wgpu,
            GpuBackend::Cpu,
        ];
        for (idx, first) in all.iter().enumerate() {
            for second in &all[idx + 1..] {
                assert_ne!(first, second);
            }
        }
    }
}