phop_core/
accel.rs

//! Accelerator selection for the tensorized EML forward pass.
//!
//! phop has four forward-eval backends: NVIDIA CUDA (`gpu-cuda`, exact `f64`, via oxicuda), native
//! Apple Metal (`gpu-metal`, `f32`, via oxicuda-metal, macOS only), a portable
//! WebGPU/Metal/Vulkan/DX12 path (`gpu-wgpu`, `f32`, via `crate::wgpu_forward`), and the
//! always-available CPU path (`f64`). [`gpu_backend`] picks the best one present at runtime, in the
//! order **CUDA → Metal → wgpu → CPU** — CUDA first for its `f64` precision and existing tuned
//! kernels, native Metal next on Apple hardware, wgpu for portability elsewhere, CPU as the
//! universal fallback.
10
/// The execution target used to evaluate the EML forward pass.
///
/// Variants are declared in the same order [`gpu_backend`] prefers them, from most to least
/// preferred.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum GpuBackend {
    /// oxicuda-backed NVIDIA CUDA, computing in `f64`. Selectable only when the `gpu-cuda` feature
    /// is compiled in **and** a CUDA device is found at runtime.
    Cuda,
    /// oxicuda-metal-backed native Apple Metal, computing in `f32`. Selectable only when the
    /// `gpu-metal` feature is compiled in **and** a Metal device is found at runtime (macOS only).
    Metal,
    /// wgpu-backed portable path (WebGPU/Metal/Vulkan/DX12), computing in `f32`. Selectable only
    /// when the `gpu-wgpu` feature is compiled in **and** an adapter is found at runtime.
    Wgpu,
    /// Plain CPU evaluation in exact `f64`; needs no feature or device, so it is always available.
    Cpu,
}
25
26/// Select the best available forward-eval backend at runtime: **CUDA → Metal → wgpu → CPU**.
27///
28/// Each accelerator branch only exists when its feature is compiled in, and is taken only if the
29/// corresponding device/adapter is actually present (so a build with both GPU features still degrades
30/// cleanly to CPU on a machine with neither).
31#[must_use]
32pub fn gpu_backend() -> GpuBackend {
33    #[cfg(feature = "gpu-cuda")]
34    {
35        if crate::gpu::cuda_available() {
36            return GpuBackend::Cuda;
37        }
38    }
39    #[cfg(feature = "gpu-metal")]
40    {
41        if crate::metal::metal_available() {
42            return GpuBackend::Metal;
43        }
44    }
45    #[cfg(feature = "gpu-wgpu")]
46    {
47        if crate::wgpu_forward::wgpu_available() {
48            return GpuBackend::Wgpu;
49        }
50    }
51    GpuBackend::Cpu
52}
53
#[cfg(test)]
mod tests {
    use super::*;

    /// Every backend variant exactly once, used for exhaustive pairwise comparisons.
    const ALL_BACKENDS: [GpuBackend; 4] = [
        GpuBackend::Cuda,
        GpuBackend::Metal,
        GpuBackend::Wgpu,
        GpuBackend::Cpu,
    ];

    // With no GPU feature compiled in, the only possible choice is CPU. The whole test is gated
    // (rather than just its assertion) so that GPU-enabled builds do not report an empty test as
    // passing. We deliberately do NOT call `gpu_backend()` when a GPU feature is enabled: the wgpu
    // adapter probe can crash on broken software drivers, and the meaningful invariant — CPU when
    // no accelerator exists — is exactly this configuration.
    #[cfg(not(any(feature = "gpu-cuda", feature = "gpu-metal", feature = "gpu-wgpu")))]
    #[test]
    fn selects_cpu_without_gpu_features() {
        assert_eq!(gpu_backend(), GpuBackend::Cpu);
    }

    /// Each variant equals itself and differs from every other variant (all pairs covered).
    #[test]
    fn backend_variants_are_distinct() {
        for (i, lhs) in ALL_BACKENDS.iter().enumerate() {
            for (j, rhs) in ALL_BACKENDS.iter().enumerate() {
                assert_eq!(lhs == rhs, i == j, "{:?} vs {:?}", lhs, rhs);
            }
        }
    }
}
phop_core/accel.rs

phop_core/
accel.rs