//! Backends - Device-Specific Implementations
//!
//! # File
//! `crates/axonml-core/src/backends/mod.rs`
//!
//! # Author
//! Andrew Jewell Sr - AutomataNexus
//!
//! # Updated
//! March 8, 2026
//!
//! # Disclaimer
//! Use at own risk. This software is provided "as is", without warranty of any
//! kind, express or implied. The author and AutomataNexus shall not be held
//! liable for any damages arising from the use of this software.
17use crate::device::DeviceCapabilities;
18
19// =============================================================================
20// Backend Modules
21// =============================================================================
22
23pub mod cpu;
24
25pub mod cuda;
26
27#[cfg(feature = "cuda")]
28pub mod cuda_kernels;
29
30pub mod cuda_pool;
31
32#[cfg(feature = "cudnn")]
33pub mod cudnn_ops;
34
35#[cfg(feature = "vulkan")]
36pub mod vulkan;
37
38#[cfg(feature = "metal")]
39pub mod metal;
40
41#[cfg(feature = "wgpu")]
42pub mod wgpu_backend;
43
44// GPU testing infrastructure
45pub mod gpu_tests;
46
47// =============================================================================
48// Re-exports
49// =============================================================================
50
51pub use cpu::CpuBackend;
52
53pub use cuda::CudaBackend;
54
55#[cfg(feature = "vulkan")]
56pub use vulkan::VulkanBackend;
57
58#[cfg(feature = "metal")]
59pub use metal::MetalBackend;
60
61#[cfg(feature = "wgpu")]
62pub use wgpu_backend::WgpuBackend;
63
// =============================================================================
// Backend Trait
// =============================================================================
67
68/// Common trait for all compute backends.
69///
70/// This trait defines the interface that all backends must implement,
71/// enabling device-agnostic tensor operations.
72pub trait Backend: Send + Sync {
73    /// Returns the name of this backend.
74    fn name(&self) -> &'static str;
75
76    /// Returns whether this backend is available on the current system.
77    fn is_available(&self) -> bool;
78
79    /// Returns the device capabilities.
80    fn capabilities(&self) -> DeviceCapabilities;
81
82    /// Allocates memory on this backend.
83    fn allocate(&self, size: usize) -> *mut u8;
84
85    /// Deallocates memory on this backend.
86    fn deallocate(&self, ptr: *mut u8, size: usize);
87
88    /// Copies data from host to device.
89    fn copy_to_device(&self, dst: *mut u8, src: *const u8, size: usize);
90
91    /// Copies data from device to host.
92    fn copy_to_host(&self, dst: *mut u8, src: *const u8, size: usize);
93
94    /// Copies data within the device.
95    fn copy_device_to_device(&self, dst: *mut u8, src: *const u8, size: usize);
96
97    /// Synchronizes the device (waits for all operations to complete).
98    fn synchronize(&self);
99}
100
// =============================================================================
// GPU Memory Management
// =============================================================================
104
105/// GPU memory handle for safe memory management.
106#[derive(Debug)]
107pub struct GpuMemory {
108    ptr: *mut u8,
109    size: usize,
110    device_index: usize,
111    backend_type: BackendType,
112}
113
/// Type of backend for a GPU memory allocation.
///
/// GPU variants exist only when the corresponding Cargo feature is enabled;
/// `Cpu` is always present.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BackendType {
    /// CPU backend.
    Cpu,
    /// CUDA backend.
    #[cfg(feature = "cuda")]
    Cuda,
    /// Vulkan backend.
    #[cfg(feature = "vulkan")]
    Vulkan,
    /// Metal backend.
    #[cfg(feature = "metal")]
    Metal,
    /// WebGPU backend.
    #[cfg(feature = "wgpu")]
    Wgpu,
}
132
133impl GpuMemory {
134    /// Creates a new GPU memory handle.
135    pub fn new(ptr: *mut u8, size: usize, device_index: usize, backend_type: BackendType) -> Self {
136        Self {
137            ptr,
138            size,
139            device_index,
140            backend_type,
141        }
142    }
143
144    /// Returns the raw pointer.
145    #[must_use]
146    pub fn ptr(&self) -> *mut u8 {
147        self.ptr
148    }
149
150    /// Returns the size in bytes.
151    #[must_use]
152    pub fn size(&self) -> usize {
153        self.size
154    }
155
156    /// Returns the device index.
157    #[must_use]
158    pub fn device_index(&self) -> usize {
159        self.device_index
160    }
161
162    /// Returns the backend type.
163    #[must_use]
164    pub fn backend_type(&self) -> BackendType {
165        self.backend_type
166    }
167}
168
// =============================================================================
// GPU Stream/Queue Abstraction
// =============================================================================
172
173/// GPU execution stream for async operations.
174#[derive(Debug)]
175pub struct GpuStream {
176    /// Stream handle (backend-specific).
177    handle: usize,
178    /// Device index.
179    device_index: usize,
180    /// Backend type.
181    backend_type: BackendType,
182}
183
184impl GpuStream {
185    /// Creates a new GPU stream.
186    #[must_use]
187    pub fn new(handle: usize, device_index: usize, backend_type: BackendType) -> Self {
188        Self {
189            handle,
190            device_index,
191            backend_type,
192        }
193    }
194
195    /// Returns the stream handle.
196    #[must_use]
197    pub fn handle(&self) -> usize {
198        self.handle
199    }
200
201    /// Returns the device index.
202    #[must_use]
203    pub fn device_index(&self) -> usize {
204        self.device_index
205    }
206
207    /// Synchronizes this stream (waits for all operations to complete).
208    ///
209    /// # Backend-specific behavior
210    /// - **CPU**: No-op (CPU operations are synchronous)
211    /// - **CUDA**: No-op at stream level; use `CudaBackend::synchronize()` for device sync
212    /// - **Vulkan**: Waits for queue to become idle
213    /// - **Metal**: Waits for command buffer completion
214    /// - **WebGPU**: Submits pending commands to queue
215    ///
216    /// For CUDA, proper synchronization should be done through `CudaBackend::synchronize()`
217    /// which performs device-level synchronization.
218    pub fn synchronize(&self) {
219        match self.backend_type {
220            BackendType::Cpu => {} // No-op for CPU (synchronous)
221            #[cfg(feature = "cuda")]
222            BackendType::Cuda => cuda::stream_synchronize(self.handle),
223            #[cfg(feature = "vulkan")]
224            BackendType::Vulkan => vulkan::queue_wait_idle(self.handle),
225            #[cfg(feature = "metal")]
226            BackendType::Metal => metal::command_buffer_wait(self.handle),
227            #[cfg(feature = "wgpu")]
228            BackendType::Wgpu => wgpu_backend::queue_submit(self.handle),
229        }
230    }
231}
232
// =============================================================================
// Device Selection Utilities
// =============================================================================
236
237/// Returns the best available GPU backend.
238#[must_use]
239pub fn best_available_backend() -> BackendType {
240    #[cfg(feature = "cuda")]
241    if cuda::is_available() {
242        return BackendType::Cuda;
243    }
244
245    #[cfg(feature = "metal")]
246    if metal::is_available() {
247        return BackendType::Metal;
248    }
249
250    #[cfg(feature = "vulkan")]
251    if vulkan::is_available() {
252        return BackendType::Vulkan;
253    }
254
255    #[cfg(feature = "wgpu")]
256    if wgpu_backend::is_available() {
257        return BackendType::Wgpu;
258    }
259
260    BackendType::Cpu
261}
262
/// Returns the number of available GPUs across all backends.
///
/// Sums `device_count()` over every compiled-in GPU backend; returns 0 when
/// no GPU feature is enabled. Note that a physical GPU visible to multiple
/// backends (e.g. Vulkan and WebGPU) is counted once per backend.
#[must_use]
pub fn gpu_count() -> usize {
    // `count` is only mutated when at least one GPU feature is enabled.
    #[allow(unused_mut)]
    let mut count = 0_usize;

    #[cfg(feature = "cuda")]
    {
        count += cuda::device_count();
    }

    #[cfg(feature = "vulkan")]
    {
        count += vulkan::device_count();
    }

    #[cfg(feature = "metal")]
    {
        count += metal::device_count();
    }

    #[cfg(feature = "wgpu")]
    {
        count += wgpu_backend::device_count();
    }

    count
}