// axonml_core/backends/mod.rs
1//! Backends - Device-Specific Implementations
2//!
3//! This module contains backend implementations for different compute devices.
4//! Each backend provides device-specific memory operations and kernel execution.
5//!
6//! # Available Backends
7//! - `cpu` - CPU backend (always available)
8//! - `cuda` - NVIDIA CUDA backend (requires `cuda` feature)
9//! - `vulkan` - Vulkan backend (requires `vulkan` feature)
10//! - `metal` - Apple Metal backend (requires `metal` feature)
11//! - `wgpu` - WebGPU backend (requires `wgpu` feature)
12//!
13//! # Backend Trait
14//!
15//! All backends implement the `Backend` trait which provides a common interface
16//! for tensor operations. This enables device-agnostic code.
17//!
18//! @version 0.1.0
19//! @author `AutomataNexus` Development Team
20
21use crate::device::DeviceCapabilities;
22
23// =============================================================================
24// Backend Modules
25// =============================================================================
26
27pub mod cpu;
28
29#[cfg(feature = "cuda")]
30pub mod cuda;
31
32#[cfg(feature = "cuda")]
33pub mod cuda_kernels;
34
35#[cfg(feature = "vulkan")]
36pub mod vulkan;
37
38#[cfg(feature = "metal")]
39pub mod metal;
40
41#[cfg(feature = "wgpu")]
42pub mod wgpu_backend;
43
44// GPU testing infrastructure
45pub mod gpu_tests;
46
47// =============================================================================
48// Re-exports
49// =============================================================================
50
51pub use cpu::CpuBackend;
52
53#[cfg(feature = "cuda")]
54pub use cuda::CudaBackend;
55
56#[cfg(feature = "vulkan")]
57pub use vulkan::VulkanBackend;
58
59#[cfg(feature = "metal")]
60pub use metal::MetalBackend;
61
62#[cfg(feature = "wgpu")]
63pub use wgpu_backend::WgpuBackend;
64
65// =============================================================================
66// Backend Trait
67// =============================================================================
68
/// Common trait for all compute backends.
///
/// This trait defines the interface that all backends must implement,
/// enabling device-agnostic tensor operations. Implementors are
/// `Send + Sync`, so a backend instance can be shared across threads.
///
/// NOTE(review): the allocation and copy methods traffic in raw
/// `*mut u8` / `*const u8` pointers but are not declared `unsafe`;
/// callers must still uphold the usual pointer invariants (valid,
/// correctly sized, pointing at memory owned by this backend). Consider
/// marking them `unsafe fn` or documenting a `# Safety` contract per method.
pub trait Backend: Send + Sync {
    /// Returns the name of this backend.
    fn name(&self) -> &'static str;

    /// Returns whether this backend is available on the current system.
    fn is_available(&self) -> bool;

    /// Returns the device capabilities.
    fn capabilities(&self) -> DeviceCapabilities;

    /// Allocates `size` bytes on this backend.
    ///
    /// Returns a raw pointer; release it with [`Backend::deallocate`],
    /// passing the same `size`.
    fn allocate(&self, size: usize) -> *mut u8;

    /// Deallocates memory previously obtained from [`Backend::allocate`].
    fn deallocate(&self, ptr: *mut u8, size: usize);

    /// Copies `size` bytes from host memory (`src`) to device memory (`dst`).
    fn copy_to_device(&self, dst: *mut u8, src: *const u8, size: usize);

    /// Copies `size` bytes from device memory (`src`) to host memory (`dst`).
    fn copy_to_host(&self, dst: *mut u8, src: *const u8, size: usize);

    /// Copies `size` bytes between two device buffers.
    fn copy_device_to_device(&self, dst: *mut u8, src: *const u8, size: usize);

    /// Synchronizes the device (waits for all operations to complete).
    fn synchronize(&self);
}
101
102// =============================================================================
103// GPU Memory Management
104// =============================================================================
105
/// GPU memory handle for safe memory management.
///
/// Bundles a raw device pointer with its allocation size, the owning
/// device index, and the backend that produced it.
///
/// NOTE(review): there is no `Drop` impl visible here, so dropping a
/// `GpuMemory` does not free the allocation — presumably the owning
/// backend's `deallocate` must be called explicitly; confirm at call sites.
#[derive(Debug)]
pub struct GpuMemory {
    // Raw device pointer returned by the backend allocator.
    ptr: *mut u8,
    // Allocation size in bytes.
    size: usize,
    // Index of the device that owns this allocation.
    device_index: usize,
    // Backend that performed the allocation.
    backend_type: BackendType,
}
114
/// Type of backend for a GPU memory allocation.
///
/// Variants other than [`BackendType::Cpu`] exist only when the matching
/// Cargo feature is enabled, so `match`es over this enum must be
/// feature-aware.
// `Hash` added so the type can key `HashMap`/`HashSet` (e.g. per-backend
// registries); free for a unit-variant enum that is already `Eq`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BackendType {
    /// CPU backend.
    Cpu,
    /// CUDA backend.
    #[cfg(feature = "cuda")]
    Cuda,
    /// Vulkan backend.
    #[cfg(feature = "vulkan")]
    Vulkan,
    /// Metal backend.
    #[cfg(feature = "metal")]
    Metal,
    /// WebGPU backend.
    #[cfg(feature = "wgpu")]
    Wgpu,
}
133
134impl GpuMemory {
135 /// Creates a new GPU memory handle.
136 pub fn new(ptr: *mut u8, size: usize, device_index: usize, backend_type: BackendType) -> Self {
137 Self {
138 ptr,
139 size,
140 device_index,
141 backend_type,
142 }
143 }
144
145 /// Returns the raw pointer.
146 #[must_use]
147 pub fn ptr(&self) -> *mut u8 {
148 self.ptr
149 }
150
151 /// Returns the size in bytes.
152 #[must_use]
153 pub fn size(&self) -> usize {
154 self.size
155 }
156
157 /// Returns the device index.
158 #[must_use]
159 pub fn device_index(&self) -> usize {
160 self.device_index
161 }
162
163 /// Returns the backend type.
164 #[must_use]
165 pub fn backend_type(&self) -> BackendType {
166 self.backend_type
167 }
168}
169
170// =============================================================================
171// GPU Stream/Queue Abstraction
172// =============================================================================
173
/// GPU execution stream for async operations.
///
/// Identifies one backend-specific command stream/queue on a particular
/// device; how `handle` is interpreted depends on `backend_type`.
#[derive(Debug)]
pub struct GpuStream {
    /// Stream handle (backend-specific); an opaque integer interpreted by
    /// the owning backend.
    handle: usize,
    /// Device index.
    device_index: usize,
    /// Backend type.
    backend_type: BackendType,
}
184
185impl GpuStream {
186 /// Creates a new GPU stream.
187 #[must_use]
188 pub fn new(handle: usize, device_index: usize, backend_type: BackendType) -> Self {
189 Self {
190 handle,
191 device_index,
192 backend_type,
193 }
194 }
195
196 /// Returns the stream handle.
197 #[must_use]
198 pub fn handle(&self) -> usize {
199 self.handle
200 }
201
202 /// Returns the device index.
203 #[must_use]
204 pub fn device_index(&self) -> usize {
205 self.device_index
206 }
207
208 /// Synchronizes this stream (waits for all operations to complete).
209 ///
210 /// # Backend-specific behavior
211 /// - **CPU**: No-op (CPU operations are synchronous)
212 /// - **CUDA**: No-op at stream level; use `CudaBackend::synchronize()` for device sync
213 /// - **Vulkan**: Waits for queue to become idle
214 /// - **Metal**: Waits for command buffer completion
215 /// - **WebGPU**: Submits pending commands to queue
216 ///
217 /// For CUDA, proper synchronization should be done through `CudaBackend::synchronize()`
218 /// which performs device-level synchronization.
219 pub fn synchronize(&self) {
220 match self.backend_type {
221 BackendType::Cpu => {} // No-op for CPU (synchronous)
222 #[cfg(feature = "cuda")]
223 BackendType::Cuda => cuda::stream_synchronize(self.handle),
224 #[cfg(feature = "vulkan")]
225 BackendType::Vulkan => vulkan::queue_wait_idle(self.handle),
226 #[cfg(feature = "metal")]
227 BackendType::Metal => metal::command_buffer_wait(self.handle),
228 #[cfg(feature = "wgpu")]
229 BackendType::Wgpu => wgpu_backend::queue_submit(self.handle),
230 }
231 }
232}
233
234// =============================================================================
235// Device Selection Utilities
236// =============================================================================
237
/// Returns the best available GPU backend.
///
/// Probes the compiled-in backends in a fixed priority order — CUDA, then
/// Metal, then Vulkan, then WebGPU — and returns the first whose
/// `is_available()` check succeeds. Falls back to [`BackendType::Cpu`]
/// when no GPU backend is available (or when no GPU feature is enabled
/// at compile time).
#[must_use]
pub fn best_available_backend() -> BackendType {
    #[cfg(feature = "cuda")]
    if cuda::is_available() {
        return BackendType::Cuda;
    }

    #[cfg(feature = "metal")]
    if metal::is_available() {
        return BackendType::Metal;
    }

    #[cfg(feature = "vulkan")]
    if vulkan::is_available() {
        return BackendType::Vulkan;
    }

    #[cfg(feature = "wgpu")]
    if wgpu_backend::is_available() {
        return BackendType::Wgpu;
    }

    // CPU is always available; reached when every cfg-gated probe above is
    // compiled out or reports unavailable.
    BackendType::Cpu
}
263
/// Returns the number of available GPUs across all backends.
///
/// Sums the per-backend device counts for every GPU backend compiled in;
/// with no GPU features enabled this is always `0`.
#[must_use]
pub fn gpu_count() -> usize {
    // Each cfg-gated binding shadows the previous total, so no `mut`
    // accumulator (or `#[allow(unused_mut)]`) is needed; backends are
    // queried in the same order as before.
    let total = 0_usize;

    #[cfg(feature = "cuda")]
    let total = total + cuda::device_count();

    #[cfg(feature = "vulkan")]
    let total = total + vulkan::device_count();

    #[cfg(feature = "metal")]
    let total = total + metal::device_count();

    #[cfg(feature = "wgpu")]
    let total = total + wgpu_backend::device_count();

    total
}