atomr_accel/backend.rs
1//! Backend identity traits.
2//!
//! An `atomr-accel` backend (CUDA, ROCm, Metal, oneAPI, Vulkan compute,
//! …) is a coherent triple of *device handle*, *stream / queue*, and
//! *event / fence* types. These traits name them so portable code
//! can be parameterized over `B: AccelBackend` without committing to
//! a vendor SDK.
8//!
9//! Backends still expose richer concrete types directly — the
10//! cuBLAS / cuDNN / cuFFT actors live in the `atomr-accel-cuda`
11//! crate and are not part of this trait surface. The traits here
12//! capture only the shape that every backend has to provide.
13
14use std::fmt::Debug;
15use std::sync::Arc;
16
17use crate::error::AccelError;
18
19/// Marker trait identifying a compute-acceleration backend.
20///
21/// Implemented by the `Backend` zero-sized type in each backend
22/// crate. Use as a trait bound:
23///
24/// ```ignore
25/// fn observe_load<B: AccelBackend>(d: &B::Device) -> u32 { ... }
26/// ```
27///
28/// The associated types name the lifetime-bounded handles a
29/// backend hands out. They're all `Send + Sync + 'static` so they
30/// can travel across actor boundaries.
31pub trait AccelBackend: Send + Sync + 'static {
32 /// Display name, e.g. `"cuda"`, `"rocm"`, `"metal"`.
33 const NAME: &'static str;
34
35 /// Device handle (e.g. `cudarc::driver::CudaContext`,
36 /// `hipDevice_t`, `MTLDevice`).
37 type Device: AccelDevice<Backend = Self>;
38
39 /// Per-actor stream / queue handle (e.g.
40 /// `cudarc::driver::CudaStream`, `hipStream_t`,
41 /// `MTLCommandQueue`).
42 type Stream: AccelStream<Backend = Self>;
43
44 /// Recordable / waitable synchronization primitive (e.g.
45 /// `cudaEvent_t`, `hipEvent_t`, `MTLEvent`).
46 type Event: Debug + Send + Sync + 'static;
47
48 /// Backend-specific error variants supplement the core
49 /// [`AccelError`] enum via the `LibraryError { lib, msg }`
50 /// catch-all. Backends that need finer granularity wrap
51 /// `AccelError` in their own type; the core itself is
52 /// `#[non_exhaustive]` so adding variants is a minor bump,
53 /// not a breaking change.
54 type Error: std::error::Error + Send + Sync + From<AccelError> + 'static;
55}
56
57/// Device-handle contract: identification + a hook to observe
58/// generation rebuilds (sticky-error recovery).
59pub trait AccelDevice: Send + Sync + 'static {
60 type Backend: AccelBackend;
61
62 /// Stable, opaque device id. CUDA returns the ordinal; ROCm
63 /// returns the hipDevice_t; Metal returns a hashed
64 /// `MTLDevice.registryID`.
65 fn device_id(&self) -> u32;
66
67 /// Current generation counter. Bumped every time the underlying
68 /// device context is torn down + rebuilt (e.g. cuda sticky-error
69 /// recovery). `AccelRef`s minted against an older generation
70 /// fail their next `access()`.
71 fn generation(&self) -> u64;
72}
73
74/// Stream / queue contract: ordered submission of work, plus the
75/// ability to record an event for cross-stream synchronization.
76pub trait AccelStream: Send + Sync + 'static {
77 type Backend: AccelBackend;
78
79 /// Record an event into this stream. Other streams can wait on
80 /// the resulting handle via [`AccelStream::wait_event`].
81 fn record_event(&self) -> Result<<Self::Backend as AccelBackend>::Event, AccelError>;
82
83 /// Wait on an event recorded into another stream before
84 /// scheduling further work on this one. Backends without
85 /// cross-queue events synthesize a host-side block.
86 fn wait_event(&self, event: &<Self::Backend as AccelBackend>::Event) -> Result<(), AccelError>;
87}
88
89/// Convenience type alias for a shared device handle. Every backend
90/// hands out devices through `Arc<B::Device>` so they survive context
91/// rebuilds without invalidating the outer `ActorRef`.
92pub type Device<B> = Arc<<B as AccelBackend>::Device>;
93
94/// Convenience type alias for a shared stream handle.
95pub type Stream<B> = Arc<<B as AccelBackend>::Stream>;