j2k_core/accelerator.rs
1// SPDX-License-Identifier: Apache-2.0
2
3use core::mem::{size_of, size_of_val};
4use core::slice;
5
6use crate::backend::BackendKind;
7
8/// Residency of an accelerator-visible surface or buffer.
9#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
10#[non_exhaustive]
11pub enum SurfaceResidency {
12 /// Host memory owned by CPU code.
13 Host,
14 /// Pixels were produced directly by a CUDA decode path.
15 CudaResidentDecode,
16 /// Pixels were decoded on CPU and uploaded into CUDA memory.
17 CpuStagedCudaUpload,
18 /// Pixels were produced directly by a Metal decode path.
19 MetalResidentDecode,
20 /// Pixels were decoded on CPU and uploaded into Metal memory.
21 CpuStagedMetalUpload,
22 /// Device-local memory owned by a backend.
23 Device(BackendKind),
24 /// Host/device shared memory for the backend.
25 Shared(BackendKind),
26}
27
28impl SurfaceResidency {
29 /// Generic residency for a backend-produced surface.
30 #[must_use]
31 pub const fn for_backend(backend: BackendKind) -> Self {
32 match backend {
33 BackendKind::Cpu => Self::Host,
34 BackendKind::Metal => Self::Device(BackendKind::Metal),
35 BackendKind::Cuda => Self::Device(BackendKind::Cuda),
36 }
37 }
38}
39
/// Counters describing work reported by accelerator sessions and surfaces.
///
/// The derived `Default` yields a block with every counter at zero.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub struct ExecutionStats {
    /// How many backend command buffers, streams, or equivalent jobs were submitted.
    pub submissions: u64,
    /// How many kernels or shaders were dispatched.
    pub kernel_dispatches: u64,
    /// Total bytes uploaded from the host to the device.
    pub upload_bytes: u64,
    /// Total bytes read back from the device to the host.
    pub readback_bytes: u64,
    /// Execution time in microseconds as reported by the backend, when available.
    pub device_us: u128,
}
54
55impl ExecutionStats {
56 /// Construct empty execution statistics.
57 pub const fn new() -> Self {
58 Self {
59 submissions: 0,
60 kernel_dispatches: 0,
61 upload_bytes: 0,
62 readback_bytes: 0,
63 device_us: 0,
64 }
65 }
66
67 /// Saturating sum of two execution-stat blocks.
68 #[must_use]
69 pub const fn saturating_add(self, other: Self) -> Self {
70 Self {
71 submissions: self.submissions.saturating_add(other.submissions),
72 kernel_dispatches: self
73 .kernel_dispatches
74 .saturating_add(other.kernel_dispatches),
75 upload_bytes: self.upload_bytes.saturating_add(other.upload_bytes),
76 readback_bytes: self.readback_bytes.saturating_add(other.readback_bytes),
77 device_us: self.device_us.saturating_add(other.device_us),
78 }
79 }
80}
81
82/// Opaque byte range in accelerator-visible memory.
83#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
84pub struct DeviceMemoryRange {
85 /// Backend that owns the range.
86 pub backend: BackendKind,
87 /// Backend-local allocation identifier. Backends define its meaning.
88 pub allocation: u64,
89 /// Byte offset inside the allocation.
90 pub offset: usize,
91 /// Byte length of the range.
92 pub len: usize,
93}
94
95impl DeviceMemoryRange {
96 /// Construct a backend-local memory range.
97 pub const fn new(backend: BackendKind, allocation: u64, offset: usize, len: usize) -> Self {
98 Self {
99 backend,
100 allocation,
101 offset,
102 len,
103 }
104 }
105}
106
107/// Shared session contract for caller-owned accelerator runtime state.
108pub trait AcceleratorSession {
109 /// Backend owned by this session.
110 fn backend_kind(&self) -> BackendKind;
111
112 /// Execution statistics accumulated by this session.
113 fn execution_stats(&self) -> ExecutionStats {
114 ExecutionStats::default()
115 }
116}
117
/// Classification of a backend failure, used before it is mapped into a
/// codec-specific error.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum BackendFailureKind {
    /// The requested backend is not available on this host or in this build.
    Unavailable,
    /// The shape of the request falls outside what the backend implements.
    Unsupported,
    /// Setting up the runtime, device, queue, or context failed.
    Runtime,
    /// A kernel or shader failed while executing.
    Kernel,
    /// A backend state lock or the session state was poisoned.
    StatePoisoned,
    /// The backend rejected malformed input from the caller.
    InvalidInput,
    /// A backend allocation failed.
    OutOfMemory,
}
136
/// Marker trait for host-side values whose memory layout is part of a GPU ABI.
///
/// # Safety
/// Implementers must guarantee that `Self` has a stable host/shader layout for
/// every backend that consumes it. In practice this means using `#[repr(C)]` or
/// an equivalent explicit layout, avoiding padding-sensitive interpretation
/// unless tests cover the exact byte layout, and keeping all fields plain data.
///
/// The provided byte-view methods rely on two further properties of `Self`:
///
/// * [`GpuAbi::as_bytes`] and [`GpuAbi::slice_as_bytes`] expose every byte of
///   the value, so `Self` should contain no padding: padding bytes are
///   uninitialized, and reading them through the returned slice is undefined
///   behavior.
/// * [`GpuAbi::slice_as_bytes_mut`] lets safe code store arbitrary bytes, so
///   every bit pattern must be a valid `Self`. This rules out fields such as
///   `bool`, `char`, references, and enums with unused discriminants.
pub unsafe trait GpuAbi: Copy + 'static {
    /// Human-readable ABI name used in layout-test failures.
    const NAME: &'static str;

    /// View one value as its `size_of::<Self>()` underlying bytes.
    fn as_bytes(value: &Self) -> &[u8] {
        let base = core::ptr::from_ref(value).cast::<u8>();
        // SAFETY: `base` comes from a live `&Self`, so it is non-null, aligned
        // for `u8`, and valid for `size_of::<Self>()` bytes for the borrow's
        // lifetime. The trait contract requires `Self` to be plain GPU ABI data.
        unsafe { slice::from_raw_parts(base, size_of::<Self>()) }
    }

    /// View a slice of values as one contiguous run of bytes.
    ///
    /// An empty input yields an empty byte slice.
    fn slice_as_bytes(values: &[Self]) -> &[u8] {
        let byte_len = size_of_val(values);
        // SAFETY: The pointer and byte length both describe the storage of the
        // borrowed slice. The trait contract requires `Self` to be plain GPU
        // ABI data.
        unsafe { slice::from_raw_parts(values.as_ptr().cast::<u8>(), byte_len) }
    }

    /// Mutably view a slice of values as one contiguous run of bytes.
    ///
    /// Writes through the returned slice directly overwrite the values.
    fn slice_as_bytes_mut(values: &mut [Self]) -> &mut [u8] {
        let byte_len = size_of_val::<[Self]>(values);
        let base = values.as_mut_ptr().cast::<u8>();
        // SAFETY: The pointer and byte length both describe the storage of the
        // exclusively borrowed slice, so no other reference aliases it. The
        // trait contract requires `Self` to be plain GPU ABI data.
        unsafe { slice::from_raw_parts_mut(base, byte_len) }
    }
}
166
167macro_rules! impl_gpu_abi_primitive {
168 ($($ty:ty),* $(,)?) => {
169 $(
170 // SAFETY: Primitive numeric types are plain data with stable Rust layouts
171 // accepted by the shader ABI helpers as scalar values.
172 unsafe impl GpuAbi for $ty {
173 const NAME: &'static str = stringify!($ty);
174 }
175 )*
176 };
177}
178
179impl_gpu_abi_primitive!(u8, i8, u16, i16, u32, i32, u64, i64, f32, f64);
180
181// SAFETY: Arrays preserve element order and contain no extra non-element state;
182// the element type supplies the GPU ABI layout contract.
183unsafe impl<T, const N: usize> GpuAbi for [T; N]
184where
185 T: GpuAbi,
186{
187 const NAME: &'static str = "array";
188}