// j2k_core/accelerator.rs

// SPDX-License-Identifier: Apache-2.0
2
use core::mem::{size_of, size_of_val};
use core::slice;

use crate::backend::BackendKind;
7
8/// Residency of an accelerator-visible surface or buffer.
9#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
10#[non_exhaustive]
11pub enum SurfaceResidency {
12    /// Host memory owned by CPU code.
13    Host,
14    /// Pixels were produced directly by a CUDA decode path.
15    CudaResidentDecode,
16    /// Pixels were decoded on CPU and uploaded into CUDA memory.
17    CpuStagedCudaUpload,
18    /// Pixels were produced directly by a Metal decode path.
19    MetalResidentDecode,
20    /// Pixels were decoded on CPU and uploaded into Metal memory.
21    CpuStagedMetalUpload,
22    /// Device-local memory owned by a backend.
23    Device(BackendKind),
24    /// Host/device shared memory for the backend.
25    Shared(BackendKind),
26}
27
28impl SurfaceResidency {
29    /// Generic residency for a backend-produced surface.
30    #[must_use]
31    pub const fn for_backend(backend: BackendKind) -> Self {
32        match backend {
33            BackendKind::Cpu => Self::Host,
34            BackendKind::Metal => Self::Device(BackendKind::Metal),
35            BackendKind::Cuda => Self::Device(BackendKind::Cuda),
36        }
37    }
38}
39
/// Execution counters reported by accelerator sessions and surfaces.
///
/// All counters start at zero and are combined with
/// [`ExecutionStats::saturating_add`], which clamps at the field's maximum
/// instead of wrapping.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ExecutionStats {
    /// Number of submitted backend command buffers, streams, or equivalent jobs.
    pub submissions: u64,
    /// Number of kernel or shader dispatches.
    pub kernel_dispatches: u64,
    /// Bytes uploaded from host to device.
    pub upload_bytes: u64,
    /// Bytes read back from device to host.
    pub readback_bytes: u64,
    /// Backend-reported execution time in microseconds, when available.
    pub device_us: u128,
}

impl ExecutionStats {
    /// Construct empty execution statistics.
    ///
    /// Every counter is zero, so the result equals `ExecutionStats::default()`;
    /// this constructor exists because it is usable in `const` contexts.
    #[must_use]
    pub const fn new() -> Self {
        Self {
            submissions: 0,
            kernel_dispatches: 0,
            upload_bytes: 0,
            readback_bytes: 0,
            device_us: 0,
        }
    }

    /// Saturating sum of two execution-stat blocks.
    ///
    /// Each field is added independently; a field that would overflow is
    /// clamped to its type's maximum rather than wrapping or panicking.
    #[must_use]
    pub const fn saturating_add(self, other: Self) -> Self {
        Self {
            submissions: self.submissions.saturating_add(other.submissions),
            kernel_dispatches: self
                .kernel_dispatches
                .saturating_add(other.kernel_dispatches),
            upload_bytes: self.upload_bytes.saturating_add(other.upload_bytes),
            readback_bytes: self.readback_bytes.saturating_add(other.readback_bytes),
            device_us: self.device_us.saturating_add(other.device_us),
        }
    }
}
81
82/// Opaque byte range in accelerator-visible memory.
83#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
84pub struct DeviceMemoryRange {
85    /// Backend that owns the range.
86    pub backend: BackendKind,
87    /// Backend-local allocation identifier. Backends define its meaning.
88    pub allocation: u64,
89    /// Byte offset inside the allocation.
90    pub offset: usize,
91    /// Byte length of the range.
92    pub len: usize,
93}
94
95impl DeviceMemoryRange {
96    /// Construct a backend-local memory range.
97    pub const fn new(backend: BackendKind, allocation: u64, offset: usize, len: usize) -> Self {
98        Self {
99            backend,
100            allocation,
101            offset,
102            len,
103        }
104    }
105}
106
107/// Shared session contract for caller-owned accelerator runtime state.
108pub trait AcceleratorSession {
109    /// Backend owned by this session.
110    fn backend_kind(&self) -> BackendKind;
111
112    /// Execution statistics accumulated by this session.
113    fn execution_stats(&self) -> ExecutionStats {
114        ExecutionStats::default()
115    }
116}
117
/// Backend failure class used before mapping into codec-specific errors.
///
/// The variants carry no payload; they only classify what went wrong so a
/// caller can translate the failure into its own error type.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BackendFailureKind {
    /// Requested backend is not available on this host or build.
    Unavailable,
    /// Request shape is outside the backend implementation.
    Unsupported,
    /// Runtime/device/queue/context setup failed.
    Runtime,
    /// Kernel or shader execution failed.
    Kernel,
    /// Backend state lock or session state was poisoned.
    StatePoisoned,
    /// Backend rejected malformed caller input.
    InvalidInput,
    /// Backend allocation failed.
    OutOfMemory,
}
136
/// Marker trait for host-side values whose memory layout is part of a GPU ABI.
///
/// The provided methods reinterpret values as raw bytes without copying, which
/// is why the trait is `unsafe` to implement: the implementer, not the caller,
/// vouches for the layout.
///
/// # Safety
/// Implementers must guarantee that `Self` has a stable host/shader layout for
/// every backend that consumes it. In practice this means using `#[repr(C)]` or
/// an equivalent explicit layout, avoiding padding-sensitive interpretation
/// unless tests cover the exact byte layout, and keeping all fields plain data.
///
/// NOTE(review): [`GpuAbi::slice_as_bytes_mut`] lets safe code write arbitrary
/// bytes into `Self`, and the shared views read every byte of `Self`. That is
/// only sound when every bit pattern is a valid `Self` and no byte is
/// uninitialized padding — confirm each implementer meets this.
pub unsafe trait GpuAbi: Copy + 'static {
    /// Human-readable ABI name used in layout-test failures.
    const NAME: &'static str;

    /// View one value as bytes.
    ///
    /// The returned slice borrows `value` and is exactly `size_of::<Self>()`
    /// bytes long, in host byte order.
    fn as_bytes(value: &Self) -> &[u8] {
        // SAFETY: `value` is a live shared reference, so the pointer is non-null,
        // valid for reads of `size_of::<Self>()` bytes, and stays borrowed for the
        // returned lifetime; `u8` has alignment 1. The trait contract requires
        // `Self` to be plain GPU ABI data, which makes those bytes readable.
        unsafe { slice::from_raw_parts(core::ptr::from_ref(value).cast::<u8>(), size_of::<Self>()) }
    }

    /// View a slice of values as bytes.
    ///
    /// The returned slice covers `size_of_val(values)` bytes, i.e. every element
    /// back to back; an empty input yields an empty slice.
    fn slice_as_bytes(values: &[Self]) -> &[u8] {
        // SAFETY: `values` is a live shared slice, so its pointer is non-null and
        // valid for reads of `size_of_val(values)` bytes for the returned
        // lifetime. The trait contract requires `Self` to be plain GPU ABI data.
        unsafe { slice::from_raw_parts(values.as_ptr().cast::<u8>(), size_of_val(values)) }
    }

    /// Mutably view a slice of values as bytes.
    ///
    /// Writes through the returned slice change the elements of `values`
    /// directly; no validation is applied to the bytes written.
    fn slice_as_bytes_mut(values: &mut [Self]) -> &mut [u8] {
        // SAFETY: `values` is a live exclusive slice, so the pointer is non-null,
        // valid for reads and writes of `size_of_val(values)` bytes, and not
        // aliased while the returned borrow lives. The trait contract requires
        // `Self` to be plain GPU ABI data.
        unsafe { slice::from_raw_parts_mut(values.as_mut_ptr().cast::<u8>(), size_of_val(values)) }
    }
}
166
167macro_rules! impl_gpu_abi_primitive {
168    ($($ty:ty),* $(,)?) => {
169        $(
170            // SAFETY: Primitive numeric types are plain data with stable Rust layouts
171            // accepted by the shader ABI helpers as scalar values.
172            unsafe impl GpuAbi for $ty {
173                const NAME: &'static str = stringify!($ty);
174            }
175        )*
176    };
177}
178
179impl_gpu_abi_primitive!(u8, i8, u16, i16, u32, i32, u64, i64, f32, f64);
180
181// SAFETY: Arrays preserve element order and contain no extra non-element state;
182// the element type supplies the GPU ABI layout contract.
183unsafe impl<T, const N: usize> GpuAbi for [T; N]
184where
185    T: GpuAbi,
186{
187    const NAME: &'static str = "array";
188}