use core::mem::{size_of, size_of_val};
use core::slice;
use crate::backend::BackendKind;
/// Residency classification for a surface.
///
/// NOTE(review): the descriptions marked "presumably" below are inferred from
/// the variant names alone; confirm them against the code that produces each
/// variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum SurfaceResidency {
    /// Host residency; this is what [`SurfaceResidency::for_backend`] yields
    /// for the CPU backend.
    Host,
    /// Presumably: decoded straight into CUDA-resident memory.
    CudaResidentDecode,
    /// Presumably: staged on the CPU, then uploaded to CUDA.
    CpuStagedCudaUpload,
    /// Presumably: decoded straight into Metal-resident memory.
    MetalResidentDecode,
    /// Presumably: staged on the CPU, then uploaded to Metal.
    CpuStagedMetalUpload,
    /// Device residency tagged with a backend; this is what
    /// [`SurfaceResidency::for_backend`] yields for Metal and CUDA.
    Device(BackendKind),
    /// Presumably: memory shared between the host and the tagged backend.
    Shared(BackendKind),
}

impl SurfaceResidency {
    /// Returns the residency associated with `backend`.
    ///
    /// The CPU backend maps to [`Self::Host`]; the Metal and CUDA backends map
    /// to [`Self::Device`] carrying that same backend.
    #[must_use]
    pub const fn for_backend(backend: BackendKind) -> Self {
        match backend {
            BackendKind::Cpu => Self::Host,
            // Both GPU backends wrap themselves, so the matched value is reused.
            BackendKind::Metal | BackendKind::Cuda => Self::Device(backend),
        }
    }
}
/// Plain counters describing work performed by an accelerator session.
///
/// Every field starts at zero, both through [`Default`] and through
/// [`ExecutionStats::new`].
///
/// NOTE(review): the exact meaning and units of each counter are set by the
/// code that fills them in; the field notes below are inferred from the field
/// names and should be confirmed there.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ExecutionStats {
    /// Submission count.
    pub submissions: u64,
    /// Kernel dispatch count.
    pub kernel_dispatches: u64,
    /// Upload volume; the name suggests bytes.
    pub upload_bytes: u64,
    /// Readback volume; the name suggests bytes.
    pub readback_bytes: u64,
    /// Device time; the `_us` suffix suggests microseconds (confirm).
    pub device_us: u128,
}

impl ExecutionStats {
    /// Creates stats with every counter set to zero.
    ///
    /// Equivalent to [`Default::default`], but usable in `const` contexts.
    #[must_use]
    pub const fn new() -> Self {
        Self {
            submissions: 0,
            kernel_dispatches: 0,
            upload_bytes: 0,
            readback_bytes: 0,
            device_us: 0,
        }
    }

    /// Returns the field-wise sum of `self` and `other`.
    ///
    /// Each counter is added with saturating arithmetic, so a counter that
    /// would overflow is clamped to its type's maximum instead of wrapping or
    /// panicking.
    #[must_use]
    pub const fn saturating_add(self, other: Self) -> Self {
        let submissions = self.submissions.saturating_add(other.submissions);
        let kernel_dispatches = self
            .kernel_dispatches
            .saturating_add(other.kernel_dispatches);
        let upload_bytes = self.upload_bytes.saturating_add(other.upload_bytes);
        let readback_bytes = self.readback_bytes.saturating_add(other.readback_bytes);
        let device_us = self.device_us.saturating_add(other.device_us);

        Self {
            submissions,
            kernel_dispatches,
            upload_bytes,
            readback_bytes,
            device_us,
        }
    }
}
/// A range (`offset` + `len`) inside an allocation owned by a backend.
///
/// This is a plain value type: nothing here checks that the range is in
/// bounds or that `offset + len` does not overflow.
///
/// NOTE(review): the units of `offset` and `len` (presumably bytes) and the
/// namespace of `allocation` are not established in this file; confirm with
/// the code that creates and consumes these ranges.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct DeviceMemoryRange {
    /// Backend the allocation belongs to.
    pub backend: BackendKind,
    /// Identifier of the allocation the range refers to.
    pub allocation: u64,
    /// Start of the range within the allocation.
    pub offset: usize,
    /// Length of the range.
    pub len: usize,
}

impl DeviceMemoryRange {
    /// Bundles the given components into a range without validating them.
    #[must_use]
    pub const fn new(backend: BackendKind, allocation: u64, offset: usize, len: usize) -> Self {
        Self {
            backend,
            allocation,
            offset,
            len,
        }
    }
}
/// Common interface of an accelerator session.
pub trait AcceleratorSession {
    /// Returns the [`BackendKind`] of this session.
    fn backend_kind(&self) -> BackendKind;

    /// Returns the execution counters of this session.
    ///
    /// The provided implementation reports all-zero stats; implementors that
    /// track their work are expected to override it.
    fn execution_stats(&self) -> ExecutionStats {
        ExecutionStats::new()
    }
}
/// Coarse category of a backend failure.
///
/// NOTE(review): this file only declares the variants; the notes on each one
/// are inferred from its name and should be confirmed against the code that
/// constructs it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BackendFailureKind {
/// Presumably: the backend is not available on this machine.
Unavailable,
/// Presumably: the backend does not support the requested operation.
Unsupported,
/// Presumably: a failure reported by the backend's runtime or driver.
Runtime,
/// Presumably: a failure tied to a kernel.
Kernel,
/// Presumably: an earlier failure left the backend state unusable.
StatePoisoned,
/// Presumably: the caller supplied invalid input.
InvalidInput,
/// Presumably: a memory allocation could not be satisfied.
OutOfMemory,
}
/// Types whose in-memory representation can be viewed directly as raw bytes.
///
/// # Safety
///
/// The provided methods reinterpret the storage of `Self` as `[u8]` without
/// any checks, so an implementor must guarantee that:
///
/// * every byte of a `Self` value is initialized (in particular, the type has
///   no padding), because [`GpuAbi::as_bytes`] and [`GpuAbi::slice_as_bytes`]
///   expose all of them as `u8`;
/// * every bit pattern is a valid `Self`, because
///   [`GpuAbi::slice_as_bytes_mut`] lets safe code write arbitrary bytes into
///   existing values.
pub unsafe trait GpuAbi: Copy + 'static {
    /// Static label for the implementing type.
    const NAME: &'static str;

    /// Borrows a single value as its `size_of::<Self>()` underlying bytes.
    fn as_bytes(value: &Self) -> &[u8] {
        let origin: *const Self = value;
        // SAFETY: `origin` comes from a live shared reference, so it is
        // non-null, valid for reads of `size_of::<Self>()` bytes, and stays
        // borrowed for the returned lifetime. `u8` has alignment 1, and the
        // trait contract guarantees every one of those bytes is initialized.
        unsafe { slice::from_raw_parts(origin.cast::<u8>(), size_of::<Self>()) }
    }

    /// Borrows a slice of values as one contiguous byte slice covering all of
    /// its elements.
    fn slice_as_bytes(values: &[Self]) -> &[u8] {
        let byte_len = size_of_val(values);
        let origin = values.as_ptr().cast::<u8>();
        // SAFETY: `origin` and `byte_len` describe exactly the memory of
        // `values`, which stays shared-borrowed for the returned lifetime; the
        // trait contract guarantees all of those bytes are initialized.
        unsafe { slice::from_raw_parts(origin, byte_len) }
    }

    /// Mutably borrows a slice of values as one contiguous byte slice covering
    /// all of its elements.
    fn slice_as_bytes_mut(values: &mut [Self]) -> &mut [u8] {
        let byte_len = size_of_val(values);
        let origin = values.as_mut_ptr().cast::<u8>();
        // SAFETY: `origin` and `byte_len` describe exactly the memory of
        // `values`, which stays exclusively borrowed for the returned
        // lifetime. Writes through the result cannot create an invalid `Self`
        // because the trait contract makes every bit pattern valid.
        unsafe { slice::from_raw_parts_mut(origin, byte_len) }
    }
}
// Implements `GpuAbi` for every listed type, using the type's own spelling
// (via `stringify!`) as its `NAME`. A trailing comma is accepted.
//
// The macro emits `unsafe impl`s without checking anything, so each type
// passed to it must be reviewed by hand against the requirements the byte-view
// methods of `GpuAbi` rely on.
macro_rules! impl_gpu_abi_primitive {
    ($($prim:ty),* $(,)?) => {
        $(
            unsafe impl GpuAbi for $prim {
                const NAME: &'static str = stringify!($prim);
            }
        )*
    };
}

// SAFETY: fixed-width integers and IEEE floats have no padding bytes, and
// every bit pattern is a valid value, so viewing them as bytes (and writing
// arbitrary bytes back) is sound.
impl_gpu_abi_primitive!(
    u8, i8,
    u16, i16,
    u32, i32,
    u64, i64,
    f32, f64,
);
// SAFETY: an array `[T; N]` is exactly `N` values of `T` laid out contiguously
// with no padding of its own, so it has uninitialized bytes or invalid bit
// patterns only if `T` does, which `T: GpuAbi` rules out.
unsafe impl<T: GpuAbi, const N: usize> GpuAbi for [T; N] {
    /// Every array shares this label, regardless of element type or length.
    const NAME: &'static str = "array";
}