use std::ffi::c_void;
use oxicuda_driver::ffi::{
CUDA_ARRAY_DESCRIPTOR, CUDA_ARRAY3D_DESCRIPTOR, CUDA_RESOURCE_DESC, CUDA_RESOURCE_VIEW_DESC,
CUDA_TEXTURE_DESC, CUaddress_mode, CUarray, CUarray_format, CUfilter_mode, CUmipmappedArray,
CUresourceViewFormat, CUresourcetype, CUsurfObject, CUtexObject, CudaResourceDescArray,
CudaResourceDescLinear, CudaResourceDescMipmap, CudaResourceDescPitch2d, CudaResourceDescRes,
};
use oxicuda_driver::loader::try_driver;
use oxicuda_driver::{
CU_TRSF_NORMALIZED_COORDINATES, CU_TRSF_READ_AS_INTEGER, CU_TRSF_SRGB, CUDA_ARRAY3D_CUBEMAP,
CUDA_ARRAY3D_LAYERED, CUDA_ARRAY3D_SURFACE_LDST, CUDA_ARRAY3D_TEXTURE_GATHER,
};
use crate::error::{CudaRtError, CudaRtResult};
use crate::memory::DevicePtr;
use crate::stream::CudaStream;
/// Per-channel element format of a CUDA array.
///
/// Every variant maps onto the driver `CUarray_format` value of the same name
/// (see `ArrayFormat::as_cu_format`); the per-channel width is reported by
/// `ArrayFormat::bytes_per_channel`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ArrayFormat {
/// Unsigned integer channel, 1 byte wide.
UnsignedInt8,
/// Unsigned integer channel, 2 bytes wide.
UnsignedInt16,
/// Unsigned integer channel, 4 bytes wide.
UnsignedInt32,
/// Signed integer channel, 1 byte wide.
SignedInt8,
/// Signed integer channel, 2 bytes wide.
SignedInt16,
/// Signed integer channel, 4 bytes wide.
SignedInt32,
/// Half-precision channel, 2 bytes wide.
Half,
/// Floating-point channel, 4 bytes wide.
Float,
}
impl ArrayFormat {
#[must_use]
pub const fn as_cu_format(self) -> CUarray_format {
match self {
Self::UnsignedInt8 => CUarray_format::UnsignedInt8,
Self::UnsignedInt16 => CUarray_format::UnsignedInt16,
Self::UnsignedInt32 => CUarray_format::UnsignedInt32,
Self::SignedInt8 => CUarray_format::SignedInt8,
Self::SignedInt16 => CUarray_format::SignedInt16,
Self::SignedInt32 => CUarray_format::SignedInt32,
Self::Half => CUarray_format::Half,
Self::Float => CUarray_format::Float,
}
}
#[must_use]
pub const fn bytes_per_channel(self) -> usize {
match self {
Self::UnsignedInt8 | Self::SignedInt8 => 1,
Self::UnsignedInt16 | Self::SignedInt16 | Self::Half => 2,
Self::UnsignedInt32 | Self::SignedInt32 | Self::Float => 4,
}
}
}
/// Addressing mode selectable per texture coordinate axis.
///
/// Used by the `address_u` / `address_v` / `address_w` fields of
/// `TextureDesc`. Each variant maps onto the `CUaddress_mode` value of the
/// same name; the sampling semantics are those the CUDA driver defines for
/// that value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AddressMode {
/// Maps to `CUaddress_mode::Wrap`.
Wrap,
/// Maps to `CUaddress_mode::Clamp`.
Clamp,
/// Maps to `CUaddress_mode::Mirror`.
Mirror,
/// Maps to `CUaddress_mode::Border`.
Border,
}
impl AddressMode {
#[must_use]
const fn as_cu(self) -> CUaddress_mode {
match self {
Self::Wrap => CUaddress_mode::Wrap,
Self::Clamp => CUaddress_mode::Clamp,
Self::Mirror => CUaddress_mode::Mirror,
Self::Border => CUaddress_mode::Border,
}
}
}
/// Texture filtering mode.
///
/// Used by the `filter_mode` and `mipmap_filter` fields of `TextureDesc`.
/// Each variant maps onto the `CUfilter_mode` value of the same name.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FilterMode {
/// Maps to `CUfilter_mode::Point`.
Point,
/// Maps to `CUfilter_mode::Linear`.
Linear,
}
impl FilterMode {
#[must_use]
const fn as_cu(self) -> CUfilter_mode {
match self {
Self::Point => CUfilter_mode::Point,
Self::Linear => CUfilter_mode::Linear,
}
}
}
/// Owning wrapper around a driver `CUarray` handle.
///
/// Instances are produced by `CudaArray::create_1d` / `CudaArray::create_2d`;
/// the `Drop` implementation hands the handle to the driver's
/// `cu_array_destroy` entry point when it is available.
pub struct CudaArray {
/// Raw driver handle filled in by the array-create call.
handle: CUarray,
/// Width in elements, as passed to the constructor.
width: usize,
/// Height in elements; 0 for arrays built with `create_1d`.
height: usize,
/// Per-channel element format.
format: ArrayFormat,
/// Number of channels per element, as passed to the constructor.
num_channels: u32,
}
impl CudaArray {
    /// Creates a one-dimensional CUDA array that is `width` elements wide.
    ///
    /// The descriptor handed to the driver uses `height: 0`, and the returned
    /// wrapper reports a height of 0 as well.
    ///
    /// # Errors
    ///
    /// * `CudaRtError::DriverNotAvailable` if the driver cannot be loaded.
    /// * `CudaRtError::NotSupported` if the driver does not expose
    ///   `cu_array_create_v2`.
    /// * The error mapped from the driver status code, or
    ///   `CudaRtError::MemoryAllocation` when the code has no mapping.
    pub fn create_1d(width: usize, format: ArrayFormat, num_channels: u32) -> CudaRtResult<Self> {
        Self::create_with_extent(width, 0, format, num_channels)
    }

    /// Creates a two-dimensional CUDA array of `width` x `height` elements.
    ///
    /// # Errors
    ///
    /// Same as [`CudaArray::create_1d`].
    pub fn create_2d(
        width: usize,
        height: usize,
        format: ArrayFormat,
        num_channels: u32,
    ) -> CudaRtResult<Self> {
        Self::create_with_extent(width, height, format, num_channels)
    }

    /// Shared implementation of the public constructors: fills a
    /// `CUDA_ARRAY_DESCRIPTOR` and asks the driver to create the array.
    fn create_with_extent(
        width: usize,
        height: usize,
        format: ArrayFormat,
        num_channels: u32,
    ) -> CudaRtResult<Self> {
        let api = try_driver().map_err(|_| CudaRtError::DriverNotAvailable)?;
        let create_fn = api.cu_array_create_v2.ok_or(CudaRtError::NotSupported)?;
        let desc = CUDA_ARRAY_DESCRIPTOR {
            width,
            height,
            format: format.as_cu_format(),
            num_channels,
        };
        let mut handle = CUarray::default();
        // SAFETY: both pointers refer to live locals for the duration of the call.
        let rc = unsafe { create_fn(&raw mut handle, &desc) };
        if rc != 0 {
            return Err(CudaRtError::from_code(rc).unwrap_or(CudaRtError::MemoryAllocation));
        }
        Ok(Self {
            handle,
            width,
            height,
            format,
            num_channels,
        })
    }

    /// Number of bytes implied by the dimensions recorded at creation time
    /// (`width * max(height, 1) * num_channels * bytes_per_channel`),
    /// saturating instead of overflowing.
    fn capacity_bytes(&self) -> usize {
        // `u32 -> usize` is a widening conversion on 32- and 64-bit targets.
        let channels = self.num_channels as usize;
        let bytes_per_element = self.format.bytes_per_channel().saturating_mul(channels);
        // 1D arrays record a height of 0 but still hold one row of elements.
        let rows = self.height.max(1);
        self.width
            .saturating_mul(rows)
            .saturating_mul(bytes_per_element)
    }

    /// Rejects transfers that are larger than the array itself, so the safe
    /// slice-based wrappers never ask the driver to run past the allocation.
    fn ensure_fits(&self, byte_count: usize) -> CudaRtResult<()> {
        if byte_count > self.capacity_bytes() {
            Err(CudaRtError::InvalidValue)
        } else {
            Ok(())
        }
    }

    /// Copies `byte_count` bytes from host memory at `src` into the array,
    /// starting at byte offset 0.
    ///
    /// # Safety
    ///
    /// `src` must be valid for reads of `byte_count` bytes, and `byte_count`
    /// must not exceed what the array can hold.
    ///
    /// # Errors
    ///
    /// * `CudaRtError::DriverNotAvailable` if the driver cannot be loaded.
    /// * `CudaRtError::NotSupported` if `cu_memcpy_htoa_v2` is unavailable.
    /// * The error mapped from the driver status code, or
    ///   `CudaRtError::InvalidMemcpyDirection` when the code has no mapping.
    pub unsafe fn copy_from_host_raw(
        &self,
        src: *const c_void,
        byte_count: usize,
    ) -> CudaRtResult<()> {
        let api = try_driver().map_err(|_| CudaRtError::DriverNotAvailable)?;
        let f = api.cu_memcpy_htoa_v2.ok_or(CudaRtError::NotSupported)?;
        // SAFETY: the caller guarantees `src` is readable for `byte_count` bytes.
        let rc = unsafe { f(self.handle, 0, src, byte_count) };
        if rc != 0 {
            return Err(CudaRtError::from_code(rc).unwrap_or(CudaRtError::InvalidMemcpyDirection));
        }
        Ok(())
    }

    /// Copies the bytes of `src` into the array, starting at byte offset 0.
    ///
    /// # Errors
    ///
    /// Returns `CudaRtError::InvalidValue` without touching the driver when
    /// `src` occupies more bytes than the array can hold; otherwise the same
    /// errors as [`CudaArray::copy_from_host_raw`].
    pub fn copy_from_host<T: Copy>(&self, src: &[T]) -> CudaRtResult<()> {
        let byte_count = std::mem::size_of_val(src);
        self.ensure_fits(byte_count)?;
        // SAFETY: `src` is a live slice, hence readable for exactly
        // `size_of_val(src)` bytes, and the size was checked against the array.
        unsafe { self.copy_from_host_raw(src.as_ptr().cast::<c_void>(), byte_count) }
    }

    /// Copies `byte_count` bytes from the array (byte offset 0) into host
    /// memory at `dst`.
    ///
    /// # Safety
    ///
    /// `dst` must be valid for writes of `byte_count` bytes, and `byte_count`
    /// must not exceed what the array holds.
    ///
    /// # Errors
    ///
    /// * `CudaRtError::DriverNotAvailable` if the driver cannot be loaded.
    /// * `CudaRtError::NotSupported` if `cu_memcpy_atoh_v2` is unavailable.
    /// * The error mapped from the driver status code, or
    ///   `CudaRtError::InvalidMemcpyDirection` when the code has no mapping.
    pub unsafe fn copy_to_host_raw(&self, dst: *mut c_void, byte_count: usize) -> CudaRtResult<()> {
        let api = try_driver().map_err(|_| CudaRtError::DriverNotAvailable)?;
        let f = api.cu_memcpy_atoh_v2.ok_or(CudaRtError::NotSupported)?;
        // SAFETY: the caller guarantees `dst` is writable for `byte_count` bytes.
        let rc = unsafe { f(dst, self.handle, 0, byte_count) };
        if rc != 0 {
            return Err(CudaRtError::from_code(rc).unwrap_or(CudaRtError::InvalidMemcpyDirection));
        }
        Ok(())
    }

    /// Fills `dst` with bytes read from the array, starting at byte offset 0.
    ///
    /// NOTE(review): `T: Copy` does not guarantee that every bit pattern is a
    /// valid `T` (e.g. `bool`, references, enums). Callers should only use
    /// plain numeric element types here; tightening the bound would be an
    /// interface change.
    ///
    /// # Errors
    ///
    /// Returns `CudaRtError::InvalidValue` without touching the driver when
    /// `dst` occupies more bytes than the array holds; otherwise the same
    /// errors as [`CudaArray::copy_to_host_raw`].
    pub fn copy_to_host<T: Copy>(&self, dst: &mut [T]) -> CudaRtResult<()> {
        let byte_count = std::mem::size_of_val(dst);
        self.ensure_fits(byte_count)?;
        // SAFETY: `dst` is a live, exclusively borrowed slice, hence writable
        // for exactly `size_of_val(dst)` bytes, and the size was checked
        // against the array.
        unsafe { self.copy_to_host_raw(dst.as_mut_ptr().cast::<c_void>(), byte_count) }
    }

    /// Enqueues a copy of `byte_count` bytes from host memory at `src` into
    /// the array (byte offset 0) on `stream`.
    ///
    /// # Safety
    ///
    /// `src` must be valid for reads of `byte_count` bytes, and `byte_count`
    /// must not exceed what the array can hold. NOTE(review): the source
    /// buffer presumably has to stay alive and unmodified until the work
    /// enqueued on `stream` has completed — confirm against the driver docs.
    ///
    /// # Errors
    ///
    /// * `CudaRtError::DriverNotAvailable` if the driver cannot be loaded.
    /// * `CudaRtError::NotSupported` if `cu_memcpy_htoa_async_v2` is
    ///   unavailable.
    /// * The error mapped from the driver status code, or
    ///   `CudaRtError::InvalidMemcpyDirection` when the code has no mapping.
    pub unsafe fn copy_from_host_async_raw(
        &self,
        src: *const c_void,
        byte_count: usize,
        stream: CudaStream,
    ) -> CudaRtResult<()> {
        let api = try_driver().map_err(|_| CudaRtError::DriverNotAvailable)?;
        let f = api
            .cu_memcpy_htoa_async_v2
            .ok_or(CudaRtError::NotSupported)?;
        // SAFETY: the caller guarantees `src` is readable for `byte_count` bytes.
        let rc = unsafe { f(self.handle, 0, src, byte_count, stream.raw()) };
        if rc != 0 {
            return Err(CudaRtError::from_code(rc).unwrap_or(CudaRtError::InvalidMemcpyDirection));
        }
        Ok(())
    }

    /// Returns the raw driver handle; ownership stays with `self`.
    #[must_use]
    pub fn raw(&self) -> CUarray {
        self.handle
    }

    /// Width in elements, as given at creation.
    #[must_use]
    pub const fn width(&self) -> usize {
        self.width
    }

    /// Height in elements; 0 for arrays created with `create_1d`.
    #[must_use]
    pub const fn height(&self) -> usize {
        self.height
    }

    /// Per-channel element format.
    #[must_use]
    pub const fn format(&self) -> ArrayFormat {
        self.format
    }

    /// Number of channels per element, as given at creation.
    #[must_use]
    pub const fn num_channels(&self) -> u32 {
        self.num_channels
    }
}
impl Drop for CudaArray {
    /// Best-effort release of the array handle: nothing happens when the
    /// driver or its `cu_array_destroy` entry point is unavailable, and the
    /// status returned by the destroy call is ignored.
    fn drop(&mut self) {
        let Ok(driver) = try_driver() else {
            return;
        };
        let Some(destroy) = driver.cu_array_destroy else {
            return;
        };
        // SAFETY: `handle` was filled in by a successful create call in the
        // constructors and is handed to the destroy entry point only here.
        unsafe { destroy(self.handle) };
    }
}
/// Bit set of creation flags for `CudaArray3D`.
///
/// The wrapped `u32` is forwarded unchanged as the `flags` field of
/// `CUDA_ARRAY3D_DESCRIPTOR`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct Array3DFlags(pub u32);
impl Array3DFlags {
pub const DEFAULT: Self = Self(0);
pub const LAYERED: Self = Self(CUDA_ARRAY3D_LAYERED);
pub const SURFACE_LDST: Self = Self(CUDA_ARRAY3D_SURFACE_LDST);
pub const CUBEMAP: Self = Self(CUDA_ARRAY3D_CUBEMAP);
pub const TEXTURE_GATHER: Self = Self(CUDA_ARRAY3D_TEXTURE_GATHER);
#[must_use]
pub const fn or(self, other: Self) -> Self {
Self(self.0 | other.0)
}
}
/// Owning wrapper around a driver `CUarray` handle created from a
/// `CUDA_ARRAY3D_DESCRIPTOR`.
///
/// Built by `CudaArray3D::create`; the `Drop` implementation hands the handle
/// to the driver's `cu_array_destroy` entry point when it is available.
pub struct CudaArray3D {
/// Raw driver handle filled in by the array-create call.
handle: CUarray,
/// Width, as passed to `create`.
width: usize,
/// Height, as passed to `create`.
height: usize,
/// Depth, as passed to `create`.
depth: usize,
/// Per-channel element format.
format: ArrayFormat,
/// Number of channels per element, as passed to `create`.
num_channels: u32,
/// Creation flags, as passed to `create`.
flags: Array3DFlags,
}
impl CudaArray3D {
    /// Asks the driver to create an array described by the given extents,
    /// element format, channel count and flags.
    ///
    /// # Errors
    ///
    /// * `CudaRtError::DriverNotAvailable` if the driver cannot be loaded.
    /// * `CudaRtError::NotSupported` if the driver does not expose
    ///   `cu_array3d_create_v2`.
    /// * The error mapped from the driver status code, or
    ///   `CudaRtError::MemoryAllocation` when the code has no mapping.
    pub fn create(
        width: usize,
        height: usize,
        depth: usize,
        format: ArrayFormat,
        num_channels: u32,
        flags: Array3DFlags,
    ) -> CudaRtResult<Self> {
        let driver = try_driver().map_err(|_| CudaRtError::DriverNotAvailable)?;
        let Some(array3d_create) = driver.cu_array3d_create_v2 else {
            return Err(CudaRtError::NotSupported);
        };

        let descriptor = CUDA_ARRAY3D_DESCRIPTOR {
            width,
            height,
            depth,
            format: format.as_cu_format(),
            num_channels,
            flags: flags.0,
        };

        let mut handle = CUarray::default();
        // SAFETY: both pointers refer to live locals for the duration of the call.
        let status = unsafe { array3d_create(&raw mut handle, &descriptor) };
        if status == 0 {
            Ok(Self {
                handle,
                width,
                height,
                depth,
                format,
                num_channels,
                flags,
            })
        } else {
            Err(CudaRtError::from_code(status).unwrap_or(CudaRtError::MemoryAllocation))
        }
    }

    /// Returns the raw driver handle; ownership stays with `self`.
    #[must_use]
    pub fn raw(&self) -> CUarray {
        self.handle
    }

    /// Width given at creation.
    #[must_use]
    pub const fn width(&self) -> usize {
        self.width
    }

    /// Height given at creation.
    #[must_use]
    pub const fn height(&self) -> usize {
        self.height
    }

    /// Depth given at creation.
    #[must_use]
    pub const fn depth(&self) -> usize {
        self.depth
    }

    /// Per-channel element format.
    #[must_use]
    pub const fn format(&self) -> ArrayFormat {
        self.format
    }

    /// Number of channels per element given at creation.
    #[must_use]
    pub const fn num_channels(&self) -> u32 {
        self.num_channels
    }

    /// Creation flags given at creation.
    #[must_use]
    pub const fn flags(&self) -> Array3DFlags {
        self.flags
    }
}
impl Drop for CudaArray3D {
    /// Best-effort release of the array handle: nothing happens when the
    /// driver or its `cu_array_destroy` entry point is unavailable, and the
    /// status returned by the destroy call is ignored.
    fn drop(&mut self) {
        let Ok(driver) = try_driver() else {
            return;
        };
        let Some(destroy) = driver.cu_array_destroy else {
            return;
        };
        // SAFETY: `handle` was filled in by a successful create call in
        // `create` and is handed to the destroy entry point only here.
        unsafe { destroy(self.handle) };
    }
}
/// Describes the memory resource a texture or surface object is created over.
///
/// Converted by `ResourceDesc::as_raw` into the driver's
/// `CUDA_RESOURCE_DESC`. The variants store raw handles / device pointers by
/// value, so this type does not keep the referenced resource alive.
#[derive(Clone, Copy)]
pub enum ResourceDesc {
/// A CUDA array, identified by its raw handle.
Array {
/// Raw array handle placed in the descriptor's `h_array` field.
handle: CUarray,
},
/// A mipmapped array, identified by its raw handle.
MipmappedArray {
/// Raw handle placed in the descriptor's `h_mipmapped_array` field.
handle: CUmipmappedArray,
},
/// A linear device-memory region.
Linear {
/// Device pointer to the start of the region.
dev_ptr: DevicePtr,
/// Per-channel element format.
format: ArrayFormat,
/// Number of channels per element.
num_channels: u32,
/// Size of the region in bytes.
size_in_bytes: usize,
},
/// A pitched two-dimensional device-memory region.
Pitch2d {
/// Device pointer to the start of the region.
dev_ptr: DevicePtr,
/// Per-channel element format.
format: ArrayFormat,
/// Number of channels per element.
num_channels: u32,
/// Row width, in elements.
width_in_elements: usize,
/// Number of rows.
height: usize,
/// Row pitch, in bytes.
pitch_in_bytes: usize,
},
}
impl ResourceDesc {
    /// Builds the driver-level `CUDA_RESOURCE_DESC` for this description.
    ///
    /// The resource type tag and the matching union member are chosen from
    /// the variant; `flags` is always 0.
    #[must_use]
    pub fn as_raw(&self) -> CUDA_RESOURCE_DESC {
        let (res_type, res) = match *self {
            Self::Array { handle } => (
                CUresourcetype::Array,
                CudaResourceDescRes {
                    array: CudaResourceDescArray { h_array: handle },
                },
            ),
            Self::MipmappedArray { handle } => (
                CUresourcetype::MipmappedArray,
                CudaResourceDescRes {
                    mipmap: CudaResourceDescMipmap {
                        h_mipmapped_array: handle,
                    },
                },
            ),
            Self::Linear {
                dev_ptr,
                format,
                num_channels,
                size_in_bytes,
            } => (
                CUresourcetype::Linear,
                CudaResourceDescRes {
                    linear: CudaResourceDescLinear {
                        dev_ptr: dev_ptr.0,
                        format: format.as_cu_format(),
                        num_channels,
                        size_in_bytes,
                    },
                },
            ),
            Self::Pitch2d {
                dev_ptr,
                format,
                num_channels,
                width_in_elements,
                height,
                pitch_in_bytes,
            } => (
                CUresourcetype::Pitch2d,
                CudaResourceDescRes {
                    pitch2d: CudaResourceDescPitch2d {
                        dev_ptr: dev_ptr.0,
                        format: format.as_cu_format(),
                        num_channels,
                        width_in_elements,
                        height,
                        pitch_in_bytes,
                    },
                },
            ),
        };

        CUDA_RESOURCE_DESC {
            res_type,
            res,
            flags: 0,
        }
    }
}
/// Sampling configuration for a texture object.
///
/// Converted by `TextureDesc::as_raw` into the driver's `CUDA_TEXTURE_DESC`.
#[derive(Clone, Copy)]
pub struct TextureDesc {
/// Addressing mode written to `address_mode[0]` of the raw descriptor.
pub address_u: AddressMode,
/// Addressing mode written to `address_mode[1]` of the raw descriptor.
pub address_v: AddressMode,
/// Addressing mode written to `address_mode[2]` of the raw descriptor.
pub address_w: AddressMode,
/// Filter mode written to `filter_mode`.
pub filter_mode: FilterMode,
/// When `true`, `CU_TRSF_NORMALIZED_COORDINATES` is set in the raw flags.
pub normalized_coords: bool,
/// When `true`, `CU_TRSF_READ_AS_INTEGER` is set in the raw flags.
pub read_as_integer: bool,
/// When `true`, `CU_TRSF_SRGB` is set in the raw flags.
pub srgb: bool,
/// Value written to `max_anisotropy`.
pub max_anisotropy: u32,
/// Filter mode written to `mipmap_filter_mode`.
pub mipmap_filter: FilterMode,
/// Value written to `mipmap_level_bias`.
pub mipmap_bias: f32,
/// Value written to `min_mipmap_level_clamp`.
pub min_lod: f32,
/// Value written to `max_mipmap_level_clamp`.
pub max_lod: f32,
/// Four-component value written to `border_color`.
pub border_color: [f32; 4],
}
impl TextureDesc {
#[must_use]
pub const fn default_2d() -> Self {
Self {
address_u: AddressMode::Clamp,
address_v: AddressMode::Clamp,
address_w: AddressMode::Clamp,
filter_mode: FilterMode::Point,
normalized_coords: true,
read_as_integer: false,
srgb: false,
max_anisotropy: 1,
mipmap_filter: FilterMode::Point,
mipmap_bias: 0.0,
min_lod: 0.0,
max_lod: 0.0,
border_color: [0.0; 4],
}
}
#[must_use]
pub fn as_raw(&self) -> CUDA_TEXTURE_DESC {
let mut flags: u32 = 0;
if self.normalized_coords {
flags |= CU_TRSF_NORMALIZED_COORDINATES;
}
if self.read_as_integer {
flags |= CU_TRSF_READ_AS_INTEGER;
}
if self.srgb {
flags |= CU_TRSF_SRGB;
}
CUDA_TEXTURE_DESC {
address_mode: [
self.address_u.as_cu(),
self.address_v.as_cu(),
self.address_w.as_cu(),
],
filter_mode: self.filter_mode.as_cu(),
flags,
max_anisotropy: self.max_anisotropy,
mipmap_filter_mode: self.mipmap_filter.as_cu(),
mipmap_level_bias: self.mipmap_bias,
min_mipmap_level_clamp: self.min_lod,
max_mipmap_level_clamp: self.max_lod,
border_color: self.border_color,
reserved: [0i32; 12],
}
}
}
/// Optional resource-view description passed to `CudaTextureObject::create`.
///
/// Converted by `ResourceViewDesc::as_raw` into the driver's
/// `CUDA_RESOURCE_VIEW_DESC`.
#[derive(Clone, Copy)]
pub struct ResourceViewDesc {
/// View format, written to `format`.
pub format: CUresourceViewFormat,
/// Written to `width`.
pub width: usize,
/// Written to `height`.
pub height: usize,
/// Written to `depth`.
pub depth: usize,
/// Written to `first_mipmap_level`.
pub first_mip_level: u32,
/// Written to `last_mipmap_level`.
pub last_mip_level: u32,
/// Written to `first_layer`.
pub first_layer: u32,
/// Written to `last_layer`.
pub last_layer: u32,
}
impl ResourceViewDesc {
    /// Builds the driver-level `CUDA_RESOURCE_VIEW_DESC`: every field is
    /// copied across (the mip-level fields under their longer raw names) and
    /// `reserved` is zero-filled.
    #[must_use]
    pub fn as_raw(&self) -> CUDA_RESOURCE_VIEW_DESC {
        let Self {
            format,
            width,
            height,
            depth,
            first_mip_level,
            last_mip_level,
            first_layer,
            last_layer,
        } = *self;

        CUDA_RESOURCE_VIEW_DESC {
            format,
            width,
            height,
            depth,
            first_mipmap_level: first_mip_level,
            last_mipmap_level: last_mip_level,
            first_layer,
            last_layer,
            reserved: [0u32; 16],
        }
    }
}
/// Owning wrapper around a driver `CUtexObject` handle.
///
/// Built by `CudaTextureObject::create`; the `Drop` implementation hands the
/// handle to the driver's `cu_tex_object_destroy` entry point when it is
/// available.
pub struct CudaTextureObject {
/// Raw texture-object handle filled in by the create call.
handle: CUtexObject,
}
impl CudaTextureObject {
pub fn create(
resource: &ResourceDesc,
texture: &TextureDesc,
view: Option<&ResourceViewDesc>,
) -> CudaRtResult<Self> {
let api = try_driver().map_err(|_| CudaRtError::DriverNotAvailable)?;
let create_fn = api.cu_tex_object_create.ok_or(CudaRtError::NotSupported)?;
let raw_res = resource.as_raw();
let raw_tex = texture.as_raw();
let (raw_view_ptr, _raw_view_storage);
if let Some(v) = view {
_raw_view_storage = v.as_raw();
raw_view_ptr = &_raw_view_storage as *const CUDA_RESOURCE_VIEW_DESC;
} else {
_raw_view_storage = unsafe { std::mem::zeroed() };
raw_view_ptr = std::ptr::null();
}
let mut handle = CUtexObject::default();
let rc = unsafe { create_fn(&raw mut handle, &raw_res, &raw_tex, raw_view_ptr) };
if rc != 0 {
return Err(CudaRtError::from_code(rc).unwrap_or(CudaRtError::InvalidValue));
}
Ok(Self { handle })
}
#[must_use]
pub fn raw(&self) -> CUtexObject {
self.handle
}
}
impl Drop for CudaTextureObject {
    /// Best-effort release of the texture object: nothing happens when the
    /// driver or its `cu_tex_object_destroy` entry point is unavailable, and
    /// the status returned by the destroy call is ignored.
    fn drop(&mut self) {
        let Ok(driver) = try_driver() else {
            return;
        };
        let Some(destroy) = driver.cu_tex_object_destroy else {
            return;
        };
        // SAFETY: `handle` was filled in by a successful create call in
        // `create` and is handed to the destroy entry point only here.
        unsafe { destroy(self.handle) };
    }
}
/// Owning wrapper around a driver `CUsurfObject` handle.
///
/// Built by `CudaSurfaceObject::create`; the `Drop` implementation hands the
/// handle to the driver's `cu_surf_object_destroy` entry point when it is
/// available.
pub struct CudaSurfaceObject {
/// Raw surface-object handle filled in by the create call.
handle: CUsurfObject,
}
impl CudaSurfaceObject {
    /// Creates a surface object over `resource`.
    ///
    /// # Errors
    ///
    /// * `CudaRtError::DriverNotAvailable` if the driver cannot be loaded.
    /// * `CudaRtError::NotSupported` if the driver does not expose
    ///   `cu_surf_object_create`.
    /// * The error mapped from the driver status code, or
    ///   `CudaRtError::InvalidValue` when the code has no mapping.
    pub fn create(resource: &ResourceDesc) -> CudaRtResult<Self> {
        let driver = try_driver().map_err(|_| CudaRtError::DriverNotAvailable)?;
        let Some(surf_create) = driver.cu_surf_object_create else {
            return Err(CudaRtError::NotSupported);
        };

        let descriptor = resource.as_raw();
        let mut handle = CUsurfObject::default();
        // SAFETY: both pointers refer to live locals for the duration of the call.
        let status = unsafe { surf_create(&raw mut handle, &descriptor) };
        if status == 0 {
            Ok(Self { handle })
        } else {
            Err(CudaRtError::from_code(status).unwrap_or(CudaRtError::InvalidValue))
        }
    }

    /// Returns the raw surface-object handle; ownership stays with `self`.
    #[must_use]
    pub fn raw(&self) -> CUsurfObject {
        self.handle
    }
}
impl Drop for CudaSurfaceObject {
    /// Best-effort release of the surface object: nothing happens when the
    /// driver or its `cu_surf_object_destroy` entry point is unavailable, and
    /// the status returned by the destroy call is ignored.
    fn drop(&mut self) {
        let Ok(driver) = try_driver() else {
            return;
        };
        let Some(destroy) = driver.cu_surf_object_destroy else {
            return;
        };
        // SAFETY: `handle` was filled in by a successful create call in
        // `create` and is handed to the destroy entry point only here.
        unsafe { destroy(self.handle) };
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `bytes_per_channel` reports the expected width for a sample of formats.
    #[test]
    fn array_format_byte_widths() {
        let expected = [
            (ArrayFormat::UnsignedInt8, 1),
            (ArrayFormat::UnsignedInt16, 2),
            (ArrayFormat::Half, 2),
            (ArrayFormat::Float, 4),
            (ArrayFormat::SignedInt32, 4),
        ];
        for (format, width) in expected {
            assert_eq!(format.bytes_per_channel(), width);
        }
    }

    /// `as_cu_format` yields the driver variant of the same name.
    #[test]
    fn array_format_cu_round_trip() {
        assert!(matches!(
            ArrayFormat::Float.as_cu_format(),
            CUarray_format::Float
        ));
        assert!(matches!(
            ArrayFormat::SignedInt8.as_cu_format(),
            CUarray_format::SignedInt8
        ));
    }

    /// The default 2D descriptor sets only the normalized-coordinates flag and
    /// uses point filtering with clamp addressing on every axis.
    #[test]
    fn texture_desc_default_flags() {
        let raw = TextureDesc::default_2d().as_raw();
        assert_ne!(raw.flags & CU_TRSF_NORMALIZED_COORDINATES, 0);
        assert_eq!(raw.flags & CU_TRSF_READ_AS_INTEGER, 0);
        assert_eq!(raw.flags & CU_TRSF_SRGB, 0);
        assert!(matches!(raw.filter_mode, CUfilter_mode::Point));
        for axis in 0..3 {
            assert!(matches!(raw.address_mode[axis], CUaddress_mode::Clamp));
        }
    }

    /// An `Array` resource keeps its handle and is tagged as an array.
    #[test]
    fn resource_desc_array_round_trip() {
        let raw = ResourceDesc::Array {
            handle: CUarray::default(),
        }
        .as_raw();
        assert!(matches!(raw.res_type, CUresourcetype::Array));
        // The `Array` variant initialises the `array` member of the union.
        let member = unsafe { raw.res.array };
        assert!(member.h_array.is_null());
    }

    /// A `Linear` resource copies every field into the raw descriptor.
    #[test]
    fn resource_desc_linear_round_trip() {
        let raw = ResourceDesc::Linear {
            dev_ptr: DevicePtr(0x1000),
            format: ArrayFormat::Float,
            num_channels: 4,
            size_in_bytes: 1024,
        }
        .as_raw();
        assert!(matches!(raw.res_type, CUresourcetype::Linear));
        // The `Linear` variant initialises the `linear` member of the union.
        let member = unsafe { raw.res.linear };
        assert_eq!(member.dev_ptr, 0x1000);
        assert_eq!(member.num_channels, 4);
        assert_eq!(member.size_in_bytes, 1024);
        assert!(matches!(member.format, CUarray_format::Float));
    }

    /// Array creation either succeeds or fails with one of the errors expected
    /// on a machine without a usable GPU/driver.
    #[test]
    fn cuda_array_create_no_gpu() {
        match CudaArray::create_2d(64, 64, ArrayFormat::Float, 4) {
            Ok(_)
            | Err(
                CudaRtError::DriverNotAvailable
                | CudaRtError::NotSupported
                | CudaRtError::NoGpu
                | CudaRtError::InitializationError
                | CudaRtError::InvalidDevice
                | CudaRtError::DeviceUninitialized,
            ) => {}
            Err(e) => panic!("unexpected error: {e}"),
        }
    }

    /// Texture-object creation over a default array handle either succeeds or
    /// fails with one of the tolerated errors.
    #[test]
    fn cuda_texture_object_create_no_gpu() {
        let resource = ResourceDesc::Array {
            handle: CUarray::default(),
        };
        let sampling = TextureDesc::default_2d();
        match CudaTextureObject::create(&resource, &sampling, None) {
            Ok(_)
            | Err(
                CudaRtError::DriverNotAvailable
                | CudaRtError::NotSupported
                | CudaRtError::NoGpu
                | CudaRtError::InitializationError
                | CudaRtError::InvalidDevice
                | CudaRtError::InvalidValue
                | CudaRtError::DeviceUninitialized,
            ) => {}
            Err(e) => panic!("unexpected error: {e}"),
        }
    }

    /// Surface-object creation over a default array handle either succeeds or
    /// fails with one of the tolerated errors.
    #[test]
    fn cuda_surface_object_create_no_gpu() {
        let resource = ResourceDesc::Array {
            handle: CUarray::default(),
        };
        match CudaSurfaceObject::create(&resource) {
            Ok(_)
            | Err(
                CudaRtError::DriverNotAvailable
                | CudaRtError::NotSupported
                | CudaRtError::NoGpu
                | CudaRtError::InitializationError
                | CudaRtError::InvalidDevice
                | CudaRtError::InvalidValue
                | CudaRtError::DeviceUninitialized,
            ) => {}
            Err(e) => panic!("unexpected error: {e}"),
        }
    }

    /// `Array3DFlags::or` is the bitwise union of the wrapped values.
    #[test]
    fn array_3d_flags_combine() {
        let combined = Array3DFlags::LAYERED.or(Array3DFlags::SURFACE_LDST);
        assert_eq!(
            combined.0,
            CUDA_ARRAY3D_LAYERED | CUDA_ARRAY3D_SURFACE_LDST
        );
    }

    /// Every `AddressMode` variant converts without panicking.
    #[test]
    fn address_mode_variants_compile() {
        for mode in [
            AddressMode::Wrap,
            AddressMode::Clamp,
            AddressMode::Mirror,
            AddressMode::Border,
        ] {
            let _ = mode.as_cu();
        }
    }
}