use crate::context::CurrentContext;
use crate::device::DeviceAttribute;
use crate::error::*;
use crate::sys::cuMemcpy2D_v2;
use crate::sys::cuMemcpyAtoH_v2;
use crate::sys::cuMemcpyHtoA_v2;
use crate::sys::CUDA_MEMCPY2D;
use crate::sys::{self as cuda, CUarray, CUarray_format, CUarray_format_enum};
use std::ffi::c_void;
use std::mem;
use std::mem::zeroed;
use std::mem::ManuallyDrop;
use std::mem::MaybeUninit;
use std::os::raw::c_uint;
use std::ptr::null;
use std::ptr::null_mut;
/// Element formats that a CUDA array can be created with.
///
/// Variants are converted to and from the driver's `CUarray_format` by
/// `ArrayFormat::to_raw` and `ArrayFormat::from_raw`.
///
/// NOTE(review): those conversions pair `F32` with `CU_AD_FORMAT_HALF` and `F64` with
/// `CU_AD_FORMAT_FLOAT`. The CUDA driver API documents these as 16-bit and 32-bit
/// floats, which does not match the 4- and 8-byte sizes reported by `mem_size` below —
/// confirm which side is intended.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ArrayFormat {
    /// Unsigned 8-bit integer elements.
    U8,
    /// Unsigned 16-bit integer elements.
    U16,
    /// Unsigned 32-bit integer elements.
    U32,
    /// Signed 8-bit integer elements.
    I8,
    /// Signed 16-bit integer elements.
    I16,
    /// Signed 32-bit integer elements.
    I32,
    /// Floating-point elements reported as 4 bytes wide by `mem_size`.
    F32,
    /// Floating-point elements reported as 8 bytes wide by `mem_size`.
    F64,
}

impl ArrayFormat {
    /// Returns the size, in bytes, of a single channel value of this format.
    pub fn mem_size(&self) -> usize {
        match *self {
            Self::U8 | Self::I8 => 1,
            Self::U16 | Self::I16 => 2,
            Self::U32 | Self::I32 | Self::F32 => 4,
            Self::F64 => 8,
        }
    }
}
// Private module holding the sealing trait. Because the module is not public, code
// outside this module cannot name `Sealed` and therefore cannot implement any trait
// that has it as a supertrait.
mod private {
/// Marker trait used only as a supertrait bound; it has no methods.
pub trait Sealed {}
}
/// Host element types that can be copied to and from an `ArrayObject`.
///
/// The trait is sealed through the `private::Sealed` supertrait, so the set of
/// implementors is fixed to the ones provided in this module.
pub trait ArrayPrimitive: private::Sealed + Copy + Default {
/// Returns the `ArrayFormat` that corresponds to this element type.
fn array_format() -> ArrayFormat;
}
impl private::Sealed for u8 {}
impl private::Sealed for u16 {}
impl private::Sealed for u32 {}
impl private::Sealed for i8 {}
impl private::Sealed for i16 {}
impl private::Sealed for i32 {}
impl private::Sealed for f32 {}
impl private::Sealed for f64 {}
impl ArrayPrimitive for u8 {
fn array_format() -> ArrayFormat {
ArrayFormat::U8
}
}
impl ArrayPrimitive for u16 {
fn array_format() -> ArrayFormat {
ArrayFormat::U16
}
}
impl ArrayPrimitive for u32 {
fn array_format() -> ArrayFormat {
ArrayFormat::U32
}
}
impl ArrayPrimitive for i8 {
fn array_format() -> ArrayFormat {
ArrayFormat::I8
}
}
impl ArrayPrimitive for i16 {
fn array_format() -> ArrayFormat {
ArrayFormat::I16
}
}
impl ArrayPrimitive for i32 {
fn array_format() -> ArrayFormat {
ArrayFormat::I32
}
}
impl ArrayPrimitive for f32 {
fn array_format() -> ArrayFormat {
ArrayFormat::F32
}
}
impl ArrayPrimitive for f64 {
fn array_format() -> ArrayFormat {
ArrayFormat::F64
}
}
impl ArrayFormat {
pub fn from_raw(raw: CUarray_format) -> Self {
match raw {
CUarray_format_enum::CU_AD_FORMAT_UNSIGNED_INT8 => ArrayFormat::U8,
CUarray_format_enum::CU_AD_FORMAT_UNSIGNED_INT16 => ArrayFormat::U16,
CUarray_format_enum::CU_AD_FORMAT_UNSIGNED_INT32 => ArrayFormat::U32,
CUarray_format_enum::CU_AD_FORMAT_SIGNED_INT8 => ArrayFormat::I8,
CUarray_format_enum::CU_AD_FORMAT_SIGNED_INT16 => ArrayFormat::I16,
CUarray_format_enum::CU_AD_FORMAT_SIGNED_INT32 => ArrayFormat::I32,
CUarray_format_enum::CU_AD_FORMAT_HALF => ArrayFormat::F32,
CUarray_format_enum::CU_AD_FORMAT_FLOAT => ArrayFormat::F64,
CUarray_format_enum::CU_AD_FORMAT_NV12 => panic!("nv12 is not supported yet"),
}
}
pub fn to_raw(self) -> CUarray_format {
match self {
ArrayFormat::U8 => CUarray_format_enum::CU_AD_FORMAT_UNSIGNED_INT8,
ArrayFormat::U16 => CUarray_format_enum::CU_AD_FORMAT_UNSIGNED_INT16,
ArrayFormat::U32 => CUarray_format_enum::CU_AD_FORMAT_UNSIGNED_INT32,
ArrayFormat::I8 => CUarray_format_enum::CU_AD_FORMAT_SIGNED_INT8,
ArrayFormat::I16 => CUarray_format_enum::CU_AD_FORMAT_SIGNED_INT16,
ArrayFormat::I32 => CUarray_format_enum::CU_AD_FORMAT_SIGNED_INT32,
ArrayFormat::F32 => CUarray_format_enum::CU_AD_FORMAT_HALF,
ArrayFormat::F64 => CUarray_format_enum::CU_AD_FORMAT_FLOAT,
}
}
}
bitflags::bitflags! {
/// Flags stored in the `Flags` field of an array descriptor.
///
/// The default value has no flags set.
#[derive(Default)]
pub struct ArrayObjectFlags: c_uint {
/// Marks the array as layered. `ArrayObject::from_descriptor` then validates the
/// depth against the device's layer-count limits instead of the 3D depth limits.
const LAYERED = cuda::CUDA_ARRAY3D_LAYERED;
/// Raw `CUDA_ARRAY3D_SURFACE_LDST` flag.
/// NOTE(review): presumably enables surface load/store on the array — confirm
/// against the CUDA driver documentation.
const SURFACE_LDST = cuda::CUDA_ARRAY3D_SURFACE_LDST;
/// Marks the array as a cubemap. Debug validation in `ArrayObject::from_descriptor`
/// requires width == height and a depth of 6 (or a multiple of 6 when also `LAYERED`).
const CUBEMAP = cuda::CUDA_ARRAY3D_CUBEMAP;
/// Raw `CUDA_ARRAY3D_TEXTURE_GATHER` flag. 2D arrays carrying it are validated
/// against the device's 2D texture-gather size limits.
const TEXTURE_GATHER = cuda::CUDA_ARRAY3D_TEXTURE_GATHER;
}
}
impl ArrayObjectFlags {
    /// Creates the default flag set (identical to `ArrayObjectFlags::default()`).
    pub fn new() -> Self {
        Default::default()
    }
}
/// Thin wrapper around the raw `CUDA_ARRAY3D_DESCRIPTOR`, holding the dimensions,
/// element format, channel count and flags of a CUDA array.
#[derive(Clone, Copy, Debug)]
pub struct ArrayDescriptor {
// The raw driver descriptor; read and written through the accessor methods.
desc: cuda::CUDA_ARRAY3D_DESCRIPTOR,
}
impl ArrayDescriptor {
pub fn from_raw(desc: cuda::CUDA_ARRAY3D_DESCRIPTOR) -> Self {
Self { desc }
}
pub fn new(
dims: [usize; 3],
format: ArrayFormat,
num_channels: c_uint,
flags: ArrayObjectFlags,
) -> Self {
Self {
desc: cuda::CUDA_ARRAY3D_DESCRIPTOR {
Width: dims[0],
Height: dims[1],
Depth: dims[2],
Format: format.to_raw(),
NumChannels: num_channels,
Flags: flags.bits(),
},
}
}
pub fn from_dims_format(dims: [usize; 3], format: ArrayFormat) -> Self {
Self {
desc: cuda::CUDA_ARRAY3D_DESCRIPTOR {
Width: dims[0],
Height: dims[1],
Depth: dims[2],
Format: format.to_raw(),
NumChannels: 1,
Flags: ArrayObjectFlags::default().bits(),
},
}
}
pub fn dims(&self) -> [usize; 3] {
[self.desc.Width, self.desc.Height, self.desc.Depth]
}
pub fn set_dims(&mut self, dims: [usize; 3]) {
self.desc.Width = dims[0];
self.desc.Height = dims[1];
self.desc.Depth = dims[2];
}
pub fn width(&self) -> usize {
self.desc.Width
}
pub fn set_width(&mut self, width: usize) {
self.desc.Width = width;
}
pub fn height(&self) -> usize {
self.desc.Height
}
pub fn set_height(&mut self, height: usize) {
self.desc.Height = height;
}
pub fn depth(&self) -> usize {
self.desc.Depth
}
pub fn set_depth(&mut self, depth: usize) {
self.desc.Depth = depth;
}
pub fn format(&self) -> ArrayFormat {
ArrayFormat::from_raw(self.desc.Format)
}
pub fn set_format(&mut self, format: ArrayFormat) {
self.desc.Format = format.to_raw();
}
pub fn num_channels(&self) -> c_uint {
self.desc.NumChannels
}
pub fn set_num_channels(&mut self, num_channels: c_uint) {
self.desc.NumChannels = num_channels;
}
pub fn flags(&self) -> ArrayObjectFlags {
ArrayObjectFlags::from_bits_truncate(self.desc.Flags)
}
pub fn set_flags(&mut self, flags: ArrayObjectFlags) {
self.desc.Flags = flags.bits();
}
}
/// Owning wrapper around a CUDA array handle (`CUarray`).
///
/// The handle is destroyed with `cuArrayDestroy` when the value is dropped.
pub struct ArrayObject {
// Raw driver handle owned by this object.
pub(crate) handle: CUarray,
}
// NOTE(review): these impls assert that the raw handle may be moved to and shared
// between threads; nothing in this file enforces that — confirm against the CUDA
// driver's threading guarantees.
unsafe impl Send for ArrayObject {}
unsafe impl Sync for ArrayObject {}
impl ArrayObject {
/// Consumes the object and returns its raw handle without destroying the array.
///
/// The value is wrapped in `ManuallyDrop`, so the `Drop` impl does not run and the
/// caller becomes responsible for the handle.
pub(crate) fn into_raw(self) -> CUarray {
    let this = ManuallyDrop::new(self);
    this.handle
}
/// Allocates a new CUDA array described by `descriptor` via `cuArray3DCreate_v2`.
///
/// When compiled with debug assertions the descriptor is validated first, so that a
/// mistake produces a descriptive panic; without debug assertions the descriptor is
/// passed to the driver unchecked.
///
/// # Errors
///
/// Returns the CUDA error if querying the current device or its attributes fails
/// (debug builds only) or if `cuArray3DCreate_v2` fails.
///
/// # Panics
///
/// With debug assertions enabled, panics if:
/// - the width is 0;
/// - the depth is non-zero, the height is 0 and the array is not `LAYERED`;
/// - the array is a `CUBEMAP` and width != height, or the depth is not 6
///   (not a multiple of 6 for a `LAYERED` cubemap);
/// - the channel count is not 1, 2 or 4;
/// - the dimensions fall outside the limits reported by the current device.
pub fn from_descriptor(descriptor: &ArrayDescriptor) -> CudaResult<Self> {
// Validation only happens in debug builds.
if cfg!(debug_assertions) {
assert_ne!(
0,
descriptor.width(),
"Cannot allocate an array with 0 Width"
);
if !descriptor.flags().contains(ArrayObjectFlags::LAYERED) && descriptor.depth() > 0 {
assert_ne!(
0,
descriptor.height(),
"If Depth is non-zero and the descriptor is not LAYERED, then Height must also \
be non-zero."
);
}
if descriptor.flags().contains(ArrayObjectFlags::CUBEMAP) {
assert_eq!(
descriptor.height(),
descriptor.width(),
"Height and Width must be equal for CUBEMAP arrays."
);
if descriptor.flags().contains(ArrayObjectFlags::LAYERED) {
assert_eq!(
0,
descriptor.depth() % 6,
"Depth must be a multiple of 6 when the array descriptor is for a LAYERED \
CUBEMAP."
);
} else {
assert_eq!(
6,
descriptor.depth(),
"Depth must be equal to 6 when the array descriptor is for a CUBEMAP."
);
}
}
assert!(
descriptor.num_channels() == 1
|| descriptor.num_channels() == 2
|| descriptor.num_channels() == 4,
"NumChannels was set to {}. It must be 1, 2, or 4.",
descriptor.num_channels()
);
let device = CurrentContext::get_device()?;
// Turns a device attribute (a maximum extent) into the inclusive range `1..=max`.
let attr = |attr| Ok(1..=(device.get_attribute(attr)? as usize));
// Pick a human-readable description and one or more `[width, height, depth]` range
// triples for the kind of array being requested. A `0..=0` entry means that
// dimension must be zero for this kind of array.
let (description, bounds) = if descriptor.flags().contains(ArrayObjectFlags::CUBEMAP) {
if descriptor.flags().contains(ArrayObjectFlags::LAYERED) {
(
"Layered Cubemap",
vec![[
attr(DeviceAttribute::MaximumTextureCubemapLayeredWidth)?,
attr(DeviceAttribute::MaximumTextureCubemapLayeredWidth)?,
attr(DeviceAttribute::MaximumTextureCubemapLayeredLayers)?,
]],
)
} else {
(
"Cubemap",
vec![[
attr(DeviceAttribute::MaximumTextureCubemapWidth)?,
attr(DeviceAttribute::MaximumTextureCubemapWidth)?,
6..=6,
]],
)
}
} else if descriptor.flags().contains(ArrayObjectFlags::LAYERED) {
// Layered, non-cubemap: a non-zero height selects 2D layers, otherwise 1D layers.
if descriptor.height() > 0 {
(
"2D Layered",
vec![[
attr(DeviceAttribute::MaximumTexture2DLayeredWidth)?,
attr(DeviceAttribute::MaximumTexture2DLayeredHeight)?,
attr(DeviceAttribute::MaximumTexture2DLayeredLayers)?,
]],
)
} else {
(
"1D Layered",
vec![[
attr(DeviceAttribute::MaximumTexture1DLayeredWidth)?,
0..=0,
attr(DeviceAttribute::MaximumTexture1DLayeredLayers)?,
]],
)
}
} else if descriptor.depth() > 0 {
// 3D arrays have two alternative sets of limits; matching either one is enough.
(
"3D",
vec![
[
attr(DeviceAttribute::MaximumTexture3DWidth)?,
attr(DeviceAttribute::MaximumTexture3DHeight)?,
attr(DeviceAttribute::MaximumTexture3DDepth)?,
],
[
attr(DeviceAttribute::MaximumTexture3DWidthAlternate)?,
attr(DeviceAttribute::MaximumTexture3DHeightAlternate)?,
attr(DeviceAttribute::MaximumTexture3DDepthAlternate)?,
],
],
)
} else if descriptor.height() > 0 {
if descriptor
.flags()
.contains(ArrayObjectFlags::TEXTURE_GATHER)
{
(
"2D Texture Gather",
vec![[
attr(DeviceAttribute::MaximumTexture2DGatherWidth)?,
attr(DeviceAttribute::MaximumTexture2DGatherHeight)?,
0..=0,
]],
)
} else {
(
"2D",
vec![[
attr(DeviceAttribute::MaximumTexture2DWidth)?,
attr(DeviceAttribute::MaximumTexture2DHeight)?,
0..=0,
]],
)
}
} else {
assert!(descriptor.width() > 0);
(
"1D",
vec![[attr(DeviceAttribute::MaximumTexture1DWidth)?, 0..=0, 0..=0]],
)
};
// NOTE: despite its name, this closure returns `true` when all three dimensions lie
// INSIDE the given ranges, i.e. when the bounds are satisfied.
let bounds_invalid = |x: &[::std::ops::RangeInclusive<usize>; 3]| {
(descriptor.width() >= *x[0].start() && descriptor.width() <= *x[0].end())
&& (descriptor.height() >= *x[1].start() && descriptor.height() <= *x[1].end())
&& (descriptor.depth() >= *x[2].start() && descriptor.depth() <= *x[2].end())
};
// The descriptor is accepted if at least one set of bounds is satisfied.
assert!(
bounds.iter().any(bounds_invalid),
"The dimensions of the {} ArrayObject did not fall within the valid bounds for \
the array. descriptor = {:?}, dims = {:?}, valid bounds = {:?}",
description,
descriptor,
[descriptor.width(), descriptor.height(), descriptor.depth()],
bounds
);
}
// The driver writes the new handle into `handle`; it is only read after the call
// has been checked for success.
let mut handle = MaybeUninit::uninit();
unsafe { cuda::cuArray3DCreate_v2(handle.as_mut_ptr(), &descriptor.desc) }.to_result()?;
Ok(Self {
handle: unsafe { handle.assume_init() },
})
}
/// Allocates an array with the given `[width, height, depth]`, element format and
/// channel count, using default (empty) flags.
///
/// See `from_descriptor` for the errors returned and the debug-build panics.
pub fn new(dims: [usize; 3], format: ArrayFormat, num_channels: c_uint) -> CudaResult<Self> {
    let descriptor = ArrayDescriptor::new(dims, format, num_channels, Default::default());
    Self::from_descriptor(&descriptor)
}
/// Allocates a 1D array of `width` elements (height and depth are set to 0), using
/// default (empty) flags.
///
/// See `from_descriptor` for the errors returned and the debug-build panics.
pub fn new_1d(width: usize, format: ArrayFormat, num_channels: c_uint) -> CudaResult<Self> {
    let dims = [width, 0, 0];
    let descriptor = ArrayDescriptor::new(dims, format, num_channels, Default::default());
    Self::from_descriptor(&descriptor)
}
/// Allocates a 2D array of `[width, height]` elements (depth is set to 0), using
/// default (empty) flags.
///
/// See `from_descriptor` for the errors returned and the debug-build panics.
pub fn new_2d(dims: [usize; 2], format: ArrayFormat, num_channels: c_uint) -> CudaResult<Self> {
    let [width, height] = dims;
    let descriptor =
        ArrayDescriptor::new([width, height, 0], format, num_channels, Default::default());
    Self::from_descriptor(&descriptor)
}
/// Allocates a `LAYERED` array: `dims` is `[width, height]` of each layer and
/// `num_layers` is stored as the descriptor's depth.
///
/// See `from_descriptor` for the errors returned and the debug-build panics.
pub fn new_layered(
    dims: [usize; 2],
    num_layers: usize,
    format: ArrayFormat,
    num_channels: c_uint,
) -> CudaResult<Self> {
    let [width, height] = dims;
    let descriptor = ArrayDescriptor::new(
        [width, height, num_layers],
        format,
        num_channels,
        ArrayObjectFlags::LAYERED,
    );
    Self::from_descriptor(&descriptor)
}
/// Allocates a `LAYERED` array of 1D layers: each layer is `width` elements wide,
/// the height is set to 0 and `num_layers` is stored as the descriptor's depth.
///
/// See `from_descriptor` for the errors returned and the debug-build panics.
pub fn new_layered_1d(
    width: usize,
    num_layers: usize,
    format: ArrayFormat,
    num_channels: c_uint,
) -> CudaResult<Self> {
    let dims = [width, 0, num_layers];
    let descriptor =
        ArrayDescriptor::new(dims, format, num_channels, ArrayObjectFlags::LAYERED);
    Self::from_descriptor(&descriptor)
}
/// Allocates a `CUBEMAP` array with dimensions `[side, side, 6]`.
///
/// See `from_descriptor` for the errors returned and the debug-build panics.
pub fn new_cubemap(side: usize, format: ArrayFormat, num_channels: c_uint) -> CudaResult<Self> {
    let dims = [side, side, 6];
    let descriptor =
        ArrayDescriptor::new(dims, format, num_channels, ArrayObjectFlags::CUBEMAP);
    Self::from_descriptor(&descriptor)
}
/// Allocates a `CUBEMAP | LAYERED` array with dimensions
/// `[side, side, num_layers * 6]`.
///
/// See `from_descriptor` for the errors returned and the debug-build panics.
pub fn new_layered_cubemap(
    side: usize,
    num_layers: usize,
    format: ArrayFormat,
    num_channels: c_uint,
) -> CudaResult<Self> {
    let flags = ArrayObjectFlags::CUBEMAP | ArrayObjectFlags::LAYERED;
    let dims = [side, side, num_layers * 6];
    let descriptor = ArrayDescriptor::new(dims, format, num_channels, flags);
    Self::from_descriptor(&descriptor)
}
/// Queries the driver for this array's descriptor via `cuArray3DGetDescriptor_v2`.
///
/// # Errors
///
/// Returns the CUDA error if the driver call fails.
pub fn descriptor(&self) -> CudaResult<ArrayDescriptor> {
    // Start from an all-zero descriptor so every byte is initialized before the
    // driver fills it in.
    let mut raw = MaybeUninit::<cuda::CUDA_ARRAY3D_DESCRIPTOR>::zeroed();
    let status = unsafe { cuda::cuArray3DGetDescriptor_v2(raw.as_mut_ptr(), self.handle) };
    status.to_result()?;
    let raw = unsafe { raw.assume_init() };
    Ok(ArrayDescriptor::from_raw(raw))
}
pub fn drop(array: ArrayObject) -> DropResult<ArrayObject> {
match unsafe { cuda::cuArrayDestroy(array.handle) }.to_result() {
Ok(()) => Ok(()),
Err(e) => Err((e, array)),
}
}
/// Copies host data from `val` into this array.
///
/// 1D arrays (height and depth both 0) are copied with `cuMemcpyHtoA_v2`; 2D arrays
/// (depth 0) are copied with `cuMemcpy2D_v2`. Only the total byte size of `val` is
/// checked; the element type `T` is not compared with the array's format.
///
/// # Errors
///
/// Returns the CUDA error if querying the descriptor or the copy itself fails.
///
/// # Panics
///
/// Panics if the byte size of `val` differs from the byte size of the array, or if
/// the array has a non-zero depth.
pub fn copy_from<T: ArrayPrimitive, U: AsRef<[T]>>(&mut self, val: &U) -> CudaResult<()> {
    let host = val.as_ref();
    let info = self.descriptor()?;
    let self_size = info.width()
        * info.height().max(1)
        * info.depth().max(1)
        * info.num_channels() as usize
        * info.format().mem_size();
    let other_size = mem::size_of_val(host);
    assert_eq!(self_size, other_size, "Array and value sizes don't match");

    let src_ptr = host.as_ptr() as *const c_void;
    match (info.height(), info.depth()) {
        // 1D array: a single linear host-to-array copy.
        (0, 0) => unsafe { cuMemcpyHtoA_v2(self.handle, 0, src_ptr, self_size) }.to_result(),
        // 2D array: describe the copy row by row.
        (rows, 0) => {
            let row_bytes = info.width() * info.num_channels() as usize * info.format().mem_size();
            let params = CUDA_MEMCPY2D {
                srcXInBytes: 0,
                srcY: 0,
                srcMemoryType: cuda::CUmemorytype_enum::CU_MEMORYTYPE_HOST,
                srcHost: src_ptr,
                srcDevice: 0,
                srcArray: null_mut(),
                srcPitch: 0,
                dstXInBytes: 0,
                dstY: 0,
                dstMemoryType: cuda::CUmemorytype_enum::CU_MEMORYTYPE_ARRAY,
                dstHost: null_mut(),
                dstDevice: 0,
                dstArray: self.handle,
                dstPitch: 0,
                WidthInBytes: row_bytes,
                Height: rows,
            };
            unsafe { cuMemcpy2D_v2(&params as *const _) }.to_result()
        }
        // Arrays with a non-zero depth are not handled.
        _ => panic!(),
    }
}
/// Copies the contents of this array into the host buffer `val`.
///
/// 1D arrays (height and depth both 0) are copied with `cuMemcpyAtoH_v2`; 2D arrays
/// (depth 0) are copied with `cuMemcpy2D_v2`. Only the total byte size of `val` is
/// checked; the element type `T` is not compared with the array's format.
///
/// # Errors
///
/// Returns the CUDA error if querying the descriptor or the copy itself fails.
///
/// # Panics
///
/// Panics if the byte size of `val` differs from the byte size of the array, or if
/// the array has a non-zero depth.
pub fn copy_to<T: ArrayPrimitive, U: AsMut<[T]>>(&self, val: &mut U) -> CudaResult<()> {
    let host = val.as_mut();
    let info = self.descriptor()?;
    let self_size = info.width()
        * info.height().max(1)
        * info.depth().max(1)
        * info.num_channels() as usize
        * info.format().mem_size();
    let other_size = mem::size_of_val(host);
    assert_eq!(self_size, other_size, "Array and value sizes don't match");

    let dst_ptr = host.as_mut_ptr() as *mut c_void;
    match (info.height(), info.depth()) {
        // 1D array: a single linear array-to-host copy.
        (0, 0) => unsafe { cuMemcpyAtoH_v2(dst_ptr, self.handle, 0, self_size) }.to_result(),
        // 2D array: describe the copy row by row.
        (rows, 0) => {
            let row_bytes = info.width() * info.num_channels() as usize * info.format().mem_size();
            let params = CUDA_MEMCPY2D {
                srcXInBytes: 0,
                srcY: 0,
                srcMemoryType: cuda::CUmemorytype_enum::CU_MEMORYTYPE_ARRAY,
                srcHost: null(),
                srcDevice: 0,
                srcArray: self.handle,
                srcPitch: 0,
                dstXInBytes: 0,
                dstY: 0,
                dstMemoryType: cuda::CUmemorytype_enum::CU_MEMORYTYPE_HOST,
                dstHost: dst_ptr,
                dstDevice: 0,
                dstArray: null_mut(),
                dstPitch: 0,
                WidthInBytes: row_bytes,
                Height: rows,
            };
            unsafe { cuMemcpy2D_v2(&params as *const _) }.to_result()
        }
        // Arrays with a non-zero depth are not handled.
        _ => panic!(),
    }
}
/// Copies the whole array into a freshly allocated host `Vec<T>`.
///
/// The vector length is the array's byte size divided by the byte size reported for
/// `T`'s format; the data is then fetched with `copy_to`.
///
/// # Errors
///
/// Returns the CUDA error if querying the descriptor or the copy fails.
///
/// # Panics
///
/// Panics under the same conditions as `copy_to`.
pub fn as_host_vec<T: ArrayPrimitive>(&self) -> CudaResult<Vec<T>> {
    let info = self.descriptor()?;
    let byte_len = info.width()
        * info.height().max(1)
        * info.depth().max(1)
        * info.num_channels() as usize
        * info.format().mem_size();
    let elem_count = byte_len / T::array_format().mem_size();
    // Every `ArrayPrimitive` implementor is a plain integer or float type, for which
    // the all-zero bit pattern is a valid value.
    let mut host: Vec<T> = vec![unsafe { zeroed() }; elem_count];
    self.copy_to(&mut host)?;
    Ok(host)
}
}
/// Formats the array by querying its descriptor from the driver.
///
/// The output is the `Debug` form of the `descriptor()` result, so it shows either
/// the `Ok` descriptor or the `Err` returned by the query.
impl std::fmt::Debug for ArrayObject {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let queried = self.descriptor();
        std::fmt::Debug::fmt(&queried, f)
    }
}
/// Destroys the underlying CUDA array when the wrapper goes out of scope.
impl Drop for ArrayObject {
    fn drop(&mut self) {
        // The status returned by the driver is discarded: `drop` has no way to
        // report a failure to the caller.
        let _ = unsafe { cuda::cuArrayDestroy(self.handle) };
    }
}
// Tests for array creation. Every test first calls `crate::quick_init()` and keeps
// the returned value alive for the duration of the test.
// NOTE(review): presumably `quick_init` sets up a CUDA context, so these tests need a
// CUDA-capable device — confirm.
#[cfg(test)]
mod test {
use super::*;
// A created array reports back the dims, format, channel count and flags it was
// created with.
#[test]
fn descriptor_round_trip() {
let _context = crate::quick_init().unwrap();
let obj = ArrayObject::new([1, 2, 3], ArrayFormat::F64, 2).unwrap();
let descriptor = obj.descriptor().unwrap();
assert_eq!([1, 2, 3], descriptor.dims());
assert_eq!(ArrayFormat::F64, descriptor.format());
assert_eq!(2, descriptor.num_channels());
assert_eq!(ArrayObjectFlags::default(), descriptor.flags());
}
// Height and depth of 0 describe a 1D array.
#[test]
fn allow_1d_arrays() {
let _context = crate::quick_init().unwrap();
let obj = ArrayObject::new([10, 0, 0], ArrayFormat::F64, 1).unwrap();
let descriptor = obj.descriptor().unwrap();
assert_eq!([10, 0, 0], descriptor.dims());
}
// A depth of 0 with a non-zero height describes a 2D array.
#[test]
fn allow_2d_arrays() {
let _context = crate::quick_init().unwrap();
let obj = ArrayObject::new([10, 20, 0], ArrayFormat::F64, 1).unwrap();
let descriptor = obj.descriptor().unwrap();
assert_eq!([10, 20, 0], descriptor.dims());
}
// A layered array with height 0 is a stack of 1D layers.
#[test]
fn allow_1d_layered_arrays() {
let _context = crate::quick_init().unwrap();
let obj = ArrayObject::new_layered([10, 0], 20, ArrayFormat::F64, 1).unwrap();
let descriptor = obj.descriptor().unwrap();
assert_eq!([10, 0, 20], descriptor.dims());
assert_eq!(ArrayObjectFlags::LAYERED, descriptor.flags());
}
// A cubemap has equal width and height and a depth of 6.
#[test]
fn allow_cubemaps() {
let _context = crate::quick_init().unwrap();
let obj = ArrayObject::new_cubemap(4, ArrayFormat::F64, 1).unwrap();
let descriptor = obj.descriptor().unwrap();
assert_eq!([4, 4, 6], descriptor.dims());
assert_eq!(ArrayObjectFlags::CUBEMAP, descriptor.flags());
}
// A layered cubemap stores 6 faces per layer: 4 layers give a depth of 24.
#[test]
fn allow_layered_cubemaps() {
let _context = crate::quick_init().unwrap();
let obj = ArrayObject::new_layered_cubemap(4, 4, ArrayFormat::F64, 1).unwrap();
let descriptor = obj.descriptor().unwrap();
assert_eq!([4, 4, 24], descriptor.dims());
assert_eq!(
ArrayObjectFlags::CUBEMAP | ArrayObjectFlags::LAYERED,
descriptor.flags()
);
}
// The remaining tests expect a panic. With debug assertions enabled, the validation
// in `from_descriptor` rejects each of these descriptors.
// A width of 0 is rejected.
#[test]
#[should_panic]
fn fail_on_zero_width_1d_array() {
let _context = crate::quick_init().unwrap();
let _ = ArrayObject::new_1d(0, ArrayFormat::F64, 1).unwrap();
}
// A width of 0 is rejected even when height and depth are non-zero.
#[test]
#[should_panic]
fn fail_on_zero_size_widths() {
let _context = crate::quick_init().unwrap();
let _ = ArrayObject::new([0, 10, 20], ArrayFormat::F64, 1).unwrap();
}
// Cubemaps require width == height.
#[test]
#[should_panic]
fn fail_cubemaps_with_unmatching_width_height() {
let _context = crate::quick_init().unwrap();
let mut descriptor = ArrayDescriptor::from_dims_format([2, 3, 6], ArrayFormat::F64);
descriptor.set_flags(ArrayObjectFlags::CUBEMAP);
let _ = ArrayObject::from_descriptor(&descriptor).unwrap();
}
// Non-layered cubemaps require a depth of exactly 6.
#[test]
#[should_panic]
fn fail_cubemaps_with_non_six_depth() {
let _context = crate::quick_init().unwrap();
let mut descriptor = ArrayDescriptor::from_dims_format([4, 4, 5], ArrayFormat::F64);
descriptor.set_flags(ArrayObjectFlags::CUBEMAP);
let _ = ArrayObject::from_descriptor(&descriptor).unwrap();
}
// Layered cubemaps require a depth that is a multiple of 6.
#[test]
#[should_panic]
fn fail_cubemaps_with_non_six_multiple_depth() {
let _context = crate::quick_init().unwrap();
let mut descriptor = ArrayDescriptor::from_dims_format([4, 4, 10], ArrayFormat::F64);
descriptor.set_flags(ArrayObjectFlags::LAYERED | ArrayObjectFlags::CUBEMAP);
let _ = ArrayObject::from_descriptor(&descriptor).unwrap();
}
// A non-layered array with a non-zero depth must also have a non-zero height.
#[test]
#[should_panic]
fn fail_with_depth_without_height() {
let _context = crate::quick_init().unwrap();
let _ = ArrayObject::new([10, 0, 20], ArrayFormat::F64, 1).unwrap();
}
// Only 1, 2 or 4 channels are accepted.
#[test]
#[should_panic]
fn fails_on_invalid_num_channels() {
let _context = crate::quick_init().unwrap();
let _ = ArrayObject::new([1, 2, 3], ArrayFormat::F64, 3).unwrap();
}
}