use super::sys;
use crate::driver::sys::cudaError_enum;
use core::ffi::{c_uchar, c_void};
use std::ffi::CStr;
use std::mem::MaybeUninit;
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct RuntimeError(pub sys::cudaError_t);
impl sys::cudaError_t {
#[inline]
pub fn result(self) -> Result<(), RuntimeError> {
match self {
sys::cudaError_t::cudaSuccess => Ok(()),
_ => Err(RuntimeError(self)),
}
}
}
impl From<cudaError_enum> for RuntimeError {
fn from(e: cudaError_enum) -> Self {
match e {
cudaError_enum::CUDA_SUCCESS => Self(sys::cudaError_t::cudaSuccess),
cudaError_enum::CUDA_ERROR_INVALID_VALUE => {
Self(sys::cudaError_t::cudaErrorInvalidValue)
}
cudaError_enum::CUDA_ERROR_OUT_OF_MEMORY => {
Self(sys::cudaError_t::cudaErrorMemoryAllocation)
}
_ => Self(sys::cudaError_t::cudaErrorUnknown),
}
}
}
impl RuntimeError {
pub fn error_name(&self) -> Result<&CStr, RuntimeError> {
unsafe {
let err_str = sys::cudaGetErrorName(self.0);
Ok(CStr::from_ptr(err_str))
}
}
pub fn error_string(&self) -> Result<&CStr, RuntimeError> {
unsafe {
let err_str = sys::cudaGetErrorString(self.0);
Ok(CStr::from_ptr(err_str))
}
}
}
impl std::fmt::Debug for RuntimeError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let err_str = self.error_string().unwrap();
f.debug_tuple("RuntimeError")
.field(&self.0)
.field(&err_str)
.finish()
}
}
#[cfg(feature = "std")]
impl std::fmt::Display for RuntimeError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{self:?}")
}
}
#[cfg(feature = "std")]
impl std::error::Error for RuntimeError {}
pub enum CudaDeviceFlags {
CudaDeviceScheduleAuto = sys::cudaDeviceScheduleAuto as isize,
CudaDeviceScheduleSpin = sys::cudaDeviceScheduleSpin as isize,
CudaDeviceScheduleYield = sys::cudaDeviceScheduleYield as isize,
CudaDeviceScheduleBlockingSync = sys::cudaDeviceScheduleBlockingSync as isize,
CudaDeviceMapHost = sys::cudaDeviceMapHost as isize,
CudaDeviceLmemResizeToMax = sys::cudaDeviceLmemResizeToMax as isize,
#[cfg(not(any(
feature = "cuda-12000",
feature = "cuda-11080",
feature = "cuda-11070",
feature = "cuda-11060",
feature = "cuda-11050",
feature = "cuda-11040"
)))]
CudaDeviceSyncMemops = sys::cudaDeviceSyncMemops as isize,
}
pub mod version {
use super::{sys, RuntimeError};
pub fn get() -> Result<i32, RuntimeError> {
let mut version = 0;
unsafe {
sys::cudaRuntimeGetVersion(&mut version).result()?;
}
Ok(version)
}
pub fn get_runtime_version() -> Result<i32, RuntimeError> {
get()
}
pub fn get_driver_version() -> Result<i32, RuntimeError> {
let mut version = 0;
unsafe {
sys::cudaDriverGetVersion(&mut version).result()?;
}
Ok(version)
}
}
pub mod device {
use super::{sys, RuntimeError};
use core::ffi::{c_int, c_void};
use std::mem::MaybeUninit;
pub fn set(ordinal: i32) -> Result<(), RuntimeError> {
unsafe { sys::cudaSetDevice(ordinal).result() }
}
pub fn get() -> Result<i32, RuntimeError> {
let mut device = MaybeUninit::uninit();
unsafe {
sys::cudaGetDevice(device.as_mut_ptr()).result()?;
Ok(device.assume_init())
}
}
pub unsafe fn free(device_ptr: *mut c_void) -> Result<(), RuntimeError> {
unsafe { sys::cudaFree(device_ptr).result() }
}
pub fn get_device_prop(ordinal: c_int) -> Result<sys::cudaDeviceProp, RuntimeError> {
let mut prop = MaybeUninit::uninit();
#[cfg(not(any(
feature = "cuda-11080",
feature = "cuda-11070",
feature = "cuda-11060",
feature = "cuda-11050",
feature = "cuda-11040",
feature = "cuda-13000",
feature = "cuda-13010",
feature = "cuda-13020"
)))]
unsafe {
sys::cudaGetDeviceProperties_v2(prop.as_mut_ptr(), ordinal).result()?;
Ok(prop.assume_init())
}
#[cfg(any(
feature = "cuda-11080",
feature = "cuda-11070",
feature = "cuda-11060",
feature = "cuda-11050",
feature = "cuda-11040",
feature = "cuda-13000",
feature = "cuda-13010",
feature = "cuda-13020"
))]
unsafe {
sys::cudaGetDeviceProperties(prop.as_mut_ptr(), ordinal).result()?;
Ok(prop.assume_init())
}
}
pub fn get_count() -> Result<c_int, RuntimeError> {
let mut count = MaybeUninit::uninit();
unsafe {
sys::cudaGetDeviceCount(count.as_mut_ptr()).result()?;
Ok(count.assume_init())
}
}
pub fn total_mem() -> Result<usize, RuntimeError> {
let mut bytes = MaybeUninit::uninit();
unsafe {
sys::cudaMemGetInfo(std::ptr::null_mut(), bytes.as_mut_ptr()).result()?;
Ok(bytes.assume_init())
}
}
pub fn free_mem() -> Result<usize, RuntimeError> {
let mut bytes = MaybeUninit::uninit();
unsafe {
sys::cudaMemGetInfo(bytes.as_mut_ptr(), std::ptr::null_mut()).result()?;
Ok(bytes.assume_init())
}
}
pub unsafe fn get_attribute(
ordinal: c_int,
attr: sys::cudaDeviceAttr,
) -> Result<i32, RuntimeError> {
let mut value = MaybeUninit::uninit();
unsafe {
sys::cudaDeviceGetAttribute(value.as_mut_ptr(), attr, ordinal).result()?;
Ok(value.assume_init())
}
}
pub unsafe fn reset() -> Result<(), RuntimeError> {
sys::cudaDeviceReset().result()
}
}
pub mod function {
use super::{sys, RuntimeError};
use std::mem::MaybeUninit;
use std::os::raw::c_void;
pub unsafe fn set_function_attribute(
func: *const c_void,
attribute: sys::cudaFuncAttribute,
value: i32,
) -> Result<(), RuntimeError> {
sys::cudaFuncSetAttribute(func, attribute, value).result()
}
pub unsafe fn get_function_attributes(
func: *const c_void,
) -> Result<sys::cudaFuncAttributes, RuntimeError> {
let mut attr = MaybeUninit::uninit();
unsafe {
sys::cudaFuncGetAttributes(attr.as_mut_ptr(), func).result()?;
Ok(attr.assume_init())
}
}
pub unsafe fn get_function_by_symbol(
symbol_ptr: *const c_void,
) -> Result<sys::cudaFunction_t, RuntimeError> {
let mut func = MaybeUninit::uninit();
unsafe {
sys::cudaGetFuncBySymbol(func.as_mut_ptr(), symbol_ptr).result()?;
Ok(func.assume_init())
}
}
pub unsafe fn set_function_cache_config(
func: *const c_void,
attribute: sys::cudaFuncCache,
) -> Result<(), RuntimeError> {
sys::cudaFuncSetCacheConfig(func, attribute).result()
}
}
pub mod occupancy {
use core::ffi::{c_int, c_uint, c_void};
use std::mem::MaybeUninit;
use super::{sys, RuntimeError};
use crate::driver::result::occupancy::{
max_potential_block_size as driver_max_potential_block_size,
max_potential_block_size_with_flags as driver_max_potential_block_size_with_flags,
};
use crate::driver::{sys as driver_sys, DriverError};
pub unsafe fn available_dynamic_shared_mem_per_block(
f: *const c_void,
num_blocks: c_int,
block_size: c_int,
) -> Result<usize, RuntimeError> {
let mut dynamic_smem_size = MaybeUninit::uninit();
sys::cudaOccupancyAvailableDynamicSMemPerBlock(
dynamic_smem_size.as_mut_ptr(),
f,
num_blocks,
block_size,
)
.result()?;
Ok(dynamic_smem_size.assume_init())
}
pub unsafe fn max_active_block_per_multiprocessor(
f: *const c_void,
block_size: c_int,
dynamic_smem_size: usize,
) -> Result<i32, RuntimeError> {
let mut num_blocks = MaybeUninit::uninit();
sys::cudaOccupancyMaxActiveBlocksPerMultiprocessor(
num_blocks.as_mut_ptr(),
f,
block_size,
dynamic_smem_size,
)
.result()?;
Ok(num_blocks.assume_init())
}
pub unsafe fn max_active_block_per_multiprocessor_with_flags(
f: *const c_void,
block_size: c_int,
dynamic_smem_size: usize,
flags: c_uint,
) -> Result<i32, RuntimeError> {
let mut num_blocks = MaybeUninit::uninit();
sys::cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
num_blocks.as_mut_ptr(),
f,
block_size,
dynamic_smem_size,
flags,
)
.result()?;
Ok(num_blocks.assume_init())
}
pub unsafe fn max_potential_block_size(
f: sys::cudaFunction_t,
block_size_to_dynamic_smem_size: driver_sys::CUoccupancyB2DSize,
dynamic_smem_size: usize,
block_size_limit: c_int,
) -> Result<(i32, i32), DriverError> {
driver_max_potential_block_size(
f as driver_sys::CUfunction,
block_size_to_dynamic_smem_size,
dynamic_smem_size,
block_size_limit,
)
}
pub unsafe fn max_potential_block_size_with_flags(
f: sys::cudaFunction_t,
block_size_to_dynamic_smem_size: driver_sys::CUoccupancyB2DSize,
dynamic_smem_size: usize,
block_size_limit: c_int,
flags: c_uint,
) -> Result<(i32, i32), DriverError> {
driver_max_potential_block_size_with_flags(
f as driver_sys::CUfunction,
block_size_to_dynamic_smem_size,
dynamic_smem_size,
block_size_limit,
flags,
)
}
}
pub mod stream {
use super::{sys, RuntimeError};
use std::mem::MaybeUninit;
pub enum StreamKind {
Default,
NonBlocking,
}
impl StreamKind {
fn flags(self) -> u32 {
match self {
Self::Default => sys::cudaStreamDefault,
Self::NonBlocking => sys::cudaStreamNonBlocking,
}
}
}
pub fn create(kind: StreamKind) -> Result<sys::cudaStream_t, RuntimeError> {
let mut stream = MaybeUninit::uninit();
unsafe {
sys::cudaStreamCreateWithFlags(stream.as_mut_ptr(), kind.flags());
Ok(stream.assume_init())
}
}
pub unsafe fn synchronize(stream: sys::cudaStream_t) -> Result<(), RuntimeError> {
sys::cudaStreamSynchronize(stream).result()
}
pub unsafe fn destroy(stream: sys::cudaStream_t) -> Result<(), RuntimeError> {
sys::cudaStreamDestroy(stream).result()
}
pub unsafe fn wait_event(
stream: sys::cudaStream_t,
event: sys::cudaEvent_t,
flags: u32,
) -> Result<(), RuntimeError> {
sys::cudaStreamWaitEvent(stream, event, flags).result()
}
}
pub unsafe fn malloc_async(
stream: sys::cudaStream_t,
num_bytes: usize,
) -> Result<*mut c_void, RuntimeError> {
let mut dev_ptr = MaybeUninit::uninit();
sys::cudaMallocAsync(dev_ptr.as_mut_ptr(), num_bytes, stream).result()?;
Ok(dev_ptr.assume_init())
}
pub unsafe fn malloc_sync(num_bytes: usize) -> Result<*mut c_void, RuntimeError> {
let mut dev_ptr = MaybeUninit::uninit();
sys::cudaMalloc(dev_ptr.as_mut_ptr(), num_bytes).result()?;
Ok(dev_ptr.assume_init())
}
pub fn get_mem_info() -> Result<(usize, usize), RuntimeError> {
let mut free = MaybeUninit::uninit();
let mut total = MaybeUninit::uninit();
unsafe {
sys::cudaMemGetInfo(free.as_mut_ptr(), total.as_mut_ptr()).result()?;
Ok((free.assume_init(), total.assume_init()))
}
}
pub unsafe fn free_async(dptr: *mut c_void, stream: sys::cudaStream_t) -> Result<(), RuntimeError> {
sys::cudaFreeAsync(dptr, stream).result()
}
pub unsafe fn free_sync(dptr: *mut c_void) -> Result<(), RuntimeError> {
sys::cudaFree(dptr).result()
}
pub unsafe fn memory_free(device_ptr: *mut c_void) -> Result<(), RuntimeError> {
sys::cudaFree(device_ptr).result()
}
pub unsafe fn memset_d8_async(
dptr: *mut c_void,
uc: c_uchar,
num_bytes: usize,
stream: sys::cudaStream_t,
) -> Result<(), RuntimeError> {
sys::cudaMemsetAsync(dptr, uc as i32, num_bytes, stream).result()
}
pub unsafe fn memset_d8_sync(
dptr: *mut c_void,
uc: c_uchar,
num_bytes: usize,
) -> Result<(), RuntimeError> {
sys::cudaMemset(dptr, uc as i32, num_bytes).result()
}
pub unsafe fn memcpy_htod_async<T>(
dst: *mut c_void,
src: &[T],
stream: sys::cudaStream_t,
) -> Result<(), RuntimeError> {
sys::cudaMemcpyAsync(
dst,
src.as_ptr() as *const c_void,
std::mem::size_of_val(src),
sys::cudaMemcpyKind::cudaMemcpyHostToDevice,
stream,
)
.result()
}
pub unsafe fn memcpy_htod_sync<T>(dst: *mut c_void, src: &[T]) -> Result<(), RuntimeError> {
sys::cudaMemcpy(
dst,
src.as_ptr() as *const c_void,
std::mem::size_of_val(src),
sys::cudaMemcpyKind::cudaMemcpyHostToDevice,
)
.result()
}
pub unsafe fn memcpy_dtoh_async<T>(
dst: &mut [T],
src: *const c_void,
stream: sys::cudaStream_t,
) -> Result<(), RuntimeError> {
sys::cudaMemcpyAsync(
dst.as_mut_ptr() as *mut c_void,
src,
std::mem::size_of_val(dst),
sys::cudaMemcpyKind::cudaMemcpyDeviceToHost,
stream,
)
.result()
}
pub unsafe fn memcpy_dtoh_sync<T>(dst: &mut [T], src: *const c_void) -> Result<(), RuntimeError> {
sys::cudaMemcpy(
dst.as_mut_ptr() as *mut c_void,
src,
std::mem::size_of_val(dst),
sys::cudaMemcpyKind::cudaMemcpyDeviceToHost,
)
.result()
}
pub unsafe fn memcpy_dtod_async(
dst: *mut c_void,
src: *const c_void,
num_bytes: usize,
stream: sys::cudaStream_t,
) -> Result<(), RuntimeError> {
sys::cudaMemcpyAsync(
dst,
src,
num_bytes,
sys::cudaMemcpyKind::cudaMemcpyDeviceToDevice,
stream,
)
.result()
}
pub unsafe fn memcpy_dtod_sync(
dst: *mut c_void,
src: *const c_void,
num_bytes: usize,
) -> Result<(), RuntimeError> {
sys::cudaMemcpy(
dst,
src,
num_bytes,
sys::cudaMemcpyKind::cudaMemcpyDeviceToDevice,
)
.result()
}
pub fn mem_get_info() -> Result<(usize, usize), RuntimeError> {
let mut free = 0;
let mut total = 0;
unsafe {
sys::cudaMemGetInfo(&mut free, &mut total).result()?;
}
Ok((free, total))
}
pub mod event {
use super::{sys, RuntimeError};
use std::mem::MaybeUninit;
pub fn create(flags: u32) -> Result<sys::cudaEvent_t, RuntimeError> {
let mut event = MaybeUninit::uninit();
unsafe {
sys::cudaEventCreateWithFlags(event.as_mut_ptr(), flags).result()?;
Ok(event.assume_init())
}
}
pub unsafe fn record(
event: sys::cudaEvent_t,
stream: sys::cudaStream_t,
) -> Result<(), RuntimeError> {
sys::cudaEventRecord(event, stream).result()
}
pub unsafe fn elapsed(
start: sys::cudaEvent_t,
end: sys::cudaEvent_t,
) -> Result<f32, RuntimeError> {
let mut ms: f32 = 0.0;
sys::cudaEventElapsedTime((&mut ms) as *mut _, start, end).result()?;
Ok(ms)
}
pub unsafe fn destroy(event: sys::cudaEvent_t) -> Result<(), RuntimeError> {
sys::cudaEventDestroy(event).result()
}
}
#[inline]
pub unsafe fn launch_kernel(
f: crate::driver::sys::CUfunction,
grid_dim: (u32, u32, u32),
block_dim: (u32, u32, u32),
shared_mem_bytes: usize,
stream: sys::cudaStream_t,
kernel_params: &mut [*mut c_void],
) -> Result<(), RuntimeError> {
sys::cudaLaunchKernel(
f as *const c_void,
sys::dim3 {
x: grid_dim.0,
y: grid_dim.1,
z: grid_dim.2,
},
sys::dim3 {
x: block_dim.0,
y: block_dim.1,
z: block_dim.2,
},
kernel_params.as_mut_ptr(),
shared_mem_bytes,
stream,
)
.result()
}
pub mod external_memory {
use core::ffi::c_void;
use std::mem::MaybeUninit;
use super::{sys, RuntimeError};
#[cfg(unix)]
pub unsafe fn import_external_memory_opaque_fd(
fd: std::os::fd::RawFd,
size: u64,
) -> Result<sys::cudaExternalMemory_t, RuntimeError> {
let mut external_memory = MaybeUninit::uninit();
let handle_description = sys::cudaExternalMemoryHandleDesc {
type_: sys::cudaExternalMemoryHandleType::cudaExternalMemoryHandleTypeOpaqueFd,
handle: sys::cudaExternalMemoryHandleDesc__bindgen_ty_1 { fd },
size,
flags: 0,
#[cfg(any(feature = "cuda-13000", feature = "cuda-13010", feature = "cuda-13020"))]
reserved: [0; 16],
};
sys::cudaImportExternalMemory(external_memory.as_mut_ptr(), &handle_description)
.result()?;
Ok(external_memory.assume_init())
}
#[cfg(windows)]
pub unsafe fn import_external_memory_opaque_win32(
handle: std::os::windows::io::RawHandle,
size: u64,
) -> Result<sys::cudaExternalMemory_t, RuntimeError> {
let mut external_memory = MaybeUninit::uninit();
let handle_description = sys::cudaExternalMemoryHandleDesc {
type_: sys::cudaExternalMemoryHandleType::cudaExternalMemoryHandleTypeOpaqueWin32,
handle: sys::cudaExternalMemoryHandleDesc__bindgen_ty_1 {
win32: sys::cudaExternalMemoryHandleDesc__bindgen_ty_1__bindgen_ty_1 {
handle,
name: std::ptr::null(),
},
},
size,
flags: 0,
#[cfg(any(feature = "cuda-13000", feature = "cuda-13010", feature = "cuda-13020"))]
reserved: [0; 16],
};
sys::cudaImportExternalMemory(external_memory.as_mut_ptr(), &handle_description)
.result()?;
Ok(external_memory.assume_init())
}
pub unsafe fn destroy_external_memory(
external_memory: sys::cudaExternalMemory_t,
) -> Result<(), RuntimeError> {
sys::cudaDestroyExternalMemory(external_memory).result()
}
pub unsafe fn get_mapped_buffer(
external_memory: sys::cudaExternalMemory_t,
offset: u64,
size: u64,
) -> Result<*mut c_void, RuntimeError> {
let mut device_ptr = MaybeUninit::uninit();
let buffer_description = sys::cudaExternalMemoryBufferDesc {
offset,
size,
flags: 0,
#[cfg(any(feature = "cuda-13000", feature = "cuda-13010", feature = "cuda-13020"))]
reserved: [0; 16],
};
sys::cudaExternalMemoryGetMappedBuffer(
device_ptr.as_mut_ptr(),
external_memory,
&buffer_description,
)
.result()?;
Ok(device_ptr.assume_init())
}
}