pub(crate) mod driver_types_sys;
mod error;
pub mod sys;
pub use error::*;
use core::ffi::c_void;
use core::mem::MaybeUninit;
use sys as cuda;
bitflags::bitflags! {
pub struct StreamFlags: u32 {
const DEFAULT = 0x00;
const NON_BLOCKING = 0x01;
}
}
#[derive(Debug)]
pub struct Stream {
raw: cuda::cudaStream_t,
}
impl Stream {
pub fn new(flags: StreamFlags) -> CudaResult<Self> {
let mut stream = MaybeUninit::uninit();
unsafe {
cuda::cudaStreamCreateWithFlags(stream.as_mut_ptr(), flags.bits).to_result()?;
Ok(Self {
raw: stream.assume_init(),
})
}
}
#[doc(hidden)]
pub fn launch(&self, param_buf: *mut c_void) -> CudaResult<()> {
unsafe { cuda::cudaLaunchDeviceV2(param_buf, self.raw).to_result() }
}
}
impl Drop for Stream {
fn drop(&mut self) {
unsafe {
cuda::cudaStreamDestroy(self.raw);
}
}
}
#[macro_export]
macro_rules! launch {
($func:ident<<<$grid_dim:expr, $block_dim:expr, $smem_size:expr, $stream:ident>>>($($param:expr),* $(,)?)) => {{
let grid_dim = ::$crate::rt::GridDim::from($grid_dim);
let block_dim = ::$crate::rt::BlockDim::from($block_dim);
let mut buf = ::$crate::rt::sys::cudaGetParameterBufferV2(
&$func as *const _ as *const ::core::ffi::c_void,
::$crate::rt::sys::dim3 {
x: grid_dim.x,
y: grid_dim.y,
z: grid_dim.z
},
::$crate::rt::sys::dim3 {
x: block_dim.x,
y: block_dim.y,
z: block_dim.z
},
$smem_size
) as *mut u8;
unsafe {
let mut offset = 0;
$(
let param = $param;
let size = ::core::mem::size_of_val(¶m)
let mut buf_idx = (offset as f32 / size as f32).ceil() as usize + 1;
offset = buf_idx * size;
let ptr = ¶m as *const _ as *const u8;
let dst = buf.add(offset);
::core::ptr::copy_nonoverlapping(¶m as *const _ as *const u8, dst, size);
)*
}
if false {
$func($($param),*);
}
$stream.launch(buf as *mut ::core::ffi::c_void).to_result()
}};
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GridSize {
pub x: u32,
pub y: u32,
pub z: u32,
}
impl GridSize {
#[inline]
pub fn x(x: u32) -> GridSize {
GridSize { x, y: 1, z: 1 }
}
#[inline]
pub fn xy(x: u32, y: u32) -> GridSize {
GridSize { x, y, z: 1 }
}
#[inline]
pub fn xyz(x: u32, y: u32, z: u32) -> GridSize {
GridSize { x, y, z }
}
}
impl From<u32> for GridSize {
fn from(x: u32) -> GridSize {
GridSize::x(x)
}
}
impl From<(u32, u32)> for GridSize {
fn from((x, y): (u32, u32)) -> GridSize {
GridSize::xy(x, y)
}
}
impl From<(u32, u32, u32)> for GridSize {
fn from((x, y, z): (u32, u32, u32)) -> GridSize {
GridSize::xyz(x, y, z)
}
}
impl<'a> From<&'a GridSize> for GridSize {
fn from(other: &GridSize) -> GridSize {
other.clone()
}
}
impl From<vek::Vec2<u32>> for GridSize {
fn from(vec: vek::Vec2<u32>) -> Self {
GridSize::xy(vec.x, vec.y)
}
}
impl From<vek::Vec3<u32>> for GridSize {
fn from(vec: vek::Vec3<u32>) -> Self {
GridSize::xyz(vec.x, vec.y, vec.z)
}
}
impl From<vek::Vec2<usize>> for GridSize {
fn from(vec: vek::Vec2<usize>) -> Self {
GridSize::xy(vec.x as u32, vec.y as u32)
}
}
impl From<vek::Vec3<usize>> for GridSize {
fn from(vec: vek::Vec3<usize>) -> Self {
GridSize::xyz(vec.x as u32, vec.y as u32, vec.z as u32)
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BlockSize {
pub x: u32,
pub y: u32,
pub z: u32,
}
impl BlockSize {
#[inline]
pub fn x(x: u32) -> BlockSize {
BlockSize { x, y: 1, z: 1 }
}
#[inline]
pub fn xy(x: u32, y: u32) -> BlockSize {
BlockSize { x, y, z: 1 }
}
#[inline]
pub fn xyz(x: u32, y: u32, z: u32) -> BlockSize {
BlockSize { x, y, z }
}
}
impl From<u32> for BlockSize {
fn from(x: u32) -> BlockSize {
BlockSize::x(x)
}
}
impl From<(u32, u32)> for BlockSize {
fn from((x, y): (u32, u32)) -> BlockSize {
BlockSize::xy(x, y)
}
}
impl From<(u32, u32, u32)> for BlockSize {
fn from((x, y, z): (u32, u32, u32)) -> BlockSize {
BlockSize::xyz(x, y, z)
}
}
impl<'a> From<&'a BlockSize> for BlockSize {
fn from(other: &BlockSize) -> BlockSize {
other.clone()
}
}
impl From<vek::Vec2<u32>> for BlockSize {
fn from(vec: vek::Vec2<u32>) -> Self {
BlockSize::xy(vec.x, vec.y)
}
}
impl From<vek::Vec3<u32>> for BlockSize {
fn from(vec: vek::Vec3<u32>) -> Self {
BlockSize::xyz(vec.x, vec.y, vec.z)
}
}
impl From<vek::Vec2<usize>> for BlockSize {
fn from(vec: vek::Vec2<usize>) -> Self {
BlockSize::xy(vec.x as u32, vec.y as u32)
}
}
impl From<vek::Vec3<usize>> for BlockSize {
fn from(vec: vek::Vec3<usize>) -> Self {
BlockSize::xyz(vec.x as u32, vec.y as u32, vec.z as u32)
}
}