nncombinator/cuda/
ffi.rs

1//! FFI Module
2use std::{mem};
3use std::ptr::null_mut;
4use cuda_runtime_sys::dim3;
5use libc::c_void;
6use rcudnn_sys::{cudaError, cudaMemcpyKind, cudaStream_t};
7use crate::error::CudaRuntimeError;
8
9/// Device Memory allocation
10/// # Arguments
11/// * `size` - memory size
12///
13/// # Errors
14///
15/// This function may return the following errors
16/// * [`rcudnn::Error`]
17///
18/// note: The actual size of memory allocated is automatically calculated
19///       by multiplying the size of the return type by the value of the argument passed.
20///       There is no need for the caller to multiply the size of the return type.
21pub fn malloc<T>(size: usize) -> Result<*mut T,rcudnn::Error> {
22    let size = mem::size_of::<T>() * size;
23    let mut ptr: *mut T = null_mut();
24
25    match unsafe { rcudnn_sys::cudaMalloc(&mut ptr as *mut *mut T as *mut *mut libc::c_void, size) } {
26        cudaError::cudaSuccess => {
27            assert_ne!(ptr,
28                       null_mut(),
29                       "cudaMalloc is succeeded, but returned null pointer!");
30            Ok(ptr)
31        },
32        cudaError::cudaErrorInvalidValue => {
33            Err(rcudnn::Error::InvalidValue("The range of one or more of the entered parameters is out of tolerance."))
34        },
35        cudaError::cudaErrorMemoryAllocation => {
36            Err(rcudnn::Error::AllocFailed("Device memory allocation failed."))
37        },
38        status => {
39            Err(rcudnn::Error::Unknown("Unable to create the CUDA cuDNN context/resources.", status as i32 as u64))
40        }
41    }
42}
43
44/// Host Memory allocation
45/// # Arguments
46/// * `size` - memory size
47///
48/// # Errors
49///
50/// This function may return the following errors
51/// * [`rcudnn::Error`]
52///
53/// note: The actual size of memory allocated is automatically calculated
54///       by multiplying the size of the return type by the value of the argument passed.
55///       There is no need for the caller to multiply the size of the return type.
56pub fn malloc_host<T>(size: usize, flags:libc::c_uint) -> Result<*mut T,rcudnn::Error> {
57    let size = mem::size_of::<T>() * size;
58    let mut ptr: *mut T = null_mut();
59
60    match unsafe { rcudnn_sys::cudaHostAlloc(&mut ptr as *mut *mut T as *mut *mut libc::c_void, size, flags) } {
61        cudaError::cudaSuccess => {
62            assert_ne!(ptr,
63                       null_mut(),
64                       "cudaMalloc is succeeded, but returned null pointer!");
65            Ok(ptr)
66        },
67        cudaError::cudaErrorInvalidValue => {
68            Err(rcudnn::Error::InvalidValue("The range of one or more of the entered parameters is out of tolerance."))
69        },
70        cudaError::cudaErrorMemoryAllocation => {
71            Err(rcudnn::Error::AllocFailed("Device memory allocation failed."))
72        },
73        status => {
74            Err(rcudnn::Error::Unknown("Unable to create the CUDA cuDNN context/resources.", status as i32 as u64))
75        }
76    }
77}
78
79/// Sync Copy memory
80/// # Arguments
81/// * `dst` - copy destination
82/// * `src` - copy source
83/// * `size` - Size to copy (number of elements, not bytes)
84/// * `kind` - Type of memory copy defined by cuda
85///
86/// # Errors
87///
88/// This function may return the following errors
89/// * [`rcudnn::Error`]
90///
91pub fn memcpy<T>(dst: *mut T, src: *const T, size: usize, kind: cudaMemcpyKind) -> Result<(),rcudnn::Error> {
92    let size = mem::size_of::<T>() * size;
93
94    match unsafe {
95        rcudnn_sys::cudaMemcpy(dst as *mut libc::c_void, src as *mut libc::c_void, size, kind)
96    } {
97        cudaError::cudaSuccess => {
98            Ok(())
99        },
100        cudaError::cudaErrorInvalidMemcpyDirection => {
101            Err(rcudnn::Error::BadParam("Incorrect specification of memory transfer direction."))
102        },
103        status => {
104            Err(rcudnn::Error::Unknown("Unable to create the CUDA cuDNN context/resources.", status as i32 as u64))
105        }
106    }
107}
108
109/// Async Copy memory
110/// # Arguments
111/// * `dst` - copy destination
112/// * `src` - copy source
113/// * `size` - Size to copy (number of elements, not bytes)
114/// * `kind` - Type of memory copy defined by cuda
115/// * `stream` - cuda stream
116///
117/// # Errors
118///
119/// This function may return the following errors
120/// * [`rcudnn::Error`]
121///
122pub fn memcpy_async<T>(dst: *mut T, src: *const T, size: usize, kind: cudaMemcpyKind, stream: cudaStream_t) -> Result<(),rcudnn::Error> {
123    let size = mem::size_of::<T>() * size;
124    match unsafe {
125        rcudnn_sys::cudaMemcpyAsync(dst as *mut libc::c_void, src as *mut libc::c_void, size, kind, stream)
126    } {
127        cudaError::cudaSuccess => {
128            Ok(())
129        },
130        cudaError::cudaErrorInvalidMemcpyDirection => {
131            Err(rcudnn::Error::BadParam("Incorrect specification of memory transfer direction."))
132        },
133        status => {
134            Err(rcudnn::Error::Unknown("Unable to create the CUDA cuDNN context/resources.", status as i32 as u64))
135        }
136    }
137}
138
139/// free up memory
140/// # Arguments
141/// * `devptr` - Device Memory object to be free
142///
143/// # Errors
144///
145/// This function may return the following errors
146/// * [`rcudnn::Error`]
147///
148pub fn free<T>(devptr: *mut T) -> Result<(),rcudnn::Error> {
149    match unsafe { rcudnn_sys::cudaFree(devptr as *mut libc::c_void) } {
150        cudaError::cudaSuccess => Ok(()),
151        cudaError::cudaErrorInvalidValue => {
152            Err(rcudnn::Error::InvalidValue("Invalid pointer passed as argument."))
153        },
154        status => {
155            Err(rcudnn::Error::Unknown("Unable to create the CUDA cuDNN context/resources.", status as i32 as u64))
156        }
157    }
158}
159
160/// free up memory
161/// # Arguments
162/// * `devptr` - Host Memory object to be free
163///
164/// # Errors
165///
166/// This function may return the following errors
167/// * [`rcudnn::Error`]
168///
169pub fn free_host<T>(devptr: *mut T) -> Result<(),rcudnn::Error> {
170    match unsafe { rcudnn_sys::cudaFreeHost(devptr as *mut libc::c_void) } {
171        cudaError::cudaSuccess => Ok(()),
172        cudaError::cudaErrorInvalidValue => {
173            Err(rcudnn::Error::InvalidValue("Invalid pointer passed as argument."))
174        },
175        status => {
176            Err(rcudnn::Error::Unknown("Unable to create the CUDA cuDNN context/resources.", status as i32 as u64))
177        }
178    }
179}
180fn launch_with_stream(func: *const c_void,
181                          grid_dim: dim3,
182                          block_dim: dim3,
183                          args: &mut [*mut c_void],
184                          shared_mem: usize,
185                          stream:cuda_runtime_sys::cudaStream_t)
186                          -> Result<(),CudaRuntimeError> {
187    let cuda_error = unsafe {
188        cuda_runtime_sys::cudaLaunchKernel(func,
189                                           grid_dim,
190                                           block_dim,
191                                           args.as_mut_ptr(),
192                                           shared_mem,
193                                           stream)
194    };
195
196    if cuda_error == cuda_runtime_sys::cudaError::cudaSuccess {
197        Ok(())
198    } else {
199        Err(CudaRuntimeError::new(cuda_error))
200    }
201}
202fn launch_cooperative_with_stream(func: *const c_void,
203                      grid_dim: dim3,
204                      block_dim: dim3,
205                      args: &mut [*mut c_void],
206                      shared_mem: usize,
207                      stream:cuda_runtime_sys::cudaStream_t)
208                      -> Result<(),CudaRuntimeError> {
209    let cuda_error = unsafe {
210        cuda_runtime_sys::cudaLaunchCooperativeKernel(func,
211                                           grid_dim,
212                                           block_dim,
213                                           args.as_mut_ptr(),
214                                           shared_mem,
215                                           stream)
216    };
217
218    if cuda_error == cuda_runtime_sys::cudaError::cudaSuccess {
219        Ok(())
220    } else {
221        Err(CudaRuntimeError::new(cuda_error))
222    }
223}
224/// cuda kernel startup function
225/// # Arguments
226/// * `func` - Pointer to cuda kernel function
227/// * `grid_dim` - Number of dims in grid
228/// * `block_dim` - Number of blocks in grid
229/// * `args` - List of arguments passed to cuda kernel functions
230/// * `shared_mem` - Size (in bytes) of shared memory to allocate for use within cuda kernel functions.
231///
232/// # Errors
233///
234/// This function may return the following errors
235/// * [`CudaRuntimeError`]
236pub fn launch(func: *const c_void,
237              grid_dim: dim3,
238              block_dim: dim3,
239              args: &mut [*mut c_void],
240              shared_mem: usize)
241              -> Result<(),CudaRuntimeError> {
242    launch_with_stream(func,
243                       grid_dim,
244                       block_dim,
245                       args,
246                       shared_mem,
247                       null_mut())
248}
249/// cuda kernel startup function
250/// Launches a device function where thread blocks can cooperate and synchronize as they execute.
251/// # Arguments
252/// * `func` - Pointer to cuda kernel function
253/// * `grid_dim` - Number of dims in grid
254/// * `block_dim` - Number of blocks in grid
255/// * `args` - List of arguments passed to cuda kernel functions
256/// * `shared_mem` - Size (in bytes) of shared memory to allocate for use within cuda kernel functions.
257///
258/// # Errors
259///
260/// This function may return the following errors
261/// * [`CudaRuntimeError`]
262pub fn launch_cooperative(func: *const c_void,
263              grid_dim: dim3,
264              block_dim: dim3,
265              args: &mut [*mut c_void],
266              shared_mem: usize)
267              -> Result<(),CudaRuntimeError> {
268    launch_cooperative_with_stream(func,
269                       grid_dim,
270                       block_dim,
271                       args,
272                       shared_mem,
273                       null_mut())
274}
275/// Function that waits for the completion of the execution of the process passed to the Cuda kernel
276///
277/// # Errors
278///
279/// This function may return the following errors
280/// * [`CudaRuntimeError`]
281pub fn device_synchronize() -> Result<(),CudaRuntimeError> {
282    let cuda_error = unsafe { cuda_runtime_sys::cudaDeviceSynchronize() };
283
284    if cuda_error == cuda_runtime_sys::cudaError::cudaSuccess {
285        Ok(())
286    } else {
287        Err(CudaRuntimeError::new(cuda_error))
288    }
289}