pub unsafe fn cudaMallocAsync( devPtr: *mut *mut c_void, size: usize, hStream: cudaStream_t, ) -> cudaError_t