pub unsafe extern "C" fn cuda_malloc_async( size: u64, stream: *mut c_void, gpu_index: u32, ) -> *mut c_void