pub unsafe extern "C" fn cuda_pull_array_async(
    x_gpu: *mut f32,
    x: *mut f32,
    n: usize
)