1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
use cudarc::{
driver::{CudaDevice, DriverError, LaunchAsync, LaunchConfig},
nvrtc::Ptx,
};
/// Launches `sin_kernel` from `./examples/sin.ptx` over a small host array,
/// then prints the buffer read back from the device next to the values
/// computed on the host with `f32::sin`.
///
/// # Errors
///
/// Returns a [`DriverError`] if opening device 0, loading the PTX module,
/// copying to the device, launching the kernel, or reclaiming a device
/// buffer fails.
///
/// # Panics
///
/// Panics if `sin_kernel` cannot be looked up in the `sin` module after the
/// load, or if the input buffer read back from the device differs from the
/// host input.
fn main() -> Result<(), DriverError> {
    let dev = CudaDevice::new(0)?;

    // Register the kernel under module name "sin" so it can be looked up below.
    dev.load_ptx(Ptx::from_file("./examples/sin.ptx"), "sin", &["sin_kernel"])?;
    let f = dev
        .get_func("sin", "sin_kernel")
        .expect("`sin_kernel` should be available in module `sin` after `load_ptx` succeeded");

    let a_host = [1.0f32, 2.0, 3.0];
    // Derive the element count from the data so the launch configuration and
    // the kernel argument cannot drift from the array literal. The cast cannot
    // truncate: the length of this fixed-size array is tiny.
    let n = a_host.len() as u32;

    let a_dev = dev.htod_copy(a_host.into())?;
    // The output buffer starts as a clone of the input buffer, so it has the
    // same element count.
    let mut b_dev = a_dev.clone();

    let cfg = LaunchConfig::for_num_elems(n);
    // SAFETY: `b_dev` and `a_dev` each hold `n` `f32` elements (both originate
    // from `a_host`). NOTE(review): this assumes `sin_kernel` takes
    // (output pointer, input pointer, i32 element count) and accesses at most
    // `n` elements of each buffer -- confirm against the kernel source.
    unsafe { f.launch(cfg, (&mut b_dev, &a_dev, n as i32)) }?;

    let a_host_2 = dev.sync_reclaim(a_dev)?;
    let b_host = dev.sync_reclaim(b_dev)?;

    println!("Found {:?}", b_host);
    println!("Expected {:?}", a_host.map(f32::sin));
    // The kernel is only handed the input immutably, so it must come back unchanged.
    assert_eq!(&a_host, a_host_2.as_slice());
    Ok(())
}