1use cudarc::{
2 driver::{CudaContext, DriverError, LaunchConfig, PushKernelArg},
3 nvrtc::Ptx,
4};
5
6fn main() -> Result<(), DriverError> {
7 let ctx = CudaContext::new(0)?;
8 let stream = ctx.default_stream();
9
10 let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
11 let f = module.load_function("sin_kernel")?;
12
13 let n = 3i32;
14 let a_host = [1.0, 2.0, 3.0];
15 let a_dev = stream.clone_htod(&a_host)?;
16 let mut b_dev = stream.alloc_zeros::<f32>(n as usize)?;
17
18 let stream2 = stream.fork()?;
23
24 let mut builder = stream2.launch_builder(&f);
26 builder.arg(&mut b_dev); builder.arg(&a_dev); builder.arg(&n);
29 unsafe { builder.launch(LaunchConfig::for_num_elems(n as u32)) }?;
30
31 let a_host_2 = stream.clone_dtoh(&a_dev)?;
38 let b_host = stream.clone_dtoh(&b_dev)?;
39
40 println!("Found {b_host:?}");
41 println!("Expected {:?}", a_host.map(f32::sin));
42 assert_eq!(&a_host, a_host_2.as_slice());
43
44 Ok(())
45}