pub struct LaunchConfig {
pub grid_dim: (u32, u32, u32),
pub block_dim: (u32, u32, u32),
pub shared_mem_bytes: u32,
}
Expand description
Configuration for result::launch_kernel.
See the CUDA docs for a description of each parameter.
Fields§
§grid_dim: (u32, u32, u32)
(width, height, depth) of grid in blocks
§block_dim: (u32, u32, u32)
(x, y, z) dimension of each thread block
§shared_mem_bytes: u32
Dynamic shared-memory size per thread block in bytes
Implementations§
Source§impl LaunchConfig
impl LaunchConfig
Sourcepub fn for_num_elems(n: u32) -> Self
pub fn for_num_elems(n: u32) -> Self
Creates a LaunchConfig with:
- block_dim == 1024
- grid_dim == (n + 1023) / 1024
- shared_mem_bytes == 0
Examples found in repository?
examples/05-device-repr.rs (line 51)
32fn main() -> Result<(), DriverError> {
33 let ctx = CudaContext::new(0)?;
34 let stream = ctx.default_stream();
35
36 let ptx = compile_ptx(PTX_SRC).unwrap();
37 let module = ctx.load_module(ptx)?;
38 let f = module.load_function("my_custom_kernel")?;
39
40 // try changing some of these values to see a device assert
41 let thing = MyCoolRustStruct {
42 a: 1.0,
43 b: 2.34,
44 c: 57,
45 d: 420,
46 };
47
48 let mut builder = stream.launch_builder(&f);
49 // since MyCoolRustStruct implements DeviceRepr, we can pass it to launch.
50 builder.arg(&thing);
51 unsafe { builder.launch(LaunchConfig::for_num_elems(1)) }?;
52
53 Ok(())
54}
More examples
examples/03-launch-kernel.rs (line 23)
6fn main() -> Result<(), DriverError> {
7 let ctx = CudaContext::new(0)?;
8 let stream = ctx.default_stream();
9
10 // You can load a function from a pre-compiled PTX like so:
11 let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
12
13 // and then load a function from it:
14 let f = module.load_function("sin_kernel").unwrap();
15
16 let a_host = [1.0, 2.0, 3.0];
17
18 let a_dev = stream.memcpy_stod(&a_host)?;
19 let mut b_dev = a_dev.clone();
20
21 // we use a builder pattern to launch kernels.
22 let n = 3i32;
23 let cfg = LaunchConfig::for_num_elems(n as u32);
24 let mut launch_args = stream.launch_builder(&f);
25 launch_args.arg(&mut b_dev);
26 launch_args.arg(&a_dev);
27 launch_args.arg(&n);
28 unsafe { launch_args.launch(cfg) }?;
29
30 let a_host_2 = stream.memcpy_dtov(&a_dev)?;
31 let b_host = stream.memcpy_dtov(&b_dev)?;
32
33 println!("Found {:?}", b_host);
34 println!("Expected {:?}", a_host.map(f32::sin));
35 assert_eq!(&a_host, a_host_2.as_slice());
36
37 Ok(())
38}
examples/04-streams.rs (line 29)
6fn main() -> Result<(), DriverError> {
7 let ctx = CudaContext::new(0)?;
8 let stream = ctx.default_stream();
9
10 let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
11 let f = module.load_function("sin_kernel")?;
12
13 let n = 3i32;
14 let a_host = [1.0, 2.0, 3.0];
15 let a_dev = stream.memcpy_stod(&a_host)?;
16 let mut b_dev = stream.alloc_zeros::<f32>(n as usize)?;
17
18 // we can safely create a second stream using [CudaStream::fork()].
19 // This synchronizes with the source stream, so
20 // the `memcpy_stod` & `alloc_zeros` above will complete **before**
21 // work on this stream can start.
22 let stream2 = stream.fork()?;
23
24 // now we launch this work on the other stream
25 let mut builder = stream2.launch_builder(&f);
26 builder.arg(&mut b_dev); // NOTE: tells cudarc that we are mutating this.
27 builder.arg(&a_dev); // NOTE: tells cudarc that we are reading from this slice
28 builder.arg(&n);
29 unsafe { builder.launch(LaunchConfig::for_num_elems(n as u32)) }?;
30
31 // cudarc automatically manages multi stream synchronization,
32 // so even though we launched the above on a separate stream,
33 // doing this device to host transfer will still properly synchronize.
34 // a_dev doesn't need to synchronize at all since we specified it is just
35 // being read from.
36 // b_dev DOES need to be synchronized, because it was mutated on a different stream.
37 let a_host_2 = stream.memcpy_dtov(&a_dev)?;
38 let b_host = stream.memcpy_dtov(&b_dev)?;
39
40 println!("Found {:?}", b_host);
41 println!("Expected {:?}", a_host.map(f32::sin));
42 assert_eq!(&a_host, a_host_2.as_slice());
43
44 Ok(())
45}
examples/06-threading.rs (line 29)
12fn main() -> Result<(), DriverError> {
13 {
14 // Option 1: sharing ctx & module between threads
15 thread::scope(|s| {
16 let ptx = compile_ptx(KERNEL_SRC).unwrap();
17 let ctx = CudaContext::new(0)?;
18 let module = ctx.load_module(ptx)?;
19 for i in 0..10i32 {
20 let thread_ctx = ctx.clone();
21 let thread_module = module.clone();
22 s.spawn(move || {
23 let stream = thread_ctx.default_stream();
24 let f = thread_module.load_function("hello_world")?;
25 unsafe {
26 stream
27 .launch_builder(&f)
28 .arg(&i)
29 .launch(LaunchConfig::for_num_elems(1))
30 }
31 });
32 }
33 Ok(())
34 })?;
35 }
36
37 {
38 // Option 2: initializing a different context in each thread.
39 // Note that this will still schedule to the same stream since we are using the
40 // default stream here on the same device.
41 thread::scope(move |s| {
42 for i in 0..10i32 {
43 s.spawn(move || {
44 let ptx = compile_ptx(KERNEL_SRC).unwrap();
45 let ctx = CudaContext::new(0)?;
46 let module = ctx.load_module(ptx)?;
47 let stream = ctx.default_stream();
48 let f = module.load_function("hello_world")?;
49 unsafe {
50 stream
51 .launch_builder(&f)
52 .arg(&i)
53 .launch(LaunchConfig::for_num_elems(1))
54 }
55 });
56 }
57 Ok(())
58 })?;
59 }
60
61 Ok(())
62}
Trait Implementations§
Source§impl Clone for LaunchConfig
impl Clone for LaunchConfig
Source§fn clone(&self) -> LaunchConfig
fn clone(&self) -> LaunchConfig
Returns a copy of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for LaunchConfig
impl Debug for LaunchConfig
impl Copy for LaunchConfig
Auto Trait Implementations§
impl Freeze for LaunchConfig
impl RefUnwindSafe for LaunchConfig
impl Send for LaunchConfig
impl Sync for LaunchConfig
impl Unpin for LaunchConfig
impl UnwindSafe for LaunchConfig
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more