Struct LaunchConfig

Source

/// Configuration for `result::launch_kernel`: the grid dimensions, the
/// thread-block dimensions, and the dynamic shared-memory size of a launch.
pub struct LaunchConfig {
    /// (width, height, depth) of grid in blocks
    pub grid_dim: (u32, u32, u32),
    /// (x, y, z) dimension of each thread block
    pub block_dim: (u32, u32, u32),
    /// Dynamic shared-memory size per thread block in bytes
    pub shared_mem_bytes: u32,
}

Expand description

Configuration for result::launch_kernel

See the CUDA documentation for a description of each parameter.

Fields§

§grid_dim: (u32, u32, u32)

(width, height, depth) of grid in blocks

§block_dim: (u32, u32, u32)

(x, y, z) dimension of each thread block

§shared_mem_bytes: u32

Dynamic shared-memory size per thread block in bytes

Implementations§

Source §

impl LaunchConfig

Source

pub fn for_num_elems(n: u32) -> Self

Creates a LaunchConfig with:

block_dim == (1024, 1, 1)
grid_dim == ((n + 1023) / 1024, 1, 1), i.e. enough 1024-thread blocks to cover n elements
shared_mem_bytes == 0

Examples found in repository ?

examples/05-device-repr.rs (line 51)

32fn main() -> Result<(), DriverError> {
33    let ctx = CudaContext::new(0)?;
34    let stream = ctx.default_stream();
35
36    let ptx = compile_ptx(PTX_SRC).unwrap();
37    let module = ctx.load_module(ptx)?;
38    let f = module.load_function("my_custom_kernel")?;
39
40    // try changing some of these values to see a device assert
41    let thing = MyCoolRustStruct {
42        a: 1.0,
43        b: 2.34,
44        c: 57,
45        d: 420,
46    };
47
48    let mut builder = stream.launch_builder(&f);
49    // since MyCoolRustStruct implements DeviceRepr, we can pass it to launch.
50    builder.arg(&thing);
51    unsafe { builder.launch(LaunchConfig::for_num_elems(1)) }?;
52
53    Ok(())
54}

More examples

Hide additional examples

examples/03-launch-kernel.rs (line 23)

6fn main() -> Result<(), DriverError> {
7    let ctx = CudaContext::new(0)?;
8    let stream = ctx.default_stream();
9
10    // You can load a function from a pre-compiled PTX like so:
11    let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
12
13    // and then load a function from it:
14    let f = module.load_function("sin_kernel").unwrap();
15
16    let a_host = [1.0, 2.0, 3.0];
17
18    let a_dev = stream.memcpy_stod(&a_host)?;
19    let mut b_dev = a_dev.clone();
20
21    // we use a builder pattern to launch kernels.
22    let n = 3i32;
23    let cfg = LaunchConfig::for_num_elems(n as u32);
24    let mut launch_args = stream.launch_builder(&f);
25    launch_args.arg(&mut b_dev);
26    launch_args.arg(&a_dev);
27    launch_args.arg(&n);
28    unsafe { launch_args.launch(cfg) }?;
29
30    let a_host_2 = stream.memcpy_dtov(&a_dev)?;
31    let b_host = stream.memcpy_dtov(&b_dev)?;
32
33    println!("Found {:?}", b_host);
34    println!("Expected {:?}", a_host.map(f32::sin));
35    assert_eq!(&a_host, a_host_2.as_slice());
36
37    Ok(())
38}

examples/04-streams.rs (line 29)

6fn main() -> Result<(), DriverError> {
7    let ctx = CudaContext::new(0)?;
8    let stream = ctx.default_stream();
9
10    let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
11    let f = module.load_function("sin_kernel")?;
12
13    let n = 3i32;
14    let a_host = [1.0, 2.0, 3.0];
15    let a_dev = stream.memcpy_stod(&a_host)?;
16    let mut b_dev = stream.alloc_zeros::<f32>(n as usize)?;
17
18    // we can safely create a second stream using [CudaStream::fork()].
19    // This synchronizes with the source stream, so
20    // the `memcpy_stod` & `alloc_zeros` above will complete **before**
21    // work on this stream can start.
22    let stream2 = stream.fork()?;
23
24    // now we launch this work on the other stream
25    let mut builder = stream2.launch_builder(&f);
26    builder.arg(&mut b_dev); // NOTE: tells cudarc that we are mutating this.
27    builder.arg(&a_dev); // NOTE: tells cudarc that we are reading from this slice
28    builder.arg(&n);
29    unsafe { builder.launch(LaunchConfig::for_num_elems(n as u32)) }?;
30
31    // cudarc automatically manages multi stream synchronization,
32    // so even though we launched the above on a separate stream,
33    // doing this device to host transfer will still properly synchronize.
34    // a_dev doesn't need to synchronize at all since we specified it is just
35    // being read from.
36    // b_dev DOES need to be synchronized, because it was mutated on a different stream.
37    let a_host_2 = stream.memcpy_dtov(&a_dev)?;
38    let b_host = stream.memcpy_dtov(&b_dev)?;
39
40    println!("Found {:?}", b_host);
41    println!("Expected {:?}", a_host.map(f32::sin));
42    assert_eq!(&a_host, a_host_2.as_slice());
43
44    Ok(())
45}

examples/06-threading.rs (line 29)

12fn main() -> Result<(), DriverError> {
13    {
14        // Option 1: sharing ctx & module between threads
15        thread::scope(|s| {
16            let ptx = compile_ptx(KERNEL_SRC).unwrap();
17            let ctx = CudaContext::new(0)?;
18            let module = ctx.load_module(ptx)?;
19            for i in 0..10i32 {
20                let thread_ctx = ctx.clone();
21                let thread_module = module.clone();
22                s.spawn(move || {
23                    let stream = thread_ctx.default_stream();
24                    let f = thread_module.load_function("hello_world")?;
25                    unsafe {
26                        stream
27                            .launch_builder(&f)
28                            .arg(&i)
29                            .launch(LaunchConfig::for_num_elems(1))
30                    }
31                });
32            }
33            Ok(())
34        })?;
35    }
36
37    {
38        // Option 2: initializing a different context in each thread
39        // Note that this will still schedule to the same stream since we are using the
40        // default stream here on the same device.
41        thread::scope(move |s| {
42            for i in 0..10i32 {
43                s.spawn(move || {
44                    let ptx = compile_ptx(KERNEL_SRC).unwrap();
45                    let ctx = CudaContext::new(0)?;
46                    let module = ctx.load_module(ptx)?;
47                    let stream = ctx.default_stream();
48                    let f = module.load_function("hello_world")?;
49                    unsafe {
50                        stream
51                            .launch_builder(&f)
52                            .arg(&i)
53                            .launch(LaunchConfig::for_num_elems(1))
54                    }
55                });
56            }
57            Ok(())
58        })?;
59    }
60
61    Ok(())
62}