Struct LaunchConfig

Source
/// Configuration for `result::launch_kernel`.
pub struct LaunchConfig {
    /// (width, height, depth) of the grid, in blocks.
    pub grid_dim: (u32, u32, u32),
    /// (x, y, z) dimensions of each thread block.
    pub block_dim: (u32, u32, u32),
    /// Dynamic shared-memory size per thread block, in bytes.
    pub shared_mem_bytes: u32,
}
Expand description

Configuration for result::launch_kernel

See the CUDA documentation for a description of each parameter.

Fields§

§grid_dim: (u32, u32, u32)

(width, height, depth) of grid in blocks

§block_dim: (u32, u32, u32)

(x, y, z) dimension of each thread block

§shared_mem_bytes: u32

Dynamic shared-memory size per thread block in bytes

Implementations§

Source§

impl LaunchConfig

Source

pub fn for_num_elems(n: u32) -> Self

Creates a LaunchConfig with:

  • block_dim == (1024, 1, 1)
  • grid_dim == ((n + 1023) / 1024, 1, 1)
  • shared_mem_bytes == 0
Examples found in repository?
examples/05-device-repr.rs (line 51)
32fn main() -> Result<(), DriverError> {
33    let ctx = CudaContext::new(0)?;
34    let stream = ctx.default_stream();
35
36    let ptx = compile_ptx(PTX_SRC).unwrap();
37    let module = ctx.load_module(ptx)?;
38    let f = module.load_function("my_custom_kernel")?;
39
40    // try changing some of these values to see a device assert
41    let thing = MyCoolRustStruct {
42        a: 1.0,
43        b: 2.34,
44        c: 57,
45        d: 420,
46    };
47
48    let mut builder = stream.launch_builder(&f);
49    // since MyCoolRustStruct implements DeviceRepr, we can pass it to launch.
50    builder.arg(&thing);
51    unsafe { builder.launch(LaunchConfig::for_num_elems(1)) }?;
52
53    Ok(())
54}
More examples
Hide additional examples
examples/03-launch-kernel.rs (line 23)
6fn main() -> Result<(), DriverError> {
7    let ctx = CudaContext::new(0)?;
8    let stream = ctx.default_stream();
9
10    // You can load a function from a pre-compiled PTX like so:
11    let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
12
13    // and then load a function from it:
14    let f = module.load_function("sin_kernel").unwrap();
15
16    let a_host = [1.0, 2.0, 3.0];
17
18    let a_dev = stream.memcpy_stod(&a_host)?;
19    let mut b_dev = a_dev.clone();
20
21    // we use a builder pattern to launch kernels.
22    let n = 3i32;
23    let cfg = LaunchConfig::for_num_elems(n as u32);
24    let mut launch_args = stream.launch_builder(&f);
25    launch_args.arg(&mut b_dev);
26    launch_args.arg(&a_dev);
27    launch_args.arg(&n);
28    unsafe { launch_args.launch(cfg) }?;
29
30    let a_host_2 = stream.memcpy_dtov(&a_dev)?;
31    let b_host = stream.memcpy_dtov(&b_dev)?;
32
33    println!("Found {:?}", b_host);
34    println!("Expected {:?}", a_host.map(f32::sin));
35    assert_eq!(&a_host, a_host_2.as_slice());
36
37    Ok(())
38}
examples/04-streams.rs (line 29)
6fn main() -> Result<(), DriverError> {
7    let ctx = CudaContext::new(0)?;
8    let stream = ctx.default_stream();
9
10    let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
11    let f = module.load_function("sin_kernel")?;
12
13    let n = 3i32;
14    let a_host = [1.0, 2.0, 3.0];
15    let a_dev = stream.memcpy_stod(&a_host)?;
16    let mut b_dev = stream.alloc_zeros::<f32>(n as usize)?;
17
18    // we can safely create a second stream using [CudaStream::fork()].
19    // This synchronizes with the source stream, so
20    // the `memcpy_stod` & `alloc_zeros` above will complete **before**
21    // work on this stream can start.
22    let stream2 = stream.fork()?;
23
24    // now we launch this work on the other stream
25    let mut builder = stream2.launch_builder(&f);
26    builder.arg(&mut b_dev); // NOTE: tells cudarc that we are mutating this.
27    builder.arg(&a_dev); // NOTE: tells cudarc that we are reading from this slice
28    builder.arg(&n);
29    unsafe { builder.launch(LaunchConfig::for_num_elems(n as u32)) }?;
30
31    // cudarc automatically manages multi stream synchronization,
32    // so even though we launched the above on a separate stream,
33    // doing this device to host transfer will still properly synchronize.
34    // a_dev doesn't need to synchronize at all since we specified it is just
35    // being read from.
36    // b_dev DOES need to be synchronized, because it was mutated on a different stream.
37    let a_host_2 = stream.memcpy_dtov(&a_dev)?;
38    let b_host = stream.memcpy_dtov(&b_dev)?;
39
40    println!("Found {:?}", b_host);
41    println!("Expected {:?}", a_host.map(f32::sin));
42    assert_eq!(&a_host, a_host_2.as_slice());
43
44    Ok(())
45}
examples/06-threading.rs (line 29)
12fn main() -> Result<(), DriverError> {
13    {
14        // Option 1: sharing ctx & module between threads
15        thread::scope(|s| {
16            let ptx = compile_ptx(KERNEL_SRC).unwrap();
17            let ctx = CudaContext::new(0)?;
18            let module = ctx.load_module(ptx)?;
19            for i in 0..10i32 {
20                let thread_ctx = ctx.clone();
21                let thread_module = module.clone();
22                s.spawn(move || {
23                    let stream = thread_ctx.default_stream();
24                    let f = thread_module.load_function("hello_world")?;
25                    unsafe {
26                        stream
27                            .launch_builder(&f)
28                            .arg(&i)
29                            .launch(LaunchConfig::for_num_elems(1))
30                    }
31                });
32            }
33            Ok(())
34        })?;
35    }
36
37    {
38        // Option 2: initializing different context in each
39        // Note that this will still schedule to the same stream since we are using the
40        // default stream here on the same device.
41        thread::scope(move |s| {
42            for i in 0..10i32 {
43                s.spawn(move || {
44                    let ptx = compile_ptx(KERNEL_SRC).unwrap();
45                    let ctx = CudaContext::new(0)?;
46                    let module = ctx.load_module(ptx)?;
47                    let stream = ctx.default_stream();
48                    let f = module.load_function("hello_world")?;
49                    unsafe {
50                        stream
51                            .launch_builder(&f)
52                            .arg(&i)
53                            .launch(LaunchConfig::for_num_elems(1))
54                    }
55                });
56            }
57            Ok(())
58        })?;
59    }
60
61    Ok(())
62}

Trait Implementations§

Source§

impl Clone for LaunchConfig

Source§

fn clone(&self) -> LaunchConfig

Returns a copy of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl Debug for LaunchConfig

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl Copy for LaunchConfig

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.