Struct cudarc::driver::safe::LaunchConfig

source ·
/// Configuration for `result::launch_kernel`.
///
/// See the CUDA docs for a description of each parameter.
pub struct LaunchConfig {
    /// (width, height, depth) of grid in blocks
    pub grid_dim: (u32, u32, u32),
    /// (x, y, z) dimension of each thread block
    pub block_dim: (u32, u32, u32),
    /// Dynamic shared-memory size per thread block in bytes
    pub shared_mem_bytes: u32,
}
Expand description

Configuration for result::launch_kernel

See the CUDA docs for a description of each parameter.

Fields§

§grid_dim: (u32, u32, u32)

(width, height, depth) of grid in blocks

§block_dim: (u32, u32, u32)

(x, y, z) dimension of each thread block

§shared_mem_bytes: u32

Dynamic shared-memory size per thread block in bytes

Implementations§

source§

impl LaunchConfig

source

pub fn for_num_elems(n: u32) -> Self

Creates a LaunchConfig with:

  • block_dim == (1024, 1, 1)
  • grid_dim == ((n + 1023) / 1024, 1, 1)
  • shared_mem_bytes == 0
Examples found in repository?
examples/05-device-repr.rs (line 49)
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
/// Example: loads `my_custom_kernel` and launches it with a `MyCoolRustStruct`
/// value passed directly as the kernel argument.
fn main() -> Result<(), DriverError> {
    let dev = CudaDevice::new(0)?;

    // Compile `PTX_SRC` and register the result under the name "module",
    // listing "my_custom_kernel" as the function to load from it.
    // NOTE(review): `unwrap` panics here if `compile_ptx` does not succeed,
    // rather than propagating through the `Result` return type.
    let ptx = compile_ptx(PTX_SRC).unwrap();
    dev.load_ptx(ptx, "module", &["my_custom_kernel"])?;

    // try changing some of these values to see a device assert
    let thing = MyCoolRustStruct {
        a: 1.0,
        b: 2.34,
        c: 57,
        d: 420,
    };

    // Look the function up by the same module / function names used in `load_ptx`.
    let f = dev.get_func("module", "my_custom_kernel").unwrap();

    // since MyCoolRustStruct implements DeviceRepr, we can pass it to launch.
    // NOTE(review): `launch` is unsafe; presumably the argument tuple must match
    // the kernel's parameter list — confirm against the kernel source.
    unsafe { f.launch(LaunchConfig::for_num_elems(1), (thing,)) }?;

    Ok(())
}
More examples
Hide additional examples
examples/03-launch-kernel.rs (line 21)
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
/// Example: loads `sin_kernel` from a pre-compiled PTX file, launches it over a
/// three-element input, and prints the result next to the host-computed sines.
fn main() -> Result<(), DriverError> {
    let dev = CudaDevice::new(0)?;

    // You can load a function from a pre-compiled PTX like so:
    dev.load_ptx(Ptx::from_file("./examples/sin.ptx"), "sin", &["sin_kernel"])?;

    // and then retrieve the function with `get_func`
    let f = dev.get_func("sin", "sin_kernel").unwrap();

    let a_host = [1.0, 2.0, 3.0];

    // Copy the input to the device; `b_dev` starts out as a clone of `a_dev`
    // and is the buffer passed mutably to the kernel.
    let a_dev = dev.htod_copy(a_host.into())?;
    let mut b_dev = a_dev.clone();

    // `n` matches the length of `a_host`; it sizes the launch configuration
    // and is also passed to the kernel as an `i32`.
    let n = 3;
    let cfg = LaunchConfig::for_num_elems(n);
    // NOTE(review): `launch` is unsafe; presumably the argument tuple must match
    // the kernel's parameter list — confirm against sin.ptx.
    unsafe { f.launch(cfg, (&mut b_dev, &a_dev, n as i32)) }?;

    // Bring both buffers back to the host.
    let a_host_2 = dev.sync_reclaim(a_dev)?;
    let b_host = dev.sync_reclaim(b_dev)?;

    println!("Found {:?}", b_host);
    println!("Expected {:?}", a_host.map(f32::sin));
    // The input buffer is expected to come back unchanged.
    assert_eq!(&a_host, a_host_2.as_slice());

    Ok(())
}
examples/04-streams.rs (line 11)
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
/// Example: same `sin_kernel` computation as the basic launch example, but the
/// kernel is launched on a stream forked from the default stream.
fn main() -> Result<(), DriverError> {
    let dev = CudaDevice::new(0)?;
    dev.load_ptx(Ptx::from_file("./examples/sin.ptx"), "sin", &["sin_kernel"])?;

    // `n` matches the length of `a_host` below; it sizes the launch configuration
    // and is also passed to the kernel as an `i32`.
    let n = 3;
    let cfg = LaunchConfig::for_num_elems(n);

    let a_host = [1.0, 2.0, 3.0];
    let a_dev = dev.htod_copy(a_host.into())?;
    let mut b_dev = a_dev.clone();

    // create a stream with `fork_default_stream()`
    // This synchronizes with the default stream, so since
    // we put this call **after** the `htod_copy` & `clone` above,
    // cuda will complete those orders **before** work on this stream
    // can start.
    let stream = dev.fork_default_stream()?;

    let f = dev.get_func("sin", "sin_kernel").unwrap();

    // we launch it differently too
    unsafe { f.launch_on_stream(&stream, cfg, (&mut b_dev, &a_dev, n as i32)) }?;

    // and we must join with the default work stream in order for copies
    // to work correctly.
    // NOTE: this is actually async with respect to the host!
    dev.wait_for(&stream)?;

    let a_host_2 = dev.sync_reclaim(a_dev)?;
    let b_host = dev.sync_reclaim(b_dev)?;

    println!("Found {:?}", b_host);
    println!("Expected {:?}", a_host.map(f32::sin));
    // The input buffer is expected to come back unchanged.
    assert_eq!(&a_host, a_host_2.as_slice());

    Ok(())
}

Trait Implementations§

source§

impl Clone for LaunchConfig

source§

fn clone(&self) -> LaunchConfig

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
source§

impl Debug for LaunchConfig

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
source§

impl Copy for LaunchConfig

Auto Trait Implementations§

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> ToOwned for T
where T: Clone,

§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.