Struct cudarc::driver::safe::LaunchConfig
/// Configuration for result::launch_kernel
source · pub struct LaunchConfig {
/// (width, height, depth) of grid in blocks
pub grid_dim: (u32, u32, u32),
/// (x, y, z) dimension of each thread block
pub block_dim: (u32, u32, u32),
/// Dynamic shared-memory size per thread block in bytes
pub shared_mem_bytes: u32,
}
Expand description
Configuration for result::launch_kernel
See cuda docs for description of each parameter.
Fields§
grid_dim: (u32, u32, u32)
(width, height, depth) of grid in blocks
block_dim: (u32, u32, u32)
(x, y, z) dimension of each thread block
shared_mem_bytes: u32
Dynamic shared-memory size per thread block in bytes
Implementations§
source§impl LaunchConfig
impl LaunchConfig
sourcepub fn for_num_elems(n: u32) -> Self
pub fn for_num_elems(n: u32) -> Self
Creates a LaunchConfig with:
- block_dim == 1024
- grid_dim == (n + 1023) / 1024
- shared_mem_bytes == 0
Examples found in repository?
examples/05-device-repr.rs (line 49)
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
// Example: launch `my_custom_kernel` with a user-defined struct as its only argument.
fn main() -> Result<(), DriverError> {
    let device = CudaDevice::new(0)?;
    // Compile the PTX source and register the kernel under the "module" name.
    device.load_ptx(
        compile_ptx(PTX_SRC).unwrap(),
        "module",
        &["my_custom_kernel"],
    )?;

    // Tweak any of these field values to see a device assert.
    let payload = MyCoolRustStruct {
        a: 1.0,
        b: 2.34,
        c: 57,
        d: 420,
    };

    let kernel = device.get_func("module", "my_custom_kernel").unwrap();
    let cfg = LaunchConfig::for_num_elems(1);
    // `MyCoolRustStruct` implements `DeviceRepr`, which is what lets it be
    // handed to `launch` as a kernel argument.
    unsafe { kernel.launch(cfg, (payload,)) }?;
    Ok(())
}
More examples
examples/03-launch-kernel.rs (line 21)
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
// Example: load `sin_kernel` from a pre-compiled PTX file, launch it over three
// elements, then print the result next to the host-side `f32::sin` values.
fn main() -> Result<(), DriverError> {
let dev = CudaDevice::new(0)?;
// You can load a function from a pre-compiled PTX like so:
dev.load_ptx(Ptx::from_file("./examples/sin.ptx"), "sin", &["sin_kernel"])?;
// and then retrieve the function with `get_func`
let f = dev.get_func("sin", "sin_kernel").unwrap();
let a_host = [1.0, 2.0, 3.0];
// `a_dev` is the input buffer; `b_dev` starts as a clone of it and is the
// buffer handed to the kernel as `&mut`.
let a_dev = dev.htod_copy(a_host.into())?;
let mut b_dev = a_dev.clone();
let n = 3;
// One launch configuration sized for `n` elements.
let cfg = LaunchConfig::for_num_elems(n);
// Arguments are (mutable buffer, read-only buffer, element count as i32).
// NOTE(review): presumably this matches the parameter order of `sin_kernel`
// in sin.ptx -- confirm against the kernel source.
unsafe { f.launch(cfg, (&mut b_dev, &a_dev, n as i32)) }?;
// Reclaim both buffers; the results are compared/printed as host slices below.
let a_host_2 = dev.sync_reclaim(a_dev)?;
let b_host = dev.sync_reclaim(b_dev)?;
println!("Found {:?}", b_host);
println!("Expected {:?}", a_host.map(f32::sin));
// The buffer passed as `&a_dev` must come back equal to the original input.
assert_eq!(&a_host, a_host_2.as_slice());
Ok(())
}
examples/04-streams.rs (line 11)
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
// Example: launch `sin_kernel` on a stream forked from the default stream,
// then join that stream back before reclaiming the buffers.
fn main() -> Result<(), DriverError> {
let dev = CudaDevice::new(0)?;
dev.load_ptx(Ptx::from_file("./examples/sin.ptx"), "sin", &["sin_kernel"])?;
let n = 3;
// One launch configuration sized for `n` elements.
let cfg = LaunchConfig::for_num_elems(n);
let a_host = [1.0, 2.0, 3.0];
// `a_dev` is the input buffer; `b_dev` starts as a clone of it and is the
// buffer handed to the kernel as `&mut`.
let a_dev = dev.htod_copy(a_host.into())?;
let mut b_dev = a_dev.clone();
// create a stream with `fork_default_stream()`
// This synchronizes with the default stream, so since
// we put this call **after** the `htod_copy` & `clone` above,
// cuda will complete those orders **before** work on this stream
// can start.
let stream = dev.fork_default_stream()?;
let f = dev.get_func("sin", "sin_kernel").unwrap();
// we launch it differently too
unsafe { f.launch_on_stream(&stream, cfg, (&mut b_dev, &a_dev, n as i32)) }?;
// and we must join with the default work stream in order for copies
// to work correctly.
// NOTE: this is actually async with respect to the host!
dev.wait_for(&stream)?;
// Reclaim both buffers; the results are compared/printed as host slices below.
let a_host_2 = dev.sync_reclaim(a_dev)?;
let b_host = dev.sync_reclaim(b_dev)?;
println!("Found {:?}", b_host);
println!("Expected {:?}", a_host.map(f32::sin));
// The buffer passed as `&a_dev` must come back equal to the original input.
assert_eq!(&a_host, a_host_2.as_slice());
Ok(())
}
Trait Implementations§
source§impl Clone for LaunchConfig
impl Clone for LaunchConfig
source§fn clone(&self) -> LaunchConfig
fn clone(&self) -> LaunchConfig
Returns a copy of the value. Read more
1.0.0 · source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
source§impl Debug for LaunchConfig
impl Debug for LaunchConfig
impl Copy for LaunchConfig
Auto Trait Implementations§
impl Freeze for LaunchConfig
impl RefUnwindSafe for LaunchConfig
impl Send for LaunchConfig
impl Sync for LaunchConfig
impl Unpin for LaunchConfig
impl UnwindSafe for LaunchConfig
Blanket Implementations§
source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more