1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
use cudarc::driver::*;
use cudarc::nvrtc::compile_ptx;
use std::thread;
/// CUDA C source for the `hello_world` kernel.
///
/// The kernel takes a single `int` and prints it with device-side `printf`.
/// A raw string literal is used so the embedded quotes and the `\n` escape
/// (which must reach the CUDA compiler as a backslash followed by `n`) need
/// no Rust-level escaping.
const KERNEL_SRC: &str = r#"
extern "C" __global__ void hello_world(int i) {
printf("Hello from the cuda kernel in thread %d\n", i);
}
"#;
/// Launches the `hello_world` kernel from ten threads, in two different ways.
///
/// 1. One context and one loaded module are created up front and a clone of
///    each handle is moved into every worker thread.
/// 2. Every worker thread compiles the kernel, creates its own context handle
///    for device 0 and loads its own module.
///
/// Each worker waits for its launch to finish, and every worker is joined
/// explicitly so that a failure inside a thread is reported to the caller
/// instead of being dropped together with the join handle.
///
/// # Errors
///
/// Returns the first [`DriverError`] produced while creating a context,
/// loading the module or function, launching the kernel, or synchronizing
/// the stream — whether it happened on the main thread or in a worker.
///
/// # Panics
///
/// Panics if `KERNEL_SRC` fails to compile, and re-raises the panic of any
/// worker thread that panicked.
fn main() -> Result<(), DriverError> {
    // Option 1: sharing ctx & module between threads
    thread::scope(|s| -> Result<(), DriverError> {
        let ptx = compile_ptx(KERNEL_SRC).expect("KERNEL_SRC should compile with NVRTC");
        let ctx = CudaContext::new(0)?;
        let module = ctx.load_module(ptx)?;

        // Spawn every worker before joining any of them; collecting eagerly
        // keeps the threads running concurrently.
        let workers: Vec<_> = (0..10i32)
            .map(|i| {
                // `ctx` and `module` are locals of this closure, so scoped
                // threads cannot borrow them; each thread gets its own clone.
                let thread_ctx = ctx.clone();
                let thread_module = module.clone();
                s.spawn(move || -> Result<(), DriverError> {
                    let stream = thread_ctx.default_stream();
                    let f = thread_module.load_function("hello_world")?;
                    // SAFETY: `hello_world` is declared in `KERNEL_SRC` with a
                    // single `int` parameter, and exactly one `i32` is pushed.
                    let launched = unsafe {
                        stream
                            .launch_builder(&f)
                            .arg(&i)
                            .launch(LaunchConfig::for_num_elems(1))
                    };
                    launched?;
                    // The launch is asynchronous: wait for the kernel so that
                    // execution errors are reported from this thread.
                    stream.synchronize()
                })
            })
            .collect();

        for worker in workers {
            // Propagate a worker's panic unchanged, and its error via `?`.
            worker
                .join()
                .unwrap_or_else(|payload| std::panic::resume_unwind(payload))?;
        }
        Ok(())
    })?;

    // Option 2: initializing different context in each
    // Note that this will still schedule to the same stream since we are using the
    // default stream here on the same device.
    thread::scope(|s| -> Result<(), DriverError> {
        let workers: Vec<_> = (0..10i32)
            .map(|i| {
                s.spawn(move || -> Result<(), DriverError> {
                    let ptx =
                        compile_ptx(KERNEL_SRC).expect("KERNEL_SRC should compile with NVRTC");
                    let ctx = CudaContext::new(0)?;
                    let module = ctx.load_module(ptx)?;
                    let stream = ctx.default_stream();
                    let f = module.load_function("hello_world")?;
                    // SAFETY: `hello_world` is declared in `KERNEL_SRC` with a
                    // single `int` parameter, and exactly one `i32` is pushed.
                    let launched = unsafe {
                        stream
                            .launch_builder(&f)
                            .arg(&i)
                            .launch(LaunchConfig::for_num_elems(1))
                    };
                    launched?;
                    // Wait for the kernel before this thread's handles are dropped.
                    stream.synchronize()
                })
            })
            .collect();

        for worker in workers {
            // Propagate a worker's panic unchanged, and its error via `?`.
            worker
                .join()
                .unwrap_or_else(|payload| std::panic::resume_unwind(payload))?;
        }
        Ok(())
    })?;

    Ok(())
}