Function cudarc::nvrtc::safe::compile_ptx

source ·
pub fn compile_ptx<S: AsRef<str>>(src: S) -> Result<Ptx, CompileError>
Expand description

Calls `compile_ptx_with_opts` with no options. `src` is the source string of a `.cu` file.

Example:

let ptx = compile_ptx("extern \"C\" __global__ void kernel() { }").unwrap();
Examples found in repository?
examples/05-device-repr.rs (line 35)
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
/// Compiles `PTX_SRC` at runtime, loads it on device 0 and launches
/// `my_custom_kernel` once with a custom Rust struct as its only argument.
///
/// Driver failures are returned to the caller; a compilation failure or a
/// missing kernel handle panics via `unwrap`.
fn main() -> Result<(), DriverError> {
    // try changing some of these values to see a device assert
    let kernel_arg = MyCoolRustStruct {
        a: 1.0,
        b: 2.34,
        c: 57,
        d: 420,
    };

    let device = CudaDevice::new(0)?;

    // Compile the kernel source, then register it under the module name "module".
    let compiled = compile_ptx(PTX_SRC).unwrap();
    device.load_ptx(compiled, "module", &["my_custom_kernel"])?;

    let kernel = device.get_func("module", "my_custom_kernel").unwrap();

    // since MyCoolRustStruct implements DeviceRepr, we can pass it to launch.
    let launch_cfg = LaunchConfig::for_num_elems(1);
    unsafe { kernel.launch(launch_cfg, (kernel_arg,)) }?;

    Ok(())
}
More examples
Hide additional examples
examples/matmul-kernel.rs (line 25)
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
/// Compiles and runs the `matmul` kernel on two 4-element host arrays and
/// prints the result.
///
/// After each stage the time elapsed since the start of `main` is printed, so
/// every reported duration is cumulative rather than per-stage.
fn main() -> Result<(), DriverError> {
    let timer = std::time::Instant::now();

    // Runtime compilation happens before any device is created.
    let compiled = compile_ptx(PTX_SRC).unwrap();
    println!("Compilation succeeded in {:?}", timer.elapsed());

    let device = CudaDevice::new(0)?;
    println!("Built in {:?}", timer.elapsed());

    // Register the module and fetch a handle to its single kernel.
    device.load_ptx(compiled, "matmul", &["matmul"])?;
    let kernel = device.get_func("matmul", "matmul").unwrap();
    println!("Loaded in {:?}", timer.elapsed());

    // Host-side inputs and the zero-initialised output buffer.
    let lhs_host = [1.0f32, 2.0, 3.0, 4.0];
    let rhs_host = [1.0f32, 2.0, 3.0, 4.0];
    let mut out_host = [0.0f32; 4];

    // Copy all three buffers to the device; the output starts out as zeros.
    let lhs_dev = device.htod_sync_copy(&lhs_host)?;
    let rhs_dev = device.htod_sync_copy(&rhs_host)?;
    let mut out_dev = device.htod_sync_copy(&out_host)?;

    println!("Copied in {:?}", timer.elapsed());

    // One block of 2x2 threads.
    // NOTE(review): the trailing `2i32` is presumably the matrix side length
    // expected by the kernel -- confirm against PTX_SRC.
    let launch_cfg = LaunchConfig {
        grid_dim: (1, 1, 1),
        block_dim: (2, 2, 1),
        shared_mem_bytes: 0,
    };
    unsafe { kernel.launch(launch_cfg, (&lhs_dev, &rhs_dev, &mut out_dev, 2i32)) }?;

    // Bring the result back into the host buffer and report it.
    device.dtoh_sync_copy_into(&out_dev, &mut out_host)?;
    println!("Found {:?} in {:?}", out_host, timer.elapsed());
    Ok(())
}
examples/06-threading.rs (line 26)
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
/// Launches the `hello_world` kernel from ten threads, twice: once sharing a
/// single device across threads, and once creating a device per thread.
///
/// Every worker thread returns a `Result`; the handles are joined explicitly
/// so that the first driver error reported by any worker is returned from
/// `main` instead of being silently dropped. A panic inside a worker is
/// re-raised on the main thread with its original payload.
fn main() -> Result<(), DriverError> {
    let cfg = LaunchConfig {
        grid_dim: (1, 1, 1),
        block_dim: (1, 1, 1),
        shared_mem_bytes: 0,
    };

    {
        // Option 1: use the same device on each thread.
        // This requires calling the CudaDevice::bind_to_thread() method.
        // Note that all kernels are submitted to the same stream/context,
        // so the kernels will still execute sequentially in the order
        // they are submitted to the gpu.
        let dev = CudaDevice::new(0)?;
        let ptx = compile_ptx(KERNEL_SRC).unwrap();
        dev.load_ptx(ptx, "kernel", &["hello_world"])?;

        // explicit borrow so we don't have to re-clone the device for each thread
        let dev = &dev;

        thread::scope(move |s| -> Result<(), DriverError> {
            // Spawn every worker first so they can run concurrently,
            // and only then start joining them.
            let mut handles = Vec::with_capacity(10);
            for i in 0..10i32 {
                handles.push(s.spawn(move || {
                    // NOTE: this is the important call to have
                    // without this, you'll get a CUDA_ERROR_INVALID_CONTEXT
                    dev.bind_to_thread()?;
                    let f = dev.get_func("kernel", "hello_world").unwrap();
                    unsafe { f.launch(cfg, (i,)) }
                }));
            }

            // Surface worker failures: propagate driver errors with `?` and
            // forward panics unchanged. Any handles left unjoined after an
            // early return are still joined automatically by the scope.
            for handle in handles {
                handle
                    .join()
                    .unwrap_or_else(|payload| std::panic::resume_unwind(payload))?;
            }
            Ok(())
        })?;
    }

    {
        // Option 2: create a new device in each thread
        // This requires loading the PTX for each device, since they won't
        // share loaded modules on the Rust side of things.
        let ptx = compile_ptx(KERNEL_SRC).unwrap();

        thread::scope(|s| -> Result<(), DriverError> {
            let mut handles = Vec::with_capacity(10);
            for i in 0..10i32 {
                // Each worker needs its own owned copy of the compiled PTX.
                let ptx = ptx.clone();
                handles.push(s.spawn(move || {
                    let dev = CudaDevice::new(0)?;
                    dev.load_ptx(ptx, "kernel", &["hello_world"])?;
                    let f = dev.get_func("kernel", "hello_world").unwrap();
                    unsafe { f.launch(cfg, (i + 100,)) }
                }));
            }

            // Same error handling as above: report the first failing worker.
            for handle in handles {
                handle
                    .join()
                    .unwrap_or_else(|payload| std::panic::resume_unwind(payload))?;
            }
            Ok(())
        })?;
    }

    Ok(())
}