kn_cuda_sys/wrapper/rtc/mod.rs
1//! The NVRTC wrapper.
2//!
3//! The most important types are:
4//! * [CuModule](self::core::CuModule): a compiled CUDA module
5//! * [CuFunction](self::core::CuFunction): a compiled CUDA kernel
6//! * [KernelArgs](self::args::KernelArgs): a utility to encode kernel arguments as bytes.
7//!
8//! The typical workflow, compiling a (very inefficient) memcpy kernel:
9//! ```
10//! # use std::collections::HashMap;
11//! # use kn_cuda_sys::wrapper::handle::{CudaStream, CudaDevice};
12//! # use kn_cuda_sys::wrapper::rtc::args::KernelArgs;
13//! # use kn_cuda_sys::wrapper::rtc::core::{CuModule};
14//! # use kn_cuda_sys::wrapper::status::Status;
15//! // define the source code
16//! let source = r#"
17//! typedef unsigned char u8;
18//! __global__ void kernel_memcpy(u8* dst, u8* src, int n) {
19//! for (int i = 0; i < n; i++) {
20//! dst[i] = src[i];
21//! }
22//! }"#;
23//! let kernel_name = "kernel_memcpy";
24//!
25//! // select a device
26//! let device = CudaDevice::new(0).unwrap();
27//! let stream = CudaStream::new(device);
28//!
29//! // compile the module, indicating which function(s) we want to use later
30//! let result = CuModule::from_source(device, source, None, &[kernel_name], &HashMap::new());
31//!
32//! // print warnings and errors if any
33//! if !result.log.is_empty() {
34//! eprintln!("Source:\n{}\nLog:\n{}", result.source_with_line_numbers(), result.log);
35//! }
36//!
37//! // get the kernel function
38//! let kernel = result.get_function_by_name(kernel_name).unwrap().unwrap();
39//!
40//! // allocate inputs and outputs
41//! let n: i32 = 16;
42//! let ptr_dest = device.alloc(n as usize);
43//! let ptr_src = device.alloc(n as usize);
44//!
45//! unsafe {
46//! // build kernel args
47//! let mut args = KernelArgs::new();
48//! args.push(ptr_dest.ptr());
49//! args.push(ptr_src.ptr());
50//! args.push_int(n);
51//! let args = args.finish();
52//!
53//! // actually launch the kernel
54//! kernel.launch_kernel(1, 1, 0, &stream, &args).unwrap();
55//! }
56//!
57//! // wait for the kernel to complete
58//! stream.synchronize();
59//! ```
60//!
61//! Modules and functions are reference counted to enable automatic memory management.
62
63/// Kernel argument builder: [`KernelArgs`](self::args::KernelArgs) encodes kernel arguments as bytes.
64pub mod args;
65/// Core abstractions and utilities, including the compiled module type [`CuModule`](self::core::CuModule).
66pub mod core;