1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
//! The NVRTC wrapper.
//!
//! The most important types are:
//! * [CuModule](self::core::CuModule): a compiled CUDA module.
//! * [CuFunction](self::core::CuFunction): a compiled CUDA kernel.
//! * [KernelArgs](self::args::KernelArgs): a utility to encode kernel arguments as bytes.
//!
//! The typical workflow, compiling a (very inefficient) memcpy kernel:
//! ```
//! # use std::collections::HashMap;
//! # use kn_cuda_sys::wrapper::handle::{CudaStream, CudaDevice};
//! # use kn_cuda_sys::wrapper::rtc::args::KernelArgs;
//! # use kn_cuda_sys::wrapper::rtc::core::{CuModule};
//! # use kn_cuda_sys::wrapper::status::Status;
//! // define the source code
//! let source = r#"
//! typedef unsigned char u8;
//! __global__ void kernel_memcpy(u8* dst, u8* src, int n) {
//!     for (int i = 0; i < n; i++) {
//!       dst[i] = src[i];
//!     }
//! }"#;
//! let kernel_name = "kernel_memcpy";
//!
//! // select a device
//! let device = CudaDevice::new(0).unwrap();
//! let stream = CudaStream::new(device);
//!
//! // compile the module, indicating which function(s) we want to use later
//! let result = CuModule::from_source(device, source, None, &[kernel_name], &HashMap::new());
//!
//! // print warnings and errors if any
//! if !result.log.is_empty() {
//!     eprintln!("Source:\n{}\nLog:\n{}", result.source_with_line_numbers(), result.log);
//! }
//!
//! // get the kernel function
//! let kernel = result.get_function_by_name(kernel_name).unwrap().unwrap();
//!
//! // allocate inputs and outputs
//! let n: i32 = 16;
//! let ptr_dest = device.alloc(n as usize);
//! let ptr_src = device.alloc(n as usize);
//!
//! unsafe {
//!     // build kernel args
//!     let mut args = KernelArgs::new();
//!     args.push(ptr_dest.ptr());
//!     args.push(ptr_src.ptr());
//!     args.push_int(n);
//!     let args = args.finish();
//!
//!     // actually launch the kernel
//!     kernel.launch_kernel(1, 1, 0, &stream, &args).unwrap();
//! }
//!
//! // wait for the kernel to complete
//! stream.synchronize();
//! ```
//!
//! Modules and functions are reference counted to enable automatic memory management.

/// Kernel argument builder.
///
/// Provides `KernelArgs`, the utility used to encode kernel arguments as bytes:
/// values are pushed one at a time and the builder is then finished to obtain
/// the argument buffer that is passed to a kernel launch.
pub mod args;
/// Core abstractions and utilities.
///
/// Provides `CuModule` (a compiled CUDA module) and `CuFunction` (a compiled
/// CUDA kernel that can be launched on a stream).
pub mod core;