pub struct Module { /* private fields */ }
Expand description
A compiled CUDA module, loaded into a context.
Implementations§
Source§impl Module
impl Module
Sourcepub fn load_from_file(filename: &CStr) -> CudaResult<Module>
pub fn load_from_file(filename: &CStr) -> CudaResult<Module>
Load a module from the given file name into the current context.
The given file should be either a cubin file, a ptx file, or a fatbin file such as those produced by `nvcc`.
§Example
use rustacuda::module::Module;
use std::ffi::CString;
let filename = CString::new("./resources/add.ptx")?;
let module = Module::load_from_file(&filename)?;
Sourcepub fn load_from_string(image: &CStr) -> CudaResult<Module>
pub fn load_from_string(image: &CStr) -> CudaResult<Module>
Load a module from a `CStr`.
This is useful in combination with `include_str!`, to include the device code in the compiled executable.
The given `CStr` must contain the bytes of a cubin file, a ptx file, or a fatbin file such as those produced by `nvcc`.
§Example
use rustacuda::module::Module;
use std::ffi::CString;
let image = CString::new(include_str!("../resources/add.ptx"))?;
let module = Module::load_from_string(&image)?;
Examples found in repository?
8fn main() -> Result<(), Box<dyn Error>> {
9 // Set up the context, load the module, and create a stream to run kernels in.
10 rustacuda::init(CudaFlags::empty())?;
11 let device = Device::get_device(0)?;
12 let _ctx = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
13
14 let ptx = CString::new(include_str!("../resources/add.ptx"))?;
15 let module = Module::load_from_string(&ptx)?;
16 let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
17
18 // Create buffers for data
19 let mut in_x = DeviceBuffer::from_slice(&[1.0f32; 10])?;
20 let mut in_y = DeviceBuffer::from_slice(&[2.0f32; 10])?;
21 let mut out_1 = DeviceBuffer::from_slice(&[0.0f32; 10])?;
22 let mut out_2 = DeviceBuffer::from_slice(&[0.0f32; 10])?;
23
24 // This kernel adds each element in `in_x` and `in_y` and writes the result into `out`.
25 unsafe {
26 // Launch the kernel with one block of one thread, no dynamic shared memory on `stream`.
27 let result = launch!(module.sum<<<1, 1, 0, stream>>>(
28 in_x.as_device_ptr(),
29 in_y.as_device_ptr(),
30 out_1.as_device_ptr(),
31 out_1.len()
32 ));
33 result?;
34
35 // Launch the kernel again using the `function` form:
36 let function_name = CString::new("sum")?;
37 let sum = module.get_function(&function_name)?;
38 // Launch with 1x1x1 (1) blocks of 10x1x1 (10) threads, to show that you can use tuples to
39 // configure grid and block size.
40 let result = launch!(sum<<<(1, 1, 1), (10, 1, 1), 0, stream>>>(
41 in_x.as_device_ptr(),
42 in_y.as_device_ptr(),
43 out_2.as_device_ptr(),
44 out_2.len()
45 ));
46 result?;
47 }
48
49 // Kernel launches are asynchronous, so we wait for the kernels to finish executing.
50 stream.synchronize()?;
51
52 // Copy the results back to host memory
53 let mut out_host = [0.0f32; 20];
54 out_1.copy_to(&mut out_host[0..10])?;
55 out_2.copy_to(&mut out_host[10..20])?;
56
57 for x in out_host.iter() {
58 assert_eq!(3.0 as u32, *x as u32);
59 }
60
61 println!("Launched kernel successfully.");
62 Ok(())
63}
Sourcepub fn get_global<'a, T: DeviceCopy>(
&'a self,
name: &CStr,
) -> CudaResult<Symbol<'a, T>>
pub fn get_global<'a, T: DeviceCopy>( &'a self, name: &CStr, ) -> CudaResult<Symbol<'a, T>>
Get a reference to a global symbol, which can then be copied to/from.
§Panics:
This function panics if the size of the symbol is not the same as `mem::size_of::<T>()`.
§Examples
use rustacuda::module::Module;
use std::ffi::CString;
let ptx = CString::new(include_str!("../resources/add.ptx"))?;
let module = Module::load_from_string(&ptx)?;
let name = CString::new("my_constant")?;
let symbol = module.get_global::<u32>(&name)?;
let mut host_const = 0;
symbol.copy_to(&mut host_const)?;
assert_eq!(314, host_const);
Sourcepub fn get_function<'a>(&'a self, name: &CStr) -> CudaResult<Function<'a>>
pub fn get_function<'a>(&'a self, name: &CStr) -> CudaResult<Function<'a>>
Get a reference to a kernel function which can then be launched.
§Examples
use rustacuda::module::Module;
use std::ffi::CString;
let ptx = CString::new(include_str!("../resources/add.ptx"))?;
let module = Module::load_from_string(&ptx)?;
let name = CString::new("sum")?;
let function = module.get_function(&name)?;
Examples found in repository?
8fn main() -> Result<(), Box<dyn Error>> {
9 // Set up the context, load the module, and create a stream to run kernels in.
10 rustacuda::init(CudaFlags::empty())?;
11 let device = Device::get_device(0)?;
12 let _ctx = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
13
14 let ptx = CString::new(include_str!("../resources/add.ptx"))?;
15 let module = Module::load_from_string(&ptx)?;
16 let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
17
18 // Create buffers for data
19 let mut in_x = DeviceBuffer::from_slice(&[1.0f32; 10])?;
20 let mut in_y = DeviceBuffer::from_slice(&[2.0f32; 10])?;
21 let mut out_1 = DeviceBuffer::from_slice(&[0.0f32; 10])?;
22 let mut out_2 = DeviceBuffer::from_slice(&[0.0f32; 10])?;
23
24 // This kernel adds each element in `in_x` and `in_y` and writes the result into `out`.
25 unsafe {
26 // Launch the kernel with one block of one thread, no dynamic shared memory on `stream`.
27 let result = launch!(module.sum<<<1, 1, 0, stream>>>(
28 in_x.as_device_ptr(),
29 in_y.as_device_ptr(),
30 out_1.as_device_ptr(),
31 out_1.len()
32 ));
33 result?;
34
35 // Launch the kernel again using the `function` form:
36 let function_name = CString::new("sum")?;
37 let sum = module.get_function(&function_name)?;
38 // Launch with 1x1x1 (1) blocks of 10x1x1 (10) threads, to show that you can use tuples to
39 // configure grid and block size.
40 let result = launch!(sum<<<(1, 1, 1), (10, 1, 1), 0, stream>>>(
41 in_x.as_device_ptr(),
42 in_y.as_device_ptr(),
43 out_2.as_device_ptr(),
44 out_2.len()
45 ));
46 result?;
47 }
48
49 // Kernel launches are asynchronous, so we wait for the kernels to finish executing.
50 stream.synchronize()?;
51
52 // Copy the results back to host memory
53 let mut out_host = [0.0f32; 20];
54 out_1.copy_to(&mut out_host[0..10])?;
55 out_2.copy_to(&mut out_host[10..20])?;
56
57 for x in out_host.iter() {
58 assert_eq!(3.0 as u32, *x as u32);
59 }
60
61 println!("Launched kernel successfully.");
62 Ok(())
63}
Sourcepub fn drop(module: Module) -> DropResult<Module>
pub fn drop(module: Module) -> DropResult<Module>
Destroy a `Module`, returning an error on failure.
Destroying a module can return errors from previous asynchronous work. This function destroys the given module and returns the error and the un-destroyed module on failure.
§Example
use rustacuda::module::Module;
use std::ffi::CString;
let ptx = CString::new(include_str!("../resources/add.ptx"))?;
let module = Module::load_from_string(&ptx)?;
match Module::drop(module) {
Ok(()) => println!("Successfully destroyed"),
Err((e, module)) => {
println!("Failed to destroy module: {:?}", e);
// Do something with module
},
}