pub struct Program { /* private fields */ }
Expand description
Abstraction that contains everything to run an OpenCL kernel on a GPU.
The majority of methods are the same as `crate::cuda::Program`, so you can write code using this API, which will then work with OpenCL as well as CUDA kernels.
Implementations§
impl Program
pub fn device_name(&self) -> &str
Returns the name of the GPU, e.g. “GeForce RTX 3090”.
pub fn from_opencl(device: &Device, src: &str) -> Result<Program, GPUError>
Creates a program for a specific device from OpenCL source code.
pub fn from_binary(device: &Device, bin: Vec<u8>) -> Result<Program, GPUError>
Creates a program for a specific device from a compiled OpenCL binary.
pub unsafe fn create_buffer<T>(
    &self,
    length: usize,
) -> Result<Buffer<T>, GPUError>
Creates a new buffer that can be used for input/output with the GPU.
The `length` is the number of elements to create.
It is usually used to create buffers that are initialized by the GPU. If you want to directly transfer data from the host to the GPU, you would use the safe `Program::create_buffer_from_slice` instead.
§Safety
This function isn’t actually unsafe; it’s marked as `unsafe` due to the CUDA version of it, where it is unsafe. This is done to have symmetry between both APIs.
Examples found in repository?
pub fn main() {
    // Define some data that should be operated on.
    let aa: Vec<u32> = vec![1, 2, 3, 4];
    let bb: Vec<u32> = vec![5, 6, 7, 8];

    // This is the core. Here we write the interaction with the GPU independent of whether it is
    // CUDA or OpenCL.
    let closures = program_closures!(|program, _args| -> Result<Vec<u32>, GPUError> {
        // Make sure the input data has the same length.
        assert_eq!(aa.len(), bb.len());
        let length = aa.len();

        // Copy the data to the GPU.
        let aa_buffer = program.create_buffer_from_slice(&aa)?;
        let bb_buffer = program.create_buffer_from_slice(&bb)?;

        // The result buffer has the same length as the input buffers.
        let result_buffer = unsafe { program.create_buffer::<u32>(length)? };

        // Get the kernel.
        let kernel = program.create_kernel("add", 1, 1)?;

        // Execute the kernel.
        kernel
            .arg(&(length as u32))
            .arg(&aa_buffer)
            .arg(&bb_buffer)
            .arg(&result_buffer)
            .run()?;

        // Get the resulting data.
        let mut result = vec![0u32; length];
        program.read_into_buffer(&result_buffer, &mut result)?;

        Ok(result)
    });

    // Get the first available device.
    let device = *Device::all().first().unwrap();

    // First we run it on CUDA.
    let cuda_program = cuda(device);
    let cuda_result = cuda_program.run(closures, ()).unwrap();
    assert_eq!(cuda_result, [6, 8, 10, 12]);
    println!("CUDA result: {:?}", cuda_result);

    // Then we run it on OpenCL.
    let opencl_program = opencl(device);
    let opencl_result = opencl_program.run(closures, ()).unwrap();
    assert_eq!(opencl_result, [6, 8, 10, 12]);
    println!("OpenCL result: {:?}", opencl_result);
}
pub fn create_buffer_from_slice<T>(
    &self,
    slice: &[T],
) -> Result<Buffer<T>, GPUError>
Creates a new buffer on the GPU and initializes with the given slice.
Examples found in repository?
pub fn main() {
    // Define some data that should be operated on.
    let aa: Vec<u32> = vec![1, 2, 3, 4];
    let bb: Vec<u32> = vec![5, 6, 7, 8];

    // This is the core. Here we write the interaction with the GPU independent of whether it is
    // CUDA or OpenCL.
    let closures = program_closures!(|program, _args| -> Result<Vec<u32>, GPUError> {
        // Make sure the input data has the same length.
        assert_eq!(aa.len(), bb.len());
        let length = aa.len();

        // Copy the data to the GPU.
        let aa_buffer = program.create_buffer_from_slice(&aa)?;
        let bb_buffer = program.create_buffer_from_slice(&bb)?;

        // The result buffer has the same length as the input buffers.
        let result_buffer = unsafe { program.create_buffer::<u32>(length)? };

        // Get the kernel.
        let kernel = program.create_kernel("add", 1, 1)?;

        // Execute the kernel.
        kernel
            .arg(&(length as u32))
            .arg(&aa_buffer)
            .arg(&bb_buffer)
            .arg(&result_buffer)
            .run()?;

        // Get the resulting data.
        let mut result = vec![0u32; length];
        program.read_into_buffer(&result_buffer, &mut result)?;

        Ok(result)
    });

    // Get the first available device.
    let device = *Device::all().first().unwrap();

    // First we run it on CUDA.
    let cuda_program = cuda(device);
    let cuda_result = cuda_program.run(closures, ()).unwrap();
    assert_eq!(cuda_result, [6, 8, 10, 12]);
    println!("CUDA result: {:?}", cuda_result);

    // Then we run it on OpenCL.
    let opencl_program = opencl(device);
    let opencl_result = opencl_program.run(closures, ()).unwrap();
    assert_eq!(opencl_result, [6, 8, 10, 12]);
    println!("OpenCL result: {:?}", opencl_result);
}
pub fn create_kernel(
    &self,
    name: &str,
    global_work_size: usize,
    local_work_size: usize,
) -> Result<Kernel<'_>, GPUError>
Returns a kernel.
The `global_work_size` does not follow the OpenCL definition. It is not the total number of threads. Instead it follows CUDA’s definition and is the number of `local_work_size`-sized thread groups. So the total number of threads is `global_work_size * local_work_size`.
Examples found in repository?
pub fn main() {
    // Define some data that should be operated on.
    let aa: Vec<u32> = vec![1, 2, 3, 4];
    let bb: Vec<u32> = vec![5, 6, 7, 8];

    // This is the core. Here we write the interaction with the GPU independent of whether it is
    // CUDA or OpenCL.
    let closures = program_closures!(|program, _args| -> Result<Vec<u32>, GPUError> {
        // Make sure the input data has the same length.
        assert_eq!(aa.len(), bb.len());
        let length = aa.len();

        // Copy the data to the GPU.
        let aa_buffer = program.create_buffer_from_slice(&aa)?;
        let bb_buffer = program.create_buffer_from_slice(&bb)?;

        // The result buffer has the same length as the input buffers.
        let result_buffer = unsafe { program.create_buffer::<u32>(length)? };

        // Get the kernel.
        let kernel = program.create_kernel("add", 1, 1)?;

        // Execute the kernel.
        kernel
            .arg(&(length as u32))
            .arg(&aa_buffer)
            .arg(&bb_buffer)
            .arg(&result_buffer)
            .run()?;

        // Get the resulting data.
        let mut result = vec![0u32; length];
        program.read_into_buffer(&result_buffer, &mut result)?;

        Ok(result)
    });

    // Get the first available device.
    let device = *Device::all().first().unwrap();

    // First we run it on CUDA.
    let cuda_program = cuda(device);
    let cuda_result = cuda_program.run(closures, ()).unwrap();
    assert_eq!(cuda_result, [6, 8, 10, 12]);
    println!("CUDA result: {:?}", cuda_result);

    // Then we run it on OpenCL.
    let opencl_program = opencl(device);
    let opencl_result = opencl_program.run(closures, ()).unwrap();
    assert_eq!(opencl_result, [6, 8, 10, 12]);
    println!("OpenCL result: {:?}", opencl_result);
}
pub fn write_from_buffer<T>(
    &self,
    buffer: &mut Buffer<T>,
    data: &[T],
) -> Result<(), GPUError>
Puts data from an existing buffer onto the GPU.
pub fn read_into_buffer<T>(
    &self,
    buffer: &Buffer<T>,
    data: &mut [T],
) -> Result<(), GPUError>
Reads data from the GPU into an existing buffer.
Examples found in repository?
pub fn main() {
    // Define some data that should be operated on.
    let aa: Vec<u32> = vec![1, 2, 3, 4];
    let bb: Vec<u32> = vec![5, 6, 7, 8];

    // This is the core. Here we write the interaction with the GPU independent of whether it is
    // CUDA or OpenCL.
    let closures = program_closures!(|program, _args| -> Result<Vec<u32>, GPUError> {
        // Make sure the input data has the same length.
        assert_eq!(aa.len(), bb.len());
        let length = aa.len();

        // Copy the data to the GPU.
        let aa_buffer = program.create_buffer_from_slice(&aa)?;
        let bb_buffer = program.create_buffer_from_slice(&bb)?;

        // The result buffer has the same length as the input buffers.
        let result_buffer = unsafe { program.create_buffer::<u32>(length)? };

        // Get the kernel.
        let kernel = program.create_kernel("add", 1, 1)?;

        // Execute the kernel.
        kernel
            .arg(&(length as u32))
            .arg(&aa_buffer)
            .arg(&bb_buffer)
            .arg(&result_buffer)
            .run()?;

        // Get the resulting data.
        let mut result = vec![0u32; length];
        program.read_into_buffer(&result_buffer, &mut result)?;

        Ok(result)
    });

    // Get the first available device.
    let device = *Device::all().first().unwrap();

    // First we run it on CUDA.
    let cuda_program = cuda(device);
    let cuda_result = cuda_program.run(closures, ()).unwrap();
    assert_eq!(cuda_result, [6, 8, 10, 12]);
    println!("CUDA result: {:?}", cuda_result);

    // Then we run it on OpenCL.
    let opencl_program = opencl(device);
    let opencl_result = opencl_program.run(closures, ()).unwrap();
    assert_eq!(opencl_result, [6, 8, 10, 12]);
    println!("OpenCL result: {:?}", opencl_result);
}