extern crate opencl3;
use cl3::device::{
CL_DEVICE_SVM_COARSE_GRAIN_BUFFER, CL_DEVICE_SVM_FINE_GRAIN_BUFFER, CL_DEVICE_TYPE_GPU,
};
use opencl3::command_queue::{CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_QUEUE_PROFILING_ENABLE};
use opencl3::context::Context;
use opencl3::device::Device;
use opencl3::event;
use opencl3::kernel::ExecuteKernel;
use opencl3::memory::{Buffer, CL_MAP_READ, CL_MAP_WRITE, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY};
use opencl3::platform::get_platforms;
use opencl3::svm::SvmVec;
use opencl3::types::{cl_event, cl_float, CL_FALSE, CL_TRUE};
use std::ffi::CString;
use std::ptr;
const PROGRAM_SOURCE: &str = r#"
kernel void saxpy_float (global float* z,
global float const* x,
global float const* y,
float a)
{
size_t i = get_global_id(0);
z[i] = a*x[i] + y[i];
}"#;
const KERNEL_NAME: &str = "saxpy_float";
#[test]
#[ignore]
fn test_opencl_1_2_example() {
let platforms = get_platforms().unwrap();
assert!(0 < platforms.len());
let platform = &platforms[0];
let devices = platform
.get_devices(CL_DEVICE_TYPE_GPU)
.expect("Platform::get_devices failed");
assert!(0 < devices.len());
let platform_name = platform.name().unwrap();
println!("Platform Name: {:?}", platform_name);
let device = Device::new(devices[0]);
let vendor = device.vendor().expect("Device.vendor failed");
let vendor_id = device.vendor_id().expect("Device.vendor_id failed");
println!("OpenCL device vendor name: {:?}", vendor);
println!("OpenCL device vendor id: {:X}", vendor_id);
let mut context = Context::from_device(device).expect("Context::from_device failed");
context
.create_command_queues(CL_QUEUE_PROFILING_ENABLE)
.expect("Context::create_command_queues failed");
let src = CString::new(PROGRAM_SOURCE).unwrap();
let options = CString::default();
context
.build_program_from_source(&src, &options)
.expect("Context::build_program_from_source failed");
assert!(!context.kernels().is_empty());
for kernel_name in context.kernels().keys() {
println!("Kernel name: {:?}", kernel_name);
}
const ARRAY_SIZE: usize = 1000;
let ones: [cl_float; ARRAY_SIZE] = [1.0; ARRAY_SIZE];
let mut sums: [cl_float; ARRAY_SIZE] = [0.0; ARRAY_SIZE];
for i in 0..ARRAY_SIZE {
sums[i] = 1.0 + 1.0 * i as cl_float;
}
let x = Buffer::create::<cl_float>(&context, CL_MEM_WRITE_ONLY, ARRAY_SIZE, ptr::null_mut())
.unwrap();
let y = Buffer::create::<cl_float>(&context, CL_MEM_WRITE_ONLY, ARRAY_SIZE, ptr::null_mut())
.unwrap();
let z = Buffer::create::<cl_float>(&context, CL_MEM_READ_ONLY, ARRAY_SIZE, ptr::null_mut())
.unwrap();
let queue = context.default_queue();
let mut events: Vec<cl_event> = Vec::default();
let _x_write_event = queue
.enqueue_write_buffer(x.get(), CL_TRUE, 0, &ones, &events)
.unwrap();
let y_write_event = queue
.enqueue_write_buffer(y.get(), CL_FALSE, 0, &sums, &events)
.unwrap();
let kernel_name = CString::new(KERNEL_NAME).unwrap();
if let Some(kernel) = context.get_kernel(&kernel_name) {
let a: cl_float = 300.0;
let kernel_event = ExecuteKernel::new(kernel)
.set_arg(&z)
.set_arg(&x)
.set_arg(&y)
.set_arg(&a)
.set_global_work_size(ARRAY_SIZE)
.set_wait_event(y_write_event.get())
.enqueue_nd_range(&queue)
.unwrap();
events.push(kernel_event.get());
let mut results: [cl_float; ARRAY_SIZE] = [0.0; ARRAY_SIZE];
let _event = queue
.enqueue_read_buffer(z.get(), CL_FALSE, 0, &mut results, &events)
.unwrap();
events.clear();
queue.finish().unwrap();
assert_eq!(1300.0, results[ARRAY_SIZE - 1]);
println!("results back: {}", results[ARRAY_SIZE - 1]);
let start_time = kernel_event.profiling_command_start().unwrap();
let end_time = kernel_event.profiling_command_end().unwrap();
let duration = end_time - start_time;
println!("kernel execution duration (ns): {}", duration);
}
}
#[test]
#[ignore]
fn test_opencl_svm_example() {
let platforms = get_platforms().unwrap();
assert!(0 < platforms.len());
let opencl_2: String = "OpenCL 2".to_string();
let mut device_id = ptr::null_mut();
let mut is_svm_capable: bool = false;
for p in platforms {
let platform_version = p.version().unwrap().into_string().unwrap();
if platform_version.contains(&opencl_2) {
let devices = p
.get_devices(CL_DEVICE_TYPE_GPU)
.expect("Platform::get_devices failed");
for dev_id in devices {
let device = Device::new(dev_id);
let svm_mem_capability = device.svm_mem_capability();
is_svm_capable = 0 < svm_mem_capability
& (CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER);
if is_svm_capable {
device_id = dev_id;
break;
}
}
}
}
if is_svm_capable {
let device = Device::new(device_id);
let vendor = device.vendor().expect("Device.vendor failed");
let vendor_id = device.vendor_id().expect("Device.vendor_id failed");
println!("OpenCL device vendor name: {:?}", vendor);
println!("OpenCL device vendor id: {:X}", vendor_id);
let mut context = Context::from_device(device).expect("Context::from_device failed");
context
.create_command_queues_with_properties(
CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
0,
)
.expect("Context::create_command_queues_with_properties failed");
let src = CString::new(PROGRAM_SOURCE).unwrap();
let options = CString::default();
context
.build_program_from_source(&src, &options)
.expect("Context::build_program_from_source failed");
assert!(!context.kernels().is_empty());
for kernel_name in context.kernels().keys() {
println!("Kernel name: {:?}", kernel_name);
}
let svm_capability = context.get_svm_mem_capability();
assert!(0 < svm_capability);
let is_fine_grained_svm: bool = 0 < svm_capability & CL_DEVICE_SVM_FINE_GRAIN_BUFFER;
println!("OpenCL SVM is fine grained: {}", is_fine_grained_svm);
const ARRAY_SIZE: usize = 1000;
let mut ones = SvmVec::<cl_float>::new(&context, svm_capability);
ones.reserve(ARRAY_SIZE);
for _ in 0..ARRAY_SIZE {
ones.push(1.0);
}
let mut sums = SvmVec::<cl_float>::new(&context, svm_capability);
sums.reserve(ARRAY_SIZE);
for i in 0..ARRAY_SIZE {
sums.push(1.0 + 1.0 * i as cl_float);
}
let queue = context.default_queue();
let kernel_name = CString::new(KERNEL_NAME).unwrap();
if let Some(kernel) = context.get_kernel(&kernel_name) {
let mut results = SvmVec::<cl_float>::new(&context, svm_capability);
results.reserve(ARRAY_SIZE);
for i in 0..ARRAY_SIZE {
results.push(i as cl_float);
}
let mut events: Vec<cl_event> = Vec::default();
if is_fine_grained_svm {
let a: cl_float = 300.0;
let kernel_event = ExecuteKernel::new(kernel)
.set_arg_svm(results.as_mut_ptr())
.set_arg_svm(ones.as_ptr())
.set_arg_svm(sums.as_ptr())
.set_arg(&a)
.set_global_work_size(ARRAY_SIZE)
.enqueue_nd_range(&queue)
.unwrap();
events.push(kernel_event.get());
event::wait_for_events(&events).unwrap();
assert_eq!(1300.0, results[ARRAY_SIZE - 1]);
println!("results back: {}", results[ARRAY_SIZE - 1]);
let start_time = kernel_event.profiling_command_start().unwrap();
let end_time = kernel_event.profiling_command_end().unwrap();
let duration = end_time - start_time;
println!("kernel execution duration (ns): {}", duration);
} else {
let map_ones_event = queue
.enqueue_svm_map(CL_FALSE, CL_MAP_WRITE, &mut ones, &events)
.unwrap();
let map_sums_event = queue
.enqueue_svm_map(CL_FALSE, CL_MAP_WRITE, &mut sums, &events)
.unwrap();
events.push(map_ones_event.get());
events.push(map_sums_event.get());
let a: cl_float = 300.0;
let kernel_event = ExecuteKernel::new(kernel)
.set_arg_svm(results.as_mut_ptr())
.set_arg_svm(ones.as_ptr())
.set_arg_svm(sums.as_ptr())
.set_arg(&a)
.set_global_work_size(ARRAY_SIZE)
.set_event_wait_list(&events)
.enqueue_nd_range(&queue)
.unwrap();
events.clear();
events.push(kernel_event.get());
event::wait_for_events(&events).unwrap();
events.clear();
let _map_results_event = queue
.enqueue_svm_map(CL_TRUE, CL_MAP_READ, &mut results, &events)
.unwrap();
assert_eq!(1300.0, results[ARRAY_SIZE - 1]);
println!("results back: {}", results[ARRAY_SIZE - 1]);
let start_time = kernel_event.profiling_command_start().unwrap();
let end_time = kernel_event.profiling_command_end().unwrap();
let duration = end_time - start_time;
println!("kernel execution duration (ns): {}", duration);
let unmap_results_event = queue.enqueue_svm_unmap(&results, &events).unwrap();
let unmap_sums_event = queue.enqueue_svm_unmap(&sums, &events).unwrap();
let unmap_ones_event = queue.enqueue_svm_unmap(&ones, &events).unwrap();
events.push(unmap_results_event.get());
events.push(unmap_sums_event.get());
events.push(unmap_ones_event.get());
event::wait_for_events(&events).unwrap();
println!("SVM buffers unmapped");
}
}
} else {
println!("OpenCL SVM capable device not found")
}
}