#![allow(deprecated)]
extern crate cl3;
use cl3::command_queue::{
create_command_queue, enqueue_nd_range_kernel, enqueue_read_buffer, enqueue_write_buffer,
finish, release_command_queue, CL_QUEUE_PROFILING_ENABLE,
};
use cl3::context::{create_context, release_context};
use cl3::device::{
get_device_ids, get_device_info, CL_DEVICE_TYPE_GPU, CL_DEVICE_VENDOR, CL_DEVICE_VENDOR_ID,
};
use cl3::event::{
get_event_profiling_info, release_event, wait_for_events, CL_PROFILING_COMMAND_END,
CL_PROFILING_COMMAND_START,
};
use cl3::kernel::{create_kernel, release_kernel, set_kernel_arg};
use cl3::memory::{create_buffer, release_mem_object, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY};
use cl3::platform::{get_platform_ids, get_platform_info, CL_PLATFORM_NAME};
use cl3::program::{build_program, create_program_with_source, release_program};
use cl3::types::{cl_event, cl_float, cl_mem, CL_BLOCKING, CL_NON_BLOCKING};
use libc::{c_void, size_t};
use std::ffi::CString;
use std::mem;
use std::ptr;
const PROGRAM_SOURCE: &str = r#"
kernel void saxpy_float (global float* z,
global float const* x,
global float const* y,
float a)
{
size_t i = get_global_id(0);
z[i] = a*x[i] + y[i];
}"#;
const KERNEL_NAME: &str = "saxpy_float";
#[test]
#[ignore]
fn test_opencl_1_2_example() {
let platform_ids = get_platform_ids().unwrap();
assert!(0 < platform_ids.len());
let platform_id = platform_ids[0];
let platform_name = get_platform_info(platform_id, CL_PLATFORM_NAME).unwrap();
println!("Platform Name: {}", platform_name);
let device_ids = get_device_ids(platform_id, CL_DEVICE_TYPE_GPU).unwrap();
assert!(0 < device_ids.len());
let device_id = device_ids[0];
let vendor_name = get_device_info(device_id, CL_DEVICE_VENDOR).unwrap();
println!("OpenCL device vendor name: {}", vendor_name);
let vendor_id = get_device_info(device_id, CL_DEVICE_VENDOR_ID).unwrap();
println!("OpenCL device vendor id: {:X}", u32::from(vendor_id));
let device_ids = [device_id];
let context = create_context(&device_ids, ptr::null(), None, ptr::null_mut()).unwrap();
let sources = [PROGRAM_SOURCE];
let program = create_program_with_source(context, &sources).unwrap();
let build_options = CString::default();
build_program(program, &device_ids, &build_options, None, ptr::null_mut()).unwrap();
let kernel_name = CString::new(KERNEL_NAME).unwrap();
let kernel = create_kernel(program, &kernel_name).unwrap();
let queue =
unsafe { create_command_queue(context, device_id, CL_QUEUE_PROFILING_ENABLE).unwrap() };
const ARRAY_SIZE: usize = 1000;
let ones: [cl_float; ARRAY_SIZE] = [1.0; ARRAY_SIZE];
let mut sums: [cl_float; ARRAY_SIZE] = [0.0; ARRAY_SIZE];
for i in 0..ARRAY_SIZE {
sums[i] = 1.0 + 1.0 * i as cl_float;
}
let x = unsafe {
create_buffer(
context,
CL_MEM_WRITE_ONLY,
ARRAY_SIZE * mem::size_of::<cl_float>(),
ptr::null_mut(),
)
.unwrap()
};
let y = unsafe {
create_buffer(
context,
CL_MEM_WRITE_ONLY,
ARRAY_SIZE * mem::size_of::<cl_float>(),
ptr::null_mut(),
)
.unwrap()
};
let z = unsafe {
create_buffer(
context,
CL_MEM_READ_ONLY,
ARRAY_SIZE * mem::size_of::<cl_float>(),
ptr::null_mut(),
)
.unwrap()
};
let x_write_event = unsafe {
enqueue_write_buffer(
queue,
x,
CL_BLOCKING,
0,
ones.len() * mem::size_of::<cl_float>(),
ones.as_ptr() as cl_mem,
0,
ptr::null(),
)
.unwrap()
};
let y_write_event = unsafe {
enqueue_write_buffer(
queue,
y,
CL_NON_BLOCKING,
0,
sums.len() * mem::size_of::<cl_float>(),
sums.as_ptr() as cl_mem,
0,
ptr::null(),
)
.unwrap()
};
let mut events: Vec<cl_event> = Vec::default();
events.push(y_write_event);
wait_for_events(&events).unwrap();
let a: cl_float = 300.0;
unsafe {
set_kernel_arg(
kernel,
0,
mem::size_of::<cl_mem>(),
&z as *const _ as *const c_void,
)
.unwrap();
set_kernel_arg(
kernel,
1,
mem::size_of::<cl_mem>(),
&x as *const _ as *const c_void,
)
.unwrap();
set_kernel_arg(
kernel,
2,
mem::size_of::<cl_mem>(),
&y as *const _ as *const c_void,
)
.unwrap();
set_kernel_arg(
kernel,
3,
mem::size_of::<cl_float>(),
&a as *const _ as *const c_void,
)
.unwrap();
}
let global_work_sizes: [size_t; 1] = [ARRAY_SIZE];
let kernel_event = unsafe {
enqueue_nd_range_kernel(
queue,
kernel,
1,
ptr::null(),
global_work_sizes.as_ptr(),
ptr::null(),
0,
ptr::null(),
)
.unwrap()
};
events.clear();
events.push(kernel_event);
let results: [cl_float; ARRAY_SIZE] = [0.0; ARRAY_SIZE];
let read_event = unsafe {
enqueue_read_buffer(
queue,
z,
CL_NON_BLOCKING,
0,
results.len() * mem::size_of::<cl_float>(),
results.as_ptr() as cl_mem,
1,
events.as_ptr(),
)
.unwrap()
};
events.clear();
finish(queue).unwrap();
assert_eq!(1300.0, results[ARRAY_SIZE - 1]);
println!("results back: {}", results[ARRAY_SIZE - 1]);
let start_time = get_event_profiling_info(kernel_event, CL_PROFILING_COMMAND_START).unwrap();
let end_time = get_event_profiling_info(kernel_event, CL_PROFILING_COMMAND_END).unwrap();
let duration = u64::from(end_time) - u64::from(start_time);
println!("kernel execution duration (ns): {}", duration);
unsafe {
release_event(x_write_event).unwrap();
release_event(y_write_event).unwrap();
release_event(kernel_event).unwrap();
release_event(read_event).unwrap();
release_mem_object(z).unwrap();
release_mem_object(y).unwrap();
release_mem_object(x).unwrap();
release_kernel(kernel).unwrap();
release_program(program).unwrap();
release_command_queue(queue).unwrap();
release_context(context).unwrap();
}
}