use opencl3::Result;
use opencl3::command_queue::{CL_QUEUE_PROFILING_ENABLE, CommandQueue};
use opencl3::context::Context;
use opencl3::device::{CL_DEVICE_TYPE_GPU, Device};
use opencl3::error_codes::cl_int;
use opencl3::kernel::{ExecuteKernel, Kernel};
use opencl3::memory::{CL_MAP_READ, CL_MAP_WRITE};
use opencl3::program::{CL_STD_2_0, Program};
use opencl3::svm::SvmVec;
use opencl3::types::CL_BLOCKING;
const PROGRAM_SOURCE: &str = r#"
kernel void inclusive_scan_int (global int* output,
global int const* values)
{
int sum = 0;
size_t lid = get_local_id(0);
size_t lsize = get_local_size(0);
size_t num_groups = get_num_groups(0);
for (size_t i = 0u; i < num_groups; ++i)
{
size_t lidx = i * lsize + lid;
int value = work_group_scan_inclusive_add(values[lidx]);
output[lidx] = sum + value;
sum += work_group_broadcast(value, lsize - 1);
}
}"#;
const KERNEL_NAME: &str = "inclusive_scan_int";
fn main() -> Result<()> {
let platforms = opencl3::platform::get_platforms()?;
let platform = platforms.first().expect("no OpenCL platforms");
let device = *platform
.get_devices(CL_DEVICE_TYPE_GPU)?
.first()
.expect("no device found in platform");
let device = Device::new(device);
let context = Context::from_device(&device).expect("Context::from_device failed");
let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, CL_STD_2_0)
.expect("Program::create_and_build_from_source failed");
let kernel = Kernel::create(&program, KERNEL_NAME).expect("Kernel::create failed");
let queue =
CommandQueue::create_default_with_properties(&context, CL_QUEUE_PROFILING_ENABLE, 0)
.expect("CommandQueue::create_default_with_properties failed");
const ARRAY_SIZE: usize = 8;
let value_array: [cl_int; ARRAY_SIZE] = [3, 2, 5, 9, 7, 1, 4, 2];
let mut test_values =
SvmVec::<cl_int>::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed");
if !test_values.is_fine_grained() {
unsafe { queue.enqueue_svm_map(CL_BLOCKING, CL_MAP_WRITE, &mut test_values, &[])? };
}
test_values.clone_from_slice(&value_array);
let test_values = test_values;
if !test_values.is_fine_grained() {
let unmap_test_values_event = unsafe { queue.enqueue_svm_unmap(&test_values, &[])? };
unmap_test_values_event.wait()?;
}
let mut results =
SvmVec::<cl_int>::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed");
let kernel_event = unsafe {
ExecuteKernel::new(&kernel)
.set_arg_svm(results.as_mut_ptr())
.set_arg_svm(test_values.as_ptr())
.set_global_work_size(ARRAY_SIZE)
.enqueue_nd_range(&queue)?
};
kernel_event.wait()?;
if !results.is_fine_grained() {
unsafe { queue.enqueue_svm_map(CL_BLOCKING, CL_MAP_READ, &mut results, &[])? };
}
println!("sum results: {:?}", results);
if !results.is_fine_grained() {
let unmap_results_event = unsafe { queue.enqueue_svm_unmap(&results, &[])? };
unmap_results_event.wait()?;
}
Ok(())
}