#[cfg(any(feature = "CL_VERSION_2_0", feature = "dynamic"))]
extern crate opencl3;
use cl3::device::{
CL_DEVICE_SVM_FINE_GRAIN_BUFFER, CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, CL_DEVICE_TYPE_ALL,
CL_DEVICE_TYPE_GPU,
};
use opencl3::Result;
use opencl3::command_queue::CommandQueue;
use opencl3::context::Context;
use opencl3::device::Device;
use opencl3::kernel::{ExecuteKernel, Kernel, create_program_kernels};
use opencl3::platform::get_platforms;
use opencl3::program::{CL_STD_2_0, Program};
use opencl3::svm::SvmVec;
use opencl3::types::cl_int;
use std::ptr;
const PROGRAM_SOURCE: &str = r#"
kernel void sum_int (global int* sums,
global int const* values)
{
int value = sub_group_reduce_add(values[get_global_id(0)]);
if (0u == get_local_id(0))
sums[get_group_id(0)] = value;
}
kernel void inclusive_scan_int (global int* output,
global int const* values)
{
int sum = 0;
size_t lid = get_local_id(0);
size_t lsize = get_local_size(0);
size_t num_groups = get_num_groups(0);
for (size_t i = 0u; i < num_groups; ++i)
{
size_t lidx = i * lsize + lid;
int value = sub_group_scan_inclusive_add(values[lidx]);
output[lidx] = sum + value;
sum += sub_group_broadcast(value, lsize - 1);
}
}"#;
const SUM_KERNEL_NAME: &str = "sum_int";
const INCLUSIVE_SCAN_KERNEL_NAME: &str = "inclusive_scan_int";
#[test]
#[ignore]
fn test_opencl_2_kernel_example() -> Result<()> {
let platforms = get_platforms()?;
assert!(0 < platforms.len());
let opencl_2: &str = "OpenCL 2";
let opencl_3: &str = "OpenCL 3";
let mut device_id = ptr::null_mut();
let mut is_fine_grained_svm: bool = false;
for p in platforms {
let platform_version = p.version()?;
if platform_version.contains(&opencl_2) || platform_version.contains(&opencl_3) {
let devices = p
.get_devices(CL_DEVICE_TYPE_GPU)
.expect("Platform::get_devices failed");
for dev_id in devices {
let device = Device::new(dev_id);
let svm_mem_capability = device.svm_mem_capability();
is_fine_grained_svm = 0 < svm_mem_capability & CL_DEVICE_SVM_FINE_GRAIN_BUFFER;
if is_fine_grained_svm {
device_id = dev_id;
break;
}
}
}
}
if is_fine_grained_svm {
let device = Device::new(device_id);
let vendor = device.vendor()?;
let vendor_id = device.vendor_id()?;
println!("OpenCL device vendor name: {}", vendor);
println!("OpenCL device vendor id: {:X}", vendor_id);
let context = Context::from_device(&device).expect("Context::from_device failed");
let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, CL_STD_2_0)
.expect("Program::create_and_build_from_source failed");
let kernels = create_program_kernels(&program)?;
assert!(0 < kernels.len());
let kernel_0_name = kernels[0].function_name()?;
println!("OpenCL kernel_0_name: {}", kernel_0_name);
let sum_kernel = if SUM_KERNEL_NAME == kernel_0_name {
&kernels[0]
} else {
&kernels[1]
};
let inclusive_scan_kernel = if INCLUSIVE_SCAN_KERNEL_NAME == kernel_0_name {
&kernels[0]
} else {
&kernels[1]
};
let queue = CommandQueue::create_default_with_properties(&context, 0, 0)
.expect("CommandQueue::create_with_properties failed");
let svm_capability = context.get_svm_mem_capability();
assert!(0 < svm_capability);
const ARRAY_SIZE: usize = 8;
let value_array: [cl_int; ARRAY_SIZE] = [3, 2, 5, 9, 7, 1, 4, 2];
let mut test_values =
SvmVec::<cl_int>::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed");
test_values.copy_from_slice(&value_array);
let test_values = test_values;
let mut results =
SvmVec::<cl_int>::allocate_zeroed(&context, ARRAY_SIZE).expect("SVM allocation failed");
let sum_kernel_event = unsafe {
ExecuteKernel::new(sum_kernel)
.set_arg_svm(results.as_mut_ptr())
.set_arg_svm(test_values.as_ptr())
.set_global_work_size(ARRAY_SIZE)
.enqueue_nd_range(&queue)?
};
sum_kernel_event.wait()?;
println!("sum results: {:?}", results);
assert_eq!(33, results[0]);
assert_eq!(0, results[ARRAY_SIZE - 1]);
let kernel_event = unsafe {
ExecuteKernel::new(inclusive_scan_kernel)
.set_arg_svm(results.as_mut_ptr())
.set_arg_svm(test_values.as_ptr())
.set_global_work_size(ARRAY_SIZE)
.enqueue_nd_range(&queue)?
};
kernel_event.wait()?;
println!("inclusive_scan results: {:?}", results);
assert_eq!(value_array[0], results[0]);
assert_eq!(33, results[ARRAY_SIZE - 1]);
} else {
println!("OpenCL fine grained SVM capable device not found");
}
Ok(())
}
#[test]
#[ignore]
fn test_opencl_2_system_svm_example() -> Result<()> {
let platforms = get_platforms()?;
assert!(0 < platforms.len());
let opencl_2: &str = "OpenCL 2";
let opencl_3: &str = "OpenCL 3";
let mut device_id = ptr::null_mut();
let mut is_fine_grained_system_svm: bool = false;
for p in platforms {
let platform_version = p.version()?;
if platform_version.contains(&opencl_2) || platform_version.contains(&opencl_3) {
let devices = p
.get_devices(CL_DEVICE_TYPE_ALL)
.expect("Platform::get_devices failed");
for dev_id in devices {
let device = Device::new(dev_id);
let svm_mem_capability = device.svm_mem_capability();
is_fine_grained_system_svm =
0 < svm_mem_capability & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM;
if is_fine_grained_system_svm {
device_id = dev_id;
break;
}
}
}
}
if is_fine_grained_system_svm {
let device = Device::new(device_id);
let vendor = device.vendor().expect("Device.vendor failed");
let vendor_id = device.vendor_id().expect("Device.vendor_id failed");
println!("OpenCL device vendor name: {}", vendor);
println!("OpenCL device vendor id: {:X}", vendor_id);
let context = Context::from_device(&device).expect("Context::from_device failed");
let program = Program::create_and_build_from_source(&context, PROGRAM_SOURCE, "")
.expect("Program::create_and_build_from_source failed");
let kernel = Kernel::create(&program, SUM_KERNEL_NAME).expect("Kernel::create failed");
let queue = CommandQueue::create_default_with_properties(&context, 0, 0)
.expect("CommandQueue::create_default_with_properties failed");
const ARRAY_SIZE: usize = 8;
let value_array: [cl_int; ARRAY_SIZE] = [3, 2, 5, 9, 7, 1, 4, 2];
let mut test_values =
SvmVec::<cl_int>::allocate(&context, ARRAY_SIZE).expect("SVM allocation failed");
test_values.copy_from_slice(&value_array);
let test_values = test_values;
let mut results =
SvmVec::<cl_int>::allocate_zeroed(&context, ARRAY_SIZE).expect("SVM allocation failed");
let sum_kernel_event = unsafe {
ExecuteKernel::new(&kernel)
.set_arg_svm(results.as_mut_ptr())
.set_arg_svm(test_values.as_ptr())
.set_global_work_size(ARRAY_SIZE)
.enqueue_nd_range(&queue)?
};
sum_kernel_event.wait()?;
println!("sum results: {:?}", results);
assert_eq!(33, results[0]);
assert_eq!(0, results[ARRAY_SIZE - 1]);
} else {
println!("OpenCL fine grained system SVM device not found")
}
Ok(())
}