extern crate ocl;
extern crate ocl_extras;
extern crate time;
use ocl::{core, Buffer, EventList, ProQue};
use std::time::Instant;
const WORK_SIZE: usize = 1 << 12;
const KERNEL_RUN_ITERS: i32 = 800;
const BUFFER_READ_ITERS: i32 = 20;
const KERNEL_AND_BUFFER_ITERS: i32 = 1000;
const SCALAR: f32 = 1.0;
const INIT_VAL_RANGE: (f32, f32) = (100.0, 200.0);
const PRINT_SOME_RESULTS: bool = true;
const RESULTS_TO_PRINT: usize = 5;
fn timed() -> ocl::Result<()> {
let src = r#"
__kernel void add(
__global float const* const source,
__private float scalar,
__global float* const result)
{
uint idx = get_global_id(0);
result[idx] = source[idx] + scalar;
}
"#;
let ocl_pq = ProQue::builder().src(src).dims(WORK_SIZE).build()?;
let vec_init = ocl_extras::scrambled_vec(INIT_VAL_RANGE, ocl_pq.dims().to_len());
let buffer_init = Buffer::builder()
.queue(ocl_pq.queue().clone())
.flags(core::MemFlags::new())
.len(WORK_SIZE)
.copy_host_slice(&vec_init)
.build()?;
let mut vec_result = vec![0.0f32; WORK_SIZE];
let buffer_result = Buffer::<f32>::builder()
.queue(ocl_pq.queue().clone())
.len(WORK_SIZE)
.build()?;
let kern = ocl_pq
.kernel_builder("add")
.global_work_size(ocl_pq.dims().clone())
.arg_named("source", Some(&buffer_init))
.arg(SCALAR)
.arg(&buffer_result)
.build()?;
print!("\n");
println!("Enqueuing {} kernel runs... ", KERNEL_RUN_ITERS);
let kern_start = Instant::now();
unsafe {
kern.enq()?;
}
kern.set_arg("source", &buffer_result)?;
for _ in 0..(KERNEL_RUN_ITERS - 1) {
unsafe {
kern.enq()?;
}
}
ocl_pq.queue().finish()?;
print_elapsed("total elapsed", kern_start);
print!("\n");
println!("Enqueuing {} buffer reads... ", BUFFER_READ_ITERS);
let buffer_start = Instant::now();
for _ in 0..BUFFER_READ_ITERS {
buffer_result.cmd().read(&mut vec_result).enq()?
}
print_elapsed("queue unfinished", buffer_start);
ocl_pq.queue().finish()?;
print_elapsed("queue finished", buffer_start);
verify_results(&vec_init, &vec_result, KERNEL_RUN_ITERS)?;
print!("\n");
println!(
"Enqueuing {} blocking kernel buffer sequences... ",
KERNEL_AND_BUFFER_ITERS
);
let kern_buf_start = Instant::now();
for _ in 0..(KERNEL_AND_BUFFER_ITERS) {
unsafe {
kern.enq()?;
}
buffer_result.cmd().read(&mut vec_result).enq()?;
}
print_elapsed("queue unfinished", kern_buf_start);
ocl_pq.queue().finish()?;
print_elapsed("queue finished", kern_buf_start);
verify_results(
&vec_init,
&vec_result,
KERNEL_AND_BUFFER_ITERS + KERNEL_RUN_ITERS,
)?;
print!("\n");
println!(
"Enqueuing {} non-blocking kernel buffer sequences... ",
KERNEL_AND_BUFFER_ITERS
);
let kern_buf_start = Instant::now();
let mut kern_events = EventList::new();
let mut buf_events = EventList::new();
for _ in 0..KERNEL_AND_BUFFER_ITERS {
unsafe {
kern.cmd().ewait(&buf_events).enew(&mut kern_events).enq()?;
}
buffer_result
.cmd()
.read(&mut vec_result)
.ewait(&kern_events)
.enew(&mut buf_events)
.enq()?;
}
print_elapsed("queue unfinished", kern_buf_start);
ocl_pq.queue().finish()?;
print_elapsed("queue finished", kern_buf_start);
kern_events.wait_for()?;
kern_events.clear_completed()?;
buf_events.wait_for()?;
buf_events.clear_completed()?;
verify_results(
&vec_init,
&vec_result,
KERNEL_AND_BUFFER_ITERS + KERNEL_AND_BUFFER_ITERS + KERNEL_RUN_ITERS,
)?;
print!("\n");
println!(
"Enqueuing another {} kernel buffer sequences... ",
KERNEL_AND_BUFFER_ITERS
);
let kern_buf_start = Instant::now();
for _ in 0..KERNEL_AND_BUFFER_ITERS {
unsafe {
kern.cmd().enew(&mut kern_events).enq()?;
}
unsafe {
buffer_result
.cmd()
.read(&mut vec_result)
.enew(&mut buf_events)
.block(true)
.enq()?;
}
}
print_elapsed("queue unfinished", kern_buf_start);
ocl_pq.queue().finish()?;
print_elapsed("queue finished", kern_buf_start);
kern_events.wait_for()?;
buf_events.wait_for()?;
verify_results(
&vec_init,
&vec_result,
KERNEL_AND_BUFFER_ITERS
+ KERNEL_AND_BUFFER_ITERS
+ KERNEL_AND_BUFFER_ITERS
+ KERNEL_RUN_ITERS,
)
}
fn print_elapsed(title: &str, start: Instant) {
let time_elapsed = start.elapsed();
let separator = if title.len() > 0 { ": " } else { "" };
println!(
" {}{}{}.{:03}",
title,
separator,
time_elapsed.as_secs(),
time_elapsed.subsec_millis()
);
}
fn verify_results(vec_init: &Vec<f32>, vec_result: &Vec<f32>, iters: i32) -> ocl::Result<()> {
print!("\nVerifying result values... ");
if PRINT_SOME_RESULTS {
print!("(printing {})\n", RESULTS_TO_PRINT);
}
let margin_of_error = 0.1 as f32;
for idx in 0..WORK_SIZE {
let correct = vec_init[idx] + (iters as f32 * SCALAR);
assert!(
(correct - vec_result[idx]).abs() < margin_of_error,
" INVALID RESULT[{}]: init: {}, correct: {}, margin: {}, result: {}",
idx,
vec_init[idx],
correct,
margin_of_error,
vec_result[idx]
);
if PRINT_SOME_RESULTS && (idx % (WORK_SIZE / RESULTS_TO_PRINT)) == 0 {
println!(
" [{}]: init: {}, correct: {}, result: {}",
idx, vec_init[idx], correct, vec_result[idx]
);
}
}
if PRINT_SOME_RESULTS {
print!("\n");
}
println!("All result values are correct.");
Ok(())
}
pub fn main() {
match timed() {
Ok(_) => (),
Err(err) => println!("{}", err),
}
}