extern crate ocl;
extern crate time;
use ocl::{util, core, ProQue, Buffer, EventList};
const DATASET_SIZE: usize = 2 << 12;
const KERNEL_RUN_ITERS: i32 = 800;
const BUFFER_READ_ITERS: i32 = 20;
const KERNEL_AND_BUFFER_ITERS: i32 = 1000;
const SCALAR: f32 = 1.0;
const INIT_VAL_RANGE: (f32, f32) = (100.0, 200.0);
const PRINT_SOME_RESULTS: bool = true;
const RESULTS_TO_PRINT: usize = 5;
fn main() {
let src = r#"
__kernel void add(
__global float const* const source,
__private float scalar,
__global float* const result)
{
uint idx = get_global_id(0);
result[idx] = source[idx] + scalar;
}
"#;
let ocl_pq = ProQue::builder().src(src).dims([DATASET_SIZE]).build().unwrap();
let vec_init = util::scrambled_vec(INIT_VAL_RANGE, ocl_pq.dims().to_len());
let buffer_init = Buffer::new(ocl_pq.queue(), Some(core::MEM_READ_WRITE |
core::MEM_COPY_HOST_PTR), ocl_pq.dims().clone(), Some(&vec_init)).unwrap();
let mut vec_result = vec![0.0f32; DATASET_SIZE];
let buffer_result = Buffer::<f32>::new(ocl_pq.queue(), None,
ocl_pq.dims(), None).unwrap();
let mut kern = ocl_pq.create_kernel("add").unwrap()
.gws(ocl_pq.dims().clone())
.arg_buf_named("source", Some(&buffer_init))
.arg_scl(SCALAR)
.arg_buf(&buffer_result);
print!("\n");
println!("Enqueuing {} kernel runs... ", KERNEL_RUN_ITERS);
let kern_start = time::get_time();
kern.enq().unwrap();
kern.set_arg_buf_named("source", Some(&buffer_result)).unwrap();
for _ in 0..(KERNEL_RUN_ITERS - 1) {
kern.enq().unwrap();
}
ocl_pq.queue().finish();
print_elapsed("total elapsed", kern_start);
print!("\n");
println!("Enqueuing {} buffer reads... ", BUFFER_READ_ITERS);
let buffer_start = time::get_time();
for _ in 0..BUFFER_READ_ITERS {
buffer_result.cmd().read(&mut vec_result).enq().unwrap()
}
print_elapsed("queue unfinished", buffer_start);
ocl_pq.queue().finish();
print_elapsed("queue finished", buffer_start);
verify_results(&vec_init, &vec_result, KERNEL_RUN_ITERS);
print!("\n");
println!("Enqueuing {} blocking kernel buffer sequences... ", KERNEL_AND_BUFFER_ITERS);
let kern_buf_start = time::get_time();
for _ in 0..(KERNEL_AND_BUFFER_ITERS) {
kern.enq().unwrap();
buffer_result.cmd().read(&mut vec_result).enq().unwrap();
}
print_elapsed("queue unfinished", kern_buf_start);
ocl_pq.queue().finish();
print_elapsed("queue finished", kern_buf_start);
verify_results(&vec_init, &vec_result, KERNEL_AND_BUFFER_ITERS + KERNEL_RUN_ITERS);
print!("\n");
println!("Enqueuing {} non-blocking kernel buffer sequences... ", KERNEL_AND_BUFFER_ITERS);
let kern_buf_start = time::get_time();
let mut kern_events = EventList::new();
let mut buf_events = EventList::new();
for _ in 0..KERNEL_AND_BUFFER_ITERS {
kern.cmd().ewait(&buf_events).enew(&mut kern_events).enq().unwrap();
buffer_result.cmd().read(&mut vec_result).ewait(&kern_events)
.enew(&mut buf_events).enq().unwrap();
}
print_elapsed("queue unfinished", kern_buf_start);
ocl_pq.queue().finish();
print_elapsed("queue finished", kern_buf_start);
kern_events.wait().unwrap();
kern_events.clear_completed().unwrap();
buf_events.wait().unwrap();
buf_events.clear_completed().unwrap();
verify_results(&vec_init, &vec_result,
KERNEL_AND_BUFFER_ITERS + KERNEL_AND_BUFFER_ITERS + KERNEL_RUN_ITERS);
print!("\n");
println!("Enqueuing {} oh-fuck-it kernel buffer sequences... ", KERNEL_AND_BUFFER_ITERS);
let kern_buf_start = time::get_time();
for _ in 0..KERNEL_AND_BUFFER_ITERS {
kern.cmd().enew(&mut kern_events).enq().unwrap();
unsafe { buffer_result.cmd().read_async(&mut vec_result).enew(&mut buf_events).enq().unwrap() }
}
print_elapsed("queue unfinished", kern_buf_start);
ocl_pq.queue().finish();
print_elapsed("queue finished", kern_buf_start);
kern_events.wait().unwrap();
buf_events.wait().unwrap();
verify_results(&vec_init, &vec_result,
KERNEL_AND_BUFFER_ITERS + KERNEL_AND_BUFFER_ITERS + KERNEL_AND_BUFFER_ITERS + KERNEL_RUN_ITERS);
}
fn print_elapsed(title: &str, start: time::Timespec) {
let time_elapsed = time::get_time() - start;
let elapsed_ms = time_elapsed.num_milliseconds();
let separator = if title.len() > 0 { ": " } else { "" };
println!(" {}{}: {}.{:03}", title, separator, time_elapsed.num_seconds(), elapsed_ms);
}
fn verify_results(vec_init: &Vec<f32>, vec_result: &Vec<f32>, iters: i32) {
print!("\nVerifying result values... ");
if PRINT_SOME_RESULTS { print!("(printing {})\n", RESULTS_TO_PRINT); }
let margin_of_error = 0.1 as f32;
for idx in 0..DATASET_SIZE {
let correct = vec_init[idx] + (iters as f32 * SCALAR);
assert!((correct - vec_result[idx]).abs() < margin_of_error,
" INVALID RESULT[{}]: init: {}, correct: {}, margin: {}, result: {}",
idx, vec_init[idx], correct, margin_of_error, vec_result[idx]);
if PRINT_SOME_RESULTS && (idx % (DATASET_SIZE / RESULTS_TO_PRINT)) == 0 {
println!(" [{}]: init: {}, correct: {}, result: {}", idx, vec_init[idx],
correct, vec_result[idx]);
}
}
if PRINT_SOME_RESULTS { print!("\n"); }
println!("All result values are correct.");
}