accel 0.3.0-alpha.2

GPGPU Framework for Rust
Documentation

A GPGPU framework for Rust based on the CUDA Driver API

Basic Examples

Vector Add

use accel::*;
use accel_derive::kernel;

// Element-wise vector addition kernel: for the index `idx` returned by
// `accel_core::index()`, stores `a[idx] + b[idx]` into `c[idx]`.
//
// Safety: the raw pointers are dereferenced at every index below `n`, so
// `a`, `b` and `c` must each be valid for at least `n` elements.
#[kernel]
unsafe fn add(a: *const f64, b: *const f64, c: *mut f64, n: usize) {
    let idx = accel_core::index();
    // Indices at or beyond `n` have no element to process.
    if (idx as usize) >= n {
        return;
    }
    *c.offset(idx) = *a.offset(idx) + *b.offset(idx);
}

/// Fills two 32-element vectors, adds them with the `add` kernel and prints
/// the inputs and the result.
fn main() -> error::Result<()> {
    let device = Device::nth(0)?;
    let ctx = device.create_context();

    // Allocate memory on the GPU
    let n = 32;
    let mut a = DeviceMemory::<f64>::new(&ctx, n);
    let mut b = DeviceMemory::<f64>::new(&ctx, n);
    let mut c = DeviceMemory::<f64>::new(&ctx, n);

    // The buffers can be indexed from the CPU like an ordinary Rust slice
    // (though this will be slow)
    for i in 0..n {
        let x = i as f64;
        a[i] = x;
        b[i] = 2.0 * x;
    }
    println!("a = {:?}", a.as_slice());
    println!("b = {:?}", b.as_slice());

    // Kernel arguments are passed as a tuple of references, so take the raw
    // pointers first (in the same order the kernel expects them).
    let a_ptr = a.as_ptr();
    let b_ptr = b.as_ptr();
    let c_ptr = c.as_mut_ptr();

    // Launch kernel synchronously
    add(
        &ctx,
        1, /* grid */
        n, /* block */
        &(&a_ptr, &b_ptr, &c_ptr, &n),
    )
    .expect("Kernel call failed");

    println!("c = {:?}", c.as_slice());
    Ok(())
}

Assertion on GPU

use accel::*;
use accel_derive::kernel;

// Kernel whose only statement is an equality assertion that cannot hold:
// `1 + 2` is 3, which is compared against 4.
#[kernel]
fn assert() {
accel_core::assert_eq!(1 + 2, 4);  // will fail
}

/// Launches the `assert` kernel and checks that the launch reports an error.
fn main() -> error::Result<()> {
    let gpu = Device::nth(0)?;
    let context = gpu.create_context();

    // Launch with grid = 1 and block = 4; the kernel takes no arguments.
    let result = assert(&context, 1 /* grid */, 4 /* block */, &());

    // The assertion inside the kernel fails, so the call must return an error.
    assert!(result.is_err());
    Ok(())
}

Print from GPU

use accel::*;
use accel_derive::kernel;

// Kernel that prints a greeting containing the value returned by
// `accel_core::index()`.
#[kernel]
pub fn print() {
    let idx = accel_core::index();
    accel_core::println!("Hello from {}", idx);
}

/// Launches the `print` kernel with grid = 1 and block = 4, propagating any
/// launch error to the caller.
fn main() -> error::Result<()> {
    let gpu = Device::nth(0)?;
    let context = gpu.create_context();

    // The kernel takes no arguments, hence the empty tuple.
    print(&context, 1, 4, &())?;
    Ok(())
}

Advanced Examples

Get compiled PTX as String

The proc-macro #[kernel] creates a submodule add:: in addition to a function add. The kernel's Rust code is compiled into a PTX string using rustc's nvptx64-nvidia-cuda toolchain. The generated PTX string is embedded into the proc-macro output as {kernel_name}::PTX_STR.

use accel_derive::kernel;

// Element-wise vector addition kernel: for the index `idx` returned by
// `accel_core::index()`, stores `a[idx] + b[idx]` into `c[idx]` when
// `idx` is below `n`.
//
// Safety: `a`, `b` and `c` are dereferenced at every index below `n`, so
// each must be valid for at least `n` elements.
#[kernel]
unsafe fn add(a: *const f64, b: *const f64, c: *mut f64, n: usize) {
    let idx = accel_core::index();
    if (idx as usize) < n {
        // Read both operands first, then store the result.
        let sum = *a.offset(idx) + *b.offset(idx);
        *c.offset(idx) = sum;
    }
}

/// Prints the PTX code generated for the `add` kernel.
fn main() {
    // PTX assembler code is embedded as `add::PTX_STR`
    let ptx = add::PTX_STR;
    println!("{}", ptx);
}

Asynchronous launch

#[kernel] creates an assert::Module type definition, which implements the Launchable trait. This struct reads PTX_STR using Module.

use accel::*;
use accel_derive::kernel;

// Kernel whose only statement is an equality assertion that cannot hold:
// `1 + 2` is 3, which is compared against 4.
#[kernel]
fn assert() {
accel_core::assert_eq!(1 + 2, 4);
}

/// Launches the `assert` kernel on a stream and checks that the failure is
/// reported when the stream is synchronized rather than at launch time.
fn main() -> error::Result<()> {
    let gpu = Device::nth(0)?;
    let context = gpu.create_context();
    let stream = Stream::new(&context);

    let module = assert::Module::new(&context)?;

    // The launch itself will succeed
    module.stream_launch(&stream, 1, 4, &())?;

    // The failed assertion is detected at the next sync
    assert!(stream.sync().is_err());
    Ok(())
}