Crate accel

A GPGPU framework for Rust based on the CUDA Driver API

Basic Examples

Vector Add

use accel::*;
use accel_derive::kernel;

#[kernel]
unsafe fn add(a: *const f64, b: *const f64, c: *mut f64, n: usize) {
    let i = accel_core::index();
    if (i as usize) < n {
        *c.offset(i) = *a.offset(i) + *b.offset(i);
    }
}

fn main() -> error::Result<()> {
    let device = Device::nth(0)?;
    let ctx = device.create_context();

    // Allocate memory on the GPU
    let n = 32;
    let mut a = DeviceMemory::<f64>::new(&ctx, n);
    let mut b = DeviceMemory::<f64>::new(&ctx, n);
    let mut c = DeviceMemory::<f64>::new(&ctx, n);

    // Accessible from the CPU as a usual Rust slice (though this will be slow)
    for i in 0..n {
        a[i] = i as f64;
        b[i] = 2.0 * i as f64;
    }
    println!("a = {:?}", a.as_slice());
    println!("b = {:?}", b.as_slice());

    // Launch kernel synchronously
    add(&ctx,
        1 /* grid */,
        n /* block */,
        &(&a.as_ptr(), &b.as_ptr(), &c.as_mut_ptr(), &n)
    ).expect("Kernel call failed");

    println!("c = {:?}", c.as_slice());
    Ok(())
}

Assertion on GPU

use accel::*;
use accel_derive::kernel;

#[kernel]
fn assert() {
    accel_core::assert_eq!(1 + 2, 4);  // will fail
}

fn main() -> error::Result<()> {
    let device = Device::nth(0)?;
    let ctx = device.create_context();
    let result = assert(&ctx, 1 /* grid */, 4 /* block */, &());
    assert!(result.is_err()); // assertion failed
    Ok(())
}

Print from GPU

use accel::*;
use accel_derive::kernel;

#[kernel]
pub fn print() {
    let i = accel_core::index();
    accel_core::println!("Hello from {}", i);
}

fn main() -> error::Result<()> {
    let device = Device::nth(0)?;
    let ctx = device.create_context();
    print(&ctx, 1, 4, &())?;
    Ok(())
}

Advanced Examples

Get compiled PTX as String

The proc-macro #[kernel] creates a submodule add:: in addition to a function add. The kernel's Rust code is compiled into a PTX string using rustc's nvptx64-nvidia-cuda toolchain. The generated PTX string is embedded into the proc-macro output as {kernel_name}::PTX_STR.

use accel_derive::kernel;

#[kernel]
unsafe fn add(a: *const f64, b: *const f64, c: *mut f64, n: usize) {
    let i = accel_core::index();
    if (i as usize) < n {
        *c.offset(i) = *a.offset(i) + *b.offset(i);
    }
}

fn main() {
    // PTX assembler code is embedded as `add::PTX_STR`
    println!("{}", add::PTX_STR);
}

Asynchronous launch

#[kernel] creates an assert::Module type definition which implements the Launchable trait. This struct reads PTX_STR using Module.

use accel::*;
use accel_derive::kernel;

#[kernel]
fn assert() {
    accel_core::assert_eq!(1 + 2, 4);
}

fn main() -> error::Result<()> {
    let device = Device::nth(0)?;
    let ctx = device.create_context();
    let stream = Stream::new(&ctx);

    let module = assert::Module::new(&ctx)?;
    module.stream_launch(&stream, 1, 4, &())?; // launch will succeed
    assert!(stream.sync().is_err()); // the failed assertion is detected at the next sync
    Ok(())
}


pub use array::*;
pub use device::*;
pub use linker::*;
pub use memory::*;
pub use module::*;
pub use stream::*;



CUDA Array and Texture, Surface Objects


CUDA Device and Context


CUDA JIT compiler and Linkers


Device and Host memory handlers


CUDA Module (i.e. loaded PTX or cubin)


