[][src]Crate accel

GPGPU framework for Rust based on the CUDA Driver API

Basic Examples

Vector Add

use accel::*;
use accel_derive::kernel;

// Element-wise vector addition kernel: stores `a[i] + b[i]` into `c[i]`.
//
// `i` is the value returned by `accel_core::index()` (presumably the index of
// the calling GPU thread -- confirm against accel_core). The `i < n` guard
// skips every index at or beyond `n`.
//
// Safety: the pointers are dereferenced at offset `i` without any further
// check, so `a`, `b` and `c` must each be valid for at least `n` elements.
#[kernel]
unsafe fn add(a: *const f64, b: *const f64, c: *mut f64, n: usize) {
    let i = accel_core::index();
    if (i as usize) < n {
        *c.offset(i) = *a.offset(i) + *b.offset(i);
    }
}

// Adds two 32-element vectors with the `add` kernel on device 0 and prints the
// inputs and the result. Every fallible step, including the kernel launch, is
// propagated to the caller with `?` rather than panicking.
fn main() -> error::Result<()> {
    let device = Device::nth(0)?;
    let ctx = device.create_context();

    // Allocate memory on the GPU
    let n = 32;
    let mut a = DeviceMemory::<f64>::new(&ctx, n);
    let mut b = DeviceMemory::<f64>::new(&ctx, n);
    let mut c = DeviceMemory::<f64>::new(&ctx, n);

    // Accessible from the CPU as a usual Rust slice (though this will be slow)
    for i in 0..n {
        a[i] = i as f64;
        b[i] = 2.0 * i as f64;
    }
    println!("a = {:?}", a.as_slice());
    println!("b = {:?}", b.as_slice());

    // Launch the kernel synchronously; a failed launch is returned as an error
    // from `main`, matching how the other examples handle kernel calls.
    add(&ctx,
        1 /* grid */,
        n /* block */,
        &(&a.as_ptr(), &b.as_ptr(), &c.as_mut_ptr(), &n)
    )?;

    println!("c = {:?}", c.as_slice());
    Ok(())
}

Assertion on GPU

use accel::*;
use accel_derive::kernel;

// Kernel whose only statement asserts that `1 + 2` equals `4`, so the
// assertion fails every time the kernel runs.
#[kernel]
fn assert() {
    accel_core::assert_eq!(1 + 2, 4);  // will fail
}

// Launches the `assert` kernel (grid 1, block 4, no arguments) and checks that
// the launch reports an error.
fn main() -> error::Result<()> {
    let device = Device::nth(0)?;
    let ctx = device.create_context();
    // Keep the result instead of using `?` so the error can be inspected below
    let result = assert(&ctx, 1 /* grid */, 4 /* block */, &());
    assert!(result.is_err()); // assertion failed
    Ok(())
}
Print from GPU

use accel::*;
use accel_derive::kernel;

// Kernel that prints "Hello from {i}", where `i` is the value returned by
// `accel_core::index()` (presumably the index of the calling GPU thread --
// confirm against accel_core).
#[kernel]
pub fn print() {
    let i = accel_core::index();
    accel_core::println!("Hello from {}", i);
}

// Launches the `print` kernel with grid 1 and block 4 and no kernel arguments,
// propagating any launch error to the caller.
fn main() -> error::Result<()> {
    let device = Device::nth(0)?;
    let ctx = device.create_context();
    print(&ctx, 1, 4, &())?;
    Ok(())
}

Advanced Examples

Get compiled PTX as String

The proc-macro #[kernel] creates a submodule add:: in addition to a function add. The kernel's Rust code is compiled into a PTX string using rustc's nvptx64-nvidia-cuda toolchain. The generated PTX string is embedded into the proc-macro output as {kernel_name}::PTX_STR.

use accel_derive::kernel;

// Element-wise vector addition kernel: stores `a[i] + b[i]` into `c[i]`.
//
// `i` is the value returned by `accel_core::index()` (presumably the index of
// the calling GPU thread -- confirm against accel_core). The `i < n` guard
// skips every index at or beyond `n`.
//
// Safety: the pointers are dereferenced at offset `i` without any further
// check, so `a`, `b` and `c` must each be valid for at least `n` elements.
#[kernel]
unsafe fn add(a: *const f64, b: *const f64, c: *mut f64, n: usize) {
    let i = accel_core::index();
    if (i as usize) < n {
        *c.offset(i) = *a.offset(i) + *b.offset(i);
    }
}

// Prints the PTX that was generated for the `add` kernel.
fn main() {
    // The proc-macro output exposes the PTX assembler code as `add::PTX_STR`
    let ptx = add::PTX_STR;
    println!("{}", ptx);
}

Asynchronous launch

#[kernel] creates an assert::Module type definition which implements the Launchable trait. This struct reads PTX_STR using Module.

use accel::*;
use accel_derive::kernel;

// Kernel whose only statement asserts that `1 + 2` equals `4`, so the
// assertion fails every time the kernel runs.
#[kernel]
fn assert() {
    accel_core::assert_eq!(1 + 2, 4);
}

// Launches the `assert` kernel asynchronously on a stream and checks that the
// kernel's failure is reported by the following stream synchronization rather
// than by the launch call itself.
fn main() -> error::Result<()> {
    let device = Device::nth(0)?;
    let ctx = device.create_context();
    let stream = Stream::new(&ctx);

    // `assert::Module` is the type generated by `#[kernel]` for this kernel
    let module = assert::Module::new(&ctx)?;
    module.stream_launch(&stream, 1, 4, &())?; // launch will succeed
    assert!(stream.sync().is_err()); // the failed assertion is detected in the next sync
    Ok(())
}

Re-exports

pub use array::*;
pub use device::*;
pub use linker::*;
pub use memory::*;
pub use module::*;
pub use stream::*;

Modules

array

CUDA Array and Texture, Surface Objects

device

CUDA Device and Context

error
linker

CUDA JIT compiler and Linkers

memory

Device and Host memory handlers

module

CUDA Module (i.e. loaded PTX or cubin)

stream

Macros

ffi_call
ffi_new