//! GPGPU framework for Rust based on [CUDA Driver API] //! //! [CUDA Driver API]: https://docs.nvidia.com/cuda/cuda-driver-api/ //! //! Setup //! ----- //! Currently (0.3.0), accel works only on Linux system. Windows support will come in future release (0.3.x or 0.4~). //! //! 1. Install [CUDA](https://developer.nvidia.com/cuda-downloads) on your system //! 2. Setup Rust environement using rustup (Requires 1.42 or later) //! 3. Add `nvptx64-nvidia-cuda` target and install `ptx-linker`, or run //! //! ```shell //! curl -sSL https://gitlab.com/termoshtt/accel/raw/master/setup_nvptx_toolchain.sh | bash //! ``` //! //! Examples //! -------- //! accel works with stable Rust //! //! ```toml //! [dependencies] //! accel = "=0.3.0-alpha.2" //! ``` //! //! Do **NOT** add `accel-core` to `[dependencies]`. //! It will be linked automatically into the device code. //! //! ### Vector Add //! //! ``` //! use accel::*; //! //! #[kernel] //! unsafe fn add(a: *const f32, b: *const f32, c: *mut f32, n: usize) { //! let i = accel_core::index(); //! if (i as usize) < n { //! *c.offset(i) = *a.offset(i) + *b.offset(i); //! } //! } //! //! fn main() -> error::Result<()> { //! let device = Device::nth(0)?; //! let ctx = device.create_context(); //! //! // Allocate memories on GPU //! let n = 32; //! let mut a = DeviceMemory::<f32>::zeros(ctx.clone(), n); //! let mut b = DeviceMemory::<f32>::zeros(ctx.clone(), n); //! let mut c = DeviceMemory::<f32>::zeros(ctx.clone(), n); //! //! // Accessible from CPU as usual Rust slice (though this will be slow) //! for i in 0..n { //! a[i] = i as f32; //! b[i] = 2.0 * i as f32; //! } //! println!("a = {:?}", a.as_slice()); //! println!("b = {:?}", b.as_slice()); //! //! // Launch kernel synchronously //! add(ctx, //! 1 /* grid */, //! n /* block */, //! &(&a.as_ptr(), &b.as_ptr(), &c.as_mut_ptr(), &n) //! ).expect("Kernel call failed"); //! //! println!("c = {:?}", c.as_slice()); //! Ok(()) //! } //! ``` //! //! ### Assertion on GPU //! //! ``` //! 
use accel::*; //! //! #[kernel] //! fn assert() { //! accel_core::assert_eq!(1 + 2, 4); // will fail //! } //! //! fn main() -> error::Result<()> { //! let device = Device::nth(0)?; //! let ctx = device.create_context(); //! let result = assert(ctx, 1 /* grid */, 4 /* block */, &()); //! assert!(result.is_err()); // assertion failed //! Ok(()) //! } //! ``` //! //! ### Print from GPU //! //! ``` //! use accel::*; //! //! #[kernel] //! pub fn print() { //! let i = accel_core::index(); //! accel_core::println!("Hello from {}", i); //! } //! //! fn main() -> error::Result<()> { //! let device = Device::nth(0)?; //! let ctx = device.create_context(); //! print(ctx, 1, 4, &())?; //! Ok(()) //! } //! ``` extern crate cuda_driver_sys as cuda; pub use accel_derive::kernel; pub mod device; pub mod error; pub mod linker; pub mod memory; pub mod module; pub mod profiler; pub mod stream; pub use device::*; pub use linker::*; pub use memory::*; pub use module::*; pub use profiler::*; pub use stream::*;