//! GPGPU framework for Rust based on [CUDA Driver API]
//!
//! [CUDA Driver API]: https://docs.nvidia.com/cuda/cuda-driver-api/
//!
//! Setup
//! -----
//! Currently (0.3.0), accel works only on Linux systems. Windows support will come in a future release (0.3.x or 0.4~).
//!
//! 1. Install [CUDA](https://developer.nvidia.com/cuda-downloads) on your system
//! 2. Setup Rust environment using rustup (Requires 1.42 or later)
//! 3. Add `nvptx64-nvidia-cuda` target and install `ptx-linker`, or run
//!
//! ```shell
//! curl -sSL https://gitlab.com/termoshtt/accel/raw/master/setup_nvptx_toolchain.sh | bash
//! ```
//!
//! Examples
//! --------
//! accel works with stable Rust
//!
//! ```toml
//! [dependencies]
//! accel = "=0.3.0-alpha.2"
//! ```
//!
//! Do **NOT** add `accel-core` to `[dependencies]`.
//! It will be linked automatically into the device code.
//!
//! ### Vector Add
//!
//! ```
//! use accel::*;
//!
//! #[kernel]
//! unsafe fn add(a: *const f32, b: *const f32, c: *mut f32, n: usize) {
//!     let i = accel_core::index();
//!     if (i as usize) < n {
//!         *c.offset(i) = *a.offset(i) + *b.offset(i);
//!     }
//! }
//!
//! fn main() -> error::Result<()> {
//!     let device = Device::nth(0)?;
//!     let ctx = device.create_context();
//!
//!     // Allocate memories on GPU
//!     let n = 32;
//!     let mut a = DeviceMemory::<f32>::zeros(ctx.clone(), n);
//!     let mut b = DeviceMemory::<f32>::zeros(ctx.clone(), n);
//!     let mut c = DeviceMemory::<f32>::zeros(ctx.clone(), n);
//!
//!     // Accessible from CPU as usual Rust slice (though this will be slow)
//!     for i in 0..n {
//!         a[i] = i as f32;
//!         b[i] = 2.0 * i as f32;
//!     }
//!     println!("a = {:?}", a.as_slice());
//!     println!("b = {:?}", b.as_slice());
//!
//!     // Launch kernel synchronously
//!     add(ctx,
//!         1 /* grid */,
//!         n /* block */,
//!         &(&a.as_ptr(), &b.as_ptr(), &c.as_mut_ptr(), &n)
//!     ).expect("Kernel call failed");
//!
//!     println!("c = {:?}", c.as_slice());
//!     Ok(())
//! }
//! ```
//!
//! ### Assertion on GPU
//!
//! ```
//! use accel::*;
//!
//! #[kernel]
//! fn assert() {
//!     accel_core::assert_eq!(1 + 2, 4); // will fail
//! }
//!
//! fn main() -> error::Result<()> {
//!     let device = Device::nth(0)?;
//!     let ctx = device.create_context();
//!     let result = assert(ctx, 1 /* grid */, 4 /* block */, &());
//!     assert!(result.is_err()); // assertion failed
//!     Ok(())
//! }
//! ```
//!
//! ### Print from GPU
//!
//! ```
//! use accel::*;
//!
//! #[kernel]
//! pub fn print() {
//!     let i = accel_core::index();
//!     accel_core::println!("Hello from {}", i);
//! }
//!
//! fn main() -> error::Result<()> {
//!     let device = Device::nth(0)?;
//!     let ctx = device.create_context();
//!     print(ctx, 1, 4, &())?;
//!     Ok(())
//! }
//! ```
extern crate cuda_driver_sys as cuda;
pub use kernel;
pub use *;
pub use *;
pub use *;
pub use *;
pub use *;
pub use *;