1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
//! Module for GPU Acceleration of Linear Algebra operations.
//!
//! gpurs provides two main objects for GPU acceleration: MemoryCalculator and QuickCalculator.
//!
//! - MemoryCalculator requires all inputs to be stored in memory ahead of time, and keeps all outputs stored for future use.
//! This approach is less memory efficient, but can be faster if you plan to chain together a lot of operations.
//!
//! - QuickCalculator allows inputs to be pre-stored in memory, but also accepts matrices directly for certain function calls.
//! QuickCalculator does not keep outputs stored for future use. This approach is more memory efficient and is much better if you
//! don't necessarily want to keep all of your inputs/outputs stored in GPU memory for the entire lifetime of the calculator.
//!
//! These objects also allow custom kernels to be added to their own internal memory, so you can take advantage of gpurs' built-in
//! kernel compilation and memory management for your own kernels.
pub use MemoryCalculator;
pub use MemoryParameterFunction;
pub use QuickCalculator;
pub use QuickParameterFunction;
/// Default number of memory slots for matrices in MemoryCalculator and QuickCalculator.
const INIT_MEMORY_CAPACITY: usize = 3;
// List of default kernel names
/// Names of the default kernels for single-precision (`float`) data.
///
/// Each entry names a `kernel void` function defined in `PROGRAM_SOURCE_FLOAT`;
/// that source currently defines exactly one kernel, `mat_mul`.
const PROGRAM_LIST_FLOAT: [&str; 1] = ["mat_mul"];
/// Names of the default kernels for double-precision (`double`) data.
///
/// Each entry names a `kernel void` function defined in `PROGRAM_SOURCE_DOUBLE`;
/// that source currently defines exactly one kernel, `mat_mul`.
const PROGRAM_LIST_DOUBLE: [&str; 1] = ["mat_mul"];
// Source code for default kernels
/// Kernel source (OpenCL C syntax) for the default single-precision kernels.
///
/// Defines one kernel, `mat_mul`, with arguments in this order:
/// `c` (output buffer), `N`, `K`, `a`, `b` (input buffers).
///
/// Each work-item reads its row from `get_global_id(0)` and its column from
/// `get_global_id(1)`, accumulates `a[row * K + k] * b[k * N + col]` for
/// `k` in `0..K`, and stores the sum in `c[row * N + col]`. The indexing
/// treats `a` as row-major with `K` columns and `b`/`c` as row-major with
/// `N` columns.
///
/// The kernel performs no bounds checks on the work-item ids.
/// NOTE(review): presumably the host launches it with a global work size that
/// exactly matches the output dimensions (rows x `N`) — confirm at the call site.
const PROGRAM_SOURCE_FLOAT: &str = r#"
kernel void mat_mul (
global float* c,
const int N,
const int K,
const global float* a,
const global float* b
) {
const int globalRow = get_global_id(0);
const int globalCol = get_global_id(1);
float interm = 0.0f;
for (int k = 0; k < K; k++) {
interm += a[globalRow * K + k] * b[k * N + globalCol];
}
c[globalRow * N + globalCol] = interm;
}
"#;
/// Kernel source (OpenCL C syntax) for the default double-precision kernels.
///
/// Defines one kernel, `mat_mul`, with arguments in this order:
/// `c` (output buffer), `N`, `K`, `a`, `b` (input buffers).
///
/// Each work-item reads its row from `get_global_id(0)` and its column from
/// `get_global_id(1)`, accumulates `a[row * K + k] * b[k * N + col]` for
/// `k` in `0..K`, and stores the sum in `c[row * N + col]`. The indexing
/// treats `a` as row-major with `K` columns and `b`/`c` as row-major with
/// `N` columns.
///
/// The source starts by enabling the `cl_khr_fp64` extension: OpenCL C 1.0/1.1
/// compilers reject the `double` type unless that pragma is present, while
/// later versions that support double precision still accept it.
///
/// The kernel performs no bounds checks on the work-item ids.
/// NOTE(review): presumably the host launches it with a global work size that
/// exactly matches the output dimensions (rows x `N`) — confirm at the call site.
const PROGRAM_SOURCE_DOUBLE: &str = r#"
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
kernel void mat_mul (
global double* c,
const int N,
const int K,
const global double* a,
const global double* b
) {
const int globalRow = get_global_id(0);
const int globalCol = get_global_id(1);
double interm = 0.0;
for (int k = 0; k < K; k++) {
interm += a[globalRow * K + k] * b[k * N + globalCol];
}
c[globalRow * N + globalCol] = interm;
}
"#;