1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
use super::api::{
build_program, create_kernels_in_program, create_program_with_source, release_mem_object,
set_kernel_arg, Kernel,
};
use crate::{BufFlag, CLDevice, Device, Error, Node, Valid};
use std::{any::TypeId, cell::RefCell, collections::HashMap, ffi::c_void, rc::Rc};
#[cfg(feature = "opencl")]
use crate::Buffer;
thread_local! {
    /// Per-thread OpenCL cache.
    ///
    /// Starts out with no cached allocations and no cached kernels; entries are
    /// added through the methods of [`CLCache`].
    pub static CL_CACHE: RefCell<CLCache> = RefCell::new(CLCache {
        kernel_cache: HashMap::new(),
        arg_kernel_cache: HashMap::new(),
        nodes: HashMap::new(),
    });
}
/// Newtype around a raw `*mut c_void` handle.
///
/// The wrapper is `Copy` and compares / hashes by pointer value, which lets
/// raw handles be used as (parts of) hash-map keys.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct OclPtr(pub *mut c_void);

// SAFETY: NOTE(review) - nothing in this type dereferences the pointer; whether
// the wrapped handle may really be moved to or shared with other threads depends
// on the API that produced it. Confirm against the OpenCL runtime in use.
unsafe impl Sync for OclPtr {}
// SAFETY: see the note on the `Sync` impl above; the same assumption applies.
unsafe impl Send for OclPtr {}
/// Raw pointers of one cached allocation.
///
/// Dropping a `RawCL` passes `ptr` to `release_mem_object`.
#[derive(Debug)]
pub struct RawCL {
/// Memory-object handle; this is the pointer handed to `release_mem_object` on drop.
pub ptr: *mut c_void,
/// Host-side pointer of the allocation, stored type-erased as `*mut usize`
/// and cast back to the element type when a cached buffer is handed out.
pub host_ptr: *mut usize,
}
impl Drop for RawCL {
    /// Releases the memory object referenced by `self.ptr`.
    ///
    /// # Panics
    ///
    /// Panics if `release_mem_object` reports a failure, unless the current
    /// thread is already unwinding from another panic. In that case the failure
    /// is ignored, because a second panic raised from a destructor during
    /// unwinding would abort the whole process.
    fn drop(&mut self) {
        // SAFETY: NOTE(review) - assumes `self.ptr` is a live memory object that
        // is owned by this value and released nowhere else; confirm at the
        // places where `RawCL` is constructed.
        let released = unsafe { release_mem_object(self.ptr) };

        // Only surface the error when doing so cannot turn into a double panic.
        if !std::thread::panicking() {
            released.unwrap();
        }
    }
}
/// Key of the argument-aware kernel cache, in tuple order: the device pointers
/// of the input buffers, one `TypeId` per numeric argument, the device pointer
/// of the optional output buffer, and the kernel source string.
type KernelIdent = (Vec<OclPtr>, Vec<TypeId>, Option<OclPtr>, String);
/// Cache of device allocations and compiled kernels.
#[derive(Debug)]
pub struct CLCache {
/// Cached allocations keyed by `Node`. The `Rc<Valid>` is the strong reference;
/// buffers handed out from the cache only carry a `Weak` pointing at it.
pub nodes: HashMap<Node, (RawCL, Rc<Valid>)>,
/// Kernels keyed by their argument pointers, argument types and source (`KernelIdent`).
pub(crate) arg_kernel_cache: HashMap<KernelIdent, Kernel>,
/// Kernels keyed by their source string only.
pub(crate) kernel_cache: HashMap<String, Kernel>,
}
impl CLCache {
    /// Allocates a new buffer of `node.len` elements on `device` and records it
    /// in the cache under `node`.
    ///
    /// The cache keeps the raw pointers (as a [`RawCL`]) together with the
    /// strong `Rc<Valid>`; the returned [`Buffer`] is flagged as
    /// `BufFlag::Cache` and only holds a weak reference to that marker.
    pub fn add_node<T>(&mut self, device: &CLDevice, node: Node) -> Buffer<T> {
        let len = node.len;
        let ptr: (*mut T, *mut c_void, _) = device.alloc(len);

        let valid = Rc::new(Valid);
        let flag = BufFlag::Cache(Rc::downgrade(&valid));

        let raw = RawCL {
            ptr: ptr.1,
            // The host pointer is stored type-erased; `get` casts it back.
            host_ptr: ptr.0 as *mut usize,
        };
        self.nodes.insert(node, (raw, valid));

        Buffer { ptr, len, flag }
    }

    /// Returns a cache-backed buffer of `len` elements from the thread-local
    /// [`CL_CACHE`].
    ///
    /// A `Node` is created for `len` and looked up in the cache. On a hit the
    /// returned buffer reuses the cached pointers; on a miss a new allocation
    /// is made through [`CLCache::add_node`].
    ///
    /// # Panics
    ///
    /// Panics if `CL_CACHE` is already borrowed on this thread (`borrow_mut`).
    pub fn get<T>(device: &CLDevice, len: usize) -> Buffer<T> {
        let node = Node::new(len);

        CL_CACHE.with(|cell| {
            let mut cache = cell.borrow_mut();

            if let Some((raw, valid)) = cache.nodes.get(&node) {
                return Buffer {
                    ptr: (raw.host_ptr as *mut T, raw.ptr, 0),
                    len,
                    flag: BufFlag::Cache(Rc::downgrade(valid)),
                };
            }

            cache.add_node(device, node)
        })
    }

    /// Returns a kernel built from `src` with its arguments already set,
    /// compiling it only the first time a given combination is seen.
    ///
    /// The cache key consists of the device pointers of `buffers`, one `TypeId`
    /// per entry of `numbers`, the device pointer of `output` and `src`.
    ///
    /// * `buffers` - input buffers paired with their kernel argument index.
    /// * `numbers` - scalar arguments paired with their kernel argument index.
    /// * `output` - optional output buffer; it is bound to argument index
    ///   `buffers.len() + numbers.len()`.
    /// * `src` - kernel source code.
    ///
    /// The scalar values are not part of the cache key, so they are written to
    /// the kernel again on every cache hit. Otherwise a cached kernel would
    /// keep the scalar values of the call that first created it.
    ///
    /// # Errors
    ///
    /// Propagates any error from creating or building the program, creating
    /// the kernels, or setting a kernel argument.
    ///
    /// # Panics
    ///
    /// Takes the first kernel of the program by indexing, which presumably
    /// panics when the program defines no kernel.
    pub(crate) fn arg_kernel_cache<T: 'static>(
        &mut self,
        device: &CLDevice,
        buffers: &[(&Buffer<T>, usize)],
        numbers: &[(T, usize)],
        output: Option<&Buffer<T>>,
        src: String,
    ) -> Result<Kernel, Error> {
        let type_ids = vec![TypeId::of::<T>(); numbers.len()];
        let mems: Vec<OclPtr> = buffers
            .iter()
            .map(|(buf, _)| OclPtr(buf.ptr.1))
            .collect();
        let outputmem = output.map(|out| OclPtr(out.ptr.1));
        let output_idx = mems.len() + type_ids.len();

        // Build the key once; it serves both the lookup and the later insert,
        // so neither the vectors nor the source string have to be cloned.
        let key = (mems, type_ids, outputmem, src);

        if let Some(&kernel) = self.arg_kernel_cache.get(&key) {
            // Buffer and output pointers are part of the key and therefore
            // unchanged; only the scalar values may differ from the cached call.
            for (number, idx) in numbers {
                set_kernel_arg(&kernel, *idx, number)?;
            }
            return Ok(kernel);
        }

        let program = create_program_with_source(&device.ctx(), &key.3)?;
        build_program(&program, &[device.device()], Some("-cl-std=CL1.2"))?;
        let kernel = create_kernels_in_program(&program)?[0];

        for (number, idx) in numbers {
            set_kernel_arg(&kernel, *idx, number)?;
        }
        for (buf, idx) in buffers {
            set_kernel_arg(&kernel, *idx, &(buf.ptr.1))?;
        }
        if let Some(mem) = outputmem {
            set_kernel_arg(&kernel, output_idx, &mem)?;
        }

        self.arg_kernel_cache.insert(key, kernel);
        Ok(kernel)
    }

    /// Returns the kernel for `src`, compiling and caching it on first use.
    ///
    /// Unlike [`CLCache::arg_kernel_cache`], the kernel is cached by source
    /// only and no arguments are set here.
    ///
    /// # Errors
    ///
    /// Propagates any error from creating or building the program or from
    /// creating its kernels.
    ///
    /// # Panics
    ///
    /// Takes the first kernel of the program by indexing, which presumably
    /// panics when the program defines no kernel.
    pub fn arg_kernel_cache1(&mut self, device: &CLDevice, src: String) -> Result<Kernel, Error> {
        if let Some(&cached) = self.kernel_cache.get(&src) {
            return Ok(cached);
        }

        let program = create_program_with_source(&device.ctx(), &src)?;
        build_program(&program, &[device.device()], Some("-cl-std=CL1.2"))?;
        let kernel = create_kernels_in_program(&program)?[0];

        self.kernel_cache.insert(src, kernel);
        Ok(kernel)
    }
}