#![allow(dead_code, unused_imports)]

use super::intrinsics;
use crate::cfg::ComputeCapability;
use crate::gpu_only;
use core::sync::atomic::Ordering::{self, *};
use paste::paste;

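// True when the target compute capability (as reported by
// `ComputeCapability::from_cuda_arch_env`) is at least 7.0 (Volta), where PTX
// provides acquire/release/seq-cst qualified loads, stores, and fences.
// Older targets fall back to volatile accesses bracketed by `membar`.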
fn ge_sm70() -> bool {
    ComputeCapability::from_cuda_arch_env() >= ComputeCapability::Compute70
}

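/// Device-scope (whole GPU) memory fence for the given [`Ordering`].
///
/// On compute capability 7.0+ this issues a sequentially consistent fence for
/// `SeqCst`, an acquire-release fence for every other non-`Relaxed` ordering,
/// and nothing for `Relaxed`. On older architectures every non-`Relaxed`
/// ordering falls back to a device-scope `membar`.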
#[gpu_only]
pub fn device_thread_fence(ordering: Ordering) {
    unsafe {
        if ge_sm70() {
            if ordering == SeqCst {
                return intrinsics::fence_sc_device();
            }

            if ordering == Relaxed {
                return;
            }

            intrinsics::fence_acqrel_device();
        } else if ordering != Relaxed {
            intrinsics::membar_device();
        }
    }
}

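/// Block-scope (CTA) memory fence for the given [`Ordering`].
///
/// Same ordering mapping as [`device_thread_fence`], but the fence only orders
/// memory accesses as observed by threads in the same thread block.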
#[gpu_only]
pub fn block_thread_fence(ordering: Ordering) {
    unsafe {
        if ge_sm70() {
            if ordering == SeqCst {
                return intrinsics::fence_sc_block();
            }

            if ordering == Relaxed {
                return;
            }

            intrinsics::fence_acqrel_block();
        } else if ordering != Relaxed {
            intrinsics::membar_block();
        }
    }
}

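/// System-scope memory fence for the given [`Ordering`].
///
/// Same ordering mapping as [`device_thread_fence`], but ordering is visible
/// system-wide (other GPUs and the host), not just within this device.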
#[gpu_only]
pub fn system_thread_fence(ordering: Ordering) {
    unsafe {
        if ge_sm70() {
            if ordering == SeqCst {
                return intrinsics::fence_sc_system();
            }

            if ordering == Relaxed {
                return;
            }

            intrinsics::fence_acqrel_system();
        } else if ordering != Relaxed {
            intrinsics::membar_system();
        }
    }
}

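// Generates `atomic_load_{width}_{scope}` functions (e.g. `atomic_load_32_device`).
// On compute capability 7.0+, `SeqCst` is lowered to a seq-cst fence followed by an
// acquire load, while `Acquire` and `Relaxed` map directly to the corresponding
// acquire/relaxed load intrinsics. On older architectures a volatile load is
// bracketed with `membar` as required. `Release` and `AcqRel` are not valid load
// orderings and panic.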
macro_rules! load {
    ($($type:ty, $width:literal, $scope:ident),* $(,)?) => {
        $(
            paste! {
                #[$crate::gpu_only]
                #[allow(clippy::missing_safety_doc)]
                pub unsafe fn [<atomic_load_ $width _ $scope>](ptr: *mut $type, ordering: Ordering) -> $type {
                    if ge_sm70() {
                        match ordering {
                            SeqCst => {
                                intrinsics::[<fence_sc_ $scope>]();
                                intrinsics::[<atomic_load_acquire_ $width _ $scope>](ptr)
                            },
                            Acquire => {
                                intrinsics::[<atomic_load_acquire_ $width _ $scope>](ptr)
                            }
                            Relaxed => {
                                intrinsics::[<atomic_load_relaxed_ $width _ $scope>](ptr)
                            },
                            _ => panic!("Invalid Ordering for atomic load")
                        }
                    } else {
                        match ordering {
                            SeqCst => {
                                intrinsics::[<membar_ $scope>]();
                                let val = intrinsics::[<atomic_load_volatile_ $width _ $scope>](ptr);
                                intrinsics::[<membar_ $scope>]();
                                val
                            },
                            Acquire => {
                                let val = intrinsics::[<atomic_load_volatile_ $width _ $scope>](ptr);
                                intrinsics::[<membar_ $scope>]();
                                val
                            }
                            Relaxed => {
                                intrinsics::[<atomic_load_volatile_ $width _ $scope>](ptr)
                            },
                            _ => panic!("Invalid Ordering for atomic load")
                        }
                    }
                }
            }
        )*
    }
}

#[rustfmt::skip]
load!(
    u32, 32, device,
    u64, 64, device,
    u32, 32, block,
    u64, 64, block,
    u32, 32, system,
    u64, 64, system,
);

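// Generates `atomic_store_{width}_{scope}` functions (e.g. `atomic_store_32_device`).
// On compute capability 7.0+, `SeqCst` becomes a seq-cst fence followed by a relaxed
// store, while `Release` and `Relaxed` map directly to the corresponding store
// intrinsics. On older architectures the releasing orderings precede a volatile store
// with `membar`. `Acquire` and `AcqRel` are not valid store orderings and panic.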
macro_rules! store {
    ($($type:ty, $width:literal, $scope:ident),* $(,)?) => {
        $(
            paste! {
                #[$crate::gpu_only]
                #[allow(clippy::missing_safety_doc)]
                pub unsafe fn [<atomic_store_ $width _ $scope>](ptr: *mut $type, ordering: Ordering, val: $type) {
                    if ge_sm70() {
                        match ordering {
                            SeqCst => {
                                intrinsics::[<fence_sc_ $scope>]();
                                intrinsics::[<atomic_store_relaxed_ $width _ $scope>](ptr, val);
                            },
                            Release => {
                                intrinsics::[<atomic_store_release_ $width _ $scope>](ptr, val);
                            }
                            Relaxed => {
                                intrinsics::[<atomic_store_relaxed_ $width _ $scope>](ptr, val);
                            },
                            _ => panic!("Invalid Ordering for atomic store")
                        }
                    } else {
                        match ordering {
                            SeqCst | Release => {
                                intrinsics::[<membar_ $scope>]();
                                intrinsics::[<atomic_store_volatile_ $width _ $scope>](ptr, val);
                            },
                            Relaxed => {
                                intrinsics::[<atomic_store_volatile_ $width _ $scope>](ptr, val);
                            },
                            _ => panic!("Invalid Ordering for atomic store")
                        }
                    }
                }
            }
        )*
    }
}

#[rustfmt::skip]
store!(
    u32, 32, device,
    u64, 64, device,
    u32, 32, block,
    u64, 64, block,
    u32, 32, system,
    u64, 64, system,
);

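// Illustrative only: a minimal sketch of a release/acquire flag handoff using the
// generated wrappers, assuming `flag` points to a zero-initialized `u32` in global
// memory. The `publish` and `wait_for_flag` names are hypothetical and not part of
// this module:
//
//     unsafe fn publish(flag: *mut u32) {
//         atomic_store_32_device(flag, Release, 1);
//     }
//
//     unsafe fn wait_for_flag(flag: *mut u32) {
//         while atomic_load_32_device(flag, Acquire) == 0 {}
//     }

// Generates `atomic_fetch_{op}_{type}_{scope}` read-modify-write wrappers that return
// the previous value. On compute capability 7.0+ each ordering maps to the matching
// acquire/acq_rel/release/relaxed intrinsic (`SeqCst` adds a leading seq-cst fence);
// on older architectures the volatile intrinsic is bracketed with `membar` calls
// according to the requested ordering.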
macro_rules! inner_fetch_ops_1_param {
    ($($type:ty, $op:ident, $scope:ident),* $(,)?) => {
        $(
            paste! {
                #[$crate::gpu_only]
                #[allow(clippy::missing_safety_doc)]
                pub unsafe fn [<atomic_fetch_ $op _ $type _ $scope>](ptr: *mut $type, ordering: Ordering, val: $type) -> $type {
                    if ge_sm70() {
                        match ordering {
                            SeqCst => {
                                intrinsics::[<fence_sc_ $scope>]();
                                intrinsics::[<atomic_fetch_ $op _acquire_ $type _ $scope>](ptr, val)
                            },
                            Acquire => intrinsics::[<atomic_fetch_ $op _acquire_ $type _ $scope>](ptr, val),
                            AcqRel => intrinsics::[<atomic_fetch_ $op _acqrel_ $type _ $scope>](ptr, val),
                            Release => intrinsics::[<atomic_fetch_ $op _release_ $type _ $scope>](ptr, val),
                            Relaxed => intrinsics::[<atomic_fetch_ $op _relaxed_ $type _ $scope>](ptr, val),
                            _ => unimplemented!("Weird ordering added by core")
                        }
                    } else {
                        match ordering {
                            SeqCst | AcqRel => {
                                intrinsics::[<membar_ $scope>]();
                                let val = intrinsics::[<atomic_fetch_ $op _volatile_ $type _ $scope>](ptr, val);
                                intrinsics::[<membar_ $scope>]();
                                val
                            },
                            Acquire => {
                                let val = intrinsics::[<atomic_fetch_ $op _volatile_ $type _ $scope>](ptr, val);
                                intrinsics::[<membar_ $scope>]();
                                val
                            },
                            Release => {
                                intrinsics::[<membar_ $scope>]();
                                intrinsics::[<atomic_fetch_ $op _volatile_ $type _ $scope>](ptr, val)
                            },
                            Relaxed => {
                                intrinsics::[<atomic_fetch_ $op _volatile_ $type _ $scope>](ptr, val)
                            },
                            _ => unimplemented!("Weird ordering added by core")
                        }
                    }
                }
            }
        )*
    }
}

macro_rules! fetch_ops_1_param {
    ($($op:ident => ($($type:ident),*)),* $(,)?) => {
        $(
            inner_fetch_ops_1_param!(
                $(
                    $type, $op, block,
                    $type, $op, device,
                    $type, $op, system,
                )*
            );
        )*
    };
}

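// Instantiates the fetch-and-op wrappers for every (operation, type) pair below at
// block, device, and system scope. For example, `add => (u32, ...)` produces
// `atomic_fetch_add_u32_block`, `atomic_fetch_add_u32_device`, and
// `atomic_fetch_add_u32_system`.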
fetch_ops_1_param! {
    and => (u32, u64, i32, i64, f32, f64),
    or => (u32, u64, i32, i64, f32, f64),
    xor => (u32, u64, i32, i64, f32, f64),
    add => (u32, u64, i32, i64, f32, f64),
    sub => (u32, u64, i32, i64, f32, f64),
    min => (u32, u64, i32, i64),
    max => (u32, u64, i32, i64),
    exch => (u32, u64, i32, i64, f32, f64),
}