cuda_std/atomic/
mid.rs

1//! Mid-level intrinsics that take an ordering parameter and emulate specialized
2//! instructions when not available (on lower compute capabilities).
3//!
4//! All functions are gpu-only, they do not work on the CPU.
5
6// rustc thinks we don't use things because of gpu_only
7#![allow(dead_code, unused_imports)]
8
9use super::intrinsics;
10use crate::cfg::ComputeCapability;
11use crate::gpu_only;
12use core::sync::atomic::Ordering::{self, *};
13use paste::paste;
14
15fn ge_sm70() -> bool {
16    ComputeCapability::from_cuda_arch_env() >= ComputeCapability::Compute70
17}
18
19#[gpu_only]
20pub fn device_thread_fence(ordering: Ordering) {
21    unsafe {
22        if ge_sm70() {
23            if ordering == SeqCst {
24                return intrinsics::fence_sc_device();
25            }
26
27            if ordering == Relaxed {
28                return;
29            }
30
31            intrinsics::fence_acqrel_device();
32        } else if ordering != Relaxed {
33            intrinsics::membar_device();
34        }
35    }
36}
37
38#[gpu_only]
39pub fn block_thread_fence(ordering: Ordering) {
40    unsafe {
41        if ge_sm70() {
42            if ordering == SeqCst {
43                return intrinsics::fence_sc_block();
44            }
45
46            if ordering == Relaxed {
47                return;
48            }
49
50            intrinsics::fence_acqrel_block();
51        } else if ordering != Relaxed {
52            intrinsics::membar_block();
53        }
54    }
55}
56
57#[gpu_only]
58pub fn system_thread_fence(ordering: Ordering) {
59    unsafe {
60        if ge_sm70() {
61            if ordering == SeqCst {
62                return intrinsics::fence_sc_system();
63            }
64
65            if ordering == Relaxed {
66                return;
67            }
68
69            intrinsics::fence_acqrel_system();
70        } else if ordering != Relaxed {
71            intrinsics::membar_system();
72        }
73    }
74}
75
76macro_rules! load {
77    ($($type:ty, $width:literal, $scope:ident),* $(,)?) => {
78        $(
79            paste! {
80                #[$crate::gpu_only]
81                #[allow(clippy::missing_safety_doc)]
82                pub unsafe fn [<atomic_load_ $width _ $scope>](ptr: *mut $type, ordering: Ordering) -> $type {
83                    if ge_sm70() {
84                        match ordering {
85                            SeqCst => {
86                                intrinsics::[<fence_sc_ $scope>]();
87                                intrinsics::[<atomic_load_acquire_ $width _ $scope>](ptr)
88                            },
89                            Acquire => {
90                                intrinsics::[<atomic_load_acquire_ $width _ $scope>](ptr)
91                            }
92                            Relaxed => {
93                                intrinsics::[<atomic_load_relaxed_ $width _ $scope>](ptr)
94                            },
95                            _ => panic!("Invalid Ordering for atomic load")
96                        }
97                    } else {
98                        match ordering {
99                            SeqCst => {
100                                intrinsics::[<membar_ $scope>]();
101                                let val = intrinsics::[<atomic_load_volatile_ $width _ $scope>](ptr);
102                                intrinsics::[<membar_ $scope>]();
103                                val
104                            },
105                            Acquire => {
106                                let val = intrinsics::[<atomic_load_volatile_ $width _ $scope>](ptr);
107                                intrinsics::[<membar_ $scope>]();
108                                val
109                            }
110                            Relaxed => {
111                                intrinsics::[<atomic_load_volatile_ $width _ $scope>](ptr)
112                            },
113                            _ => panic!("Invalid Ordering for atomic load")
114                        }
115                    }
116                }
117            }
118        )*
119    }
120}
121
122#[rustfmt::skip]
123load!(
124    u32, 32, device,
125    u64, 64, device,
126    u32, 32, block,
127    u64, 64, block,
128    u32, 32, system,
129    u64, 64, system,
130);
131
132macro_rules! store {
133    ($($type:ty, $width:literal, $scope:ident),* $(,)?) => {
134        $(
135            paste! {
136                #[$crate::gpu_only]
137                #[allow(clippy::missing_safety_doc)]
138                pub unsafe fn [<atomic_store_ $width _ $scope>](ptr: *mut $type, ordering: Ordering, val: $type) {
139                    if ge_sm70() {
140                        match ordering {
141                            SeqCst => {
142                                intrinsics::[<fence_sc_ $scope>]();
143                                intrinsics::[<atomic_store_relaxed_ $width _ $scope>](ptr, val);
144                            },
145                            Release => {
146                                intrinsics::[<atomic_store_release_ $width _ $scope>](ptr, val);
147                            }
148                            Relaxed => {
149                                intrinsics::[<atomic_store_relaxed_ $width _ $scope>](ptr, val);
150                            },
151                            _ => panic!("Invalid Ordering for atomic store")
152                        }
153                    } else {
154                        match ordering {
155                            SeqCst | Release => {
156                                intrinsics::[<membar_ $scope>]();
157                                intrinsics::[<atomic_store_volatile_ $width _ $scope>](ptr, val);
158                            },
159                            Relaxed => {
160                                intrinsics::[<atomic_store_volatile_ $width _ $scope>](ptr, val);
161                            },
162                            _ => panic!("Invalid Ordering for atomic store")
163                        }
164                    }
165                }
166            }
167        )*
168    }
169}
170
171#[rustfmt::skip]
172store!(
173    u32, 32, device,
174    u64, 64, device,
175    u32, 32, block,
176    u64, 64, block,
177    u32, 32, system,
178    u64, 64, system,
179);
180
// Generates one `atomic_fetch_<op>_<type>_<scope>` function per `(type, op, scope)` triple.
//
// Each generated function dispatches on `(ge_sm70(), ordering)`:
//   * `ge_sm70()` true:  `SeqCst` issues `fence_sc_<scope>` followed by the acquire variant;
//                        `Acquire`, `AcqRel`, `Release` and `Relaxed` call the intrinsic
//                        variant of the same name.
//   * `ge_sm70()` false: the volatile variant is used, with `membar_<scope>` issued
//                        before it (`Release`), after it (`Acquire`), or on both sides
//                        (`SeqCst`, `AcqRel`).
//   * Any ordering not listed above hits `unimplemented!`.
macro_rules! inner_fetch_ops_1_param {
    ($($ty:ty, $operation:ident, $scope:ident),* $(,)?) => {
        $(
            paste! {
                /// Applies the operation to the value behind `ptr` with operand `val`,
                /// using the intrinsics selected by `ordering`, and returns what the
                /// intrinsic returns (presumably the previous value — confirm against
                /// the intrinsics).
                #[$crate::gpu_only]
                #[allow(clippy::missing_safety_doc)]
                pub unsafe fn [<atomic_fetch_ $operation _ $ty _ $scope>](ptr: *mut $ty, ordering: Ordering, val: $ty) -> $ty {
                    match (ge_sm70(), ordering) {
                        (true, SeqCst) => {
                            intrinsics::[<fence_sc_ $scope>]();
                            intrinsics::[<atomic_fetch_ $operation _acquire_ $ty _ $scope>](ptr, val)
                        }
                        (true, Acquire) => intrinsics::[<atomic_fetch_ $operation _acquire_ $ty _ $scope>](ptr, val),
                        (true, AcqRel) => intrinsics::[<atomic_fetch_ $operation _acqrel_ $ty _ $scope>](ptr, val),
                        (true, Release) => intrinsics::[<atomic_fetch_ $operation _release_ $ty _ $scope>](ptr, val),
                        (true, Relaxed) => intrinsics::[<atomic_fetch_ $operation _relaxed_ $ty _ $scope>](ptr, val),
                        (false, SeqCst) | (false, AcqRel) => {
                            intrinsics::[<membar_ $scope>]();
                            let fetched = intrinsics::[<atomic_fetch_ $operation _volatile_ $ty _ $scope>](ptr, val);
                            intrinsics::[<membar_ $scope>]();
                            fetched
                        }
                        (false, Acquire) => {
                            let fetched = intrinsics::[<atomic_fetch_ $operation _volatile_ $ty _ $scope>](ptr, val);
                            intrinsics::[<membar_ $scope>]();
                            fetched
                        }
                        (false, Release) => {
                            intrinsics::[<membar_ $scope>]();
                            intrinsics::[<atomic_fetch_ $operation _volatile_ $ty _ $scope>](ptr, val)
                        }
                        (false, Relaxed) => intrinsics::[<atomic_fetch_ $operation _volatile_ $ty _ $scope>](ptr, val),
                        _ => unimplemented!("Weird ordering added by core"),
                    }
                }
            }
        )*
    }
}
228
229macro_rules! fetch_ops_1_param {
230    ($($op:ident => ($($type:ident),*)),* $(,)?) => {
231        $(
232            // every atomic function has a block, device, and system variant
233            inner_fetch_ops_1_param!(
234                $(
235                    $type, $op, block,
236                    $type, $op, device,
237                    $type, $op, system,
238                )*
239            );
240        )*
241    };
242}
243
244fetch_ops_1_param! {
245    and => (u32, u64, i32, i64, f32, f64),
246    or => (u32, u64, i32, i64, f32, f64),
247    xor => (u32, u64, i32, i64, f32, f64),
248    add => (u32, u64, i32, i64, f32, f64),
249    sub => (u32, u64, i32, i64, f32, f64),
250    min => (u32, u64, i32, i64),
251    max => (u32, u64, i32, i64),
252    exch => (u32, u64, i32, i64, f32, f64),
253}