atomic_maybe_uninit/arch/aarch64.rs
1// SPDX-License-Identifier: Apache-2.0 OR MIT
2
3/*
4AArch64
5
6See "Atomic operation overview by architecture" for atomic operations in this architecture:
7https://github.com/taiki-e/atomic-maybe-uninit/blob/HEAD/src/arch/README.md#aarch64
8
9Refs:
10- Arm A-profile A64 Instruction Set Architecture
11 https://developer.arm.com/documentation/ddi0602/2025-06
12- C/C++ Atomics Application Binary Interface Standard for the Arm® 64-bit Architecture
13 https://github.com/ARM-software/abi-aa/blob/2025Q1/atomicsabi64/atomicsabi64.rst
14- Arm® Compiler armasm User Guide
15 https://developer.arm.com/documentation/dui0801/latest
16- Arm® Architecture Reference Manual for A-profile architecture
17 https://developer.arm.com/documentation/ddi0487/latest (PDF)
18- Arm® Architecture Reference Manual Supplement Armv8, for R-profile AArch64 architecture
19 https://developer.arm.com/documentation/ddi0600/latest (PDF)
20- portable-atomic https://github.com/taiki-e/portable-atomic
21
22Generated asm:
23- aarch64 https://godbolt.org/z/e8Wesj5WP
24- aarch64 msvc https://godbolt.org/z/jcTW8Eafo
25- aarch64 (+lse) https://godbolt.org/z/999aq8jGE
26- aarch64 msvc (+lse) https://godbolt.org/z/TodsEnfz6
27- aarch64 (+lse,+lse2) https://godbolt.org/z/r1M5GYWEj
28- aarch64 (+lse,+lse2,+rcpc3) https://godbolt.org/z/c9dnqxM45
29- aarch64 (+rcpc) https://godbolt.org/z/Ezc49YK6h
30- aarch64 (+lse2,+lse128) https://godbolt.org/z/GMdboxzjc
31- aarch64 (+lse2,+lse128,+rcpc3) https://godbolt.org/z/9beasofnd
32*/
33
// Expand the per-size delegation boilerplate for this architecture; the macro is
// defined elsewhere in this crate.
// NOTE(review): `delegate_all` presumably selects delegation for every width this
// arch supports — confirm against the macro definition.
delegate_size!(delegate_all);
35
36use core::{
37 arch::asm,
38 mem::{self, MaybeUninit},
39 sync::atomic::Ordering,
40};
41
42use crate::{
43 raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap},
44 utils::{MaybeUninit128, Pair},
45};
46
// Expands `$op!` with the three asm fragments that implement `$order`:
// - 1st fragment: "a" to select the acquire form of the instruction, "" otherwise.
// - 2nd fragment: "l" to select the release form of the instruction, "" otherwise.
// - 3rd fragment: a trailing fence instruction ("dmb ish") or "".
//
// The `write = $write` form lets compare_exchange pass its *success* ordering
// separately, so the MSVC SeqCst fence is only emitted when the write side is
// actually SeqCst.
macro_rules! atomic_rmw {
    ($op:ident, $order:ident) => {
        atomic_rmw!($op, $order, write = $order)
    };
    ($op:ident, $order:ident, write = $write:ident) => {
        match $order {
            Ordering::Relaxed => $op!("", "", ""),
            Ordering::Acquire => $op!("a", "", ""),
            Ordering::Release => $op!("", "l", ""),
            Ordering::AcqRel => $op!("a", "l", ""),
            // In MSVC environments, SeqCst stores/writes need fences after writes.
            // https://reviews.llvm.org/D141748
            #[cfg(target_env = "msvc")]
            Ordering::SeqCst if $write == Ordering::SeqCst => $op!("a", "l", "dmb ish"),
            // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments.
            Ordering::SeqCst => $op!("a", "l", ""),
            _ => unreachable!(),
        }
    };
}
67
// Implements AtomicLoad/AtomicStore/AtomicSwap/AtomicCompareExchange for the
// 8-, 16-, 32-, and 64-bit integer types (signed variants are delegated).
//
// Macro parameters:
// - $ty: the unsigned integer type.
// - $suffix: instruction size suffix ("b" byte, "h" halfword, "" word/doubleword).
// - $val_modifier: asm operand modifier (":w" selects the 32-bit register view,
//   "" the 64-bit view).
// - $cmp_ext: extra CMP operand that zero-extends the comparison for sub-word
//   types (", uxtb" / ", uxth" / "").
#[rustfmt::skip]
macro_rules! atomic {
    ($ty:ident, $suffix:tt, $val_modifier:tt, $cmp_ext:tt) => {
        delegate_signed!(delegate_all, $ty);
        impl AtomicLoad for $ty {
            #[inline]
            unsafe fn atomic_load(
                src: *const MaybeUninit<Self>,
                order: Ordering,
            ) -> MaybeUninit<Self> {
                debug_assert_atomic_unsafe_precondition!(src, $ty);
                let out: MaybeUninit<Self>;

                // SAFETY: the caller must uphold the safety contract.
                unsafe {
                    macro_rules! atomic_load {
                        ($acquire:tt) => {
                            asm!(
                                concat!("ld", $acquire, "r", $suffix, " {out", $val_modifier, "}, [{src}]"), // atomic { out = *src }
                                src = in(reg) ptr_reg!(src),
                                out = lateout(reg) out,
                                options(nostack, preserves_flags),
                            )
                        };
                    }
                    match order {
                        Ordering::Relaxed => atomic_load!(""),
                        // ldapr (RCpc acquire) is sufficient for Acquire loads.
                        // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC.
                        #[cfg(target_feature = "rcpc")]
                        Ordering::Acquire => atomic_load!("ap"),
                        #[cfg(not(target_feature = "rcpc"))]
                        Ordering::Acquire => atomic_load!("a"),
                        Ordering::SeqCst => atomic_load!("a"),
                        _ => unreachable!(),
                    }
                }
                out
            }
        }
        impl AtomicStore for $ty {
            #[inline]
            unsafe fn atomic_store(
                dst: *mut MaybeUninit<Self>,
                val: MaybeUninit<Self>,
                order: Ordering,
            ) {
                debug_assert_atomic_unsafe_precondition!(dst, $ty);

                // SAFETY: the caller must uphold the safety contract.
                unsafe {
                    macro_rules! atomic_store {
                        ($release:tt, $fence:tt) => {
                            asm!(
                                concat!("st", $release, "r", $suffix, " {val", $val_modifier, "}, [{dst}]"), // atomic { *dst = val }
                                $fence, // fence
                                dst = in(reg) ptr_reg!(dst),
                                val = in(reg) val,
                                options(nostack, preserves_flags),
                            )
                        };
                    }
                    match order {
                        Ordering::Relaxed => atomic_store!("", ""),
                        Ordering::Release => atomic_store!("l", ""),
                        // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments.
                        #[cfg(not(target_env = "msvc"))]
                        Ordering::SeqCst => atomic_store!("l", ""),
                        // In MSVC environments, SeqCst stores/writes needs fences after writes.
                        // https://reviews.llvm.org/D141748
                        #[cfg(target_env = "msvc")]
                        Ordering::SeqCst => atomic_store!("l", "dmb ish"),
                        _ => unreachable!(),
                    }
                }
            }
        }
        impl AtomicSwap for $ty {
            #[inline]
            unsafe fn atomic_swap(
                dst: *mut MaybeUninit<Self>,
                val: MaybeUninit<Self>,
                order: Ordering,
            ) -> MaybeUninit<Self> {
                debug_assert_atomic_unsafe_precondition!(dst, $ty);
                let mut out: MaybeUninit<Self>;

                // SAFETY: the caller must uphold the safety contract.
                unsafe {
                    // NOTE(review): unlike the 128-bit code below, these cfgs check only
                    // `target_feature = "lse"` and not `atomic_maybe_uninit_target_feature` —
                    // confirm whether that asymmetry is intentional.
                    #[cfg(target_feature = "lse")]
                    macro_rules! swap {
                        ($acquire:tt, $release:tt, $fence:tt) => {
                            // Refs:
                            // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/SWP--SWPA--SWPAL--SWPL--Swap-word-or-doubleword-in-memory-
                            // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/SWPB--SWPAB--SWPALB--SWPLB--Swap-byte-in-memory-
                            // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/SWPH--SWPAH--SWPALH--SWPLH--Swap-halfword-in-memory-
                            asm!(
                                concat!("swp", $acquire, $release, $suffix, " {val", $val_modifier, "}, {out", $val_modifier, "}, [{dst}]"), // atomic { _x = *dst; *dst = val; out = _x }
                                $fence, // fence
                                dst = in(reg) ptr_reg!(dst),
                                val = in(reg) val,
                                out = lateout(reg) out,
                                options(nostack, preserves_flags),
                            )
                        };
                    }
                    #[cfg(not(target_feature = "lse"))]
                    macro_rules! swap {
                        ($acquire:tt, $release:tt, $fence:tt) => {
                            asm!(
                                "2:", // 'retry:
                                concat!("ld", $acquire, "xr", $suffix, " {out", $val_modifier, "}, [{dst}]"), // atomic { out = *dst; EXCLUSIVE = dst }
                                concat!("st", $release, "xr", $suffix, " {r:w}, {val", $val_modifier, "}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = val; r = 0 } else { r = 1 }; EXCLUSIVE = None }
                                "cbnz {r:w}, 2b", // if r != 0 { jump 'retry }
                                $fence, // fence
                                dst = in(reg) ptr_reg!(dst),
                                val = in(reg) val,
                                out = out(reg) out,
                                r = out(reg) _,
                                options(nostack, preserves_flags),
                            )
                        };
                    }
                    atomic_rmw!(swap, order);
                }
                out
            }
        }
        impl AtomicCompareExchange for $ty {
            #[inline]
            unsafe fn atomic_compare_exchange(
                dst: *mut MaybeUninit<Self>,
                old: MaybeUninit<Self>,
                new: MaybeUninit<Self>,
                success: Ordering,
                failure: Ordering,
            ) -> (MaybeUninit<Self>, bool) {
                debug_assert_atomic_unsafe_precondition!(dst, $ty);
                let order = crate::utils::upgrade_success_ordering(success, failure);
                let mut out: MaybeUninit<Self>;

                // SAFETY: the caller must uphold the safety contract.
                unsafe {
                    let mut r: i32;
                    #[cfg(target_feature = "lse")]
                    macro_rules! cmpxchg {
                        ($acquire:tt, $release:tt, $fence:tt) => {{
                            // Refs:
                            // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/CAS--CASA--CASAL--CASL--Compare-and-swap-word-or-doubleword-in-memory-
                            // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/CASB--CASAB--CASALB--CASLB--Compare-and-swap-byte-in-memory-
                            // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/CASH--CASAH--CASALH--CASLH--Compare-and-swap-halfword-in-memory-
                            asm!(
                                // cas writes the current value to the first register,
                                // so copy the `old`'s value for later comparison.
                                concat!("mov {out", $val_modifier, "}, {old", $val_modifier, "}"), // out = old
                                concat!("cas", $acquire, $release, $suffix, " {out", $val_modifier, "}, {new", $val_modifier, "}, [{dst}]"), // atomic { if *dst == out { *dst = new } else { out = *dst } }
                                $fence, // fence
                                concat!("cmp {out", $val_modifier, "}, {old", $val_modifier, "}", $cmp_ext), // if out == old { Z = 1 } else { Z = 0 }
                                "cset {r:w}, eq", // r = Z
                                dst = in(reg) ptr_reg!(dst),
                                old = in(reg) old,
                                new = in(reg) new,
                                out = out(reg) out,
                                r = lateout(reg) r,
                                // Do not use `preserves_flags` because CMP modifies the condition flags.
                                options(nostack),
                            );
                            crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
                            // r == 1 iff the stored value equaled `old` (i.e. the exchange succeeded).
                            (out, r != 0)
                        }};
                    }
                    #[cfg(not(target_feature = "lse"))]
                    macro_rules! cmpxchg {
                        ($acquire:tt, $release:tt, $fence:tt) => {{
                            asm!(
                                "2:", // 'retry:
                                concat!("ld", $acquire, "xr", $suffix, " {out", $val_modifier, "}, [{dst}]"), // atomic { out = *dst; EXCLUSIVE = dst }
                                concat!("cmp {out", $val_modifier, "}, {old", $val_modifier, "}", $cmp_ext), // if out == old { Z = 1 } else { Z = 0 }
                                "b.ne 3f", // if Z == 0 { jump 'cmp-fail }
                                concat!("st", $release, "xr", $suffix, " {r:w}, {new", $val_modifier, "}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = new; r = 0 } else { r = 1 }; EXCLUSIVE = None }
                                "cbnz {r:w}, 2b", // if r != 0 { jump 'retry }
                                $fence, // fence
                                "b 4f", // jump 'success
                                "3:", // 'cmp-fail:
                                "mov {r:w}, #1", // r = 1
                                "clrex", // EXCLUSIVE = None
                                "4:", // 'success:
                                dst = in(reg) ptr_reg!(dst),
                                old = in(reg) old,
                                new = in(reg) new,
                                out = out(reg) out,
                                r = out(reg) r,
                                // Do not use `preserves_flags` because CMP modifies the condition flags.
                                options(nostack),
                            );
                            crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
                            // 0 if the store was successful, 1 if no store was performed
                            (out, r == 0)
                        }};
                    }
                    atomic_rmw!(cmpxchg, order, write = success)
                }
            }
            // Weak CAS (single LL/SC attempt, spurious failure allowed) is only
            // worthwhile without FEAT_LSE; with LSE the strong CAS above is used.
            #[cfg(not(target_feature = "lse"))]
            #[inline]
            unsafe fn atomic_compare_exchange_weak(
                dst: *mut MaybeUninit<Self>,
                old: MaybeUninit<Self>,
                new: MaybeUninit<Self>,
                success: Ordering,
                failure: Ordering,
            ) -> (MaybeUninit<Self>, bool) {
                debug_assert_atomic_unsafe_precondition!(dst, $ty);
                let order = crate::utils::upgrade_success_ordering(success, failure);
                let mut out: MaybeUninit<Self>;

                // SAFETY: the caller must uphold the safety contract.
                unsafe {
                    let r: i32;
                    macro_rules! cmpxchg_weak {
                        ($acquire:tt, $release:tt, $fence:tt) => {
                            asm!(
                                concat!("ld", $acquire, "xr", $suffix, " {out", $val_modifier, "}, [{dst}]"), // atomic { out = *dst; EXCLUSIVE = dst }
                                concat!("cmp {out", $val_modifier, "}, {old", $val_modifier, "}", $cmp_ext), // if out == old { Z = 1 } else { Z = 0 }
                                "b.ne 3f", // if Z == 0 { jump 'cmp-fail }
                                concat!("st", $release, "xr", $suffix, " {r:w}, {new", $val_modifier, "}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = new; r = 0 } else { r = 1 }; EXCLUSIVE = None }
                                // TODO: emit fence only when the above sc succeed?
                                // "cbnz {r:w}, 4f",
                                $fence, // fence
                                "b 4f", // jump 'success
                                "3:", // 'cmp-fail:
                                "mov {r:w}, #1", // r = 1
                                "clrex", // EXCLUSIVE = None
                                "4:", // 'success:
                                dst = in(reg) ptr_reg!(dst),
                                old = in(reg) old,
                                new = in(reg) new,
                                out = out(reg) out,
                                r = out(reg) r,
                                // Do not use `preserves_flags` because CMP modifies the condition flags.
                                options(nostack),
                            )
                        };
                    }
                    atomic_rmw!(cmpxchg_weak, order, write = success);
                    crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
                    // 0 if the store was successful, 1 if no store was performed
                    (out, r == 0)
                }
            }
        }
    };
}
320
// Arguments: type, size suffix, register-view modifier, CMP zero-extend operand
// (sub-word types must zero-extend before comparing).
atomic!(u8, "b", ":w", ", uxtb");
atomic!(u16, "h", ":w", ", uxth");
atomic!(u32, "", ":w", "");
atomic!(u64, "", "", "");
325
326// There are a few ways to implement 128-bit atomic operations in AArch64.
327//
328// - LDXP/STXP loop (DW LL/SC)
329// - CASP (DWCAS) added as Armv8.1 FEAT_LSE (optional from Armv8.0, mandatory from Armv8.1)
330// - LDP/STP (DW load/store) if Armv8.4 FEAT_LSE2 (optional from Armv8.2, mandatory from Armv8.4) is available
331// - LDIAPP/STILP (DW acquire-load/release-store) added as Armv8.9 FEAT_LRCPC3 (optional from Armv8.2) (if FEAT_LSE2 is also available)
332// - LDCLRP/LDSETP/SWPP (DW RMW) added as Armv9.4 FEAT_LSE128 (optional from Armv9.3)
333//
334// If FEAT_LSE is available at compile-time, we use CASP for load/CAS. Otherwise, use LDXP/STXP loop.
335// If FEAT_LSE2 is available at compile-time, we use LDP/STP for load/store.
336// If FEAT_LSE128 is available at compile-time, we use SWPP for swap/{release,seqcst}-store.
337// If FEAT_LSE2 and FEAT_LRCPC3 are available at compile-time, we use LDIAPP/STILP for acquire-load/release-store.
338//
339// Note: FEAT_LSE2 doesn't imply FEAT_LSE. FEAT_LSE128 implies FEAT_LSE but not FEAT_LSE2.
340//
341// Refs:
342// - LDXP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/LDXP--Load-exclusive-pair-of-registers-
343// - LDAXP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/LDAXP--Load-acquire-exclusive-pair-of-registers-
344// - STXP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/STXP--Store-exclusive-pair-of-registers-
345// - STLXP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/STLXP--Store-release-exclusive-pair-of-registers-
346//
347// Note: Load-Exclusive pair (by itself) does not guarantee atomicity; to complete an atomic
348// operation (even load/store), a corresponding Store-Exclusive pair must succeed.
349// See Arm Architecture Reference Manual for A-profile architecture
350// Section B2.2.1 "Requirements for single-copy atomicity", and
351// Section B2.9 "Synchronization and semaphores" for more.
// Implements the 128-bit atomic operations for `$ty` (u128; the signed variant
// is delegated). Values are passed/returned as lo/hi 64-bit register pairs via
// `MaybeUninit128`/`Pair`. See the implementation-strategy comment above for
// which instruction sequence each feature combination selects.
macro_rules! atomic128 {
    ($ty:ident) => {
        delegate_signed!(delegate_all, $ty);
        impl AtomicLoad for $ty {
            #[inline]
            unsafe fn atomic_load(
                src: *const MaybeUninit<Self>,
                order: Ordering,
            ) -> MaybeUninit<Self> {
                debug_assert_atomic_unsafe_precondition!(src, $ty);
                let (mut prev_lo, mut prev_hi);

                #[cfg(any(target_feature = "lse2", atomic_maybe_uninit_target_feature = "lse2"))]
                // SAFETY: the caller must guarantee that `dst` is valid for reads,
                // 16-byte aligned, that there are no concurrent non-atomic operations.
                // the above cfg guarantee that the CPU supports FEAT_LSE2.
                //
                // Refs:
                // - LDP https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/LDP--Load-pair-of-registers-
                // - LDIAPP https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/LDIAPP--Load-Acquire-RCpc-ordered-pair-of-registers-
                unsafe {
                    macro_rules! atomic_load_relaxed {
                        ($iap:tt, $dmb_ishld:tt) => {
                            asm!(
                                concat!("ld", $iap, "p {prev_lo}, {prev_hi}, [{src}]"), // atomic { prev_lo:prev_hi = *src }
                                $dmb_ishld, // fence
                                src = in(reg) ptr_reg!(src),
                                prev_hi = lateout(reg) prev_hi,
                                prev_lo = lateout(reg) prev_lo,
                                options(nostack, preserves_flags),
                            )
                        };
                    }
                    match order {
                        // if FEAT_LRCPC3 && order != relaxed => ldiapp
                        // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC3.
                        #[cfg(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3"))]
                        Ordering::Acquire => atomic_load_relaxed!("iap", ""),
                        #[cfg(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3"))]
                        Ordering::SeqCst => {
                            asm!(
                                // ldar (or dmb ishld) is required to prevent reordering with preceding stlxp.
                                // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108891
                                "ldar {tmp}, [{src}]", // atomic { tmp = *src }
                                "ldiapp {prev_lo}, {prev_hi}, [{src}]", // atomic { prev_lo:prev_hi = *src }
                                src = in(reg) ptr_reg!(src),
                                prev_hi = lateout(reg) prev_hi,
                                prev_lo = lateout(reg) prev_lo,
                                tmp = out(reg) _,
                                options(nostack, preserves_flags),
                            );
                        }

                        // else => ldp
                        Ordering::Relaxed => atomic_load_relaxed!("", ""),
                        #[cfg(not(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3")))]
                        Ordering::Acquire => atomic_load_relaxed!("", "dmb ishld"),
                        #[cfg(not(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3")))]
                        Ordering::SeqCst => {
                            asm!(
                                // ldar (or dmb ishld) is required to prevent reordering with preceding stlxp.
                                // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108891
                                "ldar {tmp}, [{src}]", // atomic { tmp = *src }
                                "ldp {prev_lo}, {prev_hi}, [{src}]", // atomic { prev_lo:prev_hi = *src }
                                "dmb ishld", // fence
                                src = in(reg) ptr_reg!(src),
                                prev_hi = lateout(reg) prev_hi,
                                prev_lo = lateout(reg) prev_lo,
                                tmp = out(reg) _,
                                options(nostack, preserves_flags),
                            );
                        }
                        _ => unreachable!(),
                    }
                    MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
                }
                #[cfg(not(any(target_feature = "lse2", atomic_maybe_uninit_target_feature = "lse2")))]
                // SAFETY: the caller must uphold the safety contract.
                unsafe {
                    // NOTE(review): this cfg checks only `target_feature = "lse"`, without the
                    // `atomic_maybe_uninit_target_feature` alternative used by the lse2/rcpc3/lse128
                    // cfgs in this macro — confirm whether that asymmetry is intentional.
                    #[cfg(target_feature = "lse")]
                    macro_rules! atomic_load {
                        ($acquire:tt, $release:tt) => {
                            asm!(
                                // Refs: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/CASP--CASPA--CASPAL--CASPL--Compare-and-swap-pair-of-words-or-doublewords-in-memory-
                                concat!("casp", $acquire, $release, " x2, x3, x2, x3, [{src}]"), // atomic { if *src == x2:x3 { *src = x2:x3 } else { x2:x3 = *src } }
                                src = in(reg) ptr_reg!(src),
                                // must be allocated to even/odd register pair
                                inout("x2") 0_u64 => prev_lo,
                                inout("x3") 0_u64 => prev_hi,
                                options(nostack, preserves_flags),
                            )
                        };
                    }
                    #[cfg(not(target_feature = "lse"))]
                    macro_rules! atomic_load {
                        ($acquire:tt, $release:tt) => {
                            asm!(
                                "2:", // 'retry:
                                concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{src}]"), // atomic { prev_lo:prev_hi = *src; EXCLUSIVE = src }
                                concat!("st", $release, "xp {r:w}, {prev_lo}, {prev_hi}, [{src}]"), // atomic { if EXCLUSIVE == src { *src = prev_lo:prev_hi; r = 0 } else { r = 1 }; EXCLUSIVE = None }
                                "cbnz {r:w}, 2b", // if r != 0 { jump 'retry }
                                src = in(reg) ptr_reg!(src),
                                prev_lo = out(reg) prev_lo,
                                prev_hi = out(reg) prev_hi,
                                r = out(reg) _,
                                options(nostack, preserves_flags),
                            )
                        };
                    }
                    match order {
                        Ordering::Relaxed => atomic_load!("", ""),
                        Ordering::Acquire => atomic_load!("a", ""),
                        Ordering::SeqCst => atomic_load!("a", "l"),
                        _ => unreachable!(),
                    }
                    MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
                }
            }
        }
        impl AtomicStore for $ty {
            #[inline]
            unsafe fn atomic_store(
                dst: *mut MaybeUninit<Self>,
                val: MaybeUninit<Self>,
                order: Ordering,
            ) {
                debug_assert_atomic_unsafe_precondition!(dst, $ty);
                let val = MaybeUninit128 { whole: val };

                #[cfg(any(target_feature = "lse2", atomic_maybe_uninit_target_feature = "lse2"))]
                // SAFETY: the caller must guarantee that `dst` is valid for writes,
                // 16-byte aligned, that there are no concurrent non-atomic operations.
                // the above cfg guarantee that the CPU supports FEAT_LSE2.
                //
                // Refs:
                // - STP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/STP--Store-pair-of-registers-
                // - STILP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/STILP--Store-release-ordered-pair-of-registers-
                unsafe {
                    macro_rules! atomic_store {
                        ($il:tt, $acquire:tt, $release:tt) => {
                            asm!(
                                $release, // fence
                                concat!("st", $il, "p {val_lo}, {val_hi}, [{dst}]"), // atomic { *dst = val_lo:val_hi }
                                $acquire, // fence
                                dst = in(reg) ptr_reg!(dst),
                                val_lo = in(reg) val.pair.lo,
                                val_hi = in(reg) val.pair.hi,
                                options(nostack, preserves_flags),
                            )
                        };
                    }
                    // Use swpp if stp requires fences.
                    // https://reviews.llvm.org/D143506
                    #[cfg(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128"))]
                    macro_rules! atomic_store_swpp {
                        ($acquire:tt, $release:tt, $fence:tt) => {
                            asm!(
                                concat!("swpp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"), // atomic { _x = *dst; *dst = val_lo:val_hi; val_lo:val_hi = _x }
                                $fence, // fence
                                dst = in(reg) ptr_reg!(dst),
                                val_lo = inout(reg) val.pair.lo => _,
                                val_hi = inout(reg) val.pair.hi => _,
                                options(nostack, preserves_flags),
                            )
                        };
                    }
                    match order {
                        // if FEAT_LSE128 && order == seqcst => swpp
                        // Prefer swpp if stp requires fences. https://reviews.llvm.org/D143506
                        // SAFETY: cfg guarantee that the CPU supports FEAT_LSE128.
                        #[cfg(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128"))]
                        Ordering::SeqCst => atomic_rmw!(atomic_store_swpp, order),

                        // if FEAT_LRCPC3 && order != relaxed => stilp
                        // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC3.
                        #[cfg(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3"))]
                        Ordering::Release => atomic_store!("il", "", ""),
                        // LLVM uses store-release (dmb ish; stp); dmb ish, GCC (libatomic) and Atomics ABI Standard
                        // uses store-release (stilp) without fence for SeqCst store
                        // (https://github.com/gcc-mirror/gcc/commit/7107574958e2bed11d916a1480ef1319f15e5ffe).
                        // Considering https://reviews.llvm.org/D141748, LLVM's lowering seems
                        // to be the safer option here (I'm not convinced that the libatomic's implementation is wrong).
                        #[cfg(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3"))]
                        #[cfg(not(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128")))]
                        Ordering::SeqCst => atomic_store!("il", "dmb ish", ""),

                        // if FEAT_LSE128 && order != relaxed => swpp
                        // Prefer swpp if stp requires fences. https://reviews.llvm.org/D143506
                        // SAFETY: cfg guarantee that the CPU supports FEAT_LSE128.
                        #[cfg(not(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3")))]
                        #[cfg(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128"))]
                        Ordering::Release => atomic_rmw!(atomic_store_swpp, order),

                        // else => stp
                        Ordering::Relaxed => atomic_store!("", "", ""),
                        #[cfg(not(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3")))]
                        #[cfg(not(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128")))]
                        Ordering::Release => atomic_store!("", "", "dmb ish"),
                        #[cfg(not(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3")))]
                        #[cfg(not(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128")))]
                        Ordering::SeqCst => atomic_store!("", "dmb ish", "dmb ish"),
                        _ => unreachable!(),
                    }
                }
                #[cfg(not(any(target_feature = "lse2", atomic_maybe_uninit_target_feature = "lse2")))]
                // SAFETY: the caller must uphold the safety contract.
                unsafe {
                    macro_rules! store {
                        ($acquire:tt, $release:tt, $fence:tt) => {
                            asm!(
                                "2:", // 'retry:
                                concat!("ld", $acquire, "xp xzr, {tmp}, [{dst}]"), // atomic { xzr:tmp = *dst; EXCLUSIVE = dst }
                                concat!("st", $release, "xp {tmp:w}, {val_lo}, {val_hi}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = val_lo:val_hi; tmp = 0 } else { tmp = 1 }; EXCLUSIVE = None }
                                "cbnz {tmp:w}, 2b", // if tmp != 0 { jump 'retry }
                                $fence, // fence
                                dst = in(reg) ptr_reg!(dst),
                                val_lo = in(reg) val.pair.lo,
                                val_hi = in(reg) val.pair.hi,
                                tmp = out(reg) _,
                                options(nostack, preserves_flags),
                            )
                        };
                    }
                    atomic_rmw!(store, order);
                }
            }
        }
        impl AtomicSwap for $ty {
            #[inline]
            unsafe fn atomic_swap(
                dst: *mut MaybeUninit<Self>,
                val: MaybeUninit<Self>,
                order: Ordering,
            ) -> MaybeUninit<Self> {
                debug_assert_atomic_unsafe_precondition!(dst, $ty);
                let val = MaybeUninit128 { whole: val };
                let (mut prev_lo, mut prev_hi);

                // SAFETY: the caller must uphold the safety contract.
                unsafe {
                    #[cfg(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128"))]
                    macro_rules! swap {
                        ($acquire:tt, $release:tt, $fence:tt) => {
                            asm!(
                                concat!("swpp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"), // atomic { _x = *dst; *dst = val_lo:val_hi; val_lo:val_hi = _x }
                                $fence, // fence
                                dst = in(reg) ptr_reg!(dst),
                                val_lo = inout(reg) val.pair.lo => prev_lo,
                                val_hi = inout(reg) val.pair.hi => prev_hi,
                                options(nostack, preserves_flags),
                            )
                        };
                    }
                    #[cfg(not(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128")))]
                    macro_rules! swap {
                        ($acquire:tt, $release:tt, $fence:tt) => {
                            asm!(
                                "2:", // 'retry:
                                concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"), // atomic { prev_lo:prev_hi = *dst; EXCLUSIVE = dst }
                                concat!("st", $release, "xp {r:w}, {val_lo}, {val_hi}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = val_lo:val_hi; r = 0 } else { r = 1 }; EXCLUSIVE = None }
                                "cbnz {r:w}, 2b", // if r != 0 { jump 'retry }
                                $fence, // fence
                                dst = in(reg) ptr_reg!(dst),
                                val_lo = in(reg) val.pair.lo,
                                val_hi = in(reg) val.pair.hi,
                                prev_lo = out(reg) prev_lo,
                                prev_hi = out(reg) prev_hi,
                                r = out(reg) _,
                                options(nostack, preserves_flags),
                            )
                        };
                    }
                    atomic_rmw!(swap, order);
                    MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
                }
            }
        }
        impl AtomicCompareExchange for $ty {
            #[inline]
            unsafe fn atomic_compare_exchange(
                dst: *mut MaybeUninit<Self>,
                old: MaybeUninit<Self>,
                new: MaybeUninit<Self>,
                success: Ordering,
                failure: Ordering,
            ) -> (MaybeUninit<Self>, bool) {
                debug_assert_atomic_unsafe_precondition!(dst, $ty);
                let order = crate::utils::upgrade_success_ordering(success, failure);
                let old = MaybeUninit128 { whole: old };
                let new = MaybeUninit128 { whole: new };
                let (mut prev_lo, mut prev_hi);

                // SAFETY: the caller must uphold the safety contract.
                unsafe {
                    let mut r: i32;
                    #[cfg(target_feature = "lse")]
                    macro_rules! cmpxchg {
                        ($acquire:tt, $release:tt, $fence:tt) => {
                            // Refs: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/CASP--CASPA--CASPAL--CASPL--Compare-and-swap-pair-of-words-or-doublewords-in-memory-
                            asm!(
                                // casp writes the current value to the first register pair,
                                // so copy the `old`'s value for later comparison.
                                "mov x8, {old_lo}", // x8 = old_lo
                                "mov x9, {old_hi}", // x9 = old_hi
                                concat!("casp", $acquire, $release, " x8, x9, x4, x5, [{dst}]"), // atomic { if *dst == x8:x9 { *dst = x4:x5 } else { x8:x9 = *dst } }
                                $fence, // fence
                                "cmp x8, {old_lo}", // if x8 == old_lo { Z = 1 } else { Z = 0 }
                                "ccmp x9, {old_hi}, #0, eq", // if Z == 1 { if x9 == old_hi { Z = 1 } else { Z = 0 } } else { Z = 0 }
                                "cset {r:w}, eq", // r = Z
                                dst = in(reg) ptr_reg!(dst),
                                old_lo = in(reg) old.pair.lo,
                                old_hi = in(reg) old.pair.hi,
                                r = lateout(reg) r,
                                // new pair - must be allocated to even/odd register pair
                                in("x4") new.pair.lo,
                                in("x5") new.pair.hi,
                                // prev pair - must be allocated to even/odd register pair
                                out("x8") prev_lo,
                                out("x9") prev_hi,
                                // Do not use `preserves_flags` because CMP and CCMP modify the condition flags.
                                options(nostack),
                            )
                        };
                    }
                    #[cfg(not(target_feature = "lse"))]
                    macro_rules! cmpxchg {
                        ($acquire:tt, $release:tt, $fence:tt) => {
                            asm!(
                                "2:", // 'retry:
                                concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"), // atomic { prev_lo:prev_hi = *dst; EXCLUSIVE = dst }
                                "cmp {prev_lo}, {old_lo}", // if prev_lo == old_lo { Z = 1 } else { Z = 0 }
                                "ccmp {prev_hi}, {old_hi}, #0, eq", // if Z == 1 { if prev_hi == old_hi { Z = 1 } else { Z = 0 } } else { Z = 0 }
                                // write back to ensure atomicity
                                "csel {tmp_lo}, {new_lo}, {prev_lo}, eq", // if Z == 1 { tmp_lo = new_lo } else { tmp_lo = prev_lo }
                                "csel {tmp_hi}, {new_hi}, {prev_hi}, eq", // if Z == 1 { tmp_hi = new_hi } else { tmp_hi = prev_hi }
                                concat!("st", $release, "xp {r:w}, {tmp_lo}, {tmp_hi}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = tmp_lo:tmp_hi; r = 0 } else { r = 1 }; EXCLUSIVE = None }
                                "cbnz {r:w}, 2b", // if r != 0 { jump 'retry }
                                "cset {r:w}, eq", // r = Z
                                $fence, // fence
                                dst = in(reg) ptr_reg!(dst),
                                old_lo = in(reg) old.pair.lo,
                                old_hi = in(reg) old.pair.hi,
                                new_lo = in(reg) new.pair.lo,
                                new_hi = in(reg) new.pair.hi,
                                prev_lo = out(reg) prev_lo,
                                prev_hi = out(reg) prev_hi,
                                r = out(reg) r,
                                tmp_lo = out(reg) _,
                                tmp_hi = out(reg) _,
                                // Do not use `preserves_flags` because CMP and CCMP modify the condition flags.
                                options(nostack),
                            )
                        };
                    }
                    atomic_rmw!(cmpxchg, order, write = success);
                    crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
                    // r == 1 iff the compared pair matched `old` (i.e. the exchange succeeded).
                    (
                        MaybeUninit128 {
                            pair: Pair { lo: prev_lo, hi: prev_hi }
                        }.whole,
                        r != 0
                    )
                }
            }
        }
    };
}
719
// Provide the 128-bit implementations (i128 is handled via delegate_signed!
// inside the macro).
atomic128!(u128);
721
722// -----------------------------------------------------------------------------
723// cfg macros
724
/// 8-bit atomics are always available on this architecture: expand the tokens.
#[macro_export]
macro_rules! cfg_has_atomic_8 {
    ($($tokens:tt)*) => { $($tokens)* };
}
/// 8-bit atomics are never missing here: discard the tokens.
#[macro_export]
macro_rules! cfg_no_atomic_8 {
    ($($tokens:tt)*) => {};
}
/// 16-bit atomics are always available on this architecture: expand the tokens.
#[macro_export]
macro_rules! cfg_has_atomic_16 {
    ($($tokens:tt)*) => { $($tokens)* };
}
/// 16-bit atomics are never missing here: discard the tokens.
#[macro_export]
macro_rules! cfg_no_atomic_16 {
    ($($tokens:tt)*) => {};
}
/// 32-bit atomics are always available on this architecture: expand the tokens.
#[macro_export]
macro_rules! cfg_has_atomic_32 {
    ($($tokens:tt)*) => { $($tokens)* };
}
/// 32-bit atomics are never missing here: discard the tokens.
#[macro_export]
macro_rules! cfg_no_atomic_32 {
    ($($tokens:tt)*) => {};
}
/// 64-bit atomics are always available on this architecture: expand the tokens.
#[macro_export]
macro_rules! cfg_has_atomic_64 {
    ($($tokens:tt)*) => { $($tokens)* };
}
/// 64-bit atomics are never missing here: discard the tokens.
#[macro_export]
macro_rules! cfg_no_atomic_64 {
    ($($tokens:tt)*) => {};
}
/// 128-bit atomics are always available on this architecture: expand the tokens.
#[macro_export]
macro_rules! cfg_has_atomic_128 {
    ($($tokens:tt)*) => { $($tokens)* };
}
/// 128-bit atomics are never missing here: discard the tokens.
#[macro_export]
macro_rules! cfg_no_atomic_128 {
    ($($tokens:tt)*) => {};
}
/// Atomic CAS is always available on this architecture: expand the tokens.
#[macro_export]
macro_rules! cfg_has_atomic_cas {
    ($($tokens:tt)*) => { $($tokens)* };
}
/// Atomic CAS is never missing here: discard the tokens.
#[macro_export]
macro_rules! cfg_no_atomic_cas {
    ($($tokens:tt)*) => {};
}