atomic_maybe_uninit/arch/
aarch64.rs

1// SPDX-License-Identifier: Apache-2.0 OR MIT
2
3/*
4AArch64
5
6See "Atomic operation overview by architecture" for atomic operations in this architecture:
7https://github.com/taiki-e/atomic-maybe-uninit/blob/HEAD/src/arch/README.md#aarch64
8
9Refs:
10- Arm A-profile A64 Instruction Set Architecture
11  https://developer.arm.com/documentation/ddi0602/2025-06
12- C/C++ Atomics Application Binary Interface Standard for the Arm® 64-bit Architecture
13  https://github.com/ARM-software/abi-aa/blob/2025Q1/atomicsabi64/atomicsabi64.rst
14- Arm® Compiler armasm User Guide
15  https://developer.arm.com/documentation/dui0801/latest
16- Arm® Architecture Reference Manual for A-profile architecture
17  https://developer.arm.com/documentation/ddi0487/latest (PDF)
18- Arm® Architecture Reference Manual Supplement Armv8, for R-profile AArch64 architecture
19  https://developer.arm.com/documentation/ddi0600/latest (PDF)
20- portable-atomic https://github.com/taiki-e/portable-atomic
21
22Generated asm:
23- aarch64 https://godbolt.org/z/e8Wesj5WP
24- aarch64 msvc https://godbolt.org/z/jcTW8Eafo
25- aarch64 (+lse) https://godbolt.org/z/999aq8jGE
26- aarch64 msvc (+lse) https://godbolt.org/z/TodsEnfz6
27- aarch64 (+lse,+lse2) https://godbolt.org/z/r1M5GYWEj
28- aarch64 (+lse,+lse2,+rcpc3) https://godbolt.org/z/c9dnqxM45
29- aarch64 (+rcpc) https://godbolt.org/z/Ezc49YK6h
30- aarch64 (+lse2,+lse128) https://godbolt.org/z/GMdboxzjc
31- aarch64 (+lse2,+lse128,+rcpc3) https://godbolt.org/z/9beasofnd
32*/
33
// Provide the pointer-width integer delegations via the crate's delegation
// macros. NOTE(review): `delegate_size!` is defined elsewhere in this crate —
// confirm the exact set of types it delegates to the sized impls below.
delegate_size!(delegate_all);
35
36use core::{
37    arch::asm,
38    mem::{self, MaybeUninit},
39    sync::atomic::Ordering,
40};
41
42use crate::{
43    raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap},
44    utils::{MaybeUninit128, Pair},
45};
46
// Expands `$op!($acquire, $release, $fence)` with the mnemonic fragments and
// trailing fence appropriate for `$order`:
// - $acquire: "a" to give the instruction acquire semantics, "" otherwise.
// - $release: "l" to give the instruction release semantics, "" otherwise.
// - $fence: instruction text emitted after the operation ("dmb ish" or "").
// The optional `write = $write` argument is the ordering of the write part
// (e.g. the success ordering of a compare-exchange); it defaults to `$order`.
macro_rules! atomic_rmw {
    ($op:ident, $order:ident) => {
        atomic_rmw!($op, $order, write = $order)
    };
    ($op:ident, $order:ident, write = $write:ident) => {
        match $order {
            Ordering::Relaxed => $op!("", "", ""),
            Ordering::Acquire => $op!("a", "", ""),
            Ordering::Release => $op!("", "l", ""),
            Ordering::AcqRel => $op!("a", "l", ""),
            // In MSVC environments, SeqCst stores/writes need fences after writes.
            // https://reviews.llvm.org/D141748
            #[cfg(target_env = "msvc")]
            Ordering::SeqCst if $write == Ordering::SeqCst => $op!("a", "l", "dmb ish"),
            // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments.
            Ordering::SeqCst => $op!("a", "l", ""),
            _ => unreachable!(),
        }
    };
}
67
// Implements AtomicLoad/AtomicStore/AtomicSwap/AtomicCompareExchange for one
// 8- to 64-bit unsigned integer type (signed variants are delegated).
//
// Macro arguments:
// - $ty: the unsigned integer type to implement for.
// - $suffix: size suffix appended to the load/store/swap/cas mnemonics
//   ("b" = byte, "h" = halfword, "" = word/doubleword).
// - $val_modifier: asm operand modifier for value registers
//   (":w" selects the 32-bit W view of the register; "" uses the full X register).
// - $cmp_ext: extend operand appended to the CMP of the loaded value against
//   `old` so only the type's low bits are compared
//   (", uxtb"/", uxth" for sub-word types, "" otherwise).
#[rustfmt::skip]
macro_rules! atomic {
    ($ty:ident, $suffix:tt, $val_modifier:tt, $cmp_ext:tt) => {
        delegate_signed!(delegate_all, $ty);
        impl AtomicLoad for $ty {
            #[inline]
            unsafe fn atomic_load(
                src: *const MaybeUninit<Self>,
                order: Ordering,
            ) -> MaybeUninit<Self> {
                debug_assert_atomic_unsafe_precondition!(src, $ty);
                let out: MaybeUninit<Self>;

                // SAFETY: the caller must uphold the safety contract.
                unsafe {
                    macro_rules! atomic_load {
                        ($acquire:tt) => {
                            asm!(
                                concat!("ld", $acquire, "r", $suffix, " {out", $val_modifier, "}, [{src}]"), // atomic { out = *src }
                                src = in(reg) ptr_reg!(src),
                                out = lateout(reg) out,
                                options(nostack, preserves_flags),
                            )
                        };
                    }
                    // With FEAT_LRCPC, Acquire loads use LDAPR (acquire-RCpc);
                    // otherwise LDAR. SeqCst always uses LDAR.
                    match order {
                        Ordering::Relaxed => atomic_load!(""),
                        // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC.
                        #[cfg(target_feature = "rcpc")]
                        Ordering::Acquire => atomic_load!("ap"),
                        #[cfg(not(target_feature = "rcpc"))]
                        Ordering::Acquire => atomic_load!("a"),
                        Ordering::SeqCst => atomic_load!("a"),
                        _ => unreachable!(),
                    }
                }
                out
            }
        }
        impl AtomicStore for $ty {
            #[inline]
            unsafe fn atomic_store(
                dst: *mut MaybeUninit<Self>,
                val: MaybeUninit<Self>,
                order: Ordering,
            ) {
                debug_assert_atomic_unsafe_precondition!(dst, $ty);

                // SAFETY: the caller must uphold the safety contract.
                unsafe {
                    macro_rules! atomic_store {
                        ($release:tt, $fence:tt) => {
                            asm!(
                                concat!("st", $release, "r", $suffix, " {val", $val_modifier, "}, [{dst}]"), // atomic { *dst = val }
                                $fence,                                                                      // fence
                                dst = in(reg) ptr_reg!(dst),
                                val = in(reg) val,
                                options(nostack, preserves_flags),
                            )
                        };
                    }
                    match order {
                        Ordering::Relaxed => atomic_store!("", ""),
                        Ordering::Release => atomic_store!("l", ""),
                        // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments.
                        #[cfg(not(target_env = "msvc"))]
                        Ordering::SeqCst => atomic_store!("l", ""),
                        // In MSVC environments, SeqCst stores/writes need fences after writes.
                        // https://reviews.llvm.org/D141748
                        #[cfg(target_env = "msvc")]
                        Ordering::SeqCst => atomic_store!("l", "dmb ish"),
                        _ => unreachable!(),
                    }
                }
            }
        }
        impl AtomicSwap for $ty {
            #[inline]
            unsafe fn atomic_swap(
                dst: *mut MaybeUninit<Self>,
                val: MaybeUninit<Self>,
                order: Ordering,
            ) -> MaybeUninit<Self> {
                debug_assert_atomic_unsafe_precondition!(dst, $ty);
                let mut out: MaybeUninit<Self>;

                // SAFETY: the caller must uphold the safety contract.
                unsafe {
                    #[cfg(target_feature = "lse")]
                    macro_rules! swap {
                        ($acquire:tt, $release:tt, $fence:tt) => {
                            // Refs:
                            // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/SWP--SWPA--SWPAL--SWPL--Swap-word-or-doubleword-in-memory-
                            // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/SWPB--SWPAB--SWPALB--SWPLB--Swap-byte-in-memory-
                            // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/SWPH--SWPAH--SWPALH--SWPLH--Swap-halfword-in-memory-
                            asm!(
                                concat!("swp", $acquire, $release, $suffix, " {val", $val_modifier, "}, {out", $val_modifier, "}, [{dst}]"), // atomic { _x = *dst; *dst = val; out = _x }
                                $fence,                                                                                                      // fence
                                dst = in(reg) ptr_reg!(dst),
                                val = in(reg) val,
                                out = lateout(reg) out,
                                options(nostack, preserves_flags),
                            )
                        };
                    }
                    #[cfg(not(target_feature = "lse"))]
                    macro_rules! swap {
                        ($acquire:tt, $release:tt, $fence:tt) => {
                            // No FEAT_LSE: emulate the swap with an LDXR/STXR retry loop.
                            asm!(
                                "2:", // 'retry:
                                    concat!("ld", $acquire, "xr", $suffix, " {out", $val_modifier, "}, [{dst}]"),        // atomic { out = *dst; EXCLUSIVE = dst }
                                    concat!("st", $release, "xr", $suffix, " {r:w}, {val", $val_modifier, "}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = val; r = 0 } else { r = 1 }; EXCLUSIVE = None }
                                    "cbnz {r:w}, 2b",                                                                    // if r != 0 { jump 'retry }
                                $fence,                                                                                  // fence
                                dst = in(reg) ptr_reg!(dst),
                                val = in(reg) val,
                                out = out(reg) out,
                                r = out(reg) _,
                                options(nostack, preserves_flags),
                            )
                        };
                    }
                    atomic_rmw!(swap, order);
                }
                out
            }
        }
        impl AtomicCompareExchange for $ty {
            #[inline]
            unsafe fn atomic_compare_exchange(
                dst: *mut MaybeUninit<Self>,
                old: MaybeUninit<Self>,
                new: MaybeUninit<Self>,
                success: Ordering,
                failure: Ordering,
            ) -> (MaybeUninit<Self>, bool) {
                debug_assert_atomic_unsafe_precondition!(dst, $ty);
                let order = crate::utils::upgrade_success_ordering(success, failure);
                let mut out: MaybeUninit<Self>;

                // SAFETY: the caller must uphold the safety contract.
                unsafe {
                    let mut r: i32;
                    #[cfg(target_feature = "lse")]
                    macro_rules! cmpxchg {
                        ($acquire:tt, $release:tt, $fence:tt) => {{
                            // Refs:
                            // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/CAS--CASA--CASAL--CASL--Compare-and-swap-word-or-doubleword-in-memory-
                            // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/CASB--CASAB--CASALB--CASLB--Compare-and-swap-byte-in-memory-
                            // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/CASH--CASAH--CASALH--CASLH--Compare-and-swap-halfword-in-memory-
                            asm!(
                                // cas writes the current value to the first register,
                                // so copy the `old`'s value for later comparison.
                                concat!("mov {out", $val_modifier, "}, {old", $val_modifier, "}"),                                           // out = old
                                concat!("cas", $acquire, $release, $suffix, " {out", $val_modifier, "}, {new", $val_modifier, "}, [{dst}]"), // atomic { if *dst == out { *dst = new } else { out = *dst } }
                                $fence,                                                                                                      // fence
                                concat!("cmp {out", $val_modifier, "}, {old", $val_modifier, "}", $cmp_ext),                                 // if out == old { Z = 1 } else { Z = 0 }
                                "cset {r:w}, eq",                                                                                            // r = Z
                                dst = in(reg) ptr_reg!(dst),
                                old = in(reg) old,
                                new = in(reg) new,
                                out = out(reg) out,
                                r = lateout(reg) r,
                                // Do not use `preserves_flags` because CMP modifies the condition flags.
                                options(nostack),
                            );
                            crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
                            (out, r != 0)
                        }};
                    }
                    #[cfg(not(target_feature = "lse"))]
                    macro_rules! cmpxchg {
                        ($acquire:tt, $release:tt, $fence:tt) => {{
                            // No FEAT_LSE: emulate CAS with an LDXR/CMP/STXR retry loop.
                            asm!(
                                "2:", // 'retry:
                                    concat!("ld", $acquire, "xr", $suffix, " {out", $val_modifier, "}, [{dst}]"),        // atomic { out = *dst; EXCLUSIVE = dst }
                                    concat!("cmp {out", $val_modifier, "}, {old", $val_modifier, "}", $cmp_ext),         // if out == old { Z = 1 } else { Z = 0 }
                                    "b.ne 3f",                                                                           // if Z == 0 { jump 'cmp-fail }
                                    concat!("st", $release, "xr", $suffix, " {r:w}, {new", $val_modifier, "}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = new; r = 0 } else { r = 1 }; EXCLUSIVE = None }
                                    "cbnz {r:w}, 2b",                                                                    // if r != 0 { jump 'retry }
                                    $fence,                                                                              // fence
                                    "b 4f",                                                                              // jump 'success
                                "3:", // 'cmp-fail:
                                    "mov {r:w}, #1",                                                                     // r = 1
                                    "clrex",                                                                             // EXCLUSIVE = None
                                "4:", // 'success:
                                dst = in(reg) ptr_reg!(dst),
                                old = in(reg) old,
                                new = in(reg) new,
                                out = out(reg) out,
                                r = out(reg) r,
                                // Do not use `preserves_flags` because CMP modifies the condition flags.
                                options(nostack),
                            );
                            crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
                            // 0 if the store was successful, 1 if no store was performed
                            (out, r == 0)
                        }};
                    }
                    atomic_rmw!(cmpxchg, order, write = success)
                }
            }
            // NOTE(review): with FEAT_LSE this method is not overridden, so the
            // trait's default (presumably the strong compare_exchange above) is
            // used instead — confirm against the trait definition.
            #[cfg(not(target_feature = "lse"))]
            #[inline]
            unsafe fn atomic_compare_exchange_weak(
                dst: *mut MaybeUninit<Self>,
                old: MaybeUninit<Self>,
                new: MaybeUninit<Self>,
                success: Ordering,
                failure: Ordering,
            ) -> (MaybeUninit<Self>, bool) {
                debug_assert_atomic_unsafe_precondition!(dst, $ty);
                let order = crate::utils::upgrade_success_ordering(success, failure);
                let mut out: MaybeUninit<Self>;

                // SAFETY: the caller must uphold the safety contract.
                unsafe {
                    let r: i32;
                    macro_rules! cmpxchg_weak {
                        ($acquire:tt, $release:tt, $fence:tt) => {
                            // Single LL/SC attempt (no retry loop): if the exclusive
                            // monitor is lost, STXR sets r = 1 and this reports failure,
                            // which is the spurious failure the weak variant permits.
                            asm!(
                                concat!("ld", $acquire, "xr", $suffix, " {out", $val_modifier, "}, [{dst}]"),        // atomic { out = *dst; EXCLUSIVE = dst }
                                concat!("cmp {out", $val_modifier, "}, {old", $val_modifier, "}", $cmp_ext),         // if out == old { Z = 1 } else { Z = 0 }
                                "b.ne 3f",                                                                           // if Z == 0 { jump 'cmp-fail }
                                concat!("st", $release, "xr", $suffix, " {r:w}, {new", $val_modifier, "}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = new; r = 0 } else { r = 1 }; EXCLUSIVE = None }
                                // TODO: emit fence only when the above sc succeed?
                                // "cbnz {r:w}, 4f",
                                $fence,                                                                              // fence
                                "b 4f",                                                                              // jump 'success
                                "3:", // 'cmp-fail:
                                    "mov {r:w}, #1",                                                                 // r = 1
                                    "clrex",                                                                         // EXCLUSIVE = None
                                "4:", // 'success:
                                dst = in(reg) ptr_reg!(dst),
                                old = in(reg) old,
                                new = in(reg) new,
                                out = out(reg) out,
                                r = out(reg) r,
                                // Do not use `preserves_flags` because CMP modifies the condition flags.
                                options(nostack),
                            )
                        };
                    }
                    atomic_rmw!(cmpxchg_weak, order, write = success);
                    crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
                    // 0 if the store was successful, 1 if no store was performed
                    (out, r == 0)
                }
            }
        }
    };
}
320
// Instantiate the 8/16/32/64-bit atomics.
// Arguments: (type, mnemonic size suffix, value-register modifier,
//             CMP extend for sub-word comparison).
atomic!(u8, "b", ":w", ", uxtb");
atomic!(u16, "h", ":w", ", uxth");
atomic!(u32, "", ":w", "");
atomic!(u64, "", "", "");
325
326// There are a few ways to implement 128-bit atomic operations in AArch64.
327//
328// - LDXP/STXP loop (DW LL/SC)
329// - CASP (DWCAS) added as Armv8.1 FEAT_LSE (optional from Armv8.0, mandatory from Armv8.1)
330// - LDP/STP (DW load/store) if Armv8.4 FEAT_LSE2 (optional from Armv8.2, mandatory from Armv8.4) is available
331// - LDIAPP/STILP (DW acquire-load/release-store) added as Armv8.9 FEAT_LRCPC3 (optional from Armv8.2) (if FEAT_LSE2 is also available)
332// - LDCLRP/LDSETP/SWPP (DW RMW) added as Armv9.4 FEAT_LSE128 (optional from Armv9.3)
333//
334// If FEAT_LSE is available at compile-time, we use CASP for load/CAS. Otherwise, use LDXP/STXP loop.
335// If FEAT_LSE2 is available at compile-time, we use LDP/STP for load/store.
336// If FEAT_LSE128 is available at compile-time, we use SWPP for swap/{release,seqcst}-store.
337// If FEAT_LSE2 and FEAT_LRCPC3 are available at compile-time, we use LDIAPP/STILP for acquire-load/release-store.
338//
339// Note: FEAT_LSE2 doesn't imply FEAT_LSE. FEAT_LSE128 implies FEAT_LSE but not FEAT_LSE2.
340//
341// Refs:
342// - LDXP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/LDXP--Load-exclusive-pair-of-registers-
343// - LDAXP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/LDAXP--Load-acquire-exclusive-pair-of-registers-
344// - STXP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/STXP--Store-exclusive-pair-of-registers-
345// - STLXP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/STLXP--Store-release-exclusive-pair-of-registers-
346//
347// Note: Load-Exclusive pair (by itself) does not guarantee atomicity; to complete an atomic
348// operation (even load/store), a corresponding Store-Exclusive pair must succeed.
349// See Arm Architecture Reference Manual for A-profile architecture
350// Section B2.2.1 "Requirements for single-copy atomicity", and
351// Section B2.9 "Synchronization and semaphores" for more.
352macro_rules! atomic128 {
353    ($ty:ident) => {
354        delegate_signed!(delegate_all, $ty);
355        impl AtomicLoad for $ty {
356            #[inline]
357            unsafe fn atomic_load(
358                src: *const MaybeUninit<Self>,
359                order: Ordering,
360            ) -> MaybeUninit<Self> {
361                debug_assert_atomic_unsafe_precondition!(src, $ty);
362                let (mut prev_lo, mut prev_hi);
363
364                #[cfg(any(target_feature = "lse2", atomic_maybe_uninit_target_feature = "lse2"))]
365                // SAFETY: the caller must guarantee that `dst` is valid for reads,
366                // 16-byte aligned, that there are no concurrent non-atomic operations.
367                // the above cfg guarantee that the CPU supports FEAT_LSE2.
368                //
369                // Refs:
370                // - LDP https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/LDP--Load-pair-of-registers-
371                // - LDIAPP https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/LDIAPP--Load-Acquire-RCpc-ordered-pair-of-registers-
372                unsafe {
373                    macro_rules! atomic_load_relaxed {
374                        ($iap:tt, $dmb_ishld:tt) => {
375                            asm!(
376                                concat!("ld", $iap, "p {prev_lo}, {prev_hi}, [{src}]"), // atomic { prev_lo:prev_hi = *src }
377                                $dmb_ishld,                                             // fence
378                                src = in(reg) ptr_reg!(src),
379                                prev_hi = lateout(reg) prev_hi,
380                                prev_lo = lateout(reg) prev_lo,
381                                options(nostack, preserves_flags),
382                            )
383                        };
384                    }
385                    match order {
386                        // if FEAT_LRCPC3 && order != relaxed => ldiapp
387                        // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC3.
388                        #[cfg(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3"))]
389                        Ordering::Acquire => atomic_load_relaxed!("iap", ""),
390                        #[cfg(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3"))]
391                        Ordering::SeqCst => {
392                            asm!(
393                                // ldar (or dmb ishld) is required to prevent reordering with preceding stlxp.
394                                // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108891
395                                "ldar {tmp}, [{src}]",                  // atomic { tmp = *src }
396                                "ldiapp {prev_lo}, {prev_hi}, [{src}]", // atomic { prev_lo:prev_hi = *src }
397                                src = in(reg) ptr_reg!(src),
398                                prev_hi = lateout(reg) prev_hi,
399                                prev_lo = lateout(reg) prev_lo,
400                                tmp = out(reg) _,
401                                options(nostack, preserves_flags),
402                            );
403                        }
404
405                        // else => ldp
406                        Ordering::Relaxed => atomic_load_relaxed!("", ""),
407                        #[cfg(not(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3")))]
408                        Ordering::Acquire => atomic_load_relaxed!("", "dmb ishld"),
409                        #[cfg(not(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3")))]
410                        Ordering::SeqCst => {
411                            asm!(
412                                // ldar (or dmb ishld) is required to prevent reordering with preceding stlxp.
413                                // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108891
414                                "ldar {tmp}, [{src}]",               // atomic { tmp = *src }
415                                "ldp {prev_lo}, {prev_hi}, [{src}]", // atomic { prev_lo:prev_hi = *src }
416                                "dmb ishld",                         // fence
417                                src = in(reg) ptr_reg!(src),
418                                prev_hi = lateout(reg) prev_hi,
419                                prev_lo = lateout(reg) prev_lo,
420                                tmp = out(reg) _,
421                                options(nostack, preserves_flags),
422                            );
423                        }
424                        _ => unreachable!(),
425                    }
426                    MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
427                }
428                #[cfg(not(any(target_feature = "lse2", atomic_maybe_uninit_target_feature = "lse2")))]
429                // SAFETY: the caller must uphold the safety contract.
430                unsafe {
431                    #[cfg(target_feature = "lse")]
432                    macro_rules! atomic_load {
433                        ($acquire:tt, $release:tt) => {
434                            asm!(
435                                // Refs: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/CASP--CASPA--CASPAL--CASPL--Compare-and-swap-pair-of-words-or-doublewords-in-memory-
436                                concat!("casp", $acquire, $release, " x2, x3, x2, x3, [{src}]"), // atomic { if *src == x2:x3 { *dst = x2:x3 } else { x2:x3 = *dst } }
437                                src = in(reg) ptr_reg!(src),
438                                // must be allocated to even/odd register pair
439                                inout("x2") 0_u64 => prev_lo,
440                                inout("x3") 0_u64 => prev_hi,
441                                options(nostack, preserves_flags),
442                            )
443                        };
444                    }
445                    #[cfg(not(target_feature = "lse"))]
446                    macro_rules! atomic_load {
447                        ($acquire:tt, $release:tt) => {
448                            asm!(
449                                "2:", // 'retry:
450                                    concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{src}]"),        // atomic { prev_lo:prev_hi = *src; EXCLUSIVE = src }
451                                    concat!("st", $release, "xp {r:w}, {prev_lo}, {prev_hi}, [{src}]"), // atomic { if EXCLUSIVE == src { *src = prev_lo:prev_hi; r = 0 } else { r = 1 }; EXCLUSIVE = None }
452                                    "cbnz {r:w}, 2b",                                                   // if r != 0 { jump 'retry }
453                                src = in(reg) ptr_reg!(src),
454                                prev_lo = out(reg) prev_lo,
455                                prev_hi = out(reg) prev_hi,
456                                r = out(reg) _,
457                                options(nostack, preserves_flags),
458                            )
459                        };
460                    }
461                    match order {
462                        Ordering::Relaxed => atomic_load!("", ""),
463                        Ordering::Acquire => atomic_load!("a", ""),
464                        Ordering::SeqCst => atomic_load!("a", "l"),
465                        _ => unreachable!(),
466                    }
467                    MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
468                }
469            }
470        }
471        impl AtomicStore for $ty {
472            #[inline]
473            unsafe fn atomic_store(
474                dst: *mut MaybeUninit<Self>,
475                val: MaybeUninit<Self>,
476                order: Ordering,
477            ) {
478                debug_assert_atomic_unsafe_precondition!(dst, $ty);
479                let val = MaybeUninit128 { whole: val };
480
481                #[cfg(any(target_feature = "lse2", atomic_maybe_uninit_target_feature = "lse2"))]
482                // SAFETY: the caller must guarantee that `dst` is valid for writes,
483                // 16-byte aligned, that there are no concurrent non-atomic operations.
484                // the above cfg guarantee that the CPU supports FEAT_LSE2.
485                //
486                // Refs:
487                // - STP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/STP--Store-pair-of-registers-
488                // - STILP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/STILP--Store-release-ordered-pair-of-registers-
489                unsafe {
490                    macro_rules! atomic_store {
491                        ($il:tt, $acquire:tt, $release:tt) => {
492                            asm!(
493                                $release,                                            // fence
494                                concat!("st", $il, "p {val_lo}, {val_hi}, [{dst}]"), // atomic { *dst = val_lo:val_hi }
495                                $acquire,                                            // fence
496                                dst = in(reg) ptr_reg!(dst),
497                                val_lo = in(reg) val.pair.lo,
498                                val_hi = in(reg) val.pair.hi,
499                                options(nostack, preserves_flags),
500                            )
501                        };
502                    }
503                    // Use swpp if stp requires fences.
504                    // https://reviews.llvm.org/D143506
505                    #[cfg(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128"))]
506                    macro_rules! atomic_store_swpp {
507                        ($acquire:tt, $release:tt, $fence:tt) => {
508                            asm!(
509                                concat!("swpp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"), // atomic { _x = *dst; *dst = val_lo:val_hi; val_lo:val_hi = _x }
510                                $fence,                                                              // fence
511                                dst = in(reg) ptr_reg!(dst),
512                                val_lo = inout(reg) val.pair.lo => _,
513                                val_hi = inout(reg) val.pair.hi => _,
514                                options(nostack, preserves_flags),
515                            )
516                        };
517                    }
518                    match order {
519                        // if FEAT_LSE128 && order == seqcst => swpp
520                        // Prefer swpp if stp requires fences. https://reviews.llvm.org/D143506
521                        // SAFETY: cfg guarantee that the CPU supports FEAT_LSE128.
522                        #[cfg(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128"))]
523                        Ordering::SeqCst => atomic_rmw!(atomic_store_swpp, order),
524
525                        // if FEAT_LRCPC3 && order != relaxed => stilp
526                        // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC3.
527                        #[cfg(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3"))]
528                        Ordering::Release => atomic_store!("il", "", ""),
                        // For SeqCst store, LLVM uses store-release (stilp) followed by dmb ish, while GCC
                        // (libatomic) and the Atomics ABI Standard use store-release (stilp) without a fence
                        // (https://github.com/gcc-mirror/gcc/commit/7107574958e2bed11d916a1480ef1319f15e5ffe).
                        // Considering https://reviews.llvm.org/D141748, LLVM's lowering seems
                        // to be the safer option here (I'm not convinced that libatomic's implementation is wrong).
534                        #[cfg(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3"))]
535                        #[cfg(not(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128")))]
536                        Ordering::SeqCst => atomic_store!("il", "dmb ish", ""),
537
538                        // if FEAT_LSE128 && order != relaxed => swpp
539                        // Prefer swpp if stp requires fences. https://reviews.llvm.org/D143506
540                        // SAFETY: cfg guarantee that the CPU supports FEAT_LSE128.
541                        #[cfg(not(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3")))]
542                        #[cfg(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128"))]
543                        Ordering::Release => atomic_rmw!(atomic_store_swpp, order),
544
545                        // else => stp
546                        Ordering::Relaxed => atomic_store!("", "", ""),
547                        #[cfg(not(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3")))]
548                        #[cfg(not(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128")))]
549                        Ordering::Release => atomic_store!("", "", "dmb ish"),
550                        #[cfg(not(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3")))]
551                        #[cfg(not(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128")))]
552                        Ordering::SeqCst => atomic_store!("", "dmb ish", "dmb ish"),
553                        _ => unreachable!(),
554                    }
555                }
556                #[cfg(not(any(target_feature = "lse2", atomic_maybe_uninit_target_feature = "lse2")))]
557                // SAFETY: the caller must uphold the safety contract.
558                unsafe {
559                    macro_rules! store {
560                        ($acquire:tt, $release:tt, $fence:tt) => {
561                            asm!(
562                                "2:", // 'retry:
563                                    concat!("ld", $acquire, "xp xzr, {tmp}, [{dst}]"),                  // atomic { xzr:tmp = *dst; EXCLUSIVE = dst }
564                                    concat!("st", $release, "xp {tmp:w}, {val_lo}, {val_hi}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = val_lo:val_hi; tmp = 0 } else { tmp = 1 }; EXCLUSIVE = None }
565                                    "cbnz {tmp:w}, 2b",                                                 // if tmp != 0 { jump 'retry }
566                                $fence,                                                                 // fence
567                                dst = in(reg) ptr_reg!(dst),
568                                val_lo = in(reg) val.pair.lo,
569                                val_hi = in(reg) val.pair.hi,
570                                tmp = out(reg) _,
571                                options(nostack, preserves_flags),
572                            )
573                        };
574                    }
575                    atomic_rmw!(store, order);
576                }
577            }
578        }
        impl AtomicSwap for $ty {
            #[inline]
            unsafe fn atomic_swap(
                dst: *mut MaybeUninit<Self>,
                val: MaybeUninit<Self>,
                order: Ordering,
            ) -> MaybeUninit<Self> {
                debug_assert_atomic_unsafe_precondition!(dst, $ty);
                // View the 128-bit value as a lo/hi pair of 64-bit halves so each half
                // can be passed in its own general-purpose register.
                let val = MaybeUninit128 { whole: val };
                let (mut prev_lo, mut prev_hi);

                // SAFETY: the caller must uphold the safety contract.
                unsafe {
                    // if FEAT_LSE128 => swpp (single-instruction 128-bit swap).
                    // SAFETY: cfg guarantee that the CPU supports FEAT_LSE128.
                    #[cfg(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128"))]
                    macro_rules! swap {
                        ($acquire:tt, $release:tt, $fence:tt) => {
                            asm!(
                                concat!("swpp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"), // atomic { _x = *dst; *dst = val_lo:val_hi; val_lo:val_hi = _x }
                                $fence,                                                              // fence
                                dst = in(reg) ptr_reg!(dst),
                                val_lo = inout(reg) val.pair.lo => prev_lo,
                                val_hi = inout(reg) val.pair.hi => prev_hi,
                                options(nostack, preserves_flags),
                            )
                        };
                    }
                    // else => ldxp/stxp (load-exclusive/store-exclusive pair) retry loop.
                    #[cfg(not(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128")))]
                    macro_rules! swap {
                        ($acquire:tt, $release:tt, $fence:tt) => {
                            asm!(
                                "2:", // 'retry:
                                    concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"),      // atomic { prev_lo:prev_hi = *dst; EXCLUSIVE = dst }
                                    concat!("st", $release, "xp {r:w}, {val_lo}, {val_hi}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = val_lo:val_hi; r = 0 } else { r = 1 }; EXCLUSIVE = None }
                                    "cbnz {r:w}, 2b",                                                 // if r != 0 { jump 'retry }
                                $fence,                                                               // fence
                                dst = in(reg) ptr_reg!(dst),
                                val_lo = in(reg) val.pair.lo,
                                val_hi = in(reg) val.pair.hi,
                                prev_lo = out(reg) prev_lo,
                                prev_hi = out(reg) prev_hi,
                                r = out(reg) _,
                                options(nostack, preserves_flags),
                            )
                        };
                    }
                    // atomic_rmw! (defined at the top of the file) selects the
                    // acquire/release/fence fragments matching `order`.
                    atomic_rmw!(swap, order);
                    // Reassemble the previous value's halves into the 128-bit result.
                    MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
                }
            }
        }
629        impl AtomicCompareExchange for $ty {
630            #[inline]
631            unsafe fn atomic_compare_exchange(
632                dst: *mut MaybeUninit<Self>,
633                old: MaybeUninit<Self>,
634                new: MaybeUninit<Self>,
635                success: Ordering,
636                failure: Ordering,
637            ) -> (MaybeUninit<Self>, bool) {
638                debug_assert_atomic_unsafe_precondition!(dst, $ty);
639                let order = crate::utils::upgrade_success_ordering(success, failure);
640                let old = MaybeUninit128 { whole: old };
641                let new = MaybeUninit128 { whole: new };
642                let (mut prev_lo, mut prev_hi);
643
644                // SAFETY: the caller must uphold the safety contract.
645                unsafe {
646                    let mut r: i32;
647                    #[cfg(target_feature = "lse")]
648                    macro_rules! cmpxchg {
649                        ($acquire:tt, $release:tt, $fence:tt) => {
650                            // Refs: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/CASP--CASPA--CASPAL--CASPL--Compare-and-swap-pair-of-words-or-doublewords-in-memory-
651                            asm!(
652                                // casp writes the current value to the first register pair,
653                                // so copy the `old`'s value for later comparison.
654                                "mov x8, {old_lo}",                                              // x8 = old_lo
655                                "mov x9, {old_hi}",                                              // x9 = old_hi
656                                concat!("casp", $acquire, $release, " x8, x9, x4, x5, [{dst}]"), // atomic { if *src == x8:x9 { *dst = x4:x5 } else { x8:x9 = *dst } }
657                                $fence,                                                          // fence
658                                "cmp x8, {old_lo}",                                              // if x8 == old_lo { Z = 1 } else { Z = 0 }
659                                "ccmp x9, {old_hi}, #0, eq",                                     // if Z == 1 { if x9 == old_hi { Z = 1 } else { Z = 0 } } else { Z = 0 }
660                                "cset {r:w}, eq",                                                // r = Z
661                                dst = in(reg) ptr_reg!(dst),
662                                old_lo = in(reg) old.pair.lo,
663                                old_hi = in(reg) old.pair.hi,
664                                r = lateout(reg) r,
665                                // new pair - must be allocated to even/odd register pair
666                                in("x4") new.pair.lo,
667                                in("x5") new.pair.hi,
668                                // prev pair - must be allocated to even/odd register pair
669                                out("x8") prev_lo,
670                                out("x9") prev_hi,
671                                // Do not use `preserves_flags` because CMP and CCMP modify the condition flags.
672                                options(nostack),
673                            )
674                        };
675                    }
676                    #[cfg(not(target_feature = "lse"))]
677                    macro_rules! cmpxchg {
678                        ($acquire:tt, $release:tt, $fence:tt) => {
679                            asm!(
680                                "2:", // 'retry:
681                                    concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"),      // atomic { prev_lo:prev_hi = *dst; EXCLUSIVE = dst }
682                                    "cmp {prev_lo}, {old_lo}",                                        // if prev_lo == old_lo { Z = 1 } else { Z = 0 }
683                                    "ccmp {prev_hi}, {old_hi}, #0, eq",                               // if Z == 1 { if prev_hi == old_hi { Z = 1 } else { Z = 0 } } else { Z = 0 }
684                                    // write back to ensure atomicity
685                                    "csel {tmp_lo}, {new_lo}, {prev_lo}, eq",                         // if Z == 1 { tmp_lo = new_lo } else { tmp_lo = prev_lo }
686                                    "csel {tmp_hi}, {new_hi}, {prev_hi}, eq",                         // if Z == 1 { tmp_hi = new_hi } else { tmp_hi = prev_hi }
687                                    concat!("st", $release, "xp {r:w}, {tmp_lo}, {tmp_hi}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = tmp_lo:tmp_hi; r = 0 } else { r = 1 }; EXCLUSIVE = None }
688                                    "cbnz {r:w}, 2b",                                                 // if r != 0 { jump 'retry }
689                                "cset {r:w}, eq",                                                     // r = Z
690                                $fence,
691                                dst = in(reg) ptr_reg!(dst),
692                                old_lo = in(reg) old.pair.lo,
693                                old_hi = in(reg) old.pair.hi,
694                                new_lo = in(reg) new.pair.lo,
695                                new_hi = in(reg) new.pair.hi,
696                                prev_lo = out(reg) prev_lo,
697                                prev_hi = out(reg) prev_hi,
698                                r = out(reg) r,
699                                tmp_lo = out(reg) _,
700                                tmp_hi = out(reg) _,
701                                // Do not use `preserves_flags` because CMP and CCMP modify the condition flags.
702                                options(nostack),
703                            )
704                        };
705                    }
706                    atomic_rmw!(cmpxchg, order, write = success);
707                    crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
708                    (
709                        MaybeUninit128 {
710                            pair: Pair { lo: prev_lo, hi: prev_hi }
711                        }.whole,
712                        r != 0
713                    )
714                }
715            }
716        }
717    };
718}
719
// Instantiate the 128-bit atomic operation impls for u128.
// NOTE(review): other widths/signedness appear to be handled by
// `delegate_size!(delegate_all)` at the top of the file — confirm against that macro.
atomic128!(u128);
721
722// -----------------------------------------------------------------------------
723// cfg macros
724
// On AArch64, atomics of every width up to 128 bits — and compare-and-swap — are
// always available, so every `cfg_has_*` macro passes its tokens through unchanged
// and every `cfg_no_*` macro expands to nothing.
#[macro_export]
macro_rules! cfg_has_atomic_8 {
    ($($tt:tt)*) => { $($tt)* };
}
#[macro_export]
macro_rules! cfg_no_atomic_8 {
    ($($tt:tt)*) => {};
}
#[macro_export]
macro_rules! cfg_has_atomic_16 {
    ($($tt:tt)*) => { $($tt)* };
}
#[macro_export]
macro_rules! cfg_no_atomic_16 {
    ($($tt:tt)*) => {};
}
#[macro_export]
macro_rules! cfg_has_atomic_32 {
    ($($tt:tt)*) => { $($tt)* };
}
#[macro_export]
macro_rules! cfg_no_atomic_32 {
    ($($tt:tt)*) => {};
}
#[macro_export]
macro_rules! cfg_has_atomic_64 {
    ($($tt:tt)*) => { $($tt)* };
}
#[macro_export]
macro_rules! cfg_no_atomic_64 {
    ($($tt:tt)*) => {};
}
#[macro_export]
macro_rules! cfg_has_atomic_128 {
    ($($tt:tt)*) => { $($tt)* };
}
#[macro_export]
macro_rules! cfg_no_atomic_128 {
    ($($tt:tt)*) => {};
}
#[macro_export]
macro_rules! cfg_has_atomic_cas {
    ($($tt:tt)*) => { $($tt)* };
}
#[macro_export]
macro_rules! cfg_no_atomic_cas {
    ($($tt:tt)*) => {};
}