// SPDX-License-Identifier: Apache-2.0 OR MIT
// atomic_maybe_uninit/arch/aarch64.rs

/*
AArch64

See "Atomic operation overview by architecture" for atomic operations in this architecture:
https://github.com/taiki-e/atomic-maybe-uninit/blob/HEAD/src/arch/README.md#aarch64

Refs:
- Arm A-profile A64 Instruction Set Architecture
  https://developer.arm.com/documentation/ddi0602/2025-06
- C/C++ Atomics Application Binary Interface Standard for the Arm® 64-bit Architecture
  https://github.com/ARM-software/abi-aa/blob/2025Q4/atomicsabi64/atomicsabi64.rst
- Arm® Compiler armasm User Guide
  https://developer.arm.com/documentation/dui0801/latest
- Arm® Architecture Reference Manual for A-profile architecture
  https://developer.arm.com/documentation/ddi0487/latest (PDF)
- Arm® Architecture Reference Manual Supplement Armv8, for R-profile AArch64 architecture
  https://developer.arm.com/documentation/ddi0600/latest (PDF)
- portable-atomic https://github.com/taiki-e/portable-atomic

See tests/asm-test/asm/atomic-maybe-uninit for generated assembly.
*/

25delegate_size!(delegate_all);
26
27use core::{
28 arch::asm,
29 mem::{self, MaybeUninit},
30 sync::atomic::Ordering,
31};
32
33use crate::{
34 raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap},
35 utils::{MaybeUninit128, Pair},
36};
37
38macro_rules! atomic_rmw {
39 ($op:ident, $order:ident) => {
40 atomic_rmw!($op, $order, write = $order)
41 };
42 ($op:ident, $order:ident, write = $write:ident) => {
43 // op(acquire, release, msvc_fence)
44 match $order {
45 Ordering::Relaxed => $op!("", "", ""),
46 Ordering::Acquire => $op!("a", "", ""),
47 Ordering::Release => $op!("", "l", ""),
48 Ordering::AcqRel => $op!("a", "l", ""),
49 // In MSVC environments, SeqCst stores/writes by non-LSE* instructions needs fences after writes.
50 // https://reviews.llvm.org/D141748
51 // https://github.com/llvm/llvm-project/commit/1ea201d73be2fdf03347e9c6be09ebed5f8e0e00
52 #[cfg(target_env = "msvc")]
53 Ordering::SeqCst if $write == Ordering::SeqCst => $op!("a", "l", "dmb ish"),
54 // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments.
55 Ordering::SeqCst => $op!("a", "l", ""),
56 _ => unreachable!(),
57 }
58 };
59}
60
// -----------------------------------------------------------------------------
// Register-width or smaller atomics

64#[rustfmt::skip]
65macro_rules! atomic {
66 ($ty:ident, $suffix:tt, $val_modifier:tt, $cmp_ext:tt) => {
67 delegate_signed!(delegate_all, $ty);
68 impl AtomicLoad for $ty {
69 #[inline]
70 unsafe fn atomic_load(
71 src: *const MaybeUninit<Self>,
72 order: Ordering,
73 ) -> MaybeUninit<Self> {
74 debug_assert_atomic_unsafe_precondition!(src, $ty);
75 let out: MaybeUninit<Self>;
76
77 // SAFETY: the caller must uphold the safety contract.
78 unsafe {
79 macro_rules! atomic_load {
80 ($acquire:tt) => {
81 asm!(
82 concat!("ld", $acquire, "r", $suffix, " {out", $val_modifier, "}, [{src}]"), // atomic { out = zero_extend(*src) }
83 src = in(reg) ptr_reg!(src),
84 out = lateout(reg) out,
85 options(nostack, preserves_flags),
86 )
87 };
88 }
89 match order {
90 Ordering::Relaxed => atomic_load!(""),
91 // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC.
92 #[cfg(target_feature = "rcpc")]
93 Ordering::Acquire => atomic_load!("ap"),
94 #[cfg(not(target_feature = "rcpc"))]
95 Ordering::Acquire => atomic_load!("a"),
96 Ordering::SeqCst => atomic_load!("a"),
97 _ => crate::utils::unreachable_unchecked(),
98 }
99 }
100 out
101 }
102 }
103 impl AtomicStore for $ty {
104 #[inline]
105 unsafe fn atomic_store(
106 dst: *mut MaybeUninit<Self>,
107 val: MaybeUninit<Self>,
108 order: Ordering,
109 ) {
110 debug_assert_atomic_unsafe_precondition!(dst, $ty);
111
112 // SAFETY: the caller must uphold the safety contract.
113 unsafe {
114 macro_rules! atomic_store {
115 ($release:tt, $msvc_fence:tt) => {
116 asm!(
117 concat!("st", $release, "r", $suffix, " {val", $val_modifier, "}, [{dst}]"), // atomic { *dst = val }
118 $msvc_fence, // fence
119 dst = in(reg) ptr_reg!(dst),
120 val = in(reg) val,
121 options(nostack, preserves_flags),
122 )
123 };
124 }
125 match order {
126 Ordering::Relaxed => atomic_store!("", ""),
127 Ordering::Release => atomic_store!("l", ""),
128 // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments.
129 #[cfg(not(target_env = "msvc"))]
130 Ordering::SeqCst => atomic_store!("l", ""),
131 // In MSVC environments, SeqCst stores/writes needs fences after writes.
132 // https://reviews.llvm.org/D141748
133 #[cfg(target_env = "msvc")]
134 Ordering::SeqCst => atomic_store!("l", "dmb ish"),
135 _ => crate::utils::unreachable_unchecked(),
136 }
137 }
138 }
139 }
140 impl AtomicSwap for $ty {
141 #[inline]
142 unsafe fn atomic_swap(
143 dst: *mut MaybeUninit<Self>,
144 val: MaybeUninit<Self>,
145 order: Ordering,
146 ) -> MaybeUninit<Self> {
147 debug_assert_atomic_unsafe_precondition!(dst, $ty);
148 let mut out: MaybeUninit<Self>;
149
150 // SAFETY: the caller must uphold the safety contract.
151 unsafe {
152 #[cfg(target_feature = "lse")]
153 macro_rules! swap {
154 ($acquire:tt, $release:tt, $_msvc_fence:tt) => {
155 // Refs:
156 // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/SWP--SWPA--SWPAL--SWPL--Swap-word-or-doubleword-in-memory-
157 // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/SWPB--SWPAB--SWPALB--SWPLB--Swap-byte-in-memory-
158 // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/SWPH--SWPAH--SWPALH--SWPLH--Swap-halfword-in-memory-
159 asm!(
160 concat!("swp", $acquire, $release, $suffix, " {val", $val_modifier, "}, {out", $val_modifier, "}, [{dst}]"), // atomic { _x = *dst; *dst = val; out = zero_extend(_x) }
161 dst = in(reg) ptr_reg!(dst),
162 val = in(reg) val,
163 out = lateout(reg) out,
164 options(nostack, preserves_flags),
165 )
166 };
167 }
168 #[cfg(not(target_feature = "lse"))]
169 macro_rules! swap {
170 ($acquire:tt, $release:tt, $msvc_fence:tt) => {
171 asm!(
172 "2:", // 'retry:
173 concat!("ld", $acquire, "xr", $suffix, " {out", $val_modifier, "}, [{dst}]"), // atomic { out = zero_extend(*dst); EXCLUSIVE = dst }
174 concat!("st", $release, "xr", $suffix, " {r:w}, {val", $val_modifier, "}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = val; r = 0 } else { r = 1 }; EXCLUSIVE = None }
175 "cbnz {r:w}, 2b", // if r != 0 { jump 'retry }
176 $msvc_fence, // fence
177 dst = in(reg) ptr_reg!(dst),
178 val = in(reg) val,
179 out = out(reg) out,
180 r = out(reg) _,
181 options(nostack, preserves_flags),
182 )
183 };
184 }
185 atomic_rmw!(swap, order);
186 }
187 out
188 }
189 }
190 impl AtomicCompareExchange for $ty {
191 #[inline]
192 unsafe fn atomic_compare_exchange(
193 dst: *mut MaybeUninit<Self>,
194 old: MaybeUninit<Self>,
195 new: MaybeUninit<Self>,
196 success: Ordering,
197 failure: Ordering,
198 ) -> (MaybeUninit<Self>, bool) {
199 debug_assert_atomic_unsafe_precondition!(dst, $ty);
200 let order = crate::utils::upgrade_success_ordering(success, failure);
201 let mut out: MaybeUninit<Self>;
202 let mut r: i32;
203
204 // SAFETY: the caller must uphold the safety contract.
205 unsafe {
206 #[cfg(target_feature = "lse")]
207 macro_rules! cmpxchg {
208 ($acquire:tt, $release:tt, $_msvc_fence:tt) => {{
209 // Refs:
210 // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/CAS--CASA--CASAL--CASL--Compare-and-swap-word-or-doubleword-in-memory-
211 // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/CASB--CASAB--CASALB--CASLB--Compare-and-swap-byte-in-memory-
212 // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/CASH--CASAH--CASALH--CASLH--Compare-and-swap-halfword-in-memory-
213 asm!(
214 // cas writes the current value to the first register,
215 // so copy the `old`'s value for later comparison.
216 concat!("mov {out", $val_modifier, "}, {old", $val_modifier, "}"), // out = old
217 concat!("cas", $acquire, $release, $suffix, " {out", $val_modifier, "}, {new", $val_modifier, "}, [{dst}]"), // atomic { _x = *dst; if _x == out { *dst = new }; out = zero_extend(_x) }
218 concat!("cmp {out", $val_modifier, "}, {old", $val_modifier, "}", $cmp_ext), // if zero_extend(out) == zero_extend(old) { Z = 1 } else { Z = 0 }
219 "cset {r:w}, eq", // r = Z
220 dst = in(reg) ptr_reg!(dst),
221 old = in(reg) old,
222 new = in(reg) new,
223 out = out(reg) out,
224 r = lateout(reg) r,
225 // Do not use `preserves_flags` because CMP modifies the condition flags.
226 options(nostack),
227 );
228 crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
229 (out, r != 0)
230 }};
231 }
232 #[cfg(not(target_feature = "lse"))]
233 macro_rules! cmpxchg {
234 ($acquire:tt, $release:tt, $msvc_fence:tt) => {{
235 asm!(
236 "2:", // 'retry:
237 concat!("ld", $acquire, "xr", $suffix, " {out", $val_modifier, "}, [{dst}]"), // atomic { out = zero_extend(*dst); EXCLUSIVE = dst }
238 concat!("cmp {out", $val_modifier, "}, {old", $val_modifier, "}", $cmp_ext), // if zero_extend(out) == zero_extend(old) { Z = 1 } else { Z = 0 }
239 "b.ne 3f", // if Z == 0 { jump 'cmp-fail }
240 concat!("st", $release, "xr", $suffix, " {r:w}, {new", $val_modifier, "}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = new; r = 0 } else { r = 1 }; EXCLUSIVE = None }
241 "cbnz {r:w}, 2b", // if r != 0 { jump 'retry }
242 $msvc_fence, // fence
243 "b 4f", // jump 'success
244 "3:", // 'cmp-fail:
245 "mov {r:w}, #1", // r = 1
246 "clrex", // EXCLUSIVE = None
247 "4:", // 'success:
248 dst = in(reg) ptr_reg!(dst),
249 old = in(reg) old,
250 new = in(reg) new,
251 out = out(reg) out,
252 r = out(reg) r,
253 // Do not use `preserves_flags` because CMP modifies the condition flags.
254 options(nostack),
255 );
256 crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
257 // 0 if the store was successful, 1 if no store was performed
258 (out, r == 0)
259 }};
260 }
261 atomic_rmw!(cmpxchg, order, write = success)
262 }
263 }
264 #[cfg(not(target_feature = "lse"))]
265 #[inline]
266 unsafe fn atomic_compare_exchange_weak(
267 dst: *mut MaybeUninit<Self>,
268 old: MaybeUninit<Self>,
269 new: MaybeUninit<Self>,
270 success: Ordering,
271 failure: Ordering,
272 ) -> (MaybeUninit<Self>, bool) {
273 debug_assert_atomic_unsafe_precondition!(dst, $ty);
274 let order = crate::utils::upgrade_success_ordering(success, failure);
275 let mut out: MaybeUninit<Self>;
276 let r: i32;
277
278 // SAFETY: the caller must uphold the safety contract.
279 unsafe {
280 macro_rules! cmpxchg_weak {
281 ($acquire:tt, $release:tt, $msvc_fence:tt) => {
282 asm!(
283 concat!("ld", $acquire, "xr", $suffix, " {out", $val_modifier, "}, [{dst}]"), // atomic { out = zero_extend(*dst); EXCLUSIVE = dst }
284 concat!("cmp {out", $val_modifier, "}, {old", $val_modifier, "}", $cmp_ext), // if zero_extend(out) == zero_extend(old) { Z = 1 } else { Z = 0 }
285 "b.ne 3f", // if Z == 0 { jump 'cmp-fail }
286 concat!("st", $release, "xr", $suffix, " {r:w}, {new", $val_modifier, "}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = new; r = 0 } else { r = 1 }; EXCLUSIVE = None }
287 if_any!($msvc_fence, "cbnz {r:w}, 4f"), // if r != 0 { jump 'end }
288 $msvc_fence, // fence
289 "b 4f", // jump 'end
290 "3:", // 'cmp-fail:
291 "mov {r:w}, #1", // r = 1
292 "clrex", // EXCLUSIVE = None
293 "4:", // 'end:
294 dst = in(reg) ptr_reg!(dst),
295 old = in(reg) old,
296 new = in(reg) new,
297 out = out(reg) out,
298 r = out(reg) r,
299 // Do not use `preserves_flags` because CMP modifies the condition flags.
300 options(nostack),
301 )
302 };
303 }
304 atomic_rmw!(cmpxchg_weak, order, write = success);
305 crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
306 }
307 // 0 if the store was successful, 1 if no store was performed
308 (out, r == 0)
309 }
310 }
311 };
312}
313
314atomic!(u8, "b", ":w", ", uxtb");
315atomic!(u16, "h", ":w", ", uxth");
316atomic!(u32, "", ":w", "");
317atomic!(u64, "", "", "");
318
// -----------------------------------------------------------------------------
// 128-bit atomics
//
// There are a few ways to implement 128-bit atomic operations in AArch64.
//
// - LDXP/STXP loop (DW LL/SC)
// - CASP (DWCAS) added as Armv8.1 FEAT_LSE (optional from Armv8.0, mandatory from Armv8.1)
// - LDP/STP (DW load/store) if Armv8.4 FEAT_LSE2 (optional from Armv8.2, mandatory from Armv8.4) is available
// - LDIAPP/STILP (DW acquire-load/release-store) added as Armv8.9 FEAT_LRCPC3 (optional from Armv8.2) (if FEAT_LSE2 is also available)
// - LDCLRP/LDSETP/SWPP (DW RMW) added as Armv9.4 FEAT_LSE128 (optional from Armv9.3)
//
// If FEAT_LSE is available at compile-time, we use CASP for load/CAS. Otherwise, use LDXP/STXP loop.
// If FEAT_LSE2 is available at compile-time, we use LDP/STP for load/store.
// If FEAT_LSE128 is available at compile-time, we use SWPP for swap/{release,seqcst}-store.
// If FEAT_LSE2 and FEAT_LRCPC3 are available at compile-time, we use LDIAPP/STILP for acquire-load/release-store.
//
// Note: FEAT_LSE2 doesn't imply FEAT_LSE. FEAT_LSE128 implies FEAT_LSE but not FEAT_LSE2.
//
// Refs:
// - LDXP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/LDXP--Load-exclusive-pair-of-registers-
// - LDAXP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/LDAXP--Load-acquire-exclusive-pair-of-registers-
// - STXP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/STXP--Store-exclusive-pair-of-registers-
// - STLXP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/STLXP--Store-release-exclusive-pair-of-registers-
//
// Note: Load-Exclusive pair (by itself) does not guarantee atomicity; to complete an atomic
// operation (even load/store), a corresponding Store-Exclusive pair must succeed.
// See Arm Architecture Reference Manual for A-profile architecture
// Section B2.2.1 "Requirements for single-copy atomicity", and
// Section B2.9 "Synchronization and semaphores" for more.

349delegate_signed!(delegate_all, u128);
350impl AtomicLoad for u128 {
351 #[inline]
352 unsafe fn atomic_load(src: *const MaybeUninit<Self>, order: Ordering) -> MaybeUninit<Self> {
353 debug_assert_atomic_unsafe_precondition!(src, u128);
354 let (mut out_lo, mut out_hi);
355
356 #[cfg(any(target_feature = "lse2", atomic_maybe_uninit_target_feature = "lse2"))]
357 // SAFETY: the caller must guarantee that `dst` is valid for reads,
358 // 16-byte aligned, that there are no concurrent non-atomic operations.
359 // the above cfg guarantee that the CPU supports FEAT_LSE2.
360 //
361 // Refs:
362 // - LDP https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/LDP--Load-pair-of-registers-
363 // - LDIAPP https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/LDIAPP--Load-Acquire-RCpc-ordered-pair-of-registers-
364 unsafe {
365 macro_rules! atomic_load_relaxed {
366 ($iap:tt, $dmb_ishld:tt) => {
367 asm!(
368 concat!("ld", $iap, "p {out_lo}, {out_hi}, [{src}]"), // atomic { out_lo:out_hi = *src }
369 $dmb_ishld, // fence
370 src = in(reg) ptr_reg!(src),
371 out_hi = lateout(reg) out_hi,
372 out_lo = lateout(reg) out_lo,
373 options(nostack, preserves_flags),
374 )
375 };
376 }
377 match order {
378 // if FEAT_LRCPC3 && order != relaxed => ldiapp
379 // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC3.
380 #[cfg(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3"))]
381 Ordering::Acquire => atomic_load_relaxed!("iap", ""),
382 #[cfg(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3"))]
383 Ordering::SeqCst => {
384 asm!(
385 // ldar (or dmb ishld) is required to prevent reordering with preceding stlxp.
386 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108891
387 "ldar {tmp}, [{src}]", // atomic { tmp = *src }
388 "ldiapp {out_lo}, {out_hi}, [{src}]", // atomic { out_lo:out_hi = *src }
389 src = in(reg) ptr_reg!(src),
390 out_hi = lateout(reg) out_hi,
391 out_lo = lateout(reg) out_lo,
392 tmp = out(reg) _,
393 options(nostack, preserves_flags),
394 );
395 }
396
397 // else => ldp
398 Ordering::Relaxed => atomic_load_relaxed!("", ""),
399 #[cfg(not(any(
400 target_feature = "rcpc3",
401 atomic_maybe_uninit_target_feature = "rcpc3"
402 )))]
403 Ordering::Acquire => atomic_load_relaxed!("", "dmb ishld"),
404 #[cfg(not(any(
405 target_feature = "rcpc3",
406 atomic_maybe_uninit_target_feature = "rcpc3"
407 )))]
408 Ordering::SeqCst => {
409 asm!(
410 // ldar (or dmb ishld) is required to prevent reordering with preceding stlxp.
411 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108891
412 "ldar {tmp}, [{src}]", // atomic { tmp = *src }
413 "ldp {out_lo}, {out_hi}, [{src}]", // atomic { out_lo:out_hi = *src }
414 "dmb ishld", // fence
415 src = in(reg) ptr_reg!(src),
416 out_hi = lateout(reg) out_hi,
417 out_lo = lateout(reg) out_lo,
418 tmp = out(reg) _,
419 options(nostack, preserves_flags),
420 );
421 }
422 _ => crate::utils::unreachable_unchecked(),
423 }
424 MaybeUninit128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
425 }
426 #[cfg(not(any(target_feature = "lse2", atomic_maybe_uninit_target_feature = "lse2")))]
427 // SAFETY: the caller must uphold the safety contract.
428 unsafe {
429 #[cfg(target_feature = "lse")]
430 macro_rules! atomic_load {
431 ($acquire:tt, $release:tt) => {
432 asm!(
433 // Refs: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/CASP--CASPA--CASPAL--CASPL--Compare-and-swap-pair-of-words-or-doublewords-in-memory-
434 concat!("casp", $acquire, $release, " x2, x3, x2, x3, [{src}]"), // atomic { _x = *src; if _x == x2:x3 { *src = x2:x3 }; x2:x3 = _x }
435 src = in(reg) ptr_reg!(src),
436 // must be allocated to even/odd register pair
437 inout("x2") 0_u64 => out_lo,
438 inout("x3") 0_u64 => out_hi,
439 options(nostack, preserves_flags),
440 )
441 };
442 }
443 #[cfg(not(target_feature = "lse"))]
444 macro_rules! atomic_load {
445 ($acquire:tt, $release:tt) => {
446 asm!(
447 "2:", // 'retry:
448 concat!("ld", $acquire, "xp {out_lo}, {out_hi}, [{src}]"), // atomic { out_lo:out_hi = *src; EXCLUSIVE = src }
449 // write back to ensure atomicity
450 concat!("st", $release, "xp {r:w}, {out_lo}, {out_hi}, [{src}]"), // atomic { if EXCLUSIVE == src { *src = out_lo:out_hi; r = 0 } else { r = 1 }; EXCLUSIVE = None }
451 "cbnz {r:w}, 2b", // if r != 0 { jump 'retry }
452 src = in(reg) ptr_reg!(src),
453 out_lo = out(reg) out_lo,
454 out_hi = out(reg) out_hi,
455 r = out(reg) _,
456 options(nostack, preserves_flags),
457 )
458 };
459 }
460 match order {
461 Ordering::Relaxed => atomic_load!("", ""),
462 Ordering::Acquire => atomic_load!("a", ""),
463 // TODO: in atomicsabi64, seqcst load is the same as acquire load
464 Ordering::SeqCst => atomic_load!("a", "l"),
465 _ => crate::utils::unreachable_unchecked(),
466 }
467 MaybeUninit128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
468 }
469 }
470}
471impl AtomicStore for u128 {
472 #[inline]
473 unsafe fn atomic_store(dst: *mut MaybeUninit<Self>, val: MaybeUninit<Self>, order: Ordering) {
474 debug_assert_atomic_unsafe_precondition!(dst, u128);
475 let val = MaybeUninit128 { whole: val };
476
477 #[cfg(any(target_feature = "lse2", atomic_maybe_uninit_target_feature = "lse2"))]
478 // SAFETY: the caller must guarantee that `dst` is valid for writes,
479 // 16-byte aligned, that there are no concurrent non-atomic operations.
480 // the above cfg guarantee that the CPU supports FEAT_LSE2.
481 //
482 // Refs:
483 // - STP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/STP--Store-pair-of-registers-
484 // - STILP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/STILP--Store-release-ordered-pair-of-registers-
485 unsafe {
486 macro_rules! atomic_store {
487 ($il:tt, $acquire:tt, $release:tt) => {
488 asm!(
489 $release, // fence
490 concat!("st", $il, "p {val_lo}, {val_hi}, [{dst}]"), // atomic { *dst = val_lo:val_hi }
491 $acquire, // fence
492 dst = in(reg) ptr_reg!(dst),
493 val_lo = in(reg) val.pair.lo,
494 val_hi = in(reg) val.pair.hi,
495 options(nostack, preserves_flags),
496 )
497 };
498 }
499 // Use swpp if stp requires fences.
500 // https://reviews.llvm.org/D143506
501 #[cfg(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128"))]
502 macro_rules! atomic_store_swpp {
503 ($acquire:tt, $release:tt, $_msvc_fence:tt) => {
504 asm!(
505 concat!("swpp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"), // atomic { _x = *dst; *dst = val_lo:val_hi; val_lo:val_hi = _x }
506 dst = in(reg) ptr_reg!(dst),
507 val_lo = inout(reg) val.pair.lo => _,
508 val_hi = inout(reg) val.pair.hi => _,
509 options(nostack, preserves_flags),
510 )
511 };
512 }
513 match order {
514 // if FEAT_LSE128 && order == seqcst => swpp
515 // Prefer swpp if stp requires fences. https://reviews.llvm.org/D143506
516 // SAFETY: cfg guarantee that the CPU supports FEAT_LSE128.
517 #[cfg(any(
518 target_feature = "lse128",
519 atomic_maybe_uninit_target_feature = "lse128",
520 ))]
521 Ordering::SeqCst => atomic_rmw!(atomic_store_swpp, order),
522
523 // if FEAT_LRCPC3 && order != relaxed => stilp
524 // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC3.
525 #[cfg(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3"))]
526 Ordering::Release => atomic_store!("il", "", ""),
527 // TODO: in atomicsabi64, seqcst store is the same as release store
528 // LLVM uses store-release (dmb ish; stp); dmb ish, GCC (libatomic) and Atomics ABI Standard
529 // uses store-release (stilp) without fence for SeqCst store
530 // (https://github.com/gcc-mirror/gcc/commit/7107574958e2bed11d916a1480ef1319f15e5ffe).
531 // Considering https://reviews.llvm.org/D141748, LLVM's lowering seems
532 // to be the safer option here (I'm not convinced that the libatomic's implementation is wrong).
533 #[cfg(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3"))]
534 #[cfg(not(any(
535 target_feature = "lse128",
536 atomic_maybe_uninit_target_feature = "lse128",
537 )))]
538 Ordering::SeqCst => atomic_store!("il", "dmb ish", ""),
539
540 // if FEAT_LSE128 && order != relaxed => swpp
541 // Prefer swpp if stp requires fences. https://reviews.llvm.org/D143506
542 // SAFETY: cfg guarantee that the CPU supports FEAT_LSE128.
543 #[cfg(not(any(
544 target_feature = "rcpc3",
545 atomic_maybe_uninit_target_feature = "rcpc3",
546 )))]
547 #[cfg(any(
548 target_feature = "lse128",
549 atomic_maybe_uninit_target_feature = "lse128",
550 ))]
551 Ordering::Release => atomic_rmw!(atomic_store_swpp, order),
552
553 // else => stp
554 Ordering::Relaxed => atomic_store!("", "", ""),
555 #[cfg(not(any(
556 target_feature = "rcpc3",
557 atomic_maybe_uninit_target_feature = "rcpc3",
558 )))]
559 #[cfg(not(any(
560 target_feature = "lse128",
561 atomic_maybe_uninit_target_feature = "lse128",
562 )))]
563 Ordering::Release => atomic_store!("", "", "dmb ish"),
564 #[cfg(not(any(
565 target_feature = "rcpc3",
566 atomic_maybe_uninit_target_feature = "rcpc3",
567 )))]
568 #[cfg(not(any(
569 target_feature = "lse128",
570 atomic_maybe_uninit_target_feature = "lse128",
571 )))]
572 Ordering::SeqCst => atomic_store!("", "dmb ish", "dmb ish"),
573 _ => crate::utils::unreachable_unchecked(),
574 }
575 }
576 #[cfg(not(any(target_feature = "lse2", atomic_maybe_uninit_target_feature = "lse2")))]
577 // SAFETY: the caller must uphold the safety contract.
578 // Do not use atomic_swap because it needs extra registers to implement store.
579 unsafe {
580 macro_rules! store {
581 ($acquire:tt, $release:tt, $msvc_fence:tt) => {
582 asm!(
583 "2:", // 'retry:
584 concat!("ld", $acquire, "xp xzr, {tmp}, [{dst}]"), // atomic { xzr:tmp = *dst; EXCLUSIVE = dst }
585 concat!("st", $release, "xp {tmp:w}, {val_lo}, {val_hi}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = val_lo:val_hi; tmp = 0 } else { tmp = 1 }; EXCLUSIVE = None }
586 "cbnz {tmp:w}, 2b", // if tmp != 0 { jump 'retry }
587 $msvc_fence, // fence
588 dst = in(reg) ptr_reg!(dst),
589 val_lo = in(reg) val.pair.lo,
590 val_hi = in(reg) val.pair.hi,
591 tmp = out(reg) _,
592 options(nostack, preserves_flags),
593 )
594 };
595 }
596 atomic_rmw!(store, order);
597 }
598 }
599}
600impl AtomicSwap for u128 {
601 #[inline]
602 unsafe fn atomic_swap(
603 dst: *mut MaybeUninit<Self>,
604 val: MaybeUninit<Self>,
605 order: Ordering,
606 ) -> MaybeUninit<Self> {
607 debug_assert_atomic_unsafe_precondition!(dst, u128);
608 let val = MaybeUninit128 { whole: val };
609 let (mut prev_lo, mut prev_hi);
610
611 // SAFETY: the caller must uphold the safety contract.
612 unsafe {
613 #[cfg(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128"))]
614 macro_rules! swap {
615 ($acquire:tt, $release:tt, $_msvc_fence:tt) => {
616 asm!(
617 concat!("swpp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"), // atomic { _x = *dst; *dst = val_lo:val_hi; val_lo:val_hi = _x }
618 dst = in(reg) ptr_reg!(dst),
619 val_lo = inout(reg) val.pair.lo => prev_lo,
620 val_hi = inout(reg) val.pair.hi => prev_hi,
621 options(nostack, preserves_flags),
622 )
623 };
624 }
625 #[cfg(not(any(
626 target_feature = "lse128",
627 atomic_maybe_uninit_target_feature = "lse128",
628 )))]
629 macro_rules! swap {
630 ($acquire:tt, $release:tt, $msvc_fence:tt) => {
631 asm!(
632 "2:", // 'retry:
633 concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"), // atomic { prev_lo:prev_hi = *dst; EXCLUSIVE = dst }
634 concat!("st", $release, "xp {r:w}, {val_lo}, {val_hi}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = val_lo:val_hi; r = 0 } else { r = 1 }; EXCLUSIVE = None }
635 "cbnz {r:w}, 2b", // if r != 0 { jump 'retry }
636 $msvc_fence, // fence
637 dst = in(reg) ptr_reg!(dst),
638 val_lo = in(reg) val.pair.lo,
639 val_hi = in(reg) val.pair.hi,
640 prev_lo = out(reg) prev_lo,
641 prev_hi = out(reg) prev_hi,
642 r = out(reg) _,
643 options(nostack, preserves_flags),
644 )
645 };
646 }
647 atomic_rmw!(swap, order);
648 MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
649 }
650 }
651}
652impl AtomicCompareExchange for u128 {
653 #[inline]
654 unsafe fn atomic_compare_exchange(
655 dst: *mut MaybeUninit<Self>,
656 old: MaybeUninit<Self>,
657 new: MaybeUninit<Self>,
658 success: Ordering,
659 failure: Ordering,
660 ) -> (MaybeUninit<Self>, bool) {
661 debug_assert_atomic_unsafe_precondition!(dst, u128);
662 let order = crate::utils::upgrade_success_ordering(success, failure);
663 let old = MaybeUninit128 { whole: old };
664 let new = MaybeUninit128 { whole: new };
665 let (mut prev_lo, mut prev_hi);
666 let mut r: i32;
667
668 // SAFETY: the caller must uphold the safety contract.
669 unsafe {
670 #[cfg(target_feature = "lse")]
671 macro_rules! cmpxchg {
672 ($acquire:tt, $release:tt, $_msvc_fence:tt) => {
673 // Refs: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/CASP--CASPA--CASPAL--CASPL--Compare-and-swap-pair-of-words-or-doublewords-in-memory-
674 asm!(
675 // casp writes the current value to the first register pair,
676 // so copy the `old`'s value for later comparison.
677 "mov x8, {old_lo}", // x8 = old_lo
678 "mov x9, {old_hi}", // x9 = old_hi
679 concat!("casp", $acquire, $release, " x8, x9, x4, x5, [{dst}]"), // atomic { _x = *dst; if _x == x8:x9 { *dst = x4:x5 }; x8:x9 = _x }
680 "cmp x8, {old_lo}", // if x8 == old_lo { Z = 1 } else { Z = 0 }
681 "ccmp x9, {old_hi}, #0, eq", // if Z == 1 { if x9 == old_hi { Z = 1 } else { Z = 0 } } else { Z = 0 }
682 "cset {r:w}, eq", // r = Z
683 dst = in(reg) ptr_reg!(dst),
684 old_lo = in(reg) old.pair.lo,
685 old_hi = in(reg) old.pair.hi,
686 r = lateout(reg) r,
687 // new pair - must be allocated to even/odd register pair
688 in("x4") new.pair.lo,
689 in("x5") new.pair.hi,
690 // prev pair - must be allocated to even/odd register pair
691 out("x8") prev_lo,
692 out("x9") prev_hi,
693 // Do not use `preserves_flags` because CMP and CCMP modify the condition flags.
694 options(nostack),
695 )
696 };
697 }
698 #[cfg(not(target_feature = "lse"))]
699 macro_rules! cmpxchg {
700 ($acquire:tt, $release:tt, $msvc_fence:tt) => {
701 asm!(
702 "2:", // 'retry:
703 concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"), // atomic { prev_lo:prev_hi = *dst; EXCLUSIVE = dst }
704 "cmp {prev_lo}, {old_lo}", // if prev_lo == old_lo { Z = 1 } else { Z = 0 }
705 "ccmp {prev_hi}, {old_hi}, #0, eq", // if Z == 1 { if prev_hi == old_hi { Z = 1 } else { Z = 0 } } else { Z = 0 }
706 // write back to ensure atomicity
707 "csel {tmp_lo}, {new_lo}, {prev_lo}, eq", // if Z == 1 { tmp_lo = new_lo } else { tmp_lo = prev_lo }
708 "csel {tmp_hi}, {new_hi}, {prev_hi}, eq", // if Z == 1 { tmp_hi = new_hi } else { tmp_hi = prev_hi }
709 concat!("st", $release, "xp {r:w}, {tmp_lo}, {tmp_hi}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = tmp_lo:tmp_hi; r = 0 } else { r = 1 }; EXCLUSIVE = None }
710 "cbnz {r:w}, 2b", // if r != 0 { jump 'retry }
711 "cset {r:w}, eq", // r = Z
712 $msvc_fence, // fence
713 dst = in(reg) ptr_reg!(dst),
714 old_lo = in(reg) old.pair.lo,
715 old_hi = in(reg) old.pair.hi,
716 new_lo = in(reg) new.pair.lo,
717 new_hi = in(reg) new.pair.hi,
718 prev_lo = out(reg) prev_lo,
719 prev_hi = out(reg) prev_hi,
720 r = out(reg) r,
721 tmp_lo = out(reg) _,
722 tmp_hi = out(reg) _,
723 // Do not use `preserves_flags` because CMP and CCMP modify the condition flags.
724 options(nostack),
725 )
726 };
727 }
728 atomic_rmw!(cmpxchg, order, write = success);
729 crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
730 (MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole, r != 0)
731 }
732 }
733}
734
735// -----------------------------------------------------------------------------
736// cfg macros
737
738#[macro_export]
739macro_rules! cfg_has_atomic_8 {
740 ($($tt:tt)*) => { $($tt)* };
741}
742#[macro_export]
743macro_rules! cfg_no_atomic_8 {
744 ($($tt:tt)*) => {};
745}
746#[macro_export]
747macro_rules! cfg_has_atomic_16 {
748 ($($tt:tt)*) => { $($tt)* };
749}
750#[macro_export]
751macro_rules! cfg_no_atomic_16 {
752 ($($tt:tt)*) => {};
753}
754#[macro_export]
755macro_rules! cfg_has_atomic_32 {
756 ($($tt:tt)*) => { $($tt)* };
757}
758#[macro_export]
759macro_rules! cfg_no_atomic_32 {
760 ($($tt:tt)*) => {};
761}
762#[macro_export]
763macro_rules! cfg_has_atomic_64 {
764 ($($tt:tt)*) => { $($tt)* };
765}
766#[macro_export]
767macro_rules! cfg_no_atomic_64 {
768 ($($tt:tt)*) => {};
769}
770#[macro_export]
771macro_rules! cfg_has_atomic_128 {
772 ($($tt:tt)*) => { $($tt)* };
773}
774#[macro_export]
775macro_rules! cfg_no_atomic_128 {
776 ($($tt:tt)*) => {};
777}
778#[macro_export]
779macro_rules! cfg_has_atomic_cas {
780 ($($tt:tt)*) => { $($tt)* };
781}
782#[macro_export]
783macro_rules! cfg_no_atomic_cas {
784 ($($tt:tt)*) => {};
785}