// atomic_maybe_uninit/arch/aarch64.rs
1// SPDX-License-Identifier: Apache-2.0 OR MIT
2
3/*
4AArch64
5
6See "Atomic operation overview by architecture" for atomic operations in this architecture:
7https://github.com/taiki-e/atomic-maybe-uninit/blob/HEAD/src/arch/README.md#aarch64
8
9Refs:
10- Arm A-profile A64 Instruction Set Architecture
11 https://developer.arm.com/documentation/ddi0602/2025-06
12- C/C++ Atomics Application Binary Interface Standard for the Arm® 64-bit Architecture
13 https://github.com/ARM-software/abi-aa/blob/2025Q1/atomicsabi64/atomicsabi64.rst
14- Arm® Compiler armasm User Guide
15 https://developer.arm.com/documentation/dui0801/latest
16- Arm® Architecture Reference Manual for A-profile architecture
17 https://developer.arm.com/documentation/ddi0487/latest (PDF)
18- Arm® Architecture Reference Manual Supplement Armv8, for R-profile AArch64 architecture
19 https://developer.arm.com/documentation/ddi0600/latest (PDF)
20- portable-atomic https://github.com/taiki-e/portable-atomic
21
22See tests/asm-test/asm/atomic-maybe-uninit for generated assembly.
23*/
24
25delegate_size!(delegate_all);
26
27use core::{
28 arch::asm,
29 mem::{self, MaybeUninit},
30 sync::atomic::Ordering,
31};
32
33use crate::{
34 raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap},
35 utils::{MaybeUninit128, Pair},
36};
37
38macro_rules! atomic_rmw {
39 ($op:ident, $order:ident) => {
40 atomic_rmw!($op, $order, write = $order)
41 };
42 ($op:ident, $order:ident, write = $write:ident) => {
43 // op(acquire, release, msvc_fence)
44 match $order {
45 Ordering::Relaxed => $op!("", "", ""),
46 Ordering::Acquire => $op!("a", "", ""),
47 Ordering::Release => $op!("", "l", ""),
48 Ordering::AcqRel => $op!("a", "l", ""),
49 // In MSVC environments, SeqCst stores/writes needs fences after writes.
50 // https://reviews.llvm.org/D141748
51 #[cfg(target_env = "msvc")]
52 Ordering::SeqCst if $write == Ordering::SeqCst => $op!("a", "l", "dmb ish"),
53 // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments.
54 Ordering::SeqCst => $op!("a", "l", ""),
55 _ => unreachable!(),
56 }
57 };
58}
59
60// -----------------------------------------------------------------------------
61// Register-width or smaller atomics
62
63#[rustfmt::skip]
64macro_rules! atomic {
65 ($ty:ident, $suffix:tt, $val_modifier:tt, $cmp_ext:tt) => {
66 delegate_signed!(delegate_all, $ty);
67 impl AtomicLoad for $ty {
68 #[inline]
69 unsafe fn atomic_load(
70 src: *const MaybeUninit<Self>,
71 order: Ordering,
72 ) -> MaybeUninit<Self> {
73 debug_assert_atomic_unsafe_precondition!(src, $ty);
74 let out: MaybeUninit<Self>;
75
76 // SAFETY: the caller must uphold the safety contract.
77 unsafe {
78 macro_rules! atomic_load {
79 ($acquire:tt) => {
80 asm!(
81 concat!("ld", $acquire, "r", $suffix, " {out", $val_modifier, "}, [{src}]"), // atomic { out = zero_extend(*src) }
82 src = in(reg) ptr_reg!(src),
83 out = lateout(reg) out,
84 options(nostack, preserves_flags),
85 )
86 };
87 }
88 match order {
89 Ordering::Relaxed => atomic_load!(""),
90 // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC.
91 #[cfg(target_feature = "rcpc")]
92 Ordering::Acquire => atomic_load!("ap"),
93 #[cfg(not(target_feature = "rcpc"))]
94 Ordering::Acquire => atomic_load!("a"),
95 Ordering::SeqCst => atomic_load!("a"),
96 _ => crate::utils::unreachable_unchecked(),
97 }
98 }
99 out
100 }
101 }
102 impl AtomicStore for $ty {
103 #[inline]
104 unsafe fn atomic_store(
105 dst: *mut MaybeUninit<Self>,
106 val: MaybeUninit<Self>,
107 order: Ordering,
108 ) {
109 debug_assert_atomic_unsafe_precondition!(dst, $ty);
110
111 // SAFETY: the caller must uphold the safety contract.
112 unsafe {
113 macro_rules! atomic_store {
114 ($release:tt, $msvc_fence:tt) => {
115 asm!(
116 concat!("st", $release, "r", $suffix, " {val", $val_modifier, "}, [{dst}]"), // atomic { *dst = val }
117 $msvc_fence, // fence
118 dst = in(reg) ptr_reg!(dst),
119 val = in(reg) val,
120 options(nostack, preserves_flags),
121 )
122 };
123 }
124 match order {
125 Ordering::Relaxed => atomic_store!("", ""),
126 Ordering::Release => atomic_store!("l", ""),
127 // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments.
128 #[cfg(not(target_env = "msvc"))]
129 Ordering::SeqCst => atomic_store!("l", ""),
130 // In MSVC environments, SeqCst stores/writes needs fences after writes.
131 // https://reviews.llvm.org/D141748
132 #[cfg(target_env = "msvc")]
133 Ordering::SeqCst => atomic_store!("l", "dmb ish"),
134 _ => crate::utils::unreachable_unchecked(),
135 }
136 }
137 }
138 }
139 impl AtomicSwap for $ty {
140 #[inline]
141 unsafe fn atomic_swap(
142 dst: *mut MaybeUninit<Self>,
143 val: MaybeUninit<Self>,
144 order: Ordering,
145 ) -> MaybeUninit<Self> {
146 debug_assert_atomic_unsafe_precondition!(dst, $ty);
147 let mut out: MaybeUninit<Self>;
148
149 // SAFETY: the caller must uphold the safety contract.
150 unsafe {
151 #[cfg(target_feature = "lse")]
152 macro_rules! swap {
153 ($acquire:tt, $release:tt, $_msvc_fence:tt) => {
154 // Refs:
155 // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/SWP--SWPA--SWPAL--SWPL--Swap-word-or-doubleword-in-memory-
156 // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/SWPB--SWPAB--SWPALB--SWPLB--Swap-byte-in-memory-
157 // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/SWPH--SWPAH--SWPALH--SWPLH--Swap-halfword-in-memory-
158 asm!(
159 concat!("swp", $acquire, $release, $suffix, " {val", $val_modifier, "}, {out", $val_modifier, "}, [{dst}]"), // atomic { _x = *dst; *dst = val; out = zero_extend(_x) }
160 dst = in(reg) ptr_reg!(dst),
161 val = in(reg) val,
162 out = lateout(reg) out,
163 options(nostack, preserves_flags),
164 )
165 };
166 }
167 #[cfg(not(target_feature = "lse"))]
168 macro_rules! swap {
169 ($acquire:tt, $release:tt, $msvc_fence:tt) => {
170 asm!(
171 "2:", // 'retry:
172 concat!("ld", $acquire, "xr", $suffix, " {out", $val_modifier, "}, [{dst}]"), // atomic { out = zero_extend(*dst); EXCLUSIVE = dst }
173 concat!("st", $release, "xr", $suffix, " {r:w}, {val", $val_modifier, "}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = val; r = 0 } else { r = 1 }; EXCLUSIVE = None }
174 "cbnz {r:w}, 2b", // if r != 0 { jump 'retry }
175 $msvc_fence, // fence
176 dst = in(reg) ptr_reg!(dst),
177 val = in(reg) val,
178 out = out(reg) out,
179 r = out(reg) _,
180 options(nostack, preserves_flags),
181 )
182 };
183 }
184 atomic_rmw!(swap, order);
185 }
186 out
187 }
188 }
189 impl AtomicCompareExchange for $ty {
190 #[inline]
191 unsafe fn atomic_compare_exchange(
192 dst: *mut MaybeUninit<Self>,
193 old: MaybeUninit<Self>,
194 new: MaybeUninit<Self>,
195 success: Ordering,
196 failure: Ordering,
197 ) -> (MaybeUninit<Self>, bool) {
198 debug_assert_atomic_unsafe_precondition!(dst, $ty);
199 let order = crate::utils::upgrade_success_ordering(success, failure);
200 let mut out: MaybeUninit<Self>;
201 let mut r: i32;
202
203 // SAFETY: the caller must uphold the safety contract.
204 unsafe {
205 #[cfg(target_feature = "lse")]
206 macro_rules! cmpxchg {
207 ($acquire:tt, $release:tt, $_msvc_fence:tt) => {{
208 // Refs:
209 // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/CAS--CASA--CASAL--CASL--Compare-and-swap-word-or-doubleword-in-memory-
210 // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/CASB--CASAB--CASALB--CASLB--Compare-and-swap-byte-in-memory-
211 // - https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/CASH--CASAH--CASALH--CASLH--Compare-and-swap-halfword-in-memory-
212 asm!(
213 // cas writes the current value to the first register,
214 // so copy the `old`'s value for later comparison.
215 concat!("mov {out", $val_modifier, "}, {old", $val_modifier, "}"), // out = old
216 concat!("cas", $acquire, $release, $suffix, " {out", $val_modifier, "}, {new", $val_modifier, "}, [{dst}]"), // atomic { if *dst == out { *dst = new } else { out = zero_extend(*dst) } }
217 concat!("cmp {out", $val_modifier, "}, {old", $val_modifier, "}", $cmp_ext), // if out == old { Z = 1 } else { Z = 0 }
218 "cset {r:w}, eq", // r = Z
219 dst = in(reg) ptr_reg!(dst),
220 old = in(reg) old,
221 new = in(reg) new,
222 out = out(reg) out,
223 r = lateout(reg) r,
224 // Do not use `preserves_flags` because CMP modifies the condition flags.
225 options(nostack),
226 );
227 crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
228 (out, r != 0)
229 }};
230 }
231 #[cfg(not(target_feature = "lse"))]
232 macro_rules! cmpxchg {
233 ($acquire:tt, $release:tt, $msvc_fence:tt) => {{
234 asm!(
235 "2:", // 'retry:
236 concat!("ld", $acquire, "xr", $suffix, " {out", $val_modifier, "}, [{dst}]"), // atomic { out = zero_extend(*dst); EXCLUSIVE = dst }
237 concat!("cmp {out", $val_modifier, "}, {old", $val_modifier, "}", $cmp_ext), // if out == old { Z = 1 } else { Z = 0 }
238 "b.ne 3f", // if Z == 0 { jump 'cmp-fail }
239 concat!("st", $release, "xr", $suffix, " {r:w}, {new", $val_modifier, "}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = new; r = 0 } else { r = 1 }; EXCLUSIVE = None }
240 "cbnz {r:w}, 2b", // if r != 0 { jump 'retry }
241 $msvc_fence, // fence
242 "b 4f", // jump 'success
243 "3:", // 'cmp-fail:
244 "mov {r:w}, #1", // r = 1
245 "clrex", // EXCLUSIVE = None
246 "4:", // 'success:
247 dst = in(reg) ptr_reg!(dst),
248 old = in(reg) old,
249 new = in(reg) new,
250 out = out(reg) out,
251 r = out(reg) r,
252 // Do not use `preserves_flags` because CMP modifies the condition flags.
253 options(nostack),
254 );
255 crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
256 // 0 if the store was successful, 1 if no store was performed
257 (out, r == 0)
258 }};
259 }
260 atomic_rmw!(cmpxchg, order, write = success)
261 }
262 }
263 #[cfg(not(target_feature = "lse"))]
264 #[inline]
265 unsafe fn atomic_compare_exchange_weak(
266 dst: *mut MaybeUninit<Self>,
267 old: MaybeUninit<Self>,
268 new: MaybeUninit<Self>,
269 success: Ordering,
270 failure: Ordering,
271 ) -> (MaybeUninit<Self>, bool) {
272 debug_assert_atomic_unsafe_precondition!(dst, $ty);
273 let order = crate::utils::upgrade_success_ordering(success, failure);
274 let mut out: MaybeUninit<Self>;
275 let r: i32;
276
277 // SAFETY: the caller must uphold the safety contract.
278 unsafe {
279 macro_rules! cmpxchg_weak {
280 ($acquire:tt, $release:tt, $msvc_fence:tt) => {
281 asm!(
282 concat!("ld", $acquire, "xr", $suffix, " {out", $val_modifier, "}, [{dst}]"), // atomic { out = zero_extend(*dst); EXCLUSIVE = dst }
283 concat!("cmp {out", $val_modifier, "}, {old", $val_modifier, "}", $cmp_ext), // if out == old { Z = 1 } else { Z = 0 }
284 "b.ne 3f", // if Z == 0 { jump 'cmp-fail }
285 concat!("st", $release, "xr", $suffix, " {r:w}, {new", $val_modifier, "}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = new; r = 0 } else { r = 1 }; EXCLUSIVE = None }
286 if_any!($msvc_fence, "cbnz {r:w}, 4f"), // if r != 0 { jump 'end }
287 $msvc_fence, // fence
288 "b 4f", // jump 'end
289 "3:", // 'cmp-fail:
290 "mov {r:w}, #1", // r = 1
291 "clrex", // EXCLUSIVE = None
292 "4:", // 'end:
293 dst = in(reg) ptr_reg!(dst),
294 old = in(reg) old,
295 new = in(reg) new,
296 out = out(reg) out,
297 r = out(reg) r,
298 // Do not use `preserves_flags` because CMP modifies the condition flags.
299 options(nostack),
300 )
301 };
302 }
303 atomic_rmw!(cmpxchg_weak, order, write = success);
304 crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
305 }
306 // 0 if the store was successful, 1 if no store was performed
307 (out, r == 0)
308 }
309 }
310 };
311}
312
313atomic!(u8, "b", ":w", ", uxtb");
314atomic!(u16, "h", ":w", ", uxth");
315atomic!(u32, "", ":w", "");
316atomic!(u64, "", "", "");
317
318// -----------------------------------------------------------------------------
319// 128-bit atomics
320//
321// There are a few ways to implement 128-bit atomic operations in AArch64.
322//
323// - LDXP/STXP loop (DW LL/SC)
324// - CASP (DWCAS) added as Armv8.1 FEAT_LSE (optional from Armv8.0, mandatory from Armv8.1)
325// - LDP/STP (DW load/store) if Armv8.4 FEAT_LSE2 (optional from Armv8.2, mandatory from Armv8.4) is available
326// - LDIAPP/STILP (DW acquire-load/release-store) added as Armv8.9 FEAT_LRCPC3 (optional from Armv8.2) (if FEAT_LSE2 is also available)
327// - LDCLRP/LDSETP/SWPP (DW RMW) added as Armv9.4 FEAT_LSE128 (optional from Armv9.3)
328//
329// If FEAT_LSE is available at compile-time, we use CASP for load/CAS. Otherwise, use LDXP/STXP loop.
330// If FEAT_LSE2 is available at compile-time, we use LDP/STP for load/store.
331// If FEAT_LSE128 is available at compile-time, we use SWPP for swap/{release,seqcst}-store.
332// If FEAT_LSE2 and FEAT_LRCPC3 are available at compile-time, we use LDIAPP/STILP for acquire-load/release-store.
333//
334// Note: FEAT_LSE2 doesn't imply FEAT_LSE. FEAT_LSE128 implies FEAT_LSE but not FEAT_LSE2.
335//
336// Refs:
337// - LDXP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/LDXP--Load-exclusive-pair-of-registers-
338// - LDAXP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/LDAXP--Load-acquire-exclusive-pair-of-registers-
339// - STXP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/STXP--Store-exclusive-pair-of-registers-
340// - STLXP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/STLXP--Store-release-exclusive-pair-of-registers-
341//
342// Note: Load-Exclusive pair (by itself) does not guarantee atomicity; to complete an atomic
343// operation (even load/store), a corresponding Store-Exclusive pair must succeed.
344// See Arm Architecture Reference Manual for A-profile architecture
345// Section B2.2.1 "Requirements for single-copy atomicity", and
346// Section B2.9 "Synchronization and semaphores" for more.
347
348delegate_signed!(delegate_all, u128);
349impl AtomicLoad for u128 {
350 #[inline]
351 unsafe fn atomic_load(src: *const MaybeUninit<Self>, order: Ordering) -> MaybeUninit<Self> {
352 debug_assert_atomic_unsafe_precondition!(src, u128);
353 let (mut prev_lo, mut prev_hi);
354
355 #[cfg(any(target_feature = "lse2", atomic_maybe_uninit_target_feature = "lse2"))]
356 // SAFETY: the caller must guarantee that `dst` is valid for reads,
357 // 16-byte aligned, that there are no concurrent non-atomic operations.
358 // the above cfg guarantee that the CPU supports FEAT_LSE2.
359 //
360 // Refs:
361 // - LDP https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/LDP--Load-pair-of-registers-
362 // - LDIAPP https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/LDIAPP--Load-Acquire-RCpc-ordered-pair-of-registers-
363 unsafe {
364 macro_rules! atomic_load_relaxed {
365 ($iap:tt, $dmb_ishld:tt) => {
366 asm!(
367 concat!("ld", $iap, "p {prev_lo}, {prev_hi}, [{src}]"), // atomic { prev_lo:prev_hi = *src }
368 $dmb_ishld, // fence
369 src = in(reg) ptr_reg!(src),
370 prev_hi = lateout(reg) prev_hi,
371 prev_lo = lateout(reg) prev_lo,
372 options(nostack, preserves_flags),
373 )
374 };
375 }
376 match order {
377 // if FEAT_LRCPC3 && order != relaxed => ldiapp
378 // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC3.
379 #[cfg(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3"))]
380 Ordering::Acquire => atomic_load_relaxed!("iap", ""),
381 #[cfg(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3"))]
382 Ordering::SeqCst => {
383 asm!(
384 // ldar (or dmb ishld) is required to prevent reordering with preceding stlxp.
385 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108891
386 "ldar {tmp}, [{src}]", // atomic { tmp = *src }
387 "ldiapp {prev_lo}, {prev_hi}, [{src}]", // atomic { prev_lo:prev_hi = *src }
388 src = in(reg) ptr_reg!(src),
389 prev_hi = lateout(reg) prev_hi,
390 prev_lo = lateout(reg) prev_lo,
391 tmp = out(reg) _,
392 options(nostack, preserves_flags),
393 );
394 }
395
396 // else => ldp
397 Ordering::Relaxed => atomic_load_relaxed!("", ""),
398 #[cfg(not(any(
399 target_feature = "rcpc3",
400 atomic_maybe_uninit_target_feature = "rcpc3"
401 )))]
402 Ordering::Acquire => atomic_load_relaxed!("", "dmb ishld"),
403 #[cfg(not(any(
404 target_feature = "rcpc3",
405 atomic_maybe_uninit_target_feature = "rcpc3"
406 )))]
407 Ordering::SeqCst => {
408 asm!(
409 // ldar (or dmb ishld) is required to prevent reordering with preceding stlxp.
410 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108891
411 "ldar {tmp}, [{src}]", // atomic { tmp = *src }
412 "ldp {prev_lo}, {prev_hi}, [{src}]", // atomic { prev_lo:prev_hi = *src }
413 "dmb ishld", // fence
414 src = in(reg) ptr_reg!(src),
415 prev_hi = lateout(reg) prev_hi,
416 prev_lo = lateout(reg) prev_lo,
417 tmp = out(reg) _,
418 options(nostack, preserves_flags),
419 );
420 }
421 _ => crate::utils::unreachable_unchecked(),
422 }
423 MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
424 }
425 #[cfg(not(any(target_feature = "lse2", atomic_maybe_uninit_target_feature = "lse2")))]
426 // SAFETY: the caller must uphold the safety contract.
427 unsafe {
428 #[cfg(target_feature = "lse")]
429 macro_rules! atomic_load {
430 ($acquire:tt, $release:tt) => {
431 asm!(
432 // Refs: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/CASP--CASPA--CASPAL--CASPL--Compare-and-swap-pair-of-words-or-doublewords-in-memory-
433 concat!("casp", $acquire, $release, " x2, x3, x2, x3, [{src}]"), // atomic { if *src == x2:x3 { *dst = x2:x3 } else { x2:x3 = *dst } }
434 src = in(reg) ptr_reg!(src),
435 // must be allocated to even/odd register pair
436 inout("x2") 0_u64 => prev_lo,
437 inout("x3") 0_u64 => prev_hi,
438 options(nostack, preserves_flags),
439 )
440 };
441 }
442 #[cfg(not(target_feature = "lse"))]
443 macro_rules! atomic_load {
444 ($acquire:tt, $release:tt) => {
445 asm!(
446 "2:", // 'retry:
447 concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{src}]"), // atomic { prev_lo:prev_hi = *src; EXCLUSIVE = src }
448 // write back to ensure atomicity
449 concat!("st", $release, "xp {r:w}, {prev_lo}, {prev_hi}, [{src}]"), // atomic { if EXCLUSIVE == src { *src = prev_lo:prev_hi; r = 0 } else { r = 1 }; EXCLUSIVE = None }
450 "cbnz {r:w}, 2b", // if r != 0 { jump 'retry }
451 src = in(reg) ptr_reg!(src),
452 prev_lo = out(reg) prev_lo,
453 prev_hi = out(reg) prev_hi,
454 r = out(reg) _,
455 options(nostack, preserves_flags),
456 )
457 };
458 }
459 match order {
460 Ordering::Relaxed => atomic_load!("", ""),
461 Ordering::Acquire => atomic_load!("a", ""),
462 Ordering::SeqCst => atomic_load!("a", "l"),
463 _ => crate::utils::unreachable_unchecked(),
464 }
465 MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
466 }
467 }
468}
469impl AtomicStore for u128 {
470 #[inline]
471 unsafe fn atomic_store(dst: *mut MaybeUninit<Self>, val: MaybeUninit<Self>, order: Ordering) {
472 debug_assert_atomic_unsafe_precondition!(dst, u128);
473 let val = MaybeUninit128 { whole: val };
474
475 #[cfg(any(target_feature = "lse2", atomic_maybe_uninit_target_feature = "lse2"))]
476 // SAFETY: the caller must guarantee that `dst` is valid for writes,
477 // 16-byte aligned, that there are no concurrent non-atomic operations.
478 // the above cfg guarantee that the CPU supports FEAT_LSE2.
479 //
480 // Refs:
481 // - STP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/STP--Store-pair-of-registers-
482 // - STILP: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/STILP--Store-release-ordered-pair-of-registers-
483 unsafe {
484 macro_rules! atomic_store {
485 ($il:tt, $acquire:tt, $release:tt) => {
486 asm!(
487 $release, // fence
488 concat!("st", $il, "p {val_lo}, {val_hi}, [{dst}]"), // atomic { *dst = val_lo:val_hi }
489 $acquire, // fence
490 dst = in(reg) ptr_reg!(dst),
491 val_lo = in(reg) val.pair.lo,
492 val_hi = in(reg) val.pair.hi,
493 options(nostack, preserves_flags),
494 )
495 };
496 }
497 // Use swpp if stp requires fences.
498 // https://reviews.llvm.org/D143506
499 #[cfg(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128"))]
500 macro_rules! atomic_store_swpp {
501 ($acquire:tt, $release:tt, $_msvc_fence:tt) => {
502 asm!(
503 concat!("swpp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"), // atomic { _x = *dst; *dst = val_lo:val_hi; val_lo:val_hi = _x }
504 dst = in(reg) ptr_reg!(dst),
505 val_lo = inout(reg) val.pair.lo => _,
506 val_hi = inout(reg) val.pair.hi => _,
507 options(nostack, preserves_flags),
508 )
509 };
510 }
511 match order {
512 // if FEAT_LSE128 && order == seqcst => swpp
513 // Prefer swpp if stp requires fences. https://reviews.llvm.org/D143506
514 // SAFETY: cfg guarantee that the CPU supports FEAT_LSE128.
515 #[cfg(any(
516 target_feature = "lse128",
517 atomic_maybe_uninit_target_feature = "lse128",
518 ))]
519 Ordering::SeqCst => atomic_rmw!(atomic_store_swpp, order),
520
521 // if FEAT_LRCPC3 && order != relaxed => stilp
522 // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC3.
523 #[cfg(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3"))]
524 Ordering::Release => atomic_store!("il", "", ""),
525 // LLVM uses store-release (dmb ish; stp); dmb ish, GCC (libatomic) and Atomics ABI Standard
526 // uses store-release (stilp) without fence for SeqCst store
527 // (https://github.com/gcc-mirror/gcc/commit/7107574958e2bed11d916a1480ef1319f15e5ffe).
528 // Considering https://reviews.llvm.org/D141748, LLVM's lowering seems
529 // to be the safer option here (I'm not convinced that the libatomic's implementation is wrong).
530 #[cfg(any(target_feature = "rcpc3", atomic_maybe_uninit_target_feature = "rcpc3"))]
531 #[cfg(not(any(
532 target_feature = "lse128",
533 atomic_maybe_uninit_target_feature = "lse128",
534 )))]
535 Ordering::SeqCst => atomic_store!("il", "dmb ish", ""),
536
537 // if FEAT_LSE128 && order != relaxed => swpp
538 // Prefer swpp if stp requires fences. https://reviews.llvm.org/D143506
539 // SAFETY: cfg guarantee that the CPU supports FEAT_LSE128.
540 #[cfg(not(any(
541 target_feature = "rcpc3",
542 atomic_maybe_uninit_target_feature = "rcpc3",
543 )))]
544 #[cfg(any(
545 target_feature = "lse128",
546 atomic_maybe_uninit_target_feature = "lse128",
547 ))]
548 Ordering::Release => atomic_rmw!(atomic_store_swpp, order),
549
550 // else => stp
551 Ordering::Relaxed => atomic_store!("", "", ""),
552 #[cfg(not(any(
553 target_feature = "rcpc3",
554 atomic_maybe_uninit_target_feature = "rcpc3",
555 )))]
556 #[cfg(not(any(
557 target_feature = "lse128",
558 atomic_maybe_uninit_target_feature = "lse128",
559 )))]
560 Ordering::Release => atomic_store!("", "", "dmb ish"),
561 #[cfg(not(any(
562 target_feature = "rcpc3",
563 atomic_maybe_uninit_target_feature = "rcpc3",
564 )))]
565 #[cfg(not(any(
566 target_feature = "lse128",
567 atomic_maybe_uninit_target_feature = "lse128",
568 )))]
569 Ordering::SeqCst => atomic_store!("", "dmb ish", "dmb ish"),
570 _ => crate::utils::unreachable_unchecked(),
571 }
572 }
573 #[cfg(not(any(target_feature = "lse2", atomic_maybe_uninit_target_feature = "lse2")))]
574 // SAFETY: the caller must uphold the safety contract.
575 // Do not use atomic_swap because it needs extra registers to implement store.
576 unsafe {
577 macro_rules! store {
578 ($acquire:tt, $release:tt, $msvc_fence:tt) => {
579 asm!(
580 "2:", // 'retry:
581 concat!("ld", $acquire, "xp xzr, {tmp}, [{dst}]"), // atomic { xzr:tmp = *dst; EXCLUSIVE = dst }
582 concat!("st", $release, "xp {tmp:w}, {val_lo}, {val_hi}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = val_lo:val_hi; tmp = 0 } else { tmp = 1 }; EXCLUSIVE = None }
583 "cbnz {tmp:w}, 2b", // if tmp != 0 { jump 'retry }
584 $msvc_fence, // fence
585 dst = in(reg) ptr_reg!(dst),
586 val_lo = in(reg) val.pair.lo,
587 val_hi = in(reg) val.pair.hi,
588 tmp = out(reg) _,
589 options(nostack, preserves_flags),
590 )
591 };
592 }
593 atomic_rmw!(store, order);
594 }
595 }
596}
597impl AtomicSwap for u128 {
598 #[inline]
599 unsafe fn atomic_swap(
600 dst: *mut MaybeUninit<Self>,
601 val: MaybeUninit<Self>,
602 order: Ordering,
603 ) -> MaybeUninit<Self> {
604 debug_assert_atomic_unsafe_precondition!(dst, u128);
605 let val = MaybeUninit128 { whole: val };
606 let (mut prev_lo, mut prev_hi);
607
608 // SAFETY: the caller must uphold the safety contract.
609 unsafe {
610 #[cfg(any(target_feature = "lse128", atomic_maybe_uninit_target_feature = "lse128"))]
611 macro_rules! swap {
612 ($acquire:tt, $release:tt, $_msvc_fence:tt) => {
613 asm!(
614 concat!("swpp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"), // atomic { _x = *dst; *dst = val_lo:val_hi; val_lo:val_hi = _x }
615 dst = in(reg) ptr_reg!(dst),
616 val_lo = inout(reg) val.pair.lo => prev_lo,
617 val_hi = inout(reg) val.pair.hi => prev_hi,
618 options(nostack, preserves_flags),
619 )
620 };
621 }
622 #[cfg(not(any(
623 target_feature = "lse128",
624 atomic_maybe_uninit_target_feature = "lse128",
625 )))]
626 macro_rules! swap {
627 ($acquire:tt, $release:tt, $msvc_fence:tt) => {
628 asm!(
629 "2:", // 'retry:
630 concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"), // atomic { prev_lo:prev_hi = *dst; EXCLUSIVE = dst }
631 concat!("st", $release, "xp {r:w}, {val_lo}, {val_hi}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = val_lo:val_hi; r = 0 } else { r = 1 }; EXCLUSIVE = None }
632 "cbnz {r:w}, 2b", // if r != 0 { jump 'retry }
633 $msvc_fence, // fence
634 dst = in(reg) ptr_reg!(dst),
635 val_lo = in(reg) val.pair.lo,
636 val_hi = in(reg) val.pair.hi,
637 prev_lo = out(reg) prev_lo,
638 prev_hi = out(reg) prev_hi,
639 r = out(reg) _,
640 options(nostack, preserves_flags),
641 )
642 };
643 }
644 atomic_rmw!(swap, order);
645 MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
646 }
647 }
648}
649impl AtomicCompareExchange for u128 {
650 #[inline]
651 unsafe fn atomic_compare_exchange(
652 dst: *mut MaybeUninit<Self>,
653 old: MaybeUninit<Self>,
654 new: MaybeUninit<Self>,
655 success: Ordering,
656 failure: Ordering,
657 ) -> (MaybeUninit<Self>, bool) {
658 debug_assert_atomic_unsafe_precondition!(dst, u128);
659 let order = crate::utils::upgrade_success_ordering(success, failure);
660 let old = MaybeUninit128 { whole: old };
661 let new = MaybeUninit128 { whole: new };
662 let (mut prev_lo, mut prev_hi);
663 let mut r: i32;
664
665 // SAFETY: the caller must uphold the safety contract.
666 unsafe {
667 #[cfg(target_feature = "lse")]
668 macro_rules! cmpxchg {
669 ($acquire:tt, $release:tt, $_msvc_fence:tt) => {
670 // Refs: https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/CASP--CASPA--CASPAL--CASPL--Compare-and-swap-pair-of-words-or-doublewords-in-memory-
671 asm!(
672 // casp writes the current value to the first register pair,
673 // so copy the `old`'s value for later comparison.
674 "mov x8, {old_lo}", // x8 = old_lo
675 "mov x9, {old_hi}", // x9 = old_hi
676 concat!("casp", $acquire, $release, " x8, x9, x4, x5, [{dst}]"), // atomic { if *src == x8:x9 { *dst = x4:x5 } else { x8:x9 = *dst } }
677 "cmp x8, {old_lo}", // if x8 == old_lo { Z = 1 } else { Z = 0 }
678 "ccmp x9, {old_hi}, #0, eq", // if Z == 1 { if x9 == old_hi { Z = 1 } else { Z = 0 } } else { Z = 0 }
679 "cset {r:w}, eq", // r = Z
680 dst = in(reg) ptr_reg!(dst),
681 old_lo = in(reg) old.pair.lo,
682 old_hi = in(reg) old.pair.hi,
683 r = lateout(reg) r,
684 // new pair - must be allocated to even/odd register pair
685 in("x4") new.pair.lo,
686 in("x5") new.pair.hi,
687 // prev pair - must be allocated to even/odd register pair
688 out("x8") prev_lo,
689 out("x9") prev_hi,
690 // Do not use `preserves_flags` because CMP and CCMP modify the condition flags.
691 options(nostack),
692 )
693 };
694 }
695 #[cfg(not(target_feature = "lse"))]
696 macro_rules! cmpxchg {
697 ($acquire:tt, $release:tt, $msvc_fence:tt) => {
698 asm!(
699 "2:", // 'retry:
700 concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"), // atomic { prev_lo:prev_hi = *dst; EXCLUSIVE = dst }
701 "cmp {prev_lo}, {old_lo}", // if prev_lo == old_lo { Z = 1 } else { Z = 0 }
702 "ccmp {prev_hi}, {old_hi}, #0, eq", // if Z == 1 { if prev_hi == old_hi { Z = 1 } else { Z = 0 } } else { Z = 0 }
703 // write back to ensure atomicity
704 "csel {tmp_lo}, {new_lo}, {prev_lo}, eq", // if Z == 1 { tmp_lo = new_lo } else { tmp_lo = prev_lo }
705 "csel {tmp_hi}, {new_hi}, {prev_hi}, eq", // if Z == 1 { tmp_hi = new_hi } else { tmp_hi = prev_hi }
706 concat!("st", $release, "xp {r:w}, {tmp_lo}, {tmp_hi}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = tmp_lo:tmp_hi; r = 0 } else { r = 1 }; EXCLUSIVE = None }
707 "cbnz {r:w}, 2b", // if r != 0 { jump 'retry }
708 "cset {r:w}, eq", // r = Z
709 $msvc_fence, // fence
710 dst = in(reg) ptr_reg!(dst),
711 old_lo = in(reg) old.pair.lo,
712 old_hi = in(reg) old.pair.hi,
713 new_lo = in(reg) new.pair.lo,
714 new_hi = in(reg) new.pair.hi,
715 prev_lo = out(reg) prev_lo,
716 prev_hi = out(reg) prev_hi,
717 r = out(reg) r,
718 tmp_lo = out(reg) _,
719 tmp_hi = out(reg) _,
720 // Do not use `preserves_flags` because CMP and CCMP modify the condition flags.
721 options(nostack),
722 )
723 };
724 }
725 atomic_rmw!(cmpxchg, order, write = success);
726 crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
727 (MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole, r != 0)
728 }
729 }
730}
731
732// -----------------------------------------------------------------------------
733// cfg macros
734
735#[macro_export]
736macro_rules! cfg_has_atomic_8 {
737 ($($tt:tt)*) => { $($tt)* };
738}
739#[macro_export]
740macro_rules! cfg_no_atomic_8 {
741 ($($tt:tt)*) => {};
742}
743#[macro_export]
744macro_rules! cfg_has_atomic_16 {
745 ($($tt:tt)*) => { $($tt)* };
746}
747#[macro_export]
748macro_rules! cfg_no_atomic_16 {
749 ($($tt:tt)*) => {};
750}
751#[macro_export]
752macro_rules! cfg_has_atomic_32 {
753 ($($tt:tt)*) => { $($tt)* };
754}
755#[macro_export]
756macro_rules! cfg_no_atomic_32 {
757 ($($tt:tt)*) => {};
758}
759#[macro_export]
760macro_rules! cfg_has_atomic_64 {
761 ($($tt:tt)*) => { $($tt)* };
762}
763#[macro_export]
764macro_rules! cfg_no_atomic_64 {
765 ($($tt:tt)*) => {};
766}
767#[macro_export]
768macro_rules! cfg_has_atomic_128 {
769 ($($tt:tt)*) => { $($tt)* };
770}
771#[macro_export]
772macro_rules! cfg_no_atomic_128 {
773 ($($tt:tt)*) => {};
774}
775#[macro_export]
776macro_rules! cfg_has_atomic_cas {
777 ($($tt:tt)*) => { $($tt)* };
778}
779#[macro_export]
780macro_rules! cfg_no_atomic_cas {
781 ($($tt:tt)*) => {};
782}