// atomic_maybe_uninit/arch/x86.rs
1// SPDX-License-Identifier: Apache-2.0 OR MIT
2
3/*
4x86 and x86_64
5
6Refs:
- Intel® 64 and IA-32 Architectures Software Developer Manuals
8 https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html
9- x86 and amd64 instruction reference
10 https://www.felixcloutier.com/x86
11- portable-atomic
12 https://github.com/taiki-e/portable-atomic
13
14See tests/asm-test/asm/atomic-maybe-uninit for generated assembly.
15*/
16
// Runtime CPU feature detection used by the 128-bit AVX fast-path dispatch
// (`detect::detect().avx()` in the `ifunc!` calls below). Only compiled when
// CMPXCHG16B is statically available but outline atomics are still possible,
// i.e. when the runtime AVX check can actually be reached.
// (Under `cfg(test)` the module is built unconditionally so it can be tested.)
#[cfg(target_arch = "x86_64")]
#[cfg(target_feature = "cmpxchg16b")]
#[cfg(not(atomic_maybe_uninit_no_outline_atomics))]
#[cfg(not(target_env = "sgx"))]
#[cfg_attr(
    not(test),
    cfg(not(any(
        target_feature = "avx",
        all(
            not(target_feature = "avx"),
            any(
                atomic_maybe_uninit_no_outline_atomics,
                target_env = "sgx",
                not(target_feature = "sse"),
            ),
        ),
    )))
)]
#[path = "../detect/x86_64.rs"]
mod detect;
37
// Crate-internal delegation helpers (defined elsewhere in the crate;
// presumably they forward each operation across the supported sizes).
// CAS delegation is skipped when targeting x86 without CMPXCHG.
delegate_size!(delegate_load_store);
delegate_size!(delegate_swap);
#[cfg(not(all(target_arch = "x86", atomic_maybe_uninit_no_cmpxchg)))]
delegate_size!(delegate_cas);
42
43#[cfg(target_arch = "x86")]
44#[cfg(not(atomic_maybe_uninit_no_cmpxchg8b))]
45#[cfg(all(target_feature = "sse", not(atomic_maybe_uninit_test_prefer_x87_over_sse)))]
46use core::arch::x86::__m128;
47#[cfg(target_arch = "x86")]
48#[cfg(not(atomic_maybe_uninit_no_cmpxchg8b))]
49#[cfg(all(target_feature = "sse2", not(atomic_maybe_uninit_test_prefer_x87_over_sse)))]
50use core::arch::x86::__m128i;
51#[cfg(target_arch = "x86_64")]
52#[cfg(target_feature = "cmpxchg16b")]
53#[cfg(not(all(
54 not(target_feature = "avx"),
55 any(atomic_maybe_uninit_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
56)))]
57use core::arch::x86_64::__m128i;
58use core::{
59 arch::asm,
60 mem::{self, MaybeUninit},
61 sync::atomic::Ordering,
62};
63
64#[cfg(not(all(target_arch = "x86", atomic_maybe_uninit_no_cmpxchg)))]
65use crate::raw::AtomicCompareExchange;
66use crate::raw::{AtomicLoad, AtomicStore, AtomicSwap};
67#[cfg(target_arch = "x86")]
68#[cfg(not(atomic_maybe_uninit_no_cmpxchg8b))]
69use crate::utils::{MaybeUninit64, Pair};
70#[cfg(target_arch = "x86_64")]
71#[cfg(target_feature = "cmpxchg16b")]
72use crate::utils::{MaybeUninit128, Pair};
73
// Template modifier appended to pointer operands in `asm!` strings: 32-bit
// targets must address memory through the 32-bit register name (the `:e`
// modifier), while 64-bit targets use the full-width register and need none.
#[cfg(target_pointer_width = "32")]
macro_rules! ptr_modifier {
    () => { ":e" };
}
#[cfg(target_pointer_width = "64")]
macro_rules! ptr_modifier {
    () => { "" };
}
86
87// -----------------------------------------------------------------------------
88// Register-width or smaller atomics
89
// Implements `AtomicLoad`/`AtomicStore`/`AtomicSwap`/`AtomicCompareExchange`
// for one unsigned integer type that fits in a general-purpose register.
//
// Macro parameters:
// - `$ty`: the unsigned integer type to implement.
// - `$val_reg`: asm register class holding the raw value (`reg_byte` for u8).
// - `$ux_reg`: register class for the zero-extended CMPXCHG `new` operand.
// - `$r_reg`: register class receiving the `sete` result; supplied by the
//   first arm (`reg_abcd` on x86, any `reg` on x86_64).
// - `$ux`: helper name in `crate::utils::extend32` used to widen `new`.
// - `$zx`: suffix appended to `mov` for zero-extending loads (`"zx"` or `""`).
// - `$val_modifier`/`$reg_val_modifier`/`$zx_val_modifier`: asm template
//   modifiers selecting operand width for the value, the widened value, and
//   the zero-extended load destination, respectively.
// - `$ptr_size`: memory operand size keyword (`byte`/`word`/`dword`/`qword`).
// - `$cmpxchg_cmp_reg`: accumulator register that CMPXCHG implicitly
//   compares and updates (`al`/`ax`/`eax`/`rax`).
macro_rules! atomic {
    // Entry arm: inserts the arch-appropriate `$r_reg` and recurses into the
    // implementation arm below.
    (
        $ty:ident, $val_reg:ident, $ux_reg:ident, $ux:ident,
        $zx:literal, $val_modifier:literal, $reg_val_modifier:tt, $zx_val_modifier:tt, $ptr_size:tt,
        $cmpxchg_cmp_reg:tt
    ) => {
        #[cfg(target_arch = "x86")]
        atomic!($ty, $val_reg, $ux_reg, reg_abcd, $ux, $zx, $val_modifier,
            $reg_val_modifier, $zx_val_modifier, $ptr_size, $cmpxchg_cmp_reg);
        #[cfg(target_arch = "x86_64")]
        atomic!($ty, $val_reg, $ux_reg, reg, $ux, $zx, $val_modifier,
            $reg_val_modifier, $zx_val_modifier, $ptr_size, $cmpxchg_cmp_reg);
    };
    // Implementation arm: emits the four trait impls plus the signed-type
    // delegations for `$ty`.
    (
        $ty:ident, $val_reg:ident, $ux_reg:ident, $r_reg:ident, $ux:ident,
        $zx:literal, $val_modifier:literal, $reg_val_modifier:tt, $zx_val_modifier:tt, $ptr_size:tt,
        $cmpxchg_cmp_reg:tt
    ) => {
        delegate_signed!(delegate_load_store, $ty);
        delegate_signed!(delegate_swap, $ty);
        #[cfg(not(all(target_arch = "x86", atomic_maybe_uninit_no_cmpxchg)))]
        delegate_signed!(delegate_cas, $ty);
        impl AtomicLoad for $ty {
            #[inline]
            unsafe fn atomic_load(
                src: *const MaybeUninit<Self>,
                _order: Ordering,
            ) -> MaybeUninit<Self> {
                debug_assert_atomic_unsafe_precondition!(src, $ty);
                let out;

                // SAFETY: the caller must uphold the safety contract.
                // load by MOV has SeqCst semantics.
                unsafe {
                    asm!(
                        concat!("mov", $zx, " {out", $zx_val_modifier, "}, ", $ptr_size, " ptr [{src", ptr_modifier!(), "}]"), // atomic { out = zero_extend(*src) }
                        src = in(reg) src,
                        out = lateout(reg) out,
                        options(nostack, preserves_flags),
                    );
                }
                // Narrow the zero-extended register value back to `$ty`.
                crate::utils::extend32::$ty::extract(out)
            }
        }
        impl AtomicStore for $ty {
            #[inline]
            unsafe fn atomic_store(
                dst: *mut MaybeUninit<Self>,
                mut val: MaybeUninit<Self>,
                order: Ordering,
            ) {
                debug_assert_atomic_unsafe_precondition!(dst, $ty);

                // SAFETY: the caller must uphold the safety contract.
                unsafe {
                    match order {
                        // Relaxed and Release stores are equivalent.
                        Ordering::Relaxed | Ordering::Release => {
                            asm!(
                                concat!("mov ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {val", $val_modifier, "}"), // atomic { *dst = val }
                                dst = in(reg) dst,
                                val = in($val_reg) val,
                                options(nostack, preserves_flags),
                            );
                        }
                        #[allow(unused_assignments)] // TODO(gcc): Workaround for rustc_codegen_gcc bug
                        Ordering::SeqCst => {
                            asm!(
                                // SeqCst store is xchg, not mov
                                concat!("xchg ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {val", $val_modifier, "}"), // atomic { _x = *dst; *dst = val; val = _x }
                                dst = in(reg) dst,
                                val = inout($val_reg) val,
                                options(nostack, preserves_flags),
                            );
                        }
                        _ => crate::utils::unreachable_unchecked(),
                    }
                }
            }
        }
        impl AtomicSwap for $ty {
            #[inline]
            unsafe fn atomic_swap(
                dst: *mut MaybeUninit<Self>,
                val: MaybeUninit<Self>,
                _order: Ordering,
            ) -> MaybeUninit<Self> {
                debug_assert_atomic_unsafe_precondition!(dst, $ty);
                let out: MaybeUninit<Self>;

                // SAFETY: the caller must uphold the safety contract.
                // XCHG has SeqCst semantics.
                unsafe {
                    asm!(
                        concat!("xchg ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {val", $val_modifier, "}"), // atomic { _x = *dst; *dst = val; val = _x }
                        dst = in(reg) dst,
                        val = inout($val_reg) val => out,
                        options(nostack, preserves_flags),
                    );
                }
                out
            }
        }
        #[cfg(not(all(target_arch = "x86", atomic_maybe_uninit_no_cmpxchg)))]
        impl AtomicCompareExchange for $ty {
            #[inline]
            unsafe fn atomic_compare_exchange(
                dst: *mut MaybeUninit<Self>,
                old: MaybeUninit<Self>,
                new: MaybeUninit<Self>,
                _success: Ordering,
                _failure: Ordering,
            ) -> (MaybeUninit<Self>, bool) {
                debug_assert_atomic_unsafe_precondition!(dst, $ty);
                let out: MaybeUninit<Self>;

                // SAFETY: the caller must uphold the safety contract.
                // CMPXCHG has SeqCst semantics.
                //
                // Refs: https://www.felixcloutier.com/x86/cmpxchg
                unsafe {
                    let r: MaybeUninit<u32>;
                    asm!(
                        concat!("lock cmpxchg ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {new", $reg_val_modifier, "}"), // atomic { if *dst == $cmpxchg_cmp_reg { ZF = 1; *dst = new } else { ZF = 0; $cmpxchg_cmp_reg = *dst } }
                        "sete {r:l}", // r = ZF
                        dst = in(reg) dst,
                        // Avoid reg_byte ($val_reg) in new and r to work around cranelift bug with multiple or lateout reg_byte.
                        new = in($ux_reg) crate::utils::extend32::$ty::$ux(new),
                        r = lateout($r_reg) r,
                        // `old` goes into (and the previous value comes back out of)
                        // the accumulator register that CMPXCHG implicitly uses.
                        inout($cmpxchg_cmp_reg) old => out,
                        // Do not use `preserves_flags` because CMPXCHG modifies the ZF, CF, PF, AF, SF, and OF flags.
                        options(nostack),
                    );
                    let r = crate::utils::extend32::u8::extract(r).assume_init();
                    crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
                    (out, r != 0)
                }
            }
        }
    };
}
231
// Per-size instantiations of `atomic!`.
// On x86 (32-bit), the zero-extended u8 CAS operand must live in one of the
// byte-addressable legacy registers (`reg_abcd`); x86_64 can use any GP reg.
#[cfg(target_arch = "x86")]
atomic!(u8, reg_byte, reg_abcd, uninit, "zx", "", ":l", ":e", "byte", "al");
#[cfg(target_arch = "x86_64")]
atomic!(u8, reg_byte, reg, uninit, "zx", "", ":l", ":e", "byte", "al");
atomic!(u16, reg, reg, identity, "zx", ":x", ":x", ":e", "word", "ax");
atomic!(u32, reg, reg, identity, "", ":e", ":e", ":e", "dword", "eax");
// u64 on 32-bit x86 is handled by the dedicated impls below; only x86_64
// gets the register-width u64 implementation.
#[cfg(target_arch = "x86_64")]
atomic!(u64, reg, reg, identity, "", "", "", "", "qword", "rax");
240
241// -----------------------------------------------------------------------------
242// 64-bit atomics on x86_32
243//
244// For load/store, we can use MOVQ(SSE2)/MOVLPS(SSE)/FILD&FISTP(x87) instead of CMPXCHG8B.
245// Refs: https://github.com/llvm/llvm-project/blob/llvmorg-22.1.0-rc1/llvm/test/CodeGen/X86/atomic-load-store-wide.ll
246
// i64 support is derived from the u64 implementations below (delegation
// macro defined elsewhere in the crate).
#[cfg(all(target_arch = "x86", not(atomic_maybe_uninit_no_cmpxchg8b)))]
delegate_signed!(delegate_all, u64);
// 64-bit atomic load on 32-bit x86. At compile time, exactly one of the four
// mutually exclusive `cfg` blocks below is selected, preferring the cheapest
// instruction the target statically supports:
// SSE2 MOVQ > SSE MOVLPS > x87 FILD/FISTP > CMPXCHG8B.
#[cfg(all(target_arch = "x86", not(atomic_maybe_uninit_no_cmpxchg8b)))]
impl AtomicLoad for u64 {
    #[inline]
    unsafe fn atomic_load(src: *const MaybeUninit<Self>, _order: Ordering) -> MaybeUninit<Self> {
        debug_assert_atomic_unsafe_precondition!(src, u64);

        #[cfg(all(target_feature = "sse2", not(atomic_maybe_uninit_test_prefer_x87_over_sse)))]
        // SAFETY: the caller must uphold the safety contract.
        // cfg guarantees that the CPU supports SSE.
        // load by MOVQ has SeqCst semantics.
        //
        // Refs:
        // - https://www.felixcloutier.com/x86/movq (SSE2)
        // - https://www.felixcloutier.com/x86/movd:movq (SSE2)
        unsafe {
            let out;
            asm!(
                "movq {out}, qword ptr [{src}]", // atomic { out[:] = *src }
                src = in(reg) src,
                out = out(xmm_reg) out,
                options(nostack, preserves_flags),
            );
            // Only the low 64 bits of the xmm register are meaningful;
            // reinterpret the register as two u64 lanes and take the first.
            mem::transmute::<MaybeUninit<__m128i>, [MaybeUninit<Self>; 2]>(out)[0]
        }
        #[cfg(all(
            not(target_feature = "sse2"),
            target_feature = "sse",
            not(atomic_maybe_uninit_test_prefer_x87_over_sse),
        ))]
        // SAFETY: the caller must uphold the safety contract.
        // cfg guarantees that the CPU supports SSE.
        // load by MOVLPS has SeqCst semantics.
        //
        // Refs:
        // - https://www.felixcloutier.com/x86/movlps (SSE)
        unsafe {
            let out;
            asm!(
                "movlps {out}, qword ptr [{src}]", // atomic { out[:] = *src }
                src = in(reg) src,
                out = out(xmm_reg) out,
                options(nostack, preserves_flags),
            );
            mem::transmute::<MaybeUninit<__m128>, [MaybeUninit<Self>; 2]>(out)[0]
        }
        #[cfg(all(
            any(not(target_feature = "sse"), atomic_maybe_uninit_test_prefer_x87_over_sse),
            all(
                any(target_feature = "x87", atomic_maybe_uninit_target_feature = "x87"),
                not(atomic_maybe_uninit_test_prefer_cmpxchg8b_over_x87),
            ),
        ))]
        // SAFETY: the caller must uphold the safety contract.
        // load by FILD has SeqCst semantics.
        //
        // Refs:
        // - https://www.felixcloutier.com/x86/fild
        // - https://www.felixcloutier.com/x86/fist:fistp
        unsafe {
            let mut out = MaybeUninit::<Self>::uninit();
            asm!(
                "fild qword ptr [{src}]", // atomic { st.push(*src) }
                "fistp qword ptr [{out}]", // *out = st.pop()
                src = in(reg) src,
                out = in(reg) out.as_mut_ptr(),
                // The whole x87 stack is clobbered by the push/pop pair.
                out("st(0)") _,
                out("st(1)") _,
                out("st(2)") _,
                out("st(3)") _,
                out("st(4)") _,
                out("st(5)") _,
                out("st(6)") _,
                out("st(7)") _,
                // Do not use `preserves_flags` because FILD and FISTP modify C1 in x87 FPU status word.
                options(nostack),
            );
            out
        }
        #[cfg(all(
            any(not(target_feature = "sse"), atomic_maybe_uninit_test_prefer_x87_over_sse),
            not(all(
                any(target_feature = "x87", atomic_maybe_uninit_target_feature = "x87"),
                not(atomic_maybe_uninit_test_prefer_cmpxchg8b_over_x87),
            )),
        ))]
        // SAFETY: the caller must uphold the safety contract.
        // CMPXCHG8B has SeqCst semantics.
        //
        // Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b
        //
        // NOTE(review): CMPXCHG8B is a locked RMW even when used as a load, so
        // this path writes to `*src` — confirm callers only pass writable memory.
        unsafe {
            let (out_lo, out_hi);
            asm!(
                "lock cmpxchg8b qword ptr [edi]", // atomic { if *edi == edx:eax { ZF = 1; *edi = ecx:ebx } else { ZF = 0; edx:eax = *edi } }
                // set old/new args of CMPXCHG8B to 0
                in("ebx") 0_u32,
                in("ecx") 0_u32,
                inout("eax") 0_u32 => out_lo,
                inout("edx") 0_u32 => out_hi,
                in("edi") src,
                // Do not use `preserves_flags` because CMPXCHG8B modifies the ZF flag.
                options(nostack),
            );
            MaybeUninit64 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
        }
    }
}
// 64-bit atomic store on 32-bit x86. Mirrors the load above: exactly one of
// the three mutually exclusive `cfg` blocks is selected
// (SSE MOVLPS > x87 FILD/FISTP > CMPXCHG8B-based swap). SeqCst stores append
// an `xchg`-to-stack fence instead of `mfence` (see the inline comments).
#[cfg(all(target_arch = "x86", not(atomic_maybe_uninit_no_cmpxchg8b)))]
impl AtomicStore for u64 {
    #[inline]
    unsafe fn atomic_store(dst: *mut MaybeUninit<Self>, val: MaybeUninit<Self>, order: Ordering) {
        debug_assert_atomic_unsafe_precondition!(dst, u64);

        #[cfg(all(target_feature = "sse", not(atomic_maybe_uninit_test_prefer_x87_over_sse)))]
        // SAFETY: the caller must uphold the safety contract.
        // cfg guarantees that the CPU supports SSE.
        //
        // Refs:
        // - https://www.felixcloutier.com/x86/movlps (SSE)
        // - https://www.felixcloutier.com/x86/lock
        // - https://www.felixcloutier.com/x86/or
        unsafe {
            // Widen to an xmm-sized value; the upper 64 bits are never stored.
            let val: MaybeUninit<__m128> = mem::transmute([val, MaybeUninit::uninit()]);
            match order {
                // Relaxed and Release stores are equivalent.
                Ordering::Relaxed | Ordering::Release => {
                    asm!(
                        "movlps qword ptr [{dst}], {val}", // atomic { *dst = val[:] }
                        dst = in(reg) dst,
                        val = in(xmm_reg) val,
                        options(nostack, preserves_flags),
                    );
                }
                Ordering::SeqCst => {
                    let p = core::cell::UnsafeCell::new(MaybeUninit::<u32>::uninit());
                    asm!(
                        "movlps qword ptr [{dst}], {val}", // atomic { *dst = val[:] }
                        // Equivalent to `mfence`, but is up to 3.1x faster on Coffee Lake and up to 2.4x faster on Raptor Lake-H at least in simple cases.
                        // - https://github.com/taiki-e/portable-atomic/pull/156
                        // - LLVM uses `lock or` https://godbolt.org/z/vv6rjzfYd
                        // - Windows uses `xchg` for x86_32 for MemoryBarrier https://learn.microsoft.com/en-us/windows/win32/api/winnt/nf-winnt-memorybarrier
                        // - MSVC STL uses `lock inc` https://github.com/microsoft/STL/pull/740
                        // - boost uses `lock or` https://github.com/boostorg/atomic/commit/559eba81af71386cedd99f170dc6101c6ad7bf22
                        "xchg dword ptr [{p}], {tmp}", // fence
                        dst = in(reg) dst,
                        val = in(xmm_reg) val,
                        p = inout(reg) p.get() => _,
                        tmp = lateout(reg) _,
                        options(nostack, preserves_flags),
                    );
                }
                _ => crate::utils::unreachable_unchecked(),
            }
        }
        #[cfg(all(
            any(not(target_feature = "sse"), atomic_maybe_uninit_test_prefer_x87_over_sse),
            all(
                any(target_feature = "x87", atomic_maybe_uninit_target_feature = "x87"),
                not(atomic_maybe_uninit_test_prefer_cmpxchg8b_over_x87),
            ),
        ))]
        // SAFETY: the caller must uphold the safety contract.
        //
        // Refs:
        // - https://www.felixcloutier.com/x86/fild
        // - https://www.felixcloutier.com/x86/fist:fistp
        unsafe {
            match order {
                // Relaxed and Release stores are equivalent.
                Ordering::Relaxed | Ordering::Release => {
                    asm!(
                        "fild qword ptr [{val}]", // st.push(*val)
                        "fistp qword ptr [{dst}]", // atomic { *dst = st.pop() }
                        dst = in(reg) dst,
                        val = in(reg) val.as_ptr(),
                        out("st(0)") _,
                        out("st(1)") _,
                        out("st(2)") _,
                        out("st(3)") _,
                        out("st(4)") _,
                        out("st(5)") _,
                        out("st(6)") _,
                        out("st(7)") _,
                        // Do not use `preserves_flags` because FILD and FISTP modify condition code flags in x87 FPU status word.
                        options(nostack),
                    );
                }
                Ordering::SeqCst => {
                    let p = core::cell::UnsafeCell::new(MaybeUninit::<u32>::uninit());
                    asm!(
                        "fild qword ptr [{val}]", // st.push(*val)
                        "fistp qword ptr [{dst}]", // atomic { *dst = st.pop() }
                        // Equivalent to `mfence`, but is up to 3.1x faster on Coffee Lake and up to 2.4x faster on Raptor Lake-H at least in simple cases.
                        // - https://github.com/taiki-e/portable-atomic/pull/156
                        // - LLVM uses `lock or` https://godbolt.org/z/vv6rjzfYd
                        // - Windows uses `xchg` for x86_32 for MemoryBarrier https://learn.microsoft.com/en-us/windows/win32/api/winnt/nf-winnt-memorybarrier
                        // - MSVC STL uses `lock inc` https://github.com/microsoft/STL/pull/740
                        // - boost uses `lock or` https://github.com/boostorg/atomic/commit/559eba81af71386cedd99f170dc6101c6ad7bf22
                        "xchg dword ptr [{p}], {tmp}", // fence
                        dst = in(reg) dst,
                        val = in(reg) val.as_ptr(),
                        p = inout(reg) p.get() => _,
                        tmp = lateout(reg) _,
                        out("st(0)") _,
                        out("st(1)") _,
                        out("st(2)") _,
                        out("st(3)") _,
                        out("st(4)") _,
                        out("st(5)") _,
                        out("st(6)") _,
                        out("st(7)") _,
                        // Do not use `preserves_flags` because FILD and FISTP modify condition code flags in x87 FPU status word.
                        options(nostack),
                    );
                }
                _ => crate::utils::unreachable_unchecked(),
            }
        }
        #[cfg(all(
            any(not(target_feature = "sse"), atomic_maybe_uninit_test_prefer_x87_over_sse),
            not(all(
                any(target_feature = "x87", atomic_maybe_uninit_target_feature = "x87"),
                not(atomic_maybe_uninit_test_prefer_cmpxchg8b_over_x87),
            )),
        ))]
        // SAFETY: the caller must uphold the safety contract.
        unsafe {
            // CMPXCHG8B has SeqCst semantics.
            let _ = order;
            <Self as AtomicSwap>::atomic_swap(dst, val, Ordering::SeqCst);
        }
    }
}
// 64-bit atomic swap on 32-bit x86: speculatively read the current value with
// two 32-bit loads, then loop `lock cmpxchg8b` until it succeeds (see the
// inline comment — the initial non-atomic read is fine because the CAS
// re-checks it).
#[cfg(all(target_arch = "x86", not(atomic_maybe_uninit_no_cmpxchg8b)))]
impl AtomicSwap for u64 {
    #[inline]
    unsafe fn atomic_swap(
        dst: *mut MaybeUninit<Self>,
        val: MaybeUninit<Self>,
        _order: Ordering,
    ) -> MaybeUninit<Self> {
        debug_assert_atomic_unsafe_precondition!(dst, u64);
        let val = MaybeUninit64 { whole: val };
        let (mut prev_lo, mut prev_hi);

        // SAFETY: the caller must uphold the safety contract.
        // CMPXCHG8B has SeqCst semantics.
        //
        // Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b
        unsafe {
            asm!(
                // This is based on the code generated for the first load in DW RMWs by LLVM,
                // but it is interesting that they generate code that does mixed-sized atomic access.
                //
                // This is not single-copy atomic reads, but this is ok because subsequent
                // CAS will check for consistency.
                "mov eax, dword ptr [edi]", // atomic { eax = *edi }
                "mov edx, dword ptr [edi + 4]", // atomic { edx = *edi.byte_add(4) }
                "2:", // 'retry:
                "lock cmpxchg8b qword ptr [edi]", // atomic { if *edi == edx:eax { ZF = 1; *edi = ecx:ebx } else { ZF = 0; edx:eax = *edi } }
                "jne 2b", // if ZF == 0 { jump 'retry }
                in("ebx") val.pair.lo,
                in("ecx") val.pair.hi,
                out("eax") prev_lo,
                out("edx") prev_hi,
                in("edi") dst,
                // Do not use `preserves_flags` because CMPXCHG8B modifies the ZF flag.
                options(nostack),
            );
            MaybeUninit64 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
        }
    }
}
// 64-bit compare-exchange on 32-bit x86 via a single `lock cmpxchg8b`.
// Returns the previous value and whether the exchange succeeded.
#[cfg(all(target_arch = "x86", not(atomic_maybe_uninit_no_cmpxchg8b)))]
impl AtomicCompareExchange for u64 {
    #[inline]
    unsafe fn atomic_compare_exchange(
        dst: *mut MaybeUninit<Self>,
        old: MaybeUninit<Self>,
        new: MaybeUninit<Self>,
        _success: Ordering,
        _failure: Ordering,
    ) -> (MaybeUninit<Self>, bool) {
        debug_assert_atomic_unsafe_precondition!(dst, u64);
        let old = MaybeUninit64 { whole: old };
        let new = MaybeUninit64 { whole: new };
        let (prev_lo, prev_hi);
        let r: u8;

        // SAFETY: the caller must uphold the safety contract.
        // CMPXCHG8B has SeqCst semantics.
        //
        // Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b
        unsafe {
            asm!(
                "lock cmpxchg8b qword ptr [edi]", // atomic { if *edi == edx:eax { ZF = 1; *edi = ecx:ebx } else { ZF = 0; edx:eax = *edi } }
                "sete cl", // cl = ZF
                in("ebx") new.pair.lo,
                in("ecx") new.pair.hi,
                inout("eax") old.pair.lo => prev_lo,
                inout("edx") old.pair.hi => prev_hi,
                in("edi") dst,
                // `cl` overlaps the `ecx` input above; an `in` plus a `lateout`
                // may share a register because the output is only written after
                // all inputs have been consumed (here, after CMPXCHG8B).
                lateout("cl") r,
                // Do not use `preserves_flags` because CMPXCHG8B modifies the ZF flag.
                options(nostack),
            );
            crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
            (MaybeUninit64 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole, r != 0)
        }
    }
}
559
560// -----------------------------------------------------------------------------
561// 128-bit atomics on x86_64
562
// 128-bit atomics on x86_64, built on CMPXCHG16B plus a VMOVDQA fast path for
// load/store when AVX is known to be available (statically, or at runtime via
// `detect` and `ifunc!` dispatch).
//
// Parameters of the implementation arm:
// - `$dst`: register holding the destination pointer in load/swap.
// - `$cas_dst`: register holding the destination pointer in compare_exchange
//   (distinct so it cannot collide with the hard-coded `r8` used there to
//   save rbx).
// - `$save`: scratch register used to save/restore rbx in load/swap; rbx is
//   reserved by LLVM and therefore cannot be listed as an asm operand.
#[cfg(target_arch = "x86_64")]
#[cfg(target_feature = "cmpxchg16b")]
macro_rules! atomic128 {
    // Entry arm: picks registers per OS/pointer width and recurses below.
    () => {
        // rdi and rsi are call-preserved on Windows.
        #[cfg(not(windows))]
        #[cfg(target_pointer_width = "32")]
        atomic128!("edi", "esi", "rsi");
        #[cfg(not(windows))]
        #[cfg(target_pointer_width = "64")]
        atomic128!("rdi", "rsi", "rsi");
        #[cfg(windows)]
        #[cfg(target_pointer_width = "32")]
        atomic128!("r9d", "r11d", "r8");
        #[cfg(windows)]
        #[cfg(target_pointer_width = "64")]
        atomic128!("r9", "r11", "r8");
    };
    ($dst:tt, $cas_dst:tt, $save:tt) => {
        delegate_signed!(delegate_all, u128);
        impl AtomicLoad for u128 {
            #[inline]
            unsafe fn atomic_load(
                src: *const MaybeUninit<Self>,
                _order: Ordering,
            ) -> MaybeUninit<Self> {
                // VMOVDQA is atomic when AVX is available.
                // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688 for details.
                //
                // Refs: https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
                #[cfg(not(all(
                    not(target_feature = "avx"),
                    any(atomic_maybe_uninit_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
                )))]
                #[target_feature(enable = "avx")]
                #[inline]
                unsafe fn atomic_load_avx(
                    src: *const MaybeUninit<u128>,
                ) -> MaybeUninit<u128> {
                    // SAFETY: the caller must guarantee that `src` is valid for reads,
                    // 16-byte aligned, and that there are no concurrent non-atomic operations.
                    // load by VMOVDQA has SeqCst semantics.
                    unsafe {
                        let out;
                        asm!(
                            concat!("vmovdqa {out}, xmmword ptr [{src", ptr_modifier!(), "}]"), // atomic { out = *src }
                            src = in(reg) src,
                            out = lateout(xmm_reg) out,
                            options(nostack, preserves_flags),
                        );
                        mem::transmute::<MaybeUninit<__m128i>, MaybeUninit<u128>>(out)
                    }
                }
                #[cfg(not(target_feature = "avx"))]
                #[inline]
                unsafe fn atomic_load_cmpxchg16b(
                    src: *const MaybeUninit<u128>,
                ) -> MaybeUninit<u128> {
                    // SAFETY: the caller must guarantee that `src` is valid for both writes and
                    // reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
                    // CMPXCHG16B has SeqCst semantics.
                    //
                    // Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b
                    unsafe {
                        let (out_lo, out_hi);
                        asm!(
                            concat!("mov ", $save, ", rbx"), // save rbx which is reserved by LLVM
                            "xor rbx, rbx", // zeroed rbx
                            concat!("lock cmpxchg16b xmmword ptr [", $dst, "]"), // atomic { if *$rdi == rdx:rax { ZF = 1; *$rdi = rcx:rbx } else { ZF = 0; rdx:rax = *$rdi } }
                            concat!("mov rbx, ", $save), // restore rbx
                            // set old/new args of CMPXCHG16B to 0 (rbx is zeroed after saved to rbx_tmp, to avoid xchg)
                            out($save) _,
                            in("rcx") 0_u64,
                            inout("rax") 0_u64 => out_lo,
                            inout("rdx") 0_u64 => out_hi,
                            in($dst) src,
                            // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
                            options(nostack),
                        );
                        MaybeUninit128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
                    }
                }
                debug_assert_atomic_unsafe_precondition!(src, u128);

                // Exactly one of the three dispatch blocks below is compiled:
                // static AVX, runtime-detected AVX, or CMPXCHG16B only.
                #[cfg(target_feature = "avx")]
                // SAFETY: the caller must uphold the safety contract.
                // cfg guarantees that the CPU supports AVX.
                unsafe {
                    atomic_load_avx(src)
                }
                #[cfg(not(target_feature = "avx"))]
                #[cfg(not(all(
                    not(target_feature = "avx"),
                    any(atomic_maybe_uninit_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
                )))]
                // SAFETY: the caller must uphold the safety contract.
                // cfg guarantees that the CPU supports CMPXCHG16B.
                unsafe {
                    ifunc!(unsafe fn(src: *const MaybeUninit<u128>) -> MaybeUninit<u128> {
                        if detect::detect().avx() {
                            atomic_load_avx
                        } else {
                            atomic_load_cmpxchg16b
                        }
                    })
                }
                #[cfg(not(target_feature = "avx"))]
                #[cfg(all(
                    not(target_feature = "avx"),
                    any(atomic_maybe_uninit_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
                ))]
                // SAFETY: the caller must uphold the safety contract.
                // cfg guarantees that the CPU supports CMPXCHG16B.
                unsafe {
                    atomic_load_cmpxchg16b(src)
                }
            }
        }
        impl AtomicStore for u128 {
            #[inline]
            unsafe fn atomic_store(
                dst: *mut MaybeUninit<Self>,
                val: MaybeUninit<Self>,
                order: Ordering,
            ) {
                // VMOVDQA is atomic when AVX is available.
                // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688 for details.
                //
                // Refs: https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
                #[cfg(not(all(
                    not(target_feature = "avx"),
                    any(atomic_maybe_uninit_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
                )))]
                #[target_feature(enable = "avx")]
                #[inline]
                unsafe fn atomic_store_avx(
                    dst: *mut MaybeUninit<u128>,
                    val: MaybeUninit<u128>,
                    order: Ordering,
                ) {
                    // SAFETY: the caller must guarantee that `dst` is valid for writes,
                    // 16-byte aligned, and that there are no concurrent non-atomic operations.
                    // cfg guarantees that the CPU supports AVX.
                    unsafe {
                        let val: MaybeUninit<__m128i> = mem::transmute(val);
                        match order {
                            // Relaxed and Release stores are equivalent.
                            Ordering::Relaxed | Ordering::Release => {
                                asm!(
                                    concat!("vmovdqa xmmword ptr [{dst", ptr_modifier!(), "}], {val}"), // atomic { *dst = val }
                                    dst = in(reg) dst,
                                    val = in(xmm_reg) val,
                                    options(nostack, preserves_flags),
                                );
                            }
                            Ordering::SeqCst => {
                                let p = core::cell::UnsafeCell::new(MaybeUninit::<u64>::uninit());
                                asm!(
                                    concat!("vmovdqa xmmword ptr [{dst", ptr_modifier!(), "}], {val}"), // atomic { *dst = val }
                                    // Equivalent to `mfence`, but is up to 3.1x faster on Coffee Lake and up to 2.4x faster on Raptor Lake-H at least in simple cases.
                                    // - https://github.com/taiki-e/portable-atomic/pull/156
                                    // - LLVM uses `lock or` https://godbolt.org/z/vv6rjzfYd
                                    // - Windows uses `xchg` for x86_32 for MemoryBarrier https://learn.microsoft.com/en-us/windows/win32/api/winnt/nf-winnt-memorybarrier
                                    // - MSVC STL uses `lock inc` https://github.com/microsoft/STL/pull/740
                                    // - boost uses `lock or` https://github.com/boostorg/atomic/commit/559eba81af71386cedd99f170dc6101c6ad7bf22
                                    concat!("xchg qword ptr [{p", ptr_modifier!(), "}], {tmp}"), // fence
                                    dst = in(reg) dst,
                                    val = in(xmm_reg) val,
                                    p = in(reg) p.get(),
                                    tmp = out(reg) _,
                                    options(nostack, preserves_flags),
                                );
                            }
                            _ => crate::utils::unreachable_unchecked(),
                        }
                    }
                }
                #[cfg(not(target_feature = "avx"))]
                #[inline]
                unsafe fn atomic_store_cmpxchg16b(
                    dst: *mut MaybeUninit<u128>,
                    val: MaybeUninit<u128>,
                ) {
                    // SAFETY: the caller must uphold the safety contract.
                    unsafe {
                        // CMPXCHG16B has SeqCst semantics.
                        <u128 as AtomicSwap>::atomic_swap(dst, val, Ordering::SeqCst);
                    }
                }
                debug_assert_atomic_unsafe_precondition!(dst, u128);

                // Same three-way dispatch as in `atomic_load` above.
                #[cfg(target_feature = "avx")]
                // SAFETY: the caller must uphold the safety contract.
                // cfg guarantees that the CPU supports AVX.
                unsafe {
                    atomic_store_avx(dst, val, order);
                }
                #[cfg(not(target_feature = "avx"))]
                #[cfg(not(all(
                    not(target_feature = "avx"),
                    any(atomic_maybe_uninit_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
                )))]
                // SAFETY: the caller must uphold the safety contract.
                // cfg guarantees that the CPU supports CMPXCHG16B.
                unsafe {
                    fn_alias! {
                        #[target_feature(enable = "avx")]
                        unsafe fn(dst: *mut MaybeUninit<u128>, val: MaybeUninit<u128>);
                        // atomic store by vmovdqa has at least release semantics.
                        atomic_store_avx_non_seqcst = atomic_store_avx(Ordering::Release);
                        atomic_store_avx_seqcst = atomic_store_avx(Ordering::SeqCst);
                    }
                    match order {
                        // Relaxed and Release stores are equivalent in all implementations
                        // that may be called here.
                        Ordering::Relaxed | Ordering::Release => {
                            ifunc!(unsafe fn(dst: *mut MaybeUninit<u128>, val: MaybeUninit<u128>) {
                                if detect::detect().avx() {
                                    atomic_store_avx_non_seqcst
                                } else {
                                    atomic_store_cmpxchg16b
                                }
                            });
                        }
                        Ordering::SeqCst => {
                            ifunc!(unsafe fn(dst: *mut MaybeUninit<u128>, val: MaybeUninit<u128>) {
                                if detect::detect().avx() {
                                    atomic_store_avx_seqcst
                                } else {
                                    atomic_store_cmpxchg16b
                                }
                            });
                        }
                        _ => crate::utils::unreachable_unchecked(),
                    }
                }
                #[cfg(not(target_feature = "avx"))]
                #[cfg(all(
                    not(target_feature = "avx"),
                    any(atomic_maybe_uninit_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
                ))]
                // SAFETY: the caller must uphold the safety contract.
                // cfg guarantees that the CPU supports CMPXCHG16B.
                unsafe {
                    // CMPXCHG16B has SeqCst semantics.
                    let _ = order;
                    atomic_store_cmpxchg16b(dst, val);
                }
            }
        }
        impl AtomicSwap for u128 {
            #[inline]
            unsafe fn atomic_swap(
                dst: *mut MaybeUninit<Self>,
                val: MaybeUninit<Self>,
                _order: Ordering,
            ) -> MaybeUninit<Self> {
                debug_assert_atomic_unsafe_precondition!(dst, u128);
                let val = MaybeUninit128 { whole: val };
                let (mut prev_lo, mut prev_hi);

                // SAFETY: the caller must guarantee that `dst` is valid for both writes and
                // reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
                // cfg guarantees that the CPU supports CMPXCHG16B.
                // CMPXCHG16B has SeqCst semantics.
                //
                // Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b
                unsafe {
                    asm!(
                        // The XCHG both saves rbx into $save and moves the low half of
                        // the new value (preloaded into $save) into rbx for CMPXCHG16B.
                        concat!("xchg ", $save, ", rbx"), // save rbx which is reserved by LLVM
                        // This is based on the code generated for the first load in DW RMWs by LLVM,
                        // but it is interesting that they generate code that does mixed-sized atomic access.
                        //
                        // This is not single-copy atomic reads, but this is ok because subsequent
                        // CAS will check for consistency.
                        concat!("mov rax, qword ptr [", $dst, "]"), // atomic { rax = *$rdi }
                        concat!("mov rdx, qword ptr [", $dst, " + 8]"), // atomic { rdx = *$rdi.byte_add(8) }
                        "2:", // 'retry:
                        concat!("lock cmpxchg16b xmmword ptr [", $dst, "]"), // atomic { if *$rdi == rdx:rax { ZF = 1; *$rdi = rcx:rbx } else { ZF = 0; rdx:rax = *$rdi } }
                        "jne 2b", // if ZF == 0 { jump 'retry }
                        concat!("mov rbx, ", $save), // restore rbx
                        inout($save) val.pair.lo => _,
                        in("rcx") val.pair.hi,
                        out("rax") prev_lo,
                        out("rdx") prev_hi,
                        in($dst) dst,
                        // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
                        options(nostack),
                    );
                    MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
                }
            }
        }
        impl AtomicCompareExchange for u128 {
            #[inline]
            unsafe fn atomic_compare_exchange(
                dst: *mut MaybeUninit<Self>,
                old: MaybeUninit<Self>,
                new: MaybeUninit<Self>,
                _success: Ordering,
                _failure: Ordering,
            ) -> (MaybeUninit<Self>, bool) {
                debug_assert_atomic_unsafe_precondition!(dst, u128);
                let old = MaybeUninit128 { whole: old };
                let new = MaybeUninit128 { whole: new };
                let (prev_lo, prev_hi);
                let r: u8;

                // SAFETY: the caller must guarantee that `dst` is valid for both writes and
                // reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
                // cfg guarantees that the CPU supports CMPXCHG16B.
                // CMPXCHG16B has SeqCst semantics.
                //
                // Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b
                unsafe {
                    asm!(
                        "xchg r8, rbx", // save rbx which is reserved by LLVM
                        concat!("lock cmpxchg16b xmmword ptr [", $cas_dst, "]"), // atomic { if *$rdi == rdx:rax { ZF = 1; *$rdi = rcx:rbx } else { ZF = 0; rdx:rax = *$rdi } }
                        "sete cl", // cl = ZF
                        "mov rbx, r8", // restore rbx
                        inout("r8") new.pair.lo => _,
                        in("rcx") new.pair.hi,
                        inout("rax") old.pair.lo => prev_lo,
                        inout("rdx") old.pair.hi => prev_hi,
                        in($cas_dst) dst,
                        // `cl` overlaps the `rcx` input; an `in` plus a `lateout` may
                        // share a register since the output is written after all
                        // inputs have been consumed (here, after CMPXCHG16B).
                        lateout("cl") r,
                        // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
                        options(nostack),
                    );
                    crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
                    (
                        MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole,
                        r != 0
                    )
                }
            }
        }
    };
}
902
// Instantiate the 128-bit implementations; the zero-argument arm of
// `atomic128!` selects the register assignment for the current OS/ABI.
#[cfg(target_arch = "x86_64")]
#[cfg(target_feature = "cmpxchg16b")]
atomic128!();
906
907// -----------------------------------------------------------------------------
908// cfg macros
909
// 8-, 16-, and 32-bit atomics are always available on x86/x86_64, so the
// `has` macros pass their tokens through unchanged and the `no` macros
// discard them.
#[macro_export]
macro_rules! cfg_has_atomic_8 {
    ($($tokens:tt)*) => {
        $($tokens)*
    };
}
#[macro_export]
macro_rules! cfg_no_atomic_8 {
    ($($tokens:tt)*) => {};
}
#[macro_export]
macro_rules! cfg_has_atomic_16 {
    ($($tokens:tt)*) => {
        $($tokens)*
    };
}
#[macro_export]
macro_rules! cfg_no_atomic_16 {
    ($($tokens:tt)*) => {};
}
#[macro_export]
macro_rules! cfg_has_atomic_32 {
    ($($tokens:tt)*) => {
        $($tokens)*
    };
}
#[macro_export]
macro_rules! cfg_no_atomic_32 {
    ($($tokens:tt)*) => {};
}
// 64-bit atomics are available everywhere except on 32-bit x86 built without
// CMPXCHG8B; the two complementary `cfg` gates below select which pair of
// pass-through/discard macros is compiled.
#[cfg(not(all(target_arch = "x86", atomic_maybe_uninit_no_cmpxchg8b)))]
#[macro_export]
macro_rules! cfg_has_atomic_64 {
    ($($tokens:tt)*) => {
        $($tokens)*
    };
}
#[cfg(not(all(target_arch = "x86", atomic_maybe_uninit_no_cmpxchg8b)))]
#[macro_export]
macro_rules! cfg_no_atomic_64 {
    ($($tokens:tt)*) => {};
}
#[cfg(all(target_arch = "x86", atomic_maybe_uninit_no_cmpxchg8b))]
#[macro_export]
macro_rules! cfg_has_atomic_64 {
    ($($tokens:tt)*) => {};
}
#[cfg(all(target_arch = "x86", atomic_maybe_uninit_no_cmpxchg8b))]
#[macro_export]
macro_rules! cfg_no_atomic_64 {
    ($($tokens:tt)*) => {
        $($tokens)*
    };
}
// 128-bit atomics exist only on x86_64 with CMPXCHG16B statically enabled;
// the complementary `cfg` gates pick the matching pass-through/discard pair.
#[cfg(not(all(target_arch = "x86_64", target_feature = "cmpxchg16b")))]
#[macro_export]
macro_rules! cfg_has_atomic_128 {
    ($($tokens:tt)*) => {};
}
#[cfg(not(all(target_arch = "x86_64", target_feature = "cmpxchg16b")))]
#[macro_export]
macro_rules! cfg_no_atomic_128 {
    ($($tokens:tt)*) => {
        $($tokens)*
    };
}
#[cfg(all(target_arch = "x86_64", target_feature = "cmpxchg16b"))]
#[macro_export]
macro_rules! cfg_has_atomic_128 {
    ($($tokens:tt)*) => {
        $($tokens)*
    };
}
#[cfg(all(target_arch = "x86_64", target_feature = "cmpxchg16b"))]
#[macro_export]
macro_rules! cfg_no_atomic_128 {
    ($($tokens:tt)*) => {};
}
// CAS is available everywhere except on 32-bit x86 built without CMPXCHG;
// the complementary `cfg` gates pick the matching pass-through/discard pair.
#[cfg(not(all(target_arch = "x86", atomic_maybe_uninit_no_cmpxchg)))]
#[macro_export]
macro_rules! cfg_has_atomic_cas {
    ($($tokens:tt)*) => {
        $($tokens)*
    };
}
#[cfg(not(all(target_arch = "x86", atomic_maybe_uninit_no_cmpxchg)))]
#[macro_export]
macro_rules! cfg_no_atomic_cas {
    ($($tokens:tt)*) => {};
}
#[cfg(all(target_arch = "x86", atomic_maybe_uninit_no_cmpxchg))]
#[macro_export]
macro_rules! cfg_has_atomic_cas {
    ($($tokens:tt)*) => {};
}
#[cfg(all(target_arch = "x86", atomic_maybe_uninit_no_cmpxchg))]
#[macro_export]
macro_rules! cfg_no_atomic_cas {
    ($($tokens:tt)*) => {
        $($tokens)*
    };
}