portable-atomic 1.0.1

Portable atomic types including support for 128-bit atomics, atomic float, etc.
Documentation
// Atomic{I,U}128 implementation on powerpc64.
//
// powerpc64 on pwr8+ support 128-bit atomics:
// https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445
// https://github.com/llvm/llvm-project/blob/llvmorg-15.0.0/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll
// https://github.com/llvm/llvm-project/blob/llvmorg-15.0.0/llvm/test/CodeGen/PowerPC/atomics-i128.ll
//
// powerpc64le is pwr8+ by default https://github.com/llvm/llvm-project/blob/llvmorg-15.0.0/llvm/lib/Target/PowerPC/PPC.td#L652
// See also https://github.com/rust-lang/rust/issues/59932
//
// Note that we do not separate LL and SC into separate functions, but handle
// them within a single asm block. This is because it is theoretically possible
// for the compiler to insert operations that might clear the reservation between
// LL and SC. See aarch64.rs for details.
//
// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use
// this module and use intrinsics.rs instead.
//
// Refs:
// - Power ISA https://openpowerfoundation.org/specifications/isa
// - AIX Assembler language reference https://www.ibm.com/docs/en/aix/7.3?topic=aix-assembler-language-reference
// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit
//
// Generated asm:
// - powerpc64 (pwr8) https://godbolt.org/z/EzMTYq8ne
// - powerpc64le https://godbolt.org/z/cE1511Ks1

include!("macros.rs");

use core::{arch::asm, sync::atomic::Ordering};

/// A 128-bit value represented as a pair of 64-bit values.
// This type is #[repr(C)], both fields have the same in-memory representation
// and are plain old datatypes, so access to the fields is always safe.
#[derive(Clone, Copy)]
#[repr(C)]
union U128 {
    whole: u128,
    pair: Pair,
}
#[derive(Clone, Copy)]
#[repr(C)]
union I128 {
    whole: i128,
    pair: Pair,
}

#[derive(Clone, Copy)]
#[repr(C)]
struct Pair {
    #[cfg(target_endian = "big")]
    hi: u64,
    lo: u64,
    #[cfg(target_endian = "little")]
    hi: u64,
}

macro_rules! atomic_rmw {
    ($op:ident, $order:ident) => {
        match $order {
            Ordering::Relaxed => $op!("", ""),
            Ordering::Acquire => $op!("lwsync", ""),
            Ordering::Release => $op!("", "lwsync"),
            Ordering::AcqRel => $op!("lwsync", "lwsync"),
            Ordering::SeqCst => $op!("lwsync", "sync"),
            _ => unreachable!("{:?}", $order),
        }
    };
}

#[inline]
unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 {
    debug_assert!(src as usize % 16 == 0);

    // SAFETY: the caller must uphold the safety contract for `atomic_load`.
    //
    // Refs: "3.3.4 Fixed Point Load and Store Quadword Instructions" of Power ISA
    unsafe {
        let (out_hi, out_lo);
        match order {
            Ordering::Relaxed => {
                asm!(
                    "lq %r4, 0({src})",
                    src = in(reg) src,
                    out("r0") _,
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 and r2, so starting with r4.
                    out("r4") out_hi,
                    out("r5") out_lo,
                    options(nostack, readonly),
                );
            }
            Ordering::Acquire => {
                asm!(
                    "lq %r4, 0({src})",
                    // Lightweight acquire sync
                    // Refs: https://github.com/boostorg/atomic/blob/boost-1.79.0/include/boost/atomic/detail/core_arch_ops_gcc_ppc.hpp#L47-L62
                    "cmpd %cr7, %r4, %r4",
                    "bne- %cr7, 2f",
                    "2:",
                    "isync",
                    src = in(reg) src,
                    out("r0") _,
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 and r2, so starting with r4.
                    out("r4") out_hi,
                    out("r5") out_lo,
                    out("cr7") _,
                    options(nostack),
                );
            }
            Ordering::SeqCst => {
                asm!(
                    "sync",
                    "lq %r4, 0({src})",
                    // Lightweight acquire sync
                    // Refs: https://github.com/boostorg/atomic/blob/boost-1.79.0/include/boost/atomic/detail/core_arch_ops_gcc_ppc.hpp#L47-L62
                    "cmpd %cr7, %r4, %r4",
                    "bne- %cr7, 2f",
                    "2:",
                    "isync",
                    src = in(reg) src,
                    out("r0") _,
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 and r2, so starting with r4.
                    out("r4") out_hi,
                    out("r5") out_lo,
                    out("cr7") _,
                    options(nostack),
                );
            }
            _ => unreachable!("{:?}", order),
        }
        U128 { pair: Pair { hi: out_hi, lo: out_lo } }.whole
    }
}

#[inline]
unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
    debug_assert!(dst as usize % 16 == 0);

    // SAFETY: the caller must uphold the safety contract for `atomic_store`.
    //
    // Refs: "3.3.4 Fixed Point Load and Store Quadword Instructions" of Power ISA
    unsafe {
        let val = U128 { whole: val };
        macro_rules! atomic_store {
            ($release:tt) => {
                asm!(
                    $release,
                    "stq %r4, 0({dst})",
                    dst = in(reg) dst,
                    out("r0") _,
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 and r2, so starting with r4.
                    in("r4") val.pair.hi,
                    in("r5") val.pair.lo,
                    options(nostack),
                )
            };
        }
        match order {
            Ordering::Relaxed => atomic_store!(""),
            Ordering::Release => atomic_store!("lwsync"),
            Ordering::SeqCst => atomic_store!("sync"),
            _ => unreachable!("{:?}", order),
        }
    }
}

#[inline]
unsafe fn atomic_compare_exchange(
    dst: *mut u128,
    old: u128,
    new: u128,
    success: Ordering,
    failure: Ordering,
) -> Result<u128, u128> {
    debug_assert!(dst as usize % 16 == 0);
    let order = crate::utils::upgrade_success_ordering(success, failure);

    // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange`.
    //
    // Refs: "4.6.2.2 128-bit Load And Reserve and Store Conditional Instructions" of Power ISA
    let res = unsafe {
        let old = U128 { whole: old };
        let new = U128 { whole: new };
        let (mut prev_hi, mut prev_lo);
        macro_rules! cmpxchg {
            ($acquire:tt, $release:tt) => {
                asm!(
                    $release,
                    "2:",
                        "lqarx %r8, 0, {dst}",
                        "xor {tmp_lo}, %r9, {old_lo}",
                        "xor {tmp_hi}, %r8, {old_hi}",
                        "or. {tmp_lo}, {tmp_lo}, {tmp_hi}",
                        "bne %cr0, 3f",
                        "stqcx. %r6, 0, {dst}",
                        "bne %cr0, 2b",
                        "b 4f",
                    "3:",
                        "stqcx. %r8, 0, {dst}",
                    "4:",
                    $acquire,
                    dst = in(reg) dst,
                    old_hi = in(reg) old.pair.hi,
                    old_lo = in(reg) old.pair.lo,
                    tmp_hi = out(reg) _,
                    tmp_lo = out(reg) _,
                    out("r0") _,
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 and r2, so starting with r4.
                    in("r6") new.pair.hi,
                    in("r7") new.pair.lo,
                    out("r8") prev_hi,
                    out("r9") prev_lo,
                    out("cr0") _,
                    options(nostack),
                )
            };
        }
        atomic_rmw!(cmpxchg, order);
        U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
    };
    if res == old {
        Ok(res)
    } else {
        Err(res)
    }
}

// LLVM appears to generate strong CAS for powerpc64 128-bit weak CAS,
// so we always use strong CAS for now.
use atomic_compare_exchange as atomic_compare_exchange_weak;

#[inline]
unsafe fn atomic_swap(dst: *mut u128, val: u128, order: Ordering) -> u128 {
    debug_assert!(dst as usize % 16 == 0);

    // SAFETY: the caller must uphold the safety contract for `atomic_swap`.
    unsafe {
        let val = U128 { whole: val };
        let (mut prev_hi, mut prev_lo);
        macro_rules! swap {
            ($acquire:tt, $release:tt) => {
                asm!(
                    $release,
                    "2:",
                        "lqarx %r6, 0, {dst}",
                        "mr %r9, {val_lo}",
                        "mr %r8, {val_hi}",
                        "stqcx. %r8, 0, {dst}",
                        "bne %cr0, 2b",
                    $acquire,
                    dst = in(reg) dst,
                    val_hi = in(reg) val.pair.hi,
                    val_lo = in(reg) val.pair.lo,
                    out("r0") _,
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 and r2, so starting with r4.
                    out("r6") prev_hi,
                    out("r7") prev_lo,
                    out("r8") _, // new (hi)
                    out("r9") _, // new (lo)
                    out("cr0") _,
                    options(nostack),
                )
            };
        }
        atomic_rmw!(swap, order);
        U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
    }
}

#[inline]
unsafe fn atomic_add(dst: *mut u128, val: u128, order: Ordering) -> u128 {
    debug_assert!(dst as usize % 16 == 0);

    // SAFETY: the caller must uphold the safety contract for `atomic_add`.
    unsafe {
        let val = U128 { whole: val };
        let (mut prev_hi, mut prev_lo);
        macro_rules! add {
            ($acquire:tt, $release:tt) => {
                asm!(
                    $release,
                    "2:",
                        "lqarx %r6, 0, {dst}",
                        "addc %r9, {val_lo}, %r7",
                        "adde %r8, {val_hi}, %r6",
                        "stqcx. %r8, 0, {dst}",
                        "bne %cr0, 2b",
                    $acquire,
                    dst = in(reg) dst,
                    val_hi = in(reg) val.pair.hi,
                    val_lo = in(reg) val.pair.lo,
                    out("r0") _,
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 and r2, so starting with r4.
                    out("r6") prev_hi,
                    out("r7") prev_lo,
                    out("r8") _, // new (hi)
                    out("r9") _, // new (lo)
                    out("cr0") _,
                    options(nostack),
                )
            };
        }
        atomic_rmw!(add, order);
        U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
    }
}

#[inline]
unsafe fn atomic_sub(dst: *mut u128, val: u128, order: Ordering) -> u128 {
    debug_assert!(dst as usize % 16 == 0);

    // SAFETY: the caller must uphold the safety contract for `atomic_sub`.
    unsafe {
        let val = U128 { whole: val };
        let (mut prev_hi, mut prev_lo);
        macro_rules! sub {
            ($acquire:tt, $release:tt) => {
                asm!(
                    $release,
                    "2:",
                        "lqarx %r6, 0, {dst}",
                        "subc %r9, %r7, {val_lo}",
                        "subfe %r8, {val_hi}, %r6",
                        "stqcx. %r8, 0, {dst}",
                        "bne %cr0, 2b",
                    $acquire,
                    dst = in(reg) dst,
                    val_hi = in(reg) val.pair.hi,
                    val_lo = in(reg) val.pair.lo,
                    out("r0") _,
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 and r2, so starting with r4.
                    out("r6") prev_hi,
                    out("r7") prev_lo,
                    out("r8") _, // new (hi)
                    out("r9") _, // new (lo)
                    out("cr0") _,
                    options(nostack),
                )
            };
        }
        atomic_rmw!(sub, order);
        U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
    }
}

#[inline]
unsafe fn atomic_and(dst: *mut u128, val: u128, order: Ordering) -> u128 {
    debug_assert!(dst as usize % 16 == 0);

    // SAFETY: the caller must uphold the safety contract for `atomic_and`.
    unsafe {
        let val = U128 { whole: val };
        let (mut prev_hi, mut prev_lo);
        macro_rules! and {
            ($acquire:tt, $release:tt) => {
                asm!(
                    $release,
                    "2:",
                        "lqarx %r6, 0, {dst}",
                        "and %r9, {val_lo}, %r7",
                        "and %r8, {val_hi}, %r6",
                        "stqcx. %r8, 0, {dst}",
                        "bne %cr0, 2b",
                    $acquire,
                    dst = in(reg) dst,
                    val_hi = in(reg) val.pair.hi,
                    val_lo = in(reg) val.pair.lo,
                    out("r0") _,
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 and r2, so starting with r4.
                    out("r6") prev_hi,
                    out("r7") prev_lo,
                    out("r8") _, // new (hi)
                    out("r9") _, // new (lo)
                    out("cr0") _,
                    options(nostack),
                )
            };
        }
        atomic_rmw!(and, order);
        U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
    }
}

#[inline]
unsafe fn atomic_nand(dst: *mut u128, val: u128, order: Ordering) -> u128 {
    debug_assert!(dst as usize % 16 == 0);

    // SAFETY: the caller must uphold the safety contract for `atomic_nand`.
    unsafe {
        let val = U128 { whole: val };
        let (mut prev_hi, mut prev_lo);
        macro_rules! nand {
            ($acquire:tt, $release:tt) => {
                asm!(
                    $release,
                    "2:",
                        "lqarx %r6, 0, {dst}",
                        "nand %r9, {val_lo}, %r7",
                        "nand %r8, {val_hi}, %r6",
                        "stqcx. %r8, 0, {dst}",
                        "bne %cr0, 2b",
                    $acquire,
                    dst = in(reg) dst,
                    val_hi = in(reg) val.pair.hi,
                    val_lo = in(reg) val.pair.lo,
                    out("r0") _,
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 and r2, so starting with r4.
                    out("r6") prev_hi,
                    out("r7") prev_lo,
                    out("r8") _, // new (hi)
                    out("r9") _, // new (lo)
                    out("cr0") _,
                    options(nostack),
                )
            };
        }
        atomic_rmw!(nand, order);
        U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
    }
}

#[inline]
unsafe fn atomic_or(dst: *mut u128, val: u128, order: Ordering) -> u128 {
    debug_assert!(dst as usize % 16 == 0);

    // SAFETY: the caller must uphold the safety contract for `atomic_or`.
    unsafe {
        let val = U128 { whole: val };
        let (mut prev_hi, mut prev_lo);
        macro_rules! or {
            ($acquire:tt, $release:tt) => {
                asm!(
                    $release,
                    "2:",
                        "lqarx %r6, 0, {dst}",
                        "or %r9, {val_lo}, %r7",
                        "or %r8, {val_hi}, %r6",
                        "stqcx. %r8, 0, {dst}",
                        "bne %cr0, 2b",
                    $acquire,
                    dst = in(reg) dst,
                    val_hi = in(reg) val.pair.hi,
                    val_lo = in(reg) val.pair.lo,
                    out("r0") _,
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 and r2, so starting with r4.
                    out("r6") prev_hi,
                    out("r7") prev_lo,
                    out("r8") _, // new (hi)
                    out("r9") _, // new (lo)
                    out("cr0") _,
                    options(nostack),
                )
            };
        }
        atomic_rmw!(or, order);
        U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
    }
}

#[inline]
unsafe fn atomic_xor(dst: *mut u128, val: u128, order: Ordering) -> u128 {
    debug_assert!(dst as usize % 16 == 0);

    // SAFETY: the caller must uphold the safety contract for `atomic_xor`.
    unsafe {
        let val = U128 { whole: val };
        let (mut prev_hi, mut prev_lo);
        macro_rules! xor {
            ($acquire:tt, $release:tt) => {
                asm!(
                    $release,
                    "2:",
                        "lqarx %r6, 0, {dst}",
                        "xor %r9, {val_lo}, %r7",
                        "xor %r8, {val_hi}, %r6",
                        "stqcx. %r8, 0, {dst}",
                        "bne %cr0, 2b",
                    $acquire,
                    dst = in(reg) dst,
                    val_hi = in(reg) val.pair.hi,
                    val_lo = in(reg) val.pair.lo,
                    out("r0") _,
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 and r2, so starting with r4.
                    out("r6") prev_hi,
                    out("r7") prev_lo,
                    out("r8") _, // new (hi)
                    out("r9") _, // new (lo)
                    out("cr0") _,
                    options(nostack),
                )
            };
        }
        atomic_rmw!(xor, order);
        U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
    }
}

#[inline]
unsafe fn atomic_not(dst: *mut u128, order: Ordering) -> u128 {
    // SAFETY: the caller must uphold the safety contract for `atomic_not`.
    unsafe { atomic_xor(dst, core::u128::MAX, order) }
}

#[inline]
unsafe fn atomic_max(dst: *mut i128, val: i128, order: Ordering) -> i128 {
    debug_assert!(dst as usize % 16 == 0);

    // SAFETY: the caller must uphold the safety contract for `atomic_max`.
    unsafe {
        let val = I128 { whole: val };
        let (mut prev_hi, mut prev_lo);
        macro_rules! max {
            ($acquire:tt, $release:tt) => {
                asm!(
                    $release,
                    "2:",
                        "lqarx %r6, 0, {dst}",
                        "cmpld %r6, {val_hi}",       // compare hi 64-bit, store result to cr0
                        "cmpd %cr1, %r6, {val_hi}",  // (signed) compare hi 64-bit, store result to cr1
                        "crandc 20, 5, 2",
                        "cmpld %cr1, %r7, {val_lo}", // compare lo 64-bit, store result to cr1
                        "crand 21, 2, 5",
                        "cror 20, 21, 20",
                        "isel %r8, %r6, {val_hi}, 20", // select hi 64-bit
                        "isel %r9, %r7, {val_lo}, 20", // select lo 64-bit
                        "stqcx. %r8, 0, {dst}",
                        "bne %cr0, 2b",
                    $acquire,
                    dst = in(reg) dst,
                    val_hi = in(reg) val.pair.hi,
                    val_lo = in(reg) val.pair.lo,
                    out("r0") _,
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 and r2, so starting with r4.
                    out("r6") prev_hi,
                    out("r7") prev_lo,
                    out("r8") _, // new (hi)
                    out("r9") _, // new (lo)
                    out("cr0") _,
                    out("cr1") _,
                    options(nostack),
                )
            };
        }
        atomic_rmw!(max, order);
        I128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
    }
}

#[inline]
unsafe fn atomic_umax(dst: *mut u128, val: u128, order: Ordering) -> u128 {
    debug_assert!(dst as usize % 16 == 0);

    // SAFETY: the caller must uphold the safety contract for `atomic_umax`.
    unsafe {
        let val = U128 { whole: val };
        let (mut prev_hi, mut prev_lo);
        macro_rules! umax {
            ($acquire:tt, $release:tt) => {
                asm!(
                    $release,
                    "2:",
                        "lqarx %r6, 0, {dst}",
                        "cmpld %r6, {val_hi}",       // compare hi 64-bit, store result to cr0
                        "cmpld %cr1, %r7, {val_lo}", // compare lo 64-bit, store result to cr1
                        "crandc 20, 1, 2",
                        "crand 21, 2, 5",
                        "cror 20, 21, 20",
                        "isel %r8, %r6, {val_hi}, 20", // select hi 64-bit
                        "isel %r9, %r7, {val_lo}, 20", // select lo 64-bit
                        "stqcx. %r8, 0, {dst}",
                        "bne %cr0, 2b",
                    $acquire,
                    dst = in(reg) dst,
                    val_hi = in(reg) val.pair.hi,
                    val_lo = in(reg) val.pair.lo,
                    out("r0") _,
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 and r2, so starting with r4.
                    out("r6") prev_hi,
                    out("r7") prev_lo,
                    out("r8") _, // new (hi)
                    out("r9") _, // new (lo)
                    out("cr0") _,
                    out("cr1") _,
                    options(nostack),
                )
            };
        }
        atomic_rmw!(umax, order);
        U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
    }
}

#[inline]
unsafe fn atomic_min(dst: *mut i128, val: i128, order: Ordering) -> i128 {
    debug_assert!(dst as usize % 16 == 0);

    // SAFETY: the caller must uphold the safety contract for `atomic_min`.
    unsafe {
        let val = I128 { whole: val };
        let (mut prev_hi, mut prev_lo);
        macro_rules! min {
            ($acquire:tt, $release:tt) => {
                asm!(
                    $release,
                    "2:",
                        "lqarx %r6, 0, {dst}",
                        "cmpld %r6, {val_hi}",       // compare hi 64-bit, store result to cr0
                        "cmpd %cr1, %r6, {val_hi}",  // (signed) compare hi 64-bit, store result to cr1
                        "crandc 20, 5, 2",
                        "cmpld %cr1, %r7, {val_lo}", // compare lo 64-bit, store result to cr1
                        "crand 21, 2, 5",
                        "cror 20, 21, 20",
                        "isel %r8, {val_hi}, %r6, 20", // select hi 64-bit
                        "isel %r9, {val_lo}, %r7, 20", // select lo 64-bit
                        "stqcx. %r8, 0, {dst}",
                        "bne %cr0, 2b",
                    $acquire,
                    dst = in(reg) dst,
                    val_hi = in(reg) val.pair.hi,
                    val_lo = in(reg) val.pair.lo,
                    out("r0") _,
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 and r2, so starting with r4.
                    out("r6") prev_hi,
                    out("r7") prev_lo,
                    out("r8") _, // new (hi)
                    out("r9") _, // new (lo)
                    out("cr0") _,
                    out("cr1") _,
                    options(nostack),
                )
            };
        }
        atomic_rmw!(min, order);
        I128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
    }
}

#[inline]
unsafe fn atomic_umin(dst: *mut u128, val: u128, order: Ordering) -> u128 {
    debug_assert!(dst as usize % 16 == 0);

    // SAFETY: the caller must uphold the safety contract for `atomic_umin`.
    unsafe {
        let val = U128 { whole: val };
        let (mut prev_hi, mut prev_lo);
        macro_rules! umin {
            ($acquire:tt, $release:tt) => {
                asm!(
                    $release,
                    "2:",
                        "lqarx %r6, 0, {dst}",
                        "cmpld %r6, {val_hi}",       // compare hi 64-bit, store result to cr0
                        "cmpld %cr1, %r7, {val_lo}", // compare lo 64-bit, store result to cr1
                        "crandc 20, 1, 2",
                        "crand 21, 2, 5",
                        "cror 20, 21, 20",
                        "isel %r8, {val_hi}, %r6, 20", // select hi 64-bit
                        "isel %r9, {val_lo}, %r7, 20", // select lo 64-bit
                        "stqcx. %r8, 0, {dst}",
                        "bne %cr0, 2b",
                    $acquire,
                    dst = in(reg) dst,
                    val_hi = in(reg) val.pair.hi,
                    val_lo = in(reg) val.pair.lo,
                    out("r0") _,
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 and r2, so starting with r4.
                    out("r6") prev_hi,
                    out("r7") prev_lo,
                    out("r8") _, // new (hi)
                    out("r9") _, // new (lo)
                    out("cr0") _,
                    out("cr1") _,
                    options(nostack),
                )
            };
        }
        atomic_rmw!(umin, order);
        U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
    }
}

atomic128!(int, AtomicI128, i128, atomic_max, atomic_min);
atomic128!(uint, AtomicU128, u128, atomic_umax, atomic_umin);

#[cfg(test)]
mod tests {
    use super::*;

    #[cfg(not(qemu))]
    test_atomic_int!(i128);
    #[cfg(not(qemu))]
    test_atomic_int!(u128);
    // As of qemu 7.0.0 , using lqarx/stqcx. with qemu-user hangs.
    // To test this, use real powerpc64le hardware or use POWER Functional
    // Simulator. See DEVELOPMENT.md for more.
    #[cfg(qemu)]
    test_atomic_int_load_store!(i128);
    #[cfg(qemu)]
    test_atomic_int_load_store!(u128);
}