#![allow(non_upper_case_globals, unused_macros, unused_imports)]
use crate::low::macros::*;
/// Multiplies two 4-limb values for P-256 using a single block of AArch64
/// inline assembly.
///
/// Reads the four `u64` limbs of `x` and of `y` and writes a four-limb result
/// to `z`. `z` is also used as scratch storage part-way through the
/// computation; its previous contents are never read and it is always fully
/// overwritten.
///
/// NOTE(review): going by the name, this is presumably the Montgomery product
/// `z = (x * y / 2^256) mod p_256` on little-endian limbs. The exact contract
/// (including whether the inputs must already be reduced) is not stated in
/// this file -- confirm against the upstream routine's documentation.
pub(crate) fn bignum_montmul_p256(z: &mut [u64; 4], x: &[u64; 4], y: &[u64; 4]) {
// SAFETY: the assembly dereferences only x0, x1 and x2. Every load through
// x1/x2 is 16 bytes wide at offset 0 or 16, i.e. inside the 32 bytes of `x`/`y`.
// Every access through x0 is a 16-byte pair at offset 0 or 16, i.e. inside the
// 32 bytes of `z`. Every register the code writes is declared as an output
// below.
unsafe {
core::arch::asm!(
// Load x[0..2] and y[0..2] into both general-purpose and vector registers,
// plus x[2..4] into x11/x15.
Q!(" ldr " "q20, [x2]"),
Q!(" ldp " "x7, x17, [x1]"),
Q!(" ldr " "q0, [x1]"),
Q!(" ldp " "x6, x10, [x2]"),
Q!(" ldp " "x11, x15, [x1, #16]"),
// From here on scalar and vector instructions are interleaved.
// NOTE(review): the ordering looks hand-scheduled, presumably for
// throughput; carry flags flow between non-adjacent instructions, so treat
// the statement order as fixed.
Q!(" rev64 " "v16.4S, v20.4S"),
// x13 = |x[0] - x[1]|, x3 = all-ones when that subtraction borrowed.
Q!(" subs " "x4, x7, x17"),
Q!(" csetm " "x3, cc"),
Q!(" cneg " "x13, x4, cc"),
Q!(" mul " "v16.4S, v16.4S, v0.4S"),
Q!(" umulh " "x12, x17, x10"),
Q!(" uzp1 " "v28.4S, v20.4S, v0.4S"),
// (x5:x14) = (x[3]:x[2]) - (x[1]:x[0]); x17 = all-ones when it borrowed.
Q!(" subs " "x14, x11, x7"),
Q!(" ldr " "q20, [x2, #16]"),
Q!(" sbcs " "x5, x15, x17"),
Q!(" ngc " "x17, xzr"),
// x11 = |x[2] - x[3]| with its sign mask in x15.
Q!(" subs " "x8, x11, x15"),
Q!(" uaddlp " "v27.2D, v16.4S"),
Q!(" umulh " "x4, x7, x6"),
Q!(" uzp1 " "v21.4S, v0.4S, v0.4S"),
Q!(" cneg " "x11, x8, cc"),
Q!(" shl " "v17.2D, v27.2D, #32"),
Q!(" csetm " "x15, cc"),
// x8 = |y[1] - y[0]|; x9 folds this sign into the mask from x3.
Q!(" subs " "x9, x10, x6"),
Q!(" eor " "x7, x14, x17"),
Q!(" umlal " "v17.2D, v21.2S, v28.2S"),
Q!(" cneg " "x8, x9, cc"),
Q!(" cinv " "x9, x3, cc"),
Q!(" cmn " "x17, #0x1"),
// Last load through x1; x1 is reused as a scratch register afterwards.
Q!(" ldr " "q28, [x1, #16]"),
Q!(" adcs " "x14, x7, xzr"),
Q!(" mul " "x7, x13, x8"),
Q!(" eor " "x1, x5, x17"),
Q!(" adcs " "x5, x1, xzr"),
Q!(" xtn " "v1.2S, v20.2D"),
// Move the two 64-bit lanes computed in v17 into general-purpose registers.
Q!(" mov " "x1, v17.d[0]"),
Q!(" mov " "x3, v17.d[1]"),
Q!(" uzp2 " "v16.4S, v20.4S, v20.4S"),
Q!(" umulh " "x16, x13, x8"),
Q!(" eor " "x13, x7, x9"),
Q!(" adds " "x8, x1, x3"),
Q!(" adcs " "x7, x4, x12"),
Q!(" xtn " "v0.2S, v28.2D"),
Q!(" adcs " "x12, x12, xzr"),
Q!(" adds " "x8, x4, x8"),
Q!(" adcs " "x3, x3, x7"),
// Last load through x2: x7 = y[2], and x2 itself is overwritten with y[3].
Q!(" ldp " "x7, x2, [x2, #16]"),
Q!(" adcs " "x12, x12, xzr"),
Q!(" cmn " "x9, #0x1"),
Q!(" adcs " "x8, x8, x13"),
Q!(" eor " "x13, x16, x9"),
Q!(" adcs " "x16, x3, x13"),
// NOTE(review): the shift-by-32 / subtract / add sequences below (here and
// again starting at the next `lsl ..., #32`) look like reduction steps
// that exploit the shape of the modulus -- confirm against upstream.
Q!(" lsl " "x3, x1, #32"),
Q!(" adc " "x13, x12, x9"),
// (x9:x12) = (y[1]:y[0]) - (y[3]:y[2]); x4 = all-ones when it borrowed.
Q!(" subs " "x12, x6, x7"),
Q!(" sbcs " "x9, x10, x2"),
Q!(" lsr " "x10, x1, #32"),
Q!(" ngc " "x4, xzr"),
// x6 = |y[3] - y[2]|; x2 folds this sign into the mask from x15.
Q!(" subs " "x6, x2, x7"),
Q!(" cinv " "x2, x15, cc"),
Q!(" cneg " "x6, x6, cc"),
Q!(" subs " "x7, x1, x3"),
Q!(" eor " "x9, x9, x4"),
Q!(" sbc " "x1, x1, x10"),
Q!(" adds " "x15, x8, x3"),
Q!(" adcs " "x3, x16, x10"),
Q!(" mul " "x16, x11, x6"),
Q!(" adcs " "x8, x13, x7"),
Q!(" eor " "x13, x12, x4"),
Q!(" adc " "x10, x1, xzr"),
Q!(" cmn " "x4, #0x1"),
Q!(" umulh " "x6, x11, x6"),
Q!(" adcs " "x11, x13, xzr"),
Q!(" adcs " "x1, x9, xzr"),
Q!(" lsl " "x13, x15, #32"),
Q!(" subs " "x12, x15, x13"),
Q!(" lsr " "x7, x15, #32"),
Q!(" sbc " "x15, x15, x7"),
Q!(" adds " "x9, x3, x13"),
Q!(" adcs " "x3, x8, x7"),
Q!(" umulh " "x8, x14, x11"),
Q!(" umull " "v21.2D, v0.2S, v1.2S"),
Q!(" adcs " "x12, x10, x12"),
Q!(" umull " "v3.2D, v0.2S, v16.2S"),
Q!(" adc " "x15, x15, xzr"),
Q!(" rev64 " "v24.4S, v20.4S"),
// Park two intermediate words in z[2..4]; the output buffer doubles as
// scratch storage and is reloaded further down.
Q!(" stp " "x12, x15, [x0, #16]"),
Q!(" movi " "v2.2D, #0x00000000ffffffff"),
Q!(" mul " "x10, x14, x11"),
Q!(" mul " "v4.4S, v24.4S, v28.4S"),
Q!(" subs " "x13, x14, x5"),
Q!(" uzp2 " "v19.4S, v28.4S, v28.4S"),
Q!(" csetm " "x15, cc"),
Q!(" usra " "v3.2D, v21.2D, #32"),
Q!(" mul " "x7, x5, x1"),
Q!(" umull " "v21.2D, v19.2S, v16.2S"),
Q!(" cneg " "x13, x13, cc"),
Q!(" uaddlp " "v5.2D, v4.4S"),
Q!(" subs " "x11, x1, x11"),
Q!(" and " "v16.16B, v3.16B, v2.16B"),
Q!(" umulh " "x5, x5, x1"),
Q!(" shl " "v24.2D, v5.2D, #32"),
Q!(" cneg " "x11, x11, cc"),
Q!(" umlal " "v16.2D, v19.2S, v1.2S"),
Q!(" cinv " "x12, x15, cc"),
Q!(" umlal " "v24.2D, v0.2S, v1.2S"),
Q!(" adds " "x15, x10, x7"),
Q!(" mul " "x14, x13, x11"),
Q!(" eor " "x1, x6, x2"),
Q!(" adcs " "x6, x8, x5"),
// Park two more intermediate words in z[0..2].
Q!(" stp " "x9, x3, [x0]"),
Q!(" usra " "v21.2D, v3.2D, #32"),
Q!(" adcs " "x9, x5, xzr"),
Q!(" umulh " "x11, x13, x11"),
Q!(" adds " "x15, x8, x15"),
Q!(" adcs " "x7, x7, x6"),
Q!(" eor " "x8, x14, x12"),
Q!(" usra " "v21.2D, v16.2D, #32"),
Q!(" adcs " "x13, x9, xzr"),
Q!(" cmn " "x12, #0x1"),
// Move the 64-bit lanes computed in v24 and v21 into general-purpose
// registers.
Q!(" mov " "x9, v24.d[1]"),
Q!(" adcs " "x14, x15, x8"),
Q!(" eor " "x6, x11, x12"),
Q!(" adcs " "x6, x7, x6"),
Q!(" mov " "x5, v24.d[0]"),
Q!(" mov " "x11, v21.d[1]"),
Q!(" mov " "x7, v21.d[0]"),
Q!(" adc " "x3, x13, x12"),
Q!(" adds " "x12, x5, x9"),
Q!(" adcs " "x13, x7, x11"),
// Reload the words parked in z[0..2] earlier.
Q!(" ldp " "x15, x8, [x0]"),
Q!(" adcs " "x11, x11, xzr"),
Q!(" adds " "x12, x7, x12"),
Q!(" eor " "x16, x16, x2"),
Q!(" adcs " "x7, x9, x13"),
Q!(" adcs " "x11, x11, xzr"),
Q!(" cmn " "x2, #0x1"),
// Reload the words parked in z[2..4] earlier.
Q!(" ldp " "x9, x13, [x0, #16]"),
Q!(" adcs " "x16, x12, x16"),
Q!(" adcs " "x1, x7, x1"),
Q!(" adc " "x2, x11, x2"),
Q!(" adds " "x7, x5, x15"),
Q!(" adcs " "x15, x16, x8"),
// x5 combines the two borrow masks held in x17 and x4.
Q!(" eor " "x5, x17, x4"),
Q!(" adcs " "x9, x1, x9"),
Q!(" eor " "x1, x10, x5"),
Q!(" adcs " "x16, x2, x13"),
Q!(" adc " "x2, xzr, xzr"),
Q!(" cmn " "x5, #0x1"),
Q!(" eor " "x13, x14, x5"),
Q!(" adcs " "x14, x1, x7"),
Q!(" eor " "x1, x6, x5"),
Q!(" adcs " "x6, x13, x15"),
Q!(" adcs " "x10, x1, x9"),
Q!(" eor " "x4, x3, x5"),
// x1 = 0xffffffff; it is only consumed by the masked add near the end.
Q!(" mov " "x1, #0xffffffff"),
Q!(" adcs " "x8, x4, x16"),
Q!(" lsr " "x13, x14, #32"),
Q!(" adcs " "x17, x2, x5"),
Q!(" adcs " "x11, x5, xzr"),
Q!(" adc " "x4, x5, xzr"),
Q!(" adds " "x12, x10, x7"),
Q!(" adcs " "x7, x8, x15"),
Q!(" adcs " "x5, x17, x9"),
Q!(" adcs " "x9, x11, x16"),
Q!(" lsl " "x11, x14, #32"),
Q!(" adc " "x10, x4, x2"),
Q!(" subs " "x17, x14, x11"),
Q!(" sbc " "x4, x14, x13"),
Q!(" adds " "x11, x6, x11"),
Q!(" adcs " "x12, x12, x13"),
Q!(" lsl " "x15, x11, #32"),
Q!(" adcs " "x17, x7, x17"),
Q!(" lsr " "x7, x11, #32"),
Q!(" adc " "x13, x4, xzr"),
Q!(" subs " "x4, x11, x15"),
Q!(" sbc " "x11, x11, x7"),
Q!(" adds " "x8, x12, x15"),
Q!(" adcs " "x15, x17, x7"),
Q!(" adcs " "x4, x13, x4"),
Q!(" adc " "x11, x11, xzr"),
Q!(" adds " "x7, x5, x4"),
Q!(" adcs " "x17, x9, x11"),
Q!(" adc " "x13, x10, xzr"),
// Final correction. x12 = x13 + 1 is derived from the top word and is used
// in a multi-word subtraction that leaves its outcome in x13.
// NOTE(review): x13 is presumably 0 or all-ones after the `sbcs` chain
// (a trial-subtraction mask) -- confirm against upstream.
Q!(" add " "x12, x13, #0x1"),
Q!(" neg " "x11, x12"),
Q!(" lsl " "x4, x12, #32"),
Q!(" adds " "x17, x17, x4"),
Q!(" sub " "x4, x4, #0x1"),
Q!(" adc " "x13, x13, xzr"),
Q!(" subs " "x11, x8, x11"),
Q!(" sbcs " "x4, x15, x4"),
Q!(" sbcs " "x7, x7, xzr"),
Q!(" sbcs " "x17, x17, x12"),
Q!(" sbcs " "x13, x13, x12"),
// Add the four-word constant
//   (x13, x13 & 0xffffffff, 0, x13 & 0xffffffff00000001)
// and store the result to z. With x13 all-ones these are the little-endian
// words of 2^256 - 2^224 + 2^192 + 2^96 - 1 (the P-256 field prime); with
// x13 zero nothing is added.
Q!(" mov " "x12, #0xffffffff00000001"),
Q!(" adds " "x11, x11, x13"),
Q!(" and " "x1, x1, x13"),
Q!(" adcs " "x4, x4, x1"),
Q!(" and " "x1, x12, x13"),
Q!(" stp " "x11, x4, [x0]"),
Q!(" adcs " "x4, x7, xzr"),
Q!(" adc " "x1, x17, x1"),
Q!(" stp " "x4, x1, [x0, #16]"),
// Pointer operands are pinned to x0/x1/x2. x1 and x2 are overwritten by the
// code above, and all three are declared as discarded outputs (`=> _`).
inout("x0") z.as_mut_ptr() => _,
inout("x1") x.as_ptr() => _,
inout("x2") y.as_ptr() => _,
// Vector registers used as temporaries.
out("v0") _,
out("v1") _,
out("v16") _,
out("v17") _,
out("v19") _,
out("v2") _,
out("v20") _,
out("v21") _,
out("v24") _,
out("v27") _,
out("v28") _,
out("v3") _,
out("v4") _,
out("v5") _,
// General-purpose registers used as temporaries.
out("x10") _,
out("x11") _,
out("x12") _,
out("x13") _,
out("x14") _,
out("x15") _,
out("x16") _,
out("x17") _,
out("x3") _,
out("x4") _,
out("x5") _,
out("x6") _,
out("x7") _,
out("x8") _,
out("x9") _,
)
};
}