#![allow(non_upper_case_globals, unused_macros, unused_imports)]
use crate::low::macros::*;
/// Word-by-word Montgomery-style reduction of `z` by `m`, processed in
/// 4-word windows, with the result left in `z`.
///
/// For each 4-word window of the low half of `z` the routine derives four
/// 64-bit multipliers (each the low word of `current_word * w`), adds
/// `multiplier * m` into `z` starting at that window, and finally overwrites
/// the window itself with the four multipliers.  The carry out of the top of
/// the touched range is chained from one window to the next.
///
/// * `z` - input/output buffer; expected to hold `2 * m.len()` words.
/// * `m` - modulus words, least-significant word first.
/// * `w` - per-word multiplier constant.  NOTE(review): presumably the negated
///   inverse of `m[0]` modulo 2^64, so that each step zeroes the low word;
///   confirm against callers.
///
/// Returns the final carry bit (0 or 1).  If `m.len() < 4` nothing is
/// processed, `z` is left untouched and 0 is returned.  Only the first
/// `4 * (m.len() / 4)` words of `m` and `8 * (m.len() / 4)` words of `z` are
/// accessed.
///
/// The length requirements are only checked with `debug_assert!`; callers
/// must uphold them in release builds.
pub(crate) fn bignum_emontredc_8n(z: &mut [u64], m: &[u64], w: u64) -> u64 {
    let ret: u64;
    debug_assert!(z.len() == m.len() * 2);
    debug_assert!(z.len().is_multiple_of(8));
    // SAFETY: with `k4 = m.len() / 4`, the assembly reads `m[..4 * k4]` and
    // reads/writes `z[..8 * k4]`, which is in bounds when
    // `z.len() == 2 * m.len()` (checked above in debug builds).  x19..x28 are
    // pushed on entry and popped on exit, so `sp` and the callee-saved
    // registers are restored; every other register written is declared as an
    // operand or clobber.
    unsafe {
        core::arch::asm!(
            // Save x19..x28 on the stack (x19 cannot be named as a clobber).
            Q!(" stp " "x19, x20, [sp, #-16] !"),
            Q!(" stp " "x21, x22, [sp, #-16] !"),
            Q!(" stp " "x23, x24, [sp, #-16] !"),
            Q!(" stp " "x25, x26, [sp, #-16] !"),
            Q!(" stp " "x27, x28, [sp, #-16] !"),

            // x26 = k/4 (outer loop count), x0 = 32 * (k/4 - 1) (inner loop
            // byte count and pointer rewind), x28 = 0 (top-carry mask).
            // If k/4 == 0, skip everything with x0 == 0 as the return value.
            Q!(" lsr " "x0, x0, #2"),
            Q!(" mov " "x26, x0"),
            Q!(" subs " "x12, x0, #0x1"),
            Q!(" b.cc " Label!("bignum_emontredc_8n_end", 2, After)),
            Q!(" mov " "x28, xzr"),
            Q!(" lsl " "x0, x12, #5"),

            Q!(Label!("bignum_emontredc_8n_outerloop", 3) ":"),

            // Load the current 4-word window of z into [x21;x20;x19;x17]
            // and the bottom 4 words of m into [x11;x10;x9;x8].
            Q!(" ldp " "x17, x19, [x1]"),
            Q!(" ldp " "x20, x21, [x1, #16]"),
            Q!(" ldp " "x8, x9, [x2]"),
            Q!(" ldp " "x10, x11, [x2, #16]"),

            // Step 0: multiplier x4 = low64(x17 * w); add x4 * m[0..4] into
            // [x22;x21;x20;x19;x17].  The low word x17 is not used afterwards.
            Q!(" mul " "x4, x17, x3"),
            Q!(" mul " "x12, x4, x8"),
            Q!(" mul " "x13, x4, x9"),
            Q!(" mul " "x14, x4, x10"),
            Q!(" mul " "x15, x4, x11"),
            Q!(" adds " "x17, x17, x12"),
            Q!(" umulh " "x12, x4, x8"),
            Q!(" adcs " "x19, x19, x13"),
            Q!(" umulh " "x13, x4, x9"),
            Q!(" adcs " "x20, x20, x14"),
            Q!(" umulh " "x14, x4, x10"),
            Q!(" adcs " "x21, x21, x15"),
            Q!(" umulh " "x15, x4, x11"),
            Q!(" adc " "x22, xzr, xzr"),
            Q!(" adds " "x19, x19, x12"),
            Q!(" adcs " "x20, x20, x13"),
            Q!(" adcs " "x21, x21, x14"),
            Q!(" adc " "x22, x22, x15"),

            // Step 1: multiplier x5 = low64(x19 * w); add x5 * m[0..4] into
            // [x23;x22;x21;x20;x19].
            Q!(" mul " "x5, x19, x3"),
            Q!(" mul " "x12, x5, x8"),
            Q!(" mul " "x13, x5, x9"),
            Q!(" mul " "x14, x5, x10"),
            Q!(" mul " "x15, x5, x11"),
            Q!(" adds " "x19, x19, x12"),
            Q!(" umulh " "x12, x5, x8"),
            Q!(" adcs " "x20, x20, x13"),
            Q!(" umulh " "x13, x5, x9"),
            Q!(" adcs " "x21, x21, x14"),
            Q!(" umulh " "x14, x5, x10"),
            Q!(" adcs " "x22, x22, x15"),
            Q!(" umulh " "x15, x5, x11"),
            Q!(" adc " "x23, xzr, xzr"),
            Q!(" adds " "x20, x20, x12"),
            Q!(" adcs " "x21, x21, x13"),
            Q!(" adcs " "x22, x22, x14"),
            Q!(" adc " "x23, x23, x15"),

            // Step 2: multiplier x6 = low64(x20 * w); add x6 * m[0..4] into
            // [x24;x23;x22;x21;x20].
            Q!(" mul " "x6, x20, x3"),
            Q!(" mul " "x12, x6, x8"),
            Q!(" mul " "x13, x6, x9"),
            Q!(" mul " "x14, x6, x10"),
            Q!(" mul " "x15, x6, x11"),
            Q!(" adds " "x20, x20, x12"),
            Q!(" umulh " "x12, x6, x8"),
            Q!(" adcs " "x21, x21, x13"),
            Q!(" umulh " "x13, x6, x9"),
            Q!(" adcs " "x22, x22, x14"),
            Q!(" umulh " "x14, x6, x10"),
            Q!(" adcs " "x23, x23, x15"),
            Q!(" umulh " "x15, x6, x11"),
            Q!(" adc " "x24, xzr, xzr"),
            Q!(" adds " "x21, x21, x12"),
            Q!(" adcs " "x22, x22, x13"),
            Q!(" adcs " "x23, x23, x14"),
            Q!(" adc " "x24, x24, x15"),

            // Step 3: multiplier x7 = low64(x21 * w); add x7 * m[0..4] and
            // leave the 4-word carry-out in [x15;x14;x13;x12].
            Q!(" mul " "x7, x21, x3"),
            Q!(" mul " "x12, x7, x8"),
            Q!(" mul " "x13, x7, x9"),
            Q!(" mul " "x14, x7, x10"),
            Q!(" mul " "x15, x7, x11"),
            Q!(" adds " "x21, x21, x12"),
            Q!(" umulh " "x12, x7, x8"),
            Q!(" adcs " "x22, x22, x13"),
            Q!(" umulh " "x13, x7, x9"),
            Q!(" adcs " "x23, x23, x14"),
            Q!(" umulh " "x14, x7, x10"),
            Q!(" adcs " "x24, x24, x15"),
            Q!(" umulh " "x15, x7, x11"),
            Q!(" adc " "x25, xzr, xzr"),
            Q!(" adds " "x12, x22, x12"),
            Q!(" adcs " "x13, x23, x13"),
            Q!(" adcs " "x14, x24, x14"),
            Q!(" adc " "x15, x25, x15"),

            // Overwrite the window with the four multipliers.
            Q!(" stp " "x4, x5, [x1]"),
            Q!(" stp " "x6, x7, [x1, #16]"),

            // There are k/4 - 1 remaining 4-word chunks of m.  When that is
            // zero (x0 == 0) the multiply-add loop must be skipped entirely:
            // entering it would advance past the end of `m` and the
            // `subs`/`b.ne` pair below would wrap the counter instead of
            // terminating.
            Q!(" cbz " "x0, " Label!("bignum_emontredc_8n_madddone", 5, After)),
            Q!(" mov " "x27, x0"),

            Q!(Label!("bignum_emontredc_8n_maddloop", 4) ":"),

            // Advance to the next 4 words of m and of z.
            Q!(" add " "x2, x2, #0x20"),
            Q!(" add " "x1, x1, #0x20"),
            Q!(" ldp " "x8, x9, [x2]"),
            Q!(" ldp " "x10, x11, [x2, #16]"),

            // Diagonal products x4*x8, x5*x9, x6*x10, x7*x11.
            Q!(" mul " "x17, x4, x8"),
            Q!(" mul " "x22, x5, x9"),
            Q!(" mul " "x23, x6, x10"),
            Q!(" mul " "x24, x7, x11"),
            Q!(" umulh " "x16, x4, x8"),
            Q!(" adds " "x22, x22, x16"),
            Q!(" umulh " "x16, x5, x9"),
            Q!(" adcs " "x23, x23, x16"),
            Q!(" umulh " "x16, x6, x10"),
            Q!(" adcs " "x24, x24, x16"),
            Q!(" umulh " "x16, x7, x11"),
            Q!(" adc " "x25, x16, xzr"),

            // Add the 4 words of z at this position to the incoming carry
            // [x15;x14;x13;x12], with the extra carry bit in x16.
            Q!(" ldp " "x20, x21, [x1]"),
            Q!(" adds " "x12, x12, x20"),
            Q!(" adcs " "x13, x13, x21"),
            Q!(" ldp " "x20, x21, [x1, #16]"),
            Q!(" adcs " "x14, x14, x20"),
            Q!(" adcs " "x15, x15, x21"),
            Q!(" adc " "x16, xzr, xzr"),

            // Combine shifted copies of the diagonal terms into the 8-word
            // accumulator [x25;x24;x23;x22;x21;x20;x19;x17].
            Q!(" adds " "x19, x22, x17"),
            Q!(" adcs " "x22, x23, x22"),
            Q!(" adcs " "x23, x24, x23"),
            Q!(" adcs " "x24, x25, x24"),
            Q!(" adc " "x25, xzr, x25"),
            Q!(" adds " "x20, x22, x17"),
            Q!(" adcs " "x21, x23, x19"),
            Q!(" adcs " "x22, x24, x22"),
            Q!(" adcs " "x23, x25, x23"),
            Q!(" adcs " "x24, xzr, x24"),
            Q!(" adc " "x25, xzr, x25"),

            // Fold in the z words plus incoming carry.
            Q!(" adds " "x17, x17, x12"),
            Q!(" adcs " "x19, x19, x13"),
            Q!(" adcs " "x20, x20, x14"),
            Q!(" adcs " "x21, x21, x15"),
            Q!(" adcs " "x22, x22, x16"),
            Q!(" adcs " "x23, x23, xzr"),
            Q!(" adcs " "x24, x24, xzr"),
            Q!(" adc " "x25, x25, xzr"),

            // Each block below adds one signed cross term
            // (a_i - a_j) * (b_j - b_i): absolute differences are multiplied,
            // and x12 holds the sign as an all-zeros/all-ones mask that is
            // applied via eor plus the carry set up by `cmn x12, #1`.

            // (x6 - x7) * (x11 - x10)
            Q!(" subs " "x15, x6, x7"),
            Q!(" cneg " "x15, x15, cc"),
            Q!(" csetm " "x12, cc"),
            Q!(" subs " "x13, x11, x10"),
            Q!(" cneg " "x13, x13, cc"),
            Q!(" mul " "x14, x15, x13"),
            Q!(" umulh " "x13, x15, x13"),
            Q!(" cinv " "x12, x12, cc"),
            Q!(" cmn " "x12, #0x1"),
            Q!(" eor " "x14, x14, x12"),
            Q!(" adcs " "x23, x23, x14"),
            Q!(" eor " "x13, x13, x12"),
            Q!(" adcs " "x24, x24, x13"),
            Q!(" adc " "x25, x25, x12"),

            // (x4 - x5) * (x9 - x8)
            Q!(" subs " "x15, x4, x5"),
            Q!(" cneg " "x15, x15, cc"),
            Q!(" csetm " "x12, cc"),
            Q!(" subs " "x13, x9, x8"),
            Q!(" cneg " "x13, x13, cc"),
            Q!(" mul " "x14, x15, x13"),
            Q!(" umulh " "x13, x15, x13"),
            Q!(" cinv " "x12, x12, cc"),
            Q!(" cmn " "x12, #0x1"),
            Q!(" eor " "x14, x14, x12"),
            Q!(" adcs " "x19, x19, x14"),
            Q!(" eor " "x13, x13, x12"),
            Q!(" adcs " "x20, x20, x13"),
            Q!(" adcs " "x21, x21, x12"),
            Q!(" adcs " "x22, x22, x12"),
            Q!(" adcs " "x23, x23, x12"),
            Q!(" adcs " "x24, x24, x12"),
            Q!(" adc " "x25, x25, x12"),

            // (x5 - x7) * (x11 - x9)
            Q!(" subs " "x15, x5, x7"),
            Q!(" cneg " "x15, x15, cc"),
            Q!(" csetm " "x12, cc"),
            Q!(" subs " "x13, x11, x9"),
            Q!(" cneg " "x13, x13, cc"),
            Q!(" mul " "x14, x15, x13"),
            Q!(" umulh " "x13, x15, x13"),
            Q!(" cinv " "x12, x12, cc"),
            Q!(" cmn " "x12, #0x1"),
            Q!(" eor " "x14, x14, x12"),
            Q!(" adcs " "x22, x22, x14"),
            Q!(" eor " "x13, x13, x12"),
            Q!(" adcs " "x23, x23, x13"),
            Q!(" adcs " "x24, x24, x12"),
            Q!(" adc " "x25, x25, x12"),

            // (x4 - x6) * (x10 - x8)
            Q!(" subs " "x15, x4, x6"),
            Q!(" cneg " "x15, x15, cc"),
            Q!(" csetm " "x12, cc"),
            Q!(" subs " "x13, x10, x8"),
            Q!(" cneg " "x13, x13, cc"),
            Q!(" mul " "x14, x15, x13"),
            Q!(" umulh " "x13, x15, x13"),
            Q!(" cinv " "x12, x12, cc"),
            Q!(" cmn " "x12, #0x1"),
            Q!(" eor " "x14, x14, x12"),
            Q!(" adcs " "x20, x20, x14"),
            Q!(" eor " "x13, x13, x12"),
            Q!(" adcs " "x21, x21, x13"),
            Q!(" adcs " "x22, x22, x12"),
            Q!(" adcs " "x23, x23, x12"),
            Q!(" adcs " "x24, x24, x12"),
            Q!(" adc " "x25, x25, x12"),

            // (x4 - x7) * (x11 - x8)
            Q!(" subs " "x15, x4, x7"),
            Q!(" cneg " "x15, x15, cc"),
            Q!(" csetm " "x12, cc"),
            Q!(" subs " "x13, x11, x8"),
            Q!(" cneg " "x13, x13, cc"),
            Q!(" mul " "x14, x15, x13"),
            Q!(" umulh " "x13, x15, x13"),
            Q!(" cinv " "x12, x12, cc"),
            Q!(" cmn " "x12, #0x1"),
            Q!(" eor " "x14, x14, x12"),
            Q!(" adcs " "x21, x21, x14"),
            Q!(" eor " "x13, x13, x12"),
            Q!(" adcs " "x22, x22, x13"),
            Q!(" adcs " "x23, x23, x12"),
            Q!(" adcs " "x24, x24, x12"),
            Q!(" adc " "x25, x25, x12"),

            // (x5 - x6) * (x10 - x9); the top words are written straight
            // into the carry registers x13..x15 for the next chunk.
            Q!(" subs " "x15, x5, x6"),
            Q!(" cneg " "x15, x15, cc"),
            Q!(" csetm " "x12, cc"),
            Q!(" subs " "x13, x10, x9"),
            Q!(" cneg " "x13, x13, cc"),
            Q!(" mul " "x14, x15, x13"),
            Q!(" umulh " "x13, x15, x13"),
            Q!(" cinv " "x12, x12, cc"),
            Q!(" cmn " "x12, #0x1"),
            Q!(" eor " "x14, x14, x12"),
            Q!(" adcs " "x21, x21, x14"),
            Q!(" eor " "x13, x13, x12"),
            Q!(" adcs " "x22, x22, x13"),
            Q!(" adcs " "x13, x23, x12"),
            Q!(" adcs " "x14, x24, x12"),
            Q!(" adc " "x15, x25, x12"),
            Q!(" mov " "x12, x22"),

            // Store the low 4 result words back to z; the high 4 words stay
            // in [x15;x14;x13;x12] as the carry for the next chunk.
            Q!(" stp " "x17, x19, [x1]"),
            Q!(" stp " "x20, x21, [x1, #16]"),
            Q!(" subs " "x27, x27, #0x20"),
            Q!(" b.ne " Label!("bignum_emontredc_8n_maddloop", 4, Before)),

            Q!(Label!("bignum_emontredc_8n_madddone", 5) ":"),

            // Add the carry words, plus the previous top carry (recreated in
            // the C flag from the x28 mask by `cmn x28, x28`), into the next
            // 4 words of z and record the new top carry as a mask in x28.
            Q!(" ldp " "x17, x19, [x1, #32]"),
            Q!(" ldp " "x20, x21, [x1, #48]"),
            Q!(" cmn " "x28, x28"),
            Q!(" adcs " "x17, x17, x12"),
            Q!(" adcs " "x19, x19, x13"),
            Q!(" adcs " "x20, x20, x14"),
            Q!(" adcs " "x21, x21, x15"),
            Q!(" csetm " "x28, cs"),
            Q!(" stp " "x17, x19, [x1, #32]"),
            Q!(" stp " "x20, x21, [x1, #48]"),

            // Rewind z and m by the inner loop's byte count, then move z on
            // to the next window and count down the outer loop.
            Q!(" sub " "x1, x1, x0"),
            Q!(" sub " "x2, x2, x0"),
            Q!(" add " "x1, x1, #0x20"),
            Q!(" sub " "x26, x26, #0x1"),
            Q!(" cbnz " "x26, " Label!("bignum_emontredc_8n_outerloop", 3, Before)),

            // Turn the carry mask (0 or all-ones) into a 0/1 return value.
            Q!(" neg " "x0, x28"),

            Q!(Label!("bignum_emontredc_8n_end", 2) ":"),

            // Restore the saved registers in reverse order.
            Q!(" ldp " "x27, x28, [sp], #16"),
            Q!(" ldp " "x25, x26, [sp], #16"),
            Q!(" ldp " "x23, x24, [sp], #16"),
            Q!(" ldp " "x21, x22, [sp], #16"),
            Q!(" ldp " "x19, x20, [sp], #16"),
            inout("x0") m.len() => ret,
            inout("x1") z.as_mut_ptr() => _,
            inout("x2") m.as_ptr() => _,
            inout("x3") w => _,
            // clobbers
            out("x10") _,
            out("x11") _,
            out("x12") _,
            out("x13") _,
            out("x14") _,
            out("x15") _,
            out("x16") _,
            out("x17") _,
            out("x20") _,
            out("x21") _,
            out("x22") _,
            out("x23") _,
            out("x24") _,
            out("x25") _,
            out("x26") _,
            out("x27") _,
            out("x28") _,
            out("x4") _,
            out("x5") _,
            out("x6") _,
            out("x7") _,
            out("x8") _,
            out("x9") _,
        )
    };
    ret
}