graviola 0.3.4

graviola is a modern, fast cryptography library
Documentation
// generated source. do not edit.
#![allow(non_upper_case_globals, unused_macros, unused_imports)]
use crate::low::macros::*;

// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0

// ----------------------------------------------------------------------------
// Extended Montgomery reduce in 8-digit blocks, results in input-output buffer
// Inputs z[2*k], m[k], w; outputs function return (extra result bit) and z[2*k]
//
//    extern uint64_t bignum_emontredc_8n
//     (uint64_t k, uint64_t *z, uint64_t *m, uint64_t w);
//
// Functionally equivalent to bignum_emontredc (see that file for more detail).
// But in general assumes that the input k is a multiple of 8.
//
// Standard ARM ABI: X0 = k, X1 = z, X2 = m, X3 = w, returns X0
// ----------------------------------------------------------------------------

/// Extended Montgomery reduce in 8-digit blocks, results in input-output buffer
///
/// Inputs z[2*k], m[k], w; outputs function return (extra result bit) and z[2*k]
///
/// Functionally equivalent to bignum_emontredc (see that file for more detail).
/// But in general assumes that the input k is a multiple of 8.
///
/// Here `k` is `m.len()` and `z` must hold `2 * k` words; both conditions are
/// only checked with `debug_assert!`. The assembly walks `m` in 4-word chunks
/// and performs `k / 4` outer iterations. In each outer iteration the four
/// multipliers derived from `w` overwrite the low four words of the current
/// window of `z`, the corresponding multiples of `m` are added into the
/// window, and the window then advances by four words.
///
/// The return value is the carry out of the final addition into the top of
/// `z` (0 or 1). If `k / 4 == 0` nothing is touched and 0 is returned.
pub(crate) fn bignum_emontredc_8n(z: &mut [u64], m: &[u64], w: u64) -> u64 {
    let ret: u64;
    debug_assert!(z.len() == m.len() * 2);
    debug_assert!(z.len().is_multiple_of(8));
    // SAFETY: inline assembly. see [crate::low::inline_assembly_safety] for safety info.
    unsafe {
        core::arch::asm!(

        // Save x19..x28 on the stack; they are restored at the `end` label.
        Q!("    stp             " "x19, x20, [sp, #-16] !"),
        Q!("    stp             " "x21, x22, [sp, #-16] !"),
        Q!("    stp             " "x23, x24, [sp, #-16] !"),
        Q!("    stp             " "x25, x26, [sp, #-16] !"),
        Q!("    stp             " "x27, x28, [sp, #-16] !"),

        // x0 = k/4, x26 = outer loop counter (k/4).
        // If k/4 == 0 jump straight to the end, returning x0 = 0.
        // Otherwise x28 = 0 (top carry mask) and x0 = (k/4 - 1) << 5, which is
        // both the inner loop byte count and the pointer rewind amount.
        Q!("    lsr             " "x0, x0, #2"),
        Q!("    mov             " "x26, x0"),
        Q!("    subs            " "x12, x0, #0x1"),
        Q!("    b.cc            " Label!("bignum_emontredc_8n_end", 2, After)),
        Q!("    mov             " "x28, xzr"),
        Q!("    lsl             " "x0, x12, #5"),

        Q!(Label!("bignum_emontredc_8n_outerloop", 3) ":"),
        // Load the low four words of the current z window (x17, x19, x20, x21)
        // and the low four words of m (x8..x11).
        Q!("    ldp             " "x17, x19, [x1]"),
        Q!("    ldp             " "x20, x21, [x1, #16]"),
        Q!("    ldp             " "x8, x9, [x2]"),
        Q!("    ldp             " "x10, x11, [x2, #16]"),

        // Montgomery step 0: multiplier x4 = z0 * w, then add x4 * m[0..4].
        Q!("    mul             " "x4, x17, x3"),
        Q!("    mul             " "x12, x4, x8"),
        Q!("    mul             " "x13, x4, x9"),
        Q!("    mul             " "x14, x4, x10"),
        Q!("    mul             " "x15, x4, x11"),
        Q!("    adds            " "x17, x17, x12"),
        Q!("    umulh           " "x12, x4, x8"),
        Q!("    adcs            " "x19, x19, x13"),
        Q!("    umulh           " "x13, x4, x9"),
        Q!("    adcs            " "x20, x20, x14"),
        Q!("    umulh           " "x14, x4, x10"),
        Q!("    adcs            " "x21, x21, x15"),
        Q!("    umulh           " "x15, x4, x11"),
        Q!("    adc             " "x22, xzr, xzr"),
        Q!("    adds            " "x19, x19, x12"),
        Q!("    adcs            " "x20, x20, x13"),
        Q!("    adcs            " "x21, x21, x14"),
        Q!("    adc             " "x22, x22, x15"),

        // Montgomery step 1: multiplier x5 from the new lowest word x19.
        Q!("    mul             " "x5, x19, x3"),
        Q!("    mul             " "x12, x5, x8"),
        Q!("    mul             " "x13, x5, x9"),
        Q!("    mul             " "x14, x5, x10"),
        Q!("    mul             " "x15, x5, x11"),
        Q!("    adds            " "x19, x19, x12"),
        Q!("    umulh           " "x12, x5, x8"),
        Q!("    adcs            " "x20, x20, x13"),
        Q!("    umulh           " "x13, x5, x9"),
        Q!("    adcs            " "x21, x21, x14"),
        Q!("    umulh           " "x14, x5, x10"),
        Q!("    adcs            " "x22, x22, x15"),
        Q!("    umulh           " "x15, x5, x11"),
        Q!("    adc             " "x23, xzr, xzr"),
        Q!("    adds            " "x20, x20, x12"),
        Q!("    adcs            " "x21, x21, x13"),
        Q!("    adcs            " "x22, x22, x14"),
        Q!("    adc             " "x23, x23, x15"),

        // Montgomery step 2: multiplier x6 from x20.
        Q!("    mul             " "x6, x20, x3"),
        Q!("    mul             " "x12, x6, x8"),
        Q!("    mul             " "x13, x6, x9"),
        Q!("    mul             " "x14, x6, x10"),
        Q!("    mul             " "x15, x6, x11"),
        Q!("    adds            " "x20, x20, x12"),
        Q!("    umulh           " "x12, x6, x8"),
        Q!("    adcs            " "x21, x21, x13"),
        Q!("    umulh           " "x13, x6, x9"),
        Q!("    adcs            " "x22, x22, x14"),
        Q!("    umulh           " "x14, x6, x10"),
        Q!("    adcs            " "x23, x23, x15"),
        Q!("    umulh           " "x15, x6, x11"),
        Q!("    adc             " "x24, xzr, xzr"),
        Q!("    adds            " "x21, x21, x12"),
        Q!("    adcs            " "x22, x22, x13"),
        Q!("    adcs            " "x23, x23, x14"),
        Q!("    adc             " "x24, x24, x15"),

        // Montgomery step 3: multiplier x7 from x21. The four surviving high
        // words land in x12..x15 and are the carry into the next chunk.
        Q!("    mul             " "x7, x21, x3"),
        Q!("    mul             " "x12, x7, x8"),
        Q!("    mul             " "x13, x7, x9"),
        Q!("    mul             " "x14, x7, x10"),
        Q!("    mul             " "x15, x7, x11"),
        Q!("    adds            " "x21, x21, x12"),
        Q!("    umulh           " "x12, x7, x8"),
        Q!("    adcs            " "x22, x22, x13"),
        Q!("    umulh           " "x13, x7, x9"),
        Q!("    adcs            " "x23, x23, x14"),
        Q!("    umulh           " "x14, x7, x10"),
        Q!("    adcs            " "x24, x24, x15"),
        Q!("    umulh           " "x15, x7, x11"),
        Q!("    adc             " "x25, xzr, xzr"),
        Q!("    adds            " "x12, x22, x12"),
        Q!("    adcs            " "x13, x23, x13"),
        Q!("    adcs            " "x14, x24, x14"),
        Q!("    adc             " "x15, x25, x15"),

        // Store the four multipliers over the low four words of the window.
        Q!("    stp             " "x4, x5, [x1]"),
        Q!("    stp             " "x6, x7, [x1, #16]"),

        // The multiply-add loop below tests its counter only after the body
        // has run. When k/4 == 1 there are no remaining chunks (x0 == 0), so
        // it must be skipped entirely: otherwise the counter wraps and the
        // loop walks far beyond the end of both z and m.
        Q!("    cbz             " "x0, " Label!("bignum_emontredc_8n_madddone", 5, After)),
        Q!("    mov             " "x27, x0"),

        Q!(Label!("bignum_emontredc_8n_maddloop", 4) ":"),
        // Advance to the next 4-word chunk of m and z and load m's words.
        Q!("    add             " "x2, x2, #0x20"),
        Q!("    add             " "x1, x1, #0x20"),
        Q!("    ldp             " "x8, x9, [x2]"),
        Q!("    ldp             " "x10, x11, [x2, #16]"),

        // Diagonal products x4*x8, x5*x9, x6*x10, x7*x11, chained into
        // x17, x22, x23, x24, x25.
        Q!("    mul             " "x17, x4, x8"),
        Q!("    mul             " "x22, x5, x9"),
        Q!("    mul             " "x23, x6, x10"),
        Q!("    mul             " "x24, x7, x11"),
        Q!("    umulh           " "x16, x4, x8"),
        Q!("    adds            " "x22, x22, x16"),
        Q!("    umulh           " "x16, x5, x9"),
        Q!("    adcs            " "x23, x23, x16"),
        Q!("    umulh           " "x16, x6, x10"),
        Q!("    adcs            " "x24, x24, x16"),
        Q!("    umulh           " "x16, x7, x11"),
        Q!("    adc             " "x25, x16, xzr"),

        // Add the current z chunk into the incoming carry x12..x15; x16
        // receives the carry out of that addition.
        Q!("    ldp             " "x20, x21, [x1]"),
        Q!("    adds            " "x12, x12, x20"),
        Q!("    adcs            " "x13, x13, x21"),
        Q!("    ldp             " "x20, x21, [x1, #16]"),
        Q!("    adcs            " "x14, x14, x20"),
        Q!("    adcs            " "x15, x15, x21"),
        Q!("    adc             " "x16, xzr, xzr"),

        // Spread the diagonal sum across the 8-word accumulator
        // x17, x19, x20, x21, x22, x23, x24, x25.
        Q!("    adds            " "x19, x22, x17"),
        Q!("    adcs            " "x22, x23, x22"),
        Q!("    adcs            " "x23, x24, x23"),
        Q!("    adcs            " "x24, x25, x24"),
        Q!("    adc             " "x25, xzr, x25"),
        Q!("    adds            " "x20, x22, x17"),
        Q!("    adcs            " "x21, x23, x19"),
        Q!("    adcs            " "x22, x24, x22"),
        Q!("    adcs            " "x23, x25, x23"),
        Q!("    adcs            " "x24, xzr, x24"),
        Q!("    adc             " "x25, xzr, x25"),

        // Fold in the carry-plus-z value x12..x16.
        Q!("    adds            " "x17, x17, x12"),
        Q!("    adcs            " "x19, x19, x13"),
        Q!("    adcs            " "x20, x20, x14"),
        Q!("    adcs            " "x21, x21, x15"),
        Q!("    adcs            " "x22, x22, x16"),
        Q!("    adcs            " "x23, x23, xzr"),
        Q!("    adcs            " "x24, x24, xzr"),
        Q!("    adc             " "x25, x25, xzr"),

        // Six signed cross terms of the form (a_i - a_j) * (b_j - b_i), each
        // formed from absolute differences with x12 as the sign mask.

        // (x6 - x7) * (x11 - x10), added from x23 upwards.
        Q!("    subs            " "x15, x6, x7"),
        Q!("    cneg            " "x15, x15, cc"),
        Q!("    csetm           " "x12, cc"),
        Q!("    subs            " "x13, x11, x10"),
        Q!("    cneg            " "x13, x13, cc"),
        Q!("    mul             " "x14, x15, x13"),
        Q!("    umulh           " "x13, x15, x13"),
        Q!("    cinv            " "x12, x12, cc"),
        Q!("    cmn             " "x12, #0x1"),
        Q!("    eor             " "x14, x14, x12"),
        Q!("    adcs            " "x23, x23, x14"),
        Q!("    eor             " "x13, x13, x12"),
        Q!("    adcs            " "x24, x24, x13"),
        Q!("    adc             " "x25, x25, x12"),

        // (x4 - x5) * (x9 - x8), added from x19 upwards.
        Q!("    subs            " "x15, x4, x5"),
        Q!("    cneg            " "x15, x15, cc"),
        Q!("    csetm           " "x12, cc"),
        Q!("    subs            " "x13, x9, x8"),
        Q!("    cneg            " "x13, x13, cc"),
        Q!("    mul             " "x14, x15, x13"),
        Q!("    umulh           " "x13, x15, x13"),
        Q!("    cinv            " "x12, x12, cc"),
        Q!("    cmn             " "x12, #0x1"),
        Q!("    eor             " "x14, x14, x12"),
        Q!("    adcs            " "x19, x19, x14"),
        Q!("    eor             " "x13, x13, x12"),
        Q!("    adcs            " "x20, x20, x13"),
        Q!("    adcs            " "x21, x21, x12"),
        Q!("    adcs            " "x22, x22, x12"),
        Q!("    adcs            " "x23, x23, x12"),
        Q!("    adcs            " "x24, x24, x12"),
        Q!("    adc             " "x25, x25, x12"),

        // (x5 - x7) * (x11 - x9), added from x22 upwards.
        Q!("    subs            " "x15, x5, x7"),
        Q!("    cneg            " "x15, x15, cc"),
        Q!("    csetm           " "x12, cc"),
        Q!("    subs            " "x13, x11, x9"),
        Q!("    cneg            " "x13, x13, cc"),
        Q!("    mul             " "x14, x15, x13"),
        Q!("    umulh           " "x13, x15, x13"),
        Q!("    cinv            " "x12, x12, cc"),
        Q!("    cmn             " "x12, #0x1"),
        Q!("    eor             " "x14, x14, x12"),
        Q!("    adcs            " "x22, x22, x14"),
        Q!("    eor             " "x13, x13, x12"),
        Q!("    adcs            " "x23, x23, x13"),
        Q!("    adcs            " "x24, x24, x12"),
        Q!("    adc             " "x25, x25, x12"),

        // (x4 - x6) * (x10 - x8), added from x20 upwards.
        Q!("    subs            " "x15, x4, x6"),
        Q!("    cneg            " "x15, x15, cc"),
        Q!("    csetm           " "x12, cc"),
        Q!("    subs            " "x13, x10, x8"),
        Q!("    cneg            " "x13, x13, cc"),
        Q!("    mul             " "x14, x15, x13"),
        Q!("    umulh           " "x13, x15, x13"),
        Q!("    cinv            " "x12, x12, cc"),
        Q!("    cmn             " "x12, #0x1"),
        Q!("    eor             " "x14, x14, x12"),
        Q!("    adcs            " "x20, x20, x14"),
        Q!("    eor             " "x13, x13, x12"),
        Q!("    adcs            " "x21, x21, x13"),
        Q!("    adcs            " "x22, x22, x12"),
        Q!("    adcs            " "x23, x23, x12"),
        Q!("    adcs            " "x24, x24, x12"),
        Q!("    adc             " "x25, x25, x12"),

        // (x4 - x7) * (x11 - x8), added from x21 upwards.
        Q!("    subs            " "x15, x4, x7"),
        Q!("    cneg            " "x15, x15, cc"),
        Q!("    csetm           " "x12, cc"),
        Q!("    subs            " "x13, x11, x8"),
        Q!("    cneg            " "x13, x13, cc"),
        Q!("    mul             " "x14, x15, x13"),
        Q!("    umulh           " "x13, x15, x13"),
        Q!("    cinv            " "x12, x12, cc"),
        Q!("    cmn             " "x12, #0x1"),
        Q!("    eor             " "x14, x14, x12"),
        Q!("    adcs            " "x21, x21, x14"),
        Q!("    eor             " "x13, x13, x12"),
        Q!("    adcs            " "x22, x22, x13"),
        Q!("    adcs            " "x23, x23, x12"),
        Q!("    adcs            " "x24, x24, x12"),
        Q!("    adc             " "x25, x25, x12"),

        // (x5 - x6) * (x10 - x9), added from x21 upwards. The top three
        // words are written directly into the outgoing carry x13..x15.
        Q!("    subs            " "x15, x5, x6"),
        Q!("    cneg            " "x15, x15, cc"),
        Q!("    csetm           " "x12, cc"),
        Q!("    subs            " "x13, x10, x9"),
        Q!("    cneg            " "x13, x13, cc"),
        Q!("    mul             " "x14, x15, x13"),
        Q!("    umulh           " "x13, x15, x13"),
        Q!("    cinv            " "x12, x12, cc"),
        Q!("    cmn             " "x12, #0x1"),
        Q!("    eor             " "x14, x14, x12"),
        Q!("    adcs            " "x21, x21, x14"),
        Q!("    eor             " "x13, x13, x12"),
        Q!("    adcs            " "x22, x22, x13"),
        Q!("    adcs            " "x13, x23, x12"),
        Q!("    adcs            " "x14, x24, x12"),
        Q!("    adc             " "x15, x25, x12"),

        // Lowest outgoing carry word goes to x12; the low four accumulator
        // words are written back to z. Loop until the byte counter hits zero.
        Q!("    mov             " "x12, x22"),
        Q!("    stp             " "x17, x19, [x1]"),
        Q!("    stp             " "x20, x21, [x1, #16]"),
        Q!("    subs            " "x27, x27, #0x20"),
        Q!("    b.ne            " Label!("bignum_emontredc_8n_maddloop", 4, Before)),

        Q!(Label!("bignum_emontredc_8n_madddone", 5) ":"),
        // Add the 4-word carry x12..x15 plus the saved top carry into the
        // four words just above the last chunk. `cmn x28, x28` recreates the
        // carry flag from the mask in x28, and `csetm` captures the new one.
        Q!("    ldp             " "x17, x19, [x1, #32]"),
        Q!("    ldp             " "x20, x21, [x1, #48]"),
        Q!("    cmn             " "x28, x28"),
        Q!("    adcs            " "x17, x17, x12"),
        Q!("    adcs            " "x19, x19, x13"),
        Q!("    adcs            " "x20, x20, x14"),
        Q!("    adcs            " "x21, x21, x15"),
        Q!("    csetm           " "x28, cs"),
        Q!("    stp             " "x17, x19, [x1, #32]"),
        Q!("    stp             " "x20, x21, [x1, #48]"),

        // Rewind z and m by the distance the inner loop covered, then move
        // the z window up by four words for the next outer iteration.
        Q!("    sub             " "x1, x1, x0"),
        Q!("    sub             " "x2, x2, x0"),
        Q!("    add             " "x1, x1, #0x20"),
        Q!("    sub             " "x26, x26, #0x1"),
        Q!("    cbnz            " "x26, " Label!("bignum_emontredc_8n_outerloop", 3, Before)),

        // Turn the 0 / all-ones carry mask into the 0 / 1 return value.
        Q!("    neg             " "x0, x28"),

        Q!(Label!("bignum_emontredc_8n_end", 2) ":"),
        // Restore x19..x28 in reverse order of the saves above.
        Q!("    ldp             " "x27, x28, [sp], #16"),
        Q!("    ldp             " "x25, x26, [sp], #16"),
        Q!("    ldp             " "x23, x24, [sp], #16"),
        Q!("    ldp             " "x21, x22, [sp], #16"),
        Q!("    ldp             " "x19, x20, [sp], #16"),
        inout("x0") m.len() => ret,
        inout("x1") z.as_mut_ptr() => _,
        inout("x2") m.as_ptr() => _,
        inout("x3") w => _,
        // clobbers
        // NOTE(review): x19 is written by the assembly but is not listed here;
        // it is preserved by the stp/ldp pair above. Presumably this is because
        // Rust does not accept x19 as an asm operand on AArch64 - confirm
        // against the Rust Reference.
        out("x10") _,
        out("x11") _,
        out("x12") _,
        out("x13") _,
        out("x14") _,
        out("x15") _,
        out("x16") _,
        out("x17") _,
        out("x20") _,
        out("x21") _,
        out("x22") _,
        out("x23") _,
        out("x24") _,
        out("x25") _,
        out("x26") _,
        out("x27") _,
        out("x28") _,
        out("x4") _,
        out("x5") _,
        out("x6") _,
        out("x7") _,
        out("x8") _,
        out("x9") _,
            )
    };
    ret
}