#include <wolfssl/wolfcrypt/libwolfssl_sources_asm.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM
#ifdef WOLFSSL_ARMASM_THUMB2
#ifdef WOLFSSL_ARMASM_INLINE
#ifdef __IAR_SYSTEMS_ICC__
#define __asm__ asm
#define __volatile__ volatile
#define WOLFSSL_NO_VAR_ASSIGN_REG
#endif
#ifdef __KEIL__
#define __asm__ __asm
#define __volatile__ volatile
#endif
#include <wolfssl/wolfcrypt/wc_mlkem.h>
#ifdef WOLFSSL_WC_MLKEM
/* Constant table for mlkem_thumb2_ntt(): 128 16-bit values (16 rows of 8).
 *
 * mlkem_thumb2_ntt() takes the address of this table, keeps it in r1 and
 * reads entries either one at a time (LDRH) or two adjacent entries at once
 * (32-bit LDR), using the loaded value as a multiplier for the coefficient
 * halfwords. The entry order therefore has to match the byte offsets
 * hard-coded in that routine.
 *
 * NOTE(review): the first entry, 0x08ed, equals 2^16 mod 0xd01 (3329), which
 * suggests the values are stored in Montgomery form - confirm against the
 * script that generates this file.
 * NOTE(review): XALIGNED is defined elsewhere; presumably it requests 16-byte
 * alignment, which would keep the 32-bit loads word-aligned - confirm.
 */
XALIGNED(16) static const word16 L_mlkem_thumb2_ntt_zetas[] = {
0x08ed, 0x0a0b, 0x0b9a, 0x0714, 0x05d5, 0x058e, 0x011f, 0x00ca,
0x0c56, 0x026e, 0x0629, 0x00b6, 0x03c2, 0x084f, 0x073f, 0x05bc,
0x023d, 0x07d4, 0x0108, 0x017f, 0x09c4, 0x05b2, 0x06bf, 0x0c7f,
0x0a58, 0x03f9, 0x02dc, 0x0260, 0x06fb, 0x019b, 0x0c34, 0x06de,
0x04c7, 0x028c, 0x0ad9, 0x03f7, 0x07f4, 0x05d3, 0x0be7, 0x06f9,
0x0204, 0x0cf9, 0x0bc1, 0x0a67, 0x06af, 0x0877, 0x007e, 0x05bd,
0x09ac, 0x0ca7, 0x0bf2, 0x033e, 0x006b, 0x0774, 0x0c0a, 0x094a,
0x0b73, 0x03c1, 0x071d, 0x0a2c, 0x01c0, 0x08d8, 0x02a5, 0x0806,
0x08b2, 0x01ae, 0x022b, 0x034b, 0x081e, 0x0367, 0x060e, 0x0069,
0x01a6, 0x024b, 0x00b1, 0x0c16, 0x0bde, 0x0b35, 0x0626, 0x0675,
0x0c0b, 0x030a, 0x0487, 0x0c6e, 0x09f8, 0x05cb, 0x0aa7, 0x045f,
0x06cb, 0x0284, 0x0999, 0x015d, 0x01a2, 0x0149, 0x0c65, 0x0cb6,
0x0331, 0x0449, 0x025b, 0x0262, 0x052a, 0x07fc, 0x0748, 0x0180,
0x0842, 0x0c79, 0x04c2, 0x07ca, 0x0997, 0x00dc, 0x085e, 0x0686,
0x0860, 0x0707, 0x0803, 0x031a, 0x071b, 0x09ab, 0x099b, 0x01de,
0x0c95, 0x0bcd, 0x03e4, 0x03df, 0x03be, 0x074d, 0x05f2, 0x065c,
};
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r_p)
#else
WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r)
#endif
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
register sword16* r __asm__ ("r0") = (sword16*)r_p;
register word16* L_mlkem_thumb2_ntt_zetas_c __asm__ ("r1") =
(word16*)&L_mlkem_thumb2_ntt_zetas;
#else
register word16* L_mlkem_thumb2_ntt_zetas_c =
(word16*)&L_mlkem_thumb2_ntt_zetas;
#endif
__asm__ __volatile__ (
"SUB sp, sp, #0x8\n\t"
"MOV r1, %[L_mlkem_thumb2_ntt_zetas]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"MOV r12, #0xd01\n\t"
"MOVT r12, #0xcff\n\t"
#endif
"MOV r2, #0x10\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_ntt_loop_123:\n\t"
#else
"L_mlkem_thumb2_ntt_loop_123_%=:\n\t"
#endif
"STR r2, [sp]\n\t"
"LDRH lr, [r1, #2]\n\t"
"LDR r2, [%[r]]\n\t"
"LDR r3, [%[r], #64]\n\t"
"LDR r4, [%[r], #128]\n\t"
"LDR r5, [%[r], #192]\n\t"
"LDR r6, [%[r], #256]\n\t"
"LDR r7, [%[r], #320]\n\t"
"LDR r8, [%[r], #384]\n\t"
"LDR r9, [%[r], #448]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r6\n\t"
"SMULBT r6, lr, r6\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r6\n\t"
"SMLABB r11, r12, r11, r6\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r6, r2, r10\n\t"
"SADD16 r2, r2, r10\n\t"
#else
"SBFX r10, r6, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r6, r6, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r6, r11, r6\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r6, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r6\n\t"
"SUB r6, r2, r11\n\t"
"ADD r2, r2, r11\n\t"
"SUB r11, r2, r10, LSR #16\n\t"
"ADD r10, r2, r10, LSR #16\n\t"
"BFI r6, r11, #0, #16\n\t"
"BFI r2, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r7\n\t"
"SMULBT r7, lr, r7\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r7\n\t"
"SMLABB r11, r12, r11, r7\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r7, r3, r10\n\t"
"SADD16 r3, r3, r10\n\t"
#else
"SBFX r10, r7, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r7, r7, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r7, r11, r7\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r7, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r7\n\t"
"SUB r7, r3, r11\n\t"
"ADD r3, r3, r11\n\t"
"SUB r11, r3, r10, LSR #16\n\t"
"ADD r10, r3, r10, LSR #16\n\t"
"BFI r7, r11, #0, #16\n\t"
"BFI r3, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r8\n\t"
"SMULBT r8, lr, r8\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r8\n\t"
"SMLABB r11, r12, r11, r8\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r8, r4, r10\n\t"
"SADD16 r4, r4, r10\n\t"
#else
"SBFX r10, r8, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r8, r8, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r8, r11, r8\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r8, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r8\n\t"
"SUB r8, r4, r11\n\t"
"ADD r4, r4, r11\n\t"
"SUB r11, r4, r10, LSR #16\n\t"
"ADD r10, r4, r10, LSR #16\n\t"
"BFI r8, r11, #0, #16\n\t"
"BFI r4, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r9\n\t"
"SMULBT r9, lr, r9\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r9\n\t"
"SMLABB r11, r12, r11, r9\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r9, r5, r10\n\t"
"SADD16 r5, r5, r10\n\t"
#else
"SBFX r10, r9, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r9, r9, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r9, r11, r9\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r9, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r9\n\t"
"SUB r9, r5, r11\n\t"
"ADD r5, r5, r11\n\t"
"SUB r11, r5, r10, LSR #16\n\t"
"ADD r10, r5, r10, LSR #16\n\t"
"BFI r9, r11, #0, #16\n\t"
"BFI r5, r10, #0, #16\n\t"
#endif
"LDR lr, [r1, #4]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r4\n\t"
"SMULBT r4, lr, r4\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r4\n\t"
"SMLABB r11, r12, r11, r4\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r4, r2, r10\n\t"
"SADD16 r2, r2, r10\n\t"
#else
"SBFX r10, r4, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r4, r4, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r4, r11, r4\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r4, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r4\n\t"
"SUB r4, r2, r11\n\t"
"ADD r2, r2, r11\n\t"
"SUB r11, r2, r10, LSR #16\n\t"
"ADD r10, r2, r10, LSR #16\n\t"
"BFI r4, r11, #0, #16\n\t"
"BFI r2, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r5\n\t"
"SMULBT r5, lr, r5\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r5\n\t"
"SMLABB r11, r12, r11, r5\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r5, r3, r10\n\t"
"SADD16 r3, r3, r10\n\t"
#else
"SBFX r10, r5, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r5, r5, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r5, r11, r5\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r5, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r5\n\t"
"SUB r5, r3, r11\n\t"
"ADD r3, r3, r11\n\t"
"SUB r11, r3, r10, LSR #16\n\t"
"ADD r10, r3, r10, LSR #16\n\t"
"BFI r5, r11, #0, #16\n\t"
"BFI r3, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULTB r10, lr, r8\n\t"
"SMULTT r8, lr, r8\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r8\n\t"
"SMLABB r11, r12, r11, r8\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r8, r6, r10\n\t"
"SADD16 r6, r6, r10\n\t"
#else
"SBFX r10, r8, #0, #16\n\t"
"SBFX r11, lr, #16, #16\n\t"
"ASR r8, r8, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r8, r11, r8\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r8, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r8\n\t"
"SUB r8, r6, r11\n\t"
"ADD r6, r6, r11\n\t"
"SUB r11, r6, r10, LSR #16\n\t"
"ADD r10, r6, r10, LSR #16\n\t"
"BFI r8, r11, #0, #16\n\t"
"BFI r6, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULTB r10, lr, r9\n\t"
"SMULTT r9, lr, r9\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r9\n\t"
"SMLABB r11, r12, r11, r9\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r9, r7, r10\n\t"
"SADD16 r7, r7, r10\n\t"
#else
"SBFX r10, r9, #0, #16\n\t"
"SBFX r11, lr, #16, #16\n\t"
"ASR r9, r9, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r9, r11, r9\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r9, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r9\n\t"
"SUB r9, r7, r11\n\t"
"ADD r7, r7, r11\n\t"
"SUB r11, r7, r10, LSR #16\n\t"
"ADD r10, r7, r10, LSR #16\n\t"
"BFI r9, r11, #0, #16\n\t"
"BFI r7, r10, #0, #16\n\t"
#endif
"LDR lr, [r1, #8]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r3\n\t"
"SMULBT r3, lr, r3\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r3\n\t"
"SMLABB r11, r12, r11, r3\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r3, r2, r10\n\t"
"SADD16 r2, r2, r10\n\t"
#else
"SBFX r10, r3, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r3, r3, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r3, r11, r3\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r3, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r3\n\t"
"SUB r3, r2, r11\n\t"
"ADD r2, r2, r11\n\t"
"SUB r11, r2, r10, LSR #16\n\t"
"ADD r10, r2, r10, LSR #16\n\t"
"BFI r3, r11, #0, #16\n\t"
"BFI r2, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULTB r10, lr, r5\n\t"
"SMULTT r5, lr, r5\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r5\n\t"
"SMLABB r11, r12, r11, r5\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r5, r4, r10\n\t"
"SADD16 r4, r4, r10\n\t"
#else
"SBFX r10, r5, #0, #16\n\t"
"SBFX r11, lr, #16, #16\n\t"
"ASR r5, r5, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r5, r11, r5\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r5, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r5\n\t"
"SUB r5, r4, r11\n\t"
"ADD r4, r4, r11\n\t"
"SUB r11, r4, r10, LSR #16\n\t"
"ADD r10, r4, r10, LSR #16\n\t"
"BFI r5, r11, #0, #16\n\t"
"BFI r4, r10, #0, #16\n\t"
#endif
"LDR lr, [r1, #12]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r7\n\t"
"SMULBT r7, lr, r7\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r7\n\t"
"SMLABB r11, r12, r11, r7\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r7, r6, r10\n\t"
"SADD16 r6, r6, r10\n\t"
#else
"SBFX r10, r7, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r7, r7, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r7, r11, r7\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r7, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r7\n\t"
"SUB r7, r6, r11\n\t"
"ADD r6, r6, r11\n\t"
"SUB r11, r6, r10, LSR #16\n\t"
"ADD r10, r6, r10, LSR #16\n\t"
"BFI r7, r11, #0, #16\n\t"
"BFI r6, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULTB r10, lr, r9\n\t"
"SMULTT r9, lr, r9\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r9\n\t"
"SMLABB r11, r12, r11, r9\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r9, r8, r10\n\t"
"SADD16 r8, r8, r10\n\t"
#else
"SBFX r10, r9, #0, #16\n\t"
"SBFX r11, lr, #16, #16\n\t"
"ASR r9, r9, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r9, r11, r9\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r9, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r9\n\t"
"SUB r9, r8, r11\n\t"
"ADD r8, r8, r11\n\t"
"SUB r11, r8, r10, LSR #16\n\t"
"ADD r10, r8, r10, LSR #16\n\t"
"BFI r9, r11, #0, #16\n\t"
"BFI r8, r10, #0, #16\n\t"
#endif
"STR r2, [%[r]]\n\t"
"STR r3, [%[r], #64]\n\t"
"STR r4, [%[r], #128]\n\t"
"STR r5, [%[r], #192]\n\t"
"STR r6, [%[r], #256]\n\t"
"STR r7, [%[r], #320]\n\t"
"STR r8, [%[r], #384]\n\t"
"STR r9, [%[r], #448]\n\t"
"LDR r2, [sp]\n\t"
"SUBS r2, r2, #0x1\n\t"
"ADD %[r], %[r], #0x4\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_thumb2_ntt_loop_123_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BNE.N L_mlkem_thumb2_ntt_loop_123\n\t"
#else
"BNE.N L_mlkem_thumb2_ntt_loop_123_%=\n\t"
#endif
"SUB %[r], %[r], #0x40\n\t"
"MOV r3, #0x0\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_ntt_loop_4_j:\n\t"
#else
"L_mlkem_thumb2_ntt_loop_4_j_%=:\n\t"
#endif
"STR r3, [sp, #4]\n\t"
"ADD lr, r1, r3, LSR #4\n\t"
"MOV r2, #0x4\n\t"
"LDR lr, [lr, #16]\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_ntt_loop_4_i:\n\t"
#else
"L_mlkem_thumb2_ntt_loop_4_i_%=:\n\t"
#endif
"STR r2, [sp]\n\t"
"LDR r2, [%[r]]\n\t"
"LDR r3, [%[r], #16]\n\t"
"LDR r4, [%[r], #32]\n\t"
"LDR r5, [%[r], #48]\n\t"
"LDR r6, [%[r], #64]\n\t"
"LDR r7, [%[r], #80]\n\t"
"LDR r8, [%[r], #96]\n\t"
"LDR r9, [%[r], #112]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r4\n\t"
"SMULBT r4, lr, r4\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r4\n\t"
"SMLABB r11, r12, r11, r4\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r4, r2, r10\n\t"
"SADD16 r2, r2, r10\n\t"
#else
"SBFX r10, r4, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r4, r4, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r4, r11, r4\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r4, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r4\n\t"
"SUB r4, r2, r11\n\t"
"ADD r2, r2, r11\n\t"
"SUB r11, r2, r10, LSR #16\n\t"
"ADD r10, r2, r10, LSR #16\n\t"
"BFI r4, r11, #0, #16\n\t"
"BFI r2, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r5\n\t"
"SMULBT r5, lr, r5\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r5\n\t"
"SMLABB r11, r12, r11, r5\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r5, r3, r10\n\t"
"SADD16 r3, r3, r10\n\t"
#else
"SBFX r10, r5, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r5, r5, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r5, r11, r5\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r5, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r5\n\t"
"SUB r5, r3, r11\n\t"
"ADD r3, r3, r11\n\t"
"SUB r11, r3, r10, LSR #16\n\t"
"ADD r10, r3, r10, LSR #16\n\t"
"BFI r5, r11, #0, #16\n\t"
"BFI r3, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULTB r10, lr, r8\n\t"
"SMULTT r8, lr, r8\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r8\n\t"
"SMLABB r11, r12, r11, r8\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r8, r6, r10\n\t"
"SADD16 r6, r6, r10\n\t"
#else
"SBFX r10, r8, #0, #16\n\t"
"SBFX r11, lr, #16, #16\n\t"
"ASR r8, r8, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r8, r11, r8\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r8, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r8\n\t"
"SUB r8, r6, r11\n\t"
"ADD r6, r6, r11\n\t"
"SUB r11, r6, r10, LSR #16\n\t"
"ADD r10, r6, r10, LSR #16\n\t"
"BFI r8, r11, #0, #16\n\t"
"BFI r6, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULTB r10, lr, r9\n\t"
"SMULTT r9, lr, r9\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r9\n\t"
"SMLABB r11, r12, r11, r9\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r9, r7, r10\n\t"
"SADD16 r7, r7, r10\n\t"
#else
"SBFX r10, r9, #0, #16\n\t"
"SBFX r11, lr, #16, #16\n\t"
"ASR r9, r9, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r9, r11, r9\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r9, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r9\n\t"
"SUB r9, r7, r11\n\t"
"ADD r7, r7, r11\n\t"
"SUB r11, r7, r10, LSR #16\n\t"
"ADD r10, r7, r10, LSR #16\n\t"
"BFI r9, r11, #0, #16\n\t"
"BFI r7, r10, #0, #16\n\t"
#endif
"STR r2, [%[r]]\n\t"
"STR r3, [%[r], #16]\n\t"
"STR r4, [%[r], #32]\n\t"
"STR r5, [%[r], #48]\n\t"
"STR r6, [%[r], #64]\n\t"
"STR r7, [%[r], #80]\n\t"
"STR r8, [%[r], #96]\n\t"
"STR r9, [%[r], #112]\n\t"
"LDRD r2, r3, [sp]\n\t"
"SUBS r2, r2, #0x1\n\t"
"ADD %[r], %[r], #0x4\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_thumb2_ntt_loop_4_i_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BNE.N L_mlkem_thumb2_ntt_loop_4_i\n\t"
#else
"BNE.N L_mlkem_thumb2_ntt_loop_4_i_%=\n\t"
#endif
"ADD r3, r3, #0x40\n\t"
"RSBS r10, r3, #0x100\n\t"
"ADD %[r], %[r], #0x70\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_thumb2_ntt_loop_4_j_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BNE.N L_mlkem_thumb2_ntt_loop_4_j\n\t"
#else
"BNE.N L_mlkem_thumb2_ntt_loop_4_j_%=\n\t"
#endif
"SUB %[r], %[r], #0x200\n\t"
"MOV r3, #0x0\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_ntt_loop_567:\n\t"
#else
"L_mlkem_thumb2_ntt_loop_567_%=:\n\t"
#endif
"ADD lr, r1, r3, LSR #3\n\t"
"STR r3, [sp, #4]\n\t"
"LDRH lr, [lr, #32]\n\t"
"LDR r2, [%[r]]\n\t"
"LDR r3, [%[r], #4]\n\t"
"LDR r4, [%[r], #8]\n\t"
"LDR r5, [%[r], #12]\n\t"
"LDR r6, [%[r], #16]\n\t"
"LDR r7, [%[r], #20]\n\t"
"LDR r8, [%[r], #24]\n\t"
"LDR r9, [%[r], #28]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r6\n\t"
"SMULBT r6, lr, r6\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r6\n\t"
"SMLABB r11, r12, r11, r6\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r6, r2, r10\n\t"
"SADD16 r2, r2, r10\n\t"
#else
"SBFX r10, r6, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r6, r6, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r6, r11, r6\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r6, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r6\n\t"
"SUB r6, r2, r11\n\t"
"ADD r2, r2, r11\n\t"
"SUB r11, r2, r10, LSR #16\n\t"
"ADD r10, r2, r10, LSR #16\n\t"
"BFI r6, r11, #0, #16\n\t"
"BFI r2, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r7\n\t"
"SMULBT r7, lr, r7\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r7\n\t"
"SMLABB r11, r12, r11, r7\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r7, r3, r10\n\t"
"SADD16 r3, r3, r10\n\t"
#else
"SBFX r10, r7, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r7, r7, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r7, r11, r7\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r7, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r7\n\t"
"SUB r7, r3, r11\n\t"
"ADD r3, r3, r11\n\t"
"SUB r11, r3, r10, LSR #16\n\t"
"ADD r10, r3, r10, LSR #16\n\t"
"BFI r7, r11, #0, #16\n\t"
"BFI r3, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r8\n\t"
"SMULBT r8, lr, r8\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r8\n\t"
"SMLABB r11, r12, r11, r8\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r8, r4, r10\n\t"
"SADD16 r4, r4, r10\n\t"
#else
"SBFX r10, r8, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r8, r8, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r8, r11, r8\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r8, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r8\n\t"
"SUB r8, r4, r11\n\t"
"ADD r4, r4, r11\n\t"
"SUB r11, r4, r10, LSR #16\n\t"
"ADD r10, r4, r10, LSR #16\n\t"
"BFI r8, r11, #0, #16\n\t"
"BFI r4, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r9\n\t"
"SMULBT r9, lr, r9\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r9\n\t"
"SMLABB r11, r12, r11, r9\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r9, r5, r10\n\t"
"SADD16 r5, r5, r10\n\t"
#else
"SBFX r10, r9, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r9, r9, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r9, r11, r9\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r9, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r9\n\t"
"SUB r9, r5, r11\n\t"
"ADD r5, r5, r11\n\t"
"SUB r11, r5, r10, LSR #16\n\t"
"ADD r10, r5, r10, LSR #16\n\t"
"BFI r9, r11, #0, #16\n\t"
"BFI r5, r10, #0, #16\n\t"
#endif
"LDR lr, [sp, #4]\n\t"
"ADD lr, r1, lr, LSR #2\n\t"
"LDR lr, [lr, #64]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r4\n\t"
"SMULBT r4, lr, r4\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r4\n\t"
"SMLABB r11, r12, r11, r4\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r4, r2, r10\n\t"
"SADD16 r2, r2, r10\n\t"
#else
"SBFX r10, r4, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r4, r4, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r4, r11, r4\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r4, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r4\n\t"
"SUB r4, r2, r11\n\t"
"ADD r2, r2, r11\n\t"
"SUB r11, r2, r10, LSR #16\n\t"
"ADD r10, r2, r10, LSR #16\n\t"
"BFI r4, r11, #0, #16\n\t"
"BFI r2, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r5\n\t"
"SMULBT r5, lr, r5\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r5\n\t"
"SMLABB r11, r12, r11, r5\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r5, r3, r10\n\t"
"SADD16 r3, r3, r10\n\t"
#else
"SBFX r10, r5, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r5, r5, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r5, r11, r5\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r5, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r5\n\t"
"SUB r5, r3, r11\n\t"
"ADD r3, r3, r11\n\t"
"SUB r11, r3, r10, LSR #16\n\t"
"ADD r10, r3, r10, LSR #16\n\t"
"BFI r5, r11, #0, #16\n\t"
"BFI r3, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULTB r10, lr, r8\n\t"
"SMULTT r8, lr, r8\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r8\n\t"
"SMLABB r11, r12, r11, r8\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r8, r6, r10\n\t"
"SADD16 r6, r6, r10\n\t"
#else
"SBFX r10, r8, #0, #16\n\t"
"SBFX r11, lr, #16, #16\n\t"
"ASR r8, r8, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r8, r11, r8\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r8, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r8\n\t"
"SUB r8, r6, r11\n\t"
"ADD r6, r6, r11\n\t"
"SUB r11, r6, r10, LSR #16\n\t"
"ADD r10, r6, r10, LSR #16\n\t"
"BFI r8, r11, #0, #16\n\t"
"BFI r6, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULTB r10, lr, r9\n\t"
"SMULTT r9, lr, r9\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r9\n\t"
"SMLABB r11, r12, r11, r9\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r9, r7, r10\n\t"
"SADD16 r7, r7, r10\n\t"
#else
"SBFX r10, r9, #0, #16\n\t"
"SBFX r11, lr, #16, #16\n\t"
"ASR r9, r9, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r9, r11, r9\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r9, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r9\n\t"
"SUB r9, r7, r11\n\t"
"ADD r7, r7, r11\n\t"
"SUB r11, r7, r10, LSR #16\n\t"
"ADD r10, r7, r10, LSR #16\n\t"
"BFI r9, r11, #0, #16\n\t"
"BFI r7, r10, #0, #16\n\t"
#endif
"LDR lr, [sp, #4]\n\t"
"ADD lr, r1, lr, LSR #1\n\t"
"LDR lr, [lr, #128]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r3\n\t"
"SMULBT r3, lr, r3\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r3\n\t"
"SMLABB r11, r12, r11, r3\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r3, r2, r10\n\t"
"SADD16 r2, r2, r10\n\t"
#else
"SBFX r10, r3, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r3, r3, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r3, r11, r3\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r3, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r3\n\t"
"SUB r3, r2, r11\n\t"
"ADD r2, r2, r11\n\t"
"SUB r11, r2, r10, LSR #16\n\t"
"ADD r10, r2, r10, LSR #16\n\t"
"BFI r3, r11, #0, #16\n\t"
"BFI r2, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULTB r10, lr, r5\n\t"
"SMULTT r5, lr, r5\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r5\n\t"
"SMLABB r11, r12, r11, r5\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r5, r4, r10\n\t"
"SADD16 r4, r4, r10\n\t"
#else
"SBFX r10, r5, #0, #16\n\t"
"SBFX r11, lr, #16, #16\n\t"
"ASR r5, r5, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r5, r11, r5\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r5, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r5\n\t"
"SUB r5, r4, r11\n\t"
"ADD r4, r4, r11\n\t"
"SUB r11, r4, r10, LSR #16\n\t"
"ADD r10, r4, r10, LSR #16\n\t"
"BFI r5, r11, #0, #16\n\t"
"BFI r4, r10, #0, #16\n\t"
#endif
"LDR lr, [sp, #4]\n\t"
"ADD lr, r1, lr, LSR #1\n\t"
"LDR lr, [lr, #132]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r7\n\t"
"SMULBT r7, lr, r7\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r7\n\t"
"SMLABB r11, r12, r11, r7\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r7, r6, r10\n\t"
"SADD16 r6, r6, r10\n\t"
#else
"SBFX r10, r7, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r7, r7, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r7, r11, r7\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r7, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r7\n\t"
"SUB r7, r6, r11\n\t"
"ADD r6, r6, r11\n\t"
"SUB r11, r6, r10, LSR #16\n\t"
"ADD r10, r6, r10, LSR #16\n\t"
"BFI r7, r11, #0, #16\n\t"
"BFI r6, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULTB r10, lr, r9\n\t"
"SMULTT r9, lr, r9\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r9\n\t"
"SMLABB r11, r12, r11, r9\n\t"
"PKHTB r10, r11, r10, ASR #16\n\t"
"SSUB16 r9, r8, r10\n\t"
"SADD16 r8, r8, r10\n\t"
#else
"SBFX r10, r9, #0, #16\n\t"
"SBFX r11, lr, #16, #16\n\t"
"ASR r9, r9, #16\n\t"
"MUL r10, r11, r10\n\t"
"MUL r9, r11, r9\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r9, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r11, r12, r11, r9\n\t"
"SUB r9, r8, r11\n\t"
"ADD r8, r8, r11\n\t"
"SUB r11, r8, r10, LSR #16\n\t"
"ADD r10, r8, r10, LSR #16\n\t"
"BFI r9, r11, #0, #16\n\t"
"BFI r8, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"MOV lr, #0xafc0\n\t"
"MOVT lr, #0x13\n\t"
#else
"MOV lr, #0x4ebf\n\t"
"MOV r12, #0xd01\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULWB r10, lr, r2\n\t"
"SMULWT r11, lr, r2\n\t"
"SMULBT r10, r12, r10\n\t"
"SMULBT r11, r12, r11\n\t"
"PKHBT r10, r10, r11, LSL #16\n\t"
"SSUB16 r2, r2, r10\n\t"
#else
"SBFX r10, r2, #0, #16\n\t"
"SBFX r11, r2, #16, #16\n\t"
"MUL r10, lr, r10\n\t"
"MUL r11, lr, r11\n\t"
"ASR r10, r10, #26\n\t"
"ASR r11, r11, #26\n\t"
"MUL r10, r12, r10\n\t"
"MUL r11, r12, r11\n\t"
"SUB r11, r2, r11, LSL #16\n\t"
"SUB r2, r2, r10\n\t"
"LSR r11, r11, #16\n\t"
"BFI r2, r11, #16, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULWB r10, lr, r3\n\t"
"SMULWT r11, lr, r3\n\t"
"SMULBT r10, r12, r10\n\t"
"SMULBT r11, r12, r11\n\t"
"PKHBT r10, r10, r11, LSL #16\n\t"
"SSUB16 r3, r3, r10\n\t"
#else
"SBFX r10, r3, #0, #16\n\t"
"SBFX r11, r3, #16, #16\n\t"
"MUL r10, lr, r10\n\t"
"MUL r11, lr, r11\n\t"
"ASR r10, r10, #26\n\t"
"ASR r11, r11, #26\n\t"
"MUL r10, r12, r10\n\t"
"MUL r11, r12, r11\n\t"
"SUB r11, r3, r11, LSL #16\n\t"
"SUB r3, r3, r10\n\t"
"LSR r11, r11, #16\n\t"
"BFI r3, r11, #16, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULWB r10, lr, r4\n\t"
"SMULWT r11, lr, r4\n\t"
"SMULBT r10, r12, r10\n\t"
"SMULBT r11, r12, r11\n\t"
"PKHBT r10, r10, r11, LSL #16\n\t"
"SSUB16 r4, r4, r10\n\t"
#else
"SBFX r10, r4, #0, #16\n\t"
"SBFX r11, r4, #16, #16\n\t"
"MUL r10, lr, r10\n\t"
"MUL r11, lr, r11\n\t"
"ASR r10, r10, #26\n\t"
"ASR r11, r11, #26\n\t"
"MUL r10, r12, r10\n\t"
"MUL r11, r12, r11\n\t"
"SUB r11, r4, r11, LSL #16\n\t"
"SUB r4, r4, r10\n\t"
"LSR r11, r11, #16\n\t"
"BFI r4, r11, #16, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULWB r10, lr, r5\n\t"
"SMULWT r11, lr, r5\n\t"
"SMULBT r10, r12, r10\n\t"
"SMULBT r11, r12, r11\n\t"
"PKHBT r10, r10, r11, LSL #16\n\t"
"SSUB16 r5, r5, r10\n\t"
#else
"SBFX r10, r5, #0, #16\n\t"
"SBFX r11, r5, #16, #16\n\t"
"MUL r10, lr, r10\n\t"
"MUL r11, lr, r11\n\t"
"ASR r10, r10, #26\n\t"
"ASR r11, r11, #26\n\t"
"MUL r10, r12, r10\n\t"
"MUL r11, r12, r11\n\t"
"SUB r11, r5, r11, LSL #16\n\t"
"SUB r5, r5, r10\n\t"
"LSR r11, r11, #16\n\t"
"BFI r5, r11, #16, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULWB r10, lr, r6\n\t"
"SMULWT r11, lr, r6\n\t"
"SMULBT r10, r12, r10\n\t"
"SMULBT r11, r12, r11\n\t"
"PKHBT r10, r10, r11, LSL #16\n\t"
"SSUB16 r6, r6, r10\n\t"
#else
"SBFX r10, r6, #0, #16\n\t"
"SBFX r11, r6, #16, #16\n\t"
"MUL r10, lr, r10\n\t"
"MUL r11, lr, r11\n\t"
"ASR r10, r10, #26\n\t"
"ASR r11, r11, #26\n\t"
"MUL r10, r12, r10\n\t"
"MUL r11, r12, r11\n\t"
"SUB r11, r6, r11, LSL #16\n\t"
"SUB r6, r6, r10\n\t"
"LSR r11, r11, #16\n\t"
"BFI r6, r11, #16, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULWB r10, lr, r7\n\t"
"SMULWT r11, lr, r7\n\t"
"SMULBT r10, r12, r10\n\t"
"SMULBT r11, r12, r11\n\t"
"PKHBT r10, r10, r11, LSL #16\n\t"
"SSUB16 r7, r7, r10\n\t"
#else
"SBFX r10, r7, #0, #16\n\t"
"SBFX r11, r7, #16, #16\n\t"
"MUL r10, lr, r10\n\t"
"MUL r11, lr, r11\n\t"
"ASR r10, r10, #26\n\t"
"ASR r11, r11, #26\n\t"
"MUL r10, r12, r10\n\t"
"MUL r11, r12, r11\n\t"
"SUB r11, r7, r11, LSL #16\n\t"
"SUB r7, r7, r10\n\t"
"LSR r11, r11, #16\n\t"
"BFI r7, r11, #16, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULWB r10, lr, r8\n\t"
"SMULWT r11, lr, r8\n\t"
"SMULBT r10, r12, r10\n\t"
"SMULBT r11, r12, r11\n\t"
"PKHBT r10, r10, r11, LSL #16\n\t"
"SSUB16 r8, r8, r10\n\t"
#else
"SBFX r10, r8, #0, #16\n\t"
"SBFX r11, r8, #16, #16\n\t"
"MUL r10, lr, r10\n\t"
"MUL r11, lr, r11\n\t"
"ASR r10, r10, #26\n\t"
"ASR r11, r11, #26\n\t"
"MUL r10, r12, r10\n\t"
"MUL r11, r12, r11\n\t"
"SUB r11, r8, r11, LSL #16\n\t"
"SUB r8, r8, r10\n\t"
"LSR r11, r11, #16\n\t"
"BFI r8, r11, #16, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULWB r10, lr, r9\n\t"
"SMULWT r11, lr, r9\n\t"
"SMULBT r10, r12, r10\n\t"
"SMULBT r11, r12, r11\n\t"
"PKHBT r10, r10, r11, LSL #16\n\t"
"SSUB16 r9, r9, r10\n\t"
#else
"SBFX r10, r9, #0, #16\n\t"
"SBFX r11, r9, #16, #16\n\t"
"MUL r10, lr, r10\n\t"
"MUL r11, lr, r11\n\t"
"ASR r10, r10, #26\n\t"
"ASR r11, r11, #26\n\t"
"MUL r10, r12, r10\n\t"
"MUL r11, r12, r11\n\t"
"SUB r11, r9, r11, LSL #16\n\t"
"SUB r9, r9, r10\n\t"
"LSR r11, r11, #16\n\t"
"BFI r9, r11, #16, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"MOV r12, #0xd01\n\t"
"MOVT r12, #0xcff\n\t"
#endif
"STR r2, [%[r]]\n\t"
"STR r3, [%[r], #4]\n\t"
"STR r4, [%[r], #8]\n\t"
"STR r5, [%[r], #12]\n\t"
"STR r6, [%[r], #16]\n\t"
"STR r7, [%[r], #20]\n\t"
"STR r8, [%[r], #24]\n\t"
"STR r9, [%[r], #28]\n\t"
"LDR r3, [sp, #4]\n\t"
"ADD r3, r3, #0x10\n\t"
"RSBS r10, r3, #0x100\n\t"
"ADD %[r], %[r], #0x20\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_thumb2_ntt_loop_567_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BNE.N L_mlkem_thumb2_ntt_loop_567\n\t"
#else
"BNE.N L_mlkem_thumb2_ntt_loop_567_%=\n\t"
#endif
"ADD sp, sp, #0x8\n\t"
: [r] "+r" (r),
[L_mlkem_thumb2_ntt_zetas] "+r" (L_mlkem_thumb2_ntt_zetas_c)
:
: "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
"r11", "r12", "lr"
);
}
/* Constant table consumed by mlkem_thumb2_invntt().
 *
 * Layout: 128 entries of word16 (16 rows of 8), 256 bytes in total. Every
 * entry is below 0xd01, the constant mlkem_thumb2_invntt() loads into r12.
 *
 * Usage visible in mlkem_thumb2_invntt(): the table address is copied into
 * r1 and entries are fetched with 32-bit LDRs, i.e. two adjacent 16-bit
 * entries per load, selected afterwards as the bottom/top halfword of lr.
 * The first loop reads at a running byte offset from the start of the table
 * and at fixed byte offsets 128 and 192 plus a scaled loop counter; later
 * loops read at byte offsets 224 (plus a scaled counter), 240, 244 and 248.
 * Reordering or resizing the table therefore requires matching changes to
 * those offsets in the assembly.
 *
 * NOTE(review): the name suggests these are the inverse-NTT twiddle factors
 * ("zetas"); their exact numeric encoding (e.g. Montgomery form) cannot be
 * confirmed from this file alone - verify against the generator/reference.
 * XALIGNED(16) is a project macro, presumably requesting 16-byte alignment.
 */
XALIGNED(16) static const word16 L_mlkem_invntt_zetas_inv[] = {
0x06a5, 0x070f, 0x05b4, 0x0943, 0x0922, 0x091d, 0x0134, 0x006c,
0x0b23, 0x0366, 0x0356, 0x05e6, 0x09e7, 0x04fe, 0x05fa, 0x04a1,
0x067b, 0x04a3, 0x0c25, 0x036a, 0x0537, 0x083f, 0x0088, 0x04bf,
0x0b81, 0x05b9, 0x0505, 0x07d7, 0x0a9f, 0x0aa6, 0x08b8, 0x09d0,
0x004b, 0x009c, 0x0bb8, 0x0b5f, 0x0ba4, 0x0368, 0x0a7d, 0x0636,
0x08a2, 0x025a, 0x0736, 0x0309, 0x0093, 0x087a, 0x09f7, 0x00f6,
0x068c, 0x06db, 0x01cc, 0x0123, 0x00eb, 0x0c50, 0x0ab6, 0x0b5b,
0x0c98, 0x06f3, 0x099a, 0x04e3, 0x09b6, 0x0ad6, 0x0b53, 0x044f,
0x04fb, 0x0a5c, 0x0429, 0x0b41, 0x02d5, 0x05e4, 0x0940, 0x018e,
0x03b7, 0x00f7, 0x058d, 0x0c96, 0x09c3, 0x010f, 0x005a, 0x0355,
0x0744, 0x0c83, 0x048a, 0x0652, 0x029a, 0x0140, 0x0008, 0x0afd,
0x0608, 0x011a, 0x072e, 0x050d, 0x090a, 0x0228, 0x0a75, 0x083a,
0x0623, 0x00cd, 0x0b66, 0x0606, 0x0aa1, 0x0a25, 0x0908, 0x02a9,
0x0082, 0x0642, 0x074f, 0x033d, 0x0b82, 0x0bf9, 0x052d, 0x0ac4,
0x0745, 0x05c2, 0x04b2, 0x093f, 0x0c4b, 0x06d8, 0x0a93, 0x00ab,
0x0c37, 0x0be2, 0x0773, 0x072c, 0x05ed, 0x0167, 0x02f6, 0x05a1,
};
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r_p)
#else
WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r)
#endif
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
register sword16* r __asm__ ("r0") = (sword16*)r_p;
register word16* L_mlkem_invntt_zetas_inv_c __asm__ ("r1") =
(word16*)&L_mlkem_invntt_zetas_inv;
#else
register word16* L_mlkem_invntt_zetas_inv_c =
(word16*)&L_mlkem_invntt_zetas_inv;
#endif
__asm__ __volatile__ (
"SUB sp, sp, #0x8\n\t"
"MOV r1, %[L_mlkem_invntt_zetas_inv]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"MOV r12, #0xd01\n\t"
"MOVT r12, #0xcff\n\t"
#endif
"MOV r3, #0x0\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_invntt_loop_765:\n\t"
#else
"L_mlkem_invntt_loop_765_%=:\n\t"
#endif
"ADD lr, r1, r3, LSR #1\n\t"
"STR r3, [sp, #4]\n\t"
"LDR r2, [%[r]]\n\t"
"LDR r3, [%[r], #4]\n\t"
"LDR r4, [%[r], #8]\n\t"
"LDR r5, [%[r], #12]\n\t"
"LDR r6, [%[r], #16]\n\t"
"LDR r7, [%[r], #20]\n\t"
"LDR r8, [%[r], #24]\n\t"
"LDR r9, [%[r], #28]\n\t"
"LDR lr, [lr]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r2, r3\n\t"
"SADD16 r2, r2, r3\n\t"
"SMULBT r3, lr, r10\n\t"
"SMULBB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r3\n\t"
"SMLABB r3, r12, r11, r3\n\t"
"PKHTB r3, r3, r10, ASR #16\n\t"
#else
"SUB r11, r2, r3\n\t"
"ADD r12, r2, r3\n\t"
"BFC r3, #0, #16\n\t"
"BFC r2, #0, #16\n\t"
"SUB r10, r2, r3\n\t"
"ADD r2, r2, r3\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r2, r12, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r3, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r3, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r3, r12, r11, r3\n\t"
"BFI r3, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r4, r5\n\t"
"SADD16 r4, r4, r5\n\t"
"SMULTT r5, lr, r10\n\t"
"SMULTB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r5\n\t"
"SMLABB r5, r12, r11, r5\n\t"
"PKHTB r5, r5, r10, ASR #16\n\t"
#else
"SUB r11, r4, r5\n\t"
"ADD r12, r4, r5\n\t"
"BFC r5, #0, #16\n\t"
"BFC r4, #0, #16\n\t"
"SUB r10, r4, r5\n\t"
"ADD r4, r4, r5\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r4, r12, #0, #16\n\t"
"SBFX r11, lr, #16, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r5, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r5, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r5, r12, r11, r5\n\t"
"BFI r5, r10, #0, #16\n\t"
#endif
"LDR lr, [sp, #4]\n\t"
"ADD lr, r1, lr, LSR #1\n\t"
"LDR lr, [lr, #4]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r6, r7\n\t"
"SADD16 r6, r6, r7\n\t"
"SMULBT r7, lr, r10\n\t"
"SMULBB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r7\n\t"
"SMLABB r7, r12, r11, r7\n\t"
"PKHTB r7, r7, r10, ASR #16\n\t"
#else
"SUB r11, r6, r7\n\t"
"ADD r12, r6, r7\n\t"
"BFC r7, #0, #16\n\t"
"BFC r6, #0, #16\n\t"
"SUB r10, r6, r7\n\t"
"ADD r6, r6, r7\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r6, r12, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r7, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r7, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r7, r12, r11, r7\n\t"
"BFI r7, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r8, r9\n\t"
"SADD16 r8, r8, r9\n\t"
"SMULTT r9, lr, r10\n\t"
"SMULTB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r9\n\t"
"SMLABB r9, r12, r11, r9\n\t"
"PKHTB r9, r9, r10, ASR #16\n\t"
#else
"SUB r11, r8, r9\n\t"
"ADD r12, r8, r9\n\t"
"BFC r9, #0, #16\n\t"
"BFC r8, #0, #16\n\t"
"SUB r10, r8, r9\n\t"
"ADD r8, r8, r9\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r8, r12, #0, #16\n\t"
"SBFX r11, lr, #16, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r9, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r9, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r9, r12, r11, r9\n\t"
"BFI r9, r10, #0, #16\n\t"
#endif
"LDR lr, [sp, #4]\n\t"
"ADD lr, r1, lr, LSR #2\n\t"
"LDR lr, [lr, #128]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r2, r4\n\t"
"SADD16 r2, r2, r4\n\t"
"SMULBT r4, lr, r10\n\t"
"SMULBB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r4\n\t"
"SMLABB r4, r12, r11, r4\n\t"
"PKHTB r4, r4, r10, ASR #16\n\t"
#else
"SUB r11, r2, r4\n\t"
"ADD r12, r2, r4\n\t"
"BFC r4, #0, #16\n\t"
"BFC r2, #0, #16\n\t"
"SUB r10, r2, r4\n\t"
"ADD r2, r2, r4\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r2, r12, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r4, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r4, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r4, r12, r11, r4\n\t"
"BFI r4, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r3, r5\n\t"
"SADD16 r3, r3, r5\n\t"
"SMULBT r5, lr, r10\n\t"
"SMULBB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r5\n\t"
"SMLABB r5, r12, r11, r5\n\t"
"PKHTB r5, r5, r10, ASR #16\n\t"
#else
"SUB r11, r3, r5\n\t"
"ADD r12, r3, r5\n\t"
"BFC r5, #0, #16\n\t"
"BFC r3, #0, #16\n\t"
"SUB r10, r3, r5\n\t"
"ADD r3, r3, r5\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r3, r12, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r5, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r5, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r5, r12, r11, r5\n\t"
"BFI r5, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r6, r8\n\t"
"SADD16 r6, r6, r8\n\t"
"SMULTT r8, lr, r10\n\t"
"SMULTB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r8\n\t"
"SMLABB r8, r12, r11, r8\n\t"
"PKHTB r8, r8, r10, ASR #16\n\t"
#else
"SUB r11, r6, r8\n\t"
"ADD r12, r6, r8\n\t"
"BFC r8, #0, #16\n\t"
"BFC r6, #0, #16\n\t"
"SUB r10, r6, r8\n\t"
"ADD r6, r6, r8\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r6, r12, #0, #16\n\t"
"SBFX r11, lr, #16, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r8, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r8, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r8, r12, r11, r8\n\t"
"BFI r8, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r7, r9\n\t"
"SADD16 r7, r7, r9\n\t"
"SMULTT r9, lr, r10\n\t"
"SMULTB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r9\n\t"
"SMLABB r9, r12, r11, r9\n\t"
"PKHTB r9, r9, r10, ASR #16\n\t"
#else
"SUB r11, r7, r9\n\t"
"ADD r12, r7, r9\n\t"
"BFC r9, #0, #16\n\t"
"BFC r7, #0, #16\n\t"
"SUB r10, r7, r9\n\t"
"ADD r7, r7, r9\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r7, r12, #0, #16\n\t"
"SBFX r11, lr, #16, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r9, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r9, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r9, r12, r11, r9\n\t"
"BFI r9, r10, #0, #16\n\t"
#endif
"LDR lr, [sp, #4]\n\t"
"ADD lr, r1, lr, LSR #3\n\t"
"LDR lr, [lr, #192]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r2, r6\n\t"
"SADD16 r2, r2, r6\n\t"
"SMULBT r6, lr, r10\n\t"
"SMULBB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r6\n\t"
"SMLABB r6, r12, r11, r6\n\t"
"PKHTB r6, r6, r10, ASR #16\n\t"
#else
"SUB r11, r2, r6\n\t"
"ADD r12, r2, r6\n\t"
"BFC r6, #0, #16\n\t"
"BFC r2, #0, #16\n\t"
"SUB r10, r2, r6\n\t"
"ADD r2, r2, r6\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r2, r12, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r6, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r6, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r6, r12, r11, r6\n\t"
"BFI r6, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r3, r7\n\t"
"SADD16 r3, r3, r7\n\t"
"SMULBT r7, lr, r10\n\t"
"SMULBB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r7\n\t"
"SMLABB r7, r12, r11, r7\n\t"
"PKHTB r7, r7, r10, ASR #16\n\t"
#else
"SUB r11, r3, r7\n\t"
"ADD r12, r3, r7\n\t"
"BFC r7, #0, #16\n\t"
"BFC r3, #0, #16\n\t"
"SUB r10, r3, r7\n\t"
"ADD r3, r3, r7\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r3, r12, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r7, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r7, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r7, r12, r11, r7\n\t"
"BFI r7, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r4, r8\n\t"
"SADD16 r4, r4, r8\n\t"
"SMULBT r8, lr, r10\n\t"
"SMULBB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r8\n\t"
"SMLABB r8, r12, r11, r8\n\t"
"PKHTB r8, r8, r10, ASR #16\n\t"
#else
"SUB r11, r4, r8\n\t"
"ADD r12, r4, r8\n\t"
"BFC r8, #0, #16\n\t"
"BFC r4, #0, #16\n\t"
"SUB r10, r4, r8\n\t"
"ADD r4, r4, r8\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r4, r12, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r8, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r8, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r8, r12, r11, r8\n\t"
"BFI r8, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r5, r9\n\t"
"SADD16 r5, r5, r9\n\t"
"SMULBT r9, lr, r10\n\t"
"SMULBB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r9\n\t"
"SMLABB r9, r12, r11, r9\n\t"
"PKHTB r9, r9, r10, ASR #16\n\t"
#else
"SUB r11, r5, r9\n\t"
"ADD r12, r5, r9\n\t"
"BFC r9, #0, #16\n\t"
"BFC r5, #0, #16\n\t"
"SUB r10, r5, r9\n\t"
"ADD r5, r5, r9\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r5, r12, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r9, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r9, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r9, r12, r11, r9\n\t"
"BFI r9, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"MOV lr, #0xafc0\n\t"
"MOVT lr, #0x13\n\t"
#else
"MOV lr, #0x4ebf\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULWB r10, lr, r2\n\t"
"SMULWT r11, lr, r2\n\t"
"SMULBT r10, r12, r10\n\t"
"SMULBT r11, r12, r11\n\t"
"PKHBT r10, r10, r11, LSL #16\n\t"
"SSUB16 r2, r2, r10\n\t"
#else
"SBFX r10, r2, #0, #16\n\t"
"SBFX r11, r2, #16, #16\n\t"
"MUL r10, lr, r10\n\t"
"MUL r11, lr, r11\n\t"
"ASR r10, r10, #26\n\t"
"ASR r11, r11, #26\n\t"
"MUL r10, r12, r10\n\t"
"MUL r11, r12, r11\n\t"
"SUB r11, r2, r11, LSL #16\n\t"
"SUB r2, r2, r10\n\t"
"LSR r11, r11, #16\n\t"
"BFI r2, r11, #16, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULWB r10, lr, r3\n\t"
"SMULWT r11, lr, r3\n\t"
"SMULBT r10, r12, r10\n\t"
"SMULBT r11, r12, r11\n\t"
"PKHBT r10, r10, r11, LSL #16\n\t"
"SSUB16 r3, r3, r10\n\t"
#else
"SBFX r10, r3, #0, #16\n\t"
"SBFX r11, r3, #16, #16\n\t"
"MUL r10, lr, r10\n\t"
"MUL r11, lr, r11\n\t"
"ASR r10, r10, #26\n\t"
"ASR r11, r11, #26\n\t"
"MUL r10, r12, r10\n\t"
"MUL r11, r12, r11\n\t"
"SUB r11, r3, r11, LSL #16\n\t"
"SUB r3, r3, r10\n\t"
"LSR r11, r11, #16\n\t"
"BFI r3, r11, #16, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULWB r10, lr, r4\n\t"
"SMULWT r11, lr, r4\n\t"
"SMULBT r10, r12, r10\n\t"
"SMULBT r11, r12, r11\n\t"
"PKHBT r10, r10, r11, LSL #16\n\t"
"SSUB16 r4, r4, r10\n\t"
#else
"SBFX r10, r4, #0, #16\n\t"
"SBFX r11, r4, #16, #16\n\t"
"MUL r10, lr, r10\n\t"
"MUL r11, lr, r11\n\t"
"ASR r10, r10, #26\n\t"
"ASR r11, r11, #26\n\t"
"MUL r10, r12, r10\n\t"
"MUL r11, r12, r11\n\t"
"SUB r11, r4, r11, LSL #16\n\t"
"SUB r4, r4, r10\n\t"
"LSR r11, r11, #16\n\t"
"BFI r4, r11, #16, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULWB r10, lr, r5\n\t"
"SMULWT r11, lr, r5\n\t"
"SMULBT r10, r12, r10\n\t"
"SMULBT r11, r12, r11\n\t"
"PKHBT r10, r10, r11, LSL #16\n\t"
"SSUB16 r5, r5, r10\n\t"
#else
"SBFX r10, r5, #0, #16\n\t"
"SBFX r11, r5, #16, #16\n\t"
"MUL r10, lr, r10\n\t"
"MUL r11, lr, r11\n\t"
"ASR r10, r10, #26\n\t"
"ASR r11, r11, #26\n\t"
"MUL r10, r12, r10\n\t"
"MUL r11, r12, r11\n\t"
"SUB r11, r5, r11, LSL #16\n\t"
"SUB r5, r5, r10\n\t"
"LSR r11, r11, #16\n\t"
"BFI r5, r11, #16, #16\n\t"
#endif
"STR r2, [%[r]]\n\t"
"STR r3, [%[r], #4]\n\t"
"STR r4, [%[r], #8]\n\t"
"STR r5, [%[r], #12]\n\t"
"STR r6, [%[r], #16]\n\t"
"STR r7, [%[r], #20]\n\t"
"STR r8, [%[r], #24]\n\t"
"STR r9, [%[r], #28]\n\t"
"LDR r3, [sp, #4]\n\t"
"ADD r3, r3, #0x10\n\t"
"RSBS r10, r3, #0x100\n\t"
"ADD %[r], %[r], #0x20\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_invntt_loop_765_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BNE.N L_mlkem_invntt_loop_765\n\t"
#else
"BNE.N L_mlkem_invntt_loop_765_%=\n\t"
#endif
"SUB %[r], %[r], #0x200\n\t"
"MOV r3, #0x0\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_invntt_loop_4_j:\n\t"
#else
"L_mlkem_invntt_loop_4_j_%=:\n\t"
#endif
"STR r3, [sp, #4]\n\t"
"ADD lr, r1, r3, LSR #4\n\t"
"MOV r2, #0x4\n\t"
"LDR lr, [lr, #224]\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_invntt_loop_4_i:\n\t"
#else
"L_mlkem_invntt_loop_4_i_%=:\n\t"
#endif
"STR r2, [sp]\n\t"
"LDR r2, [%[r]]\n\t"
"LDR r3, [%[r], #16]\n\t"
"LDR r4, [%[r], #32]\n\t"
"LDR r5, [%[r], #48]\n\t"
"LDR r6, [%[r], #64]\n\t"
"LDR r7, [%[r], #80]\n\t"
"LDR r8, [%[r], #96]\n\t"
"LDR r9, [%[r], #112]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r2, r4\n\t"
"SADD16 r2, r2, r4\n\t"
"SMULBT r4, lr, r10\n\t"
"SMULBB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r4\n\t"
"SMLABB r4, r12, r11, r4\n\t"
"PKHTB r4, r4, r10, ASR #16\n\t"
#else
"SUB r11, r2, r4\n\t"
"ADD r12, r2, r4\n\t"
"BFC r4, #0, #16\n\t"
"BFC r2, #0, #16\n\t"
"SUB r10, r2, r4\n\t"
"ADD r2, r2, r4\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r2, r12, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r4, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r4, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r4, r12, r11, r4\n\t"
"BFI r4, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r3, r5\n\t"
"SADD16 r3, r3, r5\n\t"
"SMULBT r5, lr, r10\n\t"
"SMULBB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r5\n\t"
"SMLABB r5, r12, r11, r5\n\t"
"PKHTB r5, r5, r10, ASR #16\n\t"
#else
"SUB r11, r3, r5\n\t"
"ADD r12, r3, r5\n\t"
"BFC r5, #0, #16\n\t"
"BFC r3, #0, #16\n\t"
"SUB r10, r3, r5\n\t"
"ADD r3, r3, r5\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r3, r12, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r5, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r5, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r5, r12, r11, r5\n\t"
"BFI r5, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r6, r8\n\t"
"SADD16 r6, r6, r8\n\t"
"SMULTT r8, lr, r10\n\t"
"SMULTB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r8\n\t"
"SMLABB r8, r12, r11, r8\n\t"
"PKHTB r8, r8, r10, ASR #16\n\t"
#else
"SUB r11, r6, r8\n\t"
"ADD r12, r6, r8\n\t"
"BFC r8, #0, #16\n\t"
"BFC r6, #0, #16\n\t"
"SUB r10, r6, r8\n\t"
"ADD r6, r6, r8\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r6, r12, #0, #16\n\t"
"SBFX r11, lr, #16, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r8, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r8, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r8, r12, r11, r8\n\t"
"BFI r8, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r7, r9\n\t"
"SADD16 r7, r7, r9\n\t"
"SMULTT r9, lr, r10\n\t"
"SMULTB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r9\n\t"
"SMLABB r9, r12, r11, r9\n\t"
"PKHTB r9, r9, r10, ASR #16\n\t"
#else
"SUB r11, r7, r9\n\t"
"ADD r12, r7, r9\n\t"
"BFC r9, #0, #16\n\t"
"BFC r7, #0, #16\n\t"
"SUB r10, r7, r9\n\t"
"ADD r7, r7, r9\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r7, r12, #0, #16\n\t"
"SBFX r11, lr, #16, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r9, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r9, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r9, r12, r11, r9\n\t"
"BFI r9, r10, #0, #16\n\t"
#endif
"STR r2, [%[r]]\n\t"
"STR r3, [%[r], #16]\n\t"
"STR r4, [%[r], #32]\n\t"
"STR r5, [%[r], #48]\n\t"
"STR r6, [%[r], #64]\n\t"
"STR r7, [%[r], #80]\n\t"
"STR r8, [%[r], #96]\n\t"
"STR r9, [%[r], #112]\n\t"
"LDRD r2, r3, [sp]\n\t"
"SUBS r2, r2, #0x1\n\t"
"ADD %[r], %[r], #0x4\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_invntt_loop_4_i_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BNE.N L_mlkem_invntt_loop_4_i\n\t"
#else
"BNE.N L_mlkem_invntt_loop_4_i_%=\n\t"
#endif
"ADD r3, r3, #0x40\n\t"
"RSBS r10, r3, #0x100\n\t"
"ADD %[r], %[r], #0x70\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_invntt_loop_4_j_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BNE.N L_mlkem_invntt_loop_4_j\n\t"
#else
"BNE.N L_mlkem_invntt_loop_4_j_%=\n\t"
#endif
"SUB %[r], %[r], #0x200\n\t"
"MOV r2, #0x10\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_invntt_loop_321:\n\t"
#else
"L_mlkem_invntt_loop_321_%=:\n\t"
#endif
"STR r2, [sp]\n\t"
"LDRH lr, [r1, #2]\n\t"
"LDR r2, [%[r]]\n\t"
"LDR r3, [%[r], #64]\n\t"
"LDR r4, [%[r], #128]\n\t"
"LDR r5, [%[r], #192]\n\t"
"LDR r6, [%[r], #256]\n\t"
"LDR r7, [%[r], #320]\n\t"
"LDR r8, [%[r], #384]\n\t"
"LDR r9, [%[r], #448]\n\t"
"LDR lr, [r1, #240]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r2, r3\n\t"
"SADD16 r2, r2, r3\n\t"
"SMULBT r3, lr, r10\n\t"
"SMULBB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r3\n\t"
"SMLABB r3, r12, r11, r3\n\t"
"PKHTB r3, r3, r10, ASR #16\n\t"
#else
"SUB r11, r2, r3\n\t"
"ADD r12, r2, r3\n\t"
"BFC r3, #0, #16\n\t"
"BFC r2, #0, #16\n\t"
"SUB r10, r2, r3\n\t"
"ADD r2, r2, r3\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r2, r12, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r3, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r3, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r3, r12, r11, r3\n\t"
"BFI r3, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r4, r5\n\t"
"SADD16 r4, r4, r5\n\t"
"SMULTT r5, lr, r10\n\t"
"SMULTB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r5\n\t"
"SMLABB r5, r12, r11, r5\n\t"
"PKHTB r5, r5, r10, ASR #16\n\t"
#else
"SUB r11, r4, r5\n\t"
"ADD r12, r4, r5\n\t"
"BFC r5, #0, #16\n\t"
"BFC r4, #0, #16\n\t"
"SUB r10, r4, r5\n\t"
"ADD r4, r4, r5\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r4, r12, #0, #16\n\t"
"SBFX r11, lr, #16, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r5, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r5, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r5, r12, r11, r5\n\t"
"BFI r5, r10, #0, #16\n\t"
#endif
"LDR lr, [r1, #244]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r6, r7\n\t"
"SADD16 r6, r6, r7\n\t"
"SMULBT r7, lr, r10\n\t"
"SMULBB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r7\n\t"
"SMLABB r7, r12, r11, r7\n\t"
"PKHTB r7, r7, r10, ASR #16\n\t"
#else
"SUB r11, r6, r7\n\t"
"ADD r12, r6, r7\n\t"
"BFC r7, #0, #16\n\t"
"BFC r6, #0, #16\n\t"
"SUB r10, r6, r7\n\t"
"ADD r6, r6, r7\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r6, r12, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r7, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r7, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r7, r12, r11, r7\n\t"
"BFI r7, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r8, r9\n\t"
"SADD16 r8, r8, r9\n\t"
"SMULTT r9, lr, r10\n\t"
"SMULTB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r9\n\t"
"SMLABB r9, r12, r11, r9\n\t"
"PKHTB r9, r9, r10, ASR #16\n\t"
#else
"SUB r11, r8, r9\n\t"
"ADD r12, r8, r9\n\t"
"BFC r9, #0, #16\n\t"
"BFC r8, #0, #16\n\t"
"SUB r10, r8, r9\n\t"
"ADD r8, r8, r9\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r8, r12, #0, #16\n\t"
"SBFX r11, lr, #16, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r9, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r9, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r9, r12, r11, r9\n\t"
"BFI r9, r10, #0, #16\n\t"
#endif
"LDR lr, [r1, #248]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r2, r4\n\t"
"SADD16 r2, r2, r4\n\t"
"SMULBT r4, lr, r10\n\t"
"SMULBB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r4\n\t"
"SMLABB r4, r12, r11, r4\n\t"
"PKHTB r4, r4, r10, ASR #16\n\t"
#else
"SUB r11, r2, r4\n\t"
"ADD r12, r2, r4\n\t"
"BFC r4, #0, #16\n\t"
"BFC r2, #0, #16\n\t"
"SUB r10, r2, r4\n\t"
"ADD r2, r2, r4\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r2, r12, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r4, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r4, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r4, r12, r11, r4\n\t"
"BFI r4, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r3, r5\n\t"
"SADD16 r3, r3, r5\n\t"
"SMULBT r5, lr, r10\n\t"
"SMULBB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r5\n\t"
"SMLABB r5, r12, r11, r5\n\t"
"PKHTB r5, r5, r10, ASR #16\n\t"
#else
"SUB r11, r3, r5\n\t"
"ADD r12, r3, r5\n\t"
"BFC r5, #0, #16\n\t"
"BFC r3, #0, #16\n\t"
"SUB r10, r3, r5\n\t"
"ADD r3, r3, r5\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r3, r12, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r5, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r5, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r5, r12, r11, r5\n\t"
"BFI r5, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r6, r8\n\t"
"SADD16 r6, r6, r8\n\t"
"SMULTT r8, lr, r10\n\t"
"SMULTB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r8\n\t"
"SMLABB r8, r12, r11, r8\n\t"
"PKHTB r8, r8, r10, ASR #16\n\t"
#else
"SUB r11, r6, r8\n\t"
"ADD r12, r6, r8\n\t"
"BFC r8, #0, #16\n\t"
"BFC r6, #0, #16\n\t"
"SUB r10, r6, r8\n\t"
"ADD r6, r6, r8\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r6, r12, #0, #16\n\t"
"SBFX r11, lr, #16, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r8, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r8, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r8, r12, r11, r8\n\t"
"BFI r8, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r7, r9\n\t"
"SADD16 r7, r7, r9\n\t"
"SMULTT r9, lr, r10\n\t"
"SMULTB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r9\n\t"
"SMLABB r9, r12, r11, r9\n\t"
"PKHTB r9, r9, r10, ASR #16\n\t"
#else
"SUB r11, r7, r9\n\t"
"ADD r12, r7, r9\n\t"
"BFC r9, #0, #16\n\t"
"BFC r7, #0, #16\n\t"
"SUB r10, r7, r9\n\t"
"ADD r7, r7, r9\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r7, r12, #0, #16\n\t"
"SBFX r11, lr, #16, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r9, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r9, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r9, r12, r11, r9\n\t"
"BFI r9, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"MOV lr, #0xafc0\n\t"
"MOVT lr, #0x13\n\t"
#else
"MOV lr, #0x4ebf\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULWB r10, lr, r2\n\t"
"SMULWT r11, lr, r2\n\t"
"SMULBT r10, r12, r10\n\t"
"SMULBT r11, r12, r11\n\t"
"PKHBT r10, r10, r11, LSL #16\n\t"
"SSUB16 r2, r2, r10\n\t"
#else
"SBFX r10, r2, #0, #16\n\t"
"SBFX r11, r2, #16, #16\n\t"
"MUL r10, lr, r10\n\t"
"MUL r11, lr, r11\n\t"
"ASR r10, r10, #26\n\t"
"ASR r11, r11, #26\n\t"
"MUL r10, r12, r10\n\t"
"MUL r11, r12, r11\n\t"
"SUB r11, r2, r11, LSL #16\n\t"
"SUB r2, r2, r10\n\t"
"LSR r11, r11, #16\n\t"
"BFI r2, r11, #16, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULWB r10, lr, r3\n\t"
"SMULWT r11, lr, r3\n\t"
"SMULBT r10, r12, r10\n\t"
"SMULBT r11, r12, r11\n\t"
"PKHBT r10, r10, r11, LSL #16\n\t"
"SSUB16 r3, r3, r10\n\t"
#else
"SBFX r10, r3, #0, #16\n\t"
"SBFX r11, r3, #16, #16\n\t"
"MUL r10, lr, r10\n\t"
"MUL r11, lr, r11\n\t"
"ASR r10, r10, #26\n\t"
"ASR r11, r11, #26\n\t"
"MUL r10, r12, r10\n\t"
"MUL r11, r12, r11\n\t"
"SUB r11, r3, r11, LSL #16\n\t"
"SUB r3, r3, r10\n\t"
"LSR r11, r11, #16\n\t"
"BFI r3, r11, #16, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULWB r10, lr, r4\n\t"
"SMULWT r11, lr, r4\n\t"
"SMULBT r10, r12, r10\n\t"
"SMULBT r11, r12, r11\n\t"
"PKHBT r10, r10, r11, LSL #16\n\t"
"SSUB16 r4, r4, r10\n\t"
#else
"SBFX r10, r4, #0, #16\n\t"
"SBFX r11, r4, #16, #16\n\t"
"MUL r10, lr, r10\n\t"
"MUL r11, lr, r11\n\t"
"ASR r10, r10, #26\n\t"
"ASR r11, r11, #26\n\t"
"MUL r10, r12, r10\n\t"
"MUL r11, r12, r11\n\t"
"SUB r11, r4, r11, LSL #16\n\t"
"SUB r4, r4, r10\n\t"
"LSR r11, r11, #16\n\t"
"BFI r4, r11, #16, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULWB r10, lr, r5\n\t"
"SMULWT r11, lr, r5\n\t"
"SMULBT r10, r12, r10\n\t"
"SMULBT r11, r12, r11\n\t"
"PKHBT r10, r10, r11, LSL #16\n\t"
"SSUB16 r5, r5, r10\n\t"
#else
"SBFX r10, r5, #0, #16\n\t"
"SBFX r11, r5, #16, #16\n\t"
"MUL r10, lr, r10\n\t"
"MUL r11, lr, r11\n\t"
"ASR r10, r10, #26\n\t"
"ASR r11, r11, #26\n\t"
"MUL r10, r12, r10\n\t"
"MUL r11, r12, r11\n\t"
"SUB r11, r5, r11, LSL #16\n\t"
"SUB r5, r5, r10\n\t"
"LSR r11, r11, #16\n\t"
"BFI r5, r11, #16, #16\n\t"
#endif
"LDR lr, [r1, #252]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r2, r6\n\t"
"SADD16 r2, r2, r6\n\t"
"SMULBT r6, lr, r10\n\t"
"SMULBB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r6\n\t"
"SMLABB r6, r12, r11, r6\n\t"
"PKHTB r6, r6, r10, ASR #16\n\t"
#else
"SUB r11, r2, r6\n\t"
"ADD r12, r2, r6\n\t"
"BFC r6, #0, #16\n\t"
"BFC r2, #0, #16\n\t"
"SUB r10, r2, r6\n\t"
"ADD r2, r2, r6\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r2, r12, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r6, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r6, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r6, r12, r11, r6\n\t"
"BFI r6, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r3, r7\n\t"
"SADD16 r3, r3, r7\n\t"
"SMULBT r7, lr, r10\n\t"
"SMULBB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r7\n\t"
"SMLABB r7, r12, r11, r7\n\t"
"PKHTB r7, r7, r10, ASR #16\n\t"
#else
"SUB r11, r3, r7\n\t"
"ADD r12, r3, r7\n\t"
"BFC r7, #0, #16\n\t"
"BFC r3, #0, #16\n\t"
"SUB r10, r3, r7\n\t"
"ADD r3, r3, r7\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r3, r12, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r7, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r7, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r7, r12, r11, r7\n\t"
"BFI r7, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r4, r8\n\t"
"SADD16 r4, r4, r8\n\t"
"SMULBT r8, lr, r10\n\t"
"SMULBB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r8\n\t"
"SMLABB r8, r12, r11, r8\n\t"
"PKHTB r8, r8, r10, ASR #16\n\t"
#else
"SUB r11, r4, r8\n\t"
"ADD r12, r4, r8\n\t"
"BFC r8, #0, #16\n\t"
"BFC r4, #0, #16\n\t"
"SUB r10, r4, r8\n\t"
"ADD r4, r4, r8\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r4, r12, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r8, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r8, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r8, r12, r11, r8\n\t"
"BFI r8, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r10, r5, r9\n\t"
"SADD16 r5, r5, r9\n\t"
"SMULBT r9, lr, r10\n\t"
"SMULBB r10, lr, r10\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r9\n\t"
"SMLABB r9, r12, r11, r9\n\t"
"PKHTB r9, r9, r10, ASR #16\n\t"
#else
"SUB r11, r5, r9\n\t"
"ADD r12, r5, r9\n\t"
"BFC r9, #0, #16\n\t"
"BFC r5, #0, #16\n\t"
"SUB r10, r5, r9\n\t"
"ADD r5, r5, r9\n\t"
"BFI r10, r11, #0, #16\n\t"
"BFI r5, r12, #0, #16\n\t"
"SBFX r11, lr, #0, #16\n\t"
"ASR r12, r10, #16\n\t"
"MUL r9, r11, r12\n\t"
"SBFX r10, r10, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r9, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r9, r12, r11, r9\n\t"
"BFI r9, r10, #0, #16\n\t"
#endif
"LDR lr, [r1, #254]\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r2\n\t"
"SMULBT r2, lr, r2\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r2\n\t"
"SMLABB r2, r12, r11, r2\n\t"
"PKHTB r2, r2, r10, ASR #16\n\t"
#else
"SBFX r11, lr, #0, #16\n\t"
"SBFX r10, r2, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"SBFX r2, r2, #16, #16\n\t"
"MUL r2, r11, r2\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r2, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r2, r12, r11, r2\n\t"
"BFI r2, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r3\n\t"
"SMULBT r3, lr, r3\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r3\n\t"
"SMLABB r3, r12, r11, r3\n\t"
"PKHTB r3, r3, r10, ASR #16\n\t"
#else
"SBFX r11, lr, #0, #16\n\t"
"SBFX r10, r3, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"SBFX r3, r3, #16, #16\n\t"
"MUL r3, r11, r3\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r3, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r3, r12, r11, r3\n\t"
"BFI r3, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r4\n\t"
"SMULBT r4, lr, r4\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r4\n\t"
"SMLABB r4, r12, r11, r4\n\t"
"PKHTB r4, r4, r10, ASR #16\n\t"
#else
"SBFX r11, lr, #0, #16\n\t"
"SBFX r10, r4, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"SBFX r4, r4, #16, #16\n\t"
"MUL r4, r11, r4\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r4, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r4, r12, r11, r4\n\t"
"BFI r4, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r5\n\t"
"SMULBT r5, lr, r5\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r5\n\t"
"SMLABB r5, r12, r11, r5\n\t"
"PKHTB r5, r5, r10, ASR #16\n\t"
#else
"SBFX r11, lr, #0, #16\n\t"
"SBFX r10, r5, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"SBFX r5, r5, #16, #16\n\t"
"MUL r5, r11, r5\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r5, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r5, r12, r11, r5\n\t"
"BFI r5, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r6\n\t"
"SMULBT r6, lr, r6\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r6\n\t"
"SMLABB r6, r12, r11, r6\n\t"
"PKHTB r6, r6, r10, ASR #16\n\t"
#else
"SBFX r11, lr, #0, #16\n\t"
"SBFX r10, r6, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"SBFX r6, r6, #16, #16\n\t"
"MUL r6, r11, r6\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r6, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r6, r12, r11, r6\n\t"
"BFI r6, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r7\n\t"
"SMULBT r7, lr, r7\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r7\n\t"
"SMLABB r7, r12, r11, r7\n\t"
"PKHTB r7, r7, r10, ASR #16\n\t"
#else
"SBFX r11, lr, #0, #16\n\t"
"SBFX r10, r7, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"SBFX r7, r7, #16, #16\n\t"
"MUL r7, r11, r7\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r7, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r7, r12, r11, r7\n\t"
"BFI r7, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r8\n\t"
"SMULBT r8, lr, r8\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r8\n\t"
"SMLABB r8, r12, r11, r8\n\t"
"PKHTB r8, r8, r10, ASR #16\n\t"
#else
"SBFX r11, lr, #0, #16\n\t"
"SBFX r10, r8, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"SBFX r8, r8, #16, #16\n\t"
"MUL r8, r11, r8\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r8, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r8, r12, r11, r8\n\t"
"BFI r8, r10, #0, #16\n\t"
#endif
#ifndef WOLFSSL_ARM_ARCH_7M
"SMULBB r10, lr, r9\n\t"
"SMULBT r9, lr, r9\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r10, r12, r11, r10\n\t"
"SMULTB r11, r12, r9\n\t"
"SMLABB r9, r12, r11, r9\n\t"
"PKHTB r9, r9, r10, ASR #16\n\t"
#else
"SBFX r11, lr, #0, #16\n\t"
"SBFX r10, r9, #0, #16\n\t"
"MUL r10, r11, r10\n\t"
"SBFX r9, r9, #16, #16\n\t"
"MUL r9, r11, r9\n\t"
"MOV r12, #0xcff\n\t"
"MUL r11, r12, r10\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r10, r12, r11, r10\n\t"
"MOV r12, #0xcff\n\t"
"SBFX r11, r9, #0, #16\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r11, r11, #0, #16\n\t"
"LSR r10, r10, #16\n\t"
"MLA r9, r12, r11, r9\n\t"
"BFI r9, r10, #0, #16\n\t"
#endif
"STR r2, [%[r]]\n\t"
"STR r3, [%[r], #64]\n\t"
"STR r4, [%[r], #128]\n\t"
"STR r5, [%[r], #192]\n\t"
"STR r6, [%[r], #256]\n\t"
"STR r7, [%[r], #320]\n\t"
"STR r8, [%[r], #384]\n\t"
"STR r9, [%[r], #448]\n\t"
"LDR r2, [sp]\n\t"
"SUBS r2, r2, #0x1\n\t"
"ADD %[r], %[r], #0x4\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_invntt_loop_321_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BNE.N L_mlkem_invntt_loop_321\n\t"
#else
"BNE.N L_mlkem_invntt_loop_321_%=\n\t"
#endif
"ADD sp, sp, #0x8\n\t"
: [r] "+r" (r),
[L_mlkem_invntt_zetas_inv] "+r" (L_mlkem_invntt_zetas_inv_c)
:
: "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
"r11", "r12", "lr"
);
}
/* Table of 128 16-bit constants (256 bytes), declared with XALIGNED(16).
 * The values are the same as those of L_mlkem_thumb2_ntt_zetas defined
 * earlier in this file.
 *
 * mlkem_thumb2_basemul_mont() and mlkem_thumb2_basemul_mont_add() start
 * reading at byte offset 0x80, i.e. they use entries 64..127, one entry per
 * loop iteration. mlkem_thumb2_csubq() takes the table's address into a
 * local variable but its assembly never reads the table. */
XALIGNED(16) static const word16 L_mlkem_basemul_mont_zetas[] = {
0x08ed, 0x0a0b, 0x0b9a, 0x0714, 0x05d5, 0x058e, 0x011f, 0x00ca,
0x0c56, 0x026e, 0x0629, 0x00b6, 0x03c2, 0x084f, 0x073f, 0x05bc,
0x023d, 0x07d4, 0x0108, 0x017f, 0x09c4, 0x05b2, 0x06bf, 0x0c7f,
0x0a58, 0x03f9, 0x02dc, 0x0260, 0x06fb, 0x019b, 0x0c34, 0x06de,
0x04c7, 0x028c, 0x0ad9, 0x03f7, 0x07f4, 0x05d3, 0x0be7, 0x06f9,
0x0204, 0x0cf9, 0x0bc1, 0x0a67, 0x06af, 0x0877, 0x007e, 0x05bd,
0x09ac, 0x0ca7, 0x0bf2, 0x033e, 0x006b, 0x0774, 0x0c0a, 0x094a,
0x0b73, 0x03c1, 0x071d, 0x0a2c, 0x01c0, 0x08d8, 0x02a5, 0x0806,
0x08b2, 0x01ae, 0x022b, 0x034b, 0x081e, 0x0367, 0x060e, 0x0069,
0x01a6, 0x024b, 0x00b1, 0x0c16, 0x0bde, 0x0b35, 0x0626, 0x0675,
0x0c0b, 0x030a, 0x0487, 0x0c6e, 0x09f8, 0x05cb, 0x0aa7, 0x045f,
0x06cb, 0x0284, 0x0999, 0x015d, 0x01a2, 0x0149, 0x0c65, 0x0cb6,
0x0331, 0x0449, 0x025b, 0x0262, 0x052a, 0x07fc, 0x0748, 0x0180,
0x0842, 0x0c79, 0x04c2, 0x07ca, 0x0997, 0x00dc, 0x085e, 0x0686,
0x0860, 0x0707, 0x0803, 0x031a, 0x071b, 0x09ab, 0x099b, 0x01de,
0x0c95, 0x0bcd, 0x03e4, 0x03df, 0x03be, 0x074d, 0x05f2, 0x065c,
};
/* Pairwise multiply two arrays of 256 16-bit values with Montgomery
 * reduction, writing the products to r.
 *
 * Each loop iteration reads two 32-bit words from a and two from b (each
 * word holds a bottom and a top 16-bit value) and one 16-bit constant z from
 * entries 64..127 of L_mlkem_basemul_mont_zetas. With a word of a written as
 * (aB, aT) and the matching word of b as (bB, bT), the first output word is
 *     bottom: mont(aB * bB + z * mont(aT * bT))
 *     top:    mont(aB * bT + aT * bB)
 * and the second output word is the same with -z in place of z.
 *
 * mont(x) is the top 16 bits of x + 0xd01 * t, where t is the signed low
 * 16 bits of x * 0xcff. As 0xd01 * 0xcff = -1 (mod 2^16), the low 16 bits of
 * that sum are always zero.
 *
 * The loop runs 64 times (table byte offset 0x00..0x7e in steps of 2).
 *
 * r  [out]  Result: 256 16-bit values.
 * a  [in]   First operand: 256 16-bit values.
 * b  [in]   Second operand: 256 16-bit values.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont(sword16* r_p,
const sword16* a_p, const sword16* b_p)
#else
WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont(sword16* r,
const sword16* a, const sword16* b)
#endif
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
register sword16* r __asm__ ("r0") = (sword16*)r_p;
register const sword16* a __asm__ ("r1") = (const sword16*)a_p;
register const sword16* b __asm__ ("r2") = (const sword16*)b_p;
register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r3") =
(word16*)&L_mlkem_basemul_mont_zetas;
#else
register word16* L_mlkem_basemul_mont_zetas_c =
(word16*)&L_mlkem_basemul_mont_zetas;
#endif
__asm__ __volatile__ (
/* r3 = address of table entry 64 (byte offset 0x80). */
"MOV r3, %[L_mlkem_basemul_mont_zetas]\n\t"
"ADD r3, r3, #0x80\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
/* r12 = 0x0cff0d01: top half 0xcff, bottom half 0xd01. */
"MOV r12, #0xd01\n\t"
"MOVT r12, #0xcff\n\t"
#endif
/* r8 = byte offset of the next table entry. */
"MOV r8, #0x0\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_basemul_mont_loop:\n\t"
#else
"L_mlkem_basemul_mont_loop_%=:\n\t"
#endif
"LDM %[a]!, {r4, r5}\n\t"
"LDM %[b]!, {r6, r7}\n\t"
/* Load only the 16-bit table entry. Just the low half of lr is used below,
 * and a 32-bit load at the final offset (0x80 + 0x7e) would read two bytes
 * beyond the end of the 256-byte table. */
"LDRH lr, [r3, r8]\n\t"
"ADD r8, r8, #0x2\n\t"
/* r8 is reused as a scratch register below, so the offset is saved on the
 * stack. The flags from this CMP are consumed by the BNE at the end of the
 * loop. */
"PUSH {r8}\n\t"
"CMP r8, #0x80\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
/* r8 = aT * bT for the first word, r10 likewise for the second word. */
"SMULTT r8, r4, r6\n\t"
"SMULTT r10, r5, r7\n\t"
/* Montgomery reduce r8 and r10; the results are in their top halves. */
"SMULTB r9, r12, r8\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r8, r12, r9, r8\n\t"
"SMLABB r10, r12, r11, r10\n\t"
/* r8 = z * mont(aT * bT), r10 = -z * mont(aT * bT) for the second word. */
"RSB r11, lr, #0x0\n\t"
"SMULBT r8, lr, r8\n\t"
"SMULBT r10, r11, r10\n\t"
/* Add aB * bB. */
"SMLABB r8, r4, r6, r8\n\t"
"SMLABB r10, r5, r7, r10\n\t"
/* Montgomery reduce: top halves of r8/r10 are the bottom results. */
"SMULTB r9, r12, r8\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r8, r12, r9, r8\n\t"
"SMLABB r10, r12, r11, r10\n\t"
/* r9 = aB * bT + aT * bB, r11 likewise for the second word. */
"SMULBT r9, r4, r6\n\t"
"SMULBT r11, r5, r7\n\t"
"SMLATB r9, r4, r6, r9\n\t"
"SMLATB r11, r5, r7, r11\n\t"
/* Montgomery reduce: top halves of r9/r11 are the top results. */
"SMULTB r6, r12, r9\n\t"
"SMULTB r7, r12, r11\n\t"
"SMLABB r9, r12, r6, r9\n\t"
"SMLABB r11, r12, r7, r11\n\t"
/* Pack: top half from r9/r11, bottom half from the top of r8/r10. */
"PKHTB r4, r9, r8, ASR #16\n\t"
"PKHTB r5, r11, r10, ASR #16\n\t"
#else
/* Same computation without the halfword multiply and pack instructions. */
/* r8 = aT * bT for the first word, r10 likewise for the second word. */
"ASR r8, r4, #16\n\t"
"ASR r10, r5, #16\n\t"
"ASR r9, r6, #16\n\t"
"ASR r11, r7, #16\n\t"
"MUL r8, r8, r9\n\t"
"MUL r10, r10, r11\n\t"
/* Montgomery reduce r8 and r10. The first MUL below takes r8 rather than
 * the sign-extended r9; only the low 16 bits of the product are kept by the
 * following SBFX, so the result is the same. */
"MOV r12, #0xcff\n\t"
"SBFX r9, r8, #0, #16\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r9, r12, r8\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r9, r9, #0, #16\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r8, r12, r9, r8\n\t"
"MLA r10, r12, r11, r10\n\t"
/* r9 = z, r11 = -z, both sign-extended from 16 bits. */
"RSB r11, lr, #0x0\n\t"
"SBFX r9, lr, #0, #16\n\t"
"SBFX r11, r11, #0, #16\n\t"
/* r8 = z * mont(aT * bT), r10 = -z * mont(aT * bT) for the second word. */
"ASR r8, r8, #16\n\t"
"ASR r10, r10, #16\n\t"
"MUL r8, r9, r8\n\t"
"MUL r10, r11, r10\n\t"
/* Add aB * bB. */
"SBFX r9, r4, #0, #16\n\t"
"SBFX r11, r5, #0, #16\n\t"
"SBFX r12, r6, #0, #16\n\t"
"MLA r8, r9, r12, r8\n\t"
"SBFX r12, r7, #0, #16\n\t"
"MLA r10, r11, r12, r10\n\t"
/* Montgomery reduce: top halves of r8/r10 are the bottom results. */
"MOV r12, #0xcff\n\t"
"SBFX r9, r8, #0, #16\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r9, r12, r9\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r9, r9, #0, #16\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r8, r12, r9, r8\n\t"
"MLA r10, r12, r11, r10\n\t"
/* r9 = aB * bT + aT * bB, r11 likewise for the second word. */
"SBFX r9, r4, #0, #16\n\t"
"SBFX r11, r5, #0, #16\n\t"
"ASR r12, r6, #16\n\t"
"MUL r9, r9, r12\n\t"
"ASR r12, r7, #16\n\t"
"MUL r11, r11, r12\n\t"
"ASR r4, r4, #16\n\t"
"ASR r5, r5, #16\n\t"
"SBFX r12, r6, #0, #16\n\t"
"MLA r9, r4, r12, r9\n\t"
"SBFX r12, r7, #0, #16\n\t"
"MLA r11, r5, r12, r11\n\t"
/* Montgomery reduce: top halves of r9/r11 are the top results. */
"MOV r12, #0xcff\n\t"
"SBFX r6, r9, #0, #16\n\t"
"SBFX r7, r11, #0, #16\n\t"
"MUL r6, r12, r6\n\t"
"MUL r7, r12, r7\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r4, r6, #0, #16\n\t"
"SBFX r5, r7, #0, #16\n\t"
"MLA r9, r12, r4, r9\n\t"
"MLA r11, r12, r5, r11\n\t"
/* Pack: keep the top half of r9/r11, insert the top half of r8/r10 as the
 * bottom half. */
"BFC r9, #0, #16\n\t"
"BFC r11, #0, #16\n\t"
"ORR r4, r9, r8, LSR #16\n\t"
"ORR r5, r11, r10, LSR #16\n\t"
#endif
"STM %[r]!, {r4, r5}\n\t"
/* Restore the table offset and loop until it reached 0x80 (see CMP above). */
"POP {r8}\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_basemul_mont_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BNE.N L_mlkem_basemul_mont_loop\n\t"
#else
"BNE.N L_mlkem_basemul_mont_loop_%=\n\t"
#endif
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b),
[L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c)
:
: "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
"r12", "lr"
);
}
/* Pairwise multiply two arrays of 256 16-bit values with Montgomery
 * reduction and add the products to the values already in r.
 *
 * Each loop iteration reads two 32-bit words from a and two from b (each
 * word holds a bottom and a top 16-bit value) and one 16-bit constant z from
 * entries 64..127 of L_mlkem_basemul_mont_zetas. With a word of a written as
 * (aB, aT) and the matching word of b as (bB, bT), the first product word is
 *     bottom: mont(aB * bB + z * mont(aT * bT))
 *     top:    mont(aB * bT + aT * bB)
 * and the second product word is the same with -z in place of z. Each
 * 16-bit product is added to the 16-bit value at the same position in r and
 * the sum is stored back to r.
 *
 * mont(x) is the top 16 bits of x + 0xd01 * t, where t is the signed low
 * 16 bits of x * 0xcff. As 0xd01 * 0xcff = -1 (mod 2^16), the low 16 bits of
 * that sum are always zero.
 *
 * The loop runs 64 times (table byte offset 0x00..0x7e in steps of 2).
 *
 * r  [in,out]  Accumulator: 256 16-bit values, updated in place.
 * a  [in]      First operand: 256 16-bit values.
 * b  [in]      Second operand: 256 16-bit values.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont_add(sword16* r_p,
const sword16* a_p, const sword16* b_p)
#else
WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont_add(sword16* r,
const sword16* a, const sword16* b)
#endif
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
register sword16* r __asm__ ("r0") = (sword16*)r_p;
register const sword16* a __asm__ ("r1") = (const sword16*)a_p;
register const sword16* b __asm__ ("r2") = (const sword16*)b_p;
register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r3") =
(word16*)&L_mlkem_basemul_mont_zetas;
#else
register word16* L_mlkem_basemul_mont_zetas_c =
(word16*)&L_mlkem_basemul_mont_zetas;
#endif
__asm__ __volatile__ (
/* r3 = address of table entry 64 (byte offset 0x80). */
"MOV r3, %[L_mlkem_basemul_mont_zetas]\n\t"
"ADD r3, r3, #0x80\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
/* r12 = 0x0cff0d01: top half 0xcff, bottom half 0xd01. */
"MOV r12, #0xd01\n\t"
"MOVT r12, #0xcff\n\t"
#endif
/* r8 = byte offset of the next table entry. */
"MOV r8, #0x0\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_basemul_mont_add_loop:\n\t"
#else
"L_mlkem_thumb2_basemul_mont_add_loop_%=:\n\t"
#endif
"LDM %[a]!, {r4, r5}\n\t"
"LDM %[b]!, {r6, r7}\n\t"
/* Load only the 16-bit table entry. Just the low half of lr is used below,
 * and a 32-bit load at the final offset (0x80 + 0x7e) would read two bytes
 * beyond the end of the 256-byte table. */
"LDRH lr, [r3, r8]\n\t"
"ADD r8, r8, #0x2\n\t"
/* r8 is reused as a scratch register below, so the offset is saved on the
 * stack. The flags from this CMP are consumed by the BNE at the end of the
 * loop. */
"PUSH {r8}\n\t"
"CMP r8, #0x80\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
/* r8 = aT * bT for the first word, r10 likewise for the second word. */
"SMULTT r8, r4, r6\n\t"
"SMULTT r10, r5, r7\n\t"
/* Montgomery reduce r8 and r10; the results are in their top halves. */
"SMULTB r9, r12, r8\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r8, r12, r9, r8\n\t"
"SMLABB r10, r12, r11, r10\n\t"
/* r8 = z * mont(aT * bT), r10 = -z * mont(aT * bT) for the second word. */
"RSB r11, lr, #0x0\n\t"
"SMULBT r8, lr, r8\n\t"
"SMULBT r10, r11, r10\n\t"
/* Add aB * bB. */
"SMLABB r8, r4, r6, r8\n\t"
"SMLABB r10, r5, r7, r10\n\t"
/* Montgomery reduce: top halves of r8/r10 are the bottom products. */
"SMULTB r9, r12, r8\n\t"
"SMULTB r11, r12, r10\n\t"
"SMLABB r8, r12, r9, r8\n\t"
"SMLABB r10, r12, r11, r10\n\t"
/* r9 = aB * bT + aT * bB, r11 likewise for the second word. */
"SMULBT r9, r4, r6\n\t"
"SMULBT r11, r5, r7\n\t"
"SMLATB r9, r4, r6, r9\n\t"
"SMLATB r11, r5, r7, r11\n\t"
/* Montgomery reduce: top halves of r9/r11 are the top products. */
"SMULTB r6, r12, r9\n\t"
"SMULTB r7, r12, r11\n\t"
"SMLABB r9, r12, r6, r9\n\t"
"SMLABB r11, r12, r7, r11\n\t"
/* Fetch the current values of r (no write-back; STM below advances r). */
"LDM %[r], {r4, r5}\n\t"
/* Pack the products, then add them halfword-wise to the loaded values. */
"PKHTB r9, r9, r8, ASR #16\n\t"
"PKHTB r11, r11, r10, ASR #16\n\t"
"SADD16 r4, r4, r9\n\t"
"SADD16 r5, r5, r11\n\t"
#else
/* Same computation without the halfword multiply, pack and SIMD add
 * instructions. */
/* r8 = aT * bT for the first word, r10 likewise for the second word. */
"ASR r8, r4, #16\n\t"
"ASR r10, r5, #16\n\t"
"ASR r9, r6, #16\n\t"
"ASR r11, r7, #16\n\t"
"MUL r8, r8, r9\n\t"
"MUL r10, r10, r11\n\t"
/* Montgomery reduce r8 and r10. The first MUL below takes r8 rather than
 * the sign-extended r9; only the low 16 bits of the product are kept by the
 * following SBFX, so the result is the same. */
"MOV r12, #0xcff\n\t"
"SBFX r9, r8, #0, #16\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r9, r12, r8\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r9, r9, #0, #16\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r8, r12, r9, r8\n\t"
"MLA r10, r12, r11, r10\n\t"
/* r9 = z, r11 = -z, both sign-extended from 16 bits. */
"RSB r11, lr, #0x0\n\t"
"SBFX r9, lr, #0, #16\n\t"
"SBFX r11, r11, #0, #16\n\t"
/* r8 = z * mont(aT * bT), r10 = -z * mont(aT * bT) for the second word. */
"ASR r8, r8, #16\n\t"
"ASR r10, r10, #16\n\t"
"MUL r8, r9, r8\n\t"
"MUL r10, r11, r10\n\t"
/* Add aB * bB. */
"SBFX r9, r4, #0, #16\n\t"
"SBFX r11, r5, #0, #16\n\t"
"SBFX r12, r6, #0, #16\n\t"
"MLA r8, r9, r12, r8\n\t"
"SBFX r12, r7, #0, #16\n\t"
"MLA r10, r11, r12, r10\n\t"
/* Montgomery reduce: top halves of r8/r10 are the bottom products. */
"MOV r12, #0xcff\n\t"
"SBFX r9, r8, #0, #16\n\t"
"SBFX r11, r10, #0, #16\n\t"
"MUL r9, r12, r9\n\t"
"MUL r11, r12, r11\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r9, r9, #0, #16\n\t"
"SBFX r11, r11, #0, #16\n\t"
"MLA r8, r12, r9, r8\n\t"
"MLA r10, r12, r11, r10\n\t"
/* r9 = aB * bT + aT * bB, r11 likewise for the second word. */
"SBFX r9, r4, #0, #16\n\t"
"SBFX r11, r5, #0, #16\n\t"
"ASR r12, r6, #16\n\t"
"MUL r9, r9, r12\n\t"
"ASR r12, r7, #16\n\t"
"MUL r11, r11, r12\n\t"
"ASR r4, r4, #16\n\t"
"ASR r5, r5, #16\n\t"
"SBFX r12, r6, #0, #16\n\t"
"MLA r9, r4, r12, r9\n\t"
"SBFX r12, r7, #0, #16\n\t"
"MLA r11, r5, r12, r11\n\t"
/* Montgomery reduce: top halves of r9/r11 are the top products. */
"MOV r12, #0xcff\n\t"
"SBFX r6, r9, #0, #16\n\t"
"SBFX r7, r11, #0, #16\n\t"
"MUL r6, r12, r6\n\t"
"MUL r7, r12, r7\n\t"
"MOV r12, #0xd01\n\t"
"SBFX r4, r6, #0, #16\n\t"
"SBFX r5, r7, #0, #16\n\t"
"MLA r9, r12, r4, r9\n\t"
"MLA r11, r12, r5, r11\n\t"
/* Fetch the current values of r (no write-back; STM below advances r). */
"LDM %[r], {r4, r5}\n\t"
/* Pack the products into r9/r11: top half kept, bottom half taken from the
 * top of r8/r10. */
"BFC r9, #0, #16\n\t"
"BFC r11, #0, #16\n\t"
"ORR r9, r9, r8, LSR #16\n\t"
"ORR r11, r11, r10, LSR #16\n\t"
/* Halfword-wise add: r8/r10 get the sums whose low 16 bits are the bottom
 * results; adding with the bottom half cleared gives the top results without
 * a carry from the bottom half; BFI merges the two. */
"ADD r8, r4, r9\n\t"
"ADD r10, r5, r11\n\t"
"BFC r9, #0, #16\n\t"
"BFC r11, #0, #16\n\t"
"ADD r4, r4, r9\n\t"
"ADD r5, r5, r11\n\t"
"BFI r4, r8, #0, #16\n\t"
"BFI r5, r10, #0, #16\n\t"
#endif
"STM %[r]!, {r4, r5}\n\t"
/* Restore the table offset and loop until it reached 0x80 (see CMP above). */
"POP {r8}\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_thumb2_basemul_mont_add_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BNE.N L_mlkem_thumb2_basemul_mont_add_loop\n\t"
#else
"BNE.N L_mlkem_thumb2_basemul_mont_add_loop_%=\n\t"
#endif
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b),
[L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c)
:
: "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
"r12", "lr"
);
}
/* Conditionally subtract 0xd01 (3329) from each of 256 16-bit values, in
 * place.
 *
 * 0xd01 is subtracted from every value; where the 16-bit result has its sign
 * bit set, 0xd01 is added back. The only branch is the loop branch on the
 * counter, so no branch depends on the data.
 *
 * Each iteration handles four 32-bit words (eight values); r1 counts down
 * from 0x100 in steps of 8, giving 32 iterations.
 *
 * p  [in,out]  256 16-bit values, updated in place.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void mlkem_thumb2_csubq(sword16* p_p)
#else
WC_OMIT_FRAME_POINTER void mlkem_thumb2_csubq(sword16* p)
#endif
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
register sword16* p __asm__ ("r0") = (sword16*)p_p;
/* The table pointer is never referenced by the assembly text; r1 is
 * overwritten with the loop count instead.
 * NOTE(review): this binding appears to exist only so that r1 is an output
 * operand of the asm statement - confirm. */
register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r1") =
(word16*)&L_mlkem_basemul_mont_zetas;
#else
register word16* L_mlkem_basemul_mont_zetas_c =
(word16*)&L_mlkem_basemul_mont_zetas;
#endif
__asm__ __volatile__ (
/* r11 = 0xd01: multiplier used to turn the sign masks into add-back values. */
"MOV r11, #0xd01\n\t"
/* r12 = 0xd01, or 0x0d010d01 (one copy per halfword) for the SIMD path. */
"MOV r12, #0xd01\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
"MOVT r12, #0xd01\n\t"
#endif
/* lr = 0x80008000: sign bit of each halfword. */
"MOV lr, #0x8000\n\t"
"MOVT lr, #0x8000\n\t"
/* r1 = number of 16-bit values left to process. */
"MOV r1, #0x100\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_csubq_loop:\n\t"
#else
"L_mlkem_thumb2_csubq_loop_%=:\n\t"
#endif
/* Load eight values without write-back; the STM below advances p. */
"LDM %[p], {r2, r3, r4, r5}\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M
/* Subtract 0xd01 from both halfwords of each word. */
"SSUB16 r2, r2, r12\n\t"
"SSUB16 r3, r3, r12\n\t"
"SSUB16 r4, r4, r12\n\t"
"SSUB16 r5, r5, r12\n\t"
/* Isolate the sign bit of each halfword... */
"AND r6, r2, lr\n\t"
"AND r7, r3, lr\n\t"
"AND r8, r4, lr\n\t"
"AND r9, r5, lr\n\t"
/* ...move it to bit 0 of its halfword (0 or 1 per halfword)... */
"LSR r6, r6, #15\n\t"
"LSR r7, r7, #15\n\t"
"LSR r8, r8, #15\n\t"
"LSR r9, r9, #15\n\t"
/* ...and scale by 0xd01: each halfword becomes 0xd01 if negative, else 0. */
"MUL r6, r6, r11\n\t"
"MUL r7, r7, r11\n\t"
"MUL r8, r8, r11\n\t"
"MUL r9, r9, r11\n\t"
/* Add 0xd01 back to the halfwords that went negative. */
"SADD16 r2, r2, r6\n\t"
"SADD16 r3, r3, r7\n\t"
"SADD16 r4, r4, r8\n\t"
"SADD16 r5, r5, r9\n\t"
#else
/* Without SSUB16: subtract 0xd01 from the bottom halfword in a scratch
 * register, from the top halfword in place (0xd01 << 16), then merge the
 * bottom result with BFI. */
"SUB r6, r2, r12\n\t"
"SUB r2, r2, r12, LSL #16\n\t"
"BFI r2, r6, #0, #16\n\t"
"SUB r7, r3, r12\n\t"
"SUB r3, r3, r12, LSL #16\n\t"
"BFI r3, r7, #0, #16\n\t"
"SUB r8, r4, r12\n\t"
"SUB r4, r4, r12, LSL #16\n\t"
"BFI r4, r8, #0, #16\n\t"
"SUB r9, r5, r12\n\t"
"SUB r5, r5, r12, LSL #16\n\t"
"BFI r5, r9, #0, #16\n\t"
/* Isolate the sign bit of each halfword... */
"AND r6, r2, lr\n\t"
"AND r7, r3, lr\n\t"
"AND r8, r4, lr\n\t"
"AND r9, r5, lr\n\t"
/* ...move it to bit 0 of its halfword (0 or 1 per halfword)... */
"LSR r6, r6, #15\n\t"
"LSR r7, r7, #15\n\t"
"LSR r8, r8, #15\n\t"
"LSR r9, r9, #15\n\t"
/* ...and scale by 0xd01: each halfword becomes 0xd01 if negative, else 0. */
"MUL r6, r6, r11\n\t"
"MUL r7, r7, r11\n\t"
"MUL r8, r8, r11\n\t"
"MUL r9, r9, r11\n\t"
/* Without SADD16: r10 gets the sum whose low 16 bits are the bottom result;
 * the add-back value then has its bottom half cleared so the second ADD
 * only changes the top halfword; BFI merges the bottom result. */
"ADD r10, r2, r6\n\t"
"BFC r6, #0, #16\n\t"
"ADD r2, r2, r6\n\t"
"BFI r2, r10, #0, #16\n\t"
"ADD r10, r3, r7\n\t"
"BFC r7, #0, #16\n\t"
"ADD r3, r3, r7\n\t"
"BFI r3, r10, #0, #16\n\t"
"ADD r10, r4, r8\n\t"
"BFC r8, #0, #16\n\t"
"ADD r4, r4, r8\n\t"
"BFI r4, r10, #0, #16\n\t"
"ADD r10, r5, r9\n\t"
"BFC r9, #0, #16\n\t"
"ADD r5, r5, r9\n\t"
"BFI r5, r10, #0, #16\n\t"
#endif
/* Store the eight results and advance p by 16 bytes. */
"STM %[p]!, {r2, r3, r4, r5}\n\t"
/* Eight values done; loop until the count reaches zero. */
"SUBS r1, r1, #0x8\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_thumb2_csubq_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BNE.N L_mlkem_thumb2_csubq_loop\n\t"
#else
"BNE.N L_mlkem_thumb2_csubq_loop_%=\n\t"
#endif
: [p] "+r" (p),
[L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c)
:
: "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
"r11", "r12", "lr"
);
}
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p_p,
unsigned int len_p, const byte* r_p, unsigned int rLen_p)
#else
WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p,
unsigned int len, const byte* r, unsigned int rLen)
#endif
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
register sword16* p __asm__ ("r0") = (sword16*)p_p;
register unsigned int len __asm__ ("r1") = (unsigned int)len_p;
register const byte* r __asm__ ("r2") = (const byte*)r_p;
register unsigned int rLen __asm__ ("r3") = (unsigned int)rLen_p;
register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r4") =
(word16*)&L_mlkem_basemul_mont_zetas;
#else
register word16* L_mlkem_basemul_mont_zetas_c =
(word16*)&L_mlkem_basemul_mont_zetas;
#endif
__asm__ __volatile__ (
"MOV r8, #0xd01\n\t"
"MOV r9, #0x0\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_rej_uniform_loop_no_fail:\n\t"
#else
"L_mlkem_thumb2_rej_uniform_loop_no_fail_%=:\n\t"
#endif
"CMP %[len], #0x8\n\t"
#if defined(__GNUC__)
"BLT L_mlkem_thumb2_rej_uniform_done_no_fail_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BLT.N L_mlkem_thumb2_rej_uniform_done_no_fail\n\t"
#else
"BLT.N L_mlkem_thumb2_rej_uniform_done_no_fail_%=\n\t"
#endif
"LDM %[r]!, {r4, r5, r6}\n\t"
"UBFX r7, r4, #0, #12\n\t"
"STRH r7, [%[p], r9]\n\t"
"SUB r10, r7, r8\n\t"
"LSR r10, r10, #31\n\t"
"SUB %[len], %[len], r10\n\t"
"ADD r9, r9, r10, LSL #1\n\t"
"UBFX r7, r4, #12, #12\n\t"
"STRH r7, [%[p], r9]\n\t"
"SUB r10, r7, r8\n\t"
"LSR r10, r10, #31\n\t"
"SUB %[len], %[len], r10\n\t"
"ADD r9, r9, r10, LSL #1\n\t"
"UBFX r7, r4, #24, #8\n\t"
"BFI r7, r5, #8, #4\n\t"
"STRH r7, [%[p], r9]\n\t"
"SUB r10, r7, r8\n\t"
"LSR r10, r10, #31\n\t"
"SUB %[len], %[len], r10\n\t"
"ADD r9, r9, r10, LSL #1\n\t"
"UBFX r7, r5, #4, #12\n\t"
"STRH r7, [%[p], r9]\n\t"
"SUB r10, r7, r8\n\t"
"LSR r10, r10, #31\n\t"
"SUB %[len], %[len], r10\n\t"
"ADD r9, r9, r10, LSL #1\n\t"
"UBFX r7, r5, #16, #12\n\t"
"STRH r7, [%[p], r9]\n\t"
"SUB r10, r7, r8\n\t"
"LSR r10, r10, #31\n\t"
"SUB %[len], %[len], r10\n\t"
"ADD r9, r9, r10, LSL #1\n\t"
"UBFX r7, r5, #28, #4\n\t"
"BFI r7, r6, #4, #8\n\t"
"STRH r7, [%[p], r9]\n\t"
"SUB r10, r7, r8\n\t"
"LSR r10, r10, #31\n\t"
"SUB %[len], %[len], r10\n\t"
"ADD r9, r9, r10, LSL #1\n\t"
"UBFX r7, r6, #8, #12\n\t"
"STRH r7, [%[p], r9]\n\t"
"SUB r10, r7, r8\n\t"
"LSR r10, r10, #31\n\t"
"SUB %[len], %[len], r10\n\t"
"ADD r9, r9, r10, LSL #1\n\t"
"UBFX r7, r6, #20, #12\n\t"
"STRH r7, [%[p], r9]\n\t"
"SUB r10, r7, r8\n\t"
"LSR r10, r10, #31\n\t"
"SUB %[len], %[len], r10\n\t"
"ADD r9, r9, r10, LSL #1\n\t"
"SUBS %[rLen], %[rLen], #0xc\n\t"
#if defined(__GNUC__)
"BNE L_mlkem_thumb2_rej_uniform_loop_no_fail_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BNE.N L_mlkem_thumb2_rej_uniform_loop_no_fail\n\t"
#else
"BNE.N L_mlkem_thumb2_rej_uniform_loop_no_fail_%=\n\t"
#endif
#if defined(__GNUC__)
"B L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"B.N L_mlkem_thumb2_rej_uniform_done\n\t"
#else
"B.N L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#endif
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_rej_uniform_done_no_fail:\n\t"
#else
"L_mlkem_thumb2_rej_uniform_done_no_fail_%=:\n\t"
#endif
"CMP %[len], #0x0\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BEQ.N L_mlkem_thumb2_rej_uniform_done\n\t"
#else
"BEQ.N L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#endif
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_rej_uniform_loop:\n\t"
#else
"L_mlkem_thumb2_rej_uniform_loop_%=:\n\t"
#endif
"LDM %[r]!, {r4, r5, r6}\n\t"
"UBFX r7, r4, #0, #12\n\t"
"CMP r7, r8\n\t"
#if defined(__GNUC__)
"BGE L_mlkem_thumb2_rej_uniform_fail_0_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BGE.N L_mlkem_thumb2_rej_uniform_fail_0\n\t"
#else
"BGE.N L_mlkem_thumb2_rej_uniform_fail_0_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BEQ.N L_mlkem_thumb2_rej_uniform_done\n\t"
#else
"BEQ.N L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#endif
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_rej_uniform_fail_0:\n\t"
#else
"L_mlkem_thumb2_rej_uniform_fail_0_%=:\n\t"
#endif
"UBFX r7, r4, #12, #12\n\t"
"CMP r7, r8\n\t"
#if defined(__GNUC__)
"BGE L_mlkem_thumb2_rej_uniform_fail_1_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BGE.N L_mlkem_thumb2_rej_uniform_fail_1\n\t"
#else
"BGE.N L_mlkem_thumb2_rej_uniform_fail_1_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BEQ.N L_mlkem_thumb2_rej_uniform_done\n\t"
#else
"BEQ.N L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#endif
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_rej_uniform_fail_1:\n\t"
#else
"L_mlkem_thumb2_rej_uniform_fail_1_%=:\n\t"
#endif
"UBFX r7, r4, #24, #8\n\t"
"BFI r7, r5, #8, #4\n\t"
"CMP r7, r8\n\t"
#if defined(__GNUC__)
"BGE L_mlkem_thumb2_rej_uniform_fail_2_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BGE.N L_mlkem_thumb2_rej_uniform_fail_2\n\t"
#else
"BGE.N L_mlkem_thumb2_rej_uniform_fail_2_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BEQ.N L_mlkem_thumb2_rej_uniform_done\n\t"
#else
"BEQ.N L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#endif
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_rej_uniform_fail_2:\n\t"
#else
"L_mlkem_thumb2_rej_uniform_fail_2_%=:\n\t"
#endif
"UBFX r7, r5, #4, #12\n\t"
"CMP r7, r8\n\t"
#if defined(__GNUC__)
"BGE L_mlkem_thumb2_rej_uniform_fail_3_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BGE.N L_mlkem_thumb2_rej_uniform_fail_3\n\t"
#else
"BGE.N L_mlkem_thumb2_rej_uniform_fail_3_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BEQ.N L_mlkem_thumb2_rej_uniform_done\n\t"
#else
"BEQ.N L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#endif
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_rej_uniform_fail_3:\n\t"
#else
"L_mlkem_thumb2_rej_uniform_fail_3_%=:\n\t"
#endif
"UBFX r7, r5, #16, #12\n\t"
"CMP r7, r8\n\t"
#if defined(__GNUC__)
"BGE L_mlkem_thumb2_rej_uniform_fail_4_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BGE.N L_mlkem_thumb2_rej_uniform_fail_4\n\t"
#else
"BGE.N L_mlkem_thumb2_rej_uniform_fail_4_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BEQ.N L_mlkem_thumb2_rej_uniform_done\n\t"
#else
"BEQ.N L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#endif
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_rej_uniform_fail_4:\n\t"
#else
"L_mlkem_thumb2_rej_uniform_fail_4_%=:\n\t"
#endif
"UBFX r7, r5, #28, #4\n\t"
"BFI r7, r6, #4, #8\n\t"
"CMP r7, r8\n\t"
#if defined(__GNUC__)
"BGE L_mlkem_thumb2_rej_uniform_fail_5_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BGE.N L_mlkem_thumb2_rej_uniform_fail_5\n\t"
#else
"BGE.N L_mlkem_thumb2_rej_uniform_fail_5_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BEQ.N L_mlkem_thumb2_rej_uniform_done\n\t"
#else
"BEQ.N L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#endif
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_rej_uniform_fail_5:\n\t"
#else
"L_mlkem_thumb2_rej_uniform_fail_5_%=:\n\t"
#endif
"UBFX r7, r6, #8, #12\n\t"
"CMP r7, r8\n\t"
#if defined(__GNUC__)
"BGE L_mlkem_thumb2_rej_uniform_fail_6_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BGE.N L_mlkem_thumb2_rej_uniform_fail_6\n\t"
#else
"BGE.N L_mlkem_thumb2_rej_uniform_fail_6_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BEQ.N L_mlkem_thumb2_rej_uniform_done\n\t"
#else
"BEQ.N L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#endif
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_rej_uniform_fail_6:\n\t"
#else
"L_mlkem_thumb2_rej_uniform_fail_6_%=:\n\t"
#endif
"UBFX r7, r6, #20, #12\n\t"
"CMP r7, r8\n\t"
#if defined(__GNUC__)
"BGE L_mlkem_thumb2_rej_uniform_fail_7_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BGE.N L_mlkem_thumb2_rej_uniform_fail_7\n\t"
#else
"BGE.N L_mlkem_thumb2_rej_uniform_fail_7_%=\n\t"
#endif
"STRH r7, [%[p], r9]\n\t"
"SUBS %[len], %[len], #0x1\n\t"
"ADD r9, r9, #0x2\n\t"
#if defined(__GNUC__)
"BEQ L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BEQ.N L_mlkem_thumb2_rej_uniform_done\n\t"
#else
"BEQ.N L_mlkem_thumb2_rej_uniform_done_%=\n\t"
#endif
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_rej_uniform_fail_7:\n\t"
#else
"L_mlkem_thumb2_rej_uniform_fail_7_%=:\n\t"
#endif
"SUBS %[rLen], %[rLen], #0xc\n\t"
#if defined(__GNUC__)
"BGT L_mlkem_thumb2_rej_uniform_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BGT.N L_mlkem_thumb2_rej_uniform_loop\n\t"
#else
"BGT.N L_mlkem_thumb2_rej_uniform_loop_%=\n\t"
#endif
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_mlkem_thumb2_rej_uniform_done:\n\t"
#else
"L_mlkem_thumb2_rej_uniform_done_%=:\n\t"
#endif
"LSR r0, r9, #1\n\t"
: [p] "+r" (p), [len] "+r" (len), [r] "+r" (r), [rLen] "+r" (rLen),
[L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c)
:
: "memory", "cc", "r5", "r6", "r7", "r8", "r9", "r10"
);
return (word32)(size_t)p;
}
#endif
#endif
#endif
#endif