#include <wolfssl/wolfcrypt/libwolfssl_sources_asm.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM
#ifdef WOLFSSL_ARMASM_THUMB2
#ifdef WOLFSSL_ARMASM_INLINE
#ifdef __IAR_SYSTEMS_ICC__
#define __asm__ asm
#define __volatile__ volatile
#define WOLFSSL_NO_VAR_ASSIGN_REG
#endif
#ifdef __KEIL__
#define __asm__ __asm
#define __volatile__ volatile
#endif
#ifdef HAVE_POLY1305
#include <wolfssl/wolfcrypt/poly1305.h>
/* Absorb 16-byte message blocks into the Poly1305 accumulator.
 *
 * For every block the loop:
 *   1. adds the four message words, plus notLast as a fifth word, to the
 *      five-word accumulator kept at ctx + 0x10,
 *   2. multiplies the accumulator by the four words stored at ctx + 0,
 *   3. folds everything at and above bit 130 back into the low part
 *      multiplied by 5 (a partial reduction modulo 2^130 - 5).
 * The accumulator is written back to ctx + 0x10 once the loop finishes.
 *
 * In the comments below r[i] means 32-bit word i at ctx + 0 and h[j] means
 * word j of the accumulator at ctx + 0x10. These are presumably the r and h
 * members of Poly1305 - the structure layout is not visible in this file.
 *
 * ctx      Poly1305 context; words at ctx + 0 are read, the five words at
 *          ctx + 0x10 are read and updated.
 * m        Message data, read 16 bytes at a time with word loads.
 * len      Number of bytes to process. Nothing is done when len is 0.
 *          Otherwise 16 is subtracted per block and the loop repeats while
 *          the result is greater than zero as a signed value.
 *          NOTE(review): callers presumably pass a multiple of 16 - confirm.
 * notLast  Value added to h[4] for every block. Presumably 1 for full
 *          blocks and 0 for an already padded final block - confirm against
 *          the caller.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx_p,
const byte* m_p, word32 len_p, int notLast_p)
#else
WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx,
const byte* m, word32 len, int notLast)
#endif
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
/* Pin the arguments to r0-r3; the assembly below depends on that mapping
 * (see the literal register list in the UMAAL path). */
register Poly1305* ctx __asm__ ("r0") = (Poly1305*)ctx_p;
register const byte* m __asm__ ("r1") = (const byte*)m_p;
register word32 len __asm__ ("r2") = (word32)len_p;
register int notLast __asm__ ("r3") = (int)notLast_p;
#endif
__asm__ __volatile__ (
/* Reserve 28 bytes of stack:
 *   sp + 0..8   scratch for the low three product words (UMAAL path)
 *   sp + 12     ctx
 *   sp + 16     m (advanced by 16 each iteration)
 *   sp + 20     remaining len
 *   sp + 24     notLast
 */
"SUB sp, sp, #0x1c\n\t"
/* Nothing to do for an empty input. */
"CMP %[len], #0x0\n\t"
/* IAR before version 9 gets labels without the %= unique suffix. */
#if defined(__GNUC__)
"BEQ L_poly1305_thumb2_16_done_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BEQ.N L_poly1305_thumb2_16_done\n\t"
#else
"BEQ.N L_poly1305_thumb2_16_done_%=\n\t"
#endif
/* Save the four arguments; their registers are reused as scratch. */
"ADD lr, sp, #0xc\n\t"
"STM lr, {%[ctx], %[m], %[len], %[notLast]}\n\t"
/* lr = address of h; it stays there for the rest of the function. */
"ADD lr, %[ctx], #0x10\n\t"
/* h[0..4] -> r4-r8 */
"LDM lr, {r4, r5, r6, r7, r8}\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_poly1305_thumb2_16_loop:\n\t"
#else
"L_poly1305_thumb2_16_loop_%=:\n\t"
#endif
/* Add the next 16 message bytes, and notLast as the top word, to h. */
"LDR %[m], [sp, #16]\n\t"
"LDR %[len], [%[m]]\n\t"
"LDR %[notLast], [%[m], #4]\n\t"
"LDR r9, [%[m], #8]\n\t"
"LDR r10, [%[m], #12]\n\t"
/* r11 = saved notLast */
"LDR r11, [sp, #24]\n\t"
"ADDS r4, r4, %[len]\n\t"
"ADCS r5, r5, %[notLast]\n\t"
"ADCS r6, r6, r9\n\t"
"ADCS r7, r7, r10\n\t"
/* Advance the message pointer; ADD without S leaves the carry intact. */
"ADD %[m], %[m], #0x10\n\t"
"ADC r8, r8, r11\n\t"
#ifdef WOLFSSL_ARM_ARCH_7M
/* This path reloads every h[j] from memory during the multiply. */
"STM lr, {r4, r5, r6, r7, r8}\n\t"
#else
/* h[0..2] stay in r4-r6 for the multiply; only h[3] and h[4] are reloaded
 * from memory later, so only they are stored. */
"STR r7, [lr, #12]\n\t"
"STR r8, [lr, #16]\n\t"
#endif
/* Save the advanced message pointer, then point %[m] at ctx (r[]). */
"STR %[m], [sp, #16]\n\t"
"LDR %[m], [sp, #12]\n\t"
#ifdef WOLFSSL_ARM_ARCH_7M
/* Multiply h by r using UMULL/UMLAL/MUL/MLA only (no UMAAL is used on this
 * path, presumably because it is unavailable on the targeted cores).
 * %[ctx] is zeroed and used as a constant 0, %[notLast] holds the current
 * r[i], %[len] the current h[j], and r12 carries between columns.
 * The product ends up in r4-r11. */
"LDR %[notLast], [%[m]]\n\t"
"EOR %[ctx], %[ctx], %[ctx]\n\t"
/* r[0] * h[0] (h[0] still in r4) */
"UMULL r4, r5, %[notLast], r4\n\t"
/* r[0] * h[2] (h[2] still in r6) */
"UMULL r6, r7, %[notLast], r6\n\t"
/* r[0] * h[4] (h[4] still in r8); only the low word is kept */
"MUL r8, %[notLast], r8\n\t"
/* r[0] * h[1] */
"LDR %[len], [lr, #4]\n\t"
"MOV r12, %[ctx]\n\t"
"UMLAL r5, r12, %[notLast], %[len]\n\t"
/* r[0] * h[3] */
"LDR %[len], [lr, #12]\n\t"
"ADDS r6, r6, r12\n\t"
"ADC r7, r7, %[ctx]\n\t"
"UMLAL r7, r8, %[notLast], %[len]\n\t"
/* r[1] * h[0] */
"LDR %[notLast], [%[m], #4]\n\t"
"LDR %[len], [lr]\n\t"
"MOV r12, %[ctx]\n\t"
"UMLAL r5, r12, %[notLast], %[len]\n\t"
/* r[1] * h[1] */
"LDR %[len], [lr, #4]\n\t"
"ADDS r6, r6, r12\n\t"
"ADC r12, %[ctx], %[ctx]\n\t"
"UMLAL r6, r12, %[notLast], %[len]\n\t"
/* r[1] * h[2] */
"LDR %[len], [lr, #8]\n\t"
"ADDS r7, r7, r12\n\t"
"ADC r12, %[ctx], %[ctx]\n\t"
"UMLAL r7, r12, %[notLast], %[len]\n\t"
/* r[1] * h[3] */
"LDR %[len], [lr, #12]\n\t"
"ADDS r8, r8, r12\n\t"
"ADC r9, %[ctx], %[ctx]\n\t"
"UMLAL r8, r9, %[notLast], %[len]\n\t"
/* r[1] * h[4]; only the low word is kept */
"LDR %[len], [lr, #16]\n\t"
"MLA r9, %[notLast], %[len], r9\n\t"
/* r[2] * h[0] */
"LDR %[notLast], [%[m], #8]\n\t"
"LDR %[len], [lr]\n\t"
"MOV r12, %[ctx]\n\t"
"UMLAL r6, r12, %[notLast], %[len]\n\t"
/* r[2] * h[1] */
"LDR %[len], [lr, #4]\n\t"
"ADDS r7, r7, r12\n\t"
"ADC r12, %[ctx], %[ctx]\n\t"
"UMLAL r7, r12, %[notLast], %[len]\n\t"
/* r[2] * h[2] */
"LDR %[len], [lr, #8]\n\t"
"ADDS r8, r8, r12\n\t"
"ADC r12, %[ctx], %[ctx]\n\t"
"UMLAL r8, r12, %[notLast], %[len]\n\t"
/* r[2] * h[3] */
"LDR %[len], [lr, #12]\n\t"
"ADDS r9, r9, r12\n\t"
"ADC r10, %[ctx], %[ctx]\n\t"
"UMLAL r9, r10, %[notLast], %[len]\n\t"
/* r[2] * h[4]; only the low word is kept */
"LDR %[len], [lr, #16]\n\t"
"MLA r10, %[notLast], %[len], r10\n\t"
/* r[3] * h[0] */
"LDR %[notLast], [%[m], #12]\n\t"
"LDR %[len], [lr]\n\t"
"MOV r12, %[ctx]\n\t"
"UMLAL r7, r12, %[notLast], %[len]\n\t"
/* r[3] * h[1] */
"LDR %[len], [lr, #4]\n\t"
"ADDS r8, r8, r12\n\t"
"ADC r12, %[ctx], %[ctx]\n\t"
"UMLAL r8, r12, %[notLast], %[len]\n\t"
/* r[3] * h[2]; r10 already holds a partial sum, so the carry is added
 * into it instead of starting from zero */
"LDR %[len], [lr, #8]\n\t"
"ADDS r9, r9, r12\n\t"
"ADC r10, r10, %[ctx]\n\t"
"UMLAL r9, r10, %[notLast], %[len]\n\t"
/* r[3] * h[3] */
"LDR %[len], [lr, #12]\n\t"
"MOV r11, %[ctx]\n\t"
"UMLAL r10, r11, %[notLast], %[len]\n\t"
/* r[3] * h[4]; only the low word is kept. r12 is cleared here because
 * the reduction below adds r12 as a zero. */
"LDR %[len], [lr, #16]\n\t"
"MOV r12, %[ctx]\n\t"
"MLA r11, %[notLast], %[len], r11\n\t"
#else
/* Multiply h by r using UMAAL.
 * r[0..3] are loaded into r0-r3 by literal register name and then used
 * through the %[ctx], %[m], %[len] and %[notLast] operands, so this path
 * relies on those operands living in r0-r3.
 * NOTE(review): that holds when the register variables above are pinned;
 * under WOLFSSL_NO_VAR_ASSIGN_REG nothing visible here pins them - confirm. */
"LDM %[m], {r0, r1, r2, r3}\n\t"
/* r[0] * h[0] */
"UMULL r10, r11, %[ctx], r4\n\t"
/* r[1] * h[0] */
"UMULL r12, r7, %[m], r4\n\t"
/* r[0] * h[1] */
"UMAAL r11, r12, %[ctx], r5\n\t"
/* r[2] * h[0] */
"UMULL r8, r9, %[len], r4\n\t"
/* r[1] * h[1] */
"UMAAL r12, r8, %[m], r5\n\t"
/* r[0] * h[2] */
"UMAAL r12, r7, %[ctx], r6\n\t"
/* r[3] * h[0] */
"UMAAL r8, r9, %[notLast], r4\n\t"
/* Product words 0-2 are complete; park them on the stack to free r10-r12. */
"STM sp, {r10, r11, r12}\n\t"
/* r[2] * h[1] */
"UMAAL r7, r8, %[len], r5\n\t"
/* h[0] is no longer needed: r4 = h[3] */
"LDR r4, [lr, #12]\n\t"
/* r[1] * h[2] */
"UMULL r10, r11, %[m], r6\n\t"
/* r[2] * h[2] */
"UMAAL r8, r9, %[len], r6\n\t"
/* r[0] * h[3] */
"UMAAL r7, r10, %[ctx], r4\n\t"
/* r[3] * h[1] */
"UMAAL r8, r11, %[notLast], r5\n\t"
/* r[1] * h[3] */
"UMAAL r8, r10, %[m], r4\n\t"
/* r[3] * h[2] */
"UMAAL r9, r11, %[notLast], r6\n\t"
/* r[2] * h[3] */
"UMAAL r9, r10, %[len], r4\n\t"
/* h[1] is no longer needed: r5 = h[4] */
"LDR r5, [lr, #16]\n\t"
/* r[3] * h[3] */
"UMAAL r10, r11, %[notLast], r4\n\t"
"MOV r12, #0x0\n\t"
/* r[0] * h[4] */
"UMAAL r8, r12, %[ctx], r5\n\t"
/* r[1] * h[4] */
"UMAAL r9, r12, %[m], r5\n\t"
/* r[2] * h[4] */
"UMAAL r10, r12, %[len], r5\n\t"
/* r[3] * h[4] */
"UMAAL r11, r12, %[notLast], r5\n\t"
/* Reload product words 0-2 into r4-r6; words 3-7 are in r7-r11. */
"LDM sp, {r4, r5, r6}\n\t"
#endif
/* The reduction adds r12 as a zero word.
 * NOTE(review): on the UMAAL path r12 is the carry out of the last
 * multiply-accumulate, so this presumably relies on the multiplier words
 * being masked small enough that it is always 0 - confirm. */
/* Reload the remaining length. */
"LDR %[len], [sp, #20]\n\t"
/* Partial reduction. Bits 0-129 of the product are r4-r7 plus the low two
 * bits of r8. The rest (r8 with its low two bits cleared, r9, r10, r11) is
 * 4 * hi, where hi = product >> 130. The first carry chain adds 4 * hi and
 * the second adds hi, giving low + 5 * hi. The shifts and ORs that build hi
 * do not touch the flags, so they are interleaved with the first chain. */
"BIC %[notLast], r8, #0x3\n\t"
"AND r8, r8, #0x3\n\t"
"ADDS r4, r4, %[notLast]\n\t"
"LSR %[notLast], %[notLast], #2\n\t"
"ADCS r5, r5, r9\n\t"
"ORR %[notLast], %[notLast], r9, LSL #30\n\t"
"ADCS r6, r6, r10\n\t"
"LSR r9, r9, #2\n\t"
"ADCS r7, r7, r11\n\t"
"ORR r9, r9, r10, LSL #30\n\t"
"ADC r8, r8, r12\n\t"
"LSR r10, r10, #2\n\t"
"ADDS r4, r4, %[notLast]\n\t"
"ORR r10, r10, r11, LSL #30\n\t"
"ADCS r5, r5, r9\n\t"
"LSR r11, r11, #2\n\t"
"ADCS r6, r6, r10\n\t"
"ADCS r7, r7, r11\n\t"
"ADC r8, r8, r12\n\t"
/* One block consumed: len -= 16, loop while the result is > 0 (signed). */
"SUBS %[len], %[len], #0x10\n\t"
"STR %[len], [sp, #20]\n\t"
#if defined(__GNUC__)
"BGT L_poly1305_thumb2_16_loop_%=\n\t"
#elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"BGT.N L_poly1305_thumb2_16_loop\n\t"
#else
"BGT.N L_poly1305_thumb2_16_loop_%=\n\t"
#endif
/* Write the reduced accumulator back to ctx + 0x10. */
"STM lr, {r4, r5, r6, r7, r8}\n\t"
"\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_poly1305_thumb2_16_done:\n\t"
#else
"L_poly1305_thumb2_16_done_%=:\n\t"
#endif
/* Release the 28 bytes of stack reserved on entry. */
"ADD sp, sp, #0x1c\n\t"
/* All four arguments are read-write operands because their registers are
 * overwritten inside the block. */
: [ctx] "+r" (ctx), [m] "+r" (m), [len] "+r" (len),
[notLast] "+r" (notLast)
:
: "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
"r12", "lr"
);
}
/* Masks ANDed, word by word, with the first four 32-bit words of the key in
 * poly1305_set_key(): word 0 loses its top four bits, words 1-3 lose their
 * top four and bottom two bits. */
XALIGNED(16) static const word32 L_poly1305_thumb2_clamp[] = {
    0x0FFFFFFF, /* mask for key word 0 */
    0x0FFFFFFC, /* mask for key word 1 */
    0x0FFFFFFC, /* mask for key word 2 */
    0x0FFFFFFC  /* mask for key word 3 */
};
/* Initialise a Poly1305 context from a 32-byte key.
 *
 * Key words 0-3 are ANDed with L_poly1305_thumb2_clamp and stored at
 * ctx + 0; key words 4-7 are copied unchanged to ctx + 0x24; the five words
 * at ctx + 0x10 and the word at ctx + 52 are set to zero.
 * (Presumably these are the r, pad, h and leftover members of Poly1305 -
 * the structure layout is not visible in this file.)
 *
 * ctx  Poly1305 context to initialise.
 * key  Key data; 32 bytes are read with word loads.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void poly1305_set_key(Poly1305* ctx_p, const byte* key_p)
#else
WC_OMIT_FRAME_POINTER void poly1305_set_key(Poly1305* ctx, const byte* key)
#endif
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
/* Pin the arguments to r0/r1 and the mask table pointer to r2. */
register Poly1305* ctx __asm__ ("r0") = (Poly1305*)ctx_p;
register const byte* key __asm__ ("r1") = (const byte*)key_p;
register word32* L_poly1305_thumb2_clamp_c __asm__ ("r2") =
(word32*)&L_poly1305_thumb2_clamp;
#else
/* No register pinning in this configuration. */
register word32* L_poly1305_thumb2_clamp_c =
(word32*)&L_poly1305_thumb2_clamp;
#endif
__asm__ __volatile__ (
/* Load the four mask words into r6-r9. The table pointer is copied to r10
 * first, so its own register is free to be reused as scratch afterwards. */
"MOV r10, %[L_poly1305_thumb2_clamp]\n\t"
"LDM r10, {r6, r7, r8, r9}\n\t"
/* Copy key bytes 16..31 unchanged to ctx + 0x24. */
"LDR r2, [%[key], #16]\n\t"
"LDR r3, [%[key], #20]\n\t"
"LDR r4, [%[key], #24]\n\t"
"LDR r5, [%[key], #28]\n\t"
"ADD r10, %[ctx], #0x24\n\t"
"STM r10, {r2, r3, r4, r5}\n\t"
/* Load key bytes 0..15, apply the masks and store the result at ctx + 0. */
"LDR r2, [%[key]]\n\t"
"LDR r3, [%[key], #4]\n\t"
"LDR r4, [%[key], #8]\n\t"
"LDR r5, [%[key], #12]\n\t"
"AND r2, r2, r6\n\t"
"AND r3, r3, r7\n\t"
"AND r4, r4, r8\n\t"
"AND r5, r5, r9\n\t"
"ADD r10, %[ctx], #0x0\n\t"
"STM r10, {r2, r3, r4, r5}\n\t"
/* Zero the five words at ctx + 0x10; r5-r9 are cleared for the store. */
"EOR r6, r6, r6\n\t"
"EOR r7, r7, r7\n\t"
"EOR r8, r8, r8\n\t"
"EOR r9, r9, r9\n\t"
"ADD r10, %[ctx], #0x10\n\t"
"EOR r5, r5, r5\n\t"
"STM r10, {r5, r6, r7, r8, r9}\n\t"
/* Zero the word at ctx + 52. */
"STR r5, [%[ctx], #52]\n\t"
/* NOTE(review): r2 is written above but is not in the clobber list. With
 * the pinned register variables it is covered by the read-write table
 * operand that lives in r2; without pinning nothing visible here tells the
 * compiler that r2 changes - confirm for WOLFSSL_NO_VAR_ASSIGN_REG builds. */
: [ctx] "+r" (ctx), [key] "+r" (key),
[L_poly1305_thumb2_clamp] "+r" (L_poly1305_thumb2_clamp_c)
:
: "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
);
}
/* Produce the 16-byte MAC from the accumulator and wipe the context state.
 *
 * The five-word accumulator at ctx + 0x10 is fully reduced modulo 2^130 - 5
 * without branching, the four words at ctx + 0x24 are added modulo 2^128,
 * and the result is written to mac. Afterwards the words at ctx + 0x10,
 * ctx + 0 and ctx + 0x24 are overwritten with zeros.
 *
 * ctx  Poly1305 context holding the accumulator; cleared on return.
 * mac  Output buffer; 16 bytes are written with word stores.
 */
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
WC_OMIT_FRAME_POINTER void poly1305_final(Poly1305* ctx_p, byte* mac_p)
#else
WC_OMIT_FRAME_POINTER void poly1305_final(Poly1305* ctx, byte* mac)
#endif
{
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
/* Pin the arguments to r0/r1. */
register Poly1305* ctx __asm__ ("r0") = (Poly1305*)ctx_p;
register byte* mac __asm__ ("r1") = (byte*)mac_p;
#endif
__asm__ __volatile__ (
/* Load the accumulator: h[0..4] -> r2-r6. */
"ADD r11, %[ctx], #0x10\n\t"
"LDM r11, {r2, r3, r4, r5, r6}\n\t"
/* Compute the top word of h + 5. Only the carry chain matters, so r7 is
 * overwritten at every step and ends up as h[4] plus the carry in. */
"ADDS r7, r2, #0x5\n\t"
"ADCS r7, r3, #0x0\n\t"
"ADCS r7, r4, #0x0\n\t"
"ADCS r7, r5, #0x0\n\t"
"ADC r7, r6, #0x0\n\t"
/* Turn "top word >= 4" (that is, h + 5 >= 2^130) into 5, otherwise 0:
 * subtract 4, take the sign bit, subtract 1 to get an all-ones or zero
 * mask, and AND it with 5. */
"SUB r7, r7, #0x4\n\t"
"LSR r7, r7, #31\n\t"
"SUB r7, r7, #0x1\n\t"
"AND r7, r7, #0x5\n\t"
/* Add 0 or 5 to the low 128 bits; the carry out of word 3 is dropped. */
"ADDS r2, r2, r7\n\t"
"ADCS r3, r3, #0x0\n\t"
"ADCS r4, r4, #0x0\n\t"
"ADC r5, r5, #0x0\n\t"
/* Add the four words stored at ctx + 0x24, again modulo 2^128. */
"ADD r11, %[ctx], #0x24\n\t"
"LDM r11, {r7, r8, r9, r10}\n\t"
"ADDS r2, r2, r7\n\t"
"ADCS r3, r3, r8\n\t"
"ADCS r4, r4, r9\n\t"
"ADC r5, r5, r10\n\t"
/* Write the 16-byte MAC. */
"STR r2, [%[mac]]\n\t"
"STR r3, [%[mac], #4]\n\t"
"STR r4, [%[mac], #8]\n\t"
"STR r5, [%[mac], #12]\n\t"
/* Clear r2-r6 and use them to overwrite the context state. */
"EOR r2, r2, r2\n\t"
"EOR r3, r3, r3\n\t"
"EOR r4, r4, r4\n\t"
"EOR r5, r5, r5\n\t"
"EOR r6, r6, r6\n\t"
/* Zero the five accumulator words at ctx + 0x10. */
"ADD r11, %[ctx], #0x10\n\t"
"STM r11, {r2, r3, r4, r5, r6}\n\t"
/* Zero the four words at ctx + 0. */
"ADD r11, %[ctx], #0x0\n\t"
"STM r11, {r2, r3, r4, r5}\n\t"
/* Zero the four words at ctx + 0x24. */
"ADD r11, %[ctx], #0x24\n\t"
"STM r11, {r2, r3, r4, r5}\n\t"
: [ctx] "+r" (ctx), [mac] "+r" (mac)
:
: "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
"r11"
);
}
#endif
#endif
#endif
#endif