#include <wolfssl/wolfcrypt/libwolfssl_sources_asm.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM
#ifdef __aarch64__
#ifdef WOLFSSL_ARMASM_INLINE
#include <wolfssl/wolfcrypt/sha3.h>
#ifdef WOLFSSL_SHA3
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
/* Per-round constants for the 24 rounds run by BlockSha3_crypto.
 * Entry i is XORed into the first state word at the end of round i.
 */
static const word64 L_SHA3_transform_crypto_r[] = {
    0x0000000000000001,    /* round  0 */
    0x0000000000008082,    /* round  1 */
    0x800000000000808a,    /* round  2 */
    0x8000000080008000,    /* round  3 */
    0x000000000000808b,    /* round  4 */
    0x0000000080000001,    /* round  5 */
    0x8000000080008081,    /* round  6 */
    0x8000000000008009,    /* round  7 */
    0x000000000000008a,    /* round  8 */
    0x0000000000000088,    /* round  9 */
    0x0000000080008009,    /* round 10 */
    0x000000008000000a,    /* round 11 */
    0x000000008000808b,    /* round 12 */
    0x800000000000008b,    /* round 13 */
    0x8000000000008089,    /* round 14 */
    0x8000000000008003,    /* round 15 */
    0x8000000000008002,    /* round 16 */
    0x8000000000000080,    /* round 17 */
    0x000000000000800a,    /* round 18 */
    0x800000008000000a,    /* round 19 */
    0x8000000080008081,    /* round 20 */
    0x8000000000008080,    /* round 21 */
    0x0000000080000001,    /* round 22 */
    0x8000000080008008,    /* round 23 */
};
/* Run the 24-round SHA-3 block permutation over the state in place, using
 * the AArch64 SHA3 instructions (EOR3, RAX1, XAR, BCAX).
 *
 * The 25 state words are held in element 0 of v0..v24 for the whole
 * computation; v25..v31 are temporaries and x2 is the round counter.
 *
 * @param [in, out] state  Pointer to 25 64-bit words (200 bytes) that are
 *                         read, permuted and written back.
 */
void BlockSha3_crypto(word64* state)
{
    const word64* r = L_SHA3_transform_crypto_r;

    __asm__ __volatile__ (
#ifdef __APPLE__
    ".arch_extension sha3\n\t"
#endif
        /* Load state words 0..23 four at a time into element 0 of v0..v23,
         * then word 24 into v24. */
        "ld4 {v0.d, v1.d, v2.d, v3.d}[0], [%x[state]], #32\n\t"
        "ld4 {v4.d, v5.d, v6.d, v7.d}[0], [%x[state]], #32\n\t"
        "ld4 {v8.d, v9.d, v10.d, v11.d}[0], [%x[state]], #32\n\t"
        "ld4 {v12.d, v13.d, v14.d, v15.d}[0], [%x[state]], #32\n\t"
        "ld4 {v16.d, v17.d, v18.d, v19.d}[0], [%x[state]], #32\n\t"
        "ld4 {v20.d, v21.d, v22.d, v23.d}[0], [%x[state]], #32\n\t"
        "ld1 {v24.1d}, [%x[state]]\n\t"
        /* Undo the six 32-byte post-increments so state points at word 0. */
        "sub %x[state], %x[state], #0xc0\n\t"
        /* Round counter. */
        "mov x2, #24\n\t"
        "\n"
    "L_sha3_crypto_begin_%=: \n\t"
        /* Column mix: XOR the five words of each column.
         * v31 = column 0, v27 = column 1, v28 = column 2, v29 = column 3,
         * v30 = column 4. */
        "eor3 v31.16b, v0.16b, v5.16b, v10.16b\n\t"
        "eor3 v27.16b, v1.16b, v6.16b, v11.16b\n\t"
        "eor3 v28.16b, v2.16b, v7.16b, v12.16b\n\t"
        "eor3 v29.16b, v3.16b, v8.16b, v13.16b\n\t"
        "eor3 v30.16b, v4.16b, v9.16b, v14.16b\n\t"
        "eor3 v31.16b, v31.16b, v15.16b, v20.16b\n\t"
        "eor3 v27.16b, v27.16b, v16.16b, v21.16b\n\t"
        "eor3 v28.16b, v28.16b, v17.16b, v22.16b\n\t"
        "eor3 v29.16b, v29.16b, v18.16b, v23.16b\n\t"
        "eor3 v30.16b, v30.16b, v19.16b, v24.16b\n\t"
        /* Per-column mixing value: previous column XOR (next column rotated
         * left by 1). Results for columns 0..4 land in v25..v29; the order
         * matters because v27..v29 are overwritten in place. */
        "rax1 v25.2d, v30.2d, v27.2d\n\t"
        "rax1 v26.2d, v31.2d, v28.2d\n\t"
        "rax1 v27.2d, v27.2d, v29.2d\n\t"
        "rax1 v28.2d, v28.2d, v30.2d\n\t"
        "rax1 v29.2d, v29.2d, v31.2d\n\t"
        /* Word 0 is only XORed with its column value (no rotation/move). */
        "eor v0.16b, v0.16b, v25.16b\n\t"
        /* For every other word: XOR in the column value, rotate right by
         * the immediate and write the result to its new position. Each
         * destination is the source of the following instruction, so the
         * sequence walks the permutation as one chain; v30 temporarily
         * holds the value destined for v10. */
        "xar v30.2d, v1.2d, v26.2d, #63\n\t"
        "xar v1.2d, v6.2d, v26.2d, #20\n\t"
        "xar v6.2d, v9.2d, v29.2d, #44\n\t"
        "xar v9.2d, v22.2d, v27.2d, #3\n\t"
        "xar v22.2d, v14.2d, v29.2d, #25\n\t"
        "xar v14.2d, v20.2d, v25.2d, #46\n\t"
        "xar v20.2d, v2.2d, v27.2d, #2\n\t"
        "xar v2.2d, v12.2d, v27.2d, #21\n\t"
        "xar v12.2d, v13.2d, v28.2d, #39\n\t"
        "xar v13.2d, v19.2d, v29.2d, #56\n\t"
        "xar v19.2d, v23.2d, v28.2d, #8\n\t"
        "xar v23.2d, v15.2d, v25.2d, #23\n\t"
        "xar v15.2d, v4.2d, v29.2d, #37\n\t"
        "xar v4.2d, v24.2d, v29.2d, #50\n\t"
        "xar v24.2d, v21.2d, v26.2d, #62\n\t"
        "xar v21.2d, v8.2d, v28.2d, #9\n\t"
        "xar v8.2d, v16.2d, v26.2d, #19\n\t"
        "xar v16.2d, v5.2d, v25.2d, #28\n\t"
        "xar v5.2d, v3.2d, v28.2d, #36\n\t"
        "xar v3.2d, v18.2d, v28.2d, #43\n\t"
        "xar v18.2d, v17.2d, v27.2d, #49\n\t"
        "xar v17.2d, v11.2d, v26.2d, #54\n\t"
        "xar v11.2d, v7.2d, v27.2d, #58\n\t"
        "xar v7.2d, v10.2d, v25.2d, #61\n\t"
        /* Row mix: within each row of five words,
         * w[i] ^= w[i+2] & ~w[i+1]. The first two words of a row are
         * copied to v25/v26 because they are overwritten before the last
         * two words of the row have been computed. */
        /* Row 0: v0..v4 */
        "mov v25.16b, v0.16b\n\t"
        "mov v26.16b, v1.16b\n\t"
        "bcax v0.16b, v25.16b, v2.16b, v26.16b\n\t"
        "bcax v1.16b, v26.16b, v3.16b, v2.16b\n\t"
        "bcax v2.16b, v2.16b, v4.16b, v3.16b\n\t"
        "bcax v3.16b, v3.16b, v25.16b, v4.16b\n\t"
        "bcax v4.16b, v4.16b, v26.16b, v25.16b\n\t"
        /* Row 1: v5..v9 */
        "mov v25.16b, v5.16b\n\t"
        "mov v26.16b, v6.16b\n\t"
        "bcax v5.16b, v25.16b, v7.16b, v26.16b\n\t"
        "bcax v6.16b, v26.16b, v8.16b, v7.16b\n\t"
        "bcax v7.16b, v7.16b, v9.16b, v8.16b\n\t"
        "bcax v8.16b, v8.16b, v25.16b, v9.16b\n\t"
        "bcax v9.16b, v9.16b, v26.16b, v25.16b\n\t"
        /* Row 2: v10..v14; the first word of the row is still in v30. */
        "mov v26.16b, v11.16b\n\t"
        "bcax v10.16b, v30.16b, v12.16b, v26.16b\n\t"
        "bcax v11.16b, v26.16b, v13.16b, v12.16b\n\t"
        "bcax v12.16b, v12.16b, v14.16b, v13.16b\n\t"
        "bcax v13.16b, v13.16b, v30.16b, v14.16b\n\t"
        "bcax v14.16b, v14.16b, v26.16b, v30.16b\n\t"
        /* Row 3: v15..v19 */
        "mov v25.16b, v15.16b\n\t"
        "mov v26.16b, v16.16b\n\t"
        "bcax v15.16b, v25.16b, v17.16b, v26.16b\n\t"
        "bcax v16.16b, v26.16b, v18.16b, v17.16b\n\t"
        "bcax v17.16b, v17.16b, v19.16b, v18.16b\n\t"
        "bcax v18.16b, v18.16b, v25.16b, v19.16b\n\t"
        "bcax v19.16b, v19.16b, v26.16b, v25.16b\n\t"
        /* Row 4: v20..v24 */
        "mov v25.16b, v20.16b\n\t"
        "mov v26.16b, v21.16b\n\t"
        "bcax v20.16b, v25.16b, v22.16b, v26.16b\n\t"
        "bcax v21.16b, v26.16b, v23.16b, v22.16b\n\t"
        "bcax v22.16b, v22.16b, v24.16b, v23.16b\n\t"
        "bcax v23.16b, v23.16b, v25.16b, v24.16b\n\t"
        "bcax v24.16b, v24.16b, v26.16b, v25.16b\n\t"
        /* Load this round's constant (advancing r by 8 bytes) and XOR it
         * into word 0; loop until the counter reaches zero. */
        "ld1r {v30.2d}, [%[r]], #8\n\t"
        "subs x2, x2, #1\n\t"
        "eor v0.16b, v0.16b, v30.16b\n\t"
        "b.ne L_sha3_crypto_begin_%=\n\t"
        /* Write the 25 words back in the same layout they were loaded. */
        "st4 {v0.d, v1.d, v2.d, v3.d}[0], [%x[state]], #32\n\t"
        "st4 {v4.d, v5.d, v6.d, v7.d}[0], [%x[state]], #32\n\t"
        "st4 {v8.d, v9.d, v10.d, v11.d}[0], [%x[state]], #32\n\t"
        "st4 {v12.d, v13.d, v14.d, v15.d}[0], [%x[state]], #32\n\t"
        "st4 {v16.d, v17.d, v18.d, v19.d}[0], [%x[state]], #32\n\t"
        "st4 {v20.d, v21.d, v22.d, v23.d}[0], [%x[state]], #32\n\t"
        "st1 {v24.1d}, [%x[state]]\n\t"
        /* Both pointers are modified by post-increment addressing, so both
         * must be read-write operands; an input-only operand must not be
         * changed by the asm. */
        : [state] "+r" (state), [r] "+r" (r)
        :
        : "memory", "cc", "x2", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
            "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
            "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
            "v27", "v28", "v29", "v30", "v31"
    );
}
#endif
/* Per-round constants for the 24 rounds run by BlockSha3_base.
 * Entry i is XORed into the first state word at the end of round i.
 */
static const word64 L_SHA3_transform_base_r[] = {
    0x0000000000000001,    /* round  0 */
    0x0000000000008082,    /* round  1 */
    0x800000000000808a,    /* round  2 */
    0x8000000080008000,    /* round  3 */
    0x000000000000808b,    /* round  4 */
    0x0000000080000001,    /* round  5 */
    0x8000000080008081,    /* round  6 */
    0x8000000000008009,    /* round  7 */
    0x000000000000008a,    /* round  8 */
    0x0000000000000088,    /* round  9 */
    0x0000000080008009,    /* round 10 */
    0x000000008000000a,    /* round 11 */
    0x000000008000808b,    /* round 12 */
    0x800000000000008b,    /* round 13 */
    0x8000000000008089,    /* round 14 */
    0x8000000000008003,    /* round 15 */
    0x8000000000008002,    /* round 16 */
    0x8000000000000080,    /* round 17 */
    0x000000000000800a,    /* round 18 */
    0x800000008000000a,    /* round 19 */
    0x8000000080008081,    /* round 20 */
    0x8000000000008080,    /* round 21 */
    0x0000000080000001,    /* round 22 */
    0x8000000080008008,    /* round 23 */
};
/* Run the 24-round SHA-3 block permutation over the state in place, using
 * only base AArch64 integer instructions.
 *
 * Register use inside the asm:
 *   x1..x17, x19..x26  the 25 state words (x18 is not used)
 *   x28, x30, %[state], %[r]  scratch within a round
 *   x29                base of a 64-byte stack frame:
 *                        [x29, #0]   saved x29
 *                        [x29, #8]   saved x30
 *                        [x29, #24]  column 2 XOR (spilled)
 *                        [x29, #32]  column 4 XOR (spilled)
 *                        [x29, #40]  saved state pointer
 *                        [x29, #48]  saved round-constant pointer
 *                        [x29, #56]  saved round counter
 *
 * NOTE(review): x29 is overwritten as well and is protected only by the
 * manual save/restore; confirm the build never lets the compiler place an
 * asm operand in x29.
 *
 * @param [in, out] state  Pointer to 25 64-bit words (200 bytes) that are
 *                         read, permuted and written back.
 */
void BlockSha3_base(word64* state)
{
    const word64* r = L_SHA3_transform_base_r;

    __asm__ __volatile__ (
        /* Save x29/x30 and set x29 to the base of the 64-byte frame. */
        "stp x29, x30, [sp, #-64]!\n\t"
        "add x29, sp, #0\n\t"
        /* Load the 25 state words into registers. */
        "ldp x1, x2, [%x[state]]\n\t"
        "ldp x3, x4, [%x[state], #16]\n\t"
        "ldp x5, x6, [%x[state], #32]\n\t"
        "ldp x7, x8, [%x[state], #48]\n\t"
        "ldp x9, x10, [%x[state], #64]\n\t"
        "ldp x11, x12, [%x[state], #80]\n\t"
        "ldp x13, x14, [%x[state], #96]\n\t"
        "ldp x15, x16, [%x[state], #112]\n\t"
        "ldp x17, x19, [%x[state], #128]\n\t"
        "ldp x20, x21, [%x[state], #144]\n\t"
        "ldp x22, x23, [%x[state], #160]\n\t"
        "ldp x24, x25, [%x[state], #176]\n\t"
        "ldr x26, [%x[state], #192]\n\t"
        /* The state pointer register is reused as scratch; keep a copy. */
        "str %x[state], [x29, #40]\n\t"
        /* Round counter. */
        "mov x28, #24\n\t"
        "\n"
    "L_SHA3_transform_base_begin_%=: \n\t"
        /* Spill the round-constant pointer and the round counter: both
         * registers are used as scratch for the rest of the round. */
        "stp %[r], x28, [x29, #48]\n\t"
        /* Column mix: XOR the five words of each column.
         * %[state] = column 4, x30 = column 0, x28 = column 2. */
        "eor %x[state], x5, x10\n\t"
        "eor x30, x1, x6\n\t"
        "eor x28, x3, x8\n\t"
        "eor %x[state], %x[state], x15\n\t"
        "eor x30, x30, x11\n\t"
        "eor x28, x28, x13\n\t"
        "eor %x[state], %x[state], x21\n\t"
        "eor x30, x30, x16\n\t"
        "eor x28, x28, x19\n\t"
        "eor %x[state], %x[state], x26\n\t"
        "eor x30, x30, x22\n\t"
        "eor x28, x28, x24\n\t"
        /* Spill columns 4 and 2; they are needed again further down. */
        "str %x[state], [x29, #32]\n\t"
        "str x28, [x29, #24]\n\t"
        /* %[r] = column 1, x28 = column 3. */
        "eor %[r], x2, x7\n\t"
        "eor x28, x4, x9\n\t"
        "eor %[r], %[r], x12\n\t"
        "eor x28, x28, x14\n\t"
        "eor %[r], %[r], x17\n\t"
        "eor x28, x28, x20\n\t"
        "eor %[r], %[r], x23\n\t"
        "eor x28, x28, x25\n\t"
        /* Mixing value for a column = previous column XOR (next column
         * rotated left by 1; "ror 63" is a left rotate by 1).
         * %[state] = value for column 0, %[r] = value for column 2. */
        "eor %x[state], %x[state], %[r], ror 63\n\t"
        "eor %[r], %[r], x28, ror 63\n\t"
        /* Apply to column 0 ... */
        "eor x1, x1, %x[state]\n\t"
        "eor x6, x6, %x[state]\n\t"
        "eor x11, x11, %x[state]\n\t"
        "eor x16, x16, %x[state]\n\t"
        "eor x22, x22, %x[state]\n\t"
        /* ... and to column 2. */
        "eor x3, x3, %[r]\n\t"
        "eor x8, x8, %[r]\n\t"
        "eor x13, x13, %[r]\n\t"
        "eor x19, x19, %[r]\n\t"
        "eor x24, x24, %[r]\n\t"
        /* Reload the spilled column 4 and column 2 XORs. */
        "ldr %x[state], [x29, #32]\n\t"
        "ldr %[r], [x29, #24]\n\t"
        /* x28 = value for column 4, x30 = value for column 1,
         * %[r] = value for column 3. */
        "eor x28, x28, x30, ror 63\n\t"
        "eor x30, x30, %[r], ror 63\n\t"
        "eor %[r], %[r], %x[state], ror 63\n\t"
        /* Apply to column 4 ... */
        "eor x5, x5, x28\n\t"
        "eor x10, x10, x28\n\t"
        "eor x15, x15, x28\n\t"
        "eor x21, x21, x28\n\t"
        "eor x26, x26, x28\n\t"
        /* ... column 1 ... */
        "eor x2, x2, x30\n\t"
        "eor x7, x7, x30\n\t"
        "eor x12, x12, x30\n\t"
        "eor x17, x17, x30\n\t"
        "eor x23, x23, x30\n\t"
        /* ... and column 3. */
        "eor x4, x4, %[r]\n\t"
        "eor x9, x9, %[r]\n\t"
        "eor x14, x14, %[r]\n\t"
        "eor x20, x20, %[r]\n\t"
        "eor x25, x25, %[r]\n\t"
        /* Rotate each word (except word 0) and move it to its new
         * position. Each destination is the source of the following
         * instruction, so the sequence walks the permutation as one chain;
         * %[state] temporarily holds the value destined for x11. */
        "ror %x[state], x2, #63\n\t"
        "ror x2, x7, #20\n\t"
        "ror x7, x10, #44\n\t"
        "ror x10, x24, #3\n\t"
        "ror x24, x15, #25\n\t"
        "ror x15, x22, #46\n\t"
        "ror x22, x3, #2\n\t"
        "ror x3, x13, #21\n\t"
        "ror x13, x14, #39\n\t"
        "ror x14, x21, #56\n\t"
        "ror x21, x25, #8\n\t"
        "ror x25, x16, #23\n\t"
        "ror x16, x5, #37\n\t"
        "ror x5, x26, #50\n\t"
        "ror x26, x23, #62\n\t"
        "ror x23, x9, #9\n\t"
        "ror x9, x17, #19\n\t"
        "ror x17, x6, #28\n\t"
        "ror x6, x4, #36\n\t"
        "ror x4, x20, #43\n\t"
        "ror x20, x19, #49\n\t"
        "ror x19, x12, #54\n\t"
        "ror x12, x8, #58\n\t"
        "ror x8, x11, #61\n\t"
        /* Row mix: within each row of five words,
         * w[i] ^= w[i+2] & ~w[i+1]. All BICs that read a word are issued
         * before that word is updated. */
        /* Row 0: x1..x5 (x11 is free here and used as scratch). */
        "bic x11, x3, x2\n\t"
        "bic %[r], x4, x3\n\t"
        "bic x28, x1, x5\n\t"
        "bic x30, x2, x1\n\t"
        "eor x1, x1, x11\n\t"
        "eor x2, x2, %[r]\n\t"
        "bic x11, x5, x4\n\t"
        "eor x4, x4, x28\n\t"
        "eor x3, x3, x11\n\t"
        "eor x5, x5, x30\n\t"
        /* Row 1: x6..x10 */
        "bic x11, x8, x7\n\t"
        "bic %[r], x9, x8\n\t"
        "bic x28, x6, x10\n\t"
        "bic x30, x7, x6\n\t"
        "eor x6, x6, x11\n\t"
        "eor x7, x7, %[r]\n\t"
        "bic x11, x10, x9\n\t"
        "eor x9, x9, x28\n\t"
        "eor x8, x8, x11\n\t"
        "eor x10, x10, x30\n\t"
        /* Row 2: x11..x15; the first word of the row is still in
         * %[state] and is written to x11 here. */
        "bic x11, x13, x12\n\t"
        "bic %[r], x14, x13\n\t"
        "bic x28, %x[state], x15\n\t"
        "bic x30, x12, %x[state]\n\t"
        "eor x11, %x[state], x11\n\t"
        "eor x12, x12, %[r]\n\t"
        "bic %x[state], x15, x14\n\t"
        "eor x14, x14, x28\n\t"
        "eor x13, x13, %x[state]\n\t"
        "eor x15, x15, x30\n\t"
        /* Row 3: x16, x17, x19, x20, x21 */
        "bic %x[state], x19, x17\n\t"
        "bic %[r], x20, x19\n\t"
        "bic x28, x16, x21\n\t"
        "bic x30, x17, x16\n\t"
        "eor x16, x16, %x[state]\n\t"
        "eor x17, x17, %[r]\n\t"
        "bic %x[state], x21, x20\n\t"
        "eor x20, x20, x28\n\t"
        "eor x19, x19, %x[state]\n\t"
        "eor x21, x21, x30\n\t"
        /* Row 4: x22..x26 */
        "bic %x[state], x24, x23\n\t"
        "bic %[r], x25, x24\n\t"
        "bic x28, x22, x26\n\t"
        "bic x30, x23, x22\n\t"
        "eor x22, x22, %x[state]\n\t"
        "eor x23, x23, %[r]\n\t"
        "bic %x[state], x26, x25\n\t"
        "eor x25, x25, x28\n\t"
        "eor x24, x24, %x[state]\n\t"
        "eor x26, x26, x30\n\t"
        /* Restore the round-constant pointer and counter, load this
         * round's constant (advancing the pointer by 8 bytes) and XOR it
         * into word 0; loop until the counter reaches zero. */
        "ldp %[r], x28, [x29, #48]\n\t"
        "ldr %x[state], [%[r]], #8\n\t"
        "subs x28, x28, #1\n\t"
        "eor x1, x1, %x[state]\n\t"
        "b.ne L_SHA3_transform_base_begin_%=\n\t"
        /* Recover the state pointer and write the 25 words back. */
        "ldr %x[state], [x29, #40]\n\t"
        "stp x1, x2, [%x[state]]\n\t"
        "stp x3, x4, [%x[state], #16]\n\t"
        "stp x5, x6, [%x[state], #32]\n\t"
        "stp x7, x8, [%x[state], #48]\n\t"
        "stp x9, x10, [%x[state], #64]\n\t"
        "stp x11, x12, [%x[state], #80]\n\t"
        "stp x13, x14, [%x[state], #96]\n\t"
        "stp x15, x16, [%x[state], #112]\n\t"
        "stp x17, x19, [%x[state], #128]\n\t"
        "stp x20, x21, [%x[state], #144]\n\t"
        "stp x22, x23, [%x[state], #160]\n\t"
        "stp x24, x25, [%x[state], #176]\n\t"
        "str x26, [%x[state], #192]\n\t"
        /* Restore x29/x30 and release the frame. */
        "ldp x29, x30, [sp], #0x40\n\t"
        /* r is used as scratch and advanced every round, so it must be a
         * read-write operand; an input-only operand must not be changed by
         * the asm. x30 is listed as clobbered so the compiler cannot place
         * an operand in it while the asm uses it as scratch. */
        : [state] "+r" (state), [r] "+r" (r)
        :
        : "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9",
            "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19",
            "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x28", "x30"
    );
}
#endif
#endif
#endif
#endif