#include <wolfssl/wolfcrypt/libwolfssl_sources_asm.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM
#ifdef __aarch64__
#ifdef WOLFSSL_ARMASM_INLINE
#include <wolfssl/wolfcrypt/sha256.h>
#if !defined(NO_SHA256) || defined(WOLFSSL_SHA224)
/* Table of 64 32-bit constants, one per SHA-256 round. It is read through the
 * `k` pointer by Transform_Sha256_Len_neon: 16 words per pass, four passes per
 * 64-byte block. The values are the SHA-256 round constants K[0..63]. */
static const word32 L_SHA256_transform_neon_len_k[] = {
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,    /* K[ 0.. 3] */
    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,    /* K[ 4.. 7] */
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,    /* K[ 8..11] */
    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,    /* K[12..15] */
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,    /* K[16..19] */
    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,    /* K[20..23] */
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,    /* K[24..27] */
    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,    /* K[28..31] */
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,    /* K[32..35] */
    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,    /* K[36..39] */
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,    /* K[40..43] */
    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,    /* K[44..47] */
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,    /* K[48..51] */
    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,    /* K[52..55] */
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,    /* K[56..59] */
    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,    /* K[60..63] */
};
/* SHA-256 block transform using AArch64 general-purpose registers for the
 * round function and NEON registers for the message schedule.
 *
 * sha256  Hash object. The eight 32-bit working words are loaded from byte
 *         offsets 0..28 of this pointer and stored back there at the end.
 *         NOTE(review): assumes the digest words are the first 32 bytes of
 *         wc_Sha256 - the structure layout is not visible here; confirm.
 * data    Input bytes. 64 bytes are consumed per outer-loop iteration and
 *         each 32-bit word is byte-reversed (rev32) before use.
 * len     Number of bytes to process. Must be a non-zero multiple of 64: the
 *         outer loop subtracts 0x40 each pass and only exits when the result
 *         is exactly zero.
 *
 * Register use inside the asm:
 *   w4..w11    working variables (loaded from / stored to sha256)
 *   w12..w14   temporaries
 *   w15..w17, w19..w23  copy of the working variables at block start, added
 *              back in after the 64 rounds
 *   x24        counter for the three 16-round passes that also expand W
 *   v0..v7     16 message words, two 32-bit lanes per register
 *   v8..v11    message-schedule temporaries
 *
 * The `data` and `k` pointers are advanced by the assembly, so both are
 * declared as read/write ("+r") operands. Declaring them input-only would
 * break the compiler's assumption that input registers are unchanged.
 */
void Transform_Sha256_Len_neon(wc_Sha256* sha256, const byte* data, word32 len)
{
    const word32* k = L_SHA256_transform_neon_len_k;

    __asm__ __volatile__ (
        /* Load the eight working words */
        "ldr w4, [%x[sha256]]\n\t"
        "ldr w5, [%x[sha256], #4]\n\t"
        "ldr w6, [%x[sha256], #8]\n\t"
        "ldr w7, [%x[sha256], #12]\n\t"
        "ldr w8, [%x[sha256], #16]\n\t"
        "ldr w9, [%x[sha256], #20]\n\t"
        "ldr w10, [%x[sha256], #24]\n\t"
        "ldr w11, [%x[sha256], #28]\n\t"
        /* Start of loop processing one 64-byte block */
        "\n"
    "L_sha256_len_neon_begin_%=: \n\t"
        /* Load 64 bytes of message, byte-reverse each 32-bit word, and keep
         * a copy of the working words to add back in after the rounds */
        "ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [%x[data]], #32\n\t"
        "mov w15, w4\n\t"
        "ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [%x[data]], #32\n\t"
        "mov w16, w5\n\t"
        "rev32 v0.8b, v0.8b\n\t"
        "mov w17, w6\n\t"
        "rev32 v1.8b, v1.8b\n\t"
        "mov w19, w7\n\t"
        "rev32 v2.8b, v2.8b\n\t"
        "mov w20, w8\n\t"
        "rev32 v3.8b, v3.8b\n\t"
        "mov w21, w9\n\t"
        "rev32 v4.8b, v4.8b\n\t"
        "mov w22, w10\n\t"
        "rev32 v5.8b, v5.8b\n\t"
        "mov w23, w11\n\t"
        "rev32 v6.8b, v6.8b\n\t"
        "rev32 v7.8b, v7.8b\n\t"
        /* Three passes of 16 rounds that also compute the next 16 W values */
        "mov x24, #3\n\t"
        "\n"
    "L_sha256_len_neon_start_%=: \n\t"
        /* Round 0 */
        "mov w14, v0.s[0]\n\t"
        "ror w12, w8, #6\n\t"
        "eor w13, w9, w10\n\t"
        "eor w12, w12, w8, ror 11\n\t"
        "and w13, w13, w8\n\t"
        "eor w12, w12, w8, ror 25\n\t"
        "eor w13, w13, w10\n\t"
        "add w11, w11, w12\n\t"
        "add w11, w11, w13\n\t"
        "ldr w12, [%[k]]\n\t"
        "add w11, w11, w14\n\t"
        "add w11, w11, w12\n\t"
        "add w7, w7, w11\n\t"
        "ror w12, w4, #2\n\t"
        "eor w13, w4, w5\n\t"
        "eor w12, w12, w4, ror 13\n\t"
        "eor w14, w5, w6\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w4, ror 22\n\t"
        "eor w13, w13, w5\n\t"
        "add w11, w11, w12\n\t"
        "add w11, w11, w13\n\t"
        /* Round 1, interleaved with computing the next two words in v0 */
        "mov w14, v0.s[1]\n\t"
        "ext v10.8b, v0.8b, v1.8b, #4\n\t"
        "ror w12, w7, #6\n\t"
        "shl v8.2s, v7.2s, #15\n\t"
        "eor w13, w8, w9\n\t"
        "sri v8.2s, v7.2s, #17\n\t"
        "eor w12, w12, w7, ror 11\n\t"
        "shl v9.2s, v7.2s, #13\n\t"
        "and w13, w13, w7\n\t"
        "sri v9.2s, v7.2s, #19\n\t"
        "eor w12, w12, w7, ror 25\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "eor w13, w13, w9\n\t"
        "ushr v8.2s, v7.2s, #10\n\t"
        "add w10, w10, w12\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "add w10, w10, w13\n\t"
        "add v0.2s, v0.2s, v9.2s\n\t"
        "ldr w12, [%[k], #4]\n\t"
        "ext v11.8b, v4.8b, v5.8b, #4\n\t"
        "add w10, w10, w14\n\t"
        "add v0.2s, v0.2s, v11.2s\n\t"
        "add w10, w10, w12\n\t"
        "shl v8.2s, v10.2s, #25\n\t"
        "add w6, w6, w10\n\t"
        "sri v8.2s, v10.2s, #7\n\t"
        "ror w12, w11, #2\n\t"
        "shl v9.2s, v10.2s, #14\n\t"
        "eor w13, w11, w4\n\t"
        "sri v9.2s, v10.2s, #18\n\t"
        "eor w12, w12, w11, ror 13\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "eor w14, w4, w5\n\t"
        "ushr v10.2s, v10.2s, #3\n\t"
        "and w13, w13, w14\n\t"
        "eor v9.8b, v9.8b, v10.8b\n\t"
        "eor w12, w12, w11, ror 22\n\t"
        "add v0.2s, v0.2s, v9.2s\n\t"
        "eor w13, w13, w4\n\t"
        "add w10, w10, w12\n\t"
        "add w10, w10, w13\n\t"
        /* Round 2 */
        "mov w14, v1.s[0]\n\t"
        "ror w12, w6, #6\n\t"
        "eor w13, w7, w8\n\t"
        "eor w12, w12, w6, ror 11\n\t"
        "and w13, w13, w6\n\t"
        "eor w12, w12, w6, ror 25\n\t"
        "eor w13, w13, w8\n\t"
        "add w9, w9, w12\n\t"
        "add w9, w9, w13\n\t"
        "ldr w12, [%[k], #8]\n\t"
        "add w9, w9, w14\n\t"
        "add w9, w9, w12\n\t"
        "add w5, w5, w9\n\t"
        "ror w12, w10, #2\n\t"
        "eor w13, w10, w11\n\t"
        "eor w12, w12, w10, ror 13\n\t"
        "eor w14, w11, w4\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w10, ror 22\n\t"
        "eor w13, w13, w11\n\t"
        "add w9, w9, w12\n\t"
        "add w9, w9, w13\n\t"
        /* Round 3, interleaved with computing the next two words in v1 */
        "mov w14, v1.s[1]\n\t"
        "ext v10.8b, v1.8b, v2.8b, #4\n\t"
        "ror w12, w5, #6\n\t"
        "shl v8.2s, v0.2s, #15\n\t"
        "eor w13, w6, w7\n\t"
        "sri v8.2s, v0.2s, #17\n\t"
        "eor w12, w12, w5, ror 11\n\t"
        "shl v9.2s, v0.2s, #13\n\t"
        "and w13, w13, w5\n\t"
        "sri v9.2s, v0.2s, #19\n\t"
        "eor w12, w12, w5, ror 25\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "eor w13, w13, w7\n\t"
        "ushr v8.2s, v0.2s, #10\n\t"
        "add w8, w8, w12\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "add w8, w8, w13\n\t"
        "add v1.2s, v1.2s, v9.2s\n\t"
        "ldr w12, [%[k], #12]\n\t"
        "ext v11.8b, v5.8b, v6.8b, #4\n\t"
        "add w8, w8, w14\n\t"
        "add v1.2s, v1.2s, v11.2s\n\t"
        "add w8, w8, w12\n\t"
        "shl v8.2s, v10.2s, #25\n\t"
        "add w4, w4, w8\n\t"
        "sri v8.2s, v10.2s, #7\n\t"
        "ror w12, w9, #2\n\t"
        "shl v9.2s, v10.2s, #14\n\t"
        "eor w13, w9, w10\n\t"
        "sri v9.2s, v10.2s, #18\n\t"
        "eor w12, w12, w9, ror 13\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "eor w14, w10, w11\n\t"
        "ushr v10.2s, v10.2s, #3\n\t"
        "and w13, w13, w14\n\t"
        "eor v9.8b, v9.8b, v10.8b\n\t"
        "eor w12, w12, w9, ror 22\n\t"
        "add v1.2s, v1.2s, v9.2s\n\t"
        "eor w13, w13, w10\n\t"
        "add w8, w8, w12\n\t"
        "add w8, w8, w13\n\t"
        /* Round 4 */
        "mov w14, v2.s[0]\n\t"
        "ror w12, w4, #6\n\t"
        "eor w13, w5, w6\n\t"
        "eor w12, w12, w4, ror 11\n\t"
        "and w13, w13, w4\n\t"
        "eor w12, w12, w4, ror 25\n\t"
        "eor w13, w13, w6\n\t"
        "add w7, w7, w12\n\t"
        "add w7, w7, w13\n\t"
        "ldr w12, [%[k], #16]\n\t"
        "add w7, w7, w14\n\t"
        "add w7, w7, w12\n\t"
        "add w11, w11, w7\n\t"
        "ror w12, w8, #2\n\t"
        "eor w13, w8, w9\n\t"
        "eor w12, w12, w8, ror 13\n\t"
        "eor w14, w9, w10\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w8, ror 22\n\t"
        "eor w13, w13, w9\n\t"
        "add w7, w7, w12\n\t"
        "add w7, w7, w13\n\t"
        /* Round 5, interleaved with computing the next two words in v2 */
        "mov w14, v2.s[1]\n\t"
        "ext v10.8b, v2.8b, v3.8b, #4\n\t"
        "ror w12, w11, #6\n\t"
        "shl v8.2s, v1.2s, #15\n\t"
        "eor w13, w4, w5\n\t"
        "sri v8.2s, v1.2s, #17\n\t"
        "eor w12, w12, w11, ror 11\n\t"
        "shl v9.2s, v1.2s, #13\n\t"
        "and w13, w13, w11\n\t"
        "sri v9.2s, v1.2s, #19\n\t"
        "eor w12, w12, w11, ror 25\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "eor w13, w13, w5\n\t"
        "ushr v8.2s, v1.2s, #10\n\t"
        "add w6, w6, w12\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "add w6, w6, w13\n\t"
        "add v2.2s, v2.2s, v9.2s\n\t"
        "ldr w12, [%[k], #20]\n\t"
        "ext v11.8b, v6.8b, v7.8b, #4\n\t"
        "add w6, w6, w14\n\t"
        "add v2.2s, v2.2s, v11.2s\n\t"
        "add w6, w6, w12\n\t"
        "shl v8.2s, v10.2s, #25\n\t"
        "add w10, w10, w6\n\t"
        "sri v8.2s, v10.2s, #7\n\t"
        "ror w12, w7, #2\n\t"
        "shl v9.2s, v10.2s, #14\n\t"
        "eor w13, w7, w8\n\t"
        "sri v9.2s, v10.2s, #18\n\t"
        "eor w12, w12, w7, ror 13\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "eor w14, w8, w9\n\t"
        "ushr v10.2s, v10.2s, #3\n\t"
        "and w13, w13, w14\n\t"
        "eor v9.8b, v9.8b, v10.8b\n\t"
        "eor w12, w12, w7, ror 22\n\t"
        "add v2.2s, v2.2s, v9.2s\n\t"
        "eor w13, w13, w8\n\t"
        "add w6, w6, w12\n\t"
        "add w6, w6, w13\n\t"
        /* Round 6 */
        "mov w14, v3.s[0]\n\t"
        "ror w12, w10, #6\n\t"
        "eor w13, w11, w4\n\t"
        "eor w12, w12, w10, ror 11\n\t"
        "and w13, w13, w10\n\t"
        "eor w12, w12, w10, ror 25\n\t"
        "eor w13, w13, w4\n\t"
        "add w5, w5, w12\n\t"
        "add w5, w5, w13\n\t"
        "ldr w12, [%[k], #24]\n\t"
        "add w5, w5, w14\n\t"
        "add w5, w5, w12\n\t"
        "add w9, w9, w5\n\t"
        "ror w12, w6, #2\n\t"
        "eor w13, w6, w7\n\t"
        "eor w12, w12, w6, ror 13\n\t"
        "eor w14, w7, w8\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w6, ror 22\n\t"
        "eor w13, w13, w7\n\t"
        "add w5, w5, w12\n\t"
        "add w5, w5, w13\n\t"
        /* Round 7, interleaved with computing the next two words in v3 */
        "mov w14, v3.s[1]\n\t"
        "ext v10.8b, v3.8b, v4.8b, #4\n\t"
        "ror w12, w9, #6\n\t"
        "shl v8.2s, v2.2s, #15\n\t"
        "eor w13, w10, w11\n\t"
        "sri v8.2s, v2.2s, #17\n\t"
        "eor w12, w12, w9, ror 11\n\t"
        "shl v9.2s, v2.2s, #13\n\t"
        "and w13, w13, w9\n\t"
        "sri v9.2s, v2.2s, #19\n\t"
        "eor w12, w12, w9, ror 25\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "eor w13, w13, w11\n\t"
        "ushr v8.2s, v2.2s, #10\n\t"
        "add w4, w4, w12\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "add w4, w4, w13\n\t"
        "add v3.2s, v3.2s, v9.2s\n\t"
        "ldr w12, [%[k], #28]\n\t"
        "ext v11.8b, v7.8b, v0.8b, #4\n\t"
        "add w4, w4, w14\n\t"
        "add v3.2s, v3.2s, v11.2s\n\t"
        "add w4, w4, w12\n\t"
        "shl v8.2s, v10.2s, #25\n\t"
        "add w8, w8, w4\n\t"
        "sri v8.2s, v10.2s, #7\n\t"
        "ror w12, w5, #2\n\t"
        "shl v9.2s, v10.2s, #14\n\t"
        "eor w13, w5, w6\n\t"
        "sri v9.2s, v10.2s, #18\n\t"
        "eor w12, w12, w5, ror 13\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "eor w14, w6, w7\n\t"
        "ushr v10.2s, v10.2s, #3\n\t"
        "and w13, w13, w14\n\t"
        "eor v9.8b, v9.8b, v10.8b\n\t"
        "eor w12, w12, w5, ror 22\n\t"
        "add v3.2s, v3.2s, v9.2s\n\t"
        "eor w13, w13, w6\n\t"
        "add w4, w4, w12\n\t"
        "add w4, w4, w13\n\t"
        /* Round 8 */
        "mov w14, v4.s[0]\n\t"
        "ror w12, w8, #6\n\t"
        "eor w13, w9, w10\n\t"
        "eor w12, w12, w8, ror 11\n\t"
        "and w13, w13, w8\n\t"
        "eor w12, w12, w8, ror 25\n\t"
        "eor w13, w13, w10\n\t"
        "add w11, w11, w12\n\t"
        "add w11, w11, w13\n\t"
        "ldr w12, [%[k], #32]\n\t"
        "add w11, w11, w14\n\t"
        "add w11, w11, w12\n\t"
        "add w7, w7, w11\n\t"
        "ror w12, w4, #2\n\t"
        "eor w13, w4, w5\n\t"
        "eor w12, w12, w4, ror 13\n\t"
        "eor w14, w5, w6\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w4, ror 22\n\t"
        "eor w13, w13, w5\n\t"
        "add w11, w11, w12\n\t"
        "add w11, w11, w13\n\t"
        /* Round 9, interleaved with computing the next two words in v4 */
        "mov w14, v4.s[1]\n\t"
        "ext v10.8b, v4.8b, v5.8b, #4\n\t"
        "ror w12, w7, #6\n\t"
        "shl v8.2s, v3.2s, #15\n\t"
        "eor w13, w8, w9\n\t"
        "sri v8.2s, v3.2s, #17\n\t"
        "eor w12, w12, w7, ror 11\n\t"
        "shl v9.2s, v3.2s, #13\n\t"
        "and w13, w13, w7\n\t"
        "sri v9.2s, v3.2s, #19\n\t"
        "eor w12, w12, w7, ror 25\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "eor w13, w13, w9\n\t"
        "ushr v8.2s, v3.2s, #10\n\t"
        "add w10, w10, w12\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "add w10, w10, w13\n\t"
        "add v4.2s, v4.2s, v9.2s\n\t"
        "ldr w12, [%[k], #36]\n\t"
        "ext v11.8b, v0.8b, v1.8b, #4\n\t"
        "add w10, w10, w14\n\t"
        "add v4.2s, v4.2s, v11.2s\n\t"
        "add w10, w10, w12\n\t"
        "shl v8.2s, v10.2s, #25\n\t"
        "add w6, w6, w10\n\t"
        "sri v8.2s, v10.2s, #7\n\t"
        "ror w12, w11, #2\n\t"
        "shl v9.2s, v10.2s, #14\n\t"
        "eor w13, w11, w4\n\t"
        "sri v9.2s, v10.2s, #18\n\t"
        "eor w12, w12, w11, ror 13\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "eor w14, w4, w5\n\t"
        "ushr v10.2s, v10.2s, #3\n\t"
        "and w13, w13, w14\n\t"
        "eor v9.8b, v9.8b, v10.8b\n\t"
        "eor w12, w12, w11, ror 22\n\t"
        "add v4.2s, v4.2s, v9.2s\n\t"
        "eor w13, w13, w4\n\t"
        "add w10, w10, w12\n\t"
        "add w10, w10, w13\n\t"
        /* Round 10 */
        "mov w14, v5.s[0]\n\t"
        "ror w12, w6, #6\n\t"
        "eor w13, w7, w8\n\t"
        "eor w12, w12, w6, ror 11\n\t"
        "and w13, w13, w6\n\t"
        "eor w12, w12, w6, ror 25\n\t"
        "eor w13, w13, w8\n\t"
        "add w9, w9, w12\n\t"
        "add w9, w9, w13\n\t"
        "ldr w12, [%[k], #40]\n\t"
        "add w9, w9, w14\n\t"
        "add w9, w9, w12\n\t"
        "add w5, w5, w9\n\t"
        "ror w12, w10, #2\n\t"
        "eor w13, w10, w11\n\t"
        "eor w12, w12, w10, ror 13\n\t"
        "eor w14, w11, w4\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w10, ror 22\n\t"
        "eor w13, w13, w11\n\t"
        "add w9, w9, w12\n\t"
        "add w9, w9, w13\n\t"
        /* Round 11, interleaved with computing the next two words in v5 */
        "mov w14, v5.s[1]\n\t"
        "ext v10.8b, v5.8b, v6.8b, #4\n\t"
        "ror w12, w5, #6\n\t"
        "shl v8.2s, v4.2s, #15\n\t"
        "eor w13, w6, w7\n\t"
        "sri v8.2s, v4.2s, #17\n\t"
        "eor w12, w12, w5, ror 11\n\t"
        "shl v9.2s, v4.2s, #13\n\t"
        "and w13, w13, w5\n\t"
        "sri v9.2s, v4.2s, #19\n\t"
        "eor w12, w12, w5, ror 25\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "eor w13, w13, w7\n\t"
        "ushr v8.2s, v4.2s, #10\n\t"
        "add w8, w8, w12\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "add w8, w8, w13\n\t"
        "add v5.2s, v5.2s, v9.2s\n\t"
        "ldr w12, [%[k], #44]\n\t"
        "ext v11.8b, v1.8b, v2.8b, #4\n\t"
        "add w8, w8, w14\n\t"
        "add v5.2s, v5.2s, v11.2s\n\t"
        "add w8, w8, w12\n\t"
        "shl v8.2s, v10.2s, #25\n\t"
        "add w4, w4, w8\n\t"
        "sri v8.2s, v10.2s, #7\n\t"
        "ror w12, w9, #2\n\t"
        "shl v9.2s, v10.2s, #14\n\t"
        "eor w13, w9, w10\n\t"
        "sri v9.2s, v10.2s, #18\n\t"
        "eor w12, w12, w9, ror 13\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "eor w14, w10, w11\n\t"
        "ushr v10.2s, v10.2s, #3\n\t"
        "and w13, w13, w14\n\t"
        "eor v9.8b, v9.8b, v10.8b\n\t"
        "eor w12, w12, w9, ror 22\n\t"
        "add v5.2s, v5.2s, v9.2s\n\t"
        "eor w13, w13, w10\n\t"
        "add w8, w8, w12\n\t"
        "add w8, w8, w13\n\t"
        /* Round 12 */
        "mov w14, v6.s[0]\n\t"
        "ror w12, w4, #6\n\t"
        "eor w13, w5, w6\n\t"
        "eor w12, w12, w4, ror 11\n\t"
        "and w13, w13, w4\n\t"
        "eor w12, w12, w4, ror 25\n\t"
        "eor w13, w13, w6\n\t"
        "add w7, w7, w12\n\t"
        "add w7, w7, w13\n\t"
        "ldr w12, [%[k], #48]\n\t"
        "add w7, w7, w14\n\t"
        "add w7, w7, w12\n\t"
        "add w11, w11, w7\n\t"
        "ror w12, w8, #2\n\t"
        "eor w13, w8, w9\n\t"
        "eor w12, w12, w8, ror 13\n\t"
        "eor w14, w9, w10\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w8, ror 22\n\t"
        "eor w13, w13, w9\n\t"
        "add w7, w7, w12\n\t"
        "add w7, w7, w13\n\t"
        /* Round 13, interleaved with computing the next two words in v6 */
        "mov w14, v6.s[1]\n\t"
        "ext v10.8b, v6.8b, v7.8b, #4\n\t"
        "ror w12, w11, #6\n\t"
        "shl v8.2s, v5.2s, #15\n\t"
        "eor w13, w4, w5\n\t"
        "sri v8.2s, v5.2s, #17\n\t"
        "eor w12, w12, w11, ror 11\n\t"
        "shl v9.2s, v5.2s, #13\n\t"
        "and w13, w13, w11\n\t"
        "sri v9.2s, v5.2s, #19\n\t"
        "eor w12, w12, w11, ror 25\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "eor w13, w13, w5\n\t"
        "ushr v8.2s, v5.2s, #10\n\t"
        "add w6, w6, w12\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "add w6, w6, w13\n\t"
        "add v6.2s, v6.2s, v9.2s\n\t"
        "ldr w12, [%[k], #52]\n\t"
        "ext v11.8b, v2.8b, v3.8b, #4\n\t"
        "add w6, w6, w14\n\t"
        "add v6.2s, v6.2s, v11.2s\n\t"
        "add w6, w6, w12\n\t"
        "shl v8.2s, v10.2s, #25\n\t"
        "add w10, w10, w6\n\t"
        "sri v8.2s, v10.2s, #7\n\t"
        "ror w12, w7, #2\n\t"
        "shl v9.2s, v10.2s, #14\n\t"
        "eor w13, w7, w8\n\t"
        "sri v9.2s, v10.2s, #18\n\t"
        "eor w12, w12, w7, ror 13\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "eor w14, w8, w9\n\t"
        "ushr v10.2s, v10.2s, #3\n\t"
        "and w13, w13, w14\n\t"
        "eor v9.8b, v9.8b, v10.8b\n\t"
        "eor w12, w12, w7, ror 22\n\t"
        "add v6.2s, v6.2s, v9.2s\n\t"
        "eor w13, w13, w8\n\t"
        "add w6, w6, w12\n\t"
        "add w6, w6, w13\n\t"
        /* Round 14 */
        "mov w14, v7.s[0]\n\t"
        "ror w12, w10, #6\n\t"
        "eor w13, w11, w4\n\t"
        "eor w12, w12, w10, ror 11\n\t"
        "and w13, w13, w10\n\t"
        "eor w12, w12, w10, ror 25\n\t"
        "eor w13, w13, w4\n\t"
        "add w5, w5, w12\n\t"
        "add w5, w5, w13\n\t"
        "ldr w12, [%[k], #56]\n\t"
        "add w5, w5, w14\n\t"
        "add w5, w5, w12\n\t"
        "add w9, w9, w5\n\t"
        "ror w12, w6, #2\n\t"
        "eor w13, w6, w7\n\t"
        "eor w12, w12, w6, ror 13\n\t"
        "eor w14, w7, w8\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w6, ror 22\n\t"
        "eor w13, w13, w7\n\t"
        "add w5, w5, w12\n\t"
        "add w5, w5, w13\n\t"
        /* Round 15, interleaved with computing the next two words in v7 */
        "mov w14, v7.s[1]\n\t"
        "ext v10.8b, v7.8b, v0.8b, #4\n\t"
        "ror w12, w9, #6\n\t"
        "shl v8.2s, v6.2s, #15\n\t"
        "eor w13, w10, w11\n\t"
        "sri v8.2s, v6.2s, #17\n\t"
        "eor w12, w12, w9, ror 11\n\t"
        "shl v9.2s, v6.2s, #13\n\t"
        "and w13, w13, w9\n\t"
        "sri v9.2s, v6.2s, #19\n\t"
        "eor w12, w12, w9, ror 25\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "eor w13, w13, w11\n\t"
        "ushr v8.2s, v6.2s, #10\n\t"
        "add w4, w4, w12\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "add w4, w4, w13\n\t"
        "add v7.2s, v7.2s, v9.2s\n\t"
        "ldr w12, [%[k], #60]\n\t"
        "ext v11.8b, v3.8b, v4.8b, #4\n\t"
        "add w4, w4, w14\n\t"
        "add v7.2s, v7.2s, v11.2s\n\t"
        "add w4, w4, w12\n\t"
        "shl v8.2s, v10.2s, #25\n\t"
        "add w8, w8, w4\n\t"
        "sri v8.2s, v10.2s, #7\n\t"
        "ror w12, w5, #2\n\t"
        "shl v9.2s, v10.2s, #14\n\t"
        "eor w13, w5, w6\n\t"
        "sri v9.2s, v10.2s, #18\n\t"
        "eor w12, w12, w5, ror 13\n\t"
        "eor v9.8b, v9.8b, v8.8b\n\t"
        "eor w14, w6, w7\n\t"
        "ushr v10.2s, v10.2s, #3\n\t"
        "and w13, w13, w14\n\t"
        "eor v9.8b, v9.8b, v10.8b\n\t"
        "eor w12, w12, w5, ror 22\n\t"
        "add v7.2s, v7.2s, v9.2s\n\t"
        "eor w13, w13, w6\n\t"
        "add w4, w4, w12\n\t"
        "add w4, w4, w13\n\t"
        /* Advance k by 16 words and repeat until three passes are done */
        "add %[k], %[k], #0x40\n\t"
        "subs x24, x24, #1\n\t"
        "b.ne L_sha256_len_neon_start_%=\n\t"
        /* Final 16 rounds: no further message-schedule expansion */
        /* Round 0 */
        "mov w14, v0.s[0]\n\t"
        "ror w12, w8, #6\n\t"
        "eor w13, w9, w10\n\t"
        "eor w12, w12, w8, ror 11\n\t"
        "and w13, w13, w8\n\t"
        "eor w12, w12, w8, ror 25\n\t"
        "eor w13, w13, w10\n\t"
        "add w11, w11, w12\n\t"
        "add w11, w11, w13\n\t"
        "ldr w12, [%[k]]\n\t"
        "add w11, w11, w14\n\t"
        "add w11, w11, w12\n\t"
        "add w7, w7, w11\n\t"
        "ror w12, w4, #2\n\t"
        "eor w13, w4, w5\n\t"
        "eor w12, w12, w4, ror 13\n\t"
        "eor w14, w5, w6\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w4, ror 22\n\t"
        "eor w13, w13, w5\n\t"
        "add w11, w11, w12\n\t"
        "add w11, w11, w13\n\t"
        /* Round 1 */
        "mov w14, v0.s[1]\n\t"
        "ror w12, w7, #6\n\t"
        "eor w13, w8, w9\n\t"
        "eor w12, w12, w7, ror 11\n\t"
        "and w13, w13, w7\n\t"
        "eor w12, w12, w7, ror 25\n\t"
        "eor w13, w13, w9\n\t"
        "add w10, w10, w12\n\t"
        "add w10, w10, w13\n\t"
        "ldr w12, [%[k], #4]\n\t"
        "add w10, w10, w14\n\t"
        "add w10, w10, w12\n\t"
        "add w6, w6, w10\n\t"
        "ror w12, w11, #2\n\t"
        "eor w13, w11, w4\n\t"
        "eor w12, w12, w11, ror 13\n\t"
        "eor w14, w4, w5\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w11, ror 22\n\t"
        "eor w13, w13, w4\n\t"
        "add w10, w10, w12\n\t"
        "add w10, w10, w13\n\t"
        /* Round 2 */
        "mov w14, v1.s[0]\n\t"
        "ror w12, w6, #6\n\t"
        "eor w13, w7, w8\n\t"
        "eor w12, w12, w6, ror 11\n\t"
        "and w13, w13, w6\n\t"
        "eor w12, w12, w6, ror 25\n\t"
        "eor w13, w13, w8\n\t"
        "add w9, w9, w12\n\t"
        "add w9, w9, w13\n\t"
        "ldr w12, [%[k], #8]\n\t"
        "add w9, w9, w14\n\t"
        "add w9, w9, w12\n\t"
        "add w5, w5, w9\n\t"
        "ror w12, w10, #2\n\t"
        "eor w13, w10, w11\n\t"
        "eor w12, w12, w10, ror 13\n\t"
        "eor w14, w11, w4\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w10, ror 22\n\t"
        "eor w13, w13, w11\n\t"
        "add w9, w9, w12\n\t"
        "add w9, w9, w13\n\t"
        /* Round 3 */
        "mov w14, v1.s[1]\n\t"
        "ror w12, w5, #6\n\t"
        "eor w13, w6, w7\n\t"
        "eor w12, w12, w5, ror 11\n\t"
        "and w13, w13, w5\n\t"
        "eor w12, w12, w5, ror 25\n\t"
        "eor w13, w13, w7\n\t"
        "add w8, w8, w12\n\t"
        "add w8, w8, w13\n\t"
        "ldr w12, [%[k], #12]\n\t"
        "add w8, w8, w14\n\t"
        "add w8, w8, w12\n\t"
        "add w4, w4, w8\n\t"
        "ror w12, w9, #2\n\t"
        "eor w13, w9, w10\n\t"
        "eor w12, w12, w9, ror 13\n\t"
        "eor w14, w10, w11\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w9, ror 22\n\t"
        "eor w13, w13, w10\n\t"
        "add w8, w8, w12\n\t"
        "add w8, w8, w13\n\t"
        /* Round 4 */
        "mov w14, v2.s[0]\n\t"
        "ror w12, w4, #6\n\t"
        "eor w13, w5, w6\n\t"
        "eor w12, w12, w4, ror 11\n\t"
        "and w13, w13, w4\n\t"
        "eor w12, w12, w4, ror 25\n\t"
        "eor w13, w13, w6\n\t"
        "add w7, w7, w12\n\t"
        "add w7, w7, w13\n\t"
        "ldr w12, [%[k], #16]\n\t"
        "add w7, w7, w14\n\t"
        "add w7, w7, w12\n\t"
        "add w11, w11, w7\n\t"
        "ror w12, w8, #2\n\t"
        "eor w13, w8, w9\n\t"
        "eor w12, w12, w8, ror 13\n\t"
        "eor w14, w9, w10\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w8, ror 22\n\t"
        "eor w13, w13, w9\n\t"
        "add w7, w7, w12\n\t"
        "add w7, w7, w13\n\t"
        /* Round 5 */
        "mov w14, v2.s[1]\n\t"
        "ror w12, w11, #6\n\t"
        "eor w13, w4, w5\n\t"
        "eor w12, w12, w11, ror 11\n\t"
        "and w13, w13, w11\n\t"
        "eor w12, w12, w11, ror 25\n\t"
        "eor w13, w13, w5\n\t"
        "add w6, w6, w12\n\t"
        "add w6, w6, w13\n\t"
        "ldr w12, [%[k], #20]\n\t"
        "add w6, w6, w14\n\t"
        "add w6, w6, w12\n\t"
        "add w10, w10, w6\n\t"
        "ror w12, w7, #2\n\t"
        "eor w13, w7, w8\n\t"
        "eor w12, w12, w7, ror 13\n\t"
        "eor w14, w8, w9\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w7, ror 22\n\t"
        "eor w13, w13, w8\n\t"
        "add w6, w6, w12\n\t"
        "add w6, w6, w13\n\t"
        /* Round 6 */
        "mov w14, v3.s[0]\n\t"
        "ror w12, w10, #6\n\t"
        "eor w13, w11, w4\n\t"
        "eor w12, w12, w10, ror 11\n\t"
        "and w13, w13, w10\n\t"
        "eor w12, w12, w10, ror 25\n\t"
        "eor w13, w13, w4\n\t"
        "add w5, w5, w12\n\t"
        "add w5, w5, w13\n\t"
        "ldr w12, [%[k], #24]\n\t"
        "add w5, w5, w14\n\t"
        "add w5, w5, w12\n\t"
        "add w9, w9, w5\n\t"
        "ror w12, w6, #2\n\t"
        "eor w13, w6, w7\n\t"
        "eor w12, w12, w6, ror 13\n\t"
        "eor w14, w7, w8\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w6, ror 22\n\t"
        "eor w13, w13, w7\n\t"
        "add w5, w5, w12\n\t"
        "add w5, w5, w13\n\t"
        /* Round 7 */
        "mov w14, v3.s[1]\n\t"
        "ror w12, w9, #6\n\t"
        "eor w13, w10, w11\n\t"
        "eor w12, w12, w9, ror 11\n\t"
        "and w13, w13, w9\n\t"
        "eor w12, w12, w9, ror 25\n\t"
        "eor w13, w13, w11\n\t"
        "add w4, w4, w12\n\t"
        "add w4, w4, w13\n\t"
        "ldr w12, [%[k], #28]\n\t"
        "add w4, w4, w14\n\t"
        "add w4, w4, w12\n\t"
        "add w8, w8, w4\n\t"
        "ror w12, w5, #2\n\t"
        "eor w13, w5, w6\n\t"
        "eor w12, w12, w5, ror 13\n\t"
        "eor w14, w6, w7\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w5, ror 22\n\t"
        "eor w13, w13, w6\n\t"
        "add w4, w4, w12\n\t"
        "add w4, w4, w13\n\t"
        /* Round 8 */
        "mov w14, v4.s[0]\n\t"
        "ror w12, w8, #6\n\t"
        "eor w13, w9, w10\n\t"
        "eor w12, w12, w8, ror 11\n\t"
        "and w13, w13, w8\n\t"
        "eor w12, w12, w8, ror 25\n\t"
        "eor w13, w13, w10\n\t"
        "add w11, w11, w12\n\t"
        "add w11, w11, w13\n\t"
        "ldr w12, [%[k], #32]\n\t"
        "add w11, w11, w14\n\t"
        "add w11, w11, w12\n\t"
        "add w7, w7, w11\n\t"
        "ror w12, w4, #2\n\t"
        "eor w13, w4, w5\n\t"
        "eor w12, w12, w4, ror 13\n\t"
        "eor w14, w5, w6\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w4, ror 22\n\t"
        "eor w13, w13, w5\n\t"
        "add w11, w11, w12\n\t"
        "add w11, w11, w13\n\t"
        /* Round 9 */
        "mov w14, v4.s[1]\n\t"
        "ror w12, w7, #6\n\t"
        "eor w13, w8, w9\n\t"
        "eor w12, w12, w7, ror 11\n\t"
        "and w13, w13, w7\n\t"
        "eor w12, w12, w7, ror 25\n\t"
        "eor w13, w13, w9\n\t"
        "add w10, w10, w12\n\t"
        "add w10, w10, w13\n\t"
        "ldr w12, [%[k], #36]\n\t"
        "add w10, w10, w14\n\t"
        "add w10, w10, w12\n\t"
        "add w6, w6, w10\n\t"
        "ror w12, w11, #2\n\t"
        "eor w13, w11, w4\n\t"
        "eor w12, w12, w11, ror 13\n\t"
        "eor w14, w4, w5\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w11, ror 22\n\t"
        "eor w13, w13, w4\n\t"
        "add w10, w10, w12\n\t"
        "add w10, w10, w13\n\t"
        /* Round 10 */
        "mov w14, v5.s[0]\n\t"
        "ror w12, w6, #6\n\t"
        "eor w13, w7, w8\n\t"
        "eor w12, w12, w6, ror 11\n\t"
        "and w13, w13, w6\n\t"
        "eor w12, w12, w6, ror 25\n\t"
        "eor w13, w13, w8\n\t"
        "add w9, w9, w12\n\t"
        "add w9, w9, w13\n\t"
        "ldr w12, [%[k], #40]\n\t"
        "add w9, w9, w14\n\t"
        "add w9, w9, w12\n\t"
        "add w5, w5, w9\n\t"
        "ror w12, w10, #2\n\t"
        "eor w13, w10, w11\n\t"
        "eor w12, w12, w10, ror 13\n\t"
        "eor w14, w11, w4\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w10, ror 22\n\t"
        "eor w13, w13, w11\n\t"
        "add w9, w9, w12\n\t"
        "add w9, w9, w13\n\t"
        /* Round 11 */
        "mov w14, v5.s[1]\n\t"
        "ror w12, w5, #6\n\t"
        "eor w13, w6, w7\n\t"
        "eor w12, w12, w5, ror 11\n\t"
        "and w13, w13, w5\n\t"
        "eor w12, w12, w5, ror 25\n\t"
        "eor w13, w13, w7\n\t"
        "add w8, w8, w12\n\t"
        "add w8, w8, w13\n\t"
        "ldr w12, [%[k], #44]\n\t"
        "add w8, w8, w14\n\t"
        "add w8, w8, w12\n\t"
        "add w4, w4, w8\n\t"
        "ror w12, w9, #2\n\t"
        "eor w13, w9, w10\n\t"
        "eor w12, w12, w9, ror 13\n\t"
        "eor w14, w10, w11\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w9, ror 22\n\t"
        "eor w13, w13, w10\n\t"
        "add w8, w8, w12\n\t"
        "add w8, w8, w13\n\t"
        /* Round 12 */
        "mov w14, v6.s[0]\n\t"
        "ror w12, w4, #6\n\t"
        "eor w13, w5, w6\n\t"
        "eor w12, w12, w4, ror 11\n\t"
        "and w13, w13, w4\n\t"
        "eor w12, w12, w4, ror 25\n\t"
        "eor w13, w13, w6\n\t"
        "add w7, w7, w12\n\t"
        "add w7, w7, w13\n\t"
        "ldr w12, [%[k], #48]\n\t"
        "add w7, w7, w14\n\t"
        "add w7, w7, w12\n\t"
        "add w11, w11, w7\n\t"
        "ror w12, w8, #2\n\t"
        "eor w13, w8, w9\n\t"
        "eor w12, w12, w8, ror 13\n\t"
        "eor w14, w9, w10\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w8, ror 22\n\t"
        "eor w13, w13, w9\n\t"
        "add w7, w7, w12\n\t"
        "add w7, w7, w13\n\t"
        /* Round 13 */
        "mov w14, v6.s[1]\n\t"
        "ror w12, w11, #6\n\t"
        "eor w13, w4, w5\n\t"
        "eor w12, w12, w11, ror 11\n\t"
        "and w13, w13, w11\n\t"
        "eor w12, w12, w11, ror 25\n\t"
        "eor w13, w13, w5\n\t"
        "add w6, w6, w12\n\t"
        "add w6, w6, w13\n\t"
        "ldr w12, [%[k], #52]\n\t"
        "add w6, w6, w14\n\t"
        "add w6, w6, w12\n\t"
        "add w10, w10, w6\n\t"
        "ror w12, w7, #2\n\t"
        "eor w13, w7, w8\n\t"
        "eor w12, w12, w7, ror 13\n\t"
        "eor w14, w8, w9\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w7, ror 22\n\t"
        "eor w13, w13, w8\n\t"
        "add w6, w6, w12\n\t"
        "add w6, w6, w13\n\t"
        /* Round 14 */
        "mov w14, v7.s[0]\n\t"
        "ror w12, w10, #6\n\t"
        "eor w13, w11, w4\n\t"
        "eor w12, w12, w10, ror 11\n\t"
        "and w13, w13, w10\n\t"
        "eor w12, w12, w10, ror 25\n\t"
        "eor w13, w13, w4\n\t"
        "add w5, w5, w12\n\t"
        "add w5, w5, w13\n\t"
        "ldr w12, [%[k], #56]\n\t"
        "add w5, w5, w14\n\t"
        "add w5, w5, w12\n\t"
        "add w9, w9, w5\n\t"
        "ror w12, w6, #2\n\t"
        "eor w13, w6, w7\n\t"
        "eor w12, w12, w6, ror 13\n\t"
        "eor w14, w7, w8\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w6, ror 22\n\t"
        "eor w13, w13, w7\n\t"
        "add w5, w5, w12\n\t"
        "add w5, w5, w13\n\t"
        /* Round 15 */
        "mov w14, v7.s[1]\n\t"
        "ror w12, w9, #6\n\t"
        "eor w13, w10, w11\n\t"
        "eor w12, w12, w9, ror 11\n\t"
        "and w13, w13, w9\n\t"
        "eor w12, w12, w9, ror 25\n\t"
        "eor w13, w13, w11\n\t"
        "add w4, w4, w12\n\t"
        "add w4, w4, w13\n\t"
        "ldr w12, [%[k], #60]\n\t"
        "add w4, w4, w14\n\t"
        "add w4, w4, w12\n\t"
        "add w8, w8, w4\n\t"
        "ror w12, w5, #2\n\t"
        "eor w13, w5, w6\n\t"
        "eor w12, w12, w5, ror 13\n\t"
        "eor w14, w6, w7\n\t"
        "and w13, w13, w14\n\t"
        "eor w12, w12, w5, ror 22\n\t"
        "eor w13, w13, w6\n\t"
        "add w4, w4, w12\n\t"
        "add w4, w4, w13\n\t"
        /* Add the block-start copy back into the working words */
        "add w11, w11, w23\n\t"
        "add w10, w10, w22\n\t"
        "add w9, w9, w21\n\t"
        "add w8, w8, w20\n\t"
        "add w7, w7, w19\n\t"
        "add w6, w6, w17\n\t"
        "add w5, w5, w16\n\t"
        "add w4, w4, w15\n\t"
        /* One block consumed; rewind k to the start of the table (the `sub`
         * does not touch the flags set by `subs`) and loop while len != 0 */
        "subs %w[len], %w[len], #0x40\n\t"
        "sub %[k], %[k], #0xc0\n\t"
        "b.ne L_sha256_len_neon_begin_%=\n\t"
        /* Store the updated working words */
        "str w4, [%x[sha256]]\n\t"
        "str w5, [%x[sha256], #4]\n\t"
        "str w6, [%x[sha256], #8]\n\t"
        "str w7, [%x[sha256], #12]\n\t"
        "str w8, [%x[sha256], #16]\n\t"
        "str w9, [%x[sha256], #20]\n\t"
        "str w10, [%x[sha256], #24]\n\t"
        "str w11, [%x[sha256], #28]\n\t"
        /* data and k are written by the asm, so they must be in/out operands */
        : [sha256] "+r" (sha256), [len] "+r" (len), [data] "+r" (data),
          [k] "+r" (k)
        :
        : "memory", "cc", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11",
          "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21",
          "x22", "x23", "x24", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
          "v8", "v9", "v10", "v11"
    );
}
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
/* Table of 64 32-bit constants, loaded once through the `k` pointer into
 * v8..v23 (four words per register) by Transform_Sha256_Len_crypto. The
 * values are the SHA-256 round constants K[0..63]. */
static const word32 L_SHA256_trans_crypto_len_k[] = {
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,    /* K[ 0.. 3] */
    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,    /* K[ 4.. 7] */
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,    /* K[ 8..11] */
    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,    /* K[12..15] */
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,    /* K[16..19] */
    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,    /* K[20..23] */
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,    /* K[24..27] */
    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,    /* K[28..31] */
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,    /* K[32..35] */
    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,    /* K[36..39] */
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,    /* K[40..43] */
    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,    /* K[44..47] */
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,    /* K[48..51] */
    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,    /* K[52..55] */
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,    /* K[56..59] */
    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,    /* K[60..63] */
};
/* SHA-256 block transform using the SHA256H / SHA256H2 / SHA256SU0 /
 * SHA256SU1 instructions (Armv8 Cryptographic Extension).
 *
 * sha256  Hash object. Eight 32-bit state words are loaded from, and stored
 *         back to, the first 32 bytes at this pointer.
 *         NOTE(review): assumes the digest words are the first 32 bytes of
 *         wc_Sha256 - the structure layout is not visible here; confirm.
 * data    Input bytes. 64 bytes are consumed per loop iteration and each
 *         32-bit word is byte-reversed (rev32) before use.
 * len     Number of bytes to process. Must be a non-zero multiple of 64: the
 *         loop subtracts 0x40 each pass and only exits when the result is
 *         exactly zero.
 *
 * Register use inside the asm:
 *   v0, v1     current state
 *   v2, v3     state at the start of the block, added back in at the end
 *   v4..v7     16 message words, expanded in place by sha256su0/sha256su1
 *   v8..v23    the 64 round constants (loaded once, before the loop)
 *   v24        message words plus round constants for the current 4 rounds
 *   v25        copy of v0 taken before sha256h, as input for sha256h2
 *
 * The `data` and `k` pointers are post-incremented by the loads, so both are
 * declared as read/write ("+r") operands. Declaring them input-only would
 * break the compiler's assumption that input registers are unchanged.
 */
void Transform_Sha256_Len_crypto(wc_Sha256* sha256, const byte* data,
    word32 len);
void Transform_Sha256_Len_crypto(wc_Sha256* sha256, const byte* data,
    word32 len)
{
    const word32* k = L_SHA256_trans_crypto_len_k;

    __asm__ __volatile__ (
        /* Load all 64 round constants into v8..v23 */
        "ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [%[k]], #0x40\n\t"
        "ld1 {v12.4s, v13.4s, v14.4s, v15.4s}, [%[k]], #0x40\n\t"
        "ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[k]], #0x40\n\t"
        "ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [%[k]], #0x40\n\t"
        /* Load the current state */
        "ld1 {v0.4s, v1.4s}, [%x[sha256]]\n\t"
        /* Start of loop processing one 64-byte block */
        "\n"
    "L_sha256_len_crypto_begin_%=: \n\t"
        /* Load the message block and byte-reverse each 32-bit word */
        "ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [%x[data]], #0x40\n\t"
        "rev32 v4.16b, v4.16b\n\t"
        "rev32 v5.16b, v5.16b\n\t"
        "rev32 v6.16b, v6.16b\n\t"
        "rev32 v7.16b, v7.16b\n\t"
        /* Keep a copy of the state to add in after the rounds */
        "mov v2.16b, v0.16b\n\t"
        "mov v3.16b, v1.16b\n\t"
        /* Rounds 0-3 */
        "add v24.4s, v4.4s, v8.4s\n\t"
        "mov v25.16b, v0.16b\n\t"
        "sha256h q0, q1, v24.4s\n\t"
        "sha256h2 q1, q25, v24.4s\n\t"
        /* Rounds 4-7, interleaved with expanding v4 */
        "sha256su0 v4.4s, v5.4s\n\t"
        "add v24.4s, v5.4s, v9.4s\n\t"
        "mov v25.16b, v0.16b\n\t"
        "sha256su1 v4.4s, v6.4s, v7.4s\n\t"
        "sha256h q0, q1, v24.4s\n\t"
        "sha256h2 q1, q25, v24.4s\n\t"
        /* Rounds 8-11, interleaved with expanding v5 */
        "sha256su0 v5.4s, v6.4s\n\t"
        "add v24.4s, v6.4s, v10.4s\n\t"
        "mov v25.16b, v0.16b\n\t"
        "sha256su1 v5.4s, v7.4s, v4.4s\n\t"
        "sha256h q0, q1, v24.4s\n\t"
        "sha256h2 q1, q25, v24.4s\n\t"
        /* Rounds 12-15, interleaved with expanding v6 */
        "sha256su0 v6.4s, v7.4s\n\t"
        "add v24.4s, v7.4s, v11.4s\n\t"
        "mov v25.16b, v0.16b\n\t"
        "sha256su1 v6.4s, v4.4s, v5.4s\n\t"
        "sha256h q0, q1, v24.4s\n\t"
        "sha256h2 q1, q25, v24.4s\n\t"
        /* Rounds 16-19, interleaved with expanding v7 */
        "sha256su0 v7.4s, v4.4s\n\t"
        "add v24.4s, v4.4s, v12.4s\n\t"
        "mov v25.16b, v0.16b\n\t"
        "sha256su1 v7.4s, v5.4s, v6.4s\n\t"
        "sha256h q0, q1, v24.4s\n\t"
        "sha256h2 q1, q25, v24.4s\n\t"
        /* Rounds 20-23, interleaved with expanding v4 */
        "sha256su0 v4.4s, v5.4s\n\t"
        "add v24.4s, v5.4s, v13.4s\n\t"
        "mov v25.16b, v0.16b\n\t"
        "sha256su1 v4.4s, v6.4s, v7.4s\n\t"
        "sha256h q0, q1, v24.4s\n\t"
        "sha256h2 q1, q25, v24.4s\n\t"
        /* Rounds 24-27, interleaved with expanding v5 */
        "sha256su0 v5.4s, v6.4s\n\t"
        "add v24.4s, v6.4s, v14.4s\n\t"
        "mov v25.16b, v0.16b\n\t"
        "sha256su1 v5.4s, v7.4s, v4.4s\n\t"
        "sha256h q0, q1, v24.4s\n\t"
        "sha256h2 q1, q25, v24.4s\n\t"
        /* Rounds 28-31, interleaved with expanding v6 */
        "sha256su0 v6.4s, v7.4s\n\t"
        "add v24.4s, v7.4s, v15.4s\n\t"
        "mov v25.16b, v0.16b\n\t"
        "sha256su1 v6.4s, v4.4s, v5.4s\n\t"
        "sha256h q0, q1, v24.4s\n\t"
        "sha256h2 q1, q25, v24.4s\n\t"
        /* Rounds 32-35, interleaved with expanding v7 */
        "sha256su0 v7.4s, v4.4s\n\t"
        "add v24.4s, v4.4s, v16.4s\n\t"
        "mov v25.16b, v0.16b\n\t"
        "sha256su1 v7.4s, v5.4s, v6.4s\n\t"
        "sha256h q0, q1, v24.4s\n\t"
        "sha256h2 q1, q25, v24.4s\n\t"
        /* Rounds 36-39, interleaved with expanding v4 */
        "sha256su0 v4.4s, v5.4s\n\t"
        "add v24.4s, v5.4s, v17.4s\n\t"
        "mov v25.16b, v0.16b\n\t"
        "sha256su1 v4.4s, v6.4s, v7.4s\n\t"
        "sha256h q0, q1, v24.4s\n\t"
        "sha256h2 q1, q25, v24.4s\n\t"
        /* Rounds 40-43, interleaved with expanding v5 */
        "sha256su0 v5.4s, v6.4s\n\t"
        "add v24.4s, v6.4s, v18.4s\n\t"
        "mov v25.16b, v0.16b\n\t"
        "sha256su1 v5.4s, v7.4s, v4.4s\n\t"
        "sha256h q0, q1, v24.4s\n\t"
        "sha256h2 q1, q25, v24.4s\n\t"
        /* Rounds 44-47, interleaved with expanding v6 */
        "sha256su0 v6.4s, v7.4s\n\t"
        "add v24.4s, v7.4s, v19.4s\n\t"
        "mov v25.16b, v0.16b\n\t"
        "sha256su1 v6.4s, v4.4s, v5.4s\n\t"
        "sha256h q0, q1, v24.4s\n\t"
        "sha256h2 q1, q25, v24.4s\n\t"
        /* Rounds 48-51, interleaved with expanding v7 */
        "sha256su0 v7.4s, v4.4s\n\t"
        "add v24.4s, v4.4s, v20.4s\n\t"
        "mov v25.16b, v0.16b\n\t"
        "sha256su1 v7.4s, v5.4s, v6.4s\n\t"
        "sha256h q0, q1, v24.4s\n\t"
        "sha256h2 q1, q25, v24.4s\n\t"
        /* Rounds 52-55 */
        "add v24.4s, v5.4s, v21.4s\n\t"
        "mov v25.16b, v0.16b\n\t"
        "sha256h q0, q1, v24.4s\n\t"
        "sha256h2 q1, q25, v24.4s\n\t"
        /* Rounds 56-59 */
        "add v24.4s, v6.4s, v22.4s\n\t"
        "mov v25.16b, v0.16b\n\t"
        "sha256h q0, q1, v24.4s\n\t"
        "sha256h2 q1, q25, v24.4s\n\t"
        /* Rounds 60-63 */
        "add v24.4s, v7.4s, v23.4s\n\t"
        "mov v25.16b, v0.16b\n\t"
        "sha256h q0, q1, v24.4s\n\t"
        "sha256h2 q1, q25, v24.4s\n\t"
        /* Add the block-start state back in */
        "add v0.4s, v0.4s, v2.4s\n\t"
        "add v1.4s, v1.4s, v3.4s\n\t"
        /* One block consumed; loop while len != 0 */
        "subs %w[len], %w[len], #0x40\n\t"
        "b.ne L_sha256_len_crypto_begin_%=\n\t"
        /* Store the updated state */
        "st1 {v0.4s, v1.4s}, [%x[sha256]]\n\t"
        /* data and k are written by the asm, so they must be in/out operands */
        : [sha256] "+r" (sha256), [len] "+r" (len), [data] "+r" (data),
          [k] "+r" (k)
        :
        : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
          "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18",
          "v19", "v20", "v21", "v22", "v23", "v24", "v25"
    );
}
#endif
#endif
#endif
#endif
#endif