#include <wolfssl/wolfcrypt/libwolfssl_sources_asm.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM
#ifdef __aarch64__
#ifdef WOLFSSL_ARMASM_INLINE
#include <wolfssl/wolfcrypt/sha512.h>
#if defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384)
/* SHA-512 round constants, 80 entries laid out four per row.
 * Transform_Sha512_Len_neon reads one entry per round through a
 * post-incremented pointer and rewinds that pointer after each block. */
static const word64 L_SHA512_transform_neon_len_k[] = {
    /*  0 ..  3 */
    0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
    /*  4 ..  7 */
    0x3956c25bf348b538, 0x59f111f1b605d019, 0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
    /*  8 .. 11 */
    0xd807aa98a3030242, 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
    /* 12 .. 15 */
    0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235, 0xc19bf174cf692694,
    /* 16 .. 19 */
    0xe49b69c19ef14ad2, 0xefbe4786384f25e3, 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
    /* 20 .. 23 */
    0x2de92c6f592b0275, 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
    /* 24 .. 27 */
    0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f, 0xbf597fc7beef0ee4,
    /* 28 .. 31 */
    0xc6e00bf33da88fc2, 0xd5a79147930aa725, 0x06ca6351e003826f, 0x142929670a0e6e70,
    /* 32 .. 35 */
    0x27b70a8546d22ffc, 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
    /* 36 .. 39 */
    0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6, 0x92722c851482353b,
    /* 40 .. 43 */
    0xa2bfe8a14cf10364, 0xa81a664bbc423001, 0xc24b8b70d0f89791, 0xc76c51a30654be30,
    /* 44 .. 47 */
    0xd192e819d6ef5218, 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8,
    /* 48 .. 51 */
    0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
    /* 52 .. 55 */
    0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
    /* 56 .. 59 */
    0x748f82ee5defb2fc, 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec,
    /* 60 .. 63 */
    0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915, 0xc67178f2e372532b,
    /* 64 .. 67 */
    0xca273eceea26619c, 0xd186b8c721c0c207, 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
    /* 68 .. 71 */
    0x06f067aa72176fba, 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b,
    /* 72 .. 75 */
    0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
    /* 76 .. 79 */
    0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, 0x5fcb6fab3ad6faec, 0x6c44198c4a475817,
};
/* Byte-index vector loaded into v11 and used as the index operand of the
 * `tbl` instructions in Transform_Sha512_Len_neon: within each 64-bit lane,
 * result byte i is taken from source byte (i + 1) mod 8, i.e. each lane is
 * rotated right by 8 bits. */
static const word64 L_SHA512_transform_neon_len_r8[] = {
    0x0007060504030201,
    0x080f0e0d0c0b0a09,
};
/* SHA-512 block transform using AArch64 general-purpose registers for the
 * round function and NEON registers for the message schedule.
 *
 * sha512  Eight 64-bit state words are loaded from offsets 0..63 of this
 *         object before the first block and stored back after the last
 *         (presumably the running digest -- confirm against wc_Sha512).
 * data    Input bytes; consumed 128 bytes (0x80) per block. Each 64-bit word
 *         is byte-reversed (rev64) after loading.
 * len     Number of bytes to process. The block loop subtracts 0x80 and
 *         exits only when the counter reaches exactly zero, so callers must
 *         pass a non-zero multiple of 128.
 *
 * Per block: four passes of 16 rounds that also update the message schedule
 * held in v0-v7, followed by 16 final rounds with no schedule update
 * (80 rounds total), then the saved state (x19-x26) is added back in.
 *
 * Operand note: the asm advances `data` (post-index loads), advances and
 * rewinds `k`, and reuses the `r8` register as the pass counter once the
 * table has been loaded into v11. All three are therefore declared as
 * read-write ("+r") operands; declaring them input-only would let the
 * compiler assume their registers are unchanged after the asm statement.
 */
void Transform_Sha512_Len_neon(wc_Sha512* sha512, const byte* data, word32 len)
{
    const word64* k = L_SHA512_transform_neon_len_k;
    const word64* r8 = L_SHA512_transform_neon_len_r8;
    __asm__ __volatile__ (
        /* Load the rotate-by-8 byte index table */
        "ld1 {v11.16b}, [%[r8]]\n\t"
        /* Load the eight state words into x4-x11 */
        "ldp x4, x5, [%x[sha512]]\n\t"
        "ldp x6, x7, [%x[sha512], #16]\n\t"
        "ldp x8, x9, [%x[sha512], #32]\n\t"
        "ldp x10, x11, [%x[sha512], #48]\n\t"
        /* Start of loop processing one 128-byte block */
        "\n"
    "L_sha512_len_neon_begin_%=: \n\t"
        /* Load 16 message words, byte-swap them, and save the incoming
         * state in x19-x26 for the final addition */
        "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[data]], #0x40\n\t"
        "mov x19, x4\n\t"
        "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[data]], #0x40\n\t"
        "mov x20, x5\n\t"
        "rev64 v0.16b, v0.16b\n\t"
        "mov x21, x6\n\t"
        "rev64 v1.16b, v1.16b\n\t"
        "mov x22, x7\n\t"
        "rev64 v2.16b, v2.16b\n\t"
        "mov x23, x8\n\t"
        "rev64 v3.16b, v3.16b\n\t"
        "mov x24, x9\n\t"
        "rev64 v4.16b, v4.16b\n\t"
        "mov x25, x10\n\t"
        "rev64 v5.16b, v5.16b\n\t"
        "mov x26, x11\n\t"
        "rev64 v6.16b, v6.16b\n\t"
        "rev64 v7.16b, v7.16b\n\t"
        /* Pre-calculate b ^ c for the first round */
        "eor x16, x5, x6\n\t"
        /* The r8 register is free now (table is in v11): use it as the
         * counter for the four schedule-updating passes */
        "mov %[r8], #4\n\t"
        /* Start of one pass of 16 rounds with message schedule update */
        "\n"
    "L_sha512_len_neon_start_%=: \n\t"
        /* Round 0 */
        "mov x13, v0.d[0]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x8, #14\n\t"
        "ror x14, x4, #28\n\t"
        "eor x12, x12, x8, ror 18\n\t"
        "eor x14, x14, x4, ror 34\n\t"
        "eor x12, x12, x8, ror 41\n\t"
        "eor x14, x14, x4, ror 39\n\t"
        "add x11, x11, x12\n\t"
        "eor x17, x4, x5\n\t"
        "eor x12, x9, x10\n\t"
        "and x16, x17, x16\n\t"
        "and x12, x12, x8\n\t"
        "add x11, x11, x13\n\t"
        "eor x12, x12, x10\n\t"
        "add x11, x11, x15\n\t"
        "eor x16, x16, x5\n\t"
        "add x11, x11, x12\n\t"
        "add x14, x14, x16\n\t"
        "add x7, x7, x11\n\t"
        "add x11, x11, x14\n\t"
        /* Round 1, interleaved with the schedule update of v0 */
        "mov x13, v0.d[1]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ext v10.16b, v0.16b, v1.16b, #8\n\t"
        "ror x12, x7, #14\n\t"
        "shl v8.2d, v7.2d, #45\n\t"
        "ror x14, x11, #28\n\t"
        "sri v8.2d, v7.2d, #19\n\t"
        "eor x12, x12, x7, ror 18\n\t"
        "shl v9.2d, v7.2d, #3\n\t"
        "eor x14, x14, x11, ror 34\n\t"
        "sri v9.2d, v7.2d, #61\n\t"
        "eor x12, x12, x7, ror 41\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x14, x14, x11, ror 39\n\t"
        "ushr v8.2d, v7.2d, #6\n\t"
        "add x10, x10, x12\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x16, x11, x4\n\t"
        "add v0.2d, v0.2d, v9.2d\n\t"
        "eor x12, x8, x9\n\t"
        "ext v9.16b, v4.16b, v5.16b, #8\n\t"
        "and x17, x16, x17\n\t"
        "add v0.2d, v0.2d, v9.2d\n\t"
        "and x12, x12, x7\n\t"
        "shl v8.2d, v10.2d, #63\n\t"
        "add x10, x10, x13\n\t"
        "sri v8.2d, v10.2d, #1\n\t"
        "eor x12, x12, x9\n\t"
        "tbl v9.16b, {v10.16b}, v11.16b\n\t"
        "add x10, x10, x15\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x17, x17, x4\n\t"
        "ushr v10.2d, v10.2d, #7\n\t"
        "add x10, x10, x12\n\t"
        "eor v9.16b, v9.16b, v10.16b\n\t"
        "add x14, x14, x17\n\t"
        "add v0.2d, v0.2d, v9.2d\n\t"
        "add x6, x6, x10\n\t"
        "add x10, x10, x14\n\t"
        /* Round 2 */
        "mov x13, v1.d[0]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x6, #14\n\t"
        "ror x14, x10, #28\n\t"
        "eor x12, x12, x6, ror 18\n\t"
        "eor x14, x14, x10, ror 34\n\t"
        "eor x12, x12, x6, ror 41\n\t"
        "eor x14, x14, x10, ror 39\n\t"
        "add x9, x9, x12\n\t"
        "eor x17, x10, x11\n\t"
        "eor x12, x7, x8\n\t"
        "and x16, x17, x16\n\t"
        "and x12, x12, x6\n\t"
        "add x9, x9, x13\n\t"
        "eor x12, x12, x8\n\t"
        "add x9, x9, x15\n\t"
        "eor x16, x16, x11\n\t"
        "add x9, x9, x12\n\t"
        "add x14, x14, x16\n\t"
        "add x5, x5, x9\n\t"
        "add x9, x9, x14\n\t"
        /* Round 3, interleaved with the schedule update of v1 */
        "mov x13, v1.d[1]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ext v10.16b, v1.16b, v2.16b, #8\n\t"
        "ror x12, x5, #14\n\t"
        "shl v8.2d, v0.2d, #45\n\t"
        "ror x14, x9, #28\n\t"
        "sri v8.2d, v0.2d, #19\n\t"
        "eor x12, x12, x5, ror 18\n\t"
        "shl v9.2d, v0.2d, #3\n\t"
        "eor x14, x14, x9, ror 34\n\t"
        "sri v9.2d, v0.2d, #61\n\t"
        "eor x12, x12, x5, ror 41\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x14, x14, x9, ror 39\n\t"
        "ushr v8.2d, v0.2d, #6\n\t"
        "add x8, x8, x12\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x16, x9, x10\n\t"
        "add v1.2d, v1.2d, v9.2d\n\t"
        "eor x12, x6, x7\n\t"
        "ext v9.16b, v5.16b, v6.16b, #8\n\t"
        "and x17, x16, x17\n\t"
        "add v1.2d, v1.2d, v9.2d\n\t"
        "and x12, x12, x5\n\t"
        "shl v8.2d, v10.2d, #63\n\t"
        "add x8, x8, x13\n\t"
        "sri v8.2d, v10.2d, #1\n\t"
        "eor x12, x12, x7\n\t"
        "tbl v9.16b, {v10.16b}, v11.16b\n\t"
        "add x8, x8, x15\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x17, x17, x10\n\t"
        "ushr v10.2d, v10.2d, #7\n\t"
        "add x8, x8, x12\n\t"
        "eor v9.16b, v9.16b, v10.16b\n\t"
        "add x14, x14, x17\n\t"
        "add v1.2d, v1.2d, v9.2d\n\t"
        "add x4, x4, x8\n\t"
        "add x8, x8, x14\n\t"
        /* Round 4 */
        "mov x13, v2.d[0]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x4, #14\n\t"
        "ror x14, x8, #28\n\t"
        "eor x12, x12, x4, ror 18\n\t"
        "eor x14, x14, x8, ror 34\n\t"
        "eor x12, x12, x4, ror 41\n\t"
        "eor x14, x14, x8, ror 39\n\t"
        "add x7, x7, x12\n\t"
        "eor x17, x8, x9\n\t"
        "eor x12, x5, x6\n\t"
        "and x16, x17, x16\n\t"
        "and x12, x12, x4\n\t"
        "add x7, x7, x13\n\t"
        "eor x12, x12, x6\n\t"
        "add x7, x7, x15\n\t"
        "eor x16, x16, x9\n\t"
        "add x7, x7, x12\n\t"
        "add x14, x14, x16\n\t"
        "add x11, x11, x7\n\t"
        "add x7, x7, x14\n\t"
        /* Round 5, interleaved with the schedule update of v2 */
        "mov x13, v2.d[1]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ext v10.16b, v2.16b, v3.16b, #8\n\t"
        "ror x12, x11, #14\n\t"
        "shl v8.2d, v1.2d, #45\n\t"
        "ror x14, x7, #28\n\t"
        "sri v8.2d, v1.2d, #19\n\t"
        "eor x12, x12, x11, ror 18\n\t"
        "shl v9.2d, v1.2d, #3\n\t"
        "eor x14, x14, x7, ror 34\n\t"
        "sri v9.2d, v1.2d, #61\n\t"
        "eor x12, x12, x11, ror 41\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x14, x14, x7, ror 39\n\t"
        "ushr v8.2d, v1.2d, #6\n\t"
        "add x6, x6, x12\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x16, x7, x8\n\t"
        "add v2.2d, v2.2d, v9.2d\n\t"
        "eor x12, x4, x5\n\t"
        "ext v9.16b, v6.16b, v7.16b, #8\n\t"
        "and x17, x16, x17\n\t"
        "add v2.2d, v2.2d, v9.2d\n\t"
        "and x12, x12, x11\n\t"
        "shl v8.2d, v10.2d, #63\n\t"
        "add x6, x6, x13\n\t"
        "sri v8.2d, v10.2d, #1\n\t"
        "eor x12, x12, x5\n\t"
        "tbl v9.16b, {v10.16b}, v11.16b\n\t"
        "add x6, x6, x15\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x17, x17, x8\n\t"
        "ushr v10.2d, v10.2d, #7\n\t"
        "add x6, x6, x12\n\t"
        "eor v9.16b, v9.16b, v10.16b\n\t"
        "add x14, x14, x17\n\t"
        "add v2.2d, v2.2d, v9.2d\n\t"
        "add x10, x10, x6\n\t"
        "add x6, x6, x14\n\t"
        /* Round 6 */
        "mov x13, v3.d[0]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x10, #14\n\t"
        "ror x14, x6, #28\n\t"
        "eor x12, x12, x10, ror 18\n\t"
        "eor x14, x14, x6, ror 34\n\t"
        "eor x12, x12, x10, ror 41\n\t"
        "eor x14, x14, x6, ror 39\n\t"
        "add x5, x5, x12\n\t"
        "eor x17, x6, x7\n\t"
        "eor x12, x11, x4\n\t"
        "and x16, x17, x16\n\t"
        "and x12, x12, x10\n\t"
        "add x5, x5, x13\n\t"
        "eor x12, x12, x4\n\t"
        "add x5, x5, x15\n\t"
        "eor x16, x16, x7\n\t"
        "add x5, x5, x12\n\t"
        "add x14, x14, x16\n\t"
        "add x9, x9, x5\n\t"
        "add x5, x5, x14\n\t"
        /* Round 7, interleaved with the schedule update of v3 */
        "mov x13, v3.d[1]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ext v10.16b, v3.16b, v4.16b, #8\n\t"
        "ror x12, x9, #14\n\t"
        "shl v8.2d, v2.2d, #45\n\t"
        "ror x14, x5, #28\n\t"
        "sri v8.2d, v2.2d, #19\n\t"
        "eor x12, x12, x9, ror 18\n\t"
        "shl v9.2d, v2.2d, #3\n\t"
        "eor x14, x14, x5, ror 34\n\t"
        "sri v9.2d, v2.2d, #61\n\t"
        "eor x12, x12, x9, ror 41\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x14, x14, x5, ror 39\n\t"
        "ushr v8.2d, v2.2d, #6\n\t"
        "add x4, x4, x12\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x16, x5, x6\n\t"
        "add v3.2d, v3.2d, v9.2d\n\t"
        "eor x12, x10, x11\n\t"
        "ext v9.16b, v7.16b, v0.16b, #8\n\t"
        "and x17, x16, x17\n\t"
        "add v3.2d, v3.2d, v9.2d\n\t"
        "and x12, x12, x9\n\t"
        "shl v8.2d, v10.2d, #63\n\t"
        "add x4, x4, x13\n\t"
        "sri v8.2d, v10.2d, #1\n\t"
        "eor x12, x12, x11\n\t"
        "tbl v9.16b, {v10.16b}, v11.16b\n\t"
        "add x4, x4, x15\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x17, x17, x6\n\t"
        "ushr v10.2d, v10.2d, #7\n\t"
        "add x4, x4, x12\n\t"
        "eor v9.16b, v9.16b, v10.16b\n\t"
        "add x14, x14, x17\n\t"
        "add v3.2d, v3.2d, v9.2d\n\t"
        "add x8, x8, x4\n\t"
        "add x4, x4, x14\n\t"
        /* Round 8 */
        "mov x13, v4.d[0]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x8, #14\n\t"
        "ror x14, x4, #28\n\t"
        "eor x12, x12, x8, ror 18\n\t"
        "eor x14, x14, x4, ror 34\n\t"
        "eor x12, x12, x8, ror 41\n\t"
        "eor x14, x14, x4, ror 39\n\t"
        "add x11, x11, x12\n\t"
        "eor x17, x4, x5\n\t"
        "eor x12, x9, x10\n\t"
        "and x16, x17, x16\n\t"
        "and x12, x12, x8\n\t"
        "add x11, x11, x13\n\t"
        "eor x12, x12, x10\n\t"
        "add x11, x11, x15\n\t"
        "eor x16, x16, x5\n\t"
        "add x11, x11, x12\n\t"
        "add x14, x14, x16\n\t"
        "add x7, x7, x11\n\t"
        "add x11, x11, x14\n\t"
        /* Round 9, interleaved with the schedule update of v4 */
        "mov x13, v4.d[1]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ext v10.16b, v4.16b, v5.16b, #8\n\t"
        "ror x12, x7, #14\n\t"
        "shl v8.2d, v3.2d, #45\n\t"
        "ror x14, x11, #28\n\t"
        "sri v8.2d, v3.2d, #19\n\t"
        "eor x12, x12, x7, ror 18\n\t"
        "shl v9.2d, v3.2d, #3\n\t"
        "eor x14, x14, x11, ror 34\n\t"
        "sri v9.2d, v3.2d, #61\n\t"
        "eor x12, x12, x7, ror 41\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x14, x14, x11, ror 39\n\t"
        "ushr v8.2d, v3.2d, #6\n\t"
        "add x10, x10, x12\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x16, x11, x4\n\t"
        "add v4.2d, v4.2d, v9.2d\n\t"
        "eor x12, x8, x9\n\t"
        "ext v9.16b, v0.16b, v1.16b, #8\n\t"
        "and x17, x16, x17\n\t"
        "add v4.2d, v4.2d, v9.2d\n\t"
        "and x12, x12, x7\n\t"
        "shl v8.2d, v10.2d, #63\n\t"
        "add x10, x10, x13\n\t"
        "sri v8.2d, v10.2d, #1\n\t"
        "eor x12, x12, x9\n\t"
        "tbl v9.16b, {v10.16b}, v11.16b\n\t"
        "add x10, x10, x15\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x17, x17, x4\n\t"
        "ushr v10.2d, v10.2d, #7\n\t"
        "add x10, x10, x12\n\t"
        "eor v9.16b, v9.16b, v10.16b\n\t"
        "add x14, x14, x17\n\t"
        "add v4.2d, v4.2d, v9.2d\n\t"
        "add x6, x6, x10\n\t"
        "add x10, x10, x14\n\t"
        /* Round 10 */
        "mov x13, v5.d[0]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x6, #14\n\t"
        "ror x14, x10, #28\n\t"
        "eor x12, x12, x6, ror 18\n\t"
        "eor x14, x14, x10, ror 34\n\t"
        "eor x12, x12, x6, ror 41\n\t"
        "eor x14, x14, x10, ror 39\n\t"
        "add x9, x9, x12\n\t"
        "eor x17, x10, x11\n\t"
        "eor x12, x7, x8\n\t"
        "and x16, x17, x16\n\t"
        "and x12, x12, x6\n\t"
        "add x9, x9, x13\n\t"
        "eor x12, x12, x8\n\t"
        "add x9, x9, x15\n\t"
        "eor x16, x16, x11\n\t"
        "add x9, x9, x12\n\t"
        "add x14, x14, x16\n\t"
        "add x5, x5, x9\n\t"
        "add x9, x9, x14\n\t"
        /* Round 11, interleaved with the schedule update of v5 */
        "mov x13, v5.d[1]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ext v10.16b, v5.16b, v6.16b, #8\n\t"
        "ror x12, x5, #14\n\t"
        "shl v8.2d, v4.2d, #45\n\t"
        "ror x14, x9, #28\n\t"
        "sri v8.2d, v4.2d, #19\n\t"
        "eor x12, x12, x5, ror 18\n\t"
        "shl v9.2d, v4.2d, #3\n\t"
        "eor x14, x14, x9, ror 34\n\t"
        "sri v9.2d, v4.2d, #61\n\t"
        "eor x12, x12, x5, ror 41\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x14, x14, x9, ror 39\n\t"
        "ushr v8.2d, v4.2d, #6\n\t"
        "add x8, x8, x12\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x16, x9, x10\n\t"
        "add v5.2d, v5.2d, v9.2d\n\t"
        "eor x12, x6, x7\n\t"
        "ext v9.16b, v1.16b, v2.16b, #8\n\t"
        "and x17, x16, x17\n\t"
        "add v5.2d, v5.2d, v9.2d\n\t"
        "and x12, x12, x5\n\t"
        "shl v8.2d, v10.2d, #63\n\t"
        "add x8, x8, x13\n\t"
        "sri v8.2d, v10.2d, #1\n\t"
        "eor x12, x12, x7\n\t"
        "tbl v9.16b, {v10.16b}, v11.16b\n\t"
        "add x8, x8, x15\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x17, x17, x10\n\t"
        "ushr v10.2d, v10.2d, #7\n\t"
        "add x8, x8, x12\n\t"
        "eor v9.16b, v9.16b, v10.16b\n\t"
        "add x14, x14, x17\n\t"
        "add v5.2d, v5.2d, v9.2d\n\t"
        "add x4, x4, x8\n\t"
        "add x8, x8, x14\n\t"
        /* Round 12 */
        "mov x13, v6.d[0]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x4, #14\n\t"
        "ror x14, x8, #28\n\t"
        "eor x12, x12, x4, ror 18\n\t"
        "eor x14, x14, x8, ror 34\n\t"
        "eor x12, x12, x4, ror 41\n\t"
        "eor x14, x14, x8, ror 39\n\t"
        "add x7, x7, x12\n\t"
        "eor x17, x8, x9\n\t"
        "eor x12, x5, x6\n\t"
        "and x16, x17, x16\n\t"
        "and x12, x12, x4\n\t"
        "add x7, x7, x13\n\t"
        "eor x12, x12, x6\n\t"
        "add x7, x7, x15\n\t"
        "eor x16, x16, x9\n\t"
        "add x7, x7, x12\n\t"
        "add x14, x14, x16\n\t"
        "add x11, x11, x7\n\t"
        "add x7, x7, x14\n\t"
        /* Round 13, interleaved with the schedule update of v6 */
        "mov x13, v6.d[1]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ext v10.16b, v6.16b, v7.16b, #8\n\t"
        "ror x12, x11, #14\n\t"
        "shl v8.2d, v5.2d, #45\n\t"
        "ror x14, x7, #28\n\t"
        "sri v8.2d, v5.2d, #19\n\t"
        "eor x12, x12, x11, ror 18\n\t"
        "shl v9.2d, v5.2d, #3\n\t"
        "eor x14, x14, x7, ror 34\n\t"
        "sri v9.2d, v5.2d, #61\n\t"
        "eor x12, x12, x11, ror 41\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x14, x14, x7, ror 39\n\t"
        "ushr v8.2d, v5.2d, #6\n\t"
        "add x6, x6, x12\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x16, x7, x8\n\t"
        "add v6.2d, v6.2d, v9.2d\n\t"
        "eor x12, x4, x5\n\t"
        "ext v9.16b, v2.16b, v3.16b, #8\n\t"
        "and x17, x16, x17\n\t"
        "add v6.2d, v6.2d, v9.2d\n\t"
        "and x12, x12, x11\n\t"
        "shl v8.2d, v10.2d, #63\n\t"
        "add x6, x6, x13\n\t"
        "sri v8.2d, v10.2d, #1\n\t"
        "eor x12, x12, x5\n\t"
        "tbl v9.16b, {v10.16b}, v11.16b\n\t"
        "add x6, x6, x15\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x17, x17, x8\n\t"
        "ushr v10.2d, v10.2d, #7\n\t"
        "add x6, x6, x12\n\t"
        "eor v9.16b, v9.16b, v10.16b\n\t"
        "add x14, x14, x17\n\t"
        "add v6.2d, v6.2d, v9.2d\n\t"
        "add x10, x10, x6\n\t"
        "add x6, x6, x14\n\t"
        /* Round 14 */
        "mov x13, v7.d[0]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x10, #14\n\t"
        "ror x14, x6, #28\n\t"
        "eor x12, x12, x10, ror 18\n\t"
        "eor x14, x14, x6, ror 34\n\t"
        "eor x12, x12, x10, ror 41\n\t"
        "eor x14, x14, x6, ror 39\n\t"
        "add x5, x5, x12\n\t"
        "eor x17, x6, x7\n\t"
        "eor x12, x11, x4\n\t"
        "and x16, x17, x16\n\t"
        "and x12, x12, x10\n\t"
        "add x5, x5, x13\n\t"
        "eor x12, x12, x4\n\t"
        "add x5, x5, x15\n\t"
        "eor x16, x16, x7\n\t"
        "add x5, x5, x12\n\t"
        "add x14, x14, x16\n\t"
        "add x9, x9, x5\n\t"
        "add x5, x5, x14\n\t"
        /* Round 15, interleaved with the schedule update of v7 */
        "mov x13, v7.d[1]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ext v10.16b, v7.16b, v0.16b, #8\n\t"
        "ror x12, x9, #14\n\t"
        "shl v8.2d, v6.2d, #45\n\t"
        "ror x14, x5, #28\n\t"
        "sri v8.2d, v6.2d, #19\n\t"
        "eor x12, x12, x9, ror 18\n\t"
        "shl v9.2d, v6.2d, #3\n\t"
        "eor x14, x14, x5, ror 34\n\t"
        "sri v9.2d, v6.2d, #61\n\t"
        "eor x12, x12, x9, ror 41\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x14, x14, x5, ror 39\n\t"
        "ushr v8.2d, v6.2d, #6\n\t"
        "add x4, x4, x12\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x16, x5, x6\n\t"
        "add v7.2d, v7.2d, v9.2d\n\t"
        "eor x12, x10, x11\n\t"
        "ext v9.16b, v3.16b, v4.16b, #8\n\t"
        "and x17, x16, x17\n\t"
        "add v7.2d, v7.2d, v9.2d\n\t"
        "and x12, x12, x9\n\t"
        "shl v8.2d, v10.2d, #63\n\t"
        "add x4, x4, x13\n\t"
        "sri v8.2d, v10.2d, #1\n\t"
        "eor x12, x12, x11\n\t"
        "tbl v9.16b, {v10.16b}, v11.16b\n\t"
        "add x4, x4, x15\n\t"
        "eor v9.16b, v9.16b, v8.16b\n\t"
        "eor x17, x17, x6\n\t"
        "ushr v10.2d, v10.2d, #7\n\t"
        "add x4, x4, x12\n\t"
        "eor v9.16b, v9.16b, v10.16b\n\t"
        "add x14, x14, x17\n\t"
        "add v7.2d, v7.2d, v9.2d\n\t"
        "add x8, x8, x4\n\t"
        "add x4, x4, x14\n\t"
        /* Repeat the schedule-updating pass until the counter hits zero */
        "subs %[r8], %[r8], #1\n\t"
        "b.ne L_sha512_len_neon_start_%=\n\t"
        /* Final 16 rounds: no further schedule update */
        /* Round 0 */
        "mov x13, v0.d[0]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x8, #14\n\t"
        "ror x14, x4, #28\n\t"
        "eor x12, x12, x8, ror 18\n\t"
        "eor x14, x14, x4, ror 34\n\t"
        "eor x12, x12, x8, ror 41\n\t"
        "eor x14, x14, x4, ror 39\n\t"
        "add x11, x11, x12\n\t"
        "eor x17, x4, x5\n\t"
        "eor x12, x9, x10\n\t"
        "and x16, x17, x16\n\t"
        "and x12, x12, x8\n\t"
        "add x11, x11, x13\n\t"
        "eor x12, x12, x10\n\t"
        "add x11, x11, x15\n\t"
        "eor x16, x16, x5\n\t"
        "add x11, x11, x12\n\t"
        "add x14, x14, x16\n\t"
        "add x7, x7, x11\n\t"
        "add x11, x11, x14\n\t"
        /* Round 1 */
        "mov x13, v0.d[1]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x7, #14\n\t"
        "ror x14, x11, #28\n\t"
        "eor x12, x12, x7, ror 18\n\t"
        "eor x14, x14, x11, ror 34\n\t"
        "eor x12, x12, x7, ror 41\n\t"
        "eor x14, x14, x11, ror 39\n\t"
        "add x10, x10, x12\n\t"
        "eor x16, x11, x4\n\t"
        "eor x12, x8, x9\n\t"
        "and x17, x16, x17\n\t"
        "and x12, x12, x7\n\t"
        "add x10, x10, x13\n\t"
        "eor x12, x12, x9\n\t"
        "add x10, x10, x15\n\t"
        "eor x17, x17, x4\n\t"
        "add x10, x10, x12\n\t"
        "add x14, x14, x17\n\t"
        "add x6, x6, x10\n\t"
        "add x10, x10, x14\n\t"
        /* Round 2 */
        "mov x13, v1.d[0]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x6, #14\n\t"
        "ror x14, x10, #28\n\t"
        "eor x12, x12, x6, ror 18\n\t"
        "eor x14, x14, x10, ror 34\n\t"
        "eor x12, x12, x6, ror 41\n\t"
        "eor x14, x14, x10, ror 39\n\t"
        "add x9, x9, x12\n\t"
        "eor x17, x10, x11\n\t"
        "eor x12, x7, x8\n\t"
        "and x16, x17, x16\n\t"
        "and x12, x12, x6\n\t"
        "add x9, x9, x13\n\t"
        "eor x12, x12, x8\n\t"
        "add x9, x9, x15\n\t"
        "eor x16, x16, x11\n\t"
        "add x9, x9, x12\n\t"
        "add x14, x14, x16\n\t"
        "add x5, x5, x9\n\t"
        "add x9, x9, x14\n\t"
        /* Round 3 */
        "mov x13, v1.d[1]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x5, #14\n\t"
        "ror x14, x9, #28\n\t"
        "eor x12, x12, x5, ror 18\n\t"
        "eor x14, x14, x9, ror 34\n\t"
        "eor x12, x12, x5, ror 41\n\t"
        "eor x14, x14, x9, ror 39\n\t"
        "add x8, x8, x12\n\t"
        "eor x16, x9, x10\n\t"
        "eor x12, x6, x7\n\t"
        "and x17, x16, x17\n\t"
        "and x12, x12, x5\n\t"
        "add x8, x8, x13\n\t"
        "eor x12, x12, x7\n\t"
        "add x8, x8, x15\n\t"
        "eor x17, x17, x10\n\t"
        "add x8, x8, x12\n\t"
        "add x14, x14, x17\n\t"
        "add x4, x4, x8\n\t"
        "add x8, x8, x14\n\t"
        /* Round 4 */
        "mov x13, v2.d[0]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x4, #14\n\t"
        "ror x14, x8, #28\n\t"
        "eor x12, x12, x4, ror 18\n\t"
        "eor x14, x14, x8, ror 34\n\t"
        "eor x12, x12, x4, ror 41\n\t"
        "eor x14, x14, x8, ror 39\n\t"
        "add x7, x7, x12\n\t"
        "eor x17, x8, x9\n\t"
        "eor x12, x5, x6\n\t"
        "and x16, x17, x16\n\t"
        "and x12, x12, x4\n\t"
        "add x7, x7, x13\n\t"
        "eor x12, x12, x6\n\t"
        "add x7, x7, x15\n\t"
        "eor x16, x16, x9\n\t"
        "add x7, x7, x12\n\t"
        "add x14, x14, x16\n\t"
        "add x11, x11, x7\n\t"
        "add x7, x7, x14\n\t"
        /* Round 5 */
        "mov x13, v2.d[1]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x11, #14\n\t"
        "ror x14, x7, #28\n\t"
        "eor x12, x12, x11, ror 18\n\t"
        "eor x14, x14, x7, ror 34\n\t"
        "eor x12, x12, x11, ror 41\n\t"
        "eor x14, x14, x7, ror 39\n\t"
        "add x6, x6, x12\n\t"
        "eor x16, x7, x8\n\t"
        "eor x12, x4, x5\n\t"
        "and x17, x16, x17\n\t"
        "and x12, x12, x11\n\t"
        "add x6, x6, x13\n\t"
        "eor x12, x12, x5\n\t"
        "add x6, x6, x15\n\t"
        "eor x17, x17, x8\n\t"
        "add x6, x6, x12\n\t"
        "add x14, x14, x17\n\t"
        "add x10, x10, x6\n\t"
        "add x6, x6, x14\n\t"
        /* Round 6 */
        "mov x13, v3.d[0]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x10, #14\n\t"
        "ror x14, x6, #28\n\t"
        "eor x12, x12, x10, ror 18\n\t"
        "eor x14, x14, x6, ror 34\n\t"
        "eor x12, x12, x10, ror 41\n\t"
        "eor x14, x14, x6, ror 39\n\t"
        "add x5, x5, x12\n\t"
        "eor x17, x6, x7\n\t"
        "eor x12, x11, x4\n\t"
        "and x16, x17, x16\n\t"
        "and x12, x12, x10\n\t"
        "add x5, x5, x13\n\t"
        "eor x12, x12, x4\n\t"
        "add x5, x5, x15\n\t"
        "eor x16, x16, x7\n\t"
        "add x5, x5, x12\n\t"
        "add x14, x14, x16\n\t"
        "add x9, x9, x5\n\t"
        "add x5, x5, x14\n\t"
        /* Round 7 */
        "mov x13, v3.d[1]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x9, #14\n\t"
        "ror x14, x5, #28\n\t"
        "eor x12, x12, x9, ror 18\n\t"
        "eor x14, x14, x5, ror 34\n\t"
        "eor x12, x12, x9, ror 41\n\t"
        "eor x14, x14, x5, ror 39\n\t"
        "add x4, x4, x12\n\t"
        "eor x16, x5, x6\n\t"
        "eor x12, x10, x11\n\t"
        "and x17, x16, x17\n\t"
        "and x12, x12, x9\n\t"
        "add x4, x4, x13\n\t"
        "eor x12, x12, x11\n\t"
        "add x4, x4, x15\n\t"
        "eor x17, x17, x6\n\t"
        "add x4, x4, x12\n\t"
        "add x14, x14, x17\n\t"
        "add x8, x8, x4\n\t"
        "add x4, x4, x14\n\t"
        /* Round 8 */
        "mov x13, v4.d[0]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x8, #14\n\t"
        "ror x14, x4, #28\n\t"
        "eor x12, x12, x8, ror 18\n\t"
        "eor x14, x14, x4, ror 34\n\t"
        "eor x12, x12, x8, ror 41\n\t"
        "eor x14, x14, x4, ror 39\n\t"
        "add x11, x11, x12\n\t"
        "eor x17, x4, x5\n\t"
        "eor x12, x9, x10\n\t"
        "and x16, x17, x16\n\t"
        "and x12, x12, x8\n\t"
        "add x11, x11, x13\n\t"
        "eor x12, x12, x10\n\t"
        "add x11, x11, x15\n\t"
        "eor x16, x16, x5\n\t"
        "add x11, x11, x12\n\t"
        "add x14, x14, x16\n\t"
        "add x7, x7, x11\n\t"
        "add x11, x11, x14\n\t"
        /* Round 9 */
        "mov x13, v4.d[1]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x7, #14\n\t"
        "ror x14, x11, #28\n\t"
        "eor x12, x12, x7, ror 18\n\t"
        "eor x14, x14, x11, ror 34\n\t"
        "eor x12, x12, x7, ror 41\n\t"
        "eor x14, x14, x11, ror 39\n\t"
        "add x10, x10, x12\n\t"
        "eor x16, x11, x4\n\t"
        "eor x12, x8, x9\n\t"
        "and x17, x16, x17\n\t"
        "and x12, x12, x7\n\t"
        "add x10, x10, x13\n\t"
        "eor x12, x12, x9\n\t"
        "add x10, x10, x15\n\t"
        "eor x17, x17, x4\n\t"
        "add x10, x10, x12\n\t"
        "add x14, x14, x17\n\t"
        "add x6, x6, x10\n\t"
        "add x10, x10, x14\n\t"
        /* Round 10 */
        "mov x13, v5.d[0]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x6, #14\n\t"
        "ror x14, x10, #28\n\t"
        "eor x12, x12, x6, ror 18\n\t"
        "eor x14, x14, x10, ror 34\n\t"
        "eor x12, x12, x6, ror 41\n\t"
        "eor x14, x14, x10, ror 39\n\t"
        "add x9, x9, x12\n\t"
        "eor x17, x10, x11\n\t"
        "eor x12, x7, x8\n\t"
        "and x16, x17, x16\n\t"
        "and x12, x12, x6\n\t"
        "add x9, x9, x13\n\t"
        "eor x12, x12, x8\n\t"
        "add x9, x9, x15\n\t"
        "eor x16, x16, x11\n\t"
        "add x9, x9, x12\n\t"
        "add x14, x14, x16\n\t"
        "add x5, x5, x9\n\t"
        "add x9, x9, x14\n\t"
        /* Round 11 */
        "mov x13, v5.d[1]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x5, #14\n\t"
        "ror x14, x9, #28\n\t"
        "eor x12, x12, x5, ror 18\n\t"
        "eor x14, x14, x9, ror 34\n\t"
        "eor x12, x12, x5, ror 41\n\t"
        "eor x14, x14, x9, ror 39\n\t"
        "add x8, x8, x12\n\t"
        "eor x16, x9, x10\n\t"
        "eor x12, x6, x7\n\t"
        "and x17, x16, x17\n\t"
        "and x12, x12, x5\n\t"
        "add x8, x8, x13\n\t"
        "eor x12, x12, x7\n\t"
        "add x8, x8, x15\n\t"
        "eor x17, x17, x10\n\t"
        "add x8, x8, x12\n\t"
        "add x14, x14, x17\n\t"
        "add x4, x4, x8\n\t"
        "add x8, x8, x14\n\t"
        /* Round 12 */
        "mov x13, v6.d[0]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x4, #14\n\t"
        "ror x14, x8, #28\n\t"
        "eor x12, x12, x4, ror 18\n\t"
        "eor x14, x14, x8, ror 34\n\t"
        "eor x12, x12, x4, ror 41\n\t"
        "eor x14, x14, x8, ror 39\n\t"
        "add x7, x7, x12\n\t"
        "eor x17, x8, x9\n\t"
        "eor x12, x5, x6\n\t"
        "and x16, x17, x16\n\t"
        "and x12, x12, x4\n\t"
        "add x7, x7, x13\n\t"
        "eor x12, x12, x6\n\t"
        "add x7, x7, x15\n\t"
        "eor x16, x16, x9\n\t"
        "add x7, x7, x12\n\t"
        "add x14, x14, x16\n\t"
        "add x11, x11, x7\n\t"
        "add x7, x7, x14\n\t"
        /* Round 13 */
        "mov x13, v6.d[1]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x11, #14\n\t"
        "ror x14, x7, #28\n\t"
        "eor x12, x12, x11, ror 18\n\t"
        "eor x14, x14, x7, ror 34\n\t"
        "eor x12, x12, x11, ror 41\n\t"
        "eor x14, x14, x7, ror 39\n\t"
        "add x6, x6, x12\n\t"
        "eor x16, x7, x8\n\t"
        "eor x12, x4, x5\n\t"
        "and x17, x16, x17\n\t"
        "and x12, x12, x11\n\t"
        "add x6, x6, x13\n\t"
        "eor x12, x12, x5\n\t"
        "add x6, x6, x15\n\t"
        "eor x17, x17, x8\n\t"
        "add x6, x6, x12\n\t"
        "add x14, x14, x17\n\t"
        "add x10, x10, x6\n\t"
        "add x6, x6, x14\n\t"
        /* Round 14 */
        "mov x13, v7.d[0]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x10, #14\n\t"
        "ror x14, x6, #28\n\t"
        "eor x12, x12, x10, ror 18\n\t"
        "eor x14, x14, x6, ror 34\n\t"
        "eor x12, x12, x10, ror 41\n\t"
        "eor x14, x14, x6, ror 39\n\t"
        "add x5, x5, x12\n\t"
        "eor x17, x6, x7\n\t"
        "eor x12, x11, x4\n\t"
        "and x16, x17, x16\n\t"
        "and x12, x12, x10\n\t"
        "add x5, x5, x13\n\t"
        "eor x12, x12, x4\n\t"
        "add x5, x5, x15\n\t"
        "eor x16, x16, x7\n\t"
        "add x5, x5, x12\n\t"
        "add x14, x14, x16\n\t"
        "add x9, x9, x5\n\t"
        "add x5, x5, x14\n\t"
        /* Round 15 */
        "mov x13, v7.d[1]\n\t"
        "ldr x15, [%[k]], #8\n\t"
        "ror x12, x9, #14\n\t"
        "ror x14, x5, #28\n\t"
        "eor x12, x12, x9, ror 18\n\t"
        "eor x14, x14, x5, ror 34\n\t"
        "eor x12, x12, x9, ror 41\n\t"
        "eor x14, x14, x5, ror 39\n\t"
        "add x4, x4, x12\n\t"
        "eor x16, x5, x6\n\t"
        "eor x12, x10, x11\n\t"
        "and x17, x16, x17\n\t"
        "and x12, x12, x9\n\t"
        "add x4, x4, x13\n\t"
        "eor x12, x12, x11\n\t"
        "add x4, x4, x15\n\t"
        "eor x17, x17, x6\n\t"
        "add x4, x4, x12\n\t"
        "add x14, x14, x17\n\t"
        "add x8, x8, x4\n\t"
        "add x4, x4, x14\n\t"
        /* Add the state saved at the start of the block */
        "add x11, x11, x26\n\t"
        "add x10, x10, x25\n\t"
        "add x9, x9, x24\n\t"
        "add x8, x8, x23\n\t"
        "add x7, x7, x22\n\t"
        "add x6, x6, x21\n\t"
        "add x5, x5, x20\n\t"
        "add x4, x4, x19\n\t"
        /* One block consumed; `sub` leaves the flags from `subs` intact.
         * Rewind k by 80 * 8 bytes to the start of the constant table. */
        "subs %w[len], %w[len], #0x80\n\t"
        "sub %[k], %[k], #0x280\n\t"
        "b.ne L_sha512_len_neon_begin_%=\n\t"
        /* Store the updated state */
        "stp x4, x5, [%x[sha512]]\n\t"
        "stp x6, x7, [%x[sha512], #16]\n\t"
        "stp x8, x9, [%x[sha512], #32]\n\t"
        "stp x10, x11, [%x[sha512], #48]\n\t"
        /* data, k and r8 are all written inside the asm, so they must be
         * read-write operands rather than inputs. */
        : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len),
          [k] "+r" (k), [r8] "+r" (r8)
        :
        : "memory", "cc", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11",
            "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21",
            "x22", "x23", "x24", "x25", "x26", "v0", "v1", "v2", "v3", "v4",
            "v5", "v6", "v7", "v8", "v9", "v10", "v11"
    );
}
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA512
/* SHA-512 round constants, 80 entries laid out four per row. This is the
 * table Transform_Sha512_Len_crypto points its `k` operand at and loads
 * into vector registers with `ld1`. */
static const word64 L_SHA512_trans_crypto_len_k[] = {
    /*  0 ..  3 */
    0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
    /*  4 ..  7 */
    0x3956c25bf348b538, 0x59f111f1b605d019, 0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
    /*  8 .. 11 */
    0xd807aa98a3030242, 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
    /* 12 .. 15 */
    0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235, 0xc19bf174cf692694,
    /* 16 .. 19 */
    0xe49b69c19ef14ad2, 0xefbe4786384f25e3, 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
    /* 20 .. 23 */
    0x2de92c6f592b0275, 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
    /* 24 .. 27 */
    0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f, 0xbf597fc7beef0ee4,
    /* 28 .. 31 */
    0xc6e00bf33da88fc2, 0xd5a79147930aa725, 0x06ca6351e003826f, 0x142929670a0e6e70,
    /* 32 .. 35 */
    0x27b70a8546d22ffc, 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
    /* 36 .. 39 */
    0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6, 0x92722c851482353b,
    /* 40 .. 43 */
    0xa2bfe8a14cf10364, 0xa81a664bbc423001, 0xc24b8b70d0f89791, 0xc76c51a30654be30,
    /* 44 .. 47 */
    0xd192e819d6ef5218, 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8,
    /* 48 .. 51 */
    0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
    /* 52 .. 55 */
    0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
    /* 56 .. 59 */
    0x748f82ee5defb2fc, 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec,
    /* 60 .. 63 */
    0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915, 0xc67178f2e372532b,
    /* 64 .. 67 */
    0xca273eceea26619c, 0xd186b8c721c0c207, 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
    /* 68 .. 71 */
    0x06f067aa72176fba, 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b,
    /* 72 .. 75 */
    0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
    /* 76 .. 79 */
    0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, 0x5fcb6fab3ad6faec, 0x6c44198c4a475817,
};
/* Prototype for the SHA-512 block transform that uses the sha512h/sha512h2
 * instructions; only compiled when WOLFSSL_ARMASM_CRYPTO_SHA512 is defined.
 * Declared immediately ahead of the definition (presumably so the external
 * function has a prior prototype in this translation unit -- confirm). */
void Transform_Sha512_Len_crypto(wc_Sha512* sha512, const byte* data,
word32 len);
void Transform_Sha512_Len_crypto(wc_Sha512* sha512, const byte* data,
word32 len)
{
const word64* k = L_SHA512_trans_crypto_len_k;
__asm__ __volatile__ (
#ifdef __APPLE__
".arch_extension sha3\n\t"
#endif
"ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%[k]], #0x40\n\t"
"ld1 {v12.2d, v13.2d, v14.2d, v15.2d}, [%[k]], #0x40\n\t"
"ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[sha512]]\n\t"
"\n"
"L_sha512_len_crypto_begin_%=: \n\t"
"mov x3, %[k]\n\t"
"ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[data]], #0x40\n\t"
"ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[data]], #0x40\n\t"
"rev64 v0.16b, v0.16b\n\t"
"rev64 v1.16b, v1.16b\n\t"
"rev64 v2.16b, v2.16b\n\t"
"rev64 v3.16b, v3.16b\n\t"
"rev64 v4.16b, v4.16b\n\t"
"rev64 v5.16b, v5.16b\n\t"
"rev64 v6.16b, v6.16b\n\t"
"rev64 v7.16b, v7.16b\n\t"
"mov v28.16b, v24.16b\n\t"
"mov v29.16b, v25.16b\n\t"
"mov v30.16b, v26.16b\n\t"
"mov v31.16b, v27.16b\n\t"
"add v20.2d, v0.2d, v8.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v26.16b, v27.16b, #8\n\t"
"ext v22.16b, v25.16b, v26.16b, #8\n\t"
"add v27.2d, v27.2d, v20.2d\n\t"
"sha512h q27, q21, v22.2d\n\t"
"add v23.2d, v25.2d, v27.2d\n\t"
"sha512h2 q27, q25, v24.2d\n\t"
"add v20.2d, v1.2d, v9.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v23.16b, v26.16b, #8\n\t"
"ext v22.16b, v24.16b, v23.16b, #8\n\t"
"add v26.2d, v26.2d, v20.2d\n\t"
"sha512h q26, q21, v22.2d\n\t"
"add v25.2d, v24.2d, v26.2d\n\t"
"sha512h2 q26, q24, v27.2d\n\t"
"add v20.2d, v2.2d, v10.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v25.16b, v23.16b, #8\n\t"
"ext v22.16b, v27.16b, v25.16b, #8\n\t"
"add v23.2d, v23.2d, v20.2d\n\t"
"sha512h q23, q21, v22.2d\n\t"
"add v24.2d, v27.2d, v23.2d\n\t"
"sha512h2 q23, q27, v26.2d\n\t"
"add v20.2d, v3.2d, v11.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v24.16b, v25.16b, #8\n\t"
"ext v22.16b, v26.16b, v24.16b, #8\n\t"
"add v25.2d, v25.2d, v20.2d\n\t"
"sha512h q25, q21, v22.2d\n\t"
"add v27.2d, v26.2d, v25.2d\n\t"
"sha512h2 q25, q26, v23.2d\n\t"
"add v20.2d, v4.2d, v12.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v27.16b, v24.16b, #8\n\t"
"ext v22.16b, v23.16b, v27.16b, #8\n\t"
"add v24.2d, v24.2d, v20.2d\n\t"
"sha512h q24, q21, v22.2d\n\t"
"add v26.2d, v23.2d, v24.2d\n\t"
"sha512h2 q24, q23, v25.2d\n\t"
"add v20.2d, v5.2d, v13.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v26.16b, v27.16b, #8\n\t"
"ext v22.16b, v25.16b, v26.16b, #8\n\t"
"add v27.2d, v27.2d, v20.2d\n\t"
"sha512h q27, q21, v22.2d\n\t"
"add v23.2d, v25.2d, v27.2d\n\t"
"sha512h2 q27, q25, v24.2d\n\t"
"add v20.2d, v6.2d, v14.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v23.16b, v26.16b, #8\n\t"
"ext v22.16b, v24.16b, v23.16b, #8\n\t"
"add v26.2d, v26.2d, v20.2d\n\t"
"sha512h q26, q21, v22.2d\n\t"
"add v25.2d, v24.2d, v26.2d\n\t"
"sha512h2 q26, q24, v27.2d\n\t"
"add v20.2d, v7.2d, v15.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v25.16b, v23.16b, #8\n\t"
"ext v22.16b, v27.16b, v25.16b, #8\n\t"
"add v23.2d, v23.2d, v20.2d\n\t"
"sha512h q23, q21, v22.2d\n\t"
"add v24.2d, v27.2d, v23.2d\n\t"
"sha512h2 q23, q27, v26.2d\n\t"
"ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t"
"sha512su0 v0.2d, v1.2d\n\t"
"ext v21.16b, v4.16b, v5.16b, #8\n\t"
"sha512su1 v0.2d, v7.2d, v21.2d\n\t"
"add v20.2d, v0.2d, v16.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v24.16b, v25.16b, #8\n\t"
"ext v22.16b, v26.16b, v24.16b, #8\n\t"
"add v25.2d, v25.2d, v20.2d\n\t"
"sha512h q25, q21, v22.2d\n\t"
"add v27.2d, v26.2d, v25.2d\n\t"
"sha512h2 q25, q26, v23.2d\n\t"
"sha512su0 v1.2d, v2.2d\n\t"
"ext v21.16b, v5.16b, v6.16b, #8\n\t"
"sha512su1 v1.2d, v0.2d, v21.2d\n\t"
"add v20.2d, v1.2d, v17.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v27.16b, v24.16b, #8\n\t"
"ext v22.16b, v23.16b, v27.16b, #8\n\t"
"add v24.2d, v24.2d, v20.2d\n\t"
"sha512h q24, q21, v22.2d\n\t"
"add v26.2d, v23.2d, v24.2d\n\t"
"sha512h2 q24, q23, v25.2d\n\t"
"sha512su0 v2.2d, v3.2d\n\t"
"ext v21.16b, v6.16b, v7.16b, #8\n\t"
"sha512su1 v2.2d, v1.2d, v21.2d\n\t"
"add v20.2d, v2.2d, v18.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v26.16b, v27.16b, #8\n\t"
"ext v22.16b, v25.16b, v26.16b, #8\n\t"
"add v27.2d, v27.2d, v20.2d\n\t"
"sha512h q27, q21, v22.2d\n\t"
"add v23.2d, v25.2d, v27.2d\n\t"
"sha512h2 q27, q25, v24.2d\n\t"
"sha512su0 v3.2d, v4.2d\n\t"
"ext v21.16b, v7.16b, v0.16b, #8\n\t"
"sha512su1 v3.2d, v2.2d, v21.2d\n\t"
"add v20.2d, v3.2d, v19.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v23.16b, v26.16b, #8\n\t"
"ext v22.16b, v24.16b, v23.16b, #8\n\t"
"add v26.2d, v26.2d, v20.2d\n\t"
"sha512h q26, q21, v22.2d\n\t"
"add v25.2d, v24.2d, v26.2d\n\t"
"sha512h2 q26, q24, v27.2d\n\t"
"ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t"
"sha512su0 v4.2d, v5.2d\n\t"
"ext v21.16b, v0.16b, v1.16b, #8\n\t"
"sha512su1 v4.2d, v3.2d, v21.2d\n\t"
"add v20.2d, v4.2d, v16.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v25.16b, v23.16b, #8\n\t"
"ext v22.16b, v27.16b, v25.16b, #8\n\t"
"add v23.2d, v23.2d, v20.2d\n\t"
"sha512h q23, q21, v22.2d\n\t"
"add v24.2d, v27.2d, v23.2d\n\t"
"sha512h2 q23, q27, v26.2d\n\t"
"sha512su0 v5.2d, v6.2d\n\t"
"ext v21.16b, v1.16b, v2.16b, #8\n\t"
"sha512su1 v5.2d, v4.2d, v21.2d\n\t"
"add v20.2d, v5.2d, v17.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v24.16b, v25.16b, #8\n\t"
"ext v22.16b, v26.16b, v24.16b, #8\n\t"
"add v25.2d, v25.2d, v20.2d\n\t"
"sha512h q25, q21, v22.2d\n\t"
"add v27.2d, v26.2d, v25.2d\n\t"
"sha512h2 q25, q26, v23.2d\n\t"
"sha512su0 v6.2d, v7.2d\n\t"
"ext v21.16b, v2.16b, v3.16b, #8\n\t"
"sha512su1 v6.2d, v5.2d, v21.2d\n\t"
"add v20.2d, v6.2d, v18.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v27.16b, v24.16b, #8\n\t"
"ext v22.16b, v23.16b, v27.16b, #8\n\t"
"add v24.2d, v24.2d, v20.2d\n\t"
"sha512h q24, q21, v22.2d\n\t"
"add v26.2d, v23.2d, v24.2d\n\t"
"sha512h2 q24, q23, v25.2d\n\t"
"sha512su0 v7.2d, v0.2d\n\t"
"ext v21.16b, v3.16b, v4.16b, #8\n\t"
"sha512su1 v7.2d, v6.2d, v21.2d\n\t"
"add v20.2d, v7.2d, v19.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v26.16b, v27.16b, #8\n\t"
"ext v22.16b, v25.16b, v26.16b, #8\n\t"
"add v27.2d, v27.2d, v20.2d\n\t"
"sha512h q27, q21, v22.2d\n\t"
"add v23.2d, v25.2d, v27.2d\n\t"
"sha512h2 q27, q25, v24.2d\n\t"
"ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t"
"sha512su0 v0.2d, v1.2d\n\t"
"ext v21.16b, v4.16b, v5.16b, #8\n\t"
"sha512su1 v0.2d, v7.2d, v21.2d\n\t"
"add v20.2d, v0.2d, v16.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v23.16b, v26.16b, #8\n\t"
"ext v22.16b, v24.16b, v23.16b, #8\n\t"
"add v26.2d, v26.2d, v20.2d\n\t"
"sha512h q26, q21, v22.2d\n\t"
"add v25.2d, v24.2d, v26.2d\n\t"
"sha512h2 q26, q24, v27.2d\n\t"
"sha512su0 v1.2d, v2.2d\n\t"
"ext v21.16b, v5.16b, v6.16b, #8\n\t"
"sha512su1 v1.2d, v0.2d, v21.2d\n\t"
"add v20.2d, v1.2d, v17.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v25.16b, v23.16b, #8\n\t"
"ext v22.16b, v27.16b, v25.16b, #8\n\t"
"add v23.2d, v23.2d, v20.2d\n\t"
"sha512h q23, q21, v22.2d\n\t"
"add v24.2d, v27.2d, v23.2d\n\t"
"sha512h2 q23, q27, v26.2d\n\t"
"sha512su0 v2.2d, v3.2d\n\t"
"ext v21.16b, v6.16b, v7.16b, #8\n\t"
"sha512su1 v2.2d, v1.2d, v21.2d\n\t"
"add v20.2d, v2.2d, v18.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v24.16b, v25.16b, #8\n\t"
"ext v22.16b, v26.16b, v24.16b, #8\n\t"
"add v25.2d, v25.2d, v20.2d\n\t"
"sha512h q25, q21, v22.2d\n\t"
"add v27.2d, v26.2d, v25.2d\n\t"
"sha512h2 q25, q26, v23.2d\n\t"
"sha512su0 v3.2d, v4.2d\n\t"
"ext v21.16b, v7.16b, v0.16b, #8\n\t"
"sha512su1 v3.2d, v2.2d, v21.2d\n\t"
"add v20.2d, v3.2d, v19.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v27.16b, v24.16b, #8\n\t"
"ext v22.16b, v23.16b, v27.16b, #8\n\t"
"add v24.2d, v24.2d, v20.2d\n\t"
"sha512h q24, q21, v22.2d\n\t"
"add v26.2d, v23.2d, v24.2d\n\t"
"sha512h2 q24, q23, v25.2d\n\t"
"ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t"
"sha512su0 v4.2d, v5.2d\n\t"
"ext v21.16b, v0.16b, v1.16b, #8\n\t"
"sha512su1 v4.2d, v3.2d, v21.2d\n\t"
"add v20.2d, v4.2d, v16.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v26.16b, v27.16b, #8\n\t"
"ext v22.16b, v25.16b, v26.16b, #8\n\t"
"add v27.2d, v27.2d, v20.2d\n\t"
"sha512h q27, q21, v22.2d\n\t"
"add v23.2d, v25.2d, v27.2d\n\t"
"sha512h2 q27, q25, v24.2d\n\t"
"sha512su0 v5.2d, v6.2d\n\t"
"ext v21.16b, v1.16b, v2.16b, #8\n\t"
"sha512su1 v5.2d, v4.2d, v21.2d\n\t"
"add v20.2d, v5.2d, v17.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v23.16b, v26.16b, #8\n\t"
"ext v22.16b, v24.16b, v23.16b, #8\n\t"
"add v26.2d, v26.2d, v20.2d\n\t"
"sha512h q26, q21, v22.2d\n\t"
"add v25.2d, v24.2d, v26.2d\n\t"
"sha512h2 q26, q24, v27.2d\n\t"
"sha512su0 v6.2d, v7.2d\n\t"
"ext v21.16b, v2.16b, v3.16b, #8\n\t"
"sha512su1 v6.2d, v5.2d, v21.2d\n\t"
"add v20.2d, v6.2d, v18.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v25.16b, v23.16b, #8\n\t"
"ext v22.16b, v27.16b, v25.16b, #8\n\t"
"add v23.2d, v23.2d, v20.2d\n\t"
"sha512h q23, q21, v22.2d\n\t"
"add v24.2d, v27.2d, v23.2d\n\t"
"sha512h2 q23, q27, v26.2d\n\t"
"sha512su0 v7.2d, v0.2d\n\t"
"ext v21.16b, v3.16b, v4.16b, #8\n\t"
"sha512su1 v7.2d, v6.2d, v21.2d\n\t"
"add v20.2d, v7.2d, v19.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v24.16b, v25.16b, #8\n\t"
"ext v22.16b, v26.16b, v24.16b, #8\n\t"
"add v25.2d, v25.2d, v20.2d\n\t"
"sha512h q25, q21, v22.2d\n\t"
"add v27.2d, v26.2d, v25.2d\n\t"
"sha512h2 q25, q26, v23.2d\n\t"
"ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t"
"sha512su0 v0.2d, v1.2d\n\t"
"ext v21.16b, v4.16b, v5.16b, #8\n\t"
"sha512su1 v0.2d, v7.2d, v21.2d\n\t"
"add v20.2d, v0.2d, v16.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v27.16b, v24.16b, #8\n\t"
"ext v22.16b, v23.16b, v27.16b, #8\n\t"
"add v24.2d, v24.2d, v20.2d\n\t"
"sha512h q24, q21, v22.2d\n\t"
"add v26.2d, v23.2d, v24.2d\n\t"
"sha512h2 q24, q23, v25.2d\n\t"
"sha512su0 v1.2d, v2.2d\n\t"
"ext v21.16b, v5.16b, v6.16b, #8\n\t"
"sha512su1 v1.2d, v0.2d, v21.2d\n\t"
"add v20.2d, v1.2d, v17.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v26.16b, v27.16b, #8\n\t"
"ext v22.16b, v25.16b, v26.16b, #8\n\t"
"add v27.2d, v27.2d, v20.2d\n\t"
"sha512h q27, q21, v22.2d\n\t"
"add v23.2d, v25.2d, v27.2d\n\t"
"sha512h2 q27, q25, v24.2d\n\t"
"sha512su0 v2.2d, v3.2d\n\t"
"ext v21.16b, v6.16b, v7.16b, #8\n\t"
"sha512su1 v2.2d, v1.2d, v21.2d\n\t"
"add v20.2d, v2.2d, v18.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v23.16b, v26.16b, #8\n\t"
"ext v22.16b, v24.16b, v23.16b, #8\n\t"
"add v26.2d, v26.2d, v20.2d\n\t"
"sha512h q26, q21, v22.2d\n\t"
"add v25.2d, v24.2d, v26.2d\n\t"
"sha512h2 q26, q24, v27.2d\n\t"
"sha512su0 v3.2d, v4.2d\n\t"
"ext v21.16b, v7.16b, v0.16b, #8\n\t"
"sha512su1 v3.2d, v2.2d, v21.2d\n\t"
"add v20.2d, v3.2d, v19.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v25.16b, v23.16b, #8\n\t"
"ext v22.16b, v27.16b, v25.16b, #8\n\t"
"add v23.2d, v23.2d, v20.2d\n\t"
"sha512h q23, q21, v22.2d\n\t"
"add v24.2d, v27.2d, v23.2d\n\t"
"sha512h2 q23, q27, v26.2d\n\t"
"ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t"
"sha512su0 v4.2d, v5.2d\n\t"
"ext v21.16b, v0.16b, v1.16b, #8\n\t"
"sha512su1 v4.2d, v3.2d, v21.2d\n\t"
"add v20.2d, v4.2d, v16.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v24.16b, v25.16b, #8\n\t"
"ext v22.16b, v26.16b, v24.16b, #8\n\t"
"add v25.2d, v25.2d, v20.2d\n\t"
"sha512h q25, q21, v22.2d\n\t"
"add v27.2d, v26.2d, v25.2d\n\t"
"sha512h2 q25, q26, v23.2d\n\t"
"sha512su0 v5.2d, v6.2d\n\t"
"ext v21.16b, v1.16b, v2.16b, #8\n\t"
"sha512su1 v5.2d, v4.2d, v21.2d\n\t"
"add v20.2d, v5.2d, v17.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v27.16b, v24.16b, #8\n\t"
"ext v22.16b, v23.16b, v27.16b, #8\n\t"
"add v24.2d, v24.2d, v20.2d\n\t"
"sha512h q24, q21, v22.2d\n\t"
"add v26.2d, v23.2d, v24.2d\n\t"
"sha512h2 q24, q23, v25.2d\n\t"
"sha512su0 v6.2d, v7.2d\n\t"
"ext v21.16b, v2.16b, v3.16b, #8\n\t"
"sha512su1 v6.2d, v5.2d, v21.2d\n\t"
"add v20.2d, v6.2d, v18.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v26.16b, v27.16b, #8\n\t"
"ext v22.16b, v25.16b, v26.16b, #8\n\t"
"add v27.2d, v27.2d, v20.2d\n\t"
"sha512h q27, q21, v22.2d\n\t"
"add v23.2d, v25.2d, v27.2d\n\t"
"sha512h2 q27, q25, v24.2d\n\t"
"sha512su0 v7.2d, v0.2d\n\t"
"ext v21.16b, v3.16b, v4.16b, #8\n\t"
"sha512su1 v7.2d, v6.2d, v21.2d\n\t"
"add v20.2d, v7.2d, v19.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v23.16b, v26.16b, #8\n\t"
"ext v22.16b, v24.16b, v23.16b, #8\n\t"
"add v26.2d, v26.2d, v20.2d\n\t"
"sha512h q26, q21, v22.2d\n\t"
"add v25.2d, v24.2d, v26.2d\n\t"
"sha512h2 q26, q24, v27.2d\n\t"
"ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t"
"sha512su0 v0.2d, v1.2d\n\t"
"ext v21.16b, v4.16b, v5.16b, #8\n\t"
"sha512su1 v0.2d, v7.2d, v21.2d\n\t"
"add v20.2d, v0.2d, v16.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v25.16b, v23.16b, #8\n\t"
"ext v22.16b, v27.16b, v25.16b, #8\n\t"
"add v23.2d, v23.2d, v20.2d\n\t"
"sha512h q23, q21, v22.2d\n\t"
"add v24.2d, v27.2d, v23.2d\n\t"
"sha512h2 q23, q27, v26.2d\n\t"
"sha512su0 v1.2d, v2.2d\n\t"
"ext v21.16b, v5.16b, v6.16b, #8\n\t"
"sha512su1 v1.2d, v0.2d, v21.2d\n\t"
"add v20.2d, v1.2d, v17.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v24.16b, v25.16b, #8\n\t"
"ext v22.16b, v26.16b, v24.16b, #8\n\t"
"add v25.2d, v25.2d, v20.2d\n\t"
"sha512h q25, q21, v22.2d\n\t"
"add v27.2d, v26.2d, v25.2d\n\t"
"sha512h2 q25, q26, v23.2d\n\t"
"sha512su0 v2.2d, v3.2d\n\t"
"ext v21.16b, v6.16b, v7.16b, #8\n\t"
"sha512su1 v2.2d, v1.2d, v21.2d\n\t"
"add v20.2d, v2.2d, v18.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v27.16b, v24.16b, #8\n\t"
"ext v22.16b, v23.16b, v27.16b, #8\n\t"
"add v24.2d, v24.2d, v20.2d\n\t"
"sha512h q24, q21, v22.2d\n\t"
"add v26.2d, v23.2d, v24.2d\n\t"
"sha512h2 q24, q23, v25.2d\n\t"
"sha512su0 v3.2d, v4.2d\n\t"
"ext v21.16b, v7.16b, v0.16b, #8\n\t"
"sha512su1 v3.2d, v2.2d, v21.2d\n\t"
"add v20.2d, v3.2d, v19.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v26.16b, v27.16b, #8\n\t"
"ext v22.16b, v25.16b, v26.16b, #8\n\t"
"add v27.2d, v27.2d, v20.2d\n\t"
"sha512h q27, q21, v22.2d\n\t"
"add v23.2d, v25.2d, v27.2d\n\t"
"sha512h2 q27, q25, v24.2d\n\t"
"ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t"
"sha512su0 v4.2d, v5.2d\n\t"
"ext v21.16b, v0.16b, v1.16b, #8\n\t"
"sha512su1 v4.2d, v3.2d, v21.2d\n\t"
"add v20.2d, v4.2d, v16.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v23.16b, v26.16b, #8\n\t"
"ext v22.16b, v24.16b, v23.16b, #8\n\t"
"add v26.2d, v26.2d, v20.2d\n\t"
"sha512h q26, q21, v22.2d\n\t"
"add v25.2d, v24.2d, v26.2d\n\t"
"sha512h2 q26, q24, v27.2d\n\t"
"sha512su0 v5.2d, v6.2d\n\t"
"ext v21.16b, v1.16b, v2.16b, #8\n\t"
"sha512su1 v5.2d, v4.2d, v21.2d\n\t"
"add v20.2d, v5.2d, v17.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v25.16b, v23.16b, #8\n\t"
"ext v22.16b, v27.16b, v25.16b, #8\n\t"
"add v23.2d, v23.2d, v20.2d\n\t"
"sha512h q23, q21, v22.2d\n\t"
"add v24.2d, v27.2d, v23.2d\n\t"
"sha512h2 q23, q27, v26.2d\n\t"
"sha512su0 v6.2d, v7.2d\n\t"
"ext v21.16b, v2.16b, v3.16b, #8\n\t"
"sha512su1 v6.2d, v5.2d, v21.2d\n\t"
"add v20.2d, v6.2d, v18.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v24.16b, v25.16b, #8\n\t"
"ext v22.16b, v26.16b, v24.16b, #8\n\t"
"add v25.2d, v25.2d, v20.2d\n\t"
"sha512h q25, q21, v22.2d\n\t"
"add v27.2d, v26.2d, v25.2d\n\t"
"sha512h2 q25, q26, v23.2d\n\t"
"sha512su0 v7.2d, v0.2d\n\t"
"ext v21.16b, v3.16b, v4.16b, #8\n\t"
"sha512su1 v7.2d, v6.2d, v21.2d\n\t"
"add v20.2d, v7.2d, v19.2d\n\t"
"ext v20.16b, v20.16b, v20.16b, #8\n\t"
"ext v21.16b, v27.16b, v24.16b, #8\n\t"
"ext v22.16b, v23.16b, v27.16b, #8\n\t"
"add v24.2d, v24.2d, v20.2d\n\t"
"sha512h q24, q21, v22.2d\n\t"
"add v26.2d, v23.2d, v24.2d\n\t"
"sha512h2 q24, q23, v25.2d\n\t"
"add v27.2d, v27.2d, v31.2d\n\t"
"add v26.2d, v26.2d, v30.2d\n\t"
"add v25.2d, v25.2d, v29.2d\n\t"
"add v24.2d, v24.2d, v28.2d\n\t"
"subs %w[len], %w[len], #0x80\n\t"
"b.ne L_sha512_len_crypto_begin_%=\n\t"
"st1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[sha512]]\n\t"
: [sha512] "+r" (sha512), [len] "+r" (len)
: [data] "r" (data), [k] "r" (k)
: "memory", "cc", "x3", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
"v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
"v27", "v28", "v29", "v30", "v31"
);
}
#endif
#endif
#endif
#endif
#endif