#include <wolfssl/wolfcrypt/libwolfssl_sources.h>
#include <wolfssl/wolfcrypt/port/riscv/riscv-64-asm.h>
#ifdef WOLFSSL_RISCV_ASM
#if defined(WOLFSSL_SHA3) && !defined(WOLFSSL_XILINX_CRYPT) && \
!defined(WOLFSSL_AFALG_XILINX_SHA3)
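/* FIPS builds: skip the wolfCrypt name wrappers and, on Windows, place this
 * code/rodata in the FIPS module's sections for the integrity check.
 * (Comment added for clarity; behavior unchanged.) */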
#if FIPS_VERSION3_GE(2,0,0)
#define FIPS_NO_WRAPPERS
#ifdef USE_WINDOWS_API
#pragma code_seg(".fipsA$n")
#pragma const_seg(".fipsB$n")
#endif /* USE_WINDOWS_API */
#endif /* FIPS_VERSION3_GE(2,0,0) */
#include <wolfssl/wolfcrypt/sha3.h>
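/* Keccak-f[1600] round constants (FIPS 202) - one per round, XORed into
 * lane s[0] in the iota step. */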
static const word64 hash_keccak_r[24] =
{
0x0000000000000001UL, 0x0000000000008082UL,
0x800000000000808aUL, 0x8000000080008000UL,
0x000000000000808bUL, 0x0000000080000001UL,
0x8000000080008081UL, 0x8000000000008009UL,
0x000000000000008aUL, 0x0000000000000088UL,
0x0000000080008009UL, 0x000000008000000aUL,
0x000000008000808bUL, 0x800000000000008bUL,
0x8000000000008089UL, 0x8000000000008003UL,
0x8000000000008002UL, 0x8000000000000080UL,
0x000000000000800aUL, 0x800000008000000aUL,
0x8000000080008081UL, 0x8000000000008080UL,
0x0000000080000001UL, 0x8000000080008008UL
};
#ifndef WOLFSSL_RISCV_VECTOR
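/* Scalar implementation: rows 0-3 of the 5x5 lane matrix (s[0..19]) stay in
 * registers for the whole permutation, apart from a brief spill of
 * s[15..17] during chi on row 4. Row 4 (s[20..24]) lives in memory at
 * offsets 160..192 of the state and is staged through T_0..T_4, which
 * double as the temporaries for theta, rho/pi and chi. */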
#define S0_0 "a1"
#define S0_1 "a2"
#define S0_2 "a3"
#define S0_3 "a4"
#define S0_4 "a5"
#define S1_0 "s1"
#define S1_1 "s2"
#define S1_2 "s3"
#define S1_3 "s4"
#define S1_4 "s5"
#define S2_0 "s6"
#define S2_1 "s7"
#define S2_2 "s8"
#define S2_3 "s9"
#define S2_4 "s10"
#define S3_0 "t0"
#define S3_1 "t1"
#define S3_2 "t2"
#define S3_3 "t3"
#define S3_4 "t4"
#define T_0 "a6"
#define T_1 "a7"
#define T_2 "t5"
#define T_3 "t6"
#define T_4 "s11"
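/* Numeric encodings of the registers above, for instruction macros such as
 * RORI() and ANDN() that emit bit-manipulation opcodes directly. */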
#define SR0_0 REG_A1
#define SR0_1 REG_A2
#define SR0_2 REG_A3
#define SR0_3 REG_A4
#define SR0_4 REG_A5
#define SR1_0 REG_S1
#define SR1_1 REG_S2
#define SR1_2 REG_S3
#define SR1_3 REG_S4
#define SR1_4 REG_S5
#define SR2_0 REG_S6
#define SR2_1 REG_S7
#define SR2_2 REG_S8
#define SR2_3 REG_S9
#define SR2_4 REG_S10
#define SR3_0 REG_T0
#define SR3_1 REG_T1
#define SR3_2 REG_T2
#define SR3_3 REG_T3
#define SR3_4 REG_T4
#define TR_0 REG_A6
#define TR_1 REG_A7
#define TR_2 REG_T5
#define TR_3 REG_T6
#define TR_4 REG_S11
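/* Rho + pi helpers: save the destination lane for the next link of the
 * chain, then replace it with the previous lane rotated left. Equivalent C:
 *     t1 = s;                            // keep old lane for next step
 *     s  = (t0 << rl) | (t0 >> rr);      // s = rotl64(t0, rl), rr + rl == 64
 * SWAP_ROTL works on a lane held in a register; SWAP_ROTL_MEM on a lane at
 * byte offset 's' from the state pointer. With Zbb, the rotate is a single
 * rotate-right by rr == 64 - rl. */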
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
#define SWAP_ROTL(t0, tr0, t1, s, sr, rr, rl) \
"mv " t1 ", " s "\n\t" \
"srli " s ", " t0 ", " #rr "\n\t" \
"slli " t0 ", " t0 ", " #rl "\n\t" \
"or " s ", " s ", " t0 "\n\t"
#define SWAP_ROTL_MEM(t0, tr0, t1, t2, s, rr, rl) \
"ld " t1 ", " #s "(%[s])\n\t" \
"srli " t2 ", " t0 ", " #rr "\n\t" \
"slli " t0 ", " t0 ", " #rl "\n\t" \
"or " t0 ", " t0 ", " t2 "\n\t" \
"sd " t0 ", " #s "(%[s])\n\t"
#else /* WOLFSSL_RISCV_BASE_BIT_MANIPULATION */
#define SWAP_ROTL(t0, tr0, t1, s, sr, rr, rl) \
"mv " t1 ", " s "\n\t" \
RORI(sr, tr0, rr)
#define SWAP_ROTL_MEM(t0, tr0, t1, t2, s, rr, rl) \
"ld " t1 ", " #s "(%[s])\n\t" \
RORI(tr0, tr0, rr) \
"sd " t0 ", " #s "(%[s])\n\t"
#endif /* WOLFSSL_RISCV_BASE_BIT_MANIPULATION */
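/* Apply the Keccak-f[1600] permutation (24 rounds of theta, rho, pi, chi,
 * iota) to the SHA-3 state.
 *
 * @param [in, out] s  25-lane (25 x 64-bit) SHA-3 state.
 */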
void BlockSha3(word64* s)
{
const word64* r = hash_keccak_r;
__asm__ __volatile__ (
"addi sp, sp, -24\n\t"
"li " T_4 ", 24\n\t"
"ld " S0_0 ", 0(%[s])\n\t"
"ld " S0_1 ", 8(%[s])\n\t"
"ld " S0_2 ", 16(%[s])\n\t"
"ld " S0_3 ", 24(%[s])\n\t"
"ld " S0_4 ", 32(%[s])\n\t"
"ld " S1_0 ", 40(%[s])\n\t"
"ld " S1_1 ", 48(%[s])\n\t"
"ld " S1_2 ", 56(%[s])\n\t"
"ld " S1_3 ", 64(%[s])\n\t"
"ld " S1_4 ", 72(%[s])\n\t"
"ld " S2_0 ", 80(%[s])\n\t"
"ld " S2_1 ", 88(%[s])\n\t"
"ld " S2_2 ", 96(%[s])\n\t"
"ld " S2_3 ", 104(%[s])\n\t"
"ld " S2_4 ", 112(%[s])\n\t"
"ld " S3_0 ", 120(%[s])\n\t"
"ld " S3_1 ", 128(%[s])\n\t"
"ld " S3_2 ", 136(%[s])\n\t"
"ld " S3_3 ", 144(%[s])\n\t"
"ld " S3_4 ", 152(%[s])\n\t"
"ld " T_0 ", 160(%[s])\n\t"
"ld " T_1 ", 168(%[s])\n\t"
"ld " T_2 ", 176(%[s])\n\t"
"\n"
"L_riscv_64_block_sha3_loop:\n\t"
"sd " T_4 ", 16(sp)\n\t"
"ld " T_3 ", 184(%[s])\n\t"
"ld " T_4 ", 192(%[s])\n\t"
"xor " T_0 ", " T_0 ", " S0_0 "\n\t"
"xor " T_1 ", " T_1 ", " S0_1 "\n\t"
"xor " T_2 ", " T_2 ", " S0_2 "\n\t"
"xor " T_3 ", " T_3 ", " S0_3 "\n\t"
"xor " T_4 ", " T_4 ", " S0_4 "\n\t"
"xor " T_0 ", " T_0 ", " S1_0 "\n\t"
"xor " T_1 ", " T_1 ", " S1_1 "\n\t"
"xor " T_2 ", " T_2 ", " S1_2 "\n\t"
"xor " T_3 ", " T_3 ", " S1_3 "\n\t"
"xor " T_4 ", " T_4 ", " S1_4 "\n\t"
"xor " T_0 ", " T_0 ", " S2_0 "\n\t"
"xor " T_1 ", " T_1 ", " S2_1 "\n\t"
"xor " T_2 ", " T_2 ", " S2_2 "\n\t"
"xor " T_3 ", " T_3 ", " S2_3 "\n\t"
"xor " T_4 ", " T_4 ", " S2_4 "\n\t"
"xor " T_0 ", " T_0 ", " S3_0 "\n\t"
"xor " T_1 ", " T_1 ", " S3_1 "\n\t"
"xor " T_2 ", " T_2 ", " S3_2 "\n\t"
"xor " T_3 ", " T_3 ", " S3_3 "\n\t"
"xor " T_4 ", " T_4 ", " S3_4 "\n\t"
"sd " T_1 ", 0(sp)\n\t"
"sd " T_3 ", 8(sp)\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"srli " T_3 ", " T_1 ", 63\n\t"
"slli " T_1 ", " T_1 ", 1\n\t"
"or " T_1 ", " T_1 ", " T_3 "\n\t"
#else
RORI(TR_1, TR_1, 63)
#endif
"ld " T_3 ", 160(%[s])\n\t"
"xor " T_1 ", " T_1 ", " T_4 "\n\t"
"xor " S0_0 ", " S0_0 ", " T_1 "\n\t"
"xor " S1_0 ", " S1_0 ", " T_1 "\n\t"
"xor " T_3 ", " T_3 ", " T_1 "\n\t"
"xor " S2_0 ", " S2_0 ", " T_1 "\n\t"
"xor " S3_0 ", " S3_0 ", " T_1 "\n\t"
"sd " T_3 ", 160(%[s])\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"srli " T_3 ", " T_2 ", 63\n\t"
"slli " T_1 ", " T_2 ", 1\n\t"
"or " T_1 ", " T_1 ", " T_3 "\n\t"
#else
RORI(TR_1, TR_2, 63)
#endif
"ld " T_3 ", 168(%[s])\n\t"
"xor " T_1 ", " T_1 ", " T_0 "\n\t"
"xor " S0_1 ", " S0_1 ", " T_1 "\n\t"
"xor " S1_1 ", " S1_1 ", " T_1 "\n\t"
"xor " T_3 ", " T_3 ", " T_1 "\n\t"
"xor " S2_1 ", " S2_1 ", " T_1 "\n\t"
"xor " S3_1 ", " S3_1 ", " T_1 "\n\t"
"sd " T_3 ", 168(%[s])\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"srli " T_3 ", " T_4 ", 63\n\t"
"slli " T_4 ", " T_4 ", 1\n\t"
"or " T_4 ", " T_4 ", " T_3 "\n\t"
#else
RORI(TR_4, TR_4, 63)
#endif
"ld " T_3 ", 184(%[s])\n\t"
"xor " T_4 ", " T_4 ", " T_2 "\n\t"
"xor " S0_3 ", " S0_3 ", " T_4 "\n\t"
"xor " S1_3 ", " S1_3 ", " T_4 "\n\t"
"xor " T_3 ", " T_3 ", " T_4 "\n\t"
"xor " S2_3 ", " S2_3 ", " T_4 "\n\t"
"xor " S3_3 ", " S3_3 ", " T_4 "\n\t"
"sd " T_3 ", 184(%[s])\n\t"
"ld " T_3 ", 8(sp)\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"srli " T_2 ", " T_0 ", 63\n\t"
"slli " T_0 ", " T_0 ", 1\n\t"
"or " T_0 ", " T_0 ", " T_2 "\n\t"
#else
RORI(TR_0, TR_0, 63)
#endif
"ld " T_4 ", 192(%[s])\n\t"
"xor " T_0 ", " T_0 ", " T_3 "\n\t"
"xor " S0_4 ", " S0_4 ", " T_0 "\n\t"
"xor " S1_4 ", " S1_4 ", " T_0 "\n\t"
"xor " T_4 ", " T_4 ", " T_0 "\n\t"
"xor " S2_4 ", " S2_4 ", " T_0 "\n\t"
"xor " S3_4 ", " S3_4 ", " T_0 "\n\t"
"ld " T_1 ", 0(sp)\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"srli " T_2 ", " T_3 ", 63\n\t"
"slli " T_3 ", " T_3 ", 1\n\t"
"or " T_3 ", " T_3 ", " T_2 "\n\t"
#else
RORI(TR_3, TR_3, 63)
#endif
"ld " T_2 ", 176(%[s])\n\t"
"xor " T_3 ", " T_3 ", " T_1 "\n\t"
"xor " S0_2 ", " S0_2 ", " T_3 "\n\t"
"xor " S1_2 ", " S1_2 ", " T_3 "\n\t"
"xor " T_2 ", " T_2 ", " T_3 "\n\t"
"xor " S2_2 ", " S2_2 ", " T_3 "\n\t"
"xor " S3_2 ", " S3_2 ", " T_3 "\n\t"
"mv " T_0 ", " S2_0 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"srli " T_1 ", " S0_1 ", 63\n\t"
"slli " S2_0 ", " S0_1 ", 1\n\t"
"or " S2_0 ", " S2_0 ", " T_1 "\n\t"
#else
RORI(SR2_0, SR0_1, 63)
#endif
SWAP_ROTL(T_0, TR_0, T_1, S1_2, SR1_2, 61, 3)
SWAP_ROTL(T_1, TR_1, T_0, S2_1, SR2_1, 58, 6)
SWAP_ROTL(T_0, TR_0, T_1, S3_2, SR3_2, 54, 10)
SWAP_ROTL(T_1, TR_1, T_0, S3_3, SR3_3, 49, 15)
SWAP_ROTL(T_0, TR_0, T_1, S0_3, SR0_3, 43, 21)
SWAP_ROTL(T_1, TR_1, T_0, S1_0, SR1_0, 36, 28)
SWAP_ROTL(T_0, TR_0, T_1, S3_1, SR3_1, 28, 36)
SWAP_ROTL(T_1, TR_1, T_0, S1_3, SR1_3, 19, 45)
SWAP_ROTL_MEM(T_0, TR_0, T_1, T_3, 168, 9, 55)
SWAP_ROTL(T_1, TR_1, T_0, T_4, TR_4, 62, 2)
SWAP_ROTL(T_0, TR_0, T_1, S0_4, SR0_4, 50, 14)
SWAP_ROTL(T_1, TR_1, T_0, S3_0, SR3_0, 37, 27)
SWAP_ROTL_MEM(T_0, TR_0, T_1, T_3, 184, 23, 41)
SWAP_ROTL(T_1, TR_1, T_0, S3_4, SR3_4, 8, 56)
SWAP_ROTL(T_0, TR_0, T_1, S2_3, SR2_3, 56, 8)
SWAP_ROTL(T_1, TR_1, T_0, S2_2, SR2_2, 39, 25)
SWAP_ROTL(T_0, TR_0, T_1, S0_2, SR0_2, 21, 43)
SWAP_ROTL_MEM(T_1, TR_1, T_0, T_3, 160, 2, 62)
SWAP_ROTL(T_0, TR_0, T_1, S2_4, SR2_4, 46, 18)
SWAP_ROTL(T_1, TR_1, T_0, T_2, TR_2, 25, 39)
SWAP_ROTL(T_0, TR_0, T_1, S1_4, SR1_4, 3, 61)
SWAP_ROTL(T_1, TR_1, T_0, S1_1, SR1_1, 44, 20)
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"srli " S0_1 ", " T_0 ", 20\n\t"
"slli " T_0 ", " T_0 ", 44\n\t"
"or " S0_1 ", " S0_1 ", " T_0 "\n\t"
#else
RORI(SR0_1, TR_0, 20)
#endif
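/* chi: s[x] ^= ~s[x+1] & s[x+2] within each row of five lanes, with T_0/T_1
 * preserving the first two lanes of the row. */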
"mv " T_0 ", " S0_0 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " T_3 ", " S0_1 "\n\t"
"and " T_3 ", " T_3 ", " S0_2 "\n\t"
#else
ANDN(TR_3, SR0_2, SR0_1)
#endif
"xor " S0_0 ", " S0_0 ", " T_3 "\n\t"
"mv " T_1 ", " S0_1 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " T_3 ", " S0_2 "\n\t"
"and " T_3 ", " T_3 ", " S0_3 "\n\t"
#else
ANDN(TR_3, SR0_3, SR0_2)
#endif
"xor " S0_1 ", " S0_1 ", " T_3 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " T_3 ", " S0_3 "\n\t"
"and " T_3 ", " T_3 ", " S0_4 "\n\t"
#else
ANDN(TR_3, SR0_4, SR0_3)
#endif
"xor " S0_2 ", " S0_2 ", " T_3 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " T_3 ", " S0_4 "\n\t"
"and " T_3 ", " T_3 ", " T_0 "\n\t"
#else
ANDN(TR_3, TR_0, SR0_4)
#endif
"xor " S0_3 ", " S0_3 ", " T_3 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " T_3 ", " T_0 "\n\t"
"and " T_3 ", " T_3 ", " T_1 "\n\t"
#else
ANDN(TR_3, TR_1, TR_0)
#endif
"xor " S0_4 ", " S0_4 ", " T_3 "\n\t"
"mv " T_0 ", " S1_0 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " T_3 ", " S1_1 "\n\t"
"and " T_3 ", " T_3 ", " S1_2 "\n\t"
#else
ANDN(TR_3, SR1_2, SR1_1)
#endif
"xor " S1_0 ", " S1_0 ", " T_3 "\n\t"
"mv " T_1 ", " S1_1 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " T_3 ", " S1_2 "\n\t"
"and " T_3 ", " T_3 ", " S1_3 "\n\t"
#else
ANDN(TR_3, SR1_3, SR1_2)
#endif
"xor " S1_1 ", " S1_1 ", " T_3 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " T_3 ", " S1_3 "\n\t"
"and " T_3 ", " T_3 ", " S1_4 "\n\t"
#else
ANDN(TR_3, SR1_4, SR1_3)
#endif
"xor " S1_2 ", " S1_2 ", " T_3 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " T_3 ", " S1_4 "\n\t"
"and " T_3 ", " T_3 ", " T_0 "\n\t"
#else
ANDN(TR_3, TR_0, SR1_4)
#endif
"xor " S1_3 ", " S1_3 ", " T_3 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " T_3 ", " T_0 "\n\t"
"and " T_3 ", " T_3 ", " T_1 "\n\t"
#else
ANDN(TR_3, TR_1, TR_0)
#endif
"xor " S1_4 ", " S1_4 ", " T_3 "\n\t"
"mv " T_0 ", " S2_0 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " T_3 ", " S2_1 "\n\t"
"and " T_3 ", " T_3 ", " S2_2 "\n\t"
#else
ANDN(TR_3, SR2_2, SR2_1)
#endif
"xor " S2_0 ", " S2_0 ", " T_3 "\n\t"
"mv " T_1 ", " S2_1 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " T_3 ", " S2_2 "\n\t"
"and " T_3 ", " T_3 ", " S2_3 "\n\t"
#else
ANDN(TR_3, SR2_3, SR2_2)
#endif
"xor " S2_1 ", " S2_1 ", " T_3 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " T_3 ", " S2_3 "\n\t"
"and " T_3 ", " T_3 ", " S2_4 "\n\t"
#else
ANDN(TR_3, SR2_4, SR2_3)
#endif
"xor " S2_2 ", " S2_2 ", " T_3 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " T_3 ", " S2_4 "\n\t"
"and " T_3 ", " T_3 ", " T_0 "\n\t"
#else
ANDN(TR_3, TR_0, SR2_4)
#endif
"xor " S2_3 ", " S2_3 ", " T_3 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " T_3 ", " T_0 "\n\t"
"and " T_3 ", " T_3 ", " T_1 "\n\t"
#else
ANDN(TR_3, TR_1, TR_0)
#endif
"xor " S2_4 ", " S2_4 ", " T_3 "\n\t"
"mv " T_0 ", " S3_0 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " T_3 ", " S3_1 "\n\t"
"and " T_3 ", " T_3 ", " S3_2 "\n\t"
#else
ANDN(TR_3, SR3_2, SR3_1)
#endif
"xor " S3_0 ", " S3_0 ", " T_3 "\n\t"
"mv " T_1 ", " S3_1 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " T_3 ", " S3_2 "\n\t"
"and " T_3 ", " T_3 ", " S3_3 "\n\t"
#else
ANDN(TR_3, SR3_3, SR3_2)
#endif
"xor " S3_1 ", " S3_1 ", " T_3 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " T_3 ", " S3_3 "\n\t"
"and " T_3 ", " T_3 ", " S3_4 "\n\t"
#else
ANDN(TR_3, SR3_4, SR3_3)
#endif
"xor " S3_2 ", " S3_2 ", " T_3 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " T_3 ", " S3_4 "\n\t"
"and " T_3 ", " T_3 ", " T_0 "\n\t"
#else
ANDN(TR_3, TR_0, SR3_4)
#endif
"xor " S3_3 ", " S3_3 ", " T_3 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " T_3 ", " T_0 "\n\t"
"and " T_3 ", " T_3 ", " T_1 "\n\t"
#else
ANDN(TR_3, TR_1, TR_0)
#endif
"xor " S3_4 ", " S3_4 ", " T_3 "\n\t"
"sd " S3_0 ", 120(%[s])\n\t"
"sd " S3_1 ", 128(%[s])\n\t"
"sd " S3_2 ", 136(%[s])\n\t"
"ld " T_0 ", 160(%[s])\n\t"
"ld " T_1 ", 168(%[s])\n\t"
"ld " T_3 ", 184(%[s])\n\t"
"mv " S3_0 ", " T_0 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " S3_2 ", " T_1 "\n\t"
"and " S3_2 ", " S3_2 ", " T_2 "\n\t"
#else
ANDN(SR3_2, TR_2, TR_1)
#endif
"xor " T_0 ", " T_0 ", " S3_2 "\n\t"
"mv " S3_1 ", " T_1 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " S3_2 ", " T_2 "\n\t"
"and " S3_2 ", " S3_2 ", " T_3 "\n\t"
#else
ANDN(SR3_2, TR_3, TR_2)
#endif
"xor " T_1 ", " T_1 ", " S3_2 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " S3_2 ", " T_3 "\n\t"
"and " S3_2 ", " S3_2 ", " T_4 "\n\t"
#else
ANDN(SR3_2, TR_4, TR_3)
#endif
"xor " T_2 ", " T_2 ", " S3_2 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " S3_2 ", " T_4 "\n\t"
"and " S3_2 ", " S3_2 ", " S3_0 "\n\t"
#else
ANDN(SR3_2, SR3_0, TR_4)
#endif
"xor " T_3 ", " T_3 ", " S3_2 "\n\t"
#ifndef WOLFSSL_RISCV_BASE_BIT_MANIPULATION
"not " S3_2 ", " S3_0 "\n\t"
"and " S3_2 ", " S3_2 ", " S3_1 "\n\t"
#else
ANDN(SR3_2, SR3_1, SR3_0)
#endif
"xor " T_4 ", " T_4 ", " S3_2 "\n\t"
"ld " S3_0 ", 120(%[s])\n\t"
"ld " S3_1 ", 128(%[s])\n\t"
"ld " S3_2 ", 136(%[s])\n\t"
"sd " T_0 ", 160(%[s])\n\t"
"sd " T_1 ", 168(%[s])\n\t"
"sd " T_2 ", 176(%[s])\n\t"
"sd " T_3 ", 184(%[s])\n\t"
"sd " T_4 ", 192(%[s])\n\t"
"ld " T_4 ", 16(sp)\n\t"
"ld " T_3 ", 0(%[r])\n\t"
"addi %[r], %[r], 8\n\t"
"addi " T_4 ", " T_4 ", -1\n\t"
"xor " S0_0 ", " S0_0 ", " T_3 "\n\t"
"bnez " T_4 ", L_riscv_64_block_sha3_loop\n\t"
"sd " S0_0 ", 0(%[s])\n\t"
"sd " S0_1 ", 8(%[s])\n\t"
"sd " S0_2 ", 16(%[s])\n\t"
"sd " S0_3 ", 24(%[s])\n\t"
"sd " S0_4 ", 32(%[s])\n\t"
"sd " S1_0 ", 40(%[s])\n\t"
"sd " S1_1 ", 48(%[s])\n\t"
"sd " S1_2 ", 56(%[s])\n\t"
"sd " S1_3 ", 64(%[s])\n\t"
"sd " S1_4 ", 72(%[s])\n\t"
"sd " S2_0 ", 80(%[s])\n\t"
"sd " S2_1 ", 88(%[s])\n\t"
"sd " S2_2 ", 96(%[s])\n\t"
"sd " S2_3 ", 104(%[s])\n\t"
"sd " S2_4 ", 112(%[s])\n\t"
"sd " S3_0 ", 120(%[s])\n\t"
"sd " S3_1 ", 128(%[s])\n\t"
"sd " S3_2 ", 136(%[s])\n\t"
"sd " S3_3 ", 144(%[s])\n\t"
"sd " S3_4 ", 152(%[s])\n\t"
"addi sp, sp, 24\n\t"
: [r] "+r" (r)
: [s] "r" (s)
: "memory", "t0", "t1", "t2", "t3", "t4", "t5", "t6",
"a1", "a2", "a3", "a4", "a5", "a6", "a7",
"s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11"
);
}
#else /* WOLFSSL_RISCV_VECTOR */
#ifndef WOLFSSL_RISCV_VECTOR_BASE_BIT_MANIPULATION
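/* theta column fix-up: V31 = rotl(b1, 1) ^ b4 (t1 must hold 63), XORed into
 * all five lanes of column r. */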
#define COL_MIX(r, b1, b4) \
VSLL_VI(REG_V31, b1, 1) \
VSRL_VX(REG_V30, b1, REG_T1) \
VXOR_VV(REG_V31, REG_V31, b4) \
VXOR_VV(REG_V31, REG_V31, REG_V30) \
VXOR_VV((r + 0), (r + 0), REG_V31) \
VXOR_VV((r + 5), (r + 5), REG_V31) \
VXOR_VV((r + 10), (r + 10), REG_V31) \
VXOR_VV((r + 15), (r + 15), REG_V31) \
VXOR_VV((r + 20), (r + 20), REG_V31)
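/* rho + pi helper: save vr into vt0 for the next link of the chain, then
 * vr = rotl64(vt1, sl). The LO variant puts the left-shift amount in the
 * 5-bit immediate (sl <= 31); HI puts the right shift there (sl > 31). */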
#define SWAP_ROTL_LO(vr, vt0, vt1, sl) \
VMV_V_V(vt0, vr) \
"li t1, 64 - " #sl "\n\t" \
VSLL_VI(vr, vt1, sl) \
VSRL_VX(vt1, vt1, REG_T1) \
VOR_VV(vr, vr, vt1)
#define SWAP_ROTL_HI(vr, vt0, vt1, sl) \
VMV_V_V(vt0, vr) \
"li t1, " #sl "\n\t" \
VSRL_VI(vr, vt1, (64 - sl)) \
VSLL_VX(vt1, vt1, REG_T1) \
VOR_VV(vr, vr, vt1)
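/* chi: r[x] ^= ~r[x+1] & r[x+2] across a row of five lanes; V25/V26
 * preserve r[0]/r[1], which are overwritten before their last use. */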
#define ROW_MIX(r) \
VMV_V_V(REG_V25, (r + 0)) \
VMV_V_V(REG_V26, (r + 1)) \
VNOT_V(REG_V30, (r + 1)) \
VNOT_V(REG_V31, (r + 2)) \
VAND_VV(REG_V30, REG_V30, (r + 2)) \
VAND_VV(REG_V31, REG_V31, (r + 3)) \
VXOR_VV((r + 0), REG_V30, (r + 0)) \
VXOR_VV((r + 1), REG_V31, (r + 1)) \
VNOT_V(REG_V30, (r + 3)) \
VNOT_V(REG_V31, (r + 4)) \
VAND_VV(REG_V30, REG_V30, (r + 4)) \
VAND_VV(REG_V31, REG_V31, REG_V25) \
VNOT_V(REG_V25, REG_V25) \
VXOR_VV((r + 2), REG_V30, (r + 2)) \
VAND_VV(REG_V25, REG_V25, REG_V26) \
VXOR_VV((r + 3), REG_V31, (r + 3)) \
VXOR_VV((r + 4), REG_V25, (r + 4))
#else /* WOLFSSL_RISCV_VECTOR_BASE_BIT_MANIPULATION */
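/* Zvbb variants: vror.vi performs the rotate in one instruction and
 * vandn.vv folds the not+and of chi; COL_MIX then only needs the XORs,
 * taking the precomputed D value in t. */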
#define COL_MIX(r, t) \
VXOR_VV((r + 0), (r + 0), t) \
VXOR_VV((r + 5), (r + 5), t) \
VXOR_VV((r + 10), (r + 10), t) \
VXOR_VV((r + 15), (r + 15), t) \
VXOR_VV((r + 20), (r + 20), t)
#define SWAP_ROTL(vr, vt0, vt1, sl) \
VMV_V_V(vt0, vr) \
VROR_VI(vr, (64 - sl), vt1)
#define SWAP_ROTL_LO SWAP_ROTL
#define SWAP_ROTL_HI SWAP_ROTL
#define ROW_MIX(r) \
VMV_V_V(REG_V25, (r + 0)) \
VMV_V_V(REG_V26, (r + 1)) \
VANDN_VV(REG_V30, (r + 1), (r + 2)) \
VANDN_VV(REG_V31, (r + 2), (r + 3)) \
VXOR_VV((r + 0), REG_V30, (r + 0)) \
VXOR_VV((r + 1), REG_V31, (r + 1)) \
VANDN_VV(REG_V30, (r + 3), (r + 4)) \
VANDN_VV(REG_V31, (r + 4), REG_V25) \
VANDN_VV(REG_V25, REG_V25, REG_V26) \
VXOR_VV((r + 2), REG_V30, (r + 2)) \
VXOR_VV((r + 3), REG_V31, (r + 3)) \
VXOR_VV((r + 4), REG_V25, (r + 4))
#endif /* WOLFSSL_RISCV_VECTOR_BASE_BIT_MANIPULATION */
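/* Apply the Keccak-f[1600] permutation (24 rounds) to the SHA-3 state using
 * the RISC-V vector extension, with one 64-bit lane per vector register.
 * Note: uses all of v0-v31; vector registers are not preserved across calls
 * in the standard vector calling convention.
 *
 * @param [in, out] s  25-lane (25 x 64-bit) SHA-3 state.
 */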
void BlockSha3(word64* s)
{
__asm__ __volatile__ (
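/* vl = 1, SEW = 64, LMUL = 1: one 64-bit lane per vector register. */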
VSETIVLI(REG_X0, 1, 0, 1, 0b011, 0b000)
"li t2, 24\n\t"
"mv t0, %[r]\n\t"
"mv t1, %[s]\n\t"
VLSEG8E64_V(REG_V0, REG_T1)
"addi t1, %[s], 64\n\t"
VLSEG8E64_V(REG_V8, REG_T1)
"addi t1, %[s], 128\n\t"
VLSEG8E64_V(REG_V16, REG_T1)
"addi t1, %[s], 192\n\t"
VLSEG1E64_V(REG_V24, REG_T1)
"\n"
"L_riscv_64_block_sha3_loop:\n\t"
VXOR_VV(REG_V25, REG_V0, REG_V5)
VXOR_VV(REG_V26, REG_V1, REG_V6)
VXOR_VV(REG_V27, REG_V2, REG_V7)
VXOR_VV(REG_V28, REG_V3, REG_V8)
VXOR_VV(REG_V29, REG_V4, REG_V9)
VXOR_VV(REG_V25, REG_V25, REG_V10)
VXOR_VV(REG_V26, REG_V26, REG_V11)
VXOR_VV(REG_V27, REG_V27, REG_V12)
VXOR_VV(REG_V28, REG_V28, REG_V13)
VXOR_VV(REG_V29, REG_V29, REG_V14)
VXOR_VV(REG_V25, REG_V25, REG_V15)
VXOR_VV(REG_V26, REG_V26, REG_V16)
VXOR_VV(REG_V27, REG_V27, REG_V17)
VXOR_VV(REG_V28, REG_V28, REG_V18)
VXOR_VV(REG_V29, REG_V29, REG_V19)
VXOR_VV(REG_V25, REG_V25, REG_V20)
VXOR_VV(REG_V26, REG_V26, REG_V21)
VXOR_VV(REG_V27, REG_V27, REG_V22)
VXOR_VV(REG_V28, REG_V28, REG_V23)
VXOR_VV(REG_V29, REG_V29, REG_V24)
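/* D[x] = rotl(c[x+1], 1) ^ c[x-1], folded into each column */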
#ifndef WOLFSSL_RISCV_VECTOR_BASE_BIT_MANIPULATION
"li t1, 63\n\t"
COL_MIX(REG_V0, REG_V26, REG_V29)
COL_MIX(REG_V1, REG_V27, REG_V25)
COL_MIX(REG_V2, REG_V28, REG_V26)
COL_MIX(REG_V3, REG_V29, REG_V27)
COL_MIX(REG_V4, REG_V25, REG_V28)
#else
VROR_VI(REG_V30, 63, REG_V26)
VROR_VI(REG_V31, 63, REG_V27)
VXOR_VV(REG_V30, REG_V30, REG_V29)
VXOR_VV(REG_V31, REG_V31, REG_V25)
COL_MIX(REG_V0, REG_V30)
COL_MIX(REG_V1, REG_V31)
VROR_VI(REG_V30, 63, REG_V28)
VROR_VI(REG_V31, 63, REG_V29)
VROR_VI(REG_V25, 63, REG_V25)
VXOR_VV(REG_V30, REG_V30, REG_V26)
VXOR_VV(REG_V31, REG_V31, REG_V27)
VXOR_VV(REG_V25, REG_V25, REG_V28)
COL_MIX(REG_V2, REG_V30)
COL_MIX(REG_V3, REG_V31)
COL_MIX(REG_V4, REG_V25)
#endif
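/* rho + pi chain, as in the scalar version: s[10] = rotl(s[1], 1), ... */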
VMV_V_V(REG_V26, REG_V1)
SWAP_ROTL_LO(REG_V10, REG_V25, REG_V26, 1)
SWAP_ROTL_LO(REG_V7 , REG_V26, REG_V25, 3)
SWAP_ROTL_LO(REG_V11, REG_V25, REG_V26, 6)
SWAP_ROTL_LO(REG_V17, REG_V26, REG_V25, 10)
SWAP_ROTL_LO(REG_V18, REG_V25, REG_V26, 15)
SWAP_ROTL_LO(REG_V3 , REG_V26, REG_V25, 21)
SWAP_ROTL_LO(REG_V5 , REG_V25, REG_V26, 28)
SWAP_ROTL_HI(REG_V16, REG_V26, REG_V25, 36)
SWAP_ROTL_HI(REG_V8 , REG_V25, REG_V26, 45)
SWAP_ROTL_HI(REG_V21, REG_V26, REG_V25, 55)
SWAP_ROTL_LO(REG_V24, REG_V25, REG_V26, 2)
SWAP_ROTL_LO(REG_V4 , REG_V26, REG_V25, 14)
SWAP_ROTL_LO(REG_V15, REG_V25, REG_V26, 27)
SWAP_ROTL_HI(REG_V23, REG_V26, REG_V25, 41)
SWAP_ROTL_HI(REG_V19, REG_V25, REG_V26, 56)
SWAP_ROTL_LO(REG_V13, REG_V26, REG_V25, 8)
SWAP_ROTL_LO(REG_V12, REG_V25, REG_V26, 25)
SWAP_ROTL_HI(REG_V2 , REG_V26, REG_V25, 43)
SWAP_ROTL_HI(REG_V20, REG_V25, REG_V26, 62)
SWAP_ROTL_LO(REG_V14, REG_V26, REG_V25, 18)
SWAP_ROTL_HI(REG_V22, REG_V25, REG_V26, 39)
SWAP_ROTL_HI(REG_V9 , REG_V26, REG_V25, 61)
SWAP_ROTL_LO(REG_V6 , REG_V25, REG_V26, 20)
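/* close the chain: v1 = rotl(old s[6], 44) */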
"li t1, 44\n\t"
VSRL_VI(REG_V1, REG_V25, (64 - 44))
VSLL_VX(REG_V25, REG_V25, REG_T1)
VOR_VV(REG_V1, REG_V1, REG_V25)
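/* chi on each row of five lanes */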
ROW_MIX(REG_V0)
ROW_MIX(REG_V5)
ROW_MIX(REG_V10)
ROW_MIX(REG_V15)
ROW_MIX(REG_V20)
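/* iota: XOR the next round constant into s[0] */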
VL1RE64_V(REG_V25, REG_T0)
"addi t0, t0, 8\n\t"
"addi t2, t2, -1\n\t"
VXOR_VV(REG_V0, REG_V0, REG_V25)
"bnez t2, L_riscv_64_block_sha3_loop\n\t"
"mv t1, %[s]\n\t"
VSSEG8E64_V(REG_V0, REG_T1)
"addi t1, %[s], 64\n\t"
VSSEG8E64_V(REG_V8, REG_T1)
"addi t1, %[s], 128\n\t"
VSSEG8E64_V(REG_V16, REG_T1)
"addi t1, %[s], 192\n\t"
VSSEG1E64_V(REG_V24, REG_T1)
:
: [s] "r" (s), [r] "r" (hash_keccak_r)
: "memory", "t0", "t1", "t2"
);
}
#endif /* WOLFSSL_RISCV_VECTOR */
#endif /* WOLFSSL_SHA3 && !WOLFSSL_XILINX_CRYPT && !WOLFSSL_AFALG_XILINX_SHA3 */
#endif /* WOLFSSL_RISCV_ASM */