#include "aes.hpp"
#include <cstring>
#include "base.hpp"
#if defined(SOUP_USE_INTRIN) && SOUP_BITS == 64 && (SOUP_X86 || SOUP_ARM)
#define AES_USE_INTRIN true
#else
#define AES_USE_INTRIN false
#endif
#if AES_USE_INTRIN
#include "CpuInfo.hpp"
#endif
#include "Endian.hpp"
#include "plusaes.hpp"
#if SOUP_X86
#define IS_AES_INTRIN_AVAILBLE CpuInfo::get().supportsAESNI()
#else
#define IS_AES_INTRIN_AVAILBLE CpuInfo::get().armv8_aes
#endif
NAMESPACE_SOUP
{
#if AES_USE_INTRIN
// Hardware-accelerated AES routines. Only declarations appear here (extern, no bodies);
// the definitions are not part of this file.
// Round-key buffer sizes follow the key size: 176 = 16 * 11, 208 = 16 * 13, 240 = 16 * 15 bytes.
namespace intrin
{
// Key expansion for 16-, 24- and 32-byte keys; `w` is the round-key buffer that the
// block routines below later consume.
extern void aes_expand_key_128(uint8_t w[176], const uint8_t key[16]) noexcept;
extern void aes_expand_key_192(uint8_t w[208], const uint8_t key[24]) noexcept;
extern void aes_expand_key_256(uint8_t w[240], const uint8_t key[32]) noexcept;
// Single 16-byte block encryption using previously expanded round keys.
extern void aes_encrypt_block_128(const uint8_t in[16], uint8_t out[16], const uint8_t roundKeys[176]) noexcept;
extern void aes_encrypt_block_192(const uint8_t in[16], uint8_t out[16], const uint8_t roundKeys[208]) noexcept;
extern void aes_encrypt_block_256(const uint8_t in[16], uint8_t out[16], const uint8_t roundKeys[240]) noexcept;
// Called on already expanded round keys before the decrypt routines are used (see
// expandKeyForDecryption and ecbDecrypt). Presumably rewrites `w` in place into the form
// aes_decrypt_block_* expects -- TODO confirm against the implementation.
extern void aes_prepare_decryption_128(uint8_t w[176]) noexcept;
extern void aes_prepare_decryption_192(uint8_t w[208]) noexcept;
extern void aes_prepare_decryption_256(uint8_t w[240]) noexcept;
// Single 16-byte block decryption using prepared round keys.
extern void aes_decrypt_block_128(const uint8_t in[16], uint8_t out[16], const uint8_t roundKeys[176]) noexcept;
extern void aes_decrypt_block_192(const uint8_t in[16], uint8_t out[16], const uint8_t roundKeys[208]) noexcept;
extern void aes_decrypt_block_256(const uint8_t in[16], uint8_t out[16], const uint8_t roundKeys[240]) noexcept;
}
#endif
// Number of columns in the AES state; the software routines below index state[row][0..Nb).
static constexpr int Nb = 4;
// Size of one AES block in bytes (4 rows * Nb columns = 16).
static constexpr unsigned int blockBytesLen = 4 * Nb * sizeof(uint8_t);
// Byte substitution table used by subBytes() and subWord().
// Indexed as sbox[high nibble][low nibble] of the input byte.
static const uint8_t sbox[16][16] = {
{0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76},
{0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0},
{0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15},
{0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75},
{0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84},
{0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf},
{0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8},
{0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2},
{0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73},
{0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb},
{0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79},
{0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08},
{0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a},
{0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e},
{0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf},
{0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16}
};
// Inverse byte substitution table used by invSubBytes().
// Indexed as inv_sbox[high nibble][low nibble] of the input byte.
static const uint8_t inv_sbox[16][16] = {
{0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb},
{0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb},
{0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e},
{0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25},
{0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92},
{0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84},
{0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06},
{0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b},
{0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73},
{0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e},
{0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b},
{0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4},
{0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f},
{0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef},
{0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61},
{0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d}
};
// Multiplication lookup table used by mixColumns() and invMixColumns(), indexed as
// GF_MUL_TABLE[multiplier][byte]. Only rows 2, 3, 9, 11, 13 and 14 are populated, which
// are the coefficients other than 1 that appear in CMDS and INV_CMDS; every other row is
// an empty (zero-initialized) placeholder so the multiplier can be used directly as index.
// Row 2 agrees with xtime(), e.g. entry 0x80 is 0x1b.
static const uint8_t GF_MUL_TABLE[15][256] =
{
// Rows 0 and 1: unused.
{},{},
// Row 2: multiplication by 2.
{
0x00,0x02,0x04,0x06,0x08,0x0a,0x0c,0x0e,0x10,0x12,0x14,0x16,0x18,0x1a,0x1c,0x1e,
0x20,0x22,0x24,0x26,0x28,0x2a,0x2c,0x2e,0x30,0x32,0x34,0x36,0x38,0x3a,0x3c,0x3e,
0x40,0x42,0x44,0x46,0x48,0x4a,0x4c,0x4e,0x50,0x52,0x54,0x56,0x58,0x5a,0x5c,0x5e,
0x60,0x62,0x64,0x66,0x68,0x6a,0x6c,0x6e,0x70,0x72,0x74,0x76,0x78,0x7a,0x7c,0x7e,
0x80,0x82,0x84,0x86,0x88,0x8a,0x8c,0x8e,0x90,0x92,0x94,0x96,0x98,0x9a,0x9c,0x9e,
0xa0,0xa2,0xa4,0xa6,0xa8,0xaa,0xac,0xae,0xb0,0xb2,0xb4,0xb6,0xb8,0xba,0xbc,0xbe,
0xc0,0xc2,0xc4,0xc6,0xc8,0xca,0xcc,0xce,0xd0,0xd2,0xd4,0xd6,0xd8,0xda,0xdc,0xde,
0xe0,0xe2,0xe4,0xe6,0xe8,0xea,0xec,0xee,0xf0,0xf2,0xf4,0xf6,0xf8,0xfa,0xfc,0xfe,
0x1b,0x19,0x1f,0x1d,0x13,0x11,0x17,0x15,0x0b,0x09,0x0f,0x0d,0x03,0x01,0x07,0x05,
0x3b,0x39,0x3f,0x3d,0x33,0x31,0x37,0x35,0x2b,0x29,0x2f,0x2d,0x23,0x21,0x27,0x25,
0x5b,0x59,0x5f,0x5d,0x53,0x51,0x57,0x55,0x4b,0x49,0x4f,0x4d,0x43,0x41,0x47,0x45,
0x7b,0x79,0x7f,0x7d,0x73,0x71,0x77,0x75,0x6b,0x69,0x6f,0x6d,0x63,0x61,0x67,0x65,
0x9b,0x99,0x9f,0x9d,0x93,0x91,0x97,0x95,0x8b,0x89,0x8f,0x8d,0x83,0x81,0x87,0x85,
0xbb,0xb9,0xbf,0xbd,0xb3,0xb1,0xb7,0xb5,0xab,0xa9,0xaf,0xad,0xa3,0xa1,0xa7,0xa5,
0xdb,0xd9,0xdf,0xdd,0xd3,0xd1,0xd7,0xd5,0xcb,0xc9,0xcf,0xcd,0xc3,0xc1,0xc7,0xc5,
0xfb,0xf9,0xff,0xfd,0xf3,0xf1,0xf7,0xf5,0xeb,0xe9,0xef,0xed,0xe3,0xe1,0xe7,0xe5
},
// Row 3: multiplication by 3.
{
0x00,0x03,0x06,0x05,0x0c,0x0f,0x0a,0x09,0x18,0x1b,0x1e,0x1d,0x14,0x17,0x12,0x11,
0x30,0x33,0x36,0x35,0x3c,0x3f,0x3a,0x39,0x28,0x2b,0x2e,0x2d,0x24,0x27,0x22,0x21,
0x60,0x63,0x66,0x65,0x6c,0x6f,0x6a,0x69,0x78,0x7b,0x7e,0x7d,0x74,0x77,0x72,0x71,
0x50,0x53,0x56,0x55,0x5c,0x5f,0x5a,0x59,0x48,0x4b,0x4e,0x4d,0x44,0x47,0x42,0x41,
0xc0,0xc3,0xc6,0xc5,0xcc,0xcf,0xca,0xc9,0xd8,0xdb,0xde,0xdd,0xd4,0xd7,0xd2,0xd1,
0xf0,0xf3,0xf6,0xf5,0xfc,0xff,0xfa,0xf9,0xe8,0xeb,0xee,0xed,0xe4,0xe7,0xe2,0xe1,
0xa0,0xa3,0xa6,0xa5,0xac,0xaf,0xaa,0xa9,0xb8,0xbb,0xbe,0xbd,0xb4,0xb7,0xb2,0xb1,
0x90,0x93,0x96,0x95,0x9c,0x9f,0x9a,0x99,0x88,0x8b,0x8e,0x8d,0x84,0x87,0x82,0x81,
0x9b,0x98,0x9d,0x9e,0x97,0x94,0x91,0x92,0x83,0x80,0x85,0x86,0x8f,0x8c,0x89,0x8a,
0xab,0xa8,0xad,0xae,0xa7,0xa4,0xa1,0xa2,0xb3,0xb0,0xb5,0xb6,0xbf,0xbc,0xb9,0xba,
0xfb,0xf8,0xfd,0xfe,0xf7,0xf4,0xf1,0xf2,0xe3,0xe0,0xe5,0xe6,0xef,0xec,0xe9,0xea,
0xcb,0xc8,0xcd,0xce,0xc7,0xc4,0xc1,0xc2,0xd3,0xd0,0xd5,0xd6,0xdf,0xdc,0xd9,0xda,
0x5b,0x58,0x5d,0x5e,0x57,0x54,0x51,0x52,0x43,0x40,0x45,0x46,0x4f,0x4c,0x49,0x4a,
0x6b,0x68,0x6d,0x6e,0x67,0x64,0x61,0x62,0x73,0x70,0x75,0x76,0x7f,0x7c,0x79,0x7a,
0x3b,0x38,0x3d,0x3e,0x37,0x34,0x31,0x32,0x23,0x20,0x25,0x26,0x2f,0x2c,0x29,0x2a,
0x0b,0x08,0x0d,0x0e,0x07,0x04,0x01,0x02,0x13,0x10,0x15,0x16,0x1f,0x1c,0x19,0x1a
},
// Rows 4 to 8: unused.
{}, {}, {}, {}, {},
// Row 9: multiplication by 9.
{
0x00,0x09,0x12,0x1b,0x24,0x2d,0x36,0x3f,0x48,0x41,0x5a,0x53,0x6c,0x65,0x7e,0x77,
0x90,0x99,0x82,0x8b,0xb4,0xbd,0xa6,0xaf,0xd8,0xd1,0xca,0xc3,0xfc,0xf5,0xee,0xe7,
0x3b,0x32,0x29,0x20,0x1f,0x16,0x0d,0x04,0x73,0x7a,0x61,0x68,0x57,0x5e,0x45,0x4c,
0xab,0xa2,0xb9,0xb0,0x8f,0x86,0x9d,0x94,0xe3,0xea,0xf1,0xf8,0xc7,0xce,0xd5,0xdc,
0x76,0x7f,0x64,0x6d,0x52,0x5b,0x40,0x49,0x3e,0x37,0x2c,0x25,0x1a,0x13,0x08,0x01,
0xe6,0xef,0xf4,0xfd,0xc2,0xcb,0xd0,0xd9,0xae,0xa7,0xbc,0xb5,0x8a,0x83,0x98,0x91,
0x4d,0x44,0x5f,0x56,0x69,0x60,0x7b,0x72,0x05,0x0c,0x17,0x1e,0x21,0x28,0x33,0x3a,
0xdd,0xd4,0xcf,0xc6,0xf9,0xf0,0xeb,0xe2,0x95,0x9c,0x87,0x8e,0xb1,0xb8,0xa3,0xaa,
0xec,0xe5,0xfe,0xf7,0xc8,0xc1,0xda,0xd3,0xa4,0xad,0xb6,0xbf,0x80,0x89,0x92,0x9b,
0x7c,0x75,0x6e,0x67,0x58,0x51,0x4a,0x43,0x34,0x3d,0x26,0x2f,0x10,0x19,0x02,0x0b,
0xd7,0xde,0xc5,0xcc,0xf3,0xfa,0xe1,0xe8,0x9f,0x96,0x8d,0x84,0xbb,0xb2,0xa9,0xa0,
0x47,0x4e,0x55,0x5c,0x63,0x6a,0x71,0x78,0x0f,0x06,0x1d,0x14,0x2b,0x22,0x39,0x30,
0x9a,0x93,0x88,0x81,0xbe,0xb7,0xac,0xa5,0xd2,0xdb,0xc0,0xc9,0xf6,0xff,0xe4,0xed,
0x0a,0x03,0x18,0x11,0x2e,0x27,0x3c,0x35,0x42,0x4b,0x50,0x59,0x66,0x6f,0x74,0x7d,
0xa1,0xa8,0xb3,0xba,0x85,0x8c,0x97,0x9e,0xe9,0xe0,0xfb,0xf2,0xcd,0xc4,0xdf,0xd6,
0x31,0x38,0x23,0x2a,0x15,0x1c,0x07,0x0e,0x79,0x70,0x6b,0x62,0x5d,0x54,0x4f,0x46
},
// Row 10: unused.
{},
// Row 11: multiplication by 11.
{
0x00,0x0b,0x16,0x1d,0x2c,0x27,0x3a,0x31,0x58,0x53,0x4e,0x45,0x74,0x7f,0x62,0x69,
0xb0,0xbb,0xa6,0xad,0x9c,0x97,0x8a,0x81,0xe8,0xe3,0xfe,0xf5,0xc4,0xcf,0xd2,0xd9,
0x7b,0x70,0x6d,0x66,0x57,0x5c,0x41,0x4a,0x23,0x28,0x35,0x3e,0x0f,0x04,0x19,0x12,
0xcb,0xc0,0xdd,0xd6,0xe7,0xec,0xf1,0xfa,0x93,0x98,0x85,0x8e,0xbf,0xb4,0xa9,0xa2,
0xf6,0xfd,0xe0,0xeb,0xda,0xd1,0xcc,0xc7,0xae,0xa5,0xb8,0xb3,0x82,0x89,0x94,0x9f,
0x46,0x4d,0x50,0x5b,0x6a,0x61,0x7c,0x77,0x1e,0x15,0x08,0x03,0x32,0x39,0x24,0x2f,
0x8d,0x86,0x9b,0x90,0xa1,0xaa,0xb7,0xbc,0xd5,0xde,0xc3,0xc8,0xf9,0xf2,0xef,0xe4,
0x3d,0x36,0x2b,0x20,0x11,0x1a,0x07,0x0c,0x65,0x6e,0x73,0x78,0x49,0x42,0x5f,0x54,
0xf7,0xfc,0xe1,0xea,0xdb,0xd0,0xcd,0xc6,0xaf,0xa4,0xb9,0xb2,0x83,0x88,0x95,0x9e,
0x47,0x4c,0x51,0x5a,0x6b,0x60,0x7d,0x76,0x1f,0x14,0x09,0x02,0x33,0x38,0x25,0x2e,
0x8c,0x87,0x9a,0x91,0xa0,0xab,0xb6,0xbd,0xd4,0xdf,0xc2,0xc9,0xf8,0xf3,0xee,0xe5,
0x3c,0x37,0x2a,0x21,0x10,0x1b,0x06,0x0d,0x64,0x6f,0x72,0x79,0x48,0x43,0x5e,0x55,
0x01,0x0a,0x17,0x1c,0x2d,0x26,0x3b,0x30,0x59,0x52,0x4f,0x44,0x75,0x7e,0x63,0x68,
0xb1,0xba,0xa7,0xac,0x9d,0x96,0x8b,0x80,0xe9,0xe2,0xff,0xf4,0xc5,0xce,0xd3,0xd8,
0x7a,0x71,0x6c,0x67,0x56,0x5d,0x40,0x4b,0x22,0x29,0x34,0x3f,0x0e,0x05,0x18,0x13,
0xca,0xc1,0xdc,0xd7,0xe6,0xed,0xf0,0xfb,0x92,0x99,0x84,0x8f,0xbe,0xb5,0xa8,0xa3
},
// Row 12: unused.
{},
// Row 13: multiplication by 13.
{
0x00,0x0d,0x1a,0x17,0x34,0x39,0x2e,0x23,0x68,0x65,0x72,0x7f,0x5c,0x51,0x46,0x4b,
0xd0,0xdd,0xca,0xc7,0xe4,0xe9,0xfe,0xf3,0xb8,0xb5,0xa2,0xaf,0x8c,0x81,0x96,0x9b,
0xbb,0xb6,0xa1,0xac,0x8f,0x82,0x95,0x98,0xd3,0xde,0xc9,0xc4,0xe7,0xea,0xfd,0xf0,
0x6b,0x66,0x71,0x7c,0x5f,0x52,0x45,0x48,0x03,0x0e,0x19,0x14,0x37,0x3a,0x2d,0x20,
0x6d,0x60,0x77,0x7a,0x59,0x54,0x43,0x4e,0x05,0x08,0x1f,0x12,0x31,0x3c,0x2b,0x26,
0xbd,0xb0,0xa7,0xaa,0x89,0x84,0x93,0x9e,0xd5,0xd8,0xcf,0xc2,0xe1,0xec,0xfb,0xf6,
0xd6,0xdb,0xcc,0xc1,0xe2,0xef,0xf8,0xf5,0xbe,0xb3,0xa4,0xa9,0x8a,0x87,0x90,0x9d,
0x06,0x0b,0x1c,0x11,0x32,0x3f,0x28,0x25,0x6e,0x63,0x74,0x79,0x5a,0x57,0x40,0x4d,
0xda,0xd7,0xc0,0xcd,0xee,0xe3,0xf4,0xf9,0xb2,0xbf,0xa8,0xa5,0x86,0x8b,0x9c,0x91,
0x0a,0x07,0x10,0x1d,0x3e,0x33,0x24,0x29,0x62,0x6f,0x78,0x75,0x56,0x5b,0x4c,0x41,
0x61,0x6c,0x7b,0x76,0x55,0x58,0x4f,0x42,0x09,0x04,0x13,0x1e,0x3d,0x30,0x27,0x2a,
0xb1,0xbc,0xab,0xa6,0x85,0x88,0x9f,0x92,0xd9,0xd4,0xc3,0xce,0xed,0xe0,0xf7,0xfa,
0xb7,0xba,0xad,0xa0,0x83,0x8e,0x99,0x94,0xdf,0xd2,0xc5,0xc8,0xeb,0xe6,0xf1,0xfc,
0x67,0x6a,0x7d,0x70,0x53,0x5e,0x49,0x44,0x0f,0x02,0x15,0x18,0x3b,0x36,0x21,0x2c,
0x0c,0x01,0x16,0x1b,0x38,0x35,0x22,0x2f,0x64,0x69,0x7e,0x73,0x50,0x5d,0x4a,0x47,
0xdc,0xd1,0xc6,0xcb,0xe8,0xe5,0xf2,0xff,0xb4,0xb9,0xae,0xa3,0x80,0x8d,0x9a,0x97
},
// Row 14: multiplication by 14.
{
0x00,0x0e,0x1c,0x12,0x38,0x36,0x24,0x2a,0x70,0x7e,0x6c,0x62,0x48,0x46,0x54,0x5a,
0xe0,0xee,0xfc,0xf2,0xd8,0xd6,0xc4,0xca,0x90,0x9e,0x8c,0x82,0xa8,0xa6,0xb4,0xba,
0xdb,0xd5,0xc7,0xc9,0xe3,0xed,0xff,0xf1,0xab,0xa5,0xb7,0xb9,0x93,0x9d,0x8f,0x81,
0x3b,0x35,0x27,0x29,0x03,0x0d,0x1f,0x11,0x4b,0x45,0x57,0x59,0x73,0x7d,0x6f,0x61,
0xad,0xa3,0xb1,0xbf,0x95,0x9b,0x89,0x87,0xdd,0xd3,0xc1,0xcf,0xe5,0xeb,0xf9,0xf7,
0x4d,0x43,0x51,0x5f,0x75,0x7b,0x69,0x67,0x3d,0x33,0x21,0x2f,0x05,0x0b,0x19,0x17,
0x76,0x78,0x6a,0x64,0x4e,0x40,0x52,0x5c,0x06,0x08,0x1a,0x14,0x3e,0x30,0x22,0x2c,
0x96,0x98,0x8a,0x84,0xae,0xa0,0xb2,0xbc,0xe6,0xe8,0xfa,0xf4,0xde,0xd0,0xc2,0xcc,
0x41,0x4f,0x5d,0x53,0x79,0x77,0x65,0x6b,0x31,0x3f,0x2d,0x23,0x09,0x07,0x15,0x1b,
0xa1,0xaf,0xbd,0xb3,0x99,0x97,0x85,0x8b,0xd1,0xdf,0xcd,0xc3,0xe9,0xe7,0xf5,0xfb,
0x9a,0x94,0x86,0x88,0xa2,0xac,0xbe,0xb0,0xea,0xe4,0xf6,0xf8,0xd2,0xdc,0xce,0xc0,
0x7a,0x74,0x66,0x68,0x42,0x4c,0x5e,0x50,0x0a,0x04,0x16,0x18,0x32,0x3c,0x2e,0x20,
0xec,0xe2,0xf0,0xfe,0xd4,0xda,0xc8,0xc6,0x9c,0x92,0x80,0x8e,0xa4,0xaa,0xb8,0xb6,
0x0c,0x02,0x10,0x1e,0x34,0x3a,0x28,0x26,0x7c,0x72,0x60,0x6e,0x44,0x4a,0x58,0x56,
0x37,0x39,0x2b,0x25,0x0f,0x01,0x13,0x1d,0x47,0x49,0x5b,0x55,0x7f,0x71,0x63,0x6d,
0xd7,0xd9,0xcb,0xc5,0xef,0xe1,0xf3,0xfd,0xa7,0xa9,0xbb,0xb5,0x9f,0x91,0x83,0x8d
}
};
// Coefficient matrix applied by mixColumns(): output row i is the XOR over k of
// (CMDS[i][k] * state row k). A coefficient of 1 is applied as a plain XOR; 2 and 3
// are looked up in GF_MUL_TABLE.
static const uint8_t CMDS[4][4] =
{
{2,3,1,1},
{1,2,3,1},
{1,1,2,3},
{3,1,1,2}
};
// Coefficient matrix applied by invMixColumns(); every coefficient (9, 11, 13, 14)
// is used as a row index into GF_MUL_TABLE.
static const uint8_t INV_CMDS[4][4] =
{
{14,11,13,9},
{9,14,11,13},
{13,9,14,11},
{11,13,9,14}
};
// Appends padding so that the string length becomes a multiple of 16.
// Between 1 and 16 bytes are added, each holding the number of bytes added;
// an input that is already a multiple of 16 receives a full extra 16 bytes.
void aes::pkcs7Pad(std::string& encrypted) noexcept
{
	const auto overhang = encrypted.size() % 16;
	const auto fill = 16 - overhang;
	encrypted.append(fill, static_cast<char>(fill));
}
// Validates and strips the padding written by pkcs7Pad().
//
// The last byte gives the pad length (1..16); that many trailing bytes must all
// carry the same value. Returns true and truncates `decrypted` on success.
// Returns false, leaving `decrypted` untouched, if the string is empty, the pad
// length is out of range or longer than the string, or any padding byte differs.
bool aes::pkcs7Unpad(std::string& decrypted) noexcept
{
	// back() on an empty string is undefined behaviour, so reject it up front.
	SOUP_IF_UNLIKELY (decrypted.empty())
	{
		return false;
	}
	const auto pad_size = static_cast<unsigned char>(decrypted.back());
	// The pad length must also fit inside the string, otherwise we would read
	// (and remove) bytes before its beginning.
	SOUP_IF_UNLIKELY (pad_size < 1 || pad_size > 16 || pad_size > decrypted.size())
	{
		return false;
	}
	const auto body_len = decrypted.size() - pad_size;
	// Verify every padding byte before modifying anything, so that a failed
	// check does not leave the string partially truncated.
	for (auto i = body_len; i != decrypted.size(); ++i)
	{
		SOUP_IF_UNLIKELY (static_cast<unsigned char>(decrypted[i]) != pad_size)
		{
			return false;
		}
	}
	decrypted.resize(body_len); // shrinking never allocates
	return true;
}
void aes::cbcEncrypt(uint8_t* data, size_t data_len, const uint8_t* key, size_t key_len, const uint8_t iv[16]) noexcept
{
data_len -= (data_len % blockBytesLen);
SOUP_IF_LIKELY (data_len != 0)
{
const auto Nr = getNrFromKeyLen(key_len);
alignas(16) uint8_t roundKeys[240];
expandKey(roundKeys, key, key_len);
xorBlocks(&data[0], iv);
encryptBlock(&data[0], &data[0], roundKeys, Nr);
for (size_t i = blockBytesLen; i != data_len; i += blockBytesLen)
{
xorBlocks(&data[i], &data[i - blockBytesLen]);
encryptBlock(&data[i], &data[i], roundKeys, Nr);
}
}
}
void aes::cbcDecrypt(uint8_t* data, size_t data_len, const uint8_t* key, size_t key_len, const uint8_t iv[16]) noexcept
{
data_len -= (data_len % blockBytesLen);
const auto Nr = getNrFromKeyLen(key_len);
alignas(16) uint8_t roundKeys[240];
expandKeyForDecryption(roundKeys, key, key_len);
uint8_t block_heap_a[blockBytesLen];
uint8_t block_heap_b[blockBytesLen];
uint8_t* last_block = block_heap_a;
memcpy(last_block, iv, blockBytesLen);
uint8_t* this_block = block_heap_b;
for (size_t i = 0; i != data_len; i += blockBytesLen)
{
memcpy(this_block, &data[i], blockBytesLen);
decryptBlock(&data[i], &data[i], roundKeys, Nr);
xorBlocks(&data[i], last_block);
std::swap(this_block, last_block);
}
}
void aes::cfbEncrypt(uint8_t* data, size_t data_len, const uint8_t* key, size_t key_len, const uint8_t iv[16]) noexcept
{
data_len -= (data_len % blockBytesLen);
uint8_t block[blockBytesLen]{};
uint8_t encryptedBlock[blockBytesLen]{};
const auto Nr = getNrFromKeyLen(key_len);
alignas(16) uint8_t roundKeys[240];
expandKey(roundKeys, key, key_len);
memcpy(block, iv, blockBytesLen);
for (size_t i = 0; i != data_len; i += blockBytesLen)
{
encryptBlock(block, encryptedBlock, roundKeys, Nr);
xorBlocks(&data[i], encryptedBlock);
memcpy(block, &data[i], blockBytesLen);
}
}
void aes::cfbDecrypt(uint8_t* data, size_t data_len, const uint8_t* key, size_t key_len, const uint8_t iv[16]) noexcept
{
return cfbEncrypt(data, data_len, key, key_len, iv);
}
// Encrypts `data` in place in ECB mode (every 16-byte block independently).
// Only whole 16-byte blocks are processed; trailing bytes are left untouched.
void aes::ecbEncrypt(uint8_t* data, size_t data_len, const uint8_t* key, size_t key_len) noexcept
{
	const size_t usable_len = data_len - (data_len % blockBytesLen);
	alignas(16) uint8_t roundKeys[240];
#if AES_USE_INTRIN && SOUP_X86
	// Fast path: call the key-size-specific intrinsic routines directly. Any other
	// key length falls through to the generic code below.
	if (IS_AES_INTRIN_AVAILBLE)
	{
		switch (key_len)
		{
		case 16:
			intrin::aes_expand_key_128(roundKeys, key);
			for (size_t off = 0; off != usable_len; off += blockBytesLen)
			{
				intrin::aes_encrypt_block_128(data + off, data + off, roundKeys);
			}
			return;

		case 24:
			intrin::aes_expand_key_192(roundKeys, key);
			for (size_t off = 0; off != usable_len; off += blockBytesLen)
			{
				intrin::aes_encrypt_block_192(data + off, data + off, roundKeys);
			}
			return;

		case 32:
			intrin::aes_expand_key_256(roundKeys, key);
			for (size_t off = 0; off != usable_len; off += blockBytesLen)
			{
				intrin::aes_encrypt_block_256(data + off, data + off, roundKeys);
			}
			return;

		default:
			break;
		}
	}
#endif
	expandKey(roundKeys, key, key_len);
	const auto Nr = getNrFromKeyLen(key_len);
	for (size_t off = 0; off != usable_len; off += blockBytesLen)
	{
		encryptBlock(data + off, data + off, roundKeys, Nr);
	}
}
// Decrypts `data` in place in ECB mode (every 16-byte block independently).
// Only whole 16-byte blocks are processed; trailing bytes are left untouched.
void aes::ecbDecrypt(uint8_t* data, size_t data_len, const uint8_t* key, size_t key_len) noexcept
{
	const size_t usable_len = data_len - (data_len % blockBytesLen);
	alignas(16) uint8_t roundKeys[240];
#if AES_USE_INTRIN && SOUP_X86
	// Fast path: expand the key, prepare it for decryption and decrypt with the
	// key-size-specific intrinsic routines. Any other key length falls through.
	if (IS_AES_INTRIN_AVAILBLE)
	{
		switch (key_len)
		{
		case 16:
			intrin::aes_expand_key_128(roundKeys, key);
			intrin::aes_prepare_decryption_128(roundKeys);
			for (size_t off = 0; off != usable_len; off += blockBytesLen)
			{
				intrin::aes_decrypt_block_128(data + off, data + off, roundKeys);
			}
			return;

		case 24:
			intrin::aes_expand_key_192(roundKeys, key);
			intrin::aes_prepare_decryption_192(roundKeys);
			for (size_t off = 0; off != usable_len; off += blockBytesLen)
			{
				intrin::aes_decrypt_block_192(data + off, data + off, roundKeys);
			}
			return;

		case 32:
			intrin::aes_expand_key_256(roundKeys, key);
			intrin::aes_prepare_decryption_256(roundKeys);
			for (size_t off = 0; off != usable_len; off += blockBytesLen)
			{
				intrin::aes_decrypt_block_256(data + off, data + off, roundKeys);
			}
			return;

		default:
			break;
		}
	}
#endif
	expandKeyForDecryption(roundKeys, key, key_len);
	const auto Nr = getNrFromKeyLen(key_len);
	for (size_t off = 0; off != usable_len; off += blockBytesLen)
	{
		decryptBlock(data + off, data + off, roundKeys, Nr);
	}
}
// Encrypts `data` in place in GCM mode and writes the 16-byte tag to `tag`.
// `aadata` is additional data that is covered by the tag but not encrypted.
void aes::gcmEncrypt(uint8_t* data, size_t data_len, const uint8_t* aadata, size_t aadata_len, const uint8_t* key, size_t key_len, const uint8_t* iv, size_t iv_len, uint8_t tag[16]) SOUP_EXCAL
{
	const auto Nr = getNrFromKeyLen(key_len);
	alignas(16) uint8_t roundKeys[240];
	expandKey(roundKeys, key, key_len);

	// h is the encryption of an all-zero block (see calcH); j0 is derived from the IV.
	uint8_t h[16];
	calcH(h, roundKeys, Nr);
	uint8_t j0[16];
	calcJ0(j0, h, iv, iv_len);

	// The payload is processed with the counter starting at j0 + 1;
	// j0 itself is used by calcGcmTag for the tag.
	uint8_t first_counter[16];
	memcpy(first_counter, j0, sizeof(first_counter));
	inc32(first_counter);
	gctr(data, data_len, roundKeys, Nr, first_counter);

	// The tag is computed over the ciphertext, i.e. after encryption.
	calcGcmTag(tag, data, data_len, aadata, aadata_len, roundKeys, Nr, h, j0);
}
// Verifies the GCM tag and, if it matches, decrypts `data` in place.
//
// Returns false without modifying `data` when the tag computed over the ciphertext
// and `aadata` differs from `tag`; returns true after decrypting otherwise.
bool aes::gcmDecrypt(uint8_t* data, size_t data_len, const uint8_t* aadata, size_t aadata_len, const uint8_t* key, size_t key_len, const uint8_t* iv, size_t iv_len, const uint8_t tag[16]) SOUP_EXCAL
{
	const auto Nr = getNrFromKeyLen(key_len);
	alignas(16) uint8_t roundKeys[240];
	aes::expandKey(roundKeys, key, key_len);

	uint8_t h[16];
	calcH(h, roundKeys, Nr);

	uint8_t j0[16];
	calcJ0(j0, h, iv, iv_len);

	// The tag covers the ciphertext, so it is checked before decrypting.
	uint8_t computed_tag[16];
	calcGcmTag(computed_tag, data, data_len, aadata, aadata_len, roundKeys, Nr, h, j0);

	// Compare all 16 bytes unconditionally. An early-exit comparison such as memcmp
	// can reveal through timing how many leading tag bytes were correct.
	uint8_t diff = 0;
	for (size_t i = 0; i != 16; ++i)
	{
		diff |= static_cast<uint8_t>(computed_tag[i] ^ tag[i]);
	}
	if (diff != 0)
	{
		return false;
	}

	{
		// The payload counter starts at j0 + 1, mirroring gcmEncrypt.
		uint8_t icb[16];
		memcpy(icb, j0, 16);
		inc32(icb);
		gctr(data, data_len, roundKeys, Nr, icb);
	}

	return true;
}
void aes::encryptBlock(const uint8_t in[16], uint8_t out[16], const uint8_t roundKeys[240], const int Nr) noexcept
{
#if AES_USE_INTRIN
if (IS_AES_INTRIN_AVAILBLE)
{
if (Nr == 10)
{
return intrin::aes_encrypt_block_128(in, out, roundKeys);
}
else if (Nr == 12)
{
return intrin::aes_encrypt_block_192(in, out, roundKeys);
}
else if (Nr == 14)
{
return intrin::aes_encrypt_block_256(in, out, roundKeys);
}
}
#endif
uint8_t state_0[4 * Nb];
uint8_t* state[4];
state[0] = state_0;
int i, j, round;
for (i = 0; i < 4; i++)
{
state[i] = state[0] + Nb * i;
}
for (i = 0; i < 4; i++)
{
for (j = 0; j < Nb; j++)
{
state[i][j] = in[i + 4 * j];
}
}
addRoundKey(state, roundKeys);
for (round = 1; round <= Nr - 1; round++)
{
subBytes(state);
shiftRows(state);
mixColumns(state);
addRoundKey(state, roundKeys + round * 4 * Nb);
}
subBytes(state);
shiftRows(state);
addRoundKey(state, roundKeys + Nr * 4 * Nb);
for (i = 0; i < 4; i++)
{
for (j = 0; j < Nb; j++)
{
out[i + 4 * j] = state[i][j];
}
}
}
void aes::decryptBlock(const uint8_t in[16], uint8_t out[16], const uint8_t roundKeys[240], const int Nr) noexcept
{
#if AES_USE_INTRIN
if (IS_AES_INTRIN_AVAILBLE)
{
if (Nr == 10)
{
return intrin::aes_decrypt_block_128(in, out, roundKeys);
}
else if (Nr == 12)
{
return intrin::aes_decrypt_block_192(in, out, roundKeys);
}
else if (Nr == 14)
{
return intrin::aes_decrypt_block_256(in, out, roundKeys);
}
}
#endif
uint8_t state_0[4 * Nb];
uint8_t* state[4];
state[0] = state_0;
int i, j, round;
for (i = 0; i < 4; i++)
{
state[i] = state[0] + Nb * i;
}
for (i = 0; i < 4; i++)
{
for (j = 0; j < Nb; j++) {
state[i][j] = in[i + 4 * j];
}
}
addRoundKey(state, roundKeys + Nr * 4 * Nb);
for (round = Nr - 1; round >= 1; round--)
{
invSubBytes(state);
invShiftRows(state);
addRoundKey(state, roundKeys + round * 4 * Nb);
invMixColumns(state);
}
invSubBytes(state);
invShiftRows(state);
addRoundKey(state, roundKeys);
for (i = 0; i < 4; i++)
{
for (j = 0; j < Nb; j++) {
out[i + 4 * j] = state[i][j];
}
}
}
// Expands `key` (`key_len` bytes) into the round keys `w`.
// 16 * (Nr + 1) bytes of `w` are written, where Nr = key_len / 4 + 6.
void aes::expandKey(uint8_t w[240], const uint8_t* key, size_t key_len) noexcept
{
#if AES_USE_INTRIN && SOUP_X86
	if (IS_AES_INTRIN_AVAILBLE)
	{
		switch (key_len)
		{
		case 16:
			intrin::aes_expand_key_128(w, key);
			return;
		case 24:
			intrin::aes_expand_key_192(w, key);
			return;
		case 32:
			intrin::aes_expand_key_256(w, key);
			return;
		default:
			break; // other key lengths use the software path
		}
	}
#endif
	const auto Nk = getNk(key_len);
	const auto Nr = getNrFromNk(Nk);
	SOUP_ASSUME((4 * Nb * (Nr + 1)) <= 240);

	const int key_bytes = 4 * Nk;
	const int schedule_bytes = 4 * Nb * (Nr + 1);

	// Only the first byte of the round-constant word is ever non-zero.
	uint8_t rcon[4]{};

	// The first Nk words of the schedule are the key itself.
	for (int i = 0; i < key_bytes; ++i)
	{
		w[i] = key[i];
	}

	// Every further word is the word Nk positions back XORed with a (possibly
	// transformed) copy of the immediately preceding word.
	for (int i = key_bytes; i < schedule_bytes; i += 4)
	{
		uint8_t temp[4];
		for (int b = 0; b != 4; ++b)
		{
			temp[b] = w[i - 4 + b];
		}

		const int word_in_group = i / 4 % Nk;
		if (word_in_group == 0)
		{
			rotWord(temp);
			subWord(temp);
			rcon[0] = getRoundConstant(i / (Nk * 4));
			xorWords(temp, rcon, temp);
		}
		else if (Nk > 6 && word_in_group == 4)
		{
			// Extra substitution step that only applies to keys longer than 6 words.
			subWord(temp);
		}

		for (int b = 0; b != 4; ++b)
		{
			w[i + b] = w[i + b - 4 * Nk] ^ temp[b];
		}
	}
}
// Expands `key` into round keys suitable for decryptBlock().
// This is expandKey() followed, when the intrinsic implementation is in use, by
// the matching intrin::aes_prepare_decryption_* call. The software decryptBlock()
// path uses the expanded keys as they are.
void aes::expandKeyForDecryption(uint8_t w[240], const uint8_t* key, size_t key_len) noexcept
{
	expandKey(w, key, key_len);
#if AES_USE_INTRIN
	if (IS_AES_INTRIN_AVAILBLE)
	{
		switch (key_len)
		{
		case 16:
			intrin::aes_prepare_decryption_128(w);
			break;
		case 24:
			intrin::aes_prepare_decryption_192(w);
			break;
		case 32:
			intrin::aes_prepare_decryption_256(w);
			break;
		default:
			break;
		}
	}
#endif
}
// Returns the key length in 4-byte words (key_len / 4, rounded down).
int aes::getNk(size_t key_len) noexcept
{
	const size_t words = key_len / 4;
	return static_cast<int>(words);
}
// Returns the number of rounds for a key of `key_len` bytes.
int aes::getNrFromKeyLen(size_t key_len) noexcept
{
	const int Nk = getNk(key_len);
	return getNrFromNk(Nk);
}
// Returns the number of rounds for a key of `Nk` words: Nk + 6
// (so 10, 12 and 14 rounds for 4-, 6- and 8-word keys).
int aes::getNrFromNk(const int Nk) noexcept
{
	const int rounds = 6 + Nk;
	return rounds;
}
// Replaces every byte of the state with its sbox entry.
void aes::subBytes(uint8_t** state) noexcept
{
	for (int row = 0; row != 4; ++row)
	{
		uint8_t* const cells = state[row];
		for (int col = 0; col != Nb; ++col)
		{
			const uint8_t v = cells[col];
			// High nibble selects the table row, low nibble the column.
			cells[col] = sbox[v >> 4][v & 0x0f];
		}
	}
}
void aes::shiftRow(uint8_t** state, int i, int n) noexcept {
uint8_t tmp[Nb];
for (int j = 0; j < Nb; j++)
{
tmp[j] = state[i][(j + n) % Nb];
}
memcpy(state[i], tmp, Nb * sizeof(uint8_t));
}
// Rotates row r of the state left by r positions for r = 1, 2, 3; row 0 is unchanged.
void aes::shiftRows(uint8_t** state) noexcept
{
	for (int row = 1; row != 4; ++row)
	{
		shiftRow(state, row, row);
	}
}
// Shifts `b` left by one bit (dropping the carry) and XORs in 0x1b
// if the top bit of `b` was set.
uint8_t aes::xtime(uint8_t b) noexcept
{
	const uint8_t doubled = static_cast<uint8_t>(b << 1);
	if (b & 0x80)
	{
		return static_cast<uint8_t>(doubled ^ 0x1b);
	}
	return doubled;
}
// Replaces the state with CMDS applied to it: new row i is the XOR over k of
// (CMDS[i][k] * old row k), where multiplication uses GF_MUL_TABLE.
void aes::mixColumns(uint8_t** state) noexcept
{
	uint8_t mixed[4][4]{};

	for (size_t i = 0; i != 4; ++i)
	{
		for (size_t k = 0; k != 4; ++k)
		{
			const uint8_t coeff = CMDS[i][k];
			for (size_t j = 0; j != 4; ++j)
			{
				const uint8_t v = state[k][j];
				// Multiplying by 1 is the identity; the table has no row for it.
				mixed[i][j] ^= (coeff == 1) ? v : GF_MUL_TABLE[coeff][v];
			}
		}
	}

	// Write back only after all outputs are computed, since every output row
	// depends on all four input rows.
	for (size_t i = 0; i != 4; ++i)
	{
		memcpy(state[i], mixed[i], 4);
	}
}
// XORs a 16-byte round key into the state.
// The key is laid out column by column: key byte (row + 4 * col) pairs with state[row][col].
void aes::addRoundKey(uint8_t** state, const uint8_t* key) noexcept
{
	for (int col = 0; col != Nb; ++col)
	{
		for (int row = 0; row != 4; ++row)
		{
			state[row][col] ^= key[row + 4 * col];
		}
	}
}
// Replaces each of the 4 bytes at `a` with its sbox entry.
void aes::subWord(uint8_t* a) noexcept
{
	for (uint8_t* p = a; p != a + 4; ++p)
	{
		const uint8_t v = *p;
		*p = sbox[v >> 4][v & 0x0f];
	}
}
// Rotates the 4 bytes at `a` left by one: {a0, a1, a2, a3} -> {a1, a2, a3, a0}.
void aes::rotWord(uint8_t* a) noexcept
{
	const uint8_t first = a[0];
	for (int i = 0; i != 3; ++i)
	{
		a[i] = a[i + 1];
	}
	a[3] = first;
}
// Writes the byte-wise XOR of the 4-byte words `a` and `b` to `c`.
// `c` may be the same buffer as `a` or `b` (expandKey passes the same word as `a` and `c`).
void aes::xorWords(uint8_t* a, uint8_t* b, uint8_t* c) noexcept
{
	c[0] = a[0] ^ b[0];
	c[1] = a[1] ^ b[1];
	c[2] = a[2] ^ b[2];
	c[3] = a[3] ^ b[3];
}
// Returns 1 with xtime() applied (n - 1) times; for n <= 1 the result is 1.
uint8_t aes::getRoundConstant(int n) noexcept
{
	uint8_t rc = 1;
	for (int steps = n - 1; steps > 0; --steps)
	{
		rc = xtime(rc);
	}
	return rc;
}
// Replaces every byte of the state with its inv_sbox entry.
void aes::invSubBytes(uint8_t** state) noexcept
{
	for (int row = 0; row != 4; ++row)
	{
		uint8_t* const cells = state[row];
		for (int col = 0; col != Nb; ++col)
		{
			const uint8_t v = cells[col];
			// High nibble selects the table row, low nibble the column.
			cells[col] = inv_sbox[v >> 4][v & 0x0f];
		}
	}
}
// Replaces the state with INV_CMDS applied to it: new row i is the XOR over k of
// (INV_CMDS[i][k] * old row k), where multiplication uses GF_MUL_TABLE.
void aes::invMixColumns(uint8_t** state) noexcept
{
	uint8_t mixed[4][4]{};

	for (size_t i = 0; i != 4; ++i)
	{
		for (size_t k = 0; k != 4; ++k)
		{
			const uint8_t* const products = GF_MUL_TABLE[INV_CMDS[i][k]];
			for (size_t j = 0; j != 4; ++j)
			{
				mixed[i][j] ^= products[state[k][j]];
			}
		}
	}

	// Write back only after all outputs are computed, since every output row
	// depends on all four input rows.
	for (size_t i = 0; i != 4; ++i)
	{
		memcpy(state[i], mixed[i], 4);
	}
}
// Undoes shiftRows(): rotating row r left by (Nb - r) is the same as rotating it
// right by r, for r = 1, 2, 3.
void aes::invShiftRows(uint8_t** state) noexcept
{
	for (int row = 1; row != 4; ++row)
	{
		shiftRow(state, row, Nb - row);
	}
}
// XORs the 16-byte block `b` into the 16-byte block `a` (a ^= b).
void aes::xorBlocks(uint8_t a[16], const uint8_t b[16]) noexcept
{
#if SOUP_BITS == 64
	// Work on two 64-bit words, but move the bytes with memcpy instead of casting
	// the pointers: callers pass arbitrary byte pointers (e.g. offsets into a data
	// buffer), so dereferencing them as uint64_t* would be a misaligned and
	// aliasing-violating access.
	uint64_t lhs[2];
	uint64_t rhs[2];
	memcpy(lhs, a, 16);
	memcpy(rhs, b, 16);
	lhs[0] ^= rhs[0];
	lhs[1] ^= rhs[1];
	memcpy(a, lhs, 16);
#else
	for (unsigned int i = 0; i != 16; ++i)
	{
		a[i] ^= b[i];
	}
#endif
}
// XORs the first `len` bytes of `b` into `a` (a ^= b).
void aes::xorBlocks(uint8_t a[], const uint8_t b[], unsigned int len) noexcept
{
	const uint8_t* const b_end = b + len;
	while (b != b_end)
	{
		*a++ ^= *b++;
	}
}
void aes::ghash(uint8_t res[16], const uint8_t h[16], const std::vector<uint8_t>& x) noexcept
{
plusaes::detail::gcm::Block bH(h, 16);
auto bRes = plusaes::detail::gcm::ghash(bH, x);
memcpy(res, bRes.data(), 16);
}
void aes::calcH(uint8_t h[16], uint8_t roundKeys[240], const int Nr) noexcept
{
memset(h, 0, 16);
aes::encryptBlock(h, h, roundKeys, Nr);
}
// Derives the 16-byte block j0 from the IV.
// A 12-byte IV is used directly, followed by the bytes 00 00 00 01. Any other IV
// length is zero-padded to a multiple of 128 bits, followed by 64 zero bits and
// the IV length in bits as a 64-bit value, and hashed with ghash() under `h`.
void aes::calcJ0(uint8_t j0[16], const uint8_t h[16], const uint8_t* iv, size_t iv_len) SOUP_EXCAL
{
	if (iv_len != 12)
	{
		namespace gcm = plusaes::detail::gcm;

		const auto len_iv = iv_len * 8;
		// Number of zero bits needed to pad the IV up to a multiple of 128 bits.
		const auto s = 128 * gcm::ceil(len_iv / 128.0) - len_iv;

		std::vector<uint8_t> ghash_in;
		ghash_in.reserve(32);
		gcm::push_back(ghash_in, iv, iv_len);
		gcm::push_back_zero_bits(ghash_in, s + 64);
		gcm::push_back(ghash_in, std::bitset<64>(len_iv));

		ghash(j0, h, ghash_in);
		return;
	}

	memcpy(j0, iv, iv_len);
	j0[12] = 0;
	j0[13] = 0;
	j0[14] = 0;
	j0[15] = 1;
}
// Increments the last 4 bytes of `block`, interpreted as a big-endian 32-bit
// counter, by one (wrapping around on overflow). The first 12 bytes are untouched.
void aes::inc32(uint8_t block[16]) noexcept
{
	// Byte-wise increment with carry, starting at the least significant byte.
	// This avoids reinterpreting the byte buffer as uint32_t (which is not
	// guaranteed to be suitably aligned) and needs no byte-order handling.
	for (int i = 15; i >= 12; --i)
	{
		if (++block[i] != 0)
		{
			break; // no carry into the next byte
		}
	}
}
// Applies counter-mode keystream to `data` in place: each 16-byte chunk is XORed
// with the encryption of the counter block, which is advanced with inc32() after
// every full chunk. A trailing partial chunk uses only as many keystream bytes as
// it has data bytes.
// Note: 16 bytes are read from `icb`, regardless of the bound in the parameter declaration.
void aes::gctr(uint8_t* data, size_t data_len, const uint8_t roundKeys[240], const int Nr, const uint8_t icb[8]) noexcept
{
	uint8_t counter[16];
	memcpy(counter, icb, 16);
	uint8_t keystream[16];

	const size_t tail_len = data_len % 16;
	const size_t full_len = data_len - tail_len;

	size_t off = 0;
	while (off < full_len)
	{
		encryptBlock(counter, keystream, roundKeys, Nr);
		xorBlocks(data + off, keystream);
		inc32(counter);
		off += 16;
	}

	if (tail_len != 0)
	{
		encryptBlock(counter, keystream, roundKeys, Nr);
		xorBlocks(data + off, keystream, tail_len);
	}
}
// Computes the 16-byte GCM tag over `aadata` and `data`.
// The GHASH input is: aadata, zero-padded to a multiple of 128 bits; data,
// zero-padded likewise; then the bit lengths of aadata and data as 64-bit values.
// The hash is then run through gctr() with j0 as the counter block.
// Note: `roundKeys` is forwarded to gctr(), which declares it as a 240-byte buffer.
void aes::calcGcmTag(uint8_t tag[16], uint8_t* data, size_t data_len, const uint8_t* aadata, size_t aadata_len, const uint8_t roundKeys[16], const int Nr, const uint8_t h[16], const uint8_t j0[16]) SOUP_EXCAL
{
	namespace gcm = plusaes::detail::gcm;

	const auto lenC = data_len * 8;
	const auto lenA = aadata_len * 8;
	// Zero bits needed to pad the data (u) and the additional data (v) to 128-bit multiples.
	const std::size_t u = 128 * gcm::ceil(lenC / 128.0) - lenC;
	const std::size_t v = 128 * gcm::ceil(lenA / 128.0) - lenA;

	std::vector<unsigned char> ghash_in;
	ghash_in.reserve((aadata_len + v / 8) + (data_len + u / 8) + 8 + 8);

	gcm::push_back(ghash_in, aadata, aadata_len);
	gcm::push_back_zero_bits(ghash_in, v);

	gcm::push_back(ghash_in, data, data_len);
	gcm::push_back_zero_bits(ghash_in, u);

	gcm::push_back(ghash_in, std::bitset<64>(lenA));
	gcm::push_back(ghash_in, std::bitset<64>(lenC));

	ghash(tag, h, ghash_in);
	gctr(tag, 16, roundKeys, Nr, j0);
}
}