#include "../soup/base.hpp"
#include <cstddef>
#include <cstdint>
#include <cstring>
#if SOUP_X86
#include <smmintrin.h>
#include <wmmintrin.h>
#elif SOUP_ARM
#if SOUP_WINDOWS
#include <intrin.h>
#else
#include <arm_acle.h>
#endif
#endif
NAMESPACE_SOUP
{
namespace intrin
{
#if SOUP_X86
uint32_t crc32_pclmul(const uint8_t* p, size_t size, uint32_t crc) noexcept
{
static const uint64_t
#ifdef _MSC_VER
__declspec(align(16))
#else
__attribute__((aligned(16)))
#endif
s_u[2] = { 0x1DB710641, 0x1F7011641 }, s_k5k0[2] = { 0x163CD6124, 0 }, s_k3k4[2] = { 0x1751997D0, 0xCCAA009E };
__m128i b = _mm_xor_si128(_mm_cvtsi32_si128(~crc), _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)));
const __m128i k3k4 = _mm_load_si128(reinterpret_cast<const __m128i*>(s_k3k4));
for (size -= 16, p += 16; size >= 16; size -= 16, p += 16)
b = _mm_xor_si128(_mm_xor_si128(_mm_clmulepi64_si128(b, k3k4, 17), _mm_loadu_si128(reinterpret_cast<const __m128i*>(p))), _mm_clmulepi64_si128(b, k3k4, 0));
const __m128i z = _mm_set_epi32(0, ~0, 0, ~0), u = _mm_load_si128(reinterpret_cast<const __m128i*>(s_u));
b = _mm_xor_si128(_mm_srli_si128(b, 8), _mm_clmulepi64_si128(b, k3k4, 16));
b = _mm_xor_si128(_mm_clmulepi64_si128(_mm_and_si128(b, z), _mm_loadl_epi64(reinterpret_cast<const __m128i*>(s_k5k0)), 0), _mm_srli_si128(b, 4));
return ~_mm_extract_epi32(_mm_xor_si128(b, _mm_clmulepi64_si128(_mm_and_si128(_mm_clmulepi64_si128(_mm_and_si128(b, z), u, 16), z), u, 0)), 1);
}
#elif SOUP_ARM
// Computes a CRC-32 over `size` bytes at `p` using the ARMv8 CRC32 intrinsics
// (__crc32d for 8-byte chunks, __crc32b for the remaining bytes).
//
// p    - input bytes; no alignment is required.
// size - number of bytes to process; 0 is allowed and returns `crc` unchanged.
// crc  - seed value. It is bitwise-inverted before processing and the result
//        is inverted again on return, so a previous return value can be passed
//        back in to continue over further data.
//
// Returns the updated CRC.
uint32_t crc32_armv8(const uint8_t* p, size_t size, uint32_t crc) noexcept
{
	crc = ~crc;

	// Consume 8 bytes at a time. The chunk is copied out with memcpy rather than
	// read through a casted uint64_t pointer: `p` may be misaligned, and such a
	// dereference is undefined behaviour (alignment and strict aliasing).
	// Compilers lower this fixed-size memcpy to a plain load.
	for (; size >= 8; size -= 8, p += 8)
	{
		uint64_t chunk;
		std::memcpy(&chunk, p, sizeof(chunk));
		crc = __crc32d(crc, chunk);
	}

	// Fewer than 8 bytes are left; finish them one at a time.
	for (; size != 0; --size, ++p)
	{
		crc = __crc32b(crc, *p);
	}

	return ~crc;
}
#endif
}
}