#if defined(_MSC_VER)
#include <intrin.h>
#endif
#if defined(__i386) || defined(_M_IX86)
#define ARCH_X86
#elif defined(__x86_64__) || defined(_M_X64)
#define ARCH_X64
#elif (defined(__arm__) && defined(__ARM_ARCH) && __ARM_ARCH >= 5) || (defined(_M_ARM) && _M_ARM >= 5) || defined(__ARM_FEATURE_CLZ)
#define ARCH_ARM
#endif
#if defined(_WIN64) || defined(_LP64) || defined(__LP64__)
#define ARCH_64BIT
#else
#define ARCH_32BIT
#endif
#if defined(ARCH_X86) || defined(ARCH_X64)
#if defined(_MSC_VER) && _MSC_VER >= 1500
#define HAS_LZCNT32_HARD
#if defined(ARCH_64BIT)
#define HAS_LZCNT64_HARD
#endif
#elif defined(__GNUC__) || defined(__clang__)
#define HAS_LZCNT32_HARD
#if defined(ARCH_64BIT)
#define HAS_LZCNT64_HARD
#endif
#endif
#elif defined(ARCH_ARM)
#if defined(__GNUC__) || defined(__clang__)
#define HAS_LZCNT32_HARD
#if defined(ARCH_64BIT)
#define HAS_LZCNT64_HARD
#endif
#endif
#endif
#if defined(_MSC_VER) && _MSC_VER >= 1500 && (defined(ARCH_X86) || defined(ARCH_X64)) && !defined(__clang__)
#define HAS_LZCNT_INTRINSIC
#elif (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)))
#define HAS_LZCNT_INTRINSIC
#elif defined(__clang__)
#if defined(__has_builtin)
#if __has_builtin(__builtin_clzll) || __has_builtin(__builtin_clzl)
#define HAS_LZCNT_INTRINSIC
#endif
#endif
#endif
unsigned int lzcnt32_generic(unsigned int x)
{
unsigned int n;
static unsigned int clz_table_4[] = {
0,
4,
3, 3,
2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1
};
if (x == 0) {
return sizeof(x)*8;
}
n = clz_table_4[x >> (sizeof(x)*8 - 4)];
if (n == 0) {
if ((x & 0xFFFF0000) == 0) { n = 16; x <<= 16; }
if ((x & 0xFF000000) == 0) { n += 8; x <<= 8; }
if ((x & 0xF0000000) == 0) { n += 4; x <<= 4; }
n += clz_table_4[x >> (sizeof(x)*8 - 4)];
}
return n - 1;
}
unsigned int lzcnt64_generic(unsigned long long x)
{
unsigned int n;
static unsigned int clz_table_4[] = {
0,
4,
3, 3,
2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1
};
if (x == 0) {
return sizeof(x)*8;
}
n = clz_table_4[x >> (sizeof(x)*8 - 4)];
if (n == 0) {
if ((x & ((unsigned long long)0xFFFFFFFF << 32)) == 0) { n = 32; x <<= 32; }
if ((x & ((unsigned long long)0xFFFF0000 << 32)) == 0) { n += 16; x <<= 16; }
if ((x & ((unsigned long long)0xFF000000 << 32)) == 0) { n += 8; x <<= 8; }
if ((x & ((unsigned long long)0xF0000000 << 32)) == 0) { n += 4; x <<= 4; }
n += clz_table_4[x >> (sizeof(x)*8 - 4)];
}
return n - 1;
}
#if defined(_MSC_VER)
unsigned int lzcnt32_msvc_bsr(unsigned int x)
{
unsigned long n;
if (x == 0) {
return 32;
}
_BitScanReverse(&n, x);
return 32 - n - 1;
}
#if defined(ARCH_64BIT)
unsigned int lzcnt64_msvc_bsr(unsigned long long x)
{
unsigned long n;
if (x == 0) {
return 64;
}
_BitScanReverse64(&n, x);
return 64 - n - 1;
}
#endif
#elif (defined(__GNUC__) || defined(__clang__)) && defined(HAS_LZCNT_INTRINSIC)
unsigned int lzcnt32_gcc_builtin(unsigned int x)
{
if (x == 0) {
return 32;
}
return (unsigned int)__builtin_clzl((unsigned long)x);
}
unsigned int lzcnt64_gcc_builtin(unsigned long long x)
{
if (x == 0) {
return 64;
}
return (unsigned int)__builtin_clzll(x);
}
#endif
int has_lzcnt_hard()
{
#if defined(ARCH_X86) || defined(ARCH_X64)
int info[4] = {0};
#if defined(_MSC_VER)
__cpuid(info, 0x80000001);
#elif defined(__GNUC__) || defined(__clang__)
#if defined(ARCH_X86) && defined(__PIC__)
__asm__ __volatile__ (
"xchg{l} {%%}ebx, %k1;"
"cpuid;"
"xchg{l} {%%}ebx, %k1;"
: "=a"(info[0]), "=&r"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(0x80000001), "c"(0)
);
#else
__asm__ __volatile__ (
"cpuid" : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(0x80000001), "c"(0)
);
#endif
#endif
return (info[2] & (1 << 5)) != 0;
#elif defined(ARCH_ARM)
return 1;
#else
return 0;
#endif
}
#if defined(HAS_LZCNT32_HARD)
#if defined(ARCH_X86) || defined(ARCH_X64)
#if defined(_MSC_VER) && !defined(__clang__)
unsigned int lzcnt32_msvc_x86(unsigned int x)
{
return __lzcnt(x);
}
#elif defined(__GNUC__) || defined(__clang__)
unsigned int lzcnt32_gcc_x86(unsigned int x)
{
unsigned int r;
__asm__ __volatile__ (
"lzcnt{l %1, %0| %0, %1}" : "=r"(r) : "r"(x)
);
return r;
}
#endif
#endif
#if defined(ARCH_ARM)
#if defined(__GNUC__) || defined(__clang__)
unsigned int lzcnt32_gcc_arm(unsigned int x)
{
unsigned int r;
__asm__ __volatile__ (
#if defined(ARCH_32BIT)
"clz %[out], %[in]" : [out]"=r"(r) : [in]"r"(x)
#else
"clz %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(x)
#endif
);
return r;
}
#endif
#endif
unsigned int lzcnt32_hard(unsigned int x)
{
#if defined(ARCH_X86) || defined(ARCH_X64)
#if defined(_MSC_VER) && !defined(__clang__)
return lzcnt32_msvc_x86(x);
#elif defined(__GNUC__) || defined(__clang__)
return lzcnt32_gcc_x86(x);
#else
#error "This compiler does not support the lzcnt intrinsic."
#endif
#elif defined(ARCH_ARM)
#if defined(__GNUC__) || defined(__clang__)
return lzcnt32_gcc_arm(x);
#else
#error "This compiler does not support the clz intrinsic."
#endif
#else
#error "The build target does not support a native instruction."
#endif
}
#endif
#if defined(HAS_LZCNT64_HARD)
#if defined(ARCH_X86) || defined(ARCH_X64)
#if defined(_MSC_VER) && !defined(__clang__)
unsigned int lzcnt64_msvc_x64(unsigned long long x)
{
return (unsigned int)__lzcnt64(x);
}
#elif defined(__GNUC__) || defined(__clang__)
unsigned int lzcnt64_gcc_x64(unsigned long long x)
{
unsigned long long r;
__asm__ __volatile__ (
"lzcnt{ %1, %0| %0, %1}" : "=r"(r) : "r"(x)
);
return r;
}
#endif
#endif
#if defined(ARCH_ARM)
#if defined(__GNUC__) || defined(__clang__)
unsigned int lzcnt64_gcc_arm(unsigned long long x)
{
unsigned long long r;
__asm__ __volatile__ (
"clz %[out], %[in]" : [out]"=r"(r) : [in]"r"(x)
);
return r;
}
#endif
#endif
unsigned int lzcnt64_hard(unsigned long long x)
{
#if defined(ARCH_X64)
#if defined(_MSC_VER) && !defined(__clang__)
return lzcnt64_msvc_x64(x);
#elif defined(__GNUC__) || defined(__clang__)
return lzcnt64_gcc_x64(x);
#else
#error "This compiler does not support the lzcnt intrinsic."
#endif
#elif defined(ARCH_ARM) && defined(ARCH_64BIT)
#if defined(__GNUC__) || defined(__clang__)
return lzcnt64_gcc_arm(x);
#else
#error "This compiler does not support the clz intrinsic."
#endif
#else
#error "The build target does not support a native instruction."
#endif
}
#endif
unsigned int lzcnt32_soft(unsigned int x)
{
#if defined(_MSC_VER)
return lzcnt32_msvc_bsr(x);
#elif defined(HAS_LZCNT_INTRINSIC)
return lzcnt32_gcc_builtin(x);
#else
return lzcnt32_generic(x);
#endif
}
unsigned int lzcnt64_soft(unsigned long long x)
{
#if defined(ARCH_64BIT)
#if defined(_MSC_VER)
return lzcnt64_msvc_bsr(x);
#elif defined(HAS_LZCNT_INTRINSIC)
return lzcnt64_gcc_builtin(x);
#else
return lzcnt64_generic(x);
#endif
#else
return lzcnt64_generic(x);
#endif
}
unsigned int lzcnt32(unsigned int x)
{
#if defined(HAS_LZCNT32_HARD)
if (has_lzcnt_hard()) {
return lzcnt32_hard(x);
} else
#endif
{
return lzcnt32_soft(x);
}
}
unsigned int lzcnt64(unsigned int x)
{
#if defined(HAS_LZCNT64_HARD)
if (has_lzcnt_hard()) {
return lzcnt64_hard(x);
} else
#endif
{
return lzcnt64_soft(x);
}
}