#ifndef PG_BITUTILS_H
#define PG_BITUTILS_H
/*
 * Record which bit-scan primitives are available.  Under MSVC both
 * HAVE_BITSCAN_FORWARD and HAVE_BITSCAN_REVERSE are defined and <intrin.h>
 * is pulled in for the _BitScan* intrinsics used below.  With any other
 * compiler each symbol is defined only when the matching builtin is flagged
 * as present: HAVE__BUILTIN_CTZ for forward scans, HAVE__BUILTIN_CLZ for
 * reverse scans.
 */
#ifdef _MSC_VER
#include <intrin.h>
#define HAVE_BITSCAN_FORWARD
#define HAVE_BITSCAN_REVERSE
#else
#if defined(HAVE__BUILTIN_CTZ)
#define HAVE_BITSCAN_FORWARD
#endif
#if defined(HAVE__BUILTIN_CLZ)
#define HAVE_BITSCAN_REVERSE
#endif
#endif							/* _MSC_VER */
/*
 * Lookup tables with 256 entries each, indexed by a single byte value.  Only
 * the declarations live here; the contents are defined elsewhere.  The
 * portable fallback paths in this header index them with one byte of the
 * input at a time.
 *
 * NOTE(review): judging by the names and the way the fallbacks use them,
 * entry b is presumably the position of the highest set bit of b, the
 * position of the lowest set bit of b, and the number of set bits in b,
 * respectively -- confirm against the table definitions.
 */
extern PGDLLIMPORT const uint8 pg_leftmost_one_pos[256];
extern PGDLLIMPORT const uint8 pg_rightmost_one_pos[256];
extern PGDLLIMPORT const uint8 pg_number_of_ones[256];
/*
 * pg_leftmost_one_pos32
 *		Returns the position of the most significant set bit in "word",
 *		counted from the least significant bit, so the result is 0..31.
 *
 * "word" must not be 0.  That precondition is checked only by Assert.
 */
static inline int
pg_leftmost_one_pos32(uint32 word)
{
#ifdef HAVE__BUILTIN_CLZ
	Assert(word != 0);

	/* clz yields the count of leading zero bits; convert to a bit index */
	return 31 - __builtin_clz(word);
#elif defined(_MSC_VER)
	unsigned long result;

	Assert(word != 0);

	/*
	 * The intrinsic's return value only reports whether any bit was set,
	 * which the Assert above already covers, so it is discarded explicitly
	 * instead of being parked in a local that nothing reads.
	 */
	(void) _BitScanReverse(&result, word);
	return (int) result;
#else
	int			shift = 32 - 8;

	Assert(word != 0);

	/* Walk down from the top byte until a non-zero byte is found ... */
	while ((word >> shift) == 0)
		shift -= 8;

	/* ... then add the table entry for that byte to the byte's offset */
	return shift + pg_leftmost_one_pos[(word >> shift) & 255];
#endif							/* HAVE__BUILTIN_CLZ */
}
/*
 * pg_leftmost_one_pos64
 *		Returns the position of the most significant set bit in the 64-bit
 *		"word", counted from the least significant bit, so the result is
 *		0..63.
 *
 * "word" must not be 0.  That precondition is checked only by Assert.
 */
static inline int
pg_leftmost_one_pos64(uint64 word)
{
#ifdef HAVE__BUILTIN_CLZ
	Assert(word != 0);

	/* Use whichever clz builtin takes a 64-bit operand on this platform */
#if defined(HAVE_LONG_INT_64)
	return 63 - __builtin_clzl(word);
#elif defined(HAVE_LONG_LONG_INT_64)
	return 63 - __builtin_clzll(word);
#else
#error must have a working 64-bit integer datatype
#endif							/* HAVE_LONG_INT_64 */

#elif defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64))
	unsigned long result;

	Assert(word != 0);

	/*
	 * The intrinsic's return value only reports whether any bit was set,
	 * which the Assert above already covers, so it is discarded explicitly
	 * instead of being parked in a local that nothing reads.
	 */
	(void) _BitScanReverse64(&result, word);
	return (int) result;
#else
	/*
	 * Portable path; also taken by MSVC when the target is neither AMD64
	 * nor ARM64.
	 */
	int			shift = 64 - 8;

	Assert(word != 0);

	/* Walk down from the top byte until a non-zero byte is found ... */
	while ((word >> shift) == 0)
		shift -= 8;

	/* ... then add the table entry for that byte to the byte's offset */
	return shift + pg_leftmost_one_pos[(word >> shift) & 255];
#endif							/* HAVE__BUILTIN_CLZ */
}
/*
 * pg_rightmost_one_pos32
 *		Returns the position of the least significant set bit in "word",
 *		counted from the least significant bit, so the result is 0..31.
 *
 * "word" must not be 0.  That precondition is checked only by Assert.
 */
static inline int
pg_rightmost_one_pos32(uint32 word)
{
#ifdef HAVE__BUILTIN_CTZ
	Assert(word != 0);

	/* The count of trailing zero bits is the index of the lowest set bit */
	return __builtin_ctz(word);
#elif defined(_MSC_VER)
	unsigned long result;

	Assert(word != 0);

	/*
	 * The intrinsic's return value only reports whether any bit was set,
	 * which the Assert above already covers, so it is discarded explicitly
	 * instead of being parked in a local that nothing reads.
	 */
	(void) _BitScanForward(&result, word);
	return (int) result;
#else
	int			result = 0;

	Assert(word != 0);

	/* Drop all-zero low bytes, accumulating 8 bit positions for each */
	while ((word & 255) == 0)
	{
		word >>= 8;
		result += 8;
	}

	/* Add the table entry for the first non-zero byte */
	result += pg_rightmost_one_pos[word & 255];
	return result;
#endif							/* HAVE__BUILTIN_CTZ */
}
/*
 * pg_rightmost_one_pos64
 *		Returns the position of the least significant set bit in the 64-bit
 *		"word", counted from the least significant bit, so the result is
 *		0..63.
 *
 * "word" must not be 0.  That precondition is checked only by Assert.
 */
static inline int
pg_rightmost_one_pos64(uint64 word)
{
#ifdef HAVE__BUILTIN_CTZ
	Assert(word != 0);

	/* Use whichever ctz builtin takes a 64-bit operand on this platform */
#if defined(HAVE_LONG_INT_64)
	return __builtin_ctzl(word);
#elif defined(HAVE_LONG_LONG_INT_64)
	return __builtin_ctzll(word);
#else
#error must have a working 64-bit integer datatype
#endif							/* HAVE_LONG_INT_64 */

#elif defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64))
	unsigned long result;

	Assert(word != 0);

	/*
	 * The intrinsic's return value only reports whether any bit was set,
	 * which the Assert above already covers, so it is discarded explicitly
	 * instead of being parked in a local that nothing reads.
	 */
	(void) _BitScanForward64(&result, word);
	return (int) result;
#else
	/*
	 * Portable path; also taken by MSVC when the target is neither AMD64
	 * nor ARM64.
	 */
	int			result = 0;

	Assert(word != 0);

	/* Drop all-zero low bytes, accumulating 8 bit positions for each */
	while ((word & 255) == 0)
	{
		word >>= 8;
		result += 8;
	}

	/* Add the table entry for the first non-zero byte */
	result += pg_rightmost_one_pos[word & 255];
	return result;
#endif							/* HAVE__BUILTIN_CTZ */
}
/*
 * pg_nextpower2_32
 *		Returns the smallest power of 2 that is greater than or equal to
 *		"num".
 *
 * The Assert requires 0 < num <= PG_UINT32_MAX / 2 + 1.
 */
static inline uint32
pg_nextpower2_32(uint32 num)
{
	Assert(num > 0 && num <= PG_UINT32_MAX / 2 + 1);

	/*
	 * num and num - 1 share at least one set bit exactly when num has more
	 * than one bit set, i.e. when it is not already a power of 2.  In that
	 * case the answer is the bit just above num's highest set bit.
	 */
	if ((num & (num - 1)) != 0)
		return ((uint32) 1) << (pg_leftmost_one_pos32(num) + 1);

	/* single bit set: num is its own answer */
	return num;
}
/*
 * pg_nextpower2_64
 *		Returns the smallest power of 2 that is greater than or equal to
 *		the 64-bit value "num".
 *
 * The Assert requires 0 < num <= PG_UINT64_MAX / 2 + 1.
 */
static inline uint64
pg_nextpower2_64(uint64 num)
{
	Assert(num > 0 && num <= PG_UINT64_MAX / 2 + 1);

	/*
	 * num and num - 1 share at least one set bit exactly when num has more
	 * than one bit set, i.e. when it is not already a power of 2.  In that
	 * case the answer is the bit just above num's highest set bit.
	 */
	if ((num & (num - 1)) != 0)
		return ((uint64) 1) << (pg_leftmost_one_pos64(num) + 1);

	/* single bit set: num is its own answer */
	return num;
}
/*
 * pg_prevpower2_32
 *		Returns the largest power of 2 that is less than or equal to "num",
 *		i.e. "num" with every bit below its highest set bit cleared.
 *
 * "num" must not be 0; the only check is the Assert inside
 * pg_leftmost_one_pos32().
 */
static inline uint32
pg_prevpower2_32(uint32 num)
{
	int			top_bit = pg_leftmost_one_pos32(num);

	return ((uint32) 1) << top_bit;
}
/*
 * pg_prevpower2_64
 *		Returns the largest power of 2 that is less than or equal to the
 *		64-bit value "num", i.e. "num" with every bit below its highest set
 *		bit cleared.
 *
 * "num" must not be 0; the only check is the Assert inside
 * pg_leftmost_one_pos64().
 */
static inline uint64
pg_prevpower2_64(uint64 num)
{
	int			top_bit = pg_leftmost_one_pos64(num);

	return ((uint64) 1) << top_bit;
}
/*
 * pg_ceil_log2_32
 *		Returns the base-2 logarithm of "num", rounded up.
 *
 * Inputs 0 and 1 both yield 0.  For larger values the result is one more
 * than the index of the highest set bit of num - 1.
 */
static inline uint32
pg_ceil_log2_32(uint32 num)
{
	return (num < 2) ? 0 : pg_leftmost_one_pos32(num - 1) + 1;
}
/*
 * pg_ceil_log2_64
 *		Returns the base-2 logarithm of the 64-bit value "num", rounded up.
 *
 * Inputs 0 and 1 both yield 0.  For larger values the result is one more
 * than the index of the highest set bit of num - 1.
 */
static inline uint64
pg_ceil_log2_64(uint64 num)
{
	return (num < 2) ? 0 : pg_leftmost_one_pos64(num - 1) + 1;
}
/*
 * MSVC targeting x86_64 (_M_AMD64) always gets HAVE_X86_64_POPCNTQ defined
 * here; other toolchains must have it defined before this point for the
 * test below to succeed.
 */
#if defined(_MSC_VER) && defined(_M_AMD64)
#define HAVE_X86_64_POPCNTQ
#endif
/*
 * TRY_POPCNT_FAST is enabled only when HAVE_X86_64_POPCNTQ is set and one of
 * HAVE__GET_CPUID / HAVE__CPUID is also defined.
 *
 * NOTE(review): presumably the cpuid helper is required so that CPU support
 * can be probed at run time; the probing code is not in this file.
 */
#ifdef HAVE_X86_64_POPCNTQ
#if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID)
#define TRY_POPCNT_FAST 1
#endif
#endif
/*
 * Declarations of the popcount entry points.  Callers use the same names in
 * both configurations; only the kind of symbol differs.
 */
#ifdef TRY_POPCNT_FAST
/*
 * With TRY_POPCNT_FAST the entry points are function pointers.
 * NOTE(review): presumably they are pointed at the best available
 * implementation at run time -- the definitions live elsewhere.
 */
extern PGDLLIMPORT int (*pg_popcount32) (uint32 word);
extern PGDLLIMPORT int (*pg_popcount64) (uint64 word);
extern PGDLLIMPORT uint64 (*pg_popcount_optimized) (const char *buf, int bytes);
extern PGDLLIMPORT uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask);
/* AVX-512 variants, declared only when that build option is enabled */
#ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
extern bool pg_popcount_avx512_available(void);
extern uint64 pg_popcount_avx512(const char *buf, int bytes);
extern uint64 pg_popcount_masked_avx512(const char *buf, int bytes, bits8 mask);
#endif
#else
/* Without TRY_POPCNT_FAST the entry points are ordinary functions */
extern int pg_popcount32(uint32 word);
extern int pg_popcount64(uint64 word);
extern uint64 pg_popcount_optimized(const char *buf, int bytes);
extern uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask);
#endif							/* TRY_POPCNT_FAST */
/*
 * pg_popcount
 *		Counts the set bits in the "bytes" bytes starting at "buf".
 *
 * Inputs shorter than the cutoff (8 bytes when pointers are at least 8 bytes
 * wide, otherwise 4) are handled inline by summing pg_number_of_ones[] over
 * each byte.  Anything longer is passed to pg_popcount_optimized().
 */
static inline uint64
pg_popcount(const char *buf, int bytes)
{
	uint64		total = 0;
#if SIZEOF_VOID_P >= 8
	int			cutoff = 8;
#else
	int			cutoff = 4;
#endif

	/* Long enough for the optimized routine: delegate right away */
	if (bytes >= cutoff)
		return pg_popcount_optimized(buf, bytes);

	/* Short input: one table lookup per byte */
	for (; bytes != 0; bytes--)
	{
		total += pg_number_of_ones[(unsigned char) *buf];
		buf++;
	}

	return total;
}
/*
 * pg_popcount_masked
 *		Counts the set bits in the "bytes" bytes starting at "buf", after
 *		ANDing every byte with "mask".
 *
 * Inputs shorter than the cutoff (8 bytes when pointers are at least 8 bytes
 * wide, otherwise 4) are handled inline by summing pg_number_of_ones[] over
 * each masked byte.  Anything longer is passed to
 * pg_popcount_masked_optimized().
 */
static inline uint64
pg_popcount_masked(const char *buf, int bytes, bits8 mask)
{
	uint64		total = 0;
#if SIZEOF_VOID_P >= 8
	int			cutoff = 8;
#else
	int			cutoff = 4;
#endif

	/* Long enough for the optimized routine: delegate right away */
	if (bytes >= cutoff)
		return pg_popcount_masked_optimized(buf, bytes, mask);

	/* Short input: mask each byte, then one table lookup per byte */
	for (; bytes != 0; bytes--)
	{
		total += pg_number_of_ones[(unsigned char) *buf & mask];
		buf++;
	}

	return total;
}
/*
 * pg_rotate_right32
 *		Rotates the bits of "word" right by "n" positions.
 *
 * The rotation count is reduced modulo 32.  For n in 1..31 the result is the
 * usual (word >> n) | (word << (32 - n)).  For n == 0 (or any multiple of
 * 32) "word" is returned unchanged; the unmasked expression would shift by
 * the full width of the type in that case, which C leaves undefined.
 */
static inline uint32
pg_rotate_right32(uint32 word, int n)
{
	unsigned int count = (unsigned int) n & 31;

	/* "(32 - count) & 31" turns the count == 0 case into a zero-bit shift */
	return (word >> count) | (word << ((32 - count) & 31));
}
/*
 * pg_rotate_left32
 *		Rotates the bits of "word" left by "n" positions.
 *
 * The rotation count is reduced modulo 32.  For n in 1..31 the result is the
 * usual (word << n) | (word >> (32 - n)).  For n == 0 (or any multiple of
 * 32) "word" is returned unchanged; the unmasked expression would shift by
 * the full width of the type in that case, which C leaves undefined.
 */
static inline uint32
pg_rotate_left32(uint32 word, int n)
{
	unsigned int count = (unsigned int) n & 31;

	/* "(32 - count) & 31" turns the count == 0 case into a zero-bit shift */
	return (word << count) | (word >> ((32 - count) & 31));
}
/*
 * size_t flavours of the helpers above: each *_size_t name is an alias for
 * the 32-bit function when SIZEOF_SIZE_T is 4, and for the 64-bit function
 * otherwise.
 */
#if SIZEOF_SIZE_T == 4
#define pg_leftmost_one_pos_size_t pg_leftmost_one_pos32
#define pg_nextpower2_size_t pg_nextpower2_32
#define pg_prevpower2_size_t pg_prevpower2_32
#else
#define pg_leftmost_one_pos_size_t pg_leftmost_one_pos64
#define pg_nextpower2_size_t pg_nextpower2_64
#define pg_prevpower2_size_t pg_prevpower2_64
#endif							/* SIZEOF_SIZE_T == 4 */
#endif