#ifndef BMFUNC__H__INCLUDED__
#define BMFUNC__H__INCLUDED__
#include <memory.h>
#include <type_traits>
#include "bmdef.h"
#include "bmutil.h"
#ifdef _MSC_VER
# pragma warning( disable: 4146 )
#endif
namespace bm
{
inline
bm::id_t bit_block_calc_count_range(const bm::word_t* block,
bm::word_t left,
bm::word_t right) BMNOEXCEPT;
inline
bm::id_t bit_block_any_range(const bm::word_t* block,
bm::word_t left,
bm::word_t right) BMNOEXCEPT;
/**
    @brief Memory / structural statistics for a bit-vector.
    Collected by walking the block tree; used for serialization sizing
    and memory profiling.
*/
struct bv_statistics
{
// bit_blocks/gap_blocks/ptr_sub_blocks: counts of allocated block kinds;
// bv_count: number of vectors merged in; max_serialize_mem: worst-case
// serialization size; memory_used: bytes allocated; gap_cap_overhead:
// wasted (unused) GAP capacity in bytes; gap_levels: configured GAP length
// levels; gaps_by_level: histogram of GAP blocks per level
size_t bit_blocks; size_t gap_blocks; size_t ptr_sub_blocks; size_t bv_count; size_t max_serialize_mem; size_t memory_used; size_t gap_cap_overhead; gap_word_t gap_levels[bm::gap_levels]; unsigned long long gaps_by_level[bm::gap_levels];
/// Account for one fully materialized bit-block
void add_bit_block() BMNOEXCEPT
{
++bit_blocks;
size_t mem_used = sizeof(bm::word_t) * bm::set_block_size;
memory_used += mem_used;
max_serialize_mem += mem_used;
}
/// Account for one GAP block; capacity/length are in gap_word_t units.
/// capacity must match one of the configured gap_levels[] entries.
void add_gap_block(unsigned capacity, unsigned length) BMNOEXCEPT
{
++gap_blocks;
size_t mem_used = (capacity * sizeof(gap_word_t));
memory_used += mem_used;
max_serialize_mem += (unsigned)(length * sizeof(gap_word_t));
BM_ASSERT(length <= capacity);
gap_cap_overhead += (capacity - length) * sizeof(gap_word_t);
// bump the per-level histogram for the matching configured level
for (unsigned i = 0; i < bm::gap_levels; ++i)
{
if (capacity == gap_levels[i])
{
gaps_by_level[i]++;
return;
}
}
BM_ASSERT(0); // capacity did not match any configured GAP level
}
/// Reset all counters (the gap_levels[] configuration is left intact)
void reset() BMNOEXCEPT
{
bit_blocks = gap_blocks = ptr_sub_blocks = bv_count = 0;
max_serialize_mem = memory_used = gap_cap_overhead = 0;
for (unsigned i = 0; i < bm::gap_levels; ++i)
gaps_by_level[i] = 0ull;
}
/// Merge statistics from another vector; +8 adds a small per-vector
/// serialization overhead margin.
// NOTE(review): gaps_by_level[] is not merged here -- confirm intended
void add(const bv_statistics& st) BMNOEXCEPT
{
bit_blocks += st.bit_blocks;
gap_blocks += st.gap_blocks;
ptr_sub_blocks += st.ptr_sub_blocks;
bv_count += st.bv_count;
max_serialize_mem += st.max_serialize_mem + 8;
memory_used += st.memory_used;
gap_cap_overhead += st.gap_cap_overhead;
}
};
/**
    @brief Byte-size statistics for an arena allocation,
    broken down by block kind.
*/
struct bv_arena_statistics
{
    size_t bit_blocks_sz;      ///< bytes of bit-blocks
    size_t gap_blocks_sz;      ///< bytes of GAP blocks
    size_t ptr_sub_blocks_sz;  ///< bytes of pointer sub-block arrays

    /// Zero out all counters
    void reset() BMNOEXCEPT
    {
        bit_blocks_sz = 0;
        gap_blocks_sz = 0;
        ptr_sub_blocks_sz = 0;
    }
};
/// Minimal std::pair-like aggregate (local type, avoids <utility> dependency)
template<typename First, typename Second>
struct pair
{
First first;
Second second;
pair(First f, Second s) : first(f), second(s) {}
};
/// Scratch cache of decoded bit positions for one 64-bit value.
/// bits[]: decoded positions; bcnt: number of valid entries;
/// cvalue: the source value the cache was decoded from.
struct bit_decode_cache
{
unsigned short bits[65]; unsigned bcnt; bm::id64_t cvalue;
bit_decode_cache() : bcnt(0), cvalue(0) {}
};
/** Split a linear block index into (top, sub) block-tree coordinates. */
template<typename BI_TYPE>
BMFORCEINLINE
void get_block_coord(BI_TYPE nb, unsigned& i, unsigned& j) BMNOEXCEPT
{
    i = unsigned(nb >> bm::set_array_shift); // top-level array index
    j = unsigned(nb & bm::set_array_mask);   // index inside the sub-array
}

/** First bit index covered by super-block i. */
template<typename RTYPE>
BMFORCEINLINE RTYPE get_super_block_start(unsigned i) BMNOEXCEPT
{
    return RTYPE(i) * bm::set_sub_total_bits;
}

/** First bit index covered by block (i, j). */
template<typename RTYPE>
BMFORCEINLINE RTYPE get_block_start(unsigned i, unsigned j) BMNOEXCEPT
{
    RTYPE base = bm::get_super_block_start<RTYPE>(i);
    base += j * bm::gap_max_bits;
    return base;
}
/**
    Population count (number of set bits) of a 32-bit word.
    Uses hardware POPCNT when SIMD builds are enabled, GCC builtin when
    available, otherwise a per-byte lookup table.
*/
BMFORCEINLINE
bm::id_t word_bitcount(bm::id_t w) BMNOEXCEPT
{
#if defined(BMSSE42OPT) || defined(BMAVX2OPT) || defined(BMAVX512OPT)
return bm::id_t(_mm_popcnt_u32(w));
#else
#if defined(BM_USE_GCC_BUILD)
return (bm::id_t)__builtin_popcount(w);
#else
// portable fallback: sum of the four per-byte table popcounts
return
bm::bit_count_table<true>::_count[(unsigned char)(w)] +
bm::bit_count_table<true>::_count[(unsigned char)((w) >> 8)] +
bm::bit_count_table<true>::_count[(unsigned char)((w) >> 16)] +
bm::bit_count_table<true>::_count[(unsigned char)((w) >> 24)];
#endif
#endif
}
/**
    Branch-free (SWAR) popcount of a 32-bit word using octal masks
    (classic parallel bit-count; the final %63 folds the 3-bit sums).
*/
inline
int parallel_popcnt_32(unsigned int n) BMNOEXCEPT
{
unsigned int tmp;
tmp = n - ((n >> 1) & 033333333333)
- ((n >> 2) & 011111111111);
return ((tmp + (tmp >> 3)) & 030707070707) % 63;
}
/**
    Population count of a 64-bit word.
    Prefers 64-bit hardware POPCNT, then two 32-bit POPCNTs, then the GCC
    builtin; on ARM it sums two 32-bit table counts; otherwise a 64-bit
    SWAR reduction.
*/
BMFORCEINLINE
unsigned word_bitcount64(bm::id64_t x) BMNOEXCEPT
{
#if defined(BMSSE42OPT) || defined(BMAVX2OPT) || defined(BMAVX512OPT)
#if defined(BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512)
return unsigned(_mm_popcnt_u64(x));
#else
return _mm_popcnt_u32(x >> 32) + _mm_popcnt_u32((unsigned)x);
#endif
#else
#if defined(BM_USE_GCC_BUILD)
return (unsigned)__builtin_popcountll(x);
#else
#if (defined(__arm__))
return bm::word_bitcount(x >> 32) + bm::word_bitcount((unsigned)x);
#else
// SWAR: pairwise 2-bit sums, then 4-bit nibble sums, then byte folding
x = x - ((x >> 1) & 0x5555555555555555);
x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0F;
x = x + (x >> 8);
x = x + (x >> 16);
x = x + (x >> 32);
return x & 0xFF;
#endif
#endif
#endif
}
/**
    Combined popcount of four 64-bit words (x + y + u + v) in one SWAR
    pipeline -- cheaper than four independent popcounts because partial
    sums are merged early. Max result 256 fits the 9-bit final mask.
*/
inline
unsigned bitcount64_4way(bm::id64_t x, bm::id64_t y,
bm::id64_t u, bm::id64_t v) BMNOEXCEPT
{
const bm::id64_t m1 = 0x5555555555555555U;
const bm::id64_t m2 = 0x3333333333333333U;
const bm::id64_t m3 = 0x0F0F0F0F0F0F0F0FU;
const bm::id64_t m4 = 0x000000FF000000FFU;
// 2-bit partial sums per word
x = x - ((x >> 1) & m1);
y = y - ((y >> 1) & m1);
u = u - ((u >> 1) & m1);
v = v - ((v >> 1) & m1);
// 4-bit partial sums per word
x = (x & m2) + ((x >> 2) & m2);
y = (y & m2) + ((y >> 2) & m2);
u = (u & m2) + ((u >> 2) & m2);
v = (v & m2) + ((v >> 2) & m2);
// merge pairs, then reduce to bytes and fold
x = x + y;
u = u + v;
x = (x & m3) + ((x >> 4) & m3);
u = (u & m3) + ((u >> 4) & m3);
x = x + u;
x = x + (x >> 8);
x = x + (x >> 16);
x = x & m4;
x = x + (x >> 32);
return x & 0x000001FFU;
}
/**
    Call functor func(position...) for every set bit in w, scanning one
    nibble (4 bits) at a time; the 16-way switch dispatches to the functor
    overload taking 1..4 positions at once. Positions are passed in
    ascending order.
*/
template<typename T, typename F>
void bit_for_each_4(T w, F& func)
{
for (unsigned sub_octet = 0; w != 0; w >>= 4, sub_octet += 4)
{
switch (w & 15) {
case 0: break;
case 1: func(sub_octet);
break;
case 2: func(sub_octet + 1);
break;
case 3: func(sub_octet, sub_octet + 1);
break;
case 4: func(sub_octet + 2);
break;
case 5: func(sub_octet, sub_octet + 2);
break;
case 6: func(sub_octet + 1, sub_octet + 2);
break;
case 7: func(sub_octet, sub_octet + 1, sub_octet + 2);
break;
case 8: func(sub_octet + 3);
break;
case 9: func(sub_octet, sub_octet + 3);
break;
case 10: func(sub_octet + 1, sub_octet + 3);
break;
case 11: func(sub_octet, sub_octet + 1, sub_octet + 3);
break;
case 12: func(sub_octet + 2, sub_octet + 3);
break;
case 13: func(sub_octet, sub_octet + 2, sub_octet + 3);
break;
case 14: func(sub_octet + 1, sub_octet + 2, sub_octet + 3);
break;
case 15: func(sub_octet, sub_octet + 1, sub_octet + 2, sub_octet + 3);
break;
default:
BM_ASSERT(0);
break;
}
} }
/**
    Call func(bit_position) for every set bit in w, scanning one octet
    (8 bits) per loop iteration, in ascending bit order.
*/
template<typename T, typename F>
void bit_for_each(T w, F& func)
{
for (unsigned octet = 0; w != 0; w >>= 8, octet += 8)
{
if (w & 1) func(octet + 0);
if (w & 2) func(octet + 1);
if (w & 4) func(octet + 2);
if (w & 8) func(octet + 3);
if (w & 16) func(octet + 4);
if (w & 32) func(octet + 5);
if (w & 64) func(octet + 6);
if (w & 128) func(octet + 7);
} }
/**
    Decode set-bit positions of w into bits[] using a 16-way nibble
    switch (positions in ascending order).
    @return number of positions written
*/
inline
unsigned bitscan_nibble(unsigned w, unsigned* bits) BMNOEXCEPT
{
unsigned cnt = 0;
for (unsigned sub_octet = 0; w; w >>= 4, sub_octet+=4)
{
switch (w & 15) {
case 1: bits[cnt++] = 0 + sub_octet;
break;
case 2: bits[cnt++] = 1 + sub_octet;
break;
case 3: bits[cnt] = 0 + sub_octet;
bits[cnt+1] = 1 + sub_octet;
cnt += 2;
break;
case 4: bits[cnt++] = 2 + sub_octet;
break;
case 5: bits[cnt+0] = 0 + sub_octet;
bits[cnt+1] = 2 + sub_octet;
cnt += 2;
break;
case 6: bits[cnt+0] = 1 + sub_octet;
bits[cnt+1] = 2 + sub_octet;
cnt += 2;
break;
case 7: bits[cnt+0] = 0 + sub_octet;
bits[cnt+1] = 1 + sub_octet;
bits[cnt+2] = 2 + sub_octet;
cnt += 3;
break;
case 8: bits[cnt++] = 3 + sub_octet;
break;
case 9: bits[cnt+0] = 0 + sub_octet;
bits[cnt+1] = 3 + sub_octet;
cnt += 2;
break;
case 10: bits[cnt+0] = 1 + sub_octet;
bits[cnt+1] = 3 + sub_octet;
cnt += 2;
break;
case 11: bits[cnt+0] = 0 + sub_octet;
bits[cnt+1] = 1 + sub_octet;
bits[cnt+2] = 3 + sub_octet;
cnt += 3;
break;
case 12: bits[cnt+0] = 2 + sub_octet;
bits[cnt+1] = 3 + sub_octet;
cnt += 2;
break;
case 13: bits[cnt+0] = 0 + sub_octet;
bits[cnt+1] = 2 + sub_octet;
bits[cnt+2] = 3 + sub_octet;
cnt += 3;
break;
case 14: bits[cnt+0] = 1 + sub_octet;
bits[cnt+1] = 2 + sub_octet;
bits[cnt+2] = 3 + sub_octet;
cnt += 3;
break;
case 15: bits[cnt+0] = 0 + sub_octet;
bits[cnt+1] = 1 + sub_octet;
bits[cnt+2] = 2 + sub_octet;
bits[cnt+3] = 3 + sub_octet;
cnt += 4;
break;
default: // nibble == 0: nothing to record
break;
}
} return cnt;
}
#ifdef BM_NONSTANDARD_EXTENTIONS
#ifdef __GNUC__
/**
    Variant of bitscan_nibble() using the GCC "labels as values"
    (computed goto) extension: the nibble value indexes a jump table and
    labels fall through / chain to emit positions in ascending order.
    Compiled only under BM_NONSTANDARD_EXTENTIONS (sic -- macro name typo
    is part of the public configuration surface, do not "fix" it).
    @return number of positions written to bits[]
*/
inline
unsigned bitscan_nibble_gcc(unsigned w, unsigned* bits) BMNOEXCEPT
{
// d_table[n] = label emitting the bit pattern of nibble value n
static void* d_table[] = { &&l0,
&&l1, &&l3_1, &&l3, &&l7_1, &&l5, &&l7_0, &&l7, &&l15_1,
&&l9, &&l11_0, &&l11, &&l15_0, &&l13, &&l14, &&l15 };
unsigned cnt = 0;
for (unsigned sub_octet = 0; w; w >>= 4, sub_octet+=4)
{
goto *d_table[w & 15];
l1: bits[cnt++] = sub_octet;
continue;
l3: bits[cnt++] = sub_octet;
l3_1:
bits[cnt++] = 1 + sub_octet;
continue;
l5: bits[cnt++] = sub_octet;
goto l7_1;
l7: bits[cnt++] = sub_octet;
l7_0:
bits[cnt++] = 1 + sub_octet;
l7_1:
bits[cnt++] = 2 + sub_octet;
continue;
l9: bits[cnt++] = sub_octet;
goto l15_1;
continue;
l11: bits[cnt++] = sub_octet;
l11_0:
bits[cnt++] = 1 + sub_octet;
bits[cnt++] = 3 + sub_octet;
continue;
l13: bits[cnt++] = sub_octet;
goto l15_0;
l14: bits[cnt++] = 1 + sub_octet;
goto l15_0;
l15: bits[cnt++] = 0 + sub_octet;
bits[cnt++] = 1 + sub_octet;
l15_0:
bits[cnt++] = 2 + sub_octet;
l15_1:
bits[cnt++] = 3 + sub_octet;
l0:
continue;
} return cnt;
}
#endif
#endif
/**
    Functor for bit_for_each()/bit_for_each_4(): appends each received
    bit position (1 to 4 at a time) to a destination array, advancing an
    internal write pointer. Non-copyable.
*/
template<typename B>
class copy_to_array_functor
{
public:
copy_to_array_functor(B* bits): bp_(bits)
{}
/// Current write position (past-the-end of what was written)
B* ptr() { return bp_; }
void operator()(unsigned bit_idx) BMNOEXCEPT { *bp_++ = (B)bit_idx; }
void operator()(unsigned bit_idx0,
unsigned bit_idx1) BMNOEXCEPT
{
bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1;
bp_+=2;
}
void operator()(unsigned bit_idx0,
unsigned bit_idx1,
unsigned bit_idx2) BMNOEXCEPT
{
bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1; bp_[2] = (B)bit_idx2;
bp_+=3;
}
void operator()(unsigned bit_idx0,
unsigned bit_idx1,
unsigned bit_idx2,
unsigned bit_idx3) BMNOEXCEPT
{
bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1;
bp_[2] = (B)bit_idx2; bp_[3] = (B)bit_idx3;
bp_+=4;
}
private:
copy_to_array_functor(const copy_to_array_functor&);
copy_to_array_functor& operator=(const copy_to_array_functor&);
private:
B* bp_; // current write position in the destination array
};
/** Decode set-bit positions of w into bits[] (octet-at-a-time scan).
    @return number of positions written
*/
template<typename T,typename B>
unsigned bit_list(T w, B* bits) BMNOEXCEPT
{
    copy_to_array_functor<B> collector(bits);
    bit_for_each(w, collector);
    return unsigned(collector.ptr() - bits);
}

/** Decode set-bit positions of w into bits[] (nibble-at-a-time scan).
    @return number of positions written
*/
template<typename T,typename B>
unsigned bit_list_4(T w, B* bits) BMNOEXCEPT
{
    copy_to_array_functor<B> collector(bits);
    bit_for_each_4(w, collector);
    return unsigned(collector.ptr() - bits);
}
/**
    Decode set-bit positions of a 32-bit word via the popcount trick:
    isolate lowest bit (w & -w), its index is popcount(t - 1).
    Positions are written with 'offs' added.
    @return number of positions written
*/
template<typename B>
unsigned short
bitscan_popcnt(bm::id_t w, B* bits, unsigned short offs) BMNOEXCEPT
{
unsigned pos = 0;
while (w)
{
bm::id_t t = w & -w; // isolate the lowest set bit
bits[pos++] = (B)(bm::word_bitcount(t - 1) + offs);
w &= w - 1; // clear the lowest set bit
}
return (unsigned short)pos;
}
/// Same as above with offs == 0
template<typename B>
unsigned short bitscan_popcnt(bm::id_t w, B* bits) BMNOEXCEPT
{
unsigned pos = 0;
while (w)
{
bm::id_t t = w & -w;
bits[pos++] = (B)(bm::word_bitcount(t - 1));
w &= w - 1;
}
return (unsigned short)pos;
}
/// 64-bit variant using BMI intrinsic wrappers (blsi = isolate lowest
/// set bit, bslr = reset lowest set bit)
template<typename B>
unsigned short bitscan_popcnt64(bm::id64_t w, B* bits) BMNOEXCEPT
{
unsigned short pos = 0;
while (w)
{
bm::id64_t t = bmi_blsi_u64(w); bits[pos++] = (B) bm::word_bitcount64(t - 1);
w = bmi_bslr_u64(w); }
return pos;
}
/**
    Decode set-bit positions of a 32-bit word using trailing-zero count
    (bit scan forward). Positions written in ascending order.
    @return number of positions written
*/
template<typename B>
unsigned short bitscan_bsf(unsigned w, B* bits) BMNOEXCEPT
{
unsigned short pos = 0;
while (w)
{
bits[pos++] = count_trailing_zeros_u32(w);
w &= w - 1; // clear the lowest set bit
}
return pos;
}
/// Same as above, adding 'offs' to every decoded position
template<typename B, typename OT>
unsigned short bitscan_bsf(unsigned w, B* bits, OT offs) BMNOEXCEPT
{
unsigned short pos = 0;
while (w)
{
bits[pos++] = (B) (bm::count_trailing_zeros_u32(w) + offs);
w &= w - 1;
}
return pos;
}
/// 64-bit trailing-zeros variant
template<typename B>
unsigned short bitscan_bsf64(bm::id64_t w, B* bits) BMNOEXCEPT
{
unsigned short pos = 0;
while (w)
{
bits[pos++] = bm::count_trailing_zeros_u64(w);
w &= w - 1;
}
return pos;
}
/// 64-bit popcount-based variant with position offset
template<typename B>
unsigned short
bitscan_popcnt64(bm::id64_t w, B* bits, unsigned short offs) BMNOEXCEPT
{
unsigned short pos = 0;
while (w)
{
bm::id64_t t = bmi_blsi_u64(w); bits[pos++] = B(bm::word_bitcount64(t - 1) + offs);
w = bmi_bslr_u64(w); }
return pos;
}
/**
    Generic bitscan dispatcher: picks the 32- or 64-bit decoder by the
    size of V, and the TZCNT-based path on ARM (no fast POPCNT assumed)
    vs the popcount-based path elsewhere.
    @return number of positions written to bits[]
*/
template<typename V, typename B>
unsigned short bitscan(V w, B* bits) BMNOEXCEPT
{
static_assert(std::is_unsigned<V>::value, "BM: unsigned type is required");
#if (defined(__arm__) || defined(__aarch64__))
if constexpr (sizeof(V) == 8)
return bm::bitscan_bsf64(w, bits);
else
return bm::bitscan_bsf((bm::word_t)w, bits);
#else
if constexpr (sizeof(V) == 8)
return bm::bitscan_popcnt64(w, bits);
else
return bm::bitscan_popcnt((bm::word_t)w, bits);
#endif
}
/**
    Bit select via linear scan: position (0-based) of the rank-th
    (1-based) set bit of w. Preconditions: w != 0, rank >= 1.
*/
inline
unsigned word_select64_linear(bm::id64_t w, unsigned rank) BMNOEXCEPT
{
BM_ASSERT(w);
BM_ASSERT(rank);
for (unsigned count = 0; w; w >>=1ull, ++count)
{
rank -= unsigned(w & 1ull);
if (!rank)
return count;
}
BM_ASSERT(0); return ~0u; // rank exceeded popcount(w)
}
/**
    Bit select: strip (rank-1) lowest set bits, then locate the surviving
    lowest bit via popcount(t-1). Precondition: rank <= popcount(w).
*/
inline
unsigned word_select64_bitscan_popcnt(bm::id64_t w, unsigned rank) BMNOEXCEPT
{
BM_ASSERT(w);
BM_ASSERT(rank);
BM_ASSERT(rank <= bm::word_bitcount64(w));
do
{
--rank;
if (!rank)
break;
w &= w - 1; // drop lowest set bit
} while (1);
bm::id64_t t = w & -w; // isolate target bit
unsigned count = bm::word_bitcount64(t - 1);
return count;
}
/// Same as above but locates the final bit with trailing-zero count
/// (preferred on targets without fast POPCNT)
inline
unsigned word_select64_bitscan_tz(bm::id64_t w, unsigned rank) BMNOEXCEPT
{
BM_ASSERT(w);
BM_ASSERT(rank);
BM_ASSERT(rank <= bm::word_bitcount64(w));
do
{
if (!(--rank))
break;
w &= w - 1;
} while (1);
bm::id64_t t = w & -w;
unsigned count = bm::count_trailing_zeros_u64(t);
return count;
}
/// 32-bit popcount-based select (see 64-bit version for the algorithm)
inline
unsigned word_select32_bitscan_popcnt(unsigned w, unsigned rank) BMNOEXCEPT
{
BM_ASSERT(w);
BM_ASSERT(rank);
BM_ASSERT(rank <= bm::word_bitcount(w));
do
{
--rank;
if (!rank)
break;
w &= w - 1;
} while (1);
unsigned t = w & -w;
unsigned count = bm::word_bitcount(t - 1);
return count;
}
/// 32-bit trailing-zeros-based select
inline
unsigned word_select32_bitscan_tz(unsigned w, unsigned rank) BMNOEXCEPT
{
BM_ASSERT(w);
BM_ASSERT(rank);
BM_ASSERT(rank <= bm::word_bitcount(w));
do
{
if (!(--rank))
break;
w &= w - 1;
} while (1);
return bm::count_trailing_zeros_u32(w & -w);
}
/**
    Bit select for a 64-bit word: 0-based position of the rank-th
    (1-based) set bit. Preconditions: w != 0, 1 <= rank <= popcount(w).
    Fixed: the BMI1-only fallback branch mistakenly called
    BMI2_SELECT64, which is not defined when only BMI1_SELECT64 exists.
*/
inline
unsigned word_select64(bm::id64_t w, unsigned rank) BMNOEXCEPT
{
#if defined(BMI2_SELECT64)
    return BMI2_SELECT64(w, rank);
#else
# if defined(BMI1_SELECT64)
    return BMI1_SELECT64(w, rank);
# else
#  if (defined(__arm__) || defined(__aarch64__))
    return bm::word_select64_bitscan_tz(w, rank);   // no fast POPCNT on ARM
#  else
    return bm::word_select64_bitscan_popcnt(w, rank);
#  endif
# endif
#endif
}
/**
    Bit select for a 32-bit word: 0-based position of the rank-th
    (1-based) set bit. Preconditions: w != 0, 1 <= rank <= popcount(w).
    Fixed: the BMI1-only fallback branch mistakenly called
    BMI2_SELECT64, which is not defined when only BMI1_SELECT64 exists.
*/
inline
unsigned word_select32(unsigned w, unsigned rank) BMNOEXCEPT
{
#if defined(BMI2_SELECT64)
    return BMI2_SELECT64(w, rank); // 64-bit select works for 32-bit input
#else
# if defined(BMI1_SELECT64)
    return BMI1_SELECT64(w, rank);
# else
#  if (defined(__arm__) || defined(__aarch64__))
    return bm::word_select32_bitscan_tz(w, rank);
#  else
    return bm::word_select32_bitscan_popcnt(w, rank);
#  endif
# endif
#endif
}
/// Digest bit mask for the wave containing word index w_idx
BMFORCEINLINE
bm::id64_t widx_to_digest_mask(unsigned w_idx) BMNOEXCEPT
{
bm::id64_t mask(1ull);
return mask << (w_idx / bm::set_block_digest_wave_size);
}
/// Reference (slow) digest mask builder for bit range [from..to];
/// 1024 = bits per digest wave (debug/control helper)
inline
bm::id64_t dm_control(unsigned from, unsigned to) BMNOEXCEPT
{
bm::id64_t m = 0;
for (unsigned i = from; i <= to; ++i)
m |= (1ull << (i / 1024));
return m;
}
/**
    Digest mask covering all waves touched by bit range [from..to].
    Builds a contiguous run of 1s from wave(from) through wave(to).
*/
BMFORCEINLINE
bm::id64_t digest_mask(unsigned from, unsigned to) BMNOEXCEPT
{
BM_ASSERT(from <= to);
BM_ASSERT(to < bm::gap_max_bits);
bm::id64_t digest_from = from >> bm::set_block_digest_pos_shift;
bm::id64_t digest_to = to >> bm::set_block_digest_pos_shift;;
bm::id64_t mask(~0ull);
mask = (mask >> (63 - (digest_to - digest_from))) << digest_from;
return mask;
}
/// True if the digest has no set waves in bit range
/// [bitpos_from..bitpos_to] (i.e. the range is guaranteed all-zero)
inline
bool check_zero_digest(bm::id64_t digest,
unsigned bitpos_from, unsigned bitpos_to) BMNOEXCEPT
{
bm::id64_t mask = bm::digest_mask(bitpos_from, bitpos_to);
return !(digest & mask);
}
/**
    Initialize a bit-block from a digest: each of the 64 digest bits
    controls one wave -- the wave is filled with all-ones when the digest
    bit is set, all-zeros otherwise.
    Fixed: '#endif' was fused onto the closing-brace code line
    ('} #endif'), which is not a valid preprocessing directive.
    @param block - destination bit-block
    @param digest - 64-bit wave occupancy mask
*/
inline
void block_init_digest0(bm::word_t* const block, bm::id64_t digest) BMNOEXCEPT
{
    unsigned off;
    for (unsigned i = 0; i < 64; ++i)
    {
        off = i * bm::set_block_digest_wave_size;
        bm::word_t mask = (digest & 1) ? ~0u : 0u; // fill value for wave i
#if defined(VECT_BLOCK_SET_DIGEST)
        VECT_BLOCK_SET_DIGEST(&block[off], mask);
#else
        for (unsigned j = 0; j < bm::set_block_digest_wave_size; j+=4)
        {
            block[off+j+0] = block[off+j+1] =
            block[off+j+2] = block[off+j+3] = mask;
        }
#endif
        digest >>= 1ull;
    }
}
/**
    Compute the digest of a bit-block: bit i of the result is set when
    wave i of the block contains at least one non-zero word.
    Fixed: '#endif' was fused onto the closing-brace code line
    ('} #endif'), which is not a valid preprocessing directive.
    @param block - source bit-block
    @return 64-bit wave occupancy mask
*/
inline
bm::id64_t calc_block_digest0(const bm::word_t* const block) BMNOEXCEPT
{
    bm::id64_t digest0 = 0;
    unsigned off;
    for (unsigned i = 0; i < 64; ++i)
    {
        off = i * bm::set_block_digest_wave_size;
#if defined(VECT_IS_DIGEST_ZERO)
        bool all_zero = VECT_IS_DIGEST_ZERO(&block[off]);
        digest0 |= bm::id64_t(!all_zero) << i;
#else
        const bm::id64_t mask(1ull);
        for (unsigned j = 0; j < bm::set_block_digest_wave_size; j+=4)
        {
            bm::word_t w = block[off+j+0] | block[off+j+1] |
                           block[off+j+2] | block[off+j+3];
            if (w)
            {
                digest0 |= (mask << i); // wave i is non-zero; next wave
                break;
            }
        }
#endif
    }
    return digest0;
}
/**
    Re-validate an existing digest against a block: for every wave marked
    non-zero in 'digest', clear its bit if the wave is in fact all zero.
    Only marked waves are inspected (bits never get added).
    @return corrected digest
*/
inline
bm::id64_t
update_block_digest0(const bm::word_t* const block, bm::id64_t digest) BMNOEXCEPT
{
const bm::id64_t mask(1ull);
bm::id64_t d = digest;
while (d)
{
// take the lowest set digest bit; its index is the wave number
bm::id64_t t = bm::bmi_blsi_u64(d);
unsigned wave = bm::word_bitcount64(t - 1);
unsigned off = wave * bm::set_block_digest_wave_size;
#if defined(VECT_IS_DIGEST_ZERO)
bool all_zero = VECT_IS_DIGEST_ZERO(&block[off]);
digest &= all_zero ? ~(mask << wave) : digest;
#else
const bm::bit_block_t::bunion_t* BMRESTRICT src_u =
(const bm::bit_block_t::bunion_t*)(&block[off]);
bm::id64_t w64 = 0;
unsigned j = 0;
// OR 4x 64-bit words per pass; bitwise & in the condition is
// intentional (branchless combine of bound check and early-out)
do
{
w64 |=
src_u->w64[j+0] | src_u->w64[j+1] | src_u->w64[j+2] | src_u->w64[j+3];
j += 4;
} while ((j < bm::set_block_digest_wave_size/2) & !w64);
digest &= w64 ? digest : ~(mask << wave);
#endif
d = bm::bmi_bslr_u64(d); }
BM_ASSERT(bm::calc_block_digest0(block) == digest);
return digest;
}
/**
    Compact a block guided by its digest: copy only the waves marked
    non-zero in 'digest' into t_block, packed consecutively from the
    start. Optionally zero the remaining tail waves of t_block.
    @param t_block - destination (compacted) block
    @param block - source block
    @param digest - wave occupancy mask of 'block'
    @param zero_tail - if true, zero-fill unused tail waves in t_block
*/
inline
void block_compact_by_digest(bm::word_t* t_block,
const bm::word_t* block,
bm::id64_t digest,
bool zero_tail) BMNOEXCEPT
{
unsigned t_wave = 0;
bm::id64_t d = digest;
// copy each marked wave into the next target slot
while (d)
{
bm::id64_t t = bm::bmi_blsi_u64(d); unsigned wave = bm::word_bitcount64(t - 1);
unsigned off = wave * bm::set_block_digest_wave_size;
unsigned t_off = t_wave * bm::set_block_digest_wave_size;
const bm::word_t* sub_block = block + off;
bm::word_t* t_sub_block = t_block + t_off;
const bm::word_t* sub_block_end =
sub_block + bm::set_block_digest_wave_size;
for (; sub_block < sub_block_end; t_sub_block+=4, sub_block+=4)
{
t_sub_block[0] = sub_block[0];
t_sub_block[1] = sub_block[1];
t_sub_block[2] = sub_block[2];
t_sub_block[3] = sub_block[3];
}
d = bm::bmi_bslr_u64(d); ++t_wave;
}
// zero-fill the tail so t_block is fully defined
if (zero_tail)
{
for ( ;t_wave < bm::block_waves; ++t_wave)
{
unsigned t_off = t_wave * bm::set_block_digest_wave_size;
bm::word_t* t_sub_block = t_block + t_off;
const bm::word_t* t_sub_block_end =
t_sub_block + bm::set_block_digest_wave_size;
for (; t_sub_block < t_sub_block_end; t_sub_block+=4)
{
t_sub_block[0] = 0;
t_sub_block[1] = 0;
t_sub_block[2] = 0;
t_sub_block[3] = 0;
} } }
}
/**
    Inverse of block_compact_by_digest(): scatter consecutively packed
    source waves back to their digest-marked positions in t_block.
    @param t_block - destination (expanded) block
    @param block - compacted source block
    @param digest - target wave occupancy mask
    @param zero_subs - if true, zero-fill waves not marked in digest
*/
inline
void block_expand_by_digest(bm::word_t* t_block,
const bm::word_t* block,
bm::id64_t digest,
bool zero_subs) BMNOEXCEPT
{
unsigned s_wave = 0; // next packed wave to consume from 'block'
bm::id64_t d = digest;
bm::id64_t mask1 = 1ULL;
for (unsigned wave = 0; wave < bm::block_waves; ++wave)
{
unsigned s_off = s_wave * bm::set_block_digest_wave_size;
const bm::word_t* sub_block = block + s_off;
const bm::word_t* sub_block_end =
sub_block + bm::set_block_digest_wave_size;
bm::word_t* t_sub_block =
t_block + (wave * bm::set_block_digest_wave_size);
if (d & mask1) // this wave is present in the packed source
{
for (; sub_block < sub_block_end; t_sub_block+=4, sub_block+=4)
{
t_sub_block[0] = sub_block[0];
t_sub_block[1] = sub_block[1];
t_sub_block[2] = sub_block[2];
t_sub_block[3] = sub_block[3];
}
++s_wave;
}
else
{
if (zero_subs)
{
const bm::word_t* t_sub_block_end =
t_sub_block + bm::set_block_digest_wave_size;
for (; t_sub_block < t_sub_block_end; t_sub_block+=4)
{
t_sub_block[0] = 0; t_sub_block[1] = 0;
t_sub_block[2] = 0; t_sub_block[3] = 0;
} }
}
mask1 <<= 1;
} }
/// True when 'op' is a non-mutating (const) set operation code
inline
bool is_const_set_operation(set_operation op) BMNOEXCEPT
{
    return int(op) >= int(set_COUNT);
}

/// Convert a set_operation code to the narrower 'operation' enum.
/// Valid only for AND / OR / SUB / XOR (asserted).
inline
bm::operation setop2op(bm::set_operation op) BMNOEXCEPT
{
    BM_ASSERT(op == set_AND || op == set_OR ||
              op == set_SUB || op == set_XOR);
    return static_cast<bm::operation>(op);
}
/**
    Singleton holder of the canonical ALL-ONES block (_p) plus "fake"
    full-block sentinel pointers (_p_fullp / _s[]) built from the magic
    pattern 0xFFFFfffe, used to mark virtual full blocks without
    allocating them.
*/
template<bool T> struct all_set
{
struct BM_VECT_ALIGN all_set_block
{
// _s: sub-array of fake full-block pointers; _p: real all-ones block
bm::word_t BM_VECT_ALIGN* _s[bm::set_sub_array_size] BM_VECT_ALIGN_ATTR;
bm::word_t BM_VECT_ALIGN _p[bm::set_block_size] BM_VECT_ALIGN_ATTR;
bm::word_t* _p_fullp; // fake full-block sentinel pointer
all_set_block() BMNOEXCEPT
{
// fill the real block with 1s, then build the sentinel pointers
// from the magic mask (pointer-width dependent)
::memset(_p, 0xFF, sizeof(_p)); if constexpr (sizeof(void*) == 8)
{
const unsigned long long magic_mask = 0xFFFFfffeFFFFfffe;
::memcpy(&_p_fullp, &magic_mask, sizeof(magic_mask));
for (unsigned i = 0; i < bm::set_sub_array_size; ++i)
_s[i] = reinterpret_cast<bm::word_t*>(magic_mask);
}
else
{
const unsigned magic_mask = 0xFFFFfffe;
::memcpy(&_p_fullp, &magic_mask, sizeof(magic_mask));
for (unsigned i = 0; i < bm::set_sub_array_size; ++i)
_s[i] = reinterpret_cast<bm::word_t*>(magic_mask);
}
}
};
/// Classify a block pointer: low bits / identity against the canonical
/// all-ones block are folded into a small type code (0 = regular block)
inline
static bm::id64_t block_type(const bm::word_t* bp) BMNOEXCEPT
{
bm::id64_t type;
if constexpr (sizeof(void*) == 8)
{
bm::id64_t w = reinterpret_cast<unsigned long long>(bp);
type = (w & 3) | ((bp == _block._p) << 1);
type = type ? type : w;
}
else
{
unsigned w = reinterpret_cast<unsigned long>(bp);
type = (w & 3) | ((bp == _block._p) << 1);
type = type ? type : w;
}
return type;
}
/// True if bp is the canonical all-ones block or the fake-full sentinel
BMFORCEINLINE
static bool is_full_block(const bm::word_t* bp) BMNOEXCEPT
{ return (bp == _block._p || bp == _block._p_fullp); }
/// True if bp points to a real (dereferenceable) block
BMFORCEINLINE
static bool is_valid_block_addr(const bm::word_t* bp) BMNOEXCEPT
{ return (bp && !(bp == _block._p || bp == _block._p_fullp)); }
static all_set_block _block;
};
template<bool T> typename all_set<T>::all_set_block all_set<T>::_block;
/**
    Linear scan of a top-level pointer array for the first non-NULL slot
    in [start, size).
    Fixed: removed dead '#if 0 / #else / #endif' scaffolding which was
    malformed here ('#if 0#else' fused onto one line -- invalid
    preprocessing directive).
    @param arr - top-level array of sub-block pointers
    @param start - first index to probe
    @param size - array size (exclusive bound)
    @param pos - [out] index of the first non-NULL slot found
    @return true if a non-NULL slot was found
*/
template<typename N>
bool find_not_null_ptr(bm::word_t*** arr, N start, N size, N* pos) BMNOEXCEPT
{
    BM_ASSERT(pos);
    for (; start < size; ++start)
    {
        if (arr[start])
        {
            *pos = start;
            return true;
        }
    }
    return false;
}
/**
    Lexicographical compare of two words, scanning from the lowest bit
    (bit-serial reference implementation).
    @return <0, 0 or >0 like memcmp
*/
template<typename T> int wordcmp0(T w1, T w2) BMNOEXCEPT
{
while (w1 != w2)
{
int res = (w1 & 1) - (w2 & 1);
if (res != 0) return res;
w1 >>= 1;
w2 >>= 1;
}
return 0;
}
/**
    Branchless equivalent of wordcmp0(): 'diff & -diff' isolates the
    lowest differing bit; the owner of that bit compares greater.
*/
template<typename T> int wordcmp(T a, T b) BMNOEXCEPT
{
T diff = a ^ b;
return diff? ( (a & diff & -diff)? 1 : -1 ) : 0;
}
/**
    Test whether a full bit-block contains only zero words.
    Uses a vectorized check when available, otherwise an OR-combined
    4-word scan over the block.
    @param start - bit-block start (set_block_size words)
*/
inline
bool bit_is_all_zero(const bm::word_t* BMRESTRICT start) BMNOEXCEPT
{
#if defined(VECT_IS_ZERO_BLOCK)
return VECT_IS_ZERO_BLOCK(start);
#else
const bm::wordop_t* BMRESTRICT blk = (bm::wordop_t*) (start);
const bm::wordop_t* BMRESTRICT blk_end = (bm::wordop_t*)(start + bm::set_block_size);
do
{
if (blk[0] | blk[1] | blk[2] | blk[3])
return false;
blk += 4;
} while (blk < blk_end);
return true;
#endif
}
/// True if the GAP block encodes all-zeros: starts with a 0-run and the
/// first run extends to the last bit position of the block.
BMFORCEINLINE
bool gap_is_all_zero(const bm::gap_word_t* BMRESTRICT buf) BMNOEXCEPT
{
    const bool starts_with_zero = ((*buf & 1u) == 0);
    return starts_with_zero && (buf[1] == bm::gap_max_bits - 1);
}

/// True if the GAP block encodes all-ones: starts with a 1-run covering
/// the whole block.
BMFORCEINLINE
bool gap_is_all_one(const bm::gap_word_t* BMRESTRICT buf) BMNOEXCEPT
{
    const bool starts_with_one = ((*buf & 1u) != 0);
    return starts_with_one && (buf[1] == bm::gap_max_bits - 1);
}
/// Length (in words, including the header) of a GAP block;
/// stored in the upper bits of the header word.
BMFORCEINLINE
bm::gap_word_t gap_length(const bm::gap_word_t* BMRESTRICT buf) BMNOEXCEPT
{
return (bm::gap_word_t)((*buf >> 3) + 1);
}
/// Allocated capacity of a GAP block, looked up from the level table
/// via the 2-bit level code in the header.
template<typename T>
unsigned
gap_capacity(const T* BMRESTRICT buf, const T* BMRESTRICT glevel_len) BMNOEXCEPT
{
return glevel_len[(*buf >> 1) & 3];
}
/// Maximum usable length for this block's level (capacity minus a
/// 4-word safety margin).
template<typename T>
unsigned
gap_limit(const T* BMRESTRICT buf, const T* BMRESTRICT glevel_len) BMNOEXCEPT
{
return glevel_len[(*buf >> 1) & 3]-4;
}
/// 2-bit GAP level code stored in the header word.
template<typename T>
T gap_level(const T* BMRESTRICT buf) BMNOEXCEPT
{
return T((*buf >> 1) & 3u);
}
/**
    Find the last set bit in a GAP block.
    The parity of the last run index combined with the start-value flag
    tells whether the final run is a run of 1s.
    @param buf - GAP buffer
    @param last - [out] position of the last set bit
    @return non-zero if a set bit exists, 0 otherwise
*/
template<typename T>
unsigned
gap_find_last(const T* BMRESTRICT buf, unsigned* BMRESTRICT last) BMNOEXCEPT
{
BM_ASSERT(last);
T is_set = (*buf) & 1u;
T end = T((*buf) >> 3u);
BM_ASSERT(buf[end] == bm::gap_max_bits - 1);
// flip by run parity: determines if the final run is 1s
is_set ^= T((end-1) & 1u);
if (is_set)
{
*last = buf[end];
return is_set;
}
// final run is 0s: the last 1 (if any) ends the previous run;
// returns 'end' (non-zero) as the found indicator
*last = buf[--end];
return end;
}
/**
    Find the first set bit in a GAP block.
    @param buf - GAP buffer
    @param first - [out] position of the first set bit
    @return non-zero if a set bit exists, 0 otherwise
*/
template<typename T>
unsigned
gap_find_first(const T* BMRESTRICT buf, unsigned* BMRESTRICT first) BMNOEXCEPT
{
    BM_ASSERT(first);
    if ((*buf) & 1u) // block starts with a 1-run: bit 0 is set
    {
        *first = 0;
        return 1;
    }
    // leading 0-run; if it spans the whole block there are no set bits
    if (buf[1] == bm::gap_max_bits - 1)
        return 0;
    *first = buf[1] + 1; // first 1 immediately follows the 0-run
    return 1;
}
/**
    Binary search in a GAP block for the run containing bit 'pos'.
    @param buf - GAP buffer
    @param pos - bit position to locate (must be < gap_max_bits)
    @param is_set - [out] 1 if 'pos' falls in a run of 1s, else 0
    @return index of the GAP word whose run contains 'pos'
*/
template<typename T>
unsigned gap_bfind(const T* BMRESTRICT buf,
unsigned pos, unsigned* BMRESTRICT is_set) BMNOEXCEPT
{
BM_ASSERT(pos < bm::gap_max_bits);
// NOTE(review): this #undef unconditionally disables the vectorized
// VECT_GAP_BFIND path below (the #ifdef branch is dead code) --
// confirm whether this is a deliberate opt-out or a leftover.
#undef VECT_GAP_BFIND
#ifdef VECT_GAP_BFIND
return VECT_GAP_BFIND(buf, pos, is_set);
#else
*is_set = (*buf) & 1;
unsigned start = 1;
unsigned end = 1 + ((*buf) >> 3);
// classic lower-bound binary search over the run-end positions
while ( start != end )
{
unsigned curr = (start + end) >> 1;
if ( buf[curr] < pos )
start = curr + 1;
else
end = curr;
}
// run parity relative to the starting bit value gives the answer
*is_set ^= ((start-1) & 1);
return start;
#endif
}
/**
    Test a single bit in a GAP block.
    Short blocks (< 10 runs) use an unrolled linear compare chain;
    longer blocks fall back to binary search.
    @param buf - GAP buffer
    @param pos - bit position (must be < gap_max_bits)
    @return 0 or 1 -- value of the bit
*/
template<typename T>
unsigned gap_test(const T* BMRESTRICT buf, unsigned pos) BMNOEXCEPT
{
BM_ASSERT(pos < bm::gap_max_bits);
unsigned start = 1;
unsigned end = 1 + ((*buf) >> 3);
if (end - start < 10)
{
// unrolled scan: runs alternate between the start value and its
// complement, so the answer flips with the run index
unsigned sv = *buf & 1;
unsigned sv1= sv ^ 1;
if (buf[1] >= pos) return sv;
if (buf[2] >= pos) return sv1;
if (buf[3] >= pos) return sv;
if (buf[4] >= pos) return sv1;
if (buf[5] >= pos) return sv;
if (buf[6] >= pos) return sv1;
if (buf[7] >= pos) return sv;
if (buf[8] >= pos) return sv1;
if (buf[9] >= pos) return sv;
BM_ASSERT(0);
}
else
{
while (start != end)
{
unsigned curr = (start + end) >> 1;
if (buf[curr] < pos)
start = curr + 1;
else
end = curr;
}
}
// bit value = start value XOR parity of the containing run index
return ((*buf) & 1) ^ ((--start) & 1);
}
/**
    Test a single bit in a GAP block, dispatching to the best available
    SIMD implementation (SSE2/SSE4.2/AVX2) with the scalar gap_test()
    as reference (asserted equal in debug builds).
    @return 0 or 1 -- value of the bit at 'pos'
*/
template<typename T>
unsigned gap_test_unr(const T* BMRESTRICT buf, const unsigned pos) BMNOEXCEPT
{
BM_ASSERT(pos < bm::gap_max_bits);
if (pos == 0) {
return (*buf) & 1; // bit 0 is the header's start-value flag
}
#if defined(BMSSE2OPT)
unsigned res = bm::sse2_gap_test(buf, pos);
BM_ASSERT(res == bm::gap_test(buf, pos));
#elif defined(BMSSE42OPT)
unsigned res = bm::sse42_gap_test(buf, pos);
BM_ASSERT(res == bm::gap_test(buf, pos));
#elif defined(BMAVX2OPT)
unsigned res = bm::avx2_gap_test(buf, pos);
BM_ASSERT(res == bm::gap_test(buf, pos));
#else
unsigned res = bm::gap_test(buf, pos);
#endif
return res;
}
/**
    Visit all non-zero blocks in the block-index range [nb_from..nb_to].
    Calls f(block) for materialized blocks and f.add_full(bits) for
    virtual full sub-arrays (FULL_BLOCK_FAKE_ADDR).
    @param root - top-level block pointer array
    @param top_size - size of the top-level array
    @param nb_from, nb_to - inclusive linear block index range
    @param f - visitor functor
*/
template<typename T, typename N, typename F>
void for_each_nzblock_range(T*** root,
N top_size, N nb_from, N nb_to, F& f) BMNOEXCEPT
{
BM_ASSERT(top_size);
if (nb_from > nb_to)
return;
// split linear block indexes into (top, sub) coordinates
unsigned i_from = unsigned(nb_from >> bm::set_array_shift);
unsigned j_from = unsigned(nb_from & bm::set_array_mask);
unsigned i_to = unsigned(nb_to >> bm::set_array_shift);
unsigned j_to = unsigned(nb_to & bm::set_array_mask);
if (i_from >= top_size)
return;
if (i_to >= top_size) // clip the range to the allocated top array
{
i_to = unsigned(top_size-1);
j_to = bm::set_sub_array_size-1;
}
for (unsigned i = i_from; i <= i_to; ++i)
{
T** blk_blk = root[i];
if (!blk_blk)
continue;
if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
{
unsigned j = (i == i_from) ? j_from : 0;
// whole sub-array in range: account it in one call
if (!j && (i != i_to)) f.add_full(bm::set_sub_total_bits);
else
{
do
{
f.add_full(bm::gap_max_bits);
if ((i == i_to) && (j == j_to))
return;
} while (++j < bm::set_sub_array_size);
}
}
else
{
unsigned j = (i == i_from) ? j_from : 0;
do
{
if (blk_blk[j])
f(blk_blk[j]);
if ((i == i_to) && (j == j_to))
return;
} while (++j < bm::set_sub_array_size);
}
} }
/**
    Visit every block slot in the tree: f(block, index) for non-zero
    blocks, f.on_empty_block(index) for NULL slots, plus top-level
    callbacks on_empty_top(i) / on_non_empty_top(i).
    SIMD builds test 2 (SSE4) or 4 (AVX2/512) pointers per step for
    all-NULL waves before touching individual slots.
*/
template<class T, class F>
void for_each_nzblock(T*** root, unsigned size1, F& f)
{
typedef typename F::size_type size_type;
for (unsigned i = 0; i < size1; ++i)
{
T** blk_blk = root[i];
if (!blk_blk)
{
f.on_empty_top(i);
continue;
}
f.on_non_empty_top(i);
if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
{
// virtual all-ones sub-array: report every slot as full
size_type r = i * bm::set_sub_array_size;
unsigned j = 0;
do
{
f(FULL_BLOCK_FAKE_ADDR, r + j);
} while (++j < bm::set_sub_array_size);
continue;
}
unsigned non_empty_top = 0;
size_type r = i * bm::set_sub_array_size;
unsigned j = 0;
do
{
#if defined(BM64_AVX2) || defined(BM64_AVX512)
// test 4 block pointers at once for all-NULL
if (!avx2_test_all_zero_wave(blk_blk + j))
{
non_empty_top = 1;
T* blk0 = blk_blk[j + 0];
T* blk1 = blk_blk[j + 1];
T* blk2 = blk_blk[j + 2];
T* blk3 = blk_blk[j + 3];
size_type block_idx = r + j + 0;
if (blk0)
f(blk0, block_idx);
else
f.on_empty_block(block_idx);
if (blk1)
f(blk1, block_idx + 1);
else
f.on_empty_block(block_idx + 1);
if (blk2)
f(blk2, block_idx + 2);
else
f.on_empty_block(block_idx + 2);
if (blk3)
f(blk3, block_idx + 3);
else
f.on_empty_block(block_idx + 3);
}
else
{
f.on_empty_block(r + j + 0); f.on_empty_block(r + j + 1);
f.on_empty_block(r + j + 2); f.on_empty_block(r + j + 3);
}
j += 4;
#elif defined(BM64_SSE4)
// test 2 block pointers at once for all-NULL
if (!sse42_test_all_zero_wave((blk_blk + j)))
{
non_empty_top = 1;
T* blk0 = blk_blk[j + 0];
T* blk1 = blk_blk[j + 1];
size_type block_idx = r + j + 0;
if (blk0)
f(blk0, block_idx);
else
f.on_empty_block(block_idx);
++block_idx;
if (blk1)
f(blk1, block_idx);
else
f.on_empty_block(block_idx);
}
else
{
f.on_empty_block(r + j + 0);
f.on_empty_block(r + j + 1);
}
j += 2;
#else
if (blk_blk[j])
{
f(blk_blk[j], r + j);
non_empty_top = 1;
}
else
f.on_empty_block(r + j);
++j;
#endif
} while (j < bm::set_sub_array_size);
// sub-array existed but had only NULL slots
if (non_empty_top == 0)
f.on_empty_top(i);
} }
/**
    Visit every non-zero block pointer: calls f(block) (no index).
    Virtual full sub-arrays (FULL_BLOCK_FAKE_ADDR) are expanded into
    per-slot f(FULL_BLOCK_FAKE_ADDR) calls. SIMD builds pre-test 2/4
    pointers per load for all-NULL before dereferencing slots.
    Fixed: '#elif' and '#endif' directives were fused onto code lines
    ('} #elif', '} #endif'), which is not valid preprocessing.
*/
template<class T, class F>
void for_each_nzblock2(T*** root, unsigned size1, F& f)
{
#ifdef BM64_SSE4
    for (unsigned i = 0; i < size1; ++i)
    {
        T** blk_blk;
        if ((blk_blk = root[i])!=0)
        {
            if (blk_blk == (T**)FULL_BLOCK_FAKE_ADDR)
            {
                for (unsigned j = 0; j < bm::set_sub_array_size; ++j)
                {
                    f(FULL_BLOCK_FAKE_ADDR);
                }
                continue;
            }
            {
                __m128i w0;
                for (unsigned j = 0; j < bm::set_sub_array_size; j+=4)
                {
                    w0 = _mm_loadu_si128((__m128i*)(blk_blk + j));
                    if (!_mm_testz_si128(w0, w0)) // any of 2 ptrs non-NULL?
                    {
                        T* blk0 = blk_blk[j + 0];
                        T* blk1 = blk_blk[j + 1];
                        if (blk0)
                            f(blk0);
                        if (blk1)
                            f(blk1);
                    }
                    w0 = _mm_loadu_si128((__m128i*)(blk_blk + j + 2));
                    if (!_mm_testz_si128(w0, w0))
                    {
                        T* blk0 = blk_blk[j + 2];
                        T* blk1 = blk_blk[j + 3];
                        if (blk0)
                            f(blk0);
                        if (blk1)
                            f(blk1);
                    }
                }
            }
        }
    }
#elif defined(BM64_AVX2) || defined(BM64_AVX512)
    for (unsigned i = 0; i < size1; ++i)
    {
        T** blk_blk;
        if ((blk_blk = root[i]) != 0)
        {
            if (blk_blk == (T**)FULL_BLOCK_FAKE_ADDR)
            {
                for (unsigned j = 0; j < bm::set_sub_array_size; ++j)
                {
                    f(FULL_BLOCK_FAKE_ADDR);
                }
                continue;
            }
            for (unsigned j = 0; j < bm::set_sub_array_size; j += 4)
            {
                __m256i w0 = _mm256_loadu_si256((__m256i*)(blk_blk + j));
                if (!_mm256_testz_si256(w0, w0)) // any of 4 ptrs non-NULL?
                {
                    T* blk0 = blk_blk[j + 0];
                    T* blk1 = blk_blk[j + 1];
                    T* blk2 = blk_blk[j + 2];
                    T* blk3 = blk_blk[j + 3];
                    if (blk0)
                        f(blk0);
                    if (blk1)
                        f(blk1);
                    if (blk2)
                        f(blk2);
                    if (blk3)
                        f(blk3);
                }
            }
        }
    }
#else // portable 4x unrolled scan
    for (unsigned i = 0; i < size1; ++i)
    {
        T** blk_blk;
        if ((blk_blk = root[i])!=0)
        {
            if (blk_blk == (T**)FULL_BLOCK_FAKE_ADDR)
            {
                for (unsigned j = 0; j < bm::set_sub_array_size; ++j)
                {
                    f(FULL_BLOCK_FAKE_ADDR);
                }
                continue;
            }
            unsigned j = 0;
            do
            {
                if (blk_blk[j])
                    f(blk_blk[j]);
                if (blk_blk[j+1])
                    f(blk_blk[j+1]);
                if (blk_blk[j+2])
                    f(blk_blk[j+2]);
                if (blk_blk[j+3])
                    f(blk_blk[j+3]);
                j += 4;
            } while (j < bm::set_sub_array_size);
        }
    }
#endif
}
/**
    Visit non-zero blocks with early termination: stops and returns true
    as soon as the predicate f(block, index) returns true.
    @return true if the traversal was stopped by the predicate
*/
template<typename T, typename BI, typename F>
bool for_each_nzblock_if(T*** root, BI size1, F& f) BMNOEXCEPT
{
    BI block_idx = 0;
    for (BI i = 0; i < size1; ++i)
    {
        T** sub = root[i];
        if (!sub) // empty top slot: skip a whole sub-array of indexes
        {
            block_idx += bm::set_sub_array_size;
            continue;
        }
        if (sub == (T**)FULL_BLOCK_FAKE_ADDR) // virtual all-ones sub-array
        {
            for (unsigned j = 0; j < bm::set_sub_array_size; ++j, ++block_idx)
            {
                if (f(FULL_BLOCK_FAKE_ADDR, block_idx))
                    return true;
            }
            continue;
        }
        for (unsigned j = 0; j < bm::set_sub_array_size; ++j, ++block_idx)
        {
            if (sub[j])
                if (f(sub[j], block_idx))
                    return true;
        }
    }
    return false;
}
/**
    Visit every block slot (including empty ones): calls f(ptr, index)
    for each, passing 0 for slots under a NULL top pointer and
    FULL_BLOCK_FAKE_ADDR for virtual all-ones sub-arrays.
    @param start - block index of the first slot
*/
template<class T, class F, typename BLOCK_IDX>
void for_each_block(T*** root, unsigned size1, F& f, BLOCK_IDX start)
{
    BLOCK_IDX block_idx = start;
    for (unsigned i = 0; i < size1; ++i)
    {
        T** sub = root[i];
        if (!sub) // no sub-array: report each slot as empty
        {
            for (unsigned j = 0; j < bm::set_sub_array_size; ++j, ++block_idx)
                f(0, block_idx);
            continue;
        }
        if (sub == (T**)FULL_BLOCK_FAKE_ADDR) // virtual all-ones sub-array
        {
            for (unsigned j = 0; j < bm::set_sub_array_size; ++j, ++block_idx)
                f(FULL_BLOCK_FAKE_ADDR, block_idx);
            continue;
        }
        for (unsigned j = 0; j < bm::set_sub_array_size; ++j, ++block_idx)
            f(sub[j], block_idx);
    }
}
/**
    Apply functor f to every element in [first, last).
    Fixed: the original do/while loop dereferenced 'first' even when the
    range was empty (first == last) -- undefined behavior; now an empty
    range is a safe no-op.
    @return the (possibly stateful) functor after traversal
*/
template<class T, class F> F bmfor_each(T first, T last, F f)
{
    for (; first < last; ++first)
        f(*first);
    return f;
}
/// Sum of all elements in [first, last), accumulated as 64-bit unsigned
template<typename T>
bm::id64_t sum_arr(const T* first, const T* last) BMNOEXCEPT
{
    bm::id64_t acc = 0;
    while (first < last)
        acc += *first++;
    return acc;
}
/**
    Split a GAP block into two arrays of run boundary values, routed by
    the alternating set-state of each run: values whose run is "set" go
    to arr1, others to arr0. Only runs of length 1 (delta == 1 from the
    previous boundary) are recorded.
    @param buf - GAP buffer
    @param arr0 - [out] values from non-set runs
    @param arr1 - [out] values from set runs
    @param arr0_cnt, arr1_cnt - [out] element counts
*/
template<typename T>
void gap_split(const T* buf,
T* arr0, T* arr1, T& arr0_cnt, T& arr1_cnt) BMNOEXCEPT
{
const T* pcurr = buf;
unsigned len = (*pcurr >> 3);
const T* pend = pcurr + len;
T cnt0, cnt1;
cnt0 = cnt1 = 0;
unsigned is_set = (*buf & 1);
// special-case: header word itself is zero
if (*pcurr == 0)
{
if (is_set)
{
arr1[cnt1] = *pcurr;
++cnt1;
}
else
{
arr0[cnt0] = *pcurr;
++cnt0;
}
}
T prev = *pcurr;
++pcurr;
while (pcurr <= pend)
{
is_set ^= 1; // runs alternate set/unset
T delta = *pcurr - prev;
if (delta == 1) // run of exactly one element
{
if (is_set)
{
arr1[cnt1] = prev;
++cnt1;
}
else
{
arr0[cnt0] = prev;
++cnt0;
}
}
prev = *pcurr++;
}
arr0_cnt = cnt0;
arr1_cnt = cnt1;
}
/**
    Count set bits in a GAP block by summing the lengths of 1-runs
    (run boundaries at even/odd positions alternate 0s and 1s).
    @param buf - GAP buffer
    @param dsize - optional pre-read GAP length (0 = read from header)
    @return number of set bits
*/
template<typename T>
unsigned gap_bit_count(const T* buf, unsigned dsize=0) BMNOEXCEPT
{
const T* pcurr = buf;
if (dsize == 0)
dsize = (*pcurr >> 3);
const T* pend = pcurr + dsize;
unsigned bits_counter = 0;
++pcurr;
if (*buf & 1) // block starts with a 1-run: count it first
{
bits_counter += *pcurr + 1;
++pcurr;
}
// from here 1-runs sit at every other boundary: add their lengths
for (++pcurr; pcurr <= pend; pcurr += 2)
bits_counter += *pcurr - *(pcurr-1);
return bits_counter;
}
// Unrolled/vectorized variant of gap_bit_count: sums 1-run lengths of a
// GAP block, dispatching to AVX2/SSE helpers when available, with a plain
// 4x-unrolled loop otherwise. A scalar tail loop finishes whatever the
// fast path did not consume; the final BM_ASSERT cross-checks against the
// reference implementation.
template<typename T>
unsigned gap_bit_count_unr(const T* buf) BMNOEXCEPT
{
const T* pcurr = buf;
unsigned dsize = (*pcurr >> 3); // run count from the header
unsigned cnt = 0;
pcurr = buf + 1; T first_one = *buf & 1; // polarity of the first run
if (first_one)
{
cnt += *pcurr + 1; // leading 1-run [0 .. *pcurr]
++pcurr;
}
++pcurr;
#if defined(BMAVX2OPT) || defined(BMAVX512OPT)
if (dsize > 34)
{
const unsigned unr_factor = 32;
unsigned waves = (dsize-2) / unr_factor;
pcurr = avx2_gap_sum_arr(pcurr, waves, &cnt);
}
#elif defined(BMSSE42OPT) || defined(BMSSE2OPT)
if (dsize > 18)
{
const unsigned unr_factor = 16;
unsigned waves = (dsize - 2) / unr_factor;
pcurr = sse2_gap_sum_arr(pcurr, waves, &cnt);
}
#else
if (dsize > 10)
{
const unsigned unr_factor = 8;
unsigned waves = (dsize - 2) / unr_factor;
// NOTE(review): `i += unr_factor` makes this run ~waves/8 times, not
// `waves` times as the name suggests (looks like it was meant to be
// ++i). Result stays correct because the tail loop below finishes the
// remainder -- but verify intent before "fixing".
for (unsigned i = 0; i < waves; i += unr_factor)
{
// each iteration consumes 8 values = 4 (end-of-0-run, end-of-1-run) pairs
cnt += pcurr[0] - pcurr[0 - 1];
cnt += pcurr[2] - pcurr[2 - 1];
cnt += pcurr[4] - pcurr[4 - 1];
cnt += pcurr[6] - pcurr[6 - 1];
pcurr += unr_factor;
} }
#endif
// scalar tail: sum the remaining 1-run lengths
const T* pend = buf + dsize;
for ( ; pcurr <= pend ; pcurr+=2)
{
cnt += *pcurr - *(pcurr - 1);
}
BM_ASSERT(cnt == bm::gap_bit_count(buf));
return cnt;
}
// Count set bits of a GAP block within the closed range [left..right].
// Uses a branch-free trick: is_set is turned into an all-ones / all-zero
// mask so partial-run lengths are added only while inside a 1-run.
template<typename T>
unsigned gap_bit_count_range(const T* const buf,
unsigned left, unsigned right) BMNOEXCEPT
{
BM_ASSERT(left <= right);
BM_ASSERT(right < bm::gap_max_bits);
const T* pcurr = buf;
const T* pend = pcurr + (*pcurr >> 3);
unsigned bits_counter = 0;
unsigned is_set;
unsigned start_pos = bm::gap_bfind(buf, left, &is_set);
is_set = ~(is_set - 1u); // 0 -> 0x00000000, 1 -> 0xFFFFFFFF
pcurr = buf + start_pos;
if (right <= *pcurr) { // range fits inside the run containing `left`
bits_counter = unsigned(right - left + 1u) & is_set; return bits_counter;
}
bits_counter += unsigned(*pcurr - left + 1u) & is_set; // partial first run
unsigned prev_gap = *pcurr++;
// flip the mask at every run boundary; add run lengths only when set
for (is_set ^= ~0u; right > *pcurr; is_set ^= ~0u)
{
bits_counter += (*pcurr - prev_gap) & is_set;
if (pcurr == pend)
return bits_counter;
prev_gap = *pcurr++;
}
bits_counter += unsigned(right - prev_gap) & is_set; // partial last run
return bits_counter;
}
template<typename T>
bool gap_is_all_one_range(const T* const BMRESTRICT buf,
unsigned left, unsigned right) BMNOEXCEPT
{
    BM_ASSERT(left <= right);
    BM_ASSERT(right < bm::gap_max_bits);
    /// True when the closed range [left..right] lies entirely
    /// inside a single 1-run of the GAP block.
    unsigned is_set;
    const unsigned idx = bm::gap_bfind(buf, left, &is_set);
    return is_set && (right <= buf[idx]);
}
template<typename T>
bool gap_any_range(const T* const BMRESTRICT buf,
unsigned left, unsigned right) BMNOEXCEPT
{
    BM_ASSERT(left <= right);
    BM_ASSERT(right < bm::gap_max_bits);
    /// Is there ANY set bit in the closed range [left..right]?
    unsigned is_set;
    const unsigned idx = bm::gap_bfind(buf, left, &is_set);
    if (is_set)               // left itself lands inside a 1-run
        return true;
    // left is in a 0-run: anything set only if the range extends past it
    return right > buf[idx];
}
template<typename T>
bool gap_is_interval(const T* const BMRESTRICT buf,
unsigned left, unsigned right) BMNOEXCEPT
{
    BM_ASSERT(left <= right);
    BM_ASSERT(left > 0); BM_ASSERT(right < bm::gap_max_bits-1);
    /// True when [left..right] is EXACTLY one 1-run:
    /// the run starts at left, ends at right, and has 0s on both sides.
    unsigned is_set;
    const unsigned start_pos = bm::gap_bfind(buf, left, &is_set);
    const T* const p = buf + start_pos;
    if (!is_set || (right != *p) || (start_pos <= 1))
        return false;
    return (p[-1] == left - 1);   // preceding 0-run must end right before left
}
template<typename T>
bool gap_find_interval_end(const T* const BMRESTRICT buf,
unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
{
    BM_ASSERT(pos);
    BM_ASSERT(nbit < bm::gap_max_bits);
    /// If nbit falls inside a 1-run, report that run's last bit via *pos.
    unsigned is_set;
    const unsigned idx = bm::gap_bfind(buf, nbit, &is_set);
    if (!is_set)
        return false;          // nbit is in a 0-run: no interval
    *pos = buf[idx];           // run-end value IS the interval end
    return true;
}
template<typename T>
bool gap_find_interval_start(const T* const BMRESTRICT buf,
unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
{
    BM_ASSERT(pos);
    BM_ASSERT(nbit < bm::gap_max_bits);
    /// If nbit falls inside a 1-run, report that run's first bit via *pos.
    unsigned is_set;
    unsigned idx = bm::gap_bfind(buf, nbit, &is_set);
    if (!is_set)
        return false;          // nbit is in a 0-run: no interval
    --idx;                     // step to the previous run's end value
    // idx == 0 means the 1-run is the very first run (starts at bit 0)
    *pos = idx ? unsigned(buf[idx]) + 1u : 0;
    return true;
}
template<typename T>
bool gap_find_prev(const T* const BMRESTRICT buf,
unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
{
    BM_ASSERT(pos);
    BM_ASSERT(nbit < bm::gap_max_bits);
    /// Find the closest set bit at or before nbit; false when none exists.
    unsigned is_set;
    unsigned idx = bm::gap_bfind(buf, nbit, &is_set);
    if (is_set)                // nbit itself is set
    {
        *pos = nbit;
        return true;
    }
    --idx;                     // step back to the previous run's end
    if (!idx)                  // leading 0-run: nothing set before nbit
        return false;
    *pos = buf[idx];           // end of the preceding 1-run
    return true;
}
// Find the position of the rank-th set bit at or after nbit_from in a GAP
// block. On success returns 0 and sets nbit_pos; otherwise returns the
// residual rank left to satisfy (caller continues in the next block).
// Uses the all-ones/all-zero is_set mask trick from gap_bit_count_range.
template<typename T, typename SIZE_TYPE>
SIZE_TYPE gap_find_rank(const T* const block,
SIZE_TYPE rank,
unsigned nbit_from,
unsigned& nbit_pos) BMNOEXCEPT
{
BM_ASSERT(block);
BM_ASSERT(rank);
const T* pcurr = block;
const T* pend = pcurr + (*pcurr >> 3);
unsigned bits_counter = 0;
unsigned is_set;
unsigned start_pos = bm::gap_bfind(block, nbit_from, &is_set);
is_set = ~(is_set - 1u); // 0 -> 0x0, 1 -> all ones
pcurr = block + start_pos;
bits_counter += unsigned(*pcurr - nbit_from + 1u) & is_set; // partial run
if (bits_counter >= rank) { // answer is inside the first (partial) run
nbit_pos = nbit_from + unsigned(rank) - 1u;
return 0;
}
rank -= bits_counter;
unsigned prev_gap = *pcurr++;
// scan whole runs, flipping the mask at every boundary
for (is_set ^= ~0u; pcurr <= pend; is_set ^= ~0u)
{
bits_counter = (*pcurr - prev_gap) & is_set; // run length if it is a 1-run
if (bits_counter >= rank) {
nbit_pos = prev_gap + unsigned(rank);
return 0;
}
rank -= bits_counter;
prev_gap = *pcurr++;
}
return rank; // not found in this block: remaining rank
}
// Count set bits of a GAP block in [0..right]. When is_corrected is true,
// one bit is subtracted if `right` itself lands in a 1-run (used by rank
// queries that exclude the queried position).
template<typename T>
unsigned gap_bit_count_to(const T* const buf, T right,
bool is_corrected=false) BMNOEXCEPT
{
const T* pcurr = buf;
const T* pend = pcurr + (*pcurr >> 3);
unsigned bits_counter = 0;
// polarity of the first run as an all-ones/all-zero mask
unsigned is_set = ~((unsigned(*buf) & 1u) - 1u); BM_ASSERT(is_set == 0u || is_set == ~0u);
pcurr = buf + 1;
if (right <= *pcurr) { // target inside the first run
bits_counter = (right + 1u) & is_set; bits_counter -= (is_set & unsigned(is_corrected));
return bits_counter;
}
bits_counter += (*pcurr + 1u) & is_set; // whole first run
unsigned prev_gap = *pcurr++;
// flip the mask at each boundary; accumulate only 1-run lengths
for (is_set ^= ~0u; right > *pcurr; is_set ^= ~0u)
{
bits_counter += (*pcurr - prev_gap) & is_set;
if (pcurr == pend) // ran off the block end
{
bits_counter -= (is_set & unsigned(is_corrected));
return bits_counter;
}
prev_gap = *pcurr++;
}
bits_counter += (right - prev_gap) & is_set; // partial last run
bits_counter -= (is_set & unsigned(is_corrected));
return bits_counter;
}
template<class T, class Func>
void for_each_dgap(const T* gap_buf, Func& func)
{
    /// Stream the delta (D-GAP) form of a GAP block into func:
    /// the first value is (first run-end + 1), every following value is the
    /// difference between consecutive run-end entries. The do/while keeps
    /// the original contract: at least one difference is always emitted.
    const T* scan = gap_buf + 1;
    const T* const scan_end = gap_buf + (*gap_buf >> 3);
    T last = *scan++;
    func((T)(last + 1));        // leading delta
    do
    {
        func((T)(*scan - last));
        last = *scan;
    } while (++scan < scan_end);
}
template<typename T> struct d_copy_func
{
    /// Functor for for_each_dgap: stores each received d-gap value
    /// into a caller-supplied buffer, advancing the write cursor.
    d_copy_func(T* dg_buf) : dgap_buf_(dg_buf) {}
    void operator()(T dgap) { *dgap_buf_++ = dgap; }

    T* dgap_buf_;   ///< current write position (one past the last value)
};
template<typename T>
T* gap_2_dgap(const T* BMRESTRICT gap_buf,
T* BMRESTRICT dgap_buf, bool copy_head=true) BMNOEXCEPT
{
    /// Convert a GAP block into its delta (D-GAP) representation.
    /// @return pointer one past the last value written.
    T* out = dgap_buf;
    if (copy_head)
        *out++ = *gap_buf;   // carry the original header word over
    d_copy_func<T> copier(out);
    for_each_dgap<T, d_copy_func<T> >(gap_buf, copier);
    return copier.dgap_buf_;
}
// Inverse of gap_2_dgap: rebuild a GAP block from its delta form.
// The header is either taken from the stream or supplied explicitly.
template<typename T>
void dgap_2_gap(const T* BMRESTRICT dgap_buf,
T* BMRESTRICT gap_buf, T gap_header=0) BMNOEXCEPT
{
const T* pcurr = dgap_buf;
unsigned len;
if (!gap_header) { // header travels with the delta stream
len = *pcurr >> 3;
*gap_buf++ = *pcurr++; }
else { // header supplied by the caller
len = gap_header >> 3;
*gap_buf++ = gap_header; }
--len; const T* pend = pcurr + len;
// first delta was stored as (value + 1); 0 means it wrapped from 65535
*gap_buf = *pcurr++; if (*gap_buf == 0)
*gap_buf = 65535; else
*gap_buf = T(*gap_buf - 1);
// prefix-sum the remaining deltas back into absolute run-end values
for (++gap_buf; pcurr < pend; ++pcurr)
{
T prev = *(gap_buf-1); *gap_buf++ = T(*pcurr + prev);
}
*gap_buf = 65535; } // closing sentinel
template<typename T>
int gapcmp(const T* buf1, const T* buf2) BMNOEXCEPT
{
    /// Compare two GAP blocks as bit strings (strcmp-style result:
    /// negative, zero or positive). Walks both blocks run by run,
    /// toggling the current run value at every boundary.
    const T* p1 = buf1 + 1;
    const T* p2 = buf2 + 1;
    const T* const end1 = buf1 + (*buf1 >> 3);
    unsigned val1 = *buf1 & 1;    // current run value of block 1
    unsigned val2 = *buf2 & 1;    // current run value of block 2
    while (p1 <= end1)
    {
        if (*p1 == *p2)
        {
            // runs end together: blocks differ only if values differ
            if (val1 != val2)
                return val1 ? 1 : -1;
        }
        else
        {
            if (val1 == val2)
            {
                // same value, different run length: shorter 1-run loses
                if (val1)
                    return (*p1 < *p2) ? -1 : 1;
                return (*p1 < *p2) ? 1 : -1;
            }
            return val1 ? 1 : -1;
        }
        ++p1; ++p2;
        val1 ^= 1u;
        val2 ^= 1u;
    }
    return 0;
}
template<typename T>
bool gap_find_first_diff(const T* BMRESTRICT buf1,
const T* BMRESTRICT buf2,
unsigned* BMRESTRICT pos) BMNOEXCEPT
{
    BM_ASSERT(buf1 && buf2 && pos);
    /// Find the first bit position where two GAP blocks diverge.
    /// @return true and the position via *pos, false if no difference found.
    const T* const end1 = buf1 + (*buf1 >> 3);
    const T* p1 = buf1 + 1;
    const T* p2 = buf2 + 1;
    for (; p1 <= end1; ++p1, ++p2)
    {
        if (*p1 != *p2)
        {
            // divergence starts right after the shorter common run
            *pos = 1 + ((*p1 < *p2) ? *p1 : *p2);
            return true;
        }
    }
    return false;
}
// Generic logical operation (functor F) over two GAP buffers, producing a
// GAP result in dest. vect*_mask (0 or 1) optionally inverts an operand.
// Classic two-way merge of run boundaries: the output pointer advances
// only when the operation result changes value, which naturally coalesces
// equal adjacent runs.
template<typename T, class F>
void gap_buff_op(T* BMRESTRICT dest,
const T* BMRESTRICT vect1,
unsigned vect1_mask,
const T* BMRESTRICT vect2,
unsigned vect2_mask,
unsigned& dlen) BMNOEXCEPT2
{
const T* cur1 = vect1;
const T* cur2 = vect2;
// current run values of both operands (mask applies inversion)
T bitval1 = (T)((*cur1++ & 1) ^ vect1_mask);
T bitval2 = (T)((*cur2++ & 1) ^ vect2_mask);
T bitval = (T) F::op(bitval1, bitval2);
T bitval_prev = bitval;
T* res = dest;
*res = bitval; // header seeded with the result's first-run value
++res;
T c1 = *cur1; T c2 = *cur2;
while (1)
{
bitval = (T) F::op(bitval1, bitval2);
// emit a boundary only when the result value actually changes
res += (bitval != bitval_prev);
bitval_prev = bitval;
if (c1 < c2) { // operand 1 run ends first
*res = c1;
++cur1; c1 = *cur1;
bitval1 ^= 1;
}
else {
*res = c2;
if (c2 < c1) { // operand 2 run ends first
bitval2 ^= 1;
}
else { // both runs end at the same position
if (c2 == (bm::gap_max_bits - 1))
break; // common terminating sentinel
++cur1; c1 = *cur1;
bitval1 ^= 1; bitval2 ^= 1;
}
++cur2; c2 = *cur2;
}
}
dlen = (unsigned)(res - dest);
// pack result length into the header, preserving level/value bits
*dest = (T)((*dest & 7) + (dlen << 3));
}
template<typename T, class F>
unsigned gap_buff_any_op(const T* BMRESTRICT vect1,
unsigned vect1_mask,
const T* BMRESTRICT vect2,
unsigned vect2_mask) BMNOEXCEPT2
{
    // Logical operation F over two GAP buffers with early exit:
    // returns non-zero as soon as any run of F(v1, v2) is set,
    // 0 when the result is empty. vect*_mask (0/1) inverts an operand.
    //
    // Fixed: removed the dead `bitval_prev` bookkeeping -- the function
    // returns whenever bitval is non-zero, so the variable could only
    // ever hold 0 and the update branch had no effect.
    const T* cur1 = vect1;
    const T* cur2 = vect2;
    unsigned bitval1 = (*cur1++ & 1) ^ vect1_mask;   // current run value of v1
    unsigned bitval2 = (*cur2++ & 1) ^ vect2_mask;   // current run value of v2
    unsigned bitval = F::op(bitval1, bitval2);
    if (bitval)
        return bitval;
    while (1)
    {
        bitval = F::op(bitval1, bitval2);
        if (bitval)
            return bitval;
        if (*cur1 < *cur2)          // v1 run ends first
        {
            ++cur1;
            bitval1 ^= 1;
        }
        else {
            if (*cur2 < *cur1)      // v2 run ends first
            {
                bitval2 ^= 1;
            }
            else {                  // runs end together
                if (*cur2 == (bm::gap_max_bits - 1))
                {
                    break;          // common terminating sentinel
                }
                ++cur1;
                bitval1 ^= 1; bitval2 ^= 1;
            }
            ++cur2;
        }
    }
    return 0;
}
// Count the set bits of F(vect1, vect2) without materializing the result
// GAP block. Merges run boundaries of both operands; whenever the current
// result value is 1, the covered span (res - res_prev) is accumulated.
template<typename T, class F>
unsigned gap_buff_count_op(const T* vect1, const T* vect2) BMNOEXCEPT2
{
unsigned count; const T* cur1 = vect1;
const T* cur2 = vect2;
unsigned bitval1 = (*cur1++ & 1); // current run value of operand 1
unsigned bitval2 = (*cur2++ & 1); // current run value of operand 2
unsigned bitval = count = F::op(bitval1, bitval2);
unsigned bitval_prev = bitval;
T res, res_prev; // current / previous result boundary positions
res = res_prev = 0;
while (1)
{
bitval = F::op(bitval1, bitval2);
if (bitval != bitval_prev) // result value flipped at this boundary
{
bitval_prev = bitval;
res_prev = res;
}
if (*cur1 < *cur2) // operand 1 run ends first
{
res = *cur1;
if (bitval)
{
count += res - res_prev;
res_prev = res;
}
++cur1; bitval1 ^= 1;
}
else {
res = *cur2;
if (bitval)
{
count += res - res_prev;
res_prev = res;
}
if (*cur2 < *cur1) // operand 2 run ends first
{
bitval2 ^= 1;
}
else { // both runs end at the same position
if (*cur2 == (bm::gap_max_bits - 1))
break; // terminating sentinel
++cur1;
bitval1 ^= 1; bitval2 ^= 1;
}
++cur2;
}
}
return count;
}
#ifdef __GNUG__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
#endif
// Set or clear one bit (pos) in a GAP block.
// *is_set receives 1 when the block changed, 0 when the bit already had
// the requested value. Returns the new GAP length (header units).
// The in-place edit has four cases: bit 0 flip, extend previous run,
// shrink current run boundary, or split a run (insert two boundaries).
template<typename T>
unsigned gap_set_value(unsigned val,
T* BMRESTRICT buf,
unsigned pos,
unsigned* BMRESTRICT is_set) BMNOEXCEPT
{
BM_ASSERT(pos < bm::gap_max_bits);
unsigned curr = bm::gap_bfind(buf, pos, is_set);
T end = (T)(*buf >> 3);
if (*is_set == val) // nothing to do: bit already holds `val`
{
*is_set = 0;
return end;
}
*is_set = 1;
T* pcurr = buf + curr;
T* pprev = pcurr - 1;
T* pend = buf + end;
if (!pos) // editing bit 0: flip the header's start-value flag
{
*buf ^= 1;
if (buf[1]) { // first run keeps length: insert a new 0-length lead run
::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t));
buf[1] = 0;
++end;
}
else { // first run collapses: merge it away
pprev = buf + 1; pcurr = pprev + 1;
goto copy_gaps;
}
}
else
if (curr > 1 && ((unsigned)(*pprev))+1 == pos) { // extend previous run by one
++(*pprev);
if (*pprev == *pcurr) { // runs became adjacent: merge boundaries out
--end;
if (pcurr != pend) {
++pcurr;
copy_gaps: // shared tail-compaction path (also reached from the !pos case)
--end;
do { *pprev++ = *pcurr++; } while (pcurr < pend);
}
}
}
else
if (*pcurr == pos) { // pos is the last bit of its run: shrink the boundary
--(*pcurr);
end += (pcurr == pend);
}
else { // split the run: open a hole with two new boundaries
if (*pcurr != bm::gap_max_bits-1) ::memmove(pcurr+2, pcurr, (end - curr + 1)*(sizeof(T)));
end += 2;
pcurr[0] = (T)(pos-1);
pcurr[1] = (T)pos;
}
// refresh header length and restore the closing sentinel
*buf = (T)((*buf & 7) + (end << 3));
buf[end] = bm::gap_max_bits-1;
return end;
}
// Overload of gap_set_value without the is_set out-parameter.
// Same in-place edit logic as the 4-argument version; returns the
// (possibly unchanged) GAP length.
template<typename T>
unsigned gap_set_value(unsigned val,
T* BMRESTRICT buf,
unsigned pos) BMNOEXCEPT
{
BM_ASSERT(pos < bm::gap_max_bits);
unsigned is_set;
unsigned curr = bm::gap_bfind(buf, pos, &is_set);
T end = (T)(*buf >> 3);
if (is_set == val) // bit already holds the requested value
return end;
T* pcurr = buf + curr;
T* pprev = pcurr - 1;
T* pend = buf + end;
if (!pos) // editing bit 0: flip the header's start-value flag
{
*buf ^= 1;
if (buf[1]) { // insert a zero-length leading run
::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t));
buf[1] = 0;
++end;
}
else { // leading run collapses: compact the tail
pprev = buf + 1; pcurr = pprev + 1;
goto copy_gaps;
}
}
else
if (curr > 1 && ((unsigned)(*pprev))+1 == pos) { // extend previous run
++(*pprev);
if (*pprev == *pcurr) { // adjacent runs merged: drop boundaries
--end;
if (pcurr != pend) {
++pcurr;
copy_gaps: // shared tail-compaction path
--end;
do { *pprev++ = *pcurr++; } while (pcurr < pend);
}
}
}
else
if (*pcurr == pos) { // pos ends its run: shrink the boundary
--(*pcurr);
end += (pcurr == pend);
}
else { // split the run: insert two boundaries
if (*pcurr != bm::gap_max_bits-1) ::memmove(pcurr+2, pcurr, (end - curr + 1)*(sizeof(T)));
end += 2;
pcurr[0] = (T)(pos-1);
pcurr[1] = (T)pos;
}
// refresh header length and restore the closing sentinel
*buf = (T)((*buf & 7) + (end << 3));
buf[end] = bm::gap_max_bits-1;
return end;
}
// Append-style bit set: adds bit `pos` at (or near) the end of a GAP
// block. Specialized fast path of gap_set_value for monotonically growing
// input; works against the final run only. Returns the new length.
template<typename T>
unsigned gap_add_value(T* buf, unsigned pos) BMNOEXCEPT
{
BM_ASSERT(pos < bm::gap_max_bits);
T end = (T)(*buf >> 3);
T curr = end;
T* pcurr = buf + end; // last entry (the 65535 sentinel)
T* pend = pcurr;
T* pprev = pcurr - 1;
if (!pos) // setting bit 0: flip header start-value
{
*buf ^= 1;
if ( buf[1] ) { // insert a zero-length leading run
::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t));
buf[1] = 0;
++end;
}
else { // leading run collapses: compact the tail
pprev = buf + 1; pcurr = pprev + 1;
--end;
do { *pprev++ = *pcurr++; } while (pcurr < pend);
}
}
else if (((unsigned)(*pprev))+1 == pos && (curr > 1) ) { // extend last run
++(*pprev);
if (*pprev == *pcurr) { // merged into the sentinel run
--end;
BM_ASSERT(pcurr == pend);
}
}
else if (*pcurr == pos) { // pos is the run's end bit
--(*pcurr);
end += (pcurr == pend);
}
else { // open a new run right before the sentinel
pcurr[0] = (T)(pos-1);
pcurr[1] = (T)pos;
end = (T)(end+2);
}
// refresh header length and restore the closing sentinel
*buf = (T)((*buf & 7) + (end << 3));
buf[end] = bm::gap_max_bits - 1;
return end;
}
#ifdef __GNUG__
#pragma GCC diagnostic pop
#endif
// Shift a GAP block right (towards higher bit positions) by 1, carrying
// co_flag into bit 0. Returns the carry-out (the former top bit).
// Implemented by incrementing every run boundary, then setting bit 0.
template<typename T>
bool gap_shift_r1(T* BMRESTRICT buf,
unsigned co_flag, unsigned* BMRESTRICT new_len) BMNOEXCEPT
{
BM_ASSERT(new_len);
bool co;
{
unsigned bitval = *buf & 1;
if (buf[1] == bm::gap_max_bits-1) { // single run covers the block
co = bitval;
}
else
{
unsigned len = (*buf >> 3);
unsigned i = 1;
// shift all boundaries up by one; bitval tracks the top run's value
for (; i < len; ++i)
{
buf[i]++;
bitval ^= 1;
} BM_ASSERT(buf[i] == bm::gap_max_bits-1);
// last boundary hit the sentinel value: drop it
if (buf[i-1] == bm::gap_max_bits-1) {
--len;
*buf = (T)((*buf & 7) + (len << 3));
}
co = bitval; // value shifted out at the top
}
}
{
// carry-in becomes bit 0
unsigned is_set;
*new_len = bm::gap_set_value(co_flag, buf, 0, &is_set);
}
return co;
}
// Shift a GAP block left (towards lower bit positions) by 1, carrying
// co_flag into the top bit. Returns the carry-out (the former bit 0).
// Implemented by decrementing every run boundary, then setting the top bit.
template<typename T>
bool gap_shift_l1(T* BMRESTRICT buf,
unsigned co_flag, unsigned* BMRESTRICT new_len) BMNOEXCEPT
{
BM_ASSERT(new_len);
unsigned is_set;
unsigned bitval = *buf & 1;
bool co0 = bitval; // carry-out is the current bit 0
if (!buf[1]) { // first run has zero length: flip bit 0 away first
bitval ^= 1;
*new_len = bm::gap_set_value(bitval, buf, 0, &is_set);
BM_ASSERT(is_set);
BM_ASSERT(buf[1]);
BM_ASSERT(bitval == unsigned(*buf & 1u));
if (*new_len == 1) // block degenerated to a single run
{
*new_len = bm::gap_set_value(co_flag, buf,
bm::gap_max_bits-1, &is_set);
return co0;
}
}
if (buf[1] != bm::gap_max_bits-1) { // not a single full-block run
BM_ASSERT(buf[1]);
unsigned len = (*buf >> 3);
unsigned i = 1;
// shift all boundaries down by one
for (; i < len; ++i)
{
buf[i]--;
bitval ^= 1;
} BM_ASSERT(buf[i] == bm::gap_max_bits-1);
}
// carry-in becomes the top bit
*new_len = bm::gap_set_value(co_flag, buf, bm::gap_max_bits-1, &is_set);
return co0;
}
// Build a GAP block from a sorted array of bit indices.
// Consecutive indices are coalesced into runs (tracked via `acc`).
// Returns the total block length including the closing sentinel.
template<typename T>
unsigned gap_set_array(T* buf, const T* arr, unsigned len) BMNOEXCEPT
{
// reset header: keep level bits (mask 6), length 1, start-value 0
*buf = (T)((*buf & 6u) + (1u << 3));
T* pcurr = buf + 1;
unsigned i = 0;
T curr = arr[i];
if (curr != 0) { // leading 0-run ends right before the first index
*pcurr = (T)(curr - 1);
++pcurr;
}
else
{
++(*buf); } // first bit is 0: block starts with a 1-run
T prev = curr;
T acc = prev; // end of the 1-run being accumulated
for (i = 1; i < len; ++i)
{
curr = arr[i];
if (curr == prev + 1) // contiguous: extend the current run
{
++acc;
prev = curr;
}
else // gap in the indices: close the 1-run, open a 0-run
{
*pcurr++ = acc;
acc = curr;
*pcurr++ = (T)(curr-1);
}
prev = curr;
}
*pcurr = acc; // close the final 1-run
if (acc != bm::gap_max_bits - 1)
{
++pcurr;
*pcurr = bm::gap_max_bits - 1; // closing sentinel
}
unsigned gap_len = unsigned(pcurr - buf);
BM_ASSERT(gap_len == ((gap_len << 3) >> 3));
*buf = (T)((*buf & 7) + (gap_len << 3)); // store length in the header
return gap_len+1;
}
template<typename T>
unsigned bit_array_compute_gaps(const T* arr, unsigned len) BMNOEXCEPT
{
    /// Estimate the GAP length needed to encode a sorted index array
    /// (mirrors the run coalescing done by gap_set_array).
    unsigned gaps = 1;
    T last = arr[0];
    if (last > 0)     // block does not start at bit 0: leading 0-run
        ++gaps;
    for (unsigned i = 1; i < len; ++i)
    {
        const T cur = arr[i];
        if (cur != last + 1)   // discontinuity adds a 0-run and a 1-run
            gaps += 2;
        last = cur;
    }
    return gaps;
}
template<typename T>
unsigned gap_block_find(const T* BMRESTRICT buf,
unsigned nbit,
bm::id_t* BMRESTRICT prev) BMNOEXCEPT
{
    BM_ASSERT(nbit < bm::gap_max_bits);
    /// Find nbit itself or the next set bit after it.
    /// @return non-zero when found (position via *prev), 0 otherwise.
    unsigned bitval;
    const unsigned idx = bm::gap_bfind(buf, nbit, &bitval);
    if (bitval)                 // nbit is set
    {
        *prev = nbit;
        return 1u;
    }
    // next 1-run starts right after the 0-run containing nbit
    const unsigned nxt = buf[idx] + 1;
    *prev = nxt;
    return (nxt != bm::gap_max_bits);   // 0 when we ran off the block
}
BMFORCEINLINE
void set_bit(unsigned* dest, unsigned bitpos) BMNOEXCEPT
{
    /// Set one bit inside a bit-block.
    const unsigned nbit  = bitpos & bm::set_block_mask;   // bit within block
    const unsigned nword = nbit >> bm::set_word_shift;    // word within block
    dest[nword] |= (1u << (nbit & bm::set_word_mask));
}
BMFORCEINLINE
void clear_bit(unsigned* dest, unsigned bitpos) BMNOEXCEPT
{
    /// Clear one bit inside a bit-block.
    const unsigned nbit  = bitpos & bm::set_block_mask;   // bit within block
    const unsigned nword = nbit >> bm::set_word_shift;    // word within block
    dest[nword] &= ~(1u << (nbit & bm::set_word_mask));
}
BMFORCEINLINE
unsigned test_bit(const unsigned* block, unsigned bitpos) BMNOEXCEPT
{
    /// Test one bit inside a bit-block; returns 0 or 1.
    const unsigned nbit  = bitpos & bm::set_block_mask;   // bit within block
    const unsigned nword = nbit >> bm::set_word_shift;    // word within block
    return (block[nword] >> (nbit & bm::set_word_mask)) & 1u;
}
// OR a run of `bitcount` set bits starting at `bitpos` into a bit-block.
// Handles the unaligned head word, bulk all-ones middle, and partial tail.
inline
void or_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount) BMNOEXCEPT
{
const unsigned maskFF = ~0u;
dest += unsigned(bitpos >> bm::set_word_shift); bitpos &= bm::set_word_mask;
if (bitcount == 1u) { // single-bit fast path
*dest |= (1u << bitpos);
return;
}
if (bitpos) { // unaligned head word
unsigned mask_r = maskFF << bitpos; // bits from bitpos upward
unsigned right_margin = bitpos + bitcount;
if (right_margin < 32) // run fits in this one word
{
*dest |= (maskFF >> (32 - right_margin)) & mask_r;
return;
}
*dest++ |= mask_r;
bitcount -= 32 - bitpos;
}
// full words: set two at a time, then one
for ( ;bitcount >= 64; bitcount-=64, dest+=2)
dest[0] = dest[1] = maskFF;
if (bitcount >= 32)
{
*dest++ = maskFF; bitcount -= 32;
}
if (bitcount) // partial tail word
{
*dest |= maskFF >> (32 - bitcount);
}
}
// Clear (AND-NOT) a run of `bitcount` bits starting at `bitpos` in a
// bit-block. Mirror of or_bit_block: unaligned head, zeroed middle, tail.
inline
void sub_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount) BMNOEXCEPT
{
const unsigned maskFF = ~0u;
dest += unsigned(bitpos >> bm::set_word_shift); bitpos &= bm::set_word_mask;
if (bitcount == 1u) { // single-bit fast path
*dest &= ~(1u << bitpos);
return;
}
if (bitpos) { // unaligned head word
unsigned mask_r = maskFF << bitpos; // bits from bitpos upward
unsigned right_margin = bitpos + bitcount;
if (right_margin < 32) // run fits in this one word
{
*dest &= ~((maskFF >> (32 - right_margin)) & mask_r);
return;
}
*dest++ &= ~mask_r;
bitcount -= 32 - bitpos;
}
// full words: zero two at a time, then one
for ( ;bitcount >= 64; bitcount-=64, dest+=2)
dest[0] = dest[1] = 0u;
if (bitcount >= 32)
{
*dest++ = 0u; bitcount -= 32;
}
if (bitcount) // partial tail word
{
*dest &= ~(maskFF >> (32 - bitcount));
}
}
// XOR a run of `bitcount` bits starting at `bitpos` in a bit-block.
// Same head/middle/tail structure as or_bit_block, using the
// mask_r_u32 / mask_l_u32 helpers for the partial-word masks.
inline void xor_bit_block(unsigned* dest,
unsigned bitpos,
unsigned bitcount) BMNOEXCEPT
{
unsigned nbit = unsigned(bitpos & bm::set_block_mask);
unsigned nword = unsigned(nbit >> bm::set_word_shift);
nbit &= bm::set_word_mask;
bm::word_t* word = dest + nword;
if (bitcount == 1) { // single-bit fast path
*word ^= unsigned(1 << nbit);
return;
}
if (nbit) { // unaligned head word
unsigned right_margin = nbit + bitcount;
if (right_margin < 32) // run fits in this one word
{
unsigned mask_r = bm::mask_r_u32(nbit);
unsigned mask_l = bm::mask_l_u32(right_margin-1);
unsigned mask = mask_r & mask_l;
*word ^= mask;
return;
}
*word ^= bm::mask_r_u32(nbit);
bitcount -= 32 - nbit;
++word;
}
// full words: flip two at a time, then one
for ( ;bitcount >= 64; bitcount-=64, word+=2)
{
word[0] ^= ~0u; word[1] ^= ~0u;
}
if (bitcount >= 32)
{
*word++ ^= ~0u; bitcount -= 32;
}
if (bitcount) // partial tail word
{
*word ^= bm::mask_l_u32(bitcount-1);
}
}
template<typename T>
void gap_sub_to_bitset(unsigned* BMRESTRICT dest,
const T* BMRESTRICT pcurr) BMNOEXCEPT
{
    BM_ASSERT(dest && pcurr);
    /// dest &= ~gap : clear every 1-run of the GAP block in the bit-block.
    const T* const gap_end = pcurr + (*pcurr >> 3);
    if (*pcurr & 1)   // block opens with a 1-run at bit 0
    {
        bm::sub_bit_block(dest, 0, pcurr[1] + 1);
        ++pcurr;
    }
    pcurr += 2;
    while (pcurr <= gap_end)
    {
        BM_ASSERT(*pcurr > pcurr[-1]);
        bm::sub_bit_block(dest, pcurr[-1] + 1u, *pcurr - pcurr[-1]);
        pcurr += 2;
    }
}
// Digest-aware variant of gap_sub_to_bitset: dest &= ~gap, but runs whose
// target range is already all-zero per digest0 are skipped, and scanning
// is clipped to the digest's populated span.
template<typename T>
void gap_sub_to_bitset(unsigned* BMRESTRICT dest,
const T* BMRESTRICT pcurr, bm::id64_t digest0) BMNOEXCEPT
{
BM_ASSERT(dest && pcurr);
const T* pend = pcurr + (*pcurr >> 3);
if (*pcurr & 1) { // leading 1-run at bit 0
bool all_zero = bm::check_zero_digest(digest0, 0, pcurr[1]);
if (!all_zero)
bm::sub_bit_block(dest, 0, pcurr[1] + 1); pcurr += 3;
}
else
pcurr += 2;
{
// skip runs entirely below the first populated digest stripe
unsigned tz = bm::count_trailing_zeros_u64(digest0);
unsigned start_pos = tz << set_block_digest_pos_shift;
for (; pcurr <= pend; pcurr += 2) {
if (*pcurr >= start_pos)
break;
}
}
// stop once past the last populated digest stripe
unsigned lz = bm::count_leading_zeros_u64(digest0);
unsigned stop_pos = (64u - lz) << set_block_digest_pos_shift;
unsigned bc, pos;
T prev;
for (; pcurr <= pend; pcurr += 2) {
BM_ASSERT(*pcurr > *(pcurr-1));
prev = pcurr[-1];
bc = *pcurr - prev;   // run length
pos = 1u + prev;      // run start position
bool all_zero = bm::check_zero_digest(digest0, prev, *pcurr);
if (!all_zero)
bm::sub_bit_block(dest, pos, bc);
if (pos > stop_pos)
break;
} }
template<typename T>
void gap_xor_to_bitset(unsigned* BMRESTRICT dest,
const T* BMRESTRICT pcurr) BMNOEXCEPT
{
    BM_ASSERT(dest && pcurr);
    /// dest ^= gap : XOR every 1-run of the GAP block into the bit-block.
    const T* const gap_end = pcurr + (*pcurr >> 3);
    if (*pcurr & 1)   // block opens with a 1-run at bit 0
    {
        bm::xor_bit_block(dest, 0, pcurr[1] + 1);
        ++pcurr;
    }
    pcurr += 2;
    while (pcurr <= gap_end)
    {
        BM_ASSERT(*pcurr > pcurr[-1]);
        bm::xor_bit_block(dest, pcurr[-1] + 1u, *pcurr - pcurr[-1]);
        pcurr += 2;
    }
}
template<typename T>
void gap_add_to_bitset(unsigned* BMRESTRICT dest,
const T* BMRESTRICT pcurr, unsigned len) BMNOEXCEPT
{
    BM_ASSERT(dest && pcurr);
    /// dest |= gap : OR every 1-run of the GAP block into the bit-block.
    const T* const gap_end = pcurr + len;
    BM_ASSERT(*gap_end == 65535);
    if (*pcurr & 1)   // block opens with a 1-run at bit 0
    {
        bm::or_bit_block(dest, 0, pcurr[1] + 1);
        pcurr += 3;
    }
    else
        pcurr += 2;
    while (pcurr <= gap_end)
    {
        BM_ASSERT(*pcurr > pcurr[-1]);
        const unsigned pos = pcurr[-1] + 1u;    // run start
        const unsigned bc = *pcurr - pcurr[-1]; // run length
        pcurr += 2;
        bm::or_bit_block(dest, pos, bc);
    }
}
template<typename T>
void gap_add_to_bitset(unsigned* BMRESTRICT dest,
const T* BMRESTRICT pcurr) BMNOEXCEPT
{
    /// Convenience overload: take the GAP length from the block header.
    const unsigned glen = unsigned(*pcurr >> 3);
    gap_add_to_bitset(dest, pcurr, glen);
}
template<typename T>
void gap_and_to_bitset(unsigned* BMRESTRICT dest,
const T* BMRESTRICT pcurr) BMNOEXCEPT
{
    BM_ASSERT(dest && pcurr);
    /// dest &= gap : clear bits falling into the GAP block's 0-runs.
    const T* const gap_end = pcurr + (*pcurr >> 3);
    if (!(*pcurr & 1))   // block opens with a 0-run at bit 0
    {
        bm::sub_bit_block(dest, 0, pcurr[1] + 1);
        pcurr += 3;
    }
    else
        pcurr += 2;
    while (pcurr <= gap_end)
    {
        BM_ASSERT(*pcurr > *(pcurr-1));
        const unsigned pos = pcurr[-1] + 1u;    // 0-run start
        const unsigned bc = *pcurr - pcurr[-1]; // 0-run length
        pcurr += 2;
        bm::sub_bit_block(dest, pos, bc);
    }
}
// Digest-aware variant: dest &= gap, skipping 0-runs whose target range
// is already all-zero per digest0 and clipping the scan to the digest's
// populated span. No-op when the digest says the block is empty.
template<typename T>
void gap_and_to_bitset(unsigned* BMRESTRICT dest,
const T* BMRESTRICT pcurr, bm::id64_t digest0) BMNOEXCEPT
{
BM_ASSERT(dest && pcurr);
if (!digest0)
return;
const T* pend = pcurr + (*pcurr >> 3);
if (!(*pcurr & 1) ) { // leading 0-run at bit 0
bool all_zero = bm::check_zero_digest(digest0, 0, pcurr[1]);
if (!all_zero)
bm::sub_bit_block(dest, 0, pcurr[1] + 1); pcurr += 3;
}
else
pcurr += 2;
{
// skip runs entirely below the first populated digest stripe
unsigned tz = bm::count_trailing_zeros_u64(digest0);
unsigned start_pos = tz << set_block_digest_pos_shift;
for (; pcurr <= pend; pcurr += 2) {
if (*pcurr >= start_pos)
break;
}
}
// stop once past the last populated digest stripe
unsigned lz = bm::count_leading_zeros_u64(digest0);
unsigned stop_pos = (64u - lz) << set_block_digest_pos_shift;
unsigned bc, pos;
T prev;
for (; pcurr <= pend; pcurr += 2) {
BM_ASSERT(*pcurr > *(pcurr-1));
prev = pcurr[-1];
bc = *pcurr - prev;   // 0-run length
pos = 1u + prev;      // 0-run start position
bool all_zero = bm::check_zero_digest(digest0, prev, *pcurr);
if (!all_zero)
bm::sub_bit_block(dest, pos, bc);
if (pos > stop_pos) break;
} }
template<typename T>
bm::id_t gap_bitset_and_count(const unsigned* BMRESTRICT block,
const T* BMRESTRICT pcurr) BMNOEXCEPT
{
    BM_ASSERT(block);
    /// popcount(bit-block AND gap-block): counts bit-block bits that fall
    /// into the GAP block's 1-runs.
    const T* const gap_end = pcurr + (*pcurr >> 3);
    bm::id_t cnt = 0;
    if (*pcurr & 1)   // leading 1-run at bit 0
    {
        cnt += bm::bit_block_calc_count_range(block, 0, pcurr[1]);
        ++pcurr;
    }
    for (pcurr += 2; pcurr <= gap_end; pcurr += 2)
        cnt += bm::bit_block_calc_count_range(block, pcurr[-1] + 1, *pcurr);
    return cnt;
}
template<typename T>
bm::id_t gap_bitset_and_any(const unsigned* BMRESTRICT block,
const T* BMRESTRICT pcurr) BMNOEXCEPT
{
    BM_ASSERT(block);
    /// Any common bit between a bit-block and a GAP block? (early exit)
    const T* const gap_end = pcurr + (*pcurr >> 3);
    bm::id_t found = 0;
    if (*pcurr & 1)   // leading 1-run at bit 0
    {
        found = bm::bit_block_any_range(block, 0, pcurr[1]);
        ++pcurr;
    }
    for (pcurr += 2; !found && pcurr <= gap_end; pcurr += 2)
        found = bm::bit_block_any_range(block, pcurr[-1] + 1, *pcurr);
    return found;
}
template<typename T>
bm::id_t gap_bitset_sub_count(const unsigned* BMRESTRICT block,
const T* BMRESTRICT buf) BMNOEXCEPT
{
    BM_ASSERT(block);
    /// popcount(bit-block AND NOT gap-block): counts bit-block bits that
    /// fall into the GAP block's 0-runs.
    const T* const gap_end = buf + (*buf >> 3);
    const T* p = buf + 1;
    bm::id_t cnt = 0;
    if (!(*buf & 1))   // leading 0-run at bit 0
    {
        cnt += bm::bit_block_calc_count_range(block, 0, *p);
        ++p;
    }
    for (++p; p <= gap_end; p += 2)
        cnt += bm::bit_block_calc_count_range(block, p[-1] + 1, *p);
    return cnt;
}
template<typename T>
bm::id_t gap_bitset_sub_any(const unsigned* BMRESTRICT block,
const T* BMRESTRICT buf) BMNOEXCEPT
{
    BM_ASSERT(block);
    /// Any bit-block bit inside the GAP block's 0-runs? (early exit)
    const T* const gap_end = buf + (*buf >> 3);
    const T* p = buf + 1;
    bm::id_t found = 0;
    if (!(*buf & 1))   // leading 0-run at bit 0
    {
        found = bm::bit_block_any_range(block, 0, *p);
        if (found)
            return found;
        ++p;
    }
    for (++p; !found && p <= gap_end; p += 2)
        found = bm::bit_block_any_range(block, p[-1] + 1, *p);
    return found;
}
// popcount(bit-block XOR gap-block). Walks GAP runs; inside 1-runs the
// XOR popcount equals (run length - block popcount), inside 0-runs it is
// the block popcount itself -- `bitval` tracks which case applies.
template<typename T>
bm::id_t gap_bitset_xor_count(const unsigned* BMRESTRICT block,
const T* BMRESTRICT buf) BMNOEXCEPT
{
BM_ASSERT(block);
const T* pcurr = buf;
const T* pend = pcurr + (*pcurr >> 3);
++pcurr;
unsigned bitval = *buf & 1; // value of the current GAP run
bm::id_t count = bm::bit_block_calc_count_range(block, 0, *pcurr);
if (bitval) // 1-run: complement the count over the range
{
count = *pcurr + 1 - count;
}
for (bitval^=1, ++pcurr; pcurr <= pend; bitval^=1, ++pcurr)
{
T prev = (T)(*(pcurr-1)+1);
bm::id_t c = bit_block_calc_count_range(block, prev, *pcurr);
if (bitval) c = (*pcurr - prev + 1) - c; // complement inside 1-runs
count += c;
}
return count;
}
// Any bit set in (bit-block XOR gap-block)? Same run walk as
// gap_bitset_xor_count but exits as soon as count becomes non-zero.
template<typename T>
bm::id_t gap_bitset_xor_any(const unsigned* BMRESTRICT block,
const T* BMRESTRICT buf) BMNOEXCEPT
{
BM_ASSERT(block);
const T* pcurr = buf;
const T* pend = pcurr + (*pcurr >> 3);
++pcurr;
unsigned bitval = *buf & 1; // value of the current GAP run
bm::id_t count = bit_block_any_range(block, 0, *pcurr);
if (bitval) // 1-run: complement over the range
count = *pcurr + 1 - count;
for (bitval^=1, ++pcurr; !count && pcurr <= pend; bitval^=1, ++pcurr)
{
T prev = (T)(*(pcurr-1)+1);
bm::id_t c = bit_block_any_range(block, prev, *pcurr);
if (bitval) c = (*pcurr - prev + 1) - c; // complement inside 1-runs
count += c;
}
return count;
}
// popcount(bit-block OR gap-block). Inside GAP 1-runs the OR contributes
// the whole run length; inside 0-runs it contributes the bit-block's own
// popcount over the range -- `bitval` selects the case per run.
template<typename T>
bm::id_t gap_bitset_or_count(const unsigned* BMRESTRICT block,
const T* BMRESTRICT buf) BMNOEXCEPT
{
BM_ASSERT(block);
const T* pcurr = buf;
const T* pend = pcurr + (*pcurr >> 3);
++pcurr;
unsigned bitval = *buf & 1; // value of the current GAP run
bm::id_t count = bitval ? *pcurr + 1
: bm::bit_block_calc_count_range(block, 0, *pcurr);
for (bitval^=1, ++pcurr; pcurr <= pend; bitval^=1, ++pcurr)
{
T prev = (T)(*(pcurr-1)+1);
bm::id_t c =
bitval ? (*pcurr - prev + 1) // 1-run dominates the OR
: bm::bit_block_calc_count_range(block, prev, *pcurr);
count += c;
}
return count;
}
template<typename T>
bm::id_t gap_bitset_or_any(const unsigned* BMRESTRICT block,
const T* BMRESTRICT buf) BMNOEXCEPT
{
    /// OR of the two blocks is non-empty iff either operand has any bit
    /// (short-circuits: the bit-block is scanned only when the GAP block
    /// is empty).
    return !bm::gap_is_all_zero(buf) || !bm::bit_is_all_zero(block);
}
inline
void bit_block_set(bm::word_t* BMRESTRICT dst, bm::word_t value) BMNOEXCEPT
{
    // Fill the whole bit-block with `value` (word-wise).
#ifdef BMVECTOPT
    VECT_SET_BLOCK(dst, value);
#else
    // Fixed: the previous ::memset(dst, int(value), ...) replicated only
    // the LOW BYTE of `value` into every byte, which is wrong for any word
    // value that is not a uniform byte pattern (0x00.., 0xFF..) and was
    // inconsistent with the VECT_SET_BLOCK path. A word-wise fill matches
    // the vector semantics; compilers lower the 0 / ~0 cases to memset.
    for (unsigned i = 0; i < bm::set_block_size; ++i)
        dst[i] = value;
#endif
}
template<typename T>
void gap_convert_to_bitset(unsigned* BMRESTRICT dest,
const T* BMRESTRICT buf,
unsigned len=0) BMNOEXCEPT
{
    /// Expand a GAP block into a freshly zeroed bit-block.
    /// @param len optional explicit GAP length; 0 means "read the header"
    bm::bit_block_set(dest, 0);
    const unsigned glen = len ? len : bm::gap_length(buf) - 1;
    bm::gap_add_to_bitset(dest, buf, glen);
}
template<typename T>
unsigned* gap_convert_to_bitset_smart(unsigned* BMRESTRICT dest,
const T* BMRESTRICT buf,
id_t set_max) BMNOEXCEPT
{
    /// Like gap_convert_to_bitset, but a degenerate single-run block is
    /// returned as the canonical FULL block address (all ones) or NULL
    /// (all zero) instead of being materialized.
    if (buf[1] == set_max - 1)   // one run spans the whole block
        return (buf[0] & 1) ? FULL_BLOCK_REAL_ADDR : 0;
    bm::gap_convert_to_bitset(dest, buf);
    return dest;
}
// Debug/validation helper: walks the GAP block asserting that run-end
// values are strictly increasing, then returns the last entry (which
// should be the 65535 sentinel). The loop body exists for the asserts.
template<typename T>
unsigned gap_control_sum(const T* buf) BMNOEXCEPT
{
unsigned end = *buf >> 3;
const T* pcurr = buf;
const T* pend = pcurr + (*pcurr >> 3);
++pcurr;
if (*buf & 1) { // skip the leading 1-run entry
++pcurr;
}
++pcurr; while (pcurr <= pend)
{
BM_ASSERT(*pcurr > *(pcurr-1)); // monotonicity check
pcurr += 2;
}
return buf[end]; // closing sentinel value
}
template<class T>
void gap_set_all(T* buf, unsigned set_max, unsigned value) BMNOEXCEPT
{
    BM_ASSERT(value == 0 || value == 1);
    /// Reset the GAP block to a single run of `value` covering all bits.
    /// Header keeps its level bits (mask 6); length becomes 1.
    *buf = (T)((*buf & 6u) + (1u << 3) + value);
    buf[1] = (T)(set_max - 1);
}
template<class T>
void gap_init_range_block(T* buf,
T from,
T to,
T value) BMNOEXCEPT
{
    BM_ASSERT(value == 0 || value == 1);
    /// Initialize a GAP block as a single [from..to] range of `value`
    /// (everything outside the range gets the complement).
    const unsigned set_max = bm::bits_in_block;
    if (!from)                       // range starts at bit 0
    {
        if (to == set_max - 1)       // degenerate: whole block
        {
            bm::gap_set_all(buf, set_max, value);
            return;
        }
        buf[1] = (T)to;
        buf[2] = (T)(set_max - 1);
        buf[0] = (T)((*buf & 6u) + (2u << 3) + value);   // len=2
        return;
    }
    value = !value;                  // block opens with the complement run
    unsigned glen;
    if (to == set_max - 1)           // range reaches the block's end
    {
        glen = 2;
        buf[1] = (T)(from - 1);
        buf[2] = (T)(set_max - 1);
    }
    else                             // complement runs on both sides
    {
        glen = 3;
        buf[1] = (T)(from - 1);
        buf[2] = (T)to;
        buf[3] = (T)(set_max - 1);
    }
    buf[0] = (T)((*buf & 6u) + (glen << 3) + value);
}
template<typename T> void gap_invert(T* buf) BMNOEXCEPT
{
    /// Logical NOT of a GAP block: the run boundaries stay the same,
    /// only the header's start-value flag flips.
    *buf = (T)(*buf ^ 1u);
}
#ifdef __GNUG__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
#endif
// Store the allocation level in the GAP header.
// Header layout: [length << 3 | level << 1 | start-value bit].
template<typename T>
void set_gap_level(T* buf, int level) BMNOEXCEPT
{
BM_ASSERT(level >= 0);
BM_ASSERT(unsigned(level) < bm::gap_levels);
// (level & 3) fits the two header bits -- relies on gap_levels <= 4
*buf = (T)(((level & 3) << 1) | (*buf & 1) | (*buf & ~7));
}
#ifdef __GNUG__
#pragma GCC diagnostic pop
#endif
template<typename T>
int gap_calc_level(unsigned len, const T* glevel_len) BMNOEXCEPT
{
    /// Pick the smallest GAP level whose capacity fits `len` plus a
    /// 4-word reserve. Returns -1 when no level is large enough.
    for (int i = 0; i < 4; ++i)
    {
        if (len <= unsigned(glevel_len[i] - 4))
            return i;
    }
    BM_ASSERT(bm::gap_levels == 4);   // the check above assumes 4 levels
    return -1;
}
template<typename T>
inline unsigned gap_free_elements(const T* BMRESTRICT buf,
const T* BMRESTRICT glevel_len) BMNOEXCEPT
{
    /// Remaining capacity (in words) of a GAP block at its current level.
    const unsigned used = bm::gap_length(buf);
    return bm::gap_capacity(buf, glevel_len) - used;
}
template<typename T>
int bitcmp(const T* buf1, const T* buf2, unsigned len) BMNOEXCEPT
{
    BM_ASSERT(len);
    /// Compare two bit-blocks as bit-strings, LSB-first within each word.
    /// Returns strcmp-style negative / zero / positive.
    for (unsigned i = 0; i < len; ++i)
    {
        const T w1 = buf1[i];
        const T w2 = buf2[i];
        const T diff = w1 ^ w2;
        if (diff)
        {
            // isolate the lowest differing bit; whoever owns it wins
            return (w1 & diff & -diff) ? 1 : -1;
        }
    }
    return 0;
}
// Find the first differing bit position between two bit-blocks.
// Dispatches to a SIMD helper, a 64-bit word scan, or a 32-bit word scan
// depending on the build configuration.
inline
bool bit_find_first_diff(const bm::word_t* BMRESTRICT blk1,
const bm::word_t* BMRESTRICT blk2,
unsigned* BMRESTRICT pos) BMNOEXCEPT
{
BM_ASSERT(blk1 && blk2 && pos);
#ifdef VECT_BIT_FIND_DIFF
bool f = VECT_BIT_FIND_DIFF(blk1, blk2, pos);
return f;
#else
#ifdef BM64OPT
BM_ASSERT(sizeof(bm::wordop_t) == 8);
// scan as 64-bit words via the block's union view
const bm::bit_block_t::bunion_t* BMRESTRICT b1_u =
(const bm::bit_block_t::bunion_t*)(blk1);
const bm::bit_block_t::bunion_t* BMRESTRICT b2_u =
(const bm::bit_block_t::bunion_t*)(blk2);
for (unsigned i = 0; i < bm::set_block_size/2; ++i)
{
bm::wordop_t w1 = b1_u->w64[i];
bm::wordop_t w2 = b2_u->w64[i];
bm::wordop_t diff = w1 ^ w2;
if (diff)
{
// lowest set bit of the XOR is the first difference
unsigned idx = bm::count_trailing_zeros_u64(diff);
*pos = unsigned(idx + (i * 8u * unsigned(sizeof(bm::wordop_t))));
return true;
}
}
#else
// generic 32-bit word scan
for (unsigned i = 0; i < bm::set_block_size; ++i)
{
bm::word_t w1 = blk1[i]; bm::word_t w2 = blk2[i];
bm::word_t diff = w1 ^ w2;
if (diff)
{
unsigned idx = bm::bit_scan_forward32(diff); *pos = unsigned(idx + (i * 8u * sizeof(bm::word_t)));
return true;
}
}
#endif
#endif
return false;
}
// Convert a bit-block into GAP representation.
// Fast-skips all-zero / all-one words; otherwise bit-scans inside the
// word to locate run boundaries. Returns the resulting GAP length
// (dest_len is an assert-only capacity bound in release builds).
inline
unsigned bit_block_to_gap(gap_word_t* BMRESTRICT dest,
const unsigned* BMRESTRICT block,
unsigned dest_len) BMNOEXCEPT
{
const unsigned* BMRESTRICT block_end = block + bm::set_block_size;
gap_word_t* BMRESTRICT pcurr = dest;
gap_word_t* BMRESTRICT end = dest + dest_len; (void)end;
unsigned bitval = (*block) & 1u; // value of the current run
*pcurr++ = bm::gap_word_t(bitval); // header seeded with the start value
*pcurr = 0;
unsigned bit_idx = 0;
do
{
unsigned val = *block;
// fast path: words that are all 0s or all 1s extend the current run
while (!val || val == ~0u)
{
if (bitval != unsigned(bool(val))) // run value flips at this word
{
*pcurr++ = (gap_word_t)(bit_idx-1);
bitval ^= 1u;
BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
BM_ASSERT(pcurr != end);
}
bit_idx += unsigned(sizeof(*block) * 8);
if (++block >= block_end)
goto complete;
val = *block;
}
// mixed word: bit-scan for run boundaries inside it
unsigned bits_consumed = 0;
do
{
unsigned tz = 1u;
if (bitval != (val & 1u)) // boundary right here
{
*pcurr++ = (gap_word_t)(bit_idx-1);
bitval ^= 1u;
BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
BM_ASSERT(pcurr != end);
}
else {
// skip ahead to the next place the run value can change
tz = bm::bit_scan_forward32(bitval ? ~val : val);
}
bits_consumed += tz;
bit_idx += tz;
val >>= tz;
if (!val) // rest of the word is all zero
{
if (bits_consumed < 32u) // close the run if the word isn't exhausted
{
*pcurr++ = (gap_word_t)(bit_idx-1);
bitval ^= 1u;
bit_idx += 32u - bits_consumed;
BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
BM_ASSERT(pcurr != end);
}
break;
}
} while (1);
} while(++block < block_end);
complete:
*pcurr = (gap_word_t)(bit_idx-1); // closing sentinel
unsigned len = (unsigned)(pcurr - dest);
*dest = (gap_word_t)((*dest & 7) + (len << 3)); // pack length into header
return len;
}
/// @brief Convert bit-block to GAP, dispatching to the SIMD implementation
/// when VECT_BIT_TO_GAP is available, otherwise the portable scalar version.
inline
unsigned bit_to_gap(gap_word_t* BMRESTRICT dest,
                    const unsigned* BMRESTRICT block,
                    unsigned dest_len) BMNOEXCEPT
{
#if defined(VECT_BIT_TO_GAP)
    return VECT_BIT_TO_GAP(dest, block, dest_len);
#else
    return bm::bit_block_to_gap(dest, block, dest_len);
#endif
}
/// @brief Walk a GAP (run-length) block and feed set-bit deltas to a functor.
///
/// The functor receives, for the first set bit, (position + 1); for every
/// subsequent set bit, the distance to the previous set bit.
///
/// @param buf  - GAP buffer: head word (value flag in bit 0, length << 3)
///               followed by run-end positions
/// @param func - callable invoked as func(unsigned delta)
template<class T, class F>
void for_each_gap_dbit(const T* buf, F& func)
{
    const T* gap_last = buf + (*buf >> 3); // last entry of the GAP buffer
    const T* cur = buf + 1;

    unsigned last_bit = 0;  // previously reported set-bit position
    unsigned first_adj;     // +1 adjustment pending until the first delta is out
    if (*buf & 1)           // block opens with a run of 1s
    {
        first_adj = 0;
        unsigned run_end = *cur;
        for (unsigned k = 0; k <= run_end; ++k)
            func(1);        // bits 0..run_end are consecutive: delta 1 each
        last_bit = run_end;
        ++cur;
    }
    else
    {
        first_adj = 1;
    }
    ++cur; // advance to the end of the next 1-run
    while (cur <= gap_last)
    {
        unsigned run_from = unsigned(*(cur - 1)) + 1;
        unsigned run_to = *cur;
        if (first_adj)
        {
            func(run_from - last_bit + first_adj);
            first_adj = 0;
        }
        else
        {
            func(run_from - last_bit);
        }
        for (unsigned k = run_from + 1; k <= run_to; ++k)
            func(1);        // consecutive bits inside the 1-run
        last_bit = run_to;
        cur += 2;           // skip the following 0-run
    }
}
/// @brief Expand a GAP (run-length) block into an array of set-bit indices.
///
/// @param dest     - destination array
/// @param buf      - source GAP buffer
/// @param dest_len - destination capacity guard; on overflow returns 0
/// @param invert   - when true, emit indices of 0-bits instead of 1-bits
/// @return number of indices written, or 0 if dest_len would be exceeded
template<typename D, typename T>
D gap_convert_to_arr(D* BMRESTRICT dest,
                     const T* BMRESTRICT buf,
                     unsigned dest_len,
                     bool invert = false) BMNOEXCEPT
{
    const T* BMRESTRICT cur = buf;
    const T* gap_last = cur + (*cur >> 3);
    D* BMRESTRICT out = dest;
    ++cur;
    int fill = (*buf) & 1; // value of the opening run
    if (invert)
        fill = !fill;
    if (fill) // leading run of reported bits: emit 0..run_end
    {
        if (unsigned(*cur + 1) >= dest_len)
            return 0;
        dest_len -= *cur; // NOTE(review): decremented by run end, run emits *cur+1 items
        T run_end = *cur;
        T idx = 0;
        while (true)
        {
            *out++ = idx;
            if (idx == run_end)
                break;
            ++idx;
        }
        ++cur;
    }
    ++cur;
    while (cur <= gap_last)
    {
        unsigned run_len = *cur - *(cur - 1);
        if (run_len >= dest_len) // capacity guard
            return 0;
        dest_len -= run_len;
        T idx = (T)(*(cur - 1) + 1);
        T run_end = *cur;
        while (true)
        {
            *out++ = idx;
            if (idx == run_end)
                break;
            ++idx;
        }
        cur += 2; // skip the interleaved opposite-value run
    }
    return (D)(out - dest);
}
/// @brief Population count over [block, block_end) with light manual unrolling.
/// Assumes the range length is a multiple of the unroll width
/// (8 words on the 64-bit path, 4 words on the 32-bit path).
BMFORCEINLINE
unsigned bit_count_min_unroll(const bm::word_t* BMRESTRICT block,
                              const bm::word_t* BMRESTRICT block_end) BMNOEXCEPT
{
    unsigned count = 0;
#ifdef BM64OPT__
    do
    {
        const bm::bit_block_t::bunion_t* BMRESTRICT src_u =
                            (const bm::bit_block_t::bunion_t*)(block);
        bm::id64_t x = src_u->w64[0]; bm::id64_t y = src_u->w64[1];
        bm::id64_t u = src_u->w64[2]; bm::id64_t v = src_u->w64[3];
#if defined(BM_USE_GCC_BUILD)
        count += unsigned(__builtin_popcountll(x) + __builtin_popcountll(y)
                          + __builtin_popcountll(u) + __builtin_popcountll(v));
#else
        if (x | y | u | v) // skip the 4-way popcount for all-zero quads
        {
            unsigned c = bitcount64_4way(x, y, u, v);
            BM_ASSERT(c);
            count += c;
        }
#endif
        block += 8; // consumed 4 x 64-bit == 8 x 32-bit words
    } while (block < block_end);
#else
    do
    {
        unsigned c1= bm::word_bitcount(*block);
        unsigned c2 = bm::word_bitcount(block[1]);
        count += c1 + c2;
        c1= bm::word_bitcount(block[2]);
        c2 = bm::word_bitcount(block[3]);
        count += c1 + c2;
        block+=4;
    } while (block < block_end);
#endif
    return count;
}
/// @brief Population count of a full bit-block (SIMD dispatch when available).
inline
bm::id_t bit_block_count(const bm::word_t* block) BMNOEXCEPT
{
    const bm::word_t* block_end = block + bm::set_block_size;
#ifdef BMVECTOPT
    return VECT_BITCOUNT(block, block_end);
#else
    return bm::bit_count_min_unroll(block, block_end);
#endif
}
/// @brief Population count of a bit-block, visiting only the waves flagged
/// in the digest (one digest bit per bm::set_block_digest_wave_size words).
/// @param block  - source bit-block
/// @param digest - bitmask of non-zero waves to scan
inline
bm::id_t bit_block_count(const bm::word_t* BMRESTRICT const block,
                         bm::id64_t digest) BMNOEXCEPT
{
#ifdef VECT_BIT_COUNT_DIGEST
    return VECT_BIT_COUNT_DIGEST(block, digest);
#else
    bm::id_t count = 0;
    bm::id64_t d = digest;
    while (d)
    {
        bm::id64_t t = bm::bmi_blsi_u64(d); // isolate lowest set digest bit
        unsigned wave = bm::word_bitcount64(t - 1); // its index
        unsigned off = wave * bm::set_block_digest_wave_size; // word offset of the wave
#ifdef BM64OPT
        const bm::bit_block_t::bunion_t* BMRESTRICT src_u =
                        (const bm::bit_block_t::bunion_t*)(&block[off]);
        unsigned j = 0;
        do
        {
            bm::id64_t x = src_u->w64[0+j]; bm::id64_t y = src_u->w64[1+j];
            bm::id64_t u = src_u->w64[2+j]; bm::id64_t v = src_u->w64[3+j];
#if defined(BM_USE_GCC_BUILD)
            count += unsigned(__builtin_popcountll(x) + __builtin_popcountll(y)
                              + __builtin_popcountll(u) + __builtin_popcountll(v));
#else
            if (x | y | u | v) // skip popcount for all-zero quads
            {
                unsigned c = bm::bitcount64_4way(x, y, u, v);
                BM_ASSERT(c);
                count += c;
            }
#endif
            j += 4;
        } while (j < bm::set_block_digest_wave_size/2);
#else
        const bm::word_t* BMRESTRICT blk = &block[off];
        const bm::word_t* BMRESTRICT blk_end = &block[off+bm::set_block_digest_wave_size];
        do
        {
            unsigned c1= bm::word_bitcount(*blk);
            unsigned c2 = bm::word_bitcount(blk[1]);
            count += c1 + c2;
            c1= bm::word_bitcount(blk[2]);
            c2 = bm::word_bitcount(blk[3]);
            count += c1 + c2;
            blk+=4;
        } while (blk < blk_end);
#endif
        d = bm::bmi_bslr_u64(d); // clear lowest set digest bit
    }
    return count;
#endif
}
/// @brief Number of bit transitions (0->1 / 1->0 runs) within one 32-bit word.
/// Gray-code trick: w ^ (w >> 1) marks every position where adjacent bits
/// differ; the top-bit correction removes the artificial transition at bit 31.
inline
bm::id_t bit_count_change(bm::word_t w) BMNOEXCEPT
{
    const bm::word_t gray = w ^ (w >> 1); // 1-bits mark adjacent-bit differences
    unsigned transitions = 1u + bm::word_bitcount(gray);
    transitions -= gray >> ((sizeof(w) * 8) - 1); // discount the MSB artifact
    return transitions;
}
/// @brief Count bit transitions (GAP length estimate) over the first `size`
/// 32-bit words of a block, tracking the carry bit across word boundaries.
/// @return number of same-value runs in the scanned area
inline
unsigned bit_block_change32(const bm::word_t* BMRESTRICT block,
                            unsigned size) BMNOEXCEPT
{
    unsigned gap_count = 1;
    bm::word_t w, w0, w_prev, w_l;
    w = w0 = *block;
    const int w_shift = int(sizeof(w) * 8 - 1);
    w ^= (w >> 1); // gray code: adjacent-bit differences within the word
    BM_INCWORD_BITCOUNT(gap_count, w);
    gap_count -= (w_prev = (w0 >> w_shift)); // correct for the MSB artifact; remember MSB
    const bm::word_t* block_end = block + size;
    for (++block; block < block_end; ++block)
    {
        w = w0 = *block;
        ++gap_count;
        if (!w)
        {
            // all-zero word continues a 0-run if the previous word ended in 0
            gap_count -= !w_prev;
            w_prev = 0;
        }
        else
        {
            w ^= (w >> 1);
            BM_INCWORD_BITCOUNT(gap_count, w);
            w_l = w0 & 1; // lowest bit: continuation check across the boundary
            gap_count -= (w0 >> w_shift);  // MSB artifact correction
            gap_count -= !(w_prev ^ w_l);  // no transition across word boundary
            w_prev = (w0 >> w_shift);
        }
    }
    return gap_count;
}
/// @brief 64-bit variant of bit_block_change32: counts bit transitions over
/// `size` 32-bit words (processed as size/2 64-bit words).
/// @return number of same-value runs in the scanned area
inline
unsigned bit_block_change64(const bm::word_t* BMRESTRICT in_block,
                            unsigned size) BMNOEXCEPT
{
    unsigned gap_count = 1;
    const bm::id64_t* BMRESTRICT block = (const bm::id64_t*) in_block;
    bm::id64_t w, w0, w_prev, w_l;
    w = w0 = *block;
    const int w_shift = int(sizeof(w) * 8 - 1);
    w ^= (w >> 1); // gray code: adjacent-bit differences within the word
    gap_count += bm::word_bitcount64(w);
    gap_count -= unsigned(w_prev = (w0 >> w_shift)); // MSB artifact; remember MSB
    const bm::id64_t* block_end = block + (size/2);
    for (++block; block < block_end; ++block)
    {
        w = w0 = *block;
        ++gap_count;
        if (!w)
        {
            // all-zero word continues a 0-run if the previous word ended in 0
            gap_count -= !w_prev;
            w_prev = 0;
        }
        else
        {
            w ^= (w >> 1);
            gap_count += bm::word_bitcount64(w);
            w_l = w0 & 1; // continuation check across the word boundary
            gap_count -= unsigned(w0 >> w_shift);
            gap_count -= !(w_prev ^ w_l);
            w_prev = (w0 >> w_shift);
        }
    }
    return gap_count;
}
/// @brief Compute both GAP-change count (gc) and bit count (bc) of a block.
/// @param block - source bit-block
/// @param gc    - [out] number of same-value runs
/// @param bc    - [out] population count
inline
void bit_block_change_bc(const bm::word_t* BMRESTRICT block,
                         unsigned* BMRESTRICT gc, unsigned* BMRESTRICT bc) BMNOEXCEPT
{
    BM_ASSERT(gc);
    BM_ASSERT(bc);
#ifdef VECT_BLOCK_CHANGE_BC
    VECT_BLOCK_CHANGE_BC(block, gc, bc);
#else
#ifdef BM64OPT
    *gc = bm::bit_block_change64(block, bm::set_block_size);
#else
    *gc = bm::bit_block_change32(block, bm::set_block_size);
#endif
    *bc = bm::bit_block_count(block);
#endif
}
/// @brief Number of same-value runs in a full bit-block (SIMD/64/32 dispatch).
inline
unsigned bit_block_calc_change(const bm::word_t* block) BMNOEXCEPT
{
#if defined(VECT_BLOCK_CHANGE)
    return VECT_BLOCK_CHANGE(block, bm::set_block_size);
#else
#ifdef BM64OPT
    return bm::bit_block_change64(block, bm::set_block_size);
#else
    return bm::bit_block_change32(block, bm::set_block_size);
#endif
#endif
}
/// @brief Check that every bit in the closed range [left, right] of a
/// bit-block is 1.
/// @param block - source bit-block
/// @param left  - first bit of the range (inclusive)
/// @param right - last bit of the range (inclusive, <= gap_max_bits-1)
/// @return true if all bits in the range are set
inline
bool bit_block_is_all_one_range(const bm::word_t* const BMRESTRICT block,
                                bm::word_t left,
                                bm::word_t right) BMNOEXCEPT
{
    BM_ASSERT(left <= right);
    BM_ASSERT(right <= bm::gap_max_bits-1);
    unsigned nword, nbit, bitcount, temp;
    nbit = left & bm::set_word_mask;
    const bm::word_t* word =
        block + (nword = unsigned(left >> bm::set_word_shift));
    if (left == right)  // single-bit range
        return (*word >> nbit) & 1u;
    if (nbit)  // range does not start on a word boundary
    {
        unsigned right_margin = nbit + right - left;
        if (right_margin < 32) // whole range fits inside one word
        {
            unsigned mask_r = bm::mask_r_u32(nbit);
            unsigned mask_l = bm::mask_l_u32(right_margin);
            unsigned mask = mask_r & mask_l;
            return mask == (*word & mask);
        }
        unsigned mask_r = bm::mask_r_u32(nbit);
        temp = *word & mask_r;
        if (temp != mask_r) // head partial word must be all ones
            return false;
        bitcount = (right - left + 1u) - (32 - nbit);
        ++word;
    }
    else
    {
        bitcount = right - left + 1u;
    }
    const bm::word_t maskFF = ~0u;
    // unrolled scan: 4 x 32-bit words (128 bits) per iteration
#if defined(BM64OPT) || defined(BM64_SSE4) || defined(BMAVX2OPT) || defined(BMAVX512OPT)
    const bm::id64_t maskFF64 = ~0ull;
    for ( ;bitcount >= 128; bitcount-=128, word+=4)
    {
        bm::id64_t w64_0 = bm::id64_t(word[0]) + (bm::id64_t(word[1]) << 32);
        bm::id64_t w64_1 = bm::id64_t(word[2]) + (bm::id64_t(word[3]) << 32);
        if ((w64_0 ^ maskFF64) | (w64_1 ^ maskFF64))
            return false;
    }
#else
    for ( ;bitcount >= 128; bitcount-=128, word+=4)
    {
        // fix: original mixed bitwise '|' with logical '||' here (same result,
        // but a typo-grade inconsistency); use logical OR throughout
        bool not_all_one = (word[0] != maskFF) || (word[1] != maskFF) ||
                           (word[2] != maskFF) || (word[3] != maskFF);
        if (not_all_one)
            return false;
    }
#endif
    for ( ;bitcount >= 32; bitcount-=32, ++word)
    {
        if (*word != maskFF) // any hole ends the check
            return false;
    }
    BM_ASSERT(bitcount < 32);
    if (bitcount) // tail partial word
    {
        unsigned mask_l = bm::mask_l_u32(bitcount-1);
        temp = *word & mask_l;
        if (temp != mask_l)
            return false;
    }
    return true;
}
/// @brief Population count of the closed bit range [left, right] in a block.
/// @return number of set bits in the range
inline
bm::id_t bit_block_calc_count_range(const bm::word_t* block,
                                    bm::word_t left,
                                    bm::word_t right) BMNOEXCEPT
{
    BM_ASSERT(left <= right);
    BM_ASSERT(right <= bm::gap_max_bits-1);
    unsigned nword, nbit, bitcount, count;
    nbit = left & bm::set_word_mask;
    const bm::word_t* word =
        block + (nword = unsigned(left >> bm::set_word_shift));
    if (left == right) // single-bit range
    {
        return (*word >> nbit) & 1u;
    }
    count = 0;
    bitcount = right - left + 1u;
    if (nbit) // range does not start on a word boundary
    {
        unsigned right_margin = nbit + right - left;
        if (right_margin < 32) // whole range inside one word
        {
            unsigned mask_r = bm::mask_r_u32(nbit);
            unsigned mask_l = bm::mask_l_u32(right_margin);
            unsigned mask = mask_r & mask_l;
            return bm::word_bitcount(*word & mask);
        }
        unsigned mask_r = bm::mask_r_u32(nbit);
        count = bm::word_bitcount(*word & mask_r); // head partial word
        bitcount -= 32 - nbit;
        ++word;
    }
#if defined(BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512)
    for ( ;bitcount >= 64; bitcount-=64)
    {
        bm::id64_t w64 = *((bm::id64_t*)word); // 64-bit HW popcount path
        count += unsigned(_mm_popcnt_u64(w64));
        word += 2;
    }
    if (bitcount >= 32)
    {
        count += bm::word_bitcount(*word++);
        bitcount-=32;
    }
#else
    for ( ;bitcount >= 32; bitcount-=32, ++word)
        count += bm::word_bitcount(*word);
#endif
    BM_ASSERT(bitcount < 32);
    if (bitcount) // tail partial word
    {
        unsigned mask_l = bm::mask_l_u32(bitcount-1);
        count += bm::word_bitcount(*word & mask_l);
    }
    return count;
}
/// @brief Population count of bits [0, right] of a bit-block.
/// @param block - source bit-block
/// @param right - last bit position (inclusive)
inline
bm::id_t bit_block_calc_count_to(const bm::word_t* block,
                                 bm::word_t right) BMNOEXCEPT
{
    BM_ASSERT(block);
    if (!right) // single leading bit
        return *block & 1u;
    bm::id_t count = 0;
    unsigned bitcount = right + 1;
#if defined(BMAVX2OPT) || defined(BMAVX512OPT)
    // AVX2 path: count 256 bits per iteration, then reduce the lane sums
    BM_AVX2_POPCNT_PROLOG
    __m256i cnt = _mm256_setzero_si256();
    bm::id64_t* cnt64;
    for ( ;bitcount >= 256; bitcount -= 256)
    {
        const __m256i* src = (__m256i*)block;
        __m256i xmm256 = _mm256_load_si256(src);
        BM_AVX2_BIT_COUNT(bc, xmm256);
        cnt = _mm256_add_epi64(cnt, bc);
        block += 8;
    }
    cnt64 = (bm::id64_t*)&cnt;
    count += (unsigned)(cnt64[0] + cnt64[1] + cnt64[2] + cnt64[3]);
#endif
    for ( ;bitcount >= 64; bitcount -= 64)
    {
        bm::id64_t* p = (bm::id64_t*)block;
        count += bm::word_bitcount64(*p);
        block += 2;
    }
    if (bitcount >= 32)
    {
        count += bm::word_bitcount(*block++);
        bitcount-=32;
    }
    if (bitcount) // tail partial word
    {
        unsigned mask_l = bm::mask_l_u32(bitcount-1);
        count += bm::word_bitcount(*block & mask_l);
    }
    return count;
}
/// @brief Rotate the whole bit-block left by one bit
/// (the block's most significant bit wraps around to bit 0).
inline
void bit_block_rotate_left_1(bm::word_t* block) BMNOEXCEPT
{
    const bm::word_t wrap_bit = (block[0] >> 31) & 1; // MSB of the block
    unsigned k = 0;
    while (k < bm::set_block_size - 1)
    {
        block[k] = (block[k] << 1) | (block[k + 1] >> 31);
        ++k;
    }
    // last word receives the wrapped-around bit
    block[bm::set_block_size - 1] =
        (block[bm::set_block_size - 1] << 1) | wrap_bit;
}
/// @brief Unrolled variant of bit_block_rotate_left_1 (4 words per iteration).
inline
void bit_block_rotate_left_1_unr(bm::word_t* block) BMNOEXCEPT
{
    bm::word_t co_flag = (block[0] >> 31) & 1; // MSB wraps around to bit 0
    const unsigned unroll_factor = 4;
    bm::word_t w0, w1, w2, w3;
    unsigned i;
    for (i = 0; i < bm::set_block_size - unroll_factor; i += unroll_factor)
    {
        // pre-read carry bits, then shift the quad
        w0 = block[i + 1] >> 31;
        w1 = block[i + 2] >> 31;
        w2 = block[i + 3] >> 31;
        w3 = block[i + 4] >> 31;
        block[0 + i] = (block[0 + i] << 1) | w0;
        block[1 + i] = (block[1 + i] << 1) | w1;
        block[2 + i] = (block[2 + i] << 1) | w2;
        block[3 + i] = (block[3 + i] << 1) | w3;
    }
    // tail: last 3 words plus the final word receiving the wrap bit
    block[i] = (block[i] << 1) | (block[i + 1] >> 31);
    block[i + 1] = (block[i + 1] << 1) | (block[i + 2] >> 31);
    block[i + 2] = (block[i + 2] << 1) | (block[i + 3] >> 31);
    block[set_block_size - 1] = (block[set_block_size - 1] << 1) | co_flag;
}
/// @brief Insert a bit at `bitpos`, shifting all higher bits up by one.
/// @param block  - bit-block to modify
/// @param bitpos - insertion position (0..65535)
/// @param value  - bit value to insert
/// @return carry-over bit shifted out of the block's top
inline
bm::word_t bit_block_insert(bm::word_t* BMRESTRICT block,
                            unsigned bitpos, bool value) BMNOEXCEPT
{
    BM_ASSERT(block);
    BM_ASSERT(bitpos < 65536);
    unsigned nbit  = unsigned(bitpos & bm::set_block_mask);
    unsigned nword = unsigned(nbit >> bm::set_word_shift);
    nbit &= bm::set_word_mask;
    bm::word_t co_flag = 0;
    if (nbit) // split the target word around the insertion point
    {
        unsigned mask_r = bm::mask_r_u32(nbit);
        bm::word_t w  = block[nword] & mask_r;  // upper part: shifts up
        bm::word_t wl = block[nword] & ~mask_r; // lower part: stays
        co_flag = w >> 31;
        w <<= 1u;
        block[nword] = w | (unsigned(value) << nbit) | wl;
        ++nword;
    }
    else
    {
        co_flag = value; // inserted bit becomes carry into word nword
    }
    // ripple the carry through the remaining words
    for (unsigned i = nword; i < bm::set_block_size; ++i)
    {
        bm::word_t w = block[i];
        bm::word_t co_flag1 = w >> 31;
        w = (w << 1u) | co_flag;
        block[i] = w;
        co_flag = co_flag1;
    }
    return co_flag;
}
/// @brief Shift the whole bit-block right (towards higher bit positions) by 1.
/// @param block     - bit-block to modify
/// @param empty_acc - [out] OR-accumulator of the result (0 => block is empty)
/// @param co_flag   - carry-in bit for position 0
/// @return carry-out bit shifted off the block's top
inline
bool bit_block_shift_r1(bm::word_t* BMRESTRICT block,
                        bm::word_t* BMRESTRICT empty_acc,
                        bm::word_t co_flag) BMNOEXCEPT
{
    BM_ASSERT(block);
    BM_ASSERT(empty_acc);
    bm::word_t any_bits(0);
    for (unsigned k = 0; k < bm::set_block_size; ++k)
    {
        const bm::word_t src = block[k];
        const bm::word_t next_carry = src >> 31;
        const bm::word_t shifted = (src << 1u) | co_flag;
        block[k] = shifted;
        any_bits |= shifted;
        co_flag = next_carry;
    }
    *empty_acc = any_bits;
    return co_flag;
}
/// @brief 64-bit unrolled shift-right-by-1 (falls back to the scalar version
/// when BM64OPT is off).
/// @return carry-out bit shifted off the block's top
inline
bool bit_block_shift_r1_unr_min(bm::word_t* BMRESTRICT block,
                                bm::word_t* BMRESTRICT empty_acc,
                                bm::id64_t co_flag) BMNOEXCEPT
{
#if defined(BM64OPT)
    bm::bit_block_t::bunion_t* BMRESTRICT b_u =
                        (bm::bit_block_t::bunion_t*)(block);
    bm::id64_t acc0(0), acc1(0); // two OR-accumulators (ILP)
    unsigned i = 0;
    do
    {
        // two 64-bit words per iteration, carry chained through both
        bm::id64_t w, co_flag1;
        w = b_u->w64[i];
        co_flag1 = w >> 63;
        acc0 |= w = (w << 1u) | co_flag;
        b_u->w64[i++] = w;
        co_flag = co_flag1;
        w = b_u->w64[i];
        co_flag1 = w >> 63;
        acc1 |= w = (w << 1u) | co_flag;
        b_u->w64[i] = w;
        co_flag = co_flag1;
    } while(++i < bm::set_block_size/2);
    *empty_acc = bool(acc0 | acc1);
    return co_flag;
#else
    return bm::bit_block_shift_r1(block, empty_acc, unsigned(co_flag));
#endif
}
/// @brief Shift-right-by-1 dispatch: SIMD (VECT_SHIFT_R1) or unrolled scalar.
inline
bool bit_block_shift_r1_unr(bm::word_t* BMRESTRICT block,
                            bm::word_t* BMRESTRICT empty_acc,
                            bm::word_t co_flag) BMNOEXCEPT
{
    BM_ASSERT(block);
    BM_ASSERT(empty_acc);
#if defined(VECT_SHIFT_R1)
    return VECT_SHIFT_R1(block, empty_acc, co_flag);
#else
    return bm::bit_block_shift_r1_unr_min(block, empty_acc, co_flag);
#endif
}
/// @brief Shift the whole bit-block left (towards bit 0) by 1.
/// @param block     - bit-block to modify
/// @param empty_acc - [out] OR-accumulator of the result (0 => block is empty)
/// @param co_flag   - carry-in bit for the block's top position
/// @return carry-out bit shifted off position 0
inline
bool bit_block_shift_l1(bm::word_t* block,
                        bm::word_t* empty_acc, bm::word_t co_flag) BMNOEXCEPT
{
    BM_ASSERT(block);
    BM_ASSERT(empty_acc);
    bm::word_t any_bits = 0;
    int k = bm::set_block_size - 1;
    do // scan from the top word down, carry moves downward
    {
        const bm::word_t src = block[k];
        const bm::word_t next_carry = src & 1u;
        const bm::word_t shifted = (src >> 1u) | (co_flag << 31u);
        block[k] = shifted;
        any_bits |= shifted;
        co_flag = next_carry;
    } while (--k >= 0);
    *empty_acc = any_bits;
    return co_flag;
}
/// @brief Unrolled shift-left-by-1: processes 4 words per loop pass
/// (2 in the body before the explicit i-=2, 2 after).
/// @return carry-out bit shifted off position 0
inline
bool bit_block_shift_l1_unr_min(bm::word_t* BMRESTRICT block,
                                bm::word_t* BMRESTRICT empty_acc,
                                unsigned co_flag) BMNOEXCEPT
{
    bm::word_t acc = 0;
    for (int i = bm::set_block_size-1; i >= 0; i-=2)
    {
        bm::word_t w0, w1, co_flag1;
        w0 = block[i]; w1 = block[i-1];
        co_flag1 = w0 & 1u;
        acc |= w0 = (w0 >> 1u) | (co_flag << 31u);
        block[i] = w0;
        co_flag = co_flag1;
        co_flag1 = w1 & 1u;
        acc |= w1 = (w1 >> 1u) | (co_flag << 31u);
        block[i-1] = w1;
        co_flag = co_flag1;
        i-=2; // advance to the next word pair inside the same iteration
        w0 = block[i]; w1 = block[i-1];
        co_flag1 = w0 & 1u;
        acc |= w0 = (w0 >> 1u) | (co_flag << 31u);
        block[i] = w0;
        co_flag = co_flag1;
        co_flag1 = w1 & 1u;
        acc |= w1 = (w1 >> 1u) | (co_flag << 31u);
        block[i-1] = w1;
        co_flag = co_flag1;
    }
    *empty_acc = acc;
    return co_flag;
}
/// @brief Shift-left-by-1 dispatch: SIMD (VECT_SHIFT_L1) or unrolled scalar.
inline
bool bit_block_shift_l1_unr(bm::word_t* block,
                            bm::word_t* empty_acc,
                            bm::word_t co_flag) BMNOEXCEPT
{
    BM_ASSERT(block);
    BM_ASSERT(empty_acc);
#if defined(VECT_SHIFT_L1)
    return VECT_SHIFT_L1(block, empty_acc, co_flag);
#else
    return bm::bit_block_shift_l1_unr_min(block, empty_acc, co_flag);
#endif
}
/// @brief Erase the bit at `bitpos`, shifting all higher bits down by one;
/// `carry_over` is shifted into the block's top position.
inline
void bit_block_erase(bm::word_t* block,
                     unsigned bitpos,
                     bool carry_over) BMNOEXCEPT
{
    BM_ASSERT(block);
    BM_ASSERT(bitpos < 65536);
    if (!bitpos) // erasing bit 0 == plain shift-left of the whole block
    {
        bm::word_t acc;
        bm::bit_block_shift_l1_unr(block, &acc, carry_over);
        return;
    }
    unsigned nbit  = unsigned(bitpos & bm::set_block_mask);
    unsigned nword = unsigned(nbit >> bm::set_word_shift);
    nbit &= bm::set_word_mask;
    bm::word_t co_flag = carry_over;
    // shift words above the target word down, carry rippling downward
    for (unsigned i = bm::set_block_size-1; i > nword; --i)
    {
        bm::word_t w = block[i];
        bm::word_t co_flag1 = w & 1u;
        w = (w >> 1u) | (co_flag << 31u);
        block[i] = w;
        co_flag = co_flag1;
    }
    if (nbit) // target word: keep low part, close the gap in the high part
    {
        unsigned mask_r = bm::mask_r_u32(nbit);
        bm::word_t w  = block[nword] & mask_r;
        bm::word_t wl = block[nword] & ~mask_r;
        w &= ~(1 << nbit); // drop the erased bit
        w >>= 1u;
        w |= wl | (co_flag << 31u);
        block[nword] = w;
    }
    else
    {
        block[nword] = (block[nword] >> 1u) | (co_flag << 31u);
    }
}
/// @brief Combined SHIFT-right-1 + AND with mask_block, maintaining the
/// non-zero-wave digest.
/// @param block      - target block (shifted, then ANDed in place)
/// @param co_flag    - carry-in bit
/// @param mask_block - AND mask
/// @param digest     - [in,out] digest of non-zero waves; updated to match result
/// @return carry-out bit
inline
bool bit_block_shift_r1_and(bm::word_t* BMRESTRICT block,
                            bm::word_t co_flag,
                            const bm::word_t* BMRESTRICT mask_block,
                            bm::id64_t* BMRESTRICT digest) BMNOEXCEPT
{
    BM_ASSERT(block);
    BM_ASSERT(mask_block);
    BM_ASSERT(digest && *digest);
    bm::id64_t d = *digest;
    unsigned di = 0;
    if (!co_flag)
    {
        // no carry: waves below the first non-zero one cannot change - skip them
        bm::id64_t t = d & -d; // isolate lowest set digest bit (see #pragma 4146)
        di = bm::word_bitcount64(t - 1);
    }
    for (; di < 64; ++di)
    {
        const unsigned d_base = di * bm::set_block_digest_wave_size;
        bm::id64_t dmask = (1ull << di);
        if (d & dmask) // wave has bits: shift + AND, track emptiness
        {
            bm::word_t acc = 0;
            for (unsigned i = d_base; i < d_base + bm::set_block_digest_wave_size; ++i)
            {
                BM_ASSERT(i < bm::set_block_size);
                bm::word_t w = block[i];
                bm::word_t co_flag1 = w >> 31;
                w = (w << 1u) | co_flag;
                acc |= block[i] = w & mask_block[i];
                co_flag = co_flag1;
            }
            if (!acc) // wave went all-zero
                d &= ~dmask;
        }
        else // empty wave: only the carry bit can land here
        {
            BM_ASSERT(block[d_base + bm::set_block_digest_wave_size -1]==0);
            BM_ASSERT(block[d_base]==0);
            if (co_flag)
            {
                BM_ASSERT(co_flag == 1);
                BM_ASSERT(block[d_base] == 0);
                block[d_base] = co_flag & mask_block[d_base];
                if (block[d_base]) // carry survived the mask
                    d |= dmask;
                co_flag = 0;
            }
        }
    }
    *digest = d;
    return co_flag;
}
/// @brief SHIFT-right-1 + AND dispatch: SIMD (VECT_SHIFT_R1_AND) or scalar.
inline
bool bit_block_shift_r1_and_unr(bm::word_t* BMRESTRICT block,
                                bm::word_t co_flag,
                                const bm::word_t* BMRESTRICT mask_block,
                                bm::id64_t* BMRESTRICT digest) BMNOEXCEPT
{
    BM_ASSERT(block);
    BM_ASSERT(mask_block);
    BM_ASSERT(digest);
#if defined(VECT_SHIFT_R1_AND)
    return VECT_SHIFT_R1_AND(block, co_flag, mask_block, digest);
#else
    return bm::bit_block_shift_r1_and(block, co_flag, mask_block, digest);
#endif
}
/// @brief Test if any bit is set in the closed range [left, right].
/// @return non-zero if at least one bit in the range is set
inline
bm::id_t bit_block_any_range(const bm::word_t* const BMRESTRICT block,
                             bm::word_t left,
                             bm::word_t right) BMNOEXCEPT
{
    BM_ASSERT(left <= right);
    unsigned nbit = left;
    unsigned nword = unsigned(nbit >> bm::set_word_shift);
    nbit &= bm::set_word_mask;
    const bm::word_t* word = block + nword;
    if (left == right) // single-bit range
    {
        return (*word >> nbit) & 1;
    }
    unsigned acc;
    unsigned bitcount = right - left + 1;
    if (nbit) // range does not start on a word boundary
    {
        unsigned right_margin = nbit + (right - left);
        if (right_margin < 32) // whole range inside one word
        {
            unsigned mask_r = bm::mask_r_u32(nbit);
            unsigned mask_l = bm::mask_l_u32(right_margin);
            unsigned mask = mask_r & mask_l;
            return *word & mask;
        }
        else
        {
            unsigned mask_r = bm::mask_r_u32(nbit);
            acc = *word & mask_r; // head partial word
            if (acc)
                return acc;
            bitcount -= 32 - nbit;
        }
        ++word;
    }
    // unrolled scan with early exit every 128 bits
    for ( ;bitcount >= 128; bitcount-=128, word+=4)
    {
        acc = word[0] | word[1] | word[2] | word[3];
        if (acc)
            return acc;
    }
    acc = 0;
    for ( ;bitcount >= 32; bitcount -= 32)
    {
        acc |= *word++;
    }
    if (bitcount) // tail partial word
    {
        unsigned mask_l = bm::mask_l_u32(bitcount-1);
        acc |= (*word) & mask_l;
    }
    return acc;
}
/// @brief Invert every bit of a bit-block in place (SIMD when available).
template<typename T>
void bit_invert(T* start) BMNOEXCEPT
{
    BM_ASSERT(IS_VALID_ADDR((bm::word_t*)start));
#ifdef BMVECTOPT
    VECT_INVERT_BLOCK(start);
#else
    T* end = (T*)((unsigned*)(start) + bm::set_block_size);
    do // unrolled 4 elements per iteration
    {
        start[0] = ~start[0];
        start[1] = ~start[1];
        start[2] = ~start[2];
        start[3] = ~start[3];
        start+=4;
    } while (start < end);
#endif
}
/// @brief Check if a full bit-block consists entirely of 1-bits.
inline
bool is_bits_one(const bm::wordop_t* start) BMNOEXCEPT
{
#if defined(BMSSE42OPT) || defined(BMAVX2OPT)
    return VECT_IS_ONE_BLOCK(start);
#else
    const bm::word_t* BMRESTRICT src_end = (bm::word_t*)start + bm::set_block_size;
    const bm::wordop_t* end = (const bm::wordop_t*)src_end;
    do
    {
        // AND of 4 words is all-ones only if every word is all-ones
        bm::wordop_t tmp =
            start[0] & start[1] & start[2] & start[3];
        if (tmp != bm::all_bits_mask)
            return false;
        start += 4;
    } while (start < end);
    return true;
#endif
}
/// @brief Range all-ones check for any block representation
/// (GAP, full-block sentinel, or plain bit-block); NULL block => false.
inline
bool block_is_all_one_range(const bm::word_t* const BMRESTRICT block,
                            unsigned left, unsigned right) BMNOEXCEPT
{
    BM_ASSERT(left <= right);
    BM_ASSERT(right < bm::gap_max_bits);
    if (block)
    {
        if (BM_IS_GAP(block))
            return bm::gap_is_all_one_range(BMGAP_PTR(block), left, right);
        if (block == FULL_BLOCK_FAKE_ADDR)
            return true;
        return bm::bit_block_is_all_one_range(block, left, right);
    }
    return false;
}
/// @brief Check [left, right] is a proper flanked interval of 1-bits:
/// all bits in the range set AND the bits immediately outside it (left-1,
/// right+1) are clear. Requires 0 < left, right < gap_max_bits-1.
inline
bool block_is_interval(const bm::word_t* const BMRESTRICT block,
                       unsigned left, unsigned right) BMNOEXCEPT
{
    BM_ASSERT(left <= right);
    BM_ASSERT(right < bm::gap_max_bits-1);
    if (block)
    {
        bool is_left, is_right, all_one;
        if (BM_IS_GAP(block))
        {
            const bm::gap_word_t* gap = BMGAP_PTR(block);
            all_one = bm::gap_is_interval(gap, left, right);
            return all_one;
        }
        else // bit-block
        {
            // a full block can have no flanking zero bits
            if (block == FULL_BLOCK_FAKE_ADDR)
                return false;
            unsigned nword = ((left-1) >> bm::set_word_shift);
            is_left = block[nword] & (1u << ((left-1) & bm::set_word_mask));
            if (is_left == false)
            {
                nword = ((right + 1) >> bm::set_word_shift);
                is_right = block[nword] & (1u << ((right + 1) & bm::set_word_mask));
                if (is_right == false)
                {
                    all_one = bm::bit_block_is_all_one_range(block, left, right);
                    return all_one;
                }
            }
        }
    }
    return false;
}
/// @brief From a set bit at `nbit`, find the end of its run of consecutive
/// 1-bits.
/// @param pos - [out] last set bit of the interval
/// @return false if the bit at nbit is not set
inline
bool bit_block_find_interval_end(const bm::word_t* BMRESTRICT block,
                                 unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
{
    BM_ASSERT(block);
    BM_ASSERT(pos);
    unsigned nword = unsigned(nbit >> bm::set_word_shift);
    unsigned bit_pos = (nbit & bm::set_word_mask);
    bm::word_t w = block[nword];
    w &= (1u << bit_pos);
    if (!w) // start bit is not set: no interval here
        return false;
    if (nbit == bm::gap_max_bits-1) // already at the block's last bit
    {
        *pos = bm::gap_max_bits-1;
        return true;
    }
    *pos = nbit;
    ++nbit;
    nword = unsigned(nbit >> bm::set_word_shift);
    bit_pos = (nbit & bm::set_word_mask);
    // look for the first 0-bit at/after nbit (invert and scan forward)
    w = (~block[nword]) >> bit_pos;
    w <<= bit_pos; // clear inverted bits below nbit
    if (w)
    {
        bit_pos = bm::bit_scan_forward32(w);
        *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))-1);
        return true;
    }
    for (++nword; nword < bm::set_block_size; ++nword)
    {
        w = ~block[nword];
        if (w) // word contains a 0-bit: interval ends just before it
        {
            bit_pos = bm::bit_scan_forward32(w);
            *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))-1);
            return true;
        }
    }
    // all ones to the end of the block
    *pos = bm::gap_max_bits-1;
    return true;
}
/// @brief Find interval end for any block representation.
/// @return 0 - not found, 1 - found, 2 - found at the block boundary
inline
unsigned block_find_interval_end(const bm::word_t* BMRESTRICT block,
                                 unsigned nbit_from,
                                 unsigned* BMRESTRICT found_nbit) BMNOEXCEPT
{
    BM_ASSERT(block && found_nbit);
    BM_ASSERT(nbit_from < bm::gap_max_bits);
    bool b;
    if (BM_IS_GAP(block))
    {
        const bm::gap_word_t* gap = BMGAP_PTR(block);
        b = bm::gap_find_interval_end(gap, nbit_from, found_nbit);
        if (b && *found_nbit == bm::gap_max_bits-1)
            return 2; // interval runs to the end of the block
    }
    else // bit-block
    {
        if (IS_FULL_BLOCK(block))
        {
            *found_nbit = bm::gap_max_bits-1;
            return 2;
        }
        b = bm::bit_block_find_interval_end(block, nbit_from, found_nbit);
        if (b && *found_nbit == bm::gap_max_bits-1)
            return 2;
    }
    return b;
}
/// @brief From a set bit at `nbit`, find the start of its run of consecutive
/// 1-bits (scanning towards bit 0).
/// @param pos - [out] first set bit of the interval
/// @return false if the bit at nbit is not set
inline
bool bit_block_find_interval_start(const bm::word_t* BMRESTRICT block,
                                   unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
{
    BM_ASSERT(block);
    BM_ASSERT(pos);
    unsigned nword = unsigned(nbit >> bm::set_word_shift);
    unsigned bit_pos = (nbit & bm::set_word_mask);
    bm::word_t w = block[nword];
    w &= (1u << bit_pos);
    if (!w) // start bit is not set: no interval here
        return false;
    if (nbit == 0) // already at the block's first bit
    {
        *pos = 0;
        return true;
    }
    *pos = nbit;
    --nbit;
    nword = unsigned(nbit >> bm::set_word_shift);
    bit_pos = (nbit & bm::set_word_mask);
    // look for the nearest 0-bit below nbit (invert, mask, scan backward)
    unsigned mask_l = bm::mask_l_u32(bit_pos);
    w = (~block[nword]) & mask_l;
    if (w)
    {
        bit_pos = bm::bit_scan_reverse32(w);
        *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))+1);
        return true;
    }
    if (nword)
    {
        for (--nword; true; --nword)
        {
            w = ~block[nword];
            if (w) // word contains a 0-bit: interval starts just above it
            {
                bit_pos = bm::bit_scan_reverse32(w);
                *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))+1);
                return true;
            }
            if (!nword)
                break;
        }
    }
    // all ones down to bit 0
    *pos = 0;
    return true;
}
/// @brief Find the nearest set bit at or below position `nbit` (reverse scan).
/// @param pos - [out] position of the found set bit
/// @return true if a set bit was found
inline
bool bit_block_find_prev(const bm::word_t* BMRESTRICT block,
                         unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
{
    BM_ASSERT(block);
    BM_ASSERT(pos);
    unsigned nword = unsigned(nbit >> bm::set_word_shift);
    unsigned bit_pos = (nbit & bm::set_word_mask);
    bm::word_t w = block[nword];
    w &= (1u << bit_pos);
    if (w) // the bit at nbit itself is set
    {
        *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t))));
        return true;
    }
    if (!nbit) // nothing below position 0
        return false;
    --nbit;
    nword = unsigned(nbit >> bm::set_word_shift);
    bit_pos = (nbit & bm::set_word_mask);
    // scan the remainder of the current word below nbit
    unsigned mask_l = bm::mask_l_u32(bit_pos);
    w = block[nword] & mask_l;
    if (w)
    {
        bit_pos = bm::bit_scan_reverse32(w);
        *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t))));
        return true;
    }
    if (nword) // scan the lower words
    {
        for (--nword; true; --nword)
        {
            w = block[nword];
            if (w)
            {
                bit_pos = bm::bit_scan_reverse32(w);
                *pos =
                    unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t))));
                return true;
            }
            if (!nword)
                break;
        }
    }
    return false;
}
/// @brief Find interval start for any block representation.
/// @return 0 - not found, 1 - found, 2 - found at the block boundary (bit 0)
inline
unsigned block_find_interval_start(const bm::word_t* BMRESTRICT block,
                                   unsigned nbit_from,
                                   unsigned* BMRESTRICT found_nbit) BMNOEXCEPT
{
    BM_ASSERT(block && found_nbit);
    BM_ASSERT(nbit_from < bm::gap_max_bits);
    bool b;
    if (BM_IS_GAP(block))
    {
        const bm::gap_word_t* gap = BMGAP_PTR(block);
        b = bm::gap_find_interval_start(gap, nbit_from, found_nbit);
        if (b && *found_nbit == 0)
            return 2; // interval reaches the start of the block
    }
    else // bit-block
    {
        if (IS_FULL_BLOCK(block))
        {
            *found_nbit = 0;
            return 2;
        }
        b = bm::bit_block_find_interval_start(block, nbit_from, found_nbit);
        if (b && *found_nbit == 0)
            return 2;
    }
    return b;
}
/// @brief Reverse-find the nearest set bit at or below nbit_from for any
/// block representation.
/// @return true if found (position stored in found_nbit)
inline
bool block_find_reverse(const bm::word_t* BMRESTRICT block,
                        unsigned nbit_from,
                        unsigned* BMRESTRICT found_nbit) BMNOEXCEPT
{
    BM_ASSERT(block && found_nbit);
    BM_ASSERT(nbit_from < bm::gap_max_bits);
    bool b;
    if (BM_IS_GAP(block))
    {
        b = bm::gap_find_prev(BMGAP_PTR(block), nbit_from, found_nbit);
    }
    else
    {
        if (IS_FULL_BLOCK(block)) // every bit set: hit at nbit_from itself
        {
            *found_nbit = nbit_from;
            return true;
        }
        b = bm::bit_block_find_prev(block, nbit_from, found_nbit);
    }
    return b;
}
/// @brief Any-bit-set test on [left, right] for any block representation;
/// NULL block => false.
inline
bool block_any_range(const bm::word_t* const BMRESTRICT block,
                     unsigned left, unsigned right) BMNOEXCEPT
{
    BM_ASSERT(left <= right);
    BM_ASSERT(right < bm::gap_max_bits);
    if (!block)
        return false;
    if (BM_IS_GAP(block))
        return bm::gap_any_range(BMGAP_PTR(block), left, right);
    if (IS_FULL_BLOCK(block))
        return true;
    return bm::bit_block_any_range(block, left, right);
}
/// @brief Any-bit-set test on a whole block (GAP, full, or bit-block);
/// NULL block => false.
inline
bool block_any(const bm::word_t* const BMRESTRICT block) BMNOEXCEPT
{
    if (!block)
        return false;
    if (IS_FULL_BLOCK(block))
        return true;
    if (BM_IS_GAP(block))
        return !bm::gap_is_all_zero(BMGAP_PTR(block));
    return !bm::bit_is_all_zero(block);
}
/// @brief GAP AND: tmp_buf = vect1 & vect2; dsize receives the result length.
/// @return tmp_buf
inline
gap_word_t* gap_operation_and(const gap_word_t* BMRESTRICT vect1,
                              const gap_word_t* BMRESTRICT vect2,
                              gap_word_t* BMRESTRICT tmp_buf,
                              unsigned& dsize) BMNOEXCEPT
{
    bm::gap_buff_op<bm::gap_word_t, bm::and_func>(
        tmp_buf, vect1, 0, vect2, 0, dsize);
    return tmp_buf;
}
/// @brief GAP AND "any" test: non-zero if (vect1 & vect2) has any set bits.
inline
unsigned gap_operation_any_and(const gap_word_t* BMRESTRICT vect1,
                               const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
{
    return gap_buff_any_op<bm::gap_word_t, bm::and_func>(vect1, 0, vect2, 0);
}
/// @brief Population count of (vect1 & vect2) over two GAP blocks.
inline
unsigned gap_count_and(const gap_word_t* BMRESTRICT vect1,
                       const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
{
    return bm::gap_buff_count_op<bm::gap_word_t, bm::and_func>(vect1, vect2);
}
/// @brief GAP XOR: tmp_buf = vect1 ^ vect2; dsize receives the result length.
/// @return tmp_buf
inline
gap_word_t* gap_operation_xor(const gap_word_t* BMRESTRICT vect1,
                              const gap_word_t* BMRESTRICT vect2,
                              gap_word_t* BMRESTRICT tmp_buf,
                              unsigned& dsize) BMNOEXCEPT
{
    bm::gap_buff_op<bm::gap_word_t, bm::xor_func>(
        tmp_buf, vect1, 0, vect2, 0, dsize);
    return tmp_buf;
}
/// @brief GAP XOR "any" test: non-zero if vect1 and vect2 differ anywhere.
BMFORCEINLINE
unsigned gap_operation_any_xor(const gap_word_t* BMRESTRICT vect1,
                               const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
{
    return gap_buff_any_op<bm::gap_word_t, bm::xor_func>(vect1, 0, vect2, 0);
}
/// @brief Population count of (vect1 ^ vect2) over two GAP blocks.
BMFORCEINLINE
unsigned gap_count_xor(const gap_word_t* BMRESTRICT vect1,
                       const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
{
    return bm::gap_buff_count_op<bm::gap_word_t, bm::xor_func>(vect1, vect2);
}
/// @brief GAP OR via De Morgan: ~( ~vect1 & ~vect2 ) using inverted operands
/// plus a final result inversion.
/// @return tmp_buf (dsize receives the result length)
inline
gap_word_t* gap_operation_or(const gap_word_t* BMRESTRICT vect1,
                             const gap_word_t* BMRESTRICT vect2,
                             gap_word_t* BMRESTRICT tmp_buf,
                             unsigned& dsize) BMNOEXCEPT
{
    bm::gap_buff_op<bm::gap_word_t, bm::and_func>(tmp_buf, vect1, 1, vect2, 1, dsize);
    bm::gap_invert(tmp_buf);
    return tmp_buf;
}
/// @brief Population count of (vect1 | vect2) over two GAP blocks.
BMFORCEINLINE
unsigned gap_count_or(const gap_word_t* BMRESTRICT vect1,
                      const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
{
    return gap_buff_count_op<bm::gap_word_t, bm::or_func>(vect1, vect2);
}
/// @brief GAP SUB (AND NOT): tmp_buf = vect1 & ~vect2 (vect2 passed inverted).
/// @return tmp_buf (dsize receives the result length)
inline
gap_word_t* gap_operation_sub(const gap_word_t* BMRESTRICT vect1,
                              const gap_word_t* BMRESTRICT vect2,
                              gap_word_t* BMRESTRICT tmp_buf,
                              unsigned& dsize) BMNOEXCEPT
{
    bm::gap_buff_op<bm::gap_word_t, bm::and_func>( tmp_buf, vect1, 0, vect2, 1, dsize);
    return tmp_buf;
}
/// @brief GAP SUB "any" test: non-zero if (vect1 & ~vect2) has any set bits.
inline
unsigned gap_operation_any_sub(const gap_word_t* BMRESTRICT vect1,
                               const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
{
    return
        bm::gap_buff_any_op<bm::gap_word_t, bm::and_func>( vect1, 0, vect2, 1);
}
/// @brief Population count of (vect1 & ~vect2) over two GAP blocks.
BMFORCEINLINE
unsigned gap_count_sub(const gap_word_t* BMRESTRICT vect1,
                       const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
{
    return bm::gap_buff_count_op<bm::gap_word_t, bm::sub_func>(vect1, vect2);
}
/// @brief Copy one full bit-block (SIMD dispatch or memcpy).
inline
void bit_block_copy(bm::word_t* BMRESTRICT dst,
                    const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
#ifdef BMVECTOPT
    VECT_COPY_BLOCK(dst, src);
#else
    ::memcpy(dst, src, bm::set_block_size * sizeof(bm::word_t));
#endif
}
/// @brief Copy a full bit-block without an alignment requirement on src/dst.
inline
void bit_block_copy_unalign(bm::word_t* BMRESTRICT dst,
                            const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
#ifdef VECT_COPY_BLOCK_UNALIGN
    VECT_COPY_BLOCK_UNALIGN(dst, src);
#else
    ::memcpy(dst, src, bm::set_block_size * sizeof(bm::word_t));
#endif
}
/// @brief Copy a full bit-block using non-temporal (streaming) stores
/// when supported (bypasses the cache on large copies).
inline
void bit_block_stream(bm::word_t* BMRESTRICT dst,
                      const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
#ifdef VECT_STREAM_BLOCK
    VECT_STREAM_BLOCK(dst, src);
#else
    ::memcpy(dst, src, bm::set_block_size * sizeof(bm::word_t));
#endif
}
/// @brief Streaming bit-block copy without an alignment requirement.
inline
void bit_block_stream_unalign(bm::word_t* BMRESTRICT dst,
                              const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
#ifdef VECT_STREAM_BLOCK_UNALIGN
    VECT_STREAM_BLOCK_UNALIGN(dst, src);
#else
    ::memcpy(dst, src, bm::set_block_size * sizeof(bm::word_t));
#endif
}
/// @brief In-place AND of two full bit-blocks: dst &= src.
/// @return OR-accumulator of the result (0 => dst became empty)
inline
bm::id64_t bit_block_and(bm::word_t* BMRESTRICT dst,
                         const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
    BM_ASSERT(dst);
    BM_ASSERT(src);
    BM_ASSERT(dst != src);
#ifdef BMVECTOPT
    bm::id64_t acc = VECT_AND_BLOCK(dst, src);
#else
    unsigned arr_sz = bm::set_block_size / 2; // block viewed as 64-bit words
    const bm::bit_block_t::bunion_t* BMRESTRICT src_u = (const bm::bit_block_t::bunion_t*)src;
    bm::bit_block_t::bunion_t* BMRESTRICT dst_u = (bm::bit_block_t::bunion_t*)dst;
    bm::id64_t acc = 0;
    for (unsigned i = 0; i < arr_sz; i+=4) // 4 x 64-bit per iteration
    {
        acc |= dst_u->w64[i] &= src_u->w64[i];
        acc |= dst_u->w64[i+1] &= src_u->w64[i+1];
        acc |= dst_u->w64[i+2] &= src_u->w64[i+2];
        acc |= dst_u->w64[i+3] &= src_u->w64[i+3];
    }
#endif
    return acc;
}
/// @brief Digest-driven in-place AND: dst &= src, visiting only waves flagged
/// in `digest`; waves that become all-zero are cleared from the digest.
/// @return updated digest of non-zero waves
inline
bm::id64_t bit_block_and(bm::word_t* BMRESTRICT dst,
                         const bm::word_t* BMRESTRICT src,
                         bm::id64_t digest) BMNOEXCEPT
{
    BM_ASSERT(dst);
    BM_ASSERT(src);
    BM_ASSERT(dst != src);
    const bm::id64_t mask(1ull);
    bm::id64_t d = digest;
    while (d)
    {
        bm::id64_t t = bm::bmi_blsi_u64(d); // isolate lowest set digest bit
        unsigned wave = bm::word_bitcount64(t - 1);
        unsigned off = wave * bm::set_block_digest_wave_size;
#if defined(VECT_AND_DIGEST)
        bool all_zero = VECT_AND_DIGEST(&dst[off], &src[off]);
        if (all_zero)
            digest &= ~(mask << wave);
#else
        const bm::bit_block_t::bunion_t* BMRESTRICT src_u = (const bm::bit_block_t::bunion_t*)(&src[off]);
        bm::bit_block_t::bunion_t* BMRESTRICT dst_u = (bm::bit_block_t::bunion_t*)(&dst[off]);
        bm::id64_t acc = 0;
        unsigned j = 0;
        do
        {
            acc |= dst_u->w64[j+0] &= src_u->w64[j+0];
            acc |= dst_u->w64[j+1] &= src_u->w64[j+1];
            acc |= dst_u->w64[j+2] &= src_u->w64[j+2];
            acc |= dst_u->w64[j+3] &= src_u->w64[j+3];
            j+=4;
        } while (j < bm::set_block_digest_wave_size/2);
        if (!acc) // wave became empty
            digest &= ~(mask << wave);
#endif
        d = bm::bmi_bslr_u64(d); // clear lowest set digest bit
    }
    return digest;
}
/// @brief Digest-driven 5-way AND: dst &= src0 & src1 & src2 & src3,
/// visiting only waves flagged in `digest`.
/// @return updated digest of non-zero waves
inline
bm::id64_t bit_block_and_5way(bm::word_t* BMRESTRICT dst,
                              const bm::word_t* BMRESTRICT src0,
                              const bm::word_t* BMRESTRICT src1,
                              const bm::word_t* BMRESTRICT src2,
                              const bm::word_t* BMRESTRICT src3,
                              bm::id64_t digest) BMNOEXCEPT
{
    BM_ASSERT(dst);
    BM_ASSERT(src0 && src1 && src2 && src3);
    const bm::id64_t mask(1ull);
    bm::id64_t d = digest;
    while (d)
    {
        bm::id64_t t = bm::bmi_blsi_u64(d); // isolate lowest set digest bit
        unsigned wave = bm::word_bitcount64(t - 1);
        unsigned off = wave * bm::set_block_digest_wave_size;
#if defined(VECT_AND_DIGEST_5WAY)
        bool all_zero = VECT_AND_DIGEST_5WAY(&dst[off], &src0[off], &src1[off], &src2[off], &src3[off]);
        if (all_zero)
            digest &= ~(mask << wave);
#else
        const bm::bit_block_t::bunion_t* BMRESTRICT src_u0 = (const bm::bit_block_t::bunion_t*)(&src0[off]);
        const bm::bit_block_t::bunion_t* BMRESTRICT src_u1 = (const bm::bit_block_t::bunion_t*)(&src1[off]);
        const bm::bit_block_t::bunion_t* BMRESTRICT src_u2 = (const bm::bit_block_t::bunion_t*)(&src2[off]);
        const bm::bit_block_t::bunion_t* BMRESTRICT src_u3 = (const bm::bit_block_t::bunion_t*)(&src3[off]);
        bm::bit_block_t::bunion_t* BMRESTRICT dst_u = (bm::bit_block_t::bunion_t*)(&dst[off]);
        bm::id64_t acc = 0;
        unsigned j = 0;
        do
        {
            acc |= dst_u->w64[j + 0] &= src_u0->w64[j + 0] & src_u1->w64[j + 0] & src_u2->w64[j + 0] & src_u3->w64[j + 0];
            acc |= dst_u->w64[j + 1] &= src_u0->w64[j + 1] & src_u1->w64[j + 1] & src_u2->w64[j + 1] & src_u3->w64[j + 1];
            acc |= dst_u->w64[j + 2] &= src_u0->w64[j + 2] & src_u1->w64[j + 2] & src_u2->w64[j + 2] & src_u3->w64[j + 2];
            acc |= dst_u->w64[j + 3] &= src_u0->w64[j + 3] & src_u1->w64[j + 3] & src_u2->w64[j + 3] & src_u3->w64[j + 3];
            j += 4;
        } while (j < bm::set_block_digest_wave_size / 2);
        if (!acc) // wave became empty
            digest &= ~(mask << wave);
#endif
        d = bm::bmi_bslr_u64(d); // clear lowest set digest bit
    }
    return digest;
}
// 2-way AND into a separate destination: dst = src1 & src2, restricted to
// the waves flagged in 'digest'. All-zero result waves are cleared from the
// returned digest. Unlike bit_block_and(), dst is overwritten, not ANDed.
// @return updated digest
inline
bm::id64_t bit_block_and_2way(bm::word_t* BMRESTRICT dst,
const bm::word_t* BMRESTRICT src1,
const bm::word_t* BMRESTRICT src2,
bm::id64_t digest) BMNOEXCEPT
{
BM_ASSERT(dst);
BM_ASSERT(src1 && src2);
BM_ASSERT(dst != src1 && dst != src2);
const bm::id64_t mask(1ull);
bm::id64_t d = digest;
while (d)
{
// lowest set bit -> wave index -> word offset of that wave
bm::id64_t t = bm::bmi_blsi_u64(d);
unsigned wave = bm::word_bitcount64(t - 1);
unsigned off = wave * bm::set_block_digest_wave_size;
#if defined(VECT_AND_DIGEST_2WAY)
bool all_zero = VECT_AND_DIGEST_2WAY(&dst[off], &src1[off], &src2[off]);
if (all_zero)
digest &= ~(mask << wave);
#else
const bm::bit_block_t::bunion_t* BMRESTRICT src_u1 =
(const bm::bit_block_t::bunion_t*)(&src1[off]);
const bm::bit_block_t::bunion_t* BMRESTRICT src_u2 =
(const bm::bit_block_t::bunion_t*)(&src2[off]);
bm::bit_block_t::bunion_t* BMRESTRICT dst_u =
(bm::bit_block_t::bunion_t*)(&dst[off]);
// acc ORs every stored word; zero acc means the wave is empty
bm::id64_t acc = 0;
unsigned j = 0;
do
{
acc |= dst_u->w64[j+0] = src_u1->w64[j+0] & src_u2->w64[j+0];
acc |= dst_u->w64[j+1] = src_u1->w64[j+1] & src_u2->w64[j+1];
acc |= dst_u->w64[j+2] = src_u1->w64[j+2] & src_u2->w64[j+2];
acc |= dst_u->w64[j+3] = src_u1->w64[j+3] & src_u2->w64[j+3];
j+=4;
} while (j < bm::set_block_digest_wave_size/2);
if (!acc) digest &= ~(mask << wave);
#endif
d = bm::bmi_bslr_u64(d); }
return digest;
}
// Fused AND-OR: dst |= (src1 & src2), restricted to the waves flagged in
// 'digest'. A wave whose resulting dst content is all-zero is cleared from
// the returned digest (acc accumulates the stored dst words, not just the
// AND product).
// @return updated digest
inline
bm::id64_t bit_block_and_or_2way(bm::word_t* BMRESTRICT dst,
const bm::word_t* BMRESTRICT src1,
const bm::word_t* BMRESTRICT src2,
bm::id64_t digest) BMNOEXCEPT
{
BM_ASSERT(dst);
BM_ASSERT(src1 && src2);
BM_ASSERT(dst != src1 && dst != src2);
const bm::id64_t mask(1ull);
bm::id64_t d = digest;
while (d)
{
// lowest set bit -> wave index -> word offset of that wave
bm::id64_t t = bm::bmi_blsi_u64(d);
unsigned wave = bm::word_bitcount64(t - 1);
unsigned off = wave * bm::set_block_digest_wave_size;
#if defined(VECT_AND_OR_DIGEST_2WAY)
bool all_zero =
VECT_AND_OR_DIGEST_2WAY(&dst[off], &src1[off], &src2[off]);
if (all_zero)
digest &= ~(mask << wave);
#else
const bm::bit_block_t::bunion_t* BMRESTRICT src_u1 =
(const bm::bit_block_t::bunion_t*)(&src1[off]);
const bm::bit_block_t::bunion_t* BMRESTRICT src_u2 =
(const bm::bit_block_t::bunion_t*)(&src2[off]);
bm::bit_block_t::bunion_t* BMRESTRICT dst_u =
(bm::bit_block_t::bunion_t*)(&dst[off]);
bm::id64_t acc = 0;
unsigned j = 0;
do
{
acc |= dst_u->w64[j+0] |= src_u1->w64[j+0] & src_u2->w64[j+0];
acc |= dst_u->w64[j+1] |= src_u1->w64[j+1] & src_u2->w64[j+1];
acc |= dst_u->w64[j+2] |= src_u1->w64[j+2] & src_u2->w64[j+2];
acc |= dst_u->w64[j+3] |= src_u1->w64[j+3] & src_u2->w64[j+3];
j+=4;
} while (j < bm::set_block_digest_wave_size/2);
if (!acc) digest &= ~(mask << wave);
#endif
d = bm::bmi_bslr_u64(d); }
return digest;
}
// Population count of (src1 AND src2) over a full bit-block.
// Build-dependent path selection: SIMD (VECT_BITCOUNT_AND), 64-bit 4-way
// bitcount (BM64OPT), or 32-bit word-at-a-time fallback.
// @return number of set bits in the intersection
inline
unsigned bit_block_and_count(const bm::word_t* BMRESTRICT src1,
const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
unsigned count;
const bm::word_t* src1_end = src1 + bm::set_block_size;
#ifdef BMVECTOPT
count = VECT_BITCOUNT_AND(src1, src1_end, src2);
#else
count = 0;
# ifdef BM64OPT
// reinterpret the 32-bit block as 64-bit words, 4 at a time
const bm::id64_t* b1 = (bm::id64_t*) src1;
const bm::id64_t* b1_end = (bm::id64_t*) src1_end;
const bm::id64_t* b2 = (bm::id64_t*) src2;
do
{
count += bitcount64_4way(b1[0] & b2[0],
b1[1] & b2[1],
b1[2] & b2[2],
b1[3] & b2[3]);
b1 += 4;
b2 += 4;
} while (b1 < b1_end);
# else
do
{
BM_INCWORD_BITCOUNT(count, src1[0] & src2[0]);
BM_INCWORD_BITCOUNT(count, src1[1] & src2[1]);
BM_INCWORD_BITCOUNT(count, src1[2] & src2[2]);
BM_INCWORD_BITCOUNT(count, src1[3] & src2[3]);
src1+=4;
src2+=4;
} while (src1 < src1_end);
# endif
#endif
return count;
}
/// Tests whether (src1 AND src2) has any bit set in the block.
/// Scans four words per step and stops at the first non-zero
/// conjunction. Returns non-zero if an intersection exists, 0 otherwise.
inline
unsigned bit_block_and_any(const bm::word_t* src1,
                           const bm::word_t* src2) BMNOEXCEPT
{
    unsigned acc = 0;
    for (unsigned i = 0; !acc && i < bm::set_block_size; i += 4)
    {
        acc = (src1[i+0] & src2[i+0]) |
              (src1[i+1] & src2[i+1]) |
              (src1[i+2] & src2[i+2]) |
              (src1[i+3] & src2[i+3]);
    }
    return acc;
}
// Population count of (src1 XOR src2) over a full bit-block, i.e. the
// number of positions where the two blocks differ.
// Build-dependent path selection: SIMD, 64-bit 4-way, or 32-bit fallback.
inline
unsigned bit_block_xor_count(const bm::word_t* BMRESTRICT src1,
const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
unsigned count;
const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
#ifdef BMVECTOPT
count = VECT_BITCOUNT_XOR(src1, src1_end, src2);
#else
count = 0;
# ifdef BM64OPT
// reinterpret the 32-bit block as 64-bit words, 4 at a time
const bm::id64_t* b1 = (bm::id64_t*) src1;
const bm::id64_t* b1_end = (bm::id64_t*) src1_end;
const bm::id64_t* b2 = (bm::id64_t*) src2;
do
{
count += bitcount64_4way(b1[0] ^ b2[0],
b1[1] ^ b2[1],
b1[2] ^ b2[2],
b1[3] ^ b2[3]);
b1 += 4;
b2 += 4;
} while (b1 < b1_end);
# else
do
{
BM_INCWORD_BITCOUNT(count, src1[0] ^ src2[0]);
BM_INCWORD_BITCOUNT(count, src1[1] ^ src2[1]);
BM_INCWORD_BITCOUNT(count, src1[2] ^ src2[2]);
BM_INCWORD_BITCOUNT(count, src1[3] ^ src2[3]);
src1+=4;
src2+=4;
} while (src1 < src1_end);
# endif
#endif
return count;
}
/// Tests whether the two blocks differ anywhere (any bit set in XOR).
/// Scans four words per step and stops at the first difference.
/// Returns non-zero if the blocks differ, 0 if they are identical.
inline
unsigned bit_block_xor_any(const bm::word_t* BMRESTRICT src1,
                           const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
    unsigned acc = 0;
    for (unsigned i = 0; !acc && i < bm::set_block_size; i += 4)
    {
        acc = (src1[i+0] ^ src2[i+0]) |
              (src1[i+1] ^ src2[i+1]) |
              (src1[i+2] ^ src2[i+2]) |
              (src1[i+3] ^ src2[i+3]);
    }
    return acc;
}
// Population count of (src1 AND NOT src2) over a full bit-block, i.e. the
// number of bits present in src1 but absent from src2.
// Build-dependent path selection: SIMD, 64-bit 4-way, or 32-bit fallback.
inline
unsigned bit_block_sub_count(const bm::word_t* BMRESTRICT src1,
const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
unsigned count;
const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
#ifdef BMVECTOPT
count = VECT_BITCOUNT_SUB(src1, src1_end, src2);
#else
count = 0;
# ifdef BM64OPT
// reinterpret the 32-bit block as 64-bit words, 4 at a time
const bm::id64_t* b1 = (bm::id64_t*) src1;
const bm::id64_t* b1_end = (bm::id64_t*) src1_end;
const bm::id64_t* b2 = (bm::id64_t*) src2;
do
{
count += bitcount64_4way(b1[0] & ~b2[0],
b1[1] & ~b2[1],
b1[2] & ~b2[2],
b1[3] & ~b2[3]);
b1 += 4;
b2 += 4;
} while (b1 < b1_end);
# else
do
{
BM_INCWORD_BITCOUNT(count, src1[0] & ~src2[0]);
BM_INCWORD_BITCOUNT(count, src1[1] & ~src2[1]);
BM_INCWORD_BITCOUNT(count, src1[2] & ~src2[2]);
BM_INCWORD_BITCOUNT(count, src1[3] & ~src2[3]);
src1+=4;
src2+=4;
} while (src1 < src1_end);
# endif
#endif
return count;
}
/// Tests whether (src1 AND NOT src2) has any bit set, i.e. whether src1
/// contains bits not covered by src2. Scans four words per step and stops
/// at the first hit. Returns non-zero if such a bit exists, 0 otherwise.
inline
unsigned bit_block_sub_any(const bm::word_t* BMRESTRICT src1,
                           const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
    unsigned acc = 0;
    for (unsigned i = 0; !acc && i < bm::set_block_size; i += 4)
    {
        acc = (src1[i+0] & ~src2[i+0]) |
              (src1[i+1] & ~src2[i+1]) |
              (src1[i+2] & ~src2[i+2]) |
              (src1[i+3] & ~src2[i+3]);
    }
    return acc;
}
// Population count of (src1 OR src2) over a full bit-block.
// Build-dependent path selection: SIMD (VECT_BITCOUNT_OR), 64-bit 4-way
// bitcount (BM64OPT), or 32-bit word-at-a-time fallback.
inline
unsigned bit_block_or_count(const bm::word_t* src1,
const bm::word_t* src2) BMNOEXCEPT
{
unsigned count;
const bm::word_t* src1_end = src1 + bm::set_block_size;
#ifdef BMVECTOPT
count = VECT_BITCOUNT_OR(src1, src1_end, src2);
#else
count = 0;
# ifdef BM64OPT
// reinterpret the 32-bit block as 64-bit words, 4 at a time
const bm::id64_t* b1 = (bm::id64_t*) src1;
const bm::id64_t* b1_end = (bm::id64_t*) src1_end;
const bm::id64_t* b2 = (bm::id64_t*) src2;
do
{
count += bitcount64_4way(b1[0] | b2[0],
b1[1] | b2[1],
b1[2] | b2[2],
b1[3] | b2[3]);
b1 += 4;
b2 += 4;
} while (b1 < b1_end);
# else
do
{
BM_INCWORD_BITCOUNT(count, src1[0] | src2[0]);
BM_INCWORD_BITCOUNT(count, src1[1] | src2[1]);
BM_INCWORD_BITCOUNT(count, src1[2] | src2[2]);
BM_INCWORD_BITCOUNT(count, src1[3] | src2[3]);
src1+=4;
src2+=4;
} while (src1 < src1_end);
# endif
#endif
return count;
}
/// Tests whether (src1 OR src2) has any bit set, i.e. whether either
/// block is non-empty. Scans four words per step and stops at the first
/// non-zero word. Returns non-zero if any bit exists, 0 otherwise.
inline
unsigned bit_block_or_any(const bm::word_t* BMRESTRICT src1,
                          const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
    unsigned acc = 0;
    for (unsigned i = 0; !acc && i < bm::set_block_size; i += 4)
    {
        acc = (src1[i+0] | src2[i+0]) |
              (src1[i+1] | src2[i+1]) |
              (src1[i+2] | src2[i+2]) |
              (src1[i+3] | src2[i+3]);
    }
    return acc;
}
/// Block-level AND with NULL/FULL sentinel handling.
/// @param dst destination block pointer (may be NULL or the FULL sentinel)
/// @param src source block pointer (may be NULL or the FULL sentinel)
/// @return 0 when the result is empty, src when FULL dst adopts src,
///         otherwise dst
inline bm::word_t* bit_operation_and(bm::word_t* BMRESTRICT dst,
                                     const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
    BM_ASSERT(dst || src);
    bm::word_t* ret = dst;
    const bool src_real = IS_VALID_ADDR(src);
    if (!src_real)
    {
        // sentinel source: empty source wipes the result, FULL keeps dst
        if (IS_EMPTY_BLOCK(src))
            return 0;
    }
    else if (IS_VALID_ADDR(dst))
    {
        // both real blocks: AND in place, drop the block if it emptied
        if (!bm::bit_block_and(dst, src))
            ret = 0;
    }
    else if (IS_FULL_BLOCK(dst))
    {
        // FULL AND src == src
        ret = const_cast<bm::word_t*>(src);
    }
    return ret;
}
/// AND population count with NULL/FULL sentinel handling: an empty operand
/// yields 0; FULL sentinels are remapped to a readable all-ones block
/// before the word-level count.
inline
bm::id_t bit_operation_and_count(const bm::word_t* BMRESTRICT src1,
                                 const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
    if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2))
        return 0;
    src1 = (src1 == FULL_BLOCK_FAKE_ADDR) ? FULL_BLOCK_REAL_ADDR : src1;
    src2 = (src2 == FULL_BLOCK_FAKE_ADDR) ? FULL_BLOCK_REAL_ADDR : src2;
    return bm::bit_block_and_count(src1, src2);
}
/// AND existence test with NULL/FULL sentinel handling: an empty operand
/// yields 0; FULL sentinels are remapped to a readable all-ones block
/// before the word-level scan.
inline
bm::id_t bit_operation_and_any(const bm::word_t* BMRESTRICT src1,
                               const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
    if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2))
        return 0;
    src1 = (src1 == FULL_BLOCK_FAKE_ADDR) ? FULL_BLOCK_REAL_ADDR : src1;
    src2 = (src2 == FULL_BLOCK_FAKE_ADDR) ? FULL_BLOCK_REAL_ADDR : src2;
    return bm::bit_block_and_any(src1, src2);
}
/// SUB (AND NOT) population count with NULL/FULL sentinel handling.
/// A SUB A == 0 and empty minuend == 0; empty subtrahend counts src1
/// as-is; FULL subtrahend removes everything.
inline
bm::id_t bit_operation_sub_count(const bm::word_t* BMRESTRICT src1,
                                 const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
    if ((src1 == src2) || IS_EMPTY_BLOCK(src1))
        return 0;
    if (IS_EMPTY_BLOCK(src2))  // nothing subtracted: count src1
    {
        return IS_FULL_BLOCK(src1) ? bm::gap_max_bits
                                   : bm::bit_block_count(src1);
    }
    if (IS_FULL_BLOCK(src2))   // everything subtracted
        return 0;
    src1 = (src1 == FULL_BLOCK_FAKE_ADDR) ? FULL_BLOCK_REAL_ADDR : src1;
    src2 = (src2 == FULL_BLOCK_FAKE_ADDR) ? FULL_BLOCK_REAL_ADDR : src2;
    return bm::bit_block_sub_count(src1, src2);
}
/// Inverse-order SUB count: counts bits of (src2 AND NOT src1) by
/// delegating to bit_operation_sub_count with swapped arguments.
inline
bm::id_t bit_operation_sub_count_inv(const bm::word_t* BMRESTRICT src1,
                                     const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
    return bm::bit_operation_sub_count(src2, src1);
}
/// SUB (AND NOT) existence test with NULL/FULL sentinel handling.
/// Empty minuend or self-subtraction is always empty; empty subtrahend
/// reduces to "is src1 non-empty"; FULL subtrahend removes everything.
inline
bm::id_t bit_operation_sub_any(const bm::word_t* BMRESTRICT src1,
                               const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
    if (IS_EMPTY_BLOCK(src1) || (src1 == src2))
        return 0;
    if (IS_EMPTY_BLOCK(src2))   // nothing subtracted
        return !bit_is_all_zero(src1);
    if (IS_FULL_BLOCK(src2))    // everything subtracted
        return 0;
    src1 = (src1 == FULL_BLOCK_FAKE_ADDR) ? FULL_BLOCK_REAL_ADDR : src1;
    src2 = (src2 == FULL_BLOCK_FAKE_ADDR) ? FULL_BLOCK_REAL_ADDR : src2;
    return bm::bit_block_sub_any(src1, src2);
}
/// OR population count with NULL/FULL sentinel handling: a FULL operand
/// saturates to gap_max_bits; an empty operand reduces to counting the
/// other block; FULL sentinels are remapped before the word-level count.
inline
bm::id_t bit_operation_or_count(const bm::word_t* BMRESTRICT src1,
                                const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
    if (IS_FULL_BLOCK(src1) || IS_FULL_BLOCK(src2))
        return bm::gap_max_bits;
    const bool empty1 = IS_EMPTY_BLOCK(src1);
    const bool empty2 = IS_EMPTY_BLOCK(src2);
    if (empty1 && empty2)
        return 0;
    if (empty1)
        return bm::bit_block_count(src2);
    if (empty2)
        return bm::bit_block_count(src1);
    src1 = (src1 == FULL_BLOCK_FAKE_ADDR) ? FULL_BLOCK_REAL_ADDR : src1;
    src2 = (src2 == FULL_BLOCK_FAKE_ADDR) ? FULL_BLOCK_REAL_ADDR : src2;
    return bm::bit_block_or_count(src1, src2);
}
/// OR existence test with NULL/FULL sentinel handling: an empty operand
/// reduces to "is the other block non-empty"; FULL sentinels are remapped
/// before the word-level scan.
inline
bm::id_t bit_operation_or_any(const bm::word_t* BMRESTRICT src1,
                              const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
    const bool empty1 = IS_EMPTY_BLOCK(src1);
    const bool empty2 = IS_EMPTY_BLOCK(src2);
    if (empty1 && empty2)
        return 0;
    if (empty1)
        return !bit_is_all_zero(src2);
    if (empty2)
        return !bit_is_all_zero(src1);
    src1 = (src1 == FULL_BLOCK_FAKE_ADDR) ? FULL_BLOCK_REAL_ADDR : src1;
    src2 = (src2 == FULL_BLOCK_FAKE_ADDR) ? FULL_BLOCK_REAL_ADDR : src2;
    return bit_block_or_any(src1, src2);
}
// In-place OR over a full block: dst |= src.
// Returns true when the resulting dst is all-ones: acc starts as ~0 and
// AND-accumulates each stored word, so it stays ~0 only if every word
// saturated.
inline
bool bit_block_or(bm::word_t* BMRESTRICT dst,
const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
#ifdef BMVECTOPT
return VECT_OR_BLOCK(dst, src);
#else
const bm::wordop_t* BMRESTRICT wrd_ptr = (wordop_t*)src;
const bm::wordop_t* BMRESTRICT wrd_end = (wordop_t*)(src + bm::set_block_size);
bm::wordop_t* BMRESTRICT dst_ptr = (wordop_t*)dst;
// initialize acc (and its reference value not_acc) to all-ones
bm::wordop_t acc = 0;
const bm::wordop_t not_acc = acc = ~acc;
do
{
acc &= (dst_ptr[0] |= wrd_ptr[0]);
acc &= (dst_ptr[1] |= wrd_ptr[1]);
acc &= (dst_ptr[2] |= wrd_ptr[2]);
acc &= (dst_ptr[3] |= wrd_ptr[3]);
dst_ptr+=4;wrd_ptr+=4;
} while (wrd_ptr < wrd_end);
return acc == not_acc;
#endif
}
// 2-way OR into a separate destination: dst = src1 | src2 over the full
// block. Returns true when the resulting dst is all-ones (acc starts as ~0
// and AND-accumulates each stored word).
inline
bool bit_block_or_2way(bm::word_t* BMRESTRICT dst,
const bm::word_t* BMRESTRICT src1,
const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
#ifdef BMVECTOPT
return VECT_OR_BLOCK_2WAY(dst, src1, src2);
#else
const bm::wordop_t* BMRESTRICT wrd_ptr1 = (wordop_t*)src1;
const bm::wordop_t* BMRESTRICT wrd_end1 = (wordop_t*)(src1 + set_block_size);
const bm::wordop_t* BMRESTRICT wrd_ptr2 = (wordop_t*)src2;
bm::wordop_t* BMRESTRICT dst_ptr = (wordop_t*)dst;
// initialize acc (and its reference value not_acc) to all-ones
bm::wordop_t acc = 0;
const bm::wordop_t not_acc = acc = ~acc;
do
{
acc &= (dst_ptr[0] = wrd_ptr1[0] | wrd_ptr2[0]);
acc &= (dst_ptr[1] = wrd_ptr1[1] | wrd_ptr2[1]);
acc &= (dst_ptr[2] = wrd_ptr1[2] | wrd_ptr2[2]);
acc &= (dst_ptr[3] = wrd_ptr1[3] | wrd_ptr2[3]);
dst_ptr+=4; wrd_ptr1+=4; wrd_ptr2+=4;
} while (wrd_ptr1 < wrd_end1);
return acc == not_acc;
#endif
}
// 2-way XOR into a separate destination: dst = src1 ^ src2 over the full
// block. Returns the OR-accumulation of all stored words: non-zero when
// the result has any set bit, 0 when the blocks were identical.
inline
bm::id64_t bit_block_xor_2way(bm::word_t* BMRESTRICT dst,
const bm::word_t* BMRESTRICT src1,
const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
#ifdef BMVECTOPT
return VECT_XOR_BLOCK_2WAY(dst, src1, src2);
#else
const bm::wordop_t* BMRESTRICT wrd_ptr1 = (wordop_t*)src1;
const bm::wordop_t* BMRESTRICT wrd_end1 = (wordop_t*)(src1 + set_block_size);
const bm::wordop_t* BMRESTRICT wrd_ptr2 = (wordop_t*)src2;
bm::wordop_t* BMRESTRICT dst_ptr = (wordop_t*)dst;
bm::wordop_t acc = 0;
do
{
acc |= (dst_ptr[0] = wrd_ptr1[0] ^ wrd_ptr2[0]);
acc |= (dst_ptr[1] = wrd_ptr1[1] ^ wrd_ptr2[1]);
acc |= (dst_ptr[2] = wrd_ptr1[2] ^ wrd_ptr2[2]);
acc |= (dst_ptr[3] = wrd_ptr1[3] ^ wrd_ptr2[3]);
dst_ptr+=4; wrd_ptr1+=4; wrd_ptr2+=4;
} while (wrd_ptr1 < wrd_end1);
return acc;
#endif
}
// 3-way OR: dst |= src1 | src2 over the full block.
// Returns true when the resulting dst is all-ones (acc starts as ~0 and
// AND-accumulates each stored word).
inline
bool bit_block_or_3way(bm::word_t* BMRESTRICT dst,
const bm::word_t* BMRESTRICT src1,
const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
#ifdef BMVECTOPT
return VECT_OR_BLOCK_3WAY(dst, src1, src2);
#else
const bm::wordop_t* BMRESTRICT wrd_ptr1 = (wordop_t*)src1;
const bm::wordop_t* BMRESTRICT wrd_end1 = (wordop_t*)(src1 + set_block_size);
const bm::wordop_t* BMRESTRICT wrd_ptr2 = (wordop_t*)src2;
bm::wordop_t* BMRESTRICT dst_ptr = (wordop_t*)dst;
// initialize acc (and its reference value not_acc) to all-ones
bm::wordop_t acc = 0;
const bm::wordop_t not_acc = acc = ~acc;
do
{
acc &= (dst_ptr[0] |= wrd_ptr1[0] | wrd_ptr2[0]);
acc &= (dst_ptr[1] |= wrd_ptr1[1] | wrd_ptr2[1]);
acc &= (dst_ptr[2] |= wrd_ptr1[2] | wrd_ptr2[2]);
acc &= (dst_ptr[3] |= wrd_ptr1[3] | wrd_ptr2[3]);
dst_ptr+=4; wrd_ptr1+=4;wrd_ptr2+=4;
} while (wrd_ptr1 < wrd_end1);
return acc == not_acc;
#endif
}
// 5-way OR: dst |= src1 | src2 | src3 | src4 over the full block.
// Returns true when the resulting dst is all-ones (acc starts as ~0 and
// AND-accumulates each stored word).
inline
bool bit_block_or_5way(bm::word_t* BMRESTRICT dst,
const bm::word_t* BMRESTRICT src1,
const bm::word_t* BMRESTRICT src2,
const bm::word_t* BMRESTRICT src3,
const bm::word_t* BMRESTRICT src4) BMNOEXCEPT
{
#ifdef BMVECTOPT
return VECT_OR_BLOCK_5WAY(dst, src1, src2, src3, src4);
#else
const bm::wordop_t* BMRESTRICT wrd_ptr1 = (wordop_t*)src1;
const bm::wordop_t* BMRESTRICT wrd_end1 = (wordop_t*)(src1 + set_block_size);
const bm::wordop_t* BMRESTRICT wrd_ptr2 = (wordop_t*)src2;
const bm::wordop_t* BMRESTRICT wrd_ptr3 = (wordop_t*)src3;
const bm::wordop_t* BMRESTRICT wrd_ptr4 = (wordop_t*)src4;
bm::wordop_t* BMRESTRICT dst_ptr = (wordop_t*)dst;
// initialize acc (and its reference value not_acc) to all-ones
bm::wordop_t acc = 0;
const bm::wordop_t not_acc = acc = ~acc;
do
{
acc &= (dst_ptr[0] |= wrd_ptr1[0] | wrd_ptr2[0] | wrd_ptr3[0] | wrd_ptr4[0]);
acc &= (dst_ptr[1] |= wrd_ptr1[1] | wrd_ptr2[1] | wrd_ptr3[1] | wrd_ptr4[1]);
acc &= (dst_ptr[2] |= wrd_ptr1[2] | wrd_ptr2[2] | wrd_ptr3[2] | wrd_ptr4[2]);
acc &= (dst_ptr[3] |= wrd_ptr1[3] | wrd_ptr2[3] | wrd_ptr3[3] | wrd_ptr4[3]);
dst_ptr+=4;
wrd_ptr1+=4;wrd_ptr2+=4;wrd_ptr3+=4;wrd_ptr4+=4;
} while (wrd_ptr1 < wrd_end1);
return acc == not_acc;
#endif
}
/// Block-level OR with NULL/FULL sentinel handling.
/// @param dst destination block pointer (may be NULL or the FULL sentinel)
/// @param src source block pointer (may be NULL or the FULL sentinel)
/// @return the FULL sentinel when the result saturates with no real dst,
///         src when a NULL dst adopts src, otherwise dst
inline
bm::word_t* bit_operation_or(bm::word_t* BMRESTRICT dst,
                             const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
    BM_ASSERT(dst || src);
    bm::word_t* ret = dst;
    if (IS_VALID_ADDR(dst))
    {
        if (IS_VALID_ADDR(src))
        {
            bm::bit_block_or(dst, src);     // in-place OR of real blocks
        }
        else if (IS_FULL_BLOCK(src))
        {
            // OR with all-ones: saturate the destination block
            ::memset(dst, 0xFF, bm::set_block_size * sizeof(bm::word_t));
        }
    }
    else    // dst is NULL or the FULL sentinel
    {
        if (IS_VALID_ADDR(src))
        {
            if (!dst)                       // no dst block: adopt src
                ret = const_cast<bm::word_t*>(src);
        }
        else if (IS_FULL_BLOCK(src))
        {
            ret = const_cast<bm::word_t*>(FULL_BLOCK_FAKE_ADDR);
        }
    }
    return ret;
}
// In-place SUB over a full block: dst &= ~src.
// Returns the OR-accumulation of all stored words: non-zero when the
// result still has set bits, 0 when dst became empty.
inline
bm::id64_t bit_block_sub(bm::word_t* BMRESTRICT dst,
const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
#ifdef BMVECTOPT
bm::id64_t acc = VECT_SUB_BLOCK(dst, src);
return acc;
#else
// block viewed as 64-bit words: half the number of 32-bit words
unsigned arr_sz = bm::set_block_size / 2;
const bm::bit_block_t::bunion_t* BMRESTRICT src_u = (const bm::bit_block_t::bunion_t*)src;
bm::bit_block_t::bunion_t* BMRESTRICT dst_u = (bm::bit_block_t::bunion_t*)dst;
bm::id64_t acc = 0;
for (unsigned i = 0; i < arr_sz; i+=4)
{
acc |= dst_u->w64[i] &= ~src_u->w64[i];
acc |= dst_u->w64[i+1] &= ~src_u->w64[i+1];
acc |= dst_u->w64[i+2] &= ~src_u->w64[i+2];
acc |= dst_u->w64[i+3] &= ~src_u->w64[i+3];
}
return acc;
#endif
}
// Digest-driven SUB: dst &= ~src, restricted to the waves flagged in
// 'digest'. Waves whose result becomes all-zero are cleared from the
// returned digest.
// @return updated digest
inline
bm::id64_t bit_block_sub(bm::word_t* BMRESTRICT dst,
const bm::word_t* BMRESTRICT src,
bm::id64_t digest) BMNOEXCEPT
{
BM_ASSERT(dst);
BM_ASSERT(src);
BM_ASSERT(dst != src);
const bm::id64_t mask(1ull);
bm::id64_t d = digest;
while (d)
{
// lowest set bit -> wave index -> word offset of that wave
bm::id64_t t = bm::bmi_blsi_u64(d);
unsigned wave = bm::word_bitcount64(t - 1);
unsigned off = wave * bm::set_block_digest_wave_size;
#if defined(VECT_SUB_DIGEST)
bool all_zero = VECT_SUB_DIGEST(&dst[off], &src[off]);
if (all_zero)
digest &= ~(mask << wave);
#else
const bm::bit_block_t::bunion_t* BMRESTRICT src_u = (const bm::bit_block_t::bunion_t*)(&src[off]);
bm::bit_block_t::bunion_t* BMRESTRICT dst_u = (bm::bit_block_t::bunion_t*)(&dst[off]);
// acc ORs every stored word; zero acc means the wave emptied
bm::id64_t acc = 0;
unsigned j = 0;
do
{
acc |= dst_u->w64[j+0] &= ~src_u->w64[j+0];
acc |= dst_u->w64[j+1] &= ~src_u->w64[j+1];
acc |= dst_u->w64[j+2] &= ~src_u->w64[j+2];
acc |= dst_u->w64[j+3] &= ~src_u->w64[j+3];
j+=4;
} while (j < bm::set_block_digest_wave_size/2);
if (!acc) digest &= ~(mask << wave);
#endif
d = bm::bmi_bslr_u64(d); }
return digest;
}
// 2-way SUB into a separate destination: dst = src1 & ~src2, restricted to
// the waves flagged in 'digest'. All-zero result waves are cleared from
// the returned digest.
// @return updated digest
inline
bm::id64_t bit_block_sub_2way(bm::word_t* BMRESTRICT dst,
const bm::word_t* BMRESTRICT src1,
const bm::word_t* BMRESTRICT src2,
bm::id64_t digest) BMNOEXCEPT
{
BM_ASSERT(dst);
BM_ASSERT(src1 && src2);
BM_ASSERT(dst != src1 && dst != src2);
const bm::id64_t mask(1ull);
bm::id64_t d = digest;
while (d)
{
// lowest set bit -> wave index -> word offset of that wave
bm::id64_t t = bm::bmi_blsi_u64(d);
unsigned wave = bm::word_bitcount64(t - 1);
unsigned off = wave * bm::set_block_digest_wave_size;
#if defined(VECT_SUB_DIGEST_2WAY)
bool all_zero = VECT_SUB_DIGEST_2WAY(&dst[off], &src1[off], &src2[off]);
if (all_zero)
digest &= ~(mask << wave);
#else
const bm::bit_block_t::bunion_t* BMRESTRICT src_u1 =
(const bm::bit_block_t::bunion_t*)(&src1[off]);
const bm::bit_block_t::bunion_t* BMRESTRICT src_u2 =
(const bm::bit_block_t::bunion_t*)(&src2[off]);
bm::bit_block_t::bunion_t* BMRESTRICT dst_u =
(bm::bit_block_t::bunion_t*)(&dst[off]);
// acc ORs every stored word; zero acc means the wave is empty
bm::id64_t acc = 0;
unsigned j = 0;
do
{
acc |= dst_u->w64[j+0] = src_u1->w64[j+0] & ~src_u2->w64[j+0];
acc |= dst_u->w64[j+1] = src_u1->w64[j+1] & ~src_u2->w64[j+1];
acc |= dst_u->w64[j+2] = src_u1->w64[j+2] & ~src_u2->w64[j+2];
acc |= dst_u->w64[j+3] = src_u1->w64[j+3] & ~src_u2->w64[j+3];
j+=4;
} while (j < bm::set_block_digest_wave_size/2);
if (!acc) digest &= ~(mask << wave);
#endif
d = bm::bmi_bslr_u64(d); }
return digest;
}
/// Block-level SUB (AND NOT) with NULL/FULL sentinel handling.
/// @param dst destination block pointer (may be NULL or the FULL sentinel)
/// @param src source block pointer (may be NULL or the FULL sentinel)
/// @return 0 when the result is empty, src when a FULL-sentinel dst hands
///         the operation off to the caller, otherwise dst
inline
bm::word_t* bit_operation_sub(bm::word_t* BMRESTRICT dst,
                              const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
    BM_ASSERT(dst || src);
    // subtracting a FULL block always empties the result
    if (!IS_VALID_ADDR(src) && IS_FULL_BLOCK(src))
        return 0;
    bm::word_t* ret = dst;
    if (IS_VALID_ADDR(src))
    {
        if (IS_VALID_ADDR(dst))
        {
            // real blocks: in-place dst &= ~src; drop block if emptied
            if (!bm::bit_block_sub(dst, src))
                ret = 0;
        }
        else if (IS_FULL_BLOCK(dst))
        {
            // FULL sentinel dst: hand src back (same as original logic)
            ret = const_cast<bm::word_t*>(src);
        }
    }
    // src empty/NULL: dst is returned unchanged
    return ret;
}
// In-place XOR over a full block: dst ^= src.
// Returns the OR-accumulation of all stored words: non-zero when the
// result has any set bit, 0 when dst became empty.
inline
bm::id64_t bit_block_xor(bm::word_t* BMRESTRICT dst,
const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
BM_ASSERT(dst);
BM_ASSERT(src);
BM_ASSERT(dst != src);
#ifdef BMVECTOPT
bm::id64_t acc = VECT_XOR_BLOCK(dst, src);
#else
// block viewed as 64-bit words: half the number of 32-bit words
unsigned arr_sz = bm::set_block_size / 2;
const bm::bit_block_t::bunion_t* BMRESTRICT src_u = (const bm::bit_block_t::bunion_t*)src;
bm::bit_block_t::bunion_t* BMRESTRICT dst_u = (bm::bit_block_t::bunion_t*)dst;
bm::id64_t acc = 0;
for (unsigned i = 0; i < arr_sz; i+=4)
{
acc |= dst_u->w64[i] ^= src_u->w64[i];
acc |= dst_u->w64[i+1] ^= src_u->w64[i+1];
acc |= dst_u->w64[i+2] ^= src_u->w64[i+2];
acc |= dst_u->w64[i+3] ^= src_u->w64[i+3];
}
#endif
return acc;
}
// Computes dst = NOT src (all-ones mask AND NOT src) over a full block.
// dst is fully overwritten; src is not modified.
inline
void bit_andnot_arr_ffmask(bm::word_t* BMRESTRICT dst,
const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
const bm::word_t* BMRESTRICT src_end = src + bm::set_block_size;
#ifdef BMVECTOPT
VECT_ANDNOT_ARR_2_MASK(dst, src, src_end, ~0u);
#else
bm::wordop_t* dst_ptr = (wordop_t*)dst;
const bm::wordop_t* wrd_ptr = (wordop_t*) src;
const bm::wordop_t* wrd_end = (wordop_t*) src_end;
do
{
// all_bits_mask & ~w == bitwise complement of w
dst_ptr[0] = bm::all_bits_mask & ~wrd_ptr[0];
dst_ptr[1] = bm::all_bits_mask & ~wrd_ptr[1];
dst_ptr[2] = bm::all_bits_mask & ~wrd_ptr[2];
dst_ptr[3] = bm::all_bits_mask & ~wrd_ptr[3];
dst_ptr+=4; wrd_ptr+=4;
} while (wrd_ptr < wrd_end);
#endif
}
/// Block-level XOR with NULL sentinel handling.
/// @param dst destination block pointer
/// @param src source block pointer
/// @return 0 when src == dst (X xor X), dst when src is NULL, src when
///         dst is not a real block, otherwise dst after in-place XOR
inline
bm::word_t* bit_operation_xor(bm::word_t* BMRESTRICT dst,
                              const bm::word_t* BMRESTRICT src) BMNOEXCEPT
{
    BM_ASSERT(dst || src);
    if (src == dst)
        return 0;                 // X xor X == 0
    if (!src)
        return dst;               // XOR with nothing: unchanged
    if (!IS_VALID_ADDR(dst))
        return const_cast<bm::word_t*>(src);
    bit_block_xor(dst, src);      // real blocks: in-place XOR
    return dst;
}
/// XOR population count with NULL/FULL sentinel handling: identical blocks
/// yield 0; XOR against an empty operand reduces to counting the other
/// block (gap_max_bits for FULL); FULL sentinels are remapped before the
/// word-level count.
inline
bm::id_t bit_operation_xor_count(const bm::word_t* BMRESTRICT src1,
                                 const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
    if (src1 == src2)
        return 0;
    if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2))
    {
        const bm::word_t* block = IS_EMPTY_BLOCK(src1) ? src2 : src1;
        return IS_FULL_BLOCK(block) ? bm::gap_max_bits
                                    : bm::bit_block_count(block);
    }
    src1 = (src1 == FULL_BLOCK_FAKE_ADDR) ? FULL_BLOCK_REAL_ADDR : src1;
    src2 = (src2 == FULL_BLOCK_FAKE_ADDR) ? FULL_BLOCK_REAL_ADDR : src2;
    return bit_block_xor_count(src1, src2);
}
/// XOR existence test with NULL/FULL sentinel handling.
/// Fix: unlike the sibling bit_operation_xor_count(), the original passed
/// FULL_BLOCK_FAKE_ADDR straight into the word-level scanners, so a FULL
/// sentinel operand could be dereferenced as if it were a real block.
/// This version short-circuits the FULL case and remaps sentinels to
/// FULL_BLOCK_REAL_ADDR before any word access, mirroring the count
/// variant.
/// @return non-zero when the XOR result has any bit set, 0 otherwise
inline
bm::id_t bit_operation_xor_any(const bm::word_t* BMRESTRICT src1,
                               const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
{
    if (src1 == src2)
        return 0;   // X xor X == 0
    if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2))
    {
        const bm::word_t* block = IS_EMPTY_BLOCK(src1) ? src2 : src1;
        if (IS_FULL_BLOCK(block))   // FULL xor empty is trivially non-empty;
            return 1;               // never dereference the FULL sentinel
        return !bit_is_all_zero(block);
    }
    // remap FULL sentinels to a readable all-ones block (consistent with
    // bit_operation_xor_count)
    if (src1 == FULL_BLOCK_FAKE_ADDR)
        src1 = FULL_BLOCK_REAL_ADDR;
    if (src2 == FULL_BLOCK_FAKE_ADDR)
        src2 = FULL_BLOCK_REAL_ADDR;
    return bm::bit_block_xor_any(src1, src2);
}
// Estimates the serialized byte size of the array when runs of zero words
// are collapsed: each zero run contributes sizeof(gap_word_t); each
// non-zero run contributes a sizeof(gap_word_t) header plus its words.
// A single zero word sandwiched between non-zero data (checked via
// blk_j[1] | blk_j[2]) is kept inside the non-zero run rather than
// starting a new zero run.
// @param blk       array to measure (must be non-NULL)
// @param data_size number of T elements in the array
// @return estimated size in bytes (plus a 2*sizeof(T) tail allowance)
template<class T>
unsigned bit_count_nonzero_size(const T* blk, unsigned data_size) BMNOEXCEPT
{
BM_ASSERT(blk && data_size);
unsigned count = 0;
// scan stops 2 elements early; the tail is accounted for at return
const T* blk_end = blk + data_size - 2;
do
{
if (*blk == 0) {
// zero run: advance to the next non-zero element
const T* blk_j = blk + 1;
for (; blk_j < blk_end; ++blk_j)
{
if (*blk_j != 0)
break;
} blk = blk_j-1;
count += (unsigned)sizeof(gap_word_t);
}
else
{
// non-zero run: advance until a "real" zero gap begins
const T* blk_j = blk + 1; for ( ; blk_j < blk_end; ++blk_j)
{
if (*blk_j == 0)
{
// ignore a short zero gap followed by more data
if (blk_j[1] | blk_j[2]) {
++blk_j; continue;
}
break;
}
} count += unsigned(sizeof(gap_word_t));
count += unsigned(blk_j - blk) * unsigned(sizeof(T));
blk = blk_j;
}
++blk;
}
while(blk < blk_end);
return count + unsigned(2 * sizeof(T));
}
// Finds the first set bit at position >= nbit within the block.
// On success stores the found position in *pos and returns non-zero
// (1 when nbit itself is set, otherwise the word that contained the bit);
// returns 0 when no set bit exists at or after nbit.
inline
unsigned bit_block_find(const bm::word_t* BMRESTRICT block,
unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
{
BM_ASSERT(block);
BM_ASSERT(pos);
// split the bit index into word index and bit-in-word offset
unsigned nword = unsigned(nbit >> bm::set_word_shift);
unsigned bit_pos = (nbit & bm::set_word_mask);
bm::word_t w = block[nword];
// fast path: the requested bit itself is set
w &= (1u << bit_pos);
if (w)
{
*pos = nbit;
return 1;
}
// mask off bits below bit_pos and scan the rest of the word
w = block[nword] >> bit_pos;
w <<= bit_pos; if (w)
{
bit_pos = bm::bit_scan_forward32(w); *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t))));
return 1;
}
// scan the remaining words of the block
for (unsigned i = nword+1; i < bm::set_block_size; ++i)
{
w = block[i];
if (w)
{
bit_pos = bm::bit_scan_forward32(w); *pos = unsigned(bit_pos + (i * 8u * unsigned(sizeof(bm::word_t))));
return w;
}
} return 0u;
}
/// Finds the position of the highest set bit in the block.
/// On success stores the bit position in *last and returns the (non-zero)
/// word that contained it; returns 0 when the block has no set bits.
inline
unsigned bit_find_last(const bm::word_t* BMRESTRICT block,
                       unsigned* BMRESTRICT last) BMNOEXCEPT
{
    BM_ASSERT(block);
    BM_ASSERT(last);
    // walk words from the top of the block down
    unsigned i = bm::set_block_size;
    while (i--)
    {
        bm::word_t w = block[i];
        if (w)
        {
            unsigned msb = bm::bit_scan_reverse(w);
            *last = unsigned(msb + (i * 8u * unsigned(sizeof(bm::word_t))));
            return w;
        }
    }
    return 0u;
}
// Finds the position of the lowest set bit in the block.
// On success stores the bit position in *pos and returns true (the
// scalar path returns the non-zero word converted to bool); returns
// false when the block has no set bits.
inline
bool bit_find_first(const bm::word_t* BMRESTRICT block,
unsigned* BMRESTRICT pos) BMNOEXCEPT
{
BM_ASSERT(block);
BM_ASSERT(pos);
#ifdef VECT_BIT_FIND_FIRST
return VECT_BIT_FIND_FIRST(block, pos);
#else
for (unsigned i = 0; i < bm::set_block_size; ++i)
{
bm::word_t w = block[i];
if (w)
{
unsigned idx = bm::bit_scan_forward32(w); *pos = unsigned(idx + (i * 8u * unsigned(sizeof(bm::word_t))));
return w;
}
} return false;
#endif
}
// Digest-assisted first-bit search: starts scanning at the wave of the
// lowest set digest bit (earlier waves are known-empty) and continues to
// the end of the block. On success stores the bit position in *first and
// returns the non-zero word; returns 0 if nothing is found.
inline
unsigned bit_find_first(const bm::word_t* BMRESTRICT block,
unsigned* BMRESTRICT first,
bm::id64_t digest) BMNOEXCEPT
{
BM_ASSERT(block);
BM_ASSERT(first);
BM_ASSERT(digest);
// lowest set digest bit -> wave index -> starting word offset
bm::id64_t t = bm::bmi_blsi_u64(digest);
unsigned wave = bm::word_bitcount64(t - 1);
unsigned off = wave * bm::set_block_digest_wave_size;
for (unsigned i = off; i < bm::set_block_size; ++i)
{
bm::word_t w = block[i];
if (w)
{
unsigned idx = bit_scan_forward32(w); *first = unsigned(idx + (i * 8u * unsigned(sizeof(bm::word_t))));
return w;
}
} return 0u;
}
// Checks whether the block contains exactly ONE set bit, using the digest
// to restrict the scan: requires exactly one digest wave set, exactly one
// set bit within that wave, and no other set words in the wave.
// On success stores the bit position in *first and returns true.
inline
bool bit_find_first_if_1(const bm::word_t* BMRESTRICT block,
unsigned* BMRESTRICT first,
bm::id64_t digest) BMNOEXCEPT
{
BM_ASSERT(block);
BM_ASSERT(first);
// more than one non-empty wave -> more than one bit possible
unsigned bc = bm::word_bitcount64(digest);
if (bc != 1)
return false;
bool found = false;
// locate the single flagged wave
bm::id64_t t = bm::bmi_blsi_u64(digest);
unsigned wave = bm::word_bitcount64(t - 1);
unsigned off = wave * bm::set_block_digest_wave_size;
unsigned i;
for (i = off; i < off + bm::set_block_digest_wave_size; ++i)
{
bm::word_t w = block[i];
if (w)
{
// first non-zero word must itself contain exactly one bit
bc = bm::word_bitcount(w);
if (bc != 1)
return false;
unsigned idx = bit_scan_forward32(w); *first = unsigned(idx + (i * 8u * sizeof(bm::word_t)));
found = true;
break;
}
}
// remainder of the wave must be empty, or the block has >1 bit
for (++i; i < off + bm::set_block_digest_wave_size; ++i)
{
if (block[i])
return false;
}
return found;
}
// Rank search: finds the position of the rank-th set bit, counting from
// bit nbit_from. On success writes the position to nbit_pos and returns 0;
// otherwise returns the residual rank (bits still to skip past the block).
// @param block     bit-block to search (must be non-NULL)
// @param rank      1-based rank of the target set bit (must be non-zero)
// @param nbit_from starting bit position within the block
// @param nbit_pos  [out] absolute bit position within the block
template<typename SIZE_TYPE>
SIZE_TYPE bit_find_rank(const bm::word_t* const block,
SIZE_TYPE rank,
unsigned nbit_from,
unsigned& nbit_pos) BMNOEXCEPT
{
BM_ASSERT(block);
BM_ASSERT(rank);
unsigned nword = nbit_from >> bm::set_word_shift;
BM_ASSERT(nword < bm::set_block_size);
unsigned pos = nbit_from;
bm::id_t nbit = (nbit_from & bm::set_word_mask);
// partial first word: mask off the bits below nbit_from
if (nbit)
{
bm::id_t w = block[nword];
w >>= nbit;
unsigned bc = bm::word_bitcount(w);
if (bc < rank) {
// target is beyond this word: consume it and move on
rank -= bc; pos += unsigned(32u - nbit);
++nword;
}
else {
// target is inside this word: select the rank-th bit
unsigned idx = bm::word_select32(w, unsigned(rank));
nbit_pos = pos + idx;
return 0;
}
}
#if defined(BM64OPT) || defined(BM64_SSE4) || defined(BMAVX2OPT) || defined(BMAVX512OPT)
{
// 64-bit fast path: fuse two 32-bit words per step
for (; nword < bm::set_block_size-1; nword+=2)
{
bm::id64_t w =
(bm::id64_t(block[nword+1]) << 32) | bm::id64_t(block[nword]);
bm::id_t bc = bm::word_bitcount64(w);
if (bc >= rank) {
unsigned idx = bm::word_select64(w, unsigned(rank));
nbit_pos = pos + idx;
return 0;
}
rank -= bc;
pos += 64u;
}
}
#endif
// word-at-a-time tail (or the whole scan on 32-bit builds)
for (; nword < bm::set_block_size; ++nword)
{
bm::id_t w = block[nword];
bm::id_t bc = bm::word_bitcount(w);
if (rank > bc)
{
rank -= bc; pos += 32u;
continue;
}
unsigned idx = bm::word_select32(w, unsigned(rank));
nbit_pos = pos + idx;
return 0;
} return rank;
}
/// Rank search dispatcher: routes to the GAP or bit-block implementation
/// depending on the block representation.
/// @return 0 on success (nbit_pos set), otherwise the residual rank
template<typename SIZE_TYPE>
SIZE_TYPE block_find_rank(const bm::word_t* const block,
                          SIZE_TYPE rank,
                          unsigned nbit_from,
                          unsigned& nbit_pos) BMNOEXCEPT
{
    if (BM_IS_GAP(block))
        return bm::gap_find_rank(BMGAP_PTR(block), rank, nbit_from, nbit_pos);
    return bm::bit_find_rank(block, rank, nbit_from, nbit_pos);
}
/// Chooses the most compact block representation by comparing estimated
/// serialized sizes of: GAP encoding, an array of set-bit positions,
/// an array of zero-bit positions (inverted), and the raw bit block.
/// @param bit_count                number of set bits in the block
/// @param total_possible_bitcount  capacity of the block in bits
/// @param gap_count                number of GAP runs
/// @param block_size               size of the raw bit block (bytes)
inline
bm::set_representation best_representation(unsigned bit_count,
                                           unsigned total_possible_bitcount,
                                           unsigned gap_count,
                                           unsigned block_size) BMNOEXCEPT
{
    // candidate sizes, each with one gap_word_t of header overhead
    const unsigned arr_size =
        unsigned(sizeof(bm::gap_word_t) * bit_count + sizeof(bm::gap_word_t));
    const unsigned gap_size =
        unsigned(sizeof(bm::gap_word_t) * gap_count + sizeof(bm::gap_word_t));
    const unsigned inv_arr_size =
        unsigned(sizeof(bm::gap_word_t) * (total_possible_bitcount - bit_count)
                 + sizeof(bm::gap_word_t));

    if ((gap_size < block_size) && (gap_size < arr_size) && (gap_size < inv_arr_size))
        return bm::set_gap;

    if (arr_size < inv_arr_size)
    {
        if ((arr_size < block_size) && (arr_size < gap_size))
            return bm::set_array1;      // list of set bits wins
    }
    else if ((inv_arr_size < block_size) && (inv_arr_size < gap_size))
    {
        return bm::set_array0;          // list of zero bits wins
    }
    return bm::set_bitset;              // raw bit block is smallest
}
// Converts a bit-block into an array of bit positions. When 'inverted' is
// true the zero bits are listed instead (the source words are XORed with
// an all-ones mask). Returns the number of positions written to dest.
// The caller must provide a dest buffer large enough for all positions.
template<typename T>
unsigned bit_block_convert_to_arr(T* BMRESTRICT dest,
const unsigned* BMRESTRICT src,
bool inverted) BMNOEXCEPT
{
// XOR mask: ~0 flips every word (inversion), 0 leaves them unchanged
bm::id64_t imask64 = inverted ? ~0ull : 0;
T* BMRESTRICT pcurr = dest;
for (unsigned bit_idx = 0; bit_idx < bm::gap_max_bits;
src+=2, bit_idx += unsigned(sizeof(*src) * 8 * 2))
{
// fuse two 32-bit words into one 64-bit word
bm::id64_t w =
(bm::id64_t(src[0]) | (bm::id64_t(src[1]) << 32u)) ^ imask64;
// extract set bits lowest-first: BLSI isolates, BLSR clears
while (w)
{
bm::id64_t t = bmi_blsi_u64(w); *pcurr++ = (T)(bm::word_bitcount64(t - 1) + bit_idx);
w = bmi_bslr_u64(w); }
} return (unsigned)(pcurr - dest);
}
/// Tests whether a block is empty (all zero).
/// @param blk       block pointer (NULL counts as empty; FULL never is)
/// @param deep_scan when true, a bit-block is word-scanned; when false,
///                  a bit-block is conservatively reported as non-empty
inline
bool check_block_zero(const bm::word_t* blk, bool deep_scan) BMNOEXCEPT
{
    if (!blk)
        return true;                           // no block: empty
    if (IS_FULL_BLOCK(blk))
        return false;
    if (BM_IS_GAP(blk))
        return gap_is_all_zero(BMGAP_PTR(blk));
    return deep_scan ? bm::bit_is_all_zero(blk) : false;
}
/// Tests whether a block is completely full (all bits set).
/// @param blk       block pointer (NULL is never full)
/// @param deep_scan when true, a bit-block is word-scanned; when false,
///                  a bit-block is conservatively reported as not full
inline
bool check_block_one(const bm::word_t* blk, bool deep_scan) BMNOEXCEPT
{
    if (!blk)
        return false;                          // no block: not all-ones
    if (BM_IS_GAP(blk))
        return bm::gap_is_all_one(BMGAP_PTR(blk));
    if (IS_FULL_BLOCK(blk))
        return true;
    return deep_scan && bm::is_bits_one((wordop_t*)blk);
}
/// Computes the total wasted capacity (capacity - length, summed) for a
/// set of GAP blocks under the given level-length table.
/// @param length     array of GAP block lengths
/// @param length_end end of the length array
/// @param glevel_len GAP level capacity table
/// @return total overhead in elements
template<typename T>
unsigned gap_overhead(const T* length,
                      const T* length_end,
                      const T* glevel_len) BMNOEXCEPT
{
    BM_ASSERT(length && length_end && glevel_len);
    unsigned total = 0;
    for (const T* p = length; p != length_end; ++p)
    {
        unsigned len = *p;
        // pick the smallest level able to hold this length
        int level = gap_calc_level(len, glevel_len);
        BM_ASSERT(level >= 0 && level < (int)bm::gap_levels);
        unsigned capacity = glevel_len[level];
        BM_ASSERT(capacity >= len);
        total += capacity - len;
    }
    return total;
}
// Tunes the GAP level-length table to reduce total capacity overhead for
// the given set of block lengths. Tries each observed length (+4) as a
// candidate capacity for each level, keeping changes that do not increase
// overhead, then deduplicates and pads the table.
// @param length     array of GAP block lengths
// @param length_end end of the length array
// @param glevel_len [in/out] GAP level capacity table being optimized
// @return true if the table was improved
template<typename T>
bool improve_gap_levels(const T* length,
const T* length_end,
T* glevel_len) BMNOEXCEPT
{
BM_ASSERT(length && length_end && glevel_len);
size_t lsize = size_t(length_end - length);
BM_ASSERT(lsize);
gap_word_t max_len = 0;
unsigned i;
// find the largest observed length
for (i = 0; i < lsize; ++i)
{
if (length[i] > max_len)
max_len = length[i];
}
// degenerate case: tiny lengths or few samples -> one tight level,
// the rest maxed out
if (max_len < 5 || lsize <= bm::gap_levels)
{
glevel_len[0] = T(max_len + 4);
for (i = 1; i < bm::gap_levels; ++i)
{
glevel_len[i] = bm::gap_max_buff_len;
}
return true;
}
// top level must hold the largest block
glevel_len[bm::gap_levels-1] = T(max_len + 5);
unsigned min_overhead = gap_overhead(length, length_end, glevel_len);
bool is_improved = false;
// greedy search from the second-highest level down to level 0
for (i = bm::gap_levels-2; ; --i)
{
unsigned opt_len = 0;
unsigned j;
bool imp_flag = false;
gap_word_t gap_saved_value = glevel_len[i];
// try every observed length (+4 headroom) as this level's capacity
for (j = 0; j < lsize; ++j)
{
glevel_len[i] = T(length[j] + 4);
unsigned ov = gap_overhead(length, length_end, glevel_len);
if (ov <= min_overhead)
{
min_overhead = ov;
opt_len = length[j]+4;
imp_flag = true;
}
}
if (imp_flag)
{
glevel_len[i] = (T)opt_len; is_improved = true;
}
else
{
// no candidate beat the old capacity: restore it
glevel_len[i] = gap_saved_value;
}
if (i == 0)
break;
}
// compact: collapse consecutive duplicate capacities in place
T val = *glevel_len;
T* gp = glevel_len;
T* res = glevel_len;
for (i = 0; i < bm::gap_levels; ++i)
{
if (val != *gp)
{
val = *gp;
*++res = val;
}
++gp;
}
// pad the tail of the table with the maximum buffer length
while (++res < (glevel_len + bm::gap_levels))
{
*res = bm::gap_max_buff_len;
}
return is_improved;
}
// Finds the first bit position where two blocks differ, handling NULL
// blocks and mixed GAP / bit-block representations (a temporary bitset is
// materialized when representations differ).
// @param blk     first block (bit or GAP, may be NULL)
// @param arg_blk second block (bit or GAP, may be NULL)
// @param pos     [out] position of the first difference
// @return true if a difference was found
inline
bool block_find_first_diff(const bm::word_t* BMRESTRICT blk,
const bm::word_t* BMRESTRICT arg_blk,
unsigned* BMRESTRICT pos) BMNOEXCEPT
{
// one side missing: first difference is the other side's first set bit
if (!blk || !arg_blk)
{
const bm::word_t* pblk; bool is_gap;
if (blk)
{
pblk = blk;
is_gap = BM_IS_GAP(blk);
}
else
{
pblk = arg_blk;
is_gap = BM_IS_GAP(arg_blk);
}
if (is_gap)
{
unsigned found = bm::gap_find_first(BMGAP_PTR(pblk), pos);
if (found)
return true;
}
else
{
unsigned found = bm::bit_find_first(pblk, pos);
if (found)
return true;
}
return false;
}
bool arg_gap = BM_IS_GAP(arg_blk);
bool gap = BM_IS_GAP(blk);
if (arg_gap != gap)
{
// mixed representations: expand the GAP side into a temporary
// bit-block so both sides can be compared word-by-word
bm::bit_block_t temp_blk;
bm::word_t* blk1; bm::word_t* blk2;
if (gap)
{
bm::gap_convert_to_bitset((bm::word_t*)temp_blk,
BMGAP_PTR(blk));
blk1 = (bm::word_t*)temp_blk;
blk2 = (bm::word_t*)arg_blk;
}
else
{
bm::gap_convert_to_bitset((bm::word_t*)temp_blk,
BMGAP_PTR(arg_blk));
blk1 = (bm::word_t*)blk;
blk2 = (bm::word_t*)temp_blk;
}
bool found = bm::bit_find_first_diff(blk1, blk2, pos);
if (found)
return true;
}
else
{
// same representation: compare directly (GAP-GAP or bit-bit)
if (gap)
{
bool found =
bm::gap_find_first_diff(BMGAP_PTR(blk),
BMGAP_PTR(arg_blk), pos);
if (found)
return true;
}
else
{
bool found = bm::bit_find_first_diff(blk, arg_blk, pos);
if (found)
return true;
}
}
return false;
}
/**
    Terminal adapter: reads 32-bit words sequentially out of a memory
    bit-block.
*/
class bitblock_get_adapter
{
public:
    bitblock_get_adapter(const bm::word_t* bit_block) : ptr_(bit_block) {}

    /// fetch the next 32-bit word and advance the cursor
    BMFORCEINLINE
    bm::word_t get_32() BMNOEXCEPT { return *ptr_++; }

private:
    const bm::word_t* ptr_; // read cursor
};
/**
    Terminal adapter: writes 32-bit words sequentially into a memory
    bit-block.
*/
class bitblock_store_adapter
{
public:
    bitblock_store_adapter(bm::word_t* bit_block) : ptr_(bit_block) {}

    /// store the next 32-bit word and advance the cursor
    BMFORCEINLINE
    void push_back(bm::word_t w) { *ptr_++ = w; }

private:
    bm::word_t* ptr_; // write cursor
};
/**
    Terminal adapter: accumulates the arithmetic sum of all words
    pushed into it.
*/
class bitblock_sum_adapter
{
public:
    bitblock_sum_adapter() : sum_(0) {}

    /// add the next word to the running sum
    BMFORCEINLINE
    void push_back(bm::word_t w) BMNOEXCEPT { sum_ += w; }

    /// accumulated sum of all pushed words
    bm::word_t sum() const BMNOEXCEPT { return sum_; }

private:
    bm::word_t sum_; // running total
};
/**
    Adapter over a decoder: produces real decoded 32-bit words only while
    the word counter is inside the [from, to] window; outside the window
    it returns zero words without consulting the decoder.
*/
template<class DEC> class decoder_range_adapter
{
public:
    decoder_range_adapter(DEC& dec, unsigned from_idx, unsigned to_idx)
        : decoder_(dec),
          from_(from_idx),
          to_(to_idx),
          cnt_(0)
    {}

    /// next 32-bit word (0 outside the [from, to] window)
    bm::word_t get_32() BMNOEXCEPT
    {
        const unsigned idx = cnt_++;
        if (idx >= from_ && idx <= to_)
            return decoder_.get_32();
        return 0; // out of window: decoder stream is not advanced
    }

private:
    DEC&     decoder_; // underlying word source
    unsigned from_;    // first in-window word index (inclusive)
    unsigned to_;      // last in-window word index (inclusive)
    unsigned cnt_;     // words served so far
};
/*!
    \brief Combine two 32-bit word streams with a binary operation and feed
    the result word-by-word into an encoder.
    \param it1 - first word source (must provide get_32())
    \param it2 - second word source (must provide get_32())
    \param op - binary functor applied to each word pair
    \param enc - sink (must provide push_back(word))
    \param block_size - number of words to process (defaults to a full block)
*/
template<class It1, class It2, class BinaryOp, class Encoder>
void bit_recomb(It1& it1, It2& it2,
                BinaryOp& op,
                Encoder& enc,
                unsigned block_size = bm::set_block_size) BMNOEXCEPT
{
    for (unsigned i = 0; i < block_size; ++i)
    {
        // pull both streams in a fixed order, then combine
        const bm::word_t a = it1.get_32();
        const bm::word_t b = it2.get_32();
        enc.push_back(op(a, b));
    }
}
/// Bitwise AND functor (w1 & w2)
template<typename W> struct bit_AND
{
W operator()(W w1, W w2) BMNOEXCEPT { return w1 & w2; }
};
/// Bitwise OR functor (w1 | w2)
template<typename W> struct bit_OR
{
W operator()(W w1, W w2) BMNOEXCEPT { return w1 | w2; }
};
/// Bitwise SUB (AND NOT) functor (w1 & ~w2)
template<typename W> struct bit_SUB
{
W operator()(W w1, W w2) BMNOEXCEPT { return w1 & ~w2; }
};
/// Bitwise XOR functor (w1 ^ w2)
template<typename W> struct bit_XOR
{
W operator()(W w1, W w2) BMNOEXCEPT { return w1 ^ w2; }
};
/// Assignment functor: ignores the first operand, returns the second
template<typename W> struct bit_ASSIGN
{
W operator()(W, W w2) BMNOEXCEPT { return w2; }
};
/// Popcount functor: returns the number of set bits in w2
/// (w1 is reused as the accumulator)
template<typename W> struct bit_COUNT
{
W operator()(W w1, W w2) BMNOEXCEPT
{
w1 = 0;
BM_INCWORD_BITCOUNT(w1, w2);
return w1;
}
};
/// Popcount of AND: number of set bits in (w1 & w2)
template<typename W> struct bit_COUNT_AND
{
W operator()(W w1, W w2) BMNOEXCEPT
{
W r = 0;
BM_INCWORD_BITCOUNT(r, w1 & w2);
return r;
}
};
/// Popcount of XOR: number of set bits in (w1 ^ w2)
template<typename W> struct bit_COUNT_XOR
{
W operator()(W w1, W w2) BMNOEXCEPT
{
W r = 0;
BM_INCWORD_BITCOUNT(r, w1 ^ w2);
return r;
}
};
/// Popcount of OR: number of set bits in (w1 | w2)
template<typename W> struct bit_COUNT_OR
{
W operator()(W w1, W w2) BMNOEXCEPT
{
W r = 0;
BM_INCWORD_BITCOUNT(r, w1 | w2);
return r;
}
};
/// Popcount of SUB A-B: number of set bits in (w1 & ~w2)
template<typename W> struct bit_COUNT_SUB_AB
{
W operator()(W w1, W w2) BMNOEXCEPT
{
W r = 0;
BM_INCWORD_BITCOUNT(r, w1 & (~w2));
return r;
}
};
/// Popcount of SUB B-A: number of set bits in (w2 & ~w1)
template<typename W> struct bit_COUNT_SUB_BA
{
W operator()(W w1, W w2) BMNOEXCEPT
{
W r = 0;
BM_INCWORD_BITCOUNT(r, w2 & (~w1));
return r;
}
};
/// Popcount of the first operand only
template<typename W> struct bit_COUNT_A
{
W operator()(W w1, W ) BMNOEXCEPT
{
W r = 0;
BM_INCWORD_BITCOUNT(r, w1);
return r;
}
};
/// Popcount of the second operand only
template<typename W> struct bit_COUNT_B
{
W operator()(W, W w2) BMNOEXCEPT
{
W r = 0;
BM_INCWORD_BITCOUNT(r, w2);
return r;
}
};
/// Function type: apply a GAP block to an expanded bitset with a
/// logical operation (AND/OR/SUB/XOR "to bitset" family)
typedef
void (*gap_operation_to_bitset_func_type)(unsigned*,
const gap_word_t*);
/// Function type: binary logical operation between two GAP blocks;
/// returns the result buffer, result length via the out-parameter
typedef
gap_word_t* (*gap_operation_func_type)(const gap_word_t* BMRESTRICT,
const gap_word_t* BMRESTRICT,
gap_word_t* BMRESTRICT,
unsigned& );
/// Function type: population count of a logical operation between two
/// bit-blocks (without materializing the result block)
typedef
bm::id_t (*bit_operation_count_func_type)(const bm::word_t* BMRESTRICT,
const bm::word_t* BMRESTRICT);
/**
    Static function-pointer dispatch tables for block logical operations,
    indexed by operation code [0..bm::set_END).
    The bool template parameter is a dummy - it apparently exists so the
    static tables can be defined in this header (one instantiation per value).
*/
template<bool T>
struct operation_functions
{
static
gap_operation_to_bitset_func_type gap2bit_table_[bm::set_END];
static
gap_operation_func_type gapop_table_[bm::set_END];
static
bit_operation_count_func_type bit_op_count_table_[bm::set_END];
/// get GAP-to-bitset operation by operation code
static
gap_operation_to_bitset_func_type gap_op_to_bit(unsigned i)
{
return gap2bit_table_[i];
}
/// get GAP-to-GAP operation by operation code
static
gap_operation_func_type gap_operation(unsigned i)
{
return gapop_table_[i];
}
/// get bit-block counting operation by operation code
static
bit_operation_count_func_type bit_operation_count(unsigned i)
{
return bit_op_count_table_[i];
}
};
// AND, OR(add), SUB, XOR; remaining codes have no GAP-to-bitset functional.
// NOTE(review): index order must stay in sync with the set_operation codes
// declared elsewhere (constants header) - confirm before reordering.
template<bool T>
gap_operation_to_bitset_func_type
operation_functions<T>::gap2bit_table_[bm::set_END] = {
&gap_and_to_bitset<bm::gap_word_t>, &gap_add_to_bitset<bm::gap_word_t>, &gap_sub_to_bitset<bm::gap_word_t>, &gap_xor_to_bitset<bm::gap_word_t>, 0
};
// AND, OR, SUB, XOR for GAP operands; remaining codes unsupported (0)
template<bool T>
gap_operation_func_type
operation_functions<T>::gapop_table_[bm::set_END] = {
&gap_operation_and, &gap_operation_or, &gap_operation_sub, &gap_operation_xor, 0
};
// Counting functionals: first 6 operation codes have no counting variant;
// then COUNT_AND, COUNT_XOR, COUNT_OR, COUNT_SUB_AB, COUNT_SUB_BA.
// NOTE(review): zero-padded layout must match the set_operation enum - confirm.
template<bool T>
bit_operation_count_func_type
operation_functions<T>::bit_op_count_table_[bm::set_END] = {
0, 0, 0, 0, 0, 0, &bit_operation_and_count, &bit_operation_xor_count, &bit_operation_or_count, &bit_operation_sub_count, &bit_operation_sub_count_inv, 0, 0, };
/// Size of a bitscan "wave": number of 32-bit words processed per call
const unsigned short set_bitscan_wave_size = 4;
/*!
    \brief Unpack a wave of 4 words (128 bits) into an array of set-bit
    indexes (values in [0..127]).
    \param w_ptr - pointer to the first word of the 4-word wave
    \param bits - [out] target index array (must fit up to 128 entries)
    \return number of indexes produced (population count of the wave)
*/
inline
unsigned short
bitscan_wave(const bm::word_t* BMRESTRICT w_ptr,
unsigned char* BMRESTRICT bits) BMNOEXCEPT
{
bm::word_t w0, w1;
unsigned int cnt0;
w0 = w_ptr[0]; w1 = w_ptr[1];
#if defined(BMAVX512OPT) || defined(BMAVX2OPT) || defined(BM64OPT) || defined(BM64_SSE4)
// 64-bit optimized targets: fuse adjacent 32-bit words into one 64-bit
// value and scan 64 bits at a time (second call offsets indexes by 64)
bm::id64_t w = (bm::id64_t(w1) << 32) | w0;
cnt0 = bm::bitscan_popcnt64(w, bits);
w0 = w_ptr[2]; w1 = w_ptr[3];
w = (bm::id64_t(w1) << 32) | w0;
cnt0 += bm::bitscan_popcnt64(w, bits + cnt0, 64);
#else
#if (defined(__arm__) || defined(__aarch64__))
// ARM targets use the BSF-style scan (presumably faster there - the
// offsets 0/32/64/96 rebase each word's bit indexes into the wave)
cnt0 = bm::bitscan_bsf(w0, bits, (unsigned short)0);
cnt0 += bm::bitscan_bsf(w1, bits + cnt0, (unsigned short)32);
w0 = w_ptr[2]; w1 = w_ptr[3];
cnt0 += bm::bitscan_bsf(w0, bits + cnt0, (unsigned short)64);
cnt0 += bm::bitscan_bsf(w1, bits + cnt0, (unsigned short)(64+32));
#else
// portable path: popcount-based scan, one 32-bit word at a time
cnt0 = bm::bitscan_popcnt(w0, bits);
cnt0 += bm::bitscan_popcnt(w1, bits + cnt0, 32);
w0 = w_ptr[2]; w1 = w_ptr[3];
cnt0 += bm::bitscan_popcnt(w0, bits + cnt0, 64);
cnt0 += bm::bitscan_popcnt(w1, bits + cnt0, 64+32);
#endif
#endif
return static_cast<unsigned short>(cnt0);
}
#if defined (BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512)
/*!
    \brief SIMD-accelerated overload of bit_block_gather_scatter for
    32-bit target and index arrays; dispatches to the SSE4 or AVX2
    implementation depending on the build configuration.
    \param arr - [out] target array, receives bit flags OR-ed in at bit_idx
    \param blk - source bit-block
    \param idx - array of bit indexes to gather
    \param size - end position in idx/arr
    \param start - start position in idx/arr
    \param bit_idx - destination bit position in arr elements
*/
inline
void bit_block_gather_scatter(unsigned* BMRESTRICT arr,
const bm::word_t* BMRESTRICT blk,
const unsigned* BMRESTRICT idx,
unsigned size, unsigned start,
unsigned bit_idx) BMNOEXCEPT
{
// the vector kernels only support 32-bit target/index element types;
// the typedefs + if constexpr keep that constraint explicit
typedef unsigned TRGW;
typedef unsigned IDX;
#if defined(BM64_SSE4)
if constexpr (sizeof(TRGW)==4 && sizeof(IDX)==4)
{
sse4_bit_block_gather_scatter(arr, blk, idx, size, start, bit_idx);
return;
}
#elif defined(BM64_AVX2) || defined(BM64_AVX512)
if constexpr (sizeof(TRGW)==4 && sizeof(IDX)==4)
{
avx2_bit_block_gather_scatter(arr, blk, idx, size, start, bit_idx);
return;
}
#else
// unreachable: the enclosing #if guarantees one SIMD branch is taken
BM_ASSERT(0);
#endif
}
#endif
/*!
    \brief Gather bits from a bit-block into an array of target words.
    For every index idx[k], k in [start, size), test the corresponding bit
    of the block and OR the resulting 0/1 flag into arr[k] at bit_idx.
    \param arr - [out] target array (flags OR-ed in, existing bits kept)
    \param blk - source bit-block
    \param idx - array of bit indexes (only the in-block part is used)
    \param size - end position (exclusive)
    \param start - start position (inclusive)
    \param bit_idx - destination bit position inside arr elements
*/
template<typename TRGW, typename IDX, typename SZ>
void bit_block_gather_scatter(TRGW* BMRESTRICT arr,
                              const bm::word_t* BMRESTRICT blk,
                              const IDX* BMRESTRICT idx,
                              SZ size, SZ start, unsigned bit_idx) BMNOEXCEPT
{
    const TRGW one = 1;
    const SZ total = size - start;
    const SZ paired = total - (total % 2);
    SZ k = 0;
    for (; k < paired; k += 2) // 2-way unrolled main loop
    {
        const SZ p = start + k;
        const unsigned nbit0 = unsigned(idx[p] & bm::set_block_mask);
        const unsigned nbit1 = unsigned(idx[p + 1] & bm::set_block_mask);
        const TRGW f0 = TRGW(bool(blk[nbit0 >> bm::set_word_shift] &
                                  (one << (nbit0 & bm::set_word_mask))));
        const TRGW f1 = TRGW(bool(blk[nbit1 >> bm::set_word_shift] &
                                  (one << (nbit1 & bm::set_word_mask))));
        arr[p] |= (f0 << bit_idx);
        arr[p + 1] |= (f1 << bit_idx);
    }
    for (; k < total; ++k) // odd tail element
    {
        const unsigned nbit = unsigned(idx[start + k] & bm::set_block_mask);
        arr[start + k] |= (TRGW(bool(blk[nbit >> bm::set_word_shift] &
                           (one << (nbit & bm::set_word_mask)))) << bit_idx);
    }
}
/*!
    \brief Scan forward in a sorted 64-bit index array while entries still
    address block `nb`.
    \param idx - sorted array of bit indexes
    \param size - array size
    \param nb - block number to match
    \param start - scan start position (must be < size)
    \return position of the first entry past the run of block nb
*/
inline
bm::id64_t idx_arr_block_lookup_u64(const bm::id64_t* idx,
bm::id64_t size, bm::id64_t nb, bm::id64_t start) BMNOEXCEPT
{
    BM_ASSERT(idx);
    BM_ASSERT(start < size);
    while (start < size)
    {
        if ((idx[start] >> bm::set_block_shift) != nb)
            break;
        ++start;
    }
    return start;
}
/*!
    \brief Scan forward in a sorted 32-bit index array while entries still
    address block `nb` (vectorized when the platform provides it).
    \param idx - sorted array of bit indexes
    \param size - array size
    \param nb - block number to match
    \param start - scan start position (must be < size)
    \return position of the first entry past the run of block nb
*/
inline
unsigned idx_arr_block_lookup_u32(const unsigned* idx,
unsigned size, unsigned nb, unsigned start) BMNOEXCEPT
{
    BM_ASSERT(idx);
    BM_ASSERT(start < size);
#if defined(VECT_ARR_BLOCK_LOOKUP)
    return VECT_ARR_BLOCK_LOOKUP(idx, size, nb, start);
#else
    while (start < size)
    {
        if (unsigned(idx[start] >> bm::set_block_shift) != nb)
            break;
        ++start;
    }
    return start;
#endif
}
/*!
    \brief Set bits in a bit-block addressed by a slice [start, stop)
    of a 64-bit index array (only the in-block low bits are used).
    \param block - target bit-block
    \param idx - array of bit indexes
    \param start - first index position (inclusive)
    \param stop - last index position (exclusive)
*/
inline
void set_block_bits_u64(bm::word_t* BMRESTRICT block,
                        const bm::id64_t* BMRESTRICT idx,
                        bm::id64_t start, bm::id64_t stop) BMNOEXCEPT
{
    for (bm::id64_t i = start; i < stop; ++i)
    {
        const unsigned nbit = unsigned(idx[i] & bm::set_block_mask);
        const unsigned nword = nbit >> bm::set_word_shift;
        block[nword] |= (1u << (nbit & bm::set_word_mask));
    }
}
/*!
    \brief Set bits in a bit-block addressed by a slice [start, stop)
    of a 32-bit index array (vectorized when the platform provides it).
    \param block - target bit-block
    \param idx - array of bit indexes
    \param start - first index position (inclusive)
    \param stop - last index position (exclusive, must be > start)
*/
inline
void set_block_bits_u32(bm::word_t* BMRESTRICT block,
                        const unsigned* BMRESTRICT idx,
                        unsigned start, unsigned stop ) BMNOEXCEPT
{
    BM_ASSERT(start < stop);
#if defined(VECT_SET_BLOCK_BITS)
    VECT_SET_BLOCK_BITS(block, idx, start, stop);
#else
    unsigned pos = start;
    do // pre-condition start < stop guarantees at least one iteration
    {
        const unsigned n = idx[pos++];
        const unsigned nbit = unsigned(n & bm::set_block_mask);
        const unsigned nword = nbit >> bm::set_word_shift;
        block[nword] |= (1u << (nbit & bm::set_word_mask));
    } while (pos < stop);
#endif
}
/*!
    \brief Find the occupied (non-NULL) range [left, right] in a
    block-pointer sub-array.
    \param arr - array of bm::set_sub_array_size block pointers
    \param left - [out] first non-NULL position (0 if array is empty)
    \param right - [out] last non-NULL position (0 if array is empty)
    \return false if every pointer is NULL
*/
inline
bool block_ptr_array_range(bm::word_t** arr,
                           unsigned& left, unsigned& right) BMNOEXCEPT
{
    BM_ASSERT(arr);
    unsigned i = 0;
    for (; i < bm::set_sub_array_size; ++i)
        if (arr[i])
            break;
    if (i == bm::set_sub_array_size) // fully empty sub-array
    {
        left = right = 0;
        return false;
    }
    left = i;
    unsigned j = bm::set_sub_array_size - 1;
    while (j != i && !arr[j]) // scan back for the last occupied slot
        --j;
    right = j;
    return true;
}
/*!
    \brief Linear lower-bound scan over a sorted 32-bit array on the
    closed interval [from, to] (vectorized when the platform provides it).
    \return index of the first element >= target (to+1 if none found)
*/
inline
unsigned lower_bound_linear_u32(const unsigned* arr, unsigned target,
                                unsigned from, unsigned to) BMNOEXCEPT
{
    BM_ASSERT(arr);
    BM_ASSERT(from <= to);
#if defined(VECT_LOWER_BOUND_SCAN_U32)
    return VECT_LOWER_BOUND_SCAN_U32(arr, target, from, to);
#else
    unsigned pos = from;
    while (pos <= to && arr[pos] < target)
        ++pos;
    return pos;
#endif
}
/*!
    \brief Linear lower-bound scan over a sorted 64-bit array on the
    closed interval [from, to].
    \return index of the first element >= target (to+1 if none found)
*/
inline
unsigned lower_bound_linear_u64(const unsigned long long* arr,
                                unsigned long long target,
                                unsigned from, unsigned to) BMNOEXCEPT
{
    BM_ASSERT(arr);
    BM_ASSERT(from <= to);
    unsigned pos = from;
    while (pos <= to && arr[pos] < target)
        ++pos;
    return pos;
}
/*!
    \brief Hybrid lower-bound search in a sorted 32-bit array on the closed
    interval [from, to]: binary search that switches to a linear scan once
    the active range shrinks below a cutoff.
    \return index of the first element >= target (may be to+1 if none)
*/
inline
unsigned lower_bound_u32(const unsigned* arr, unsigned target,
unsigned from, unsigned to) BMNOEXCEPT
{
BM_ASSERT(arr);
BM_ASSERT(from <= to);
const unsigned linear_cutoff = 32;
unsigned l = from; unsigned r = to;
unsigned dist = r - l;
if (dist < linear_cutoff)
{
return bm::lower_bound_linear_u32(arr, target, l, r);
}
while (l <= r)
{
unsigned mid = (r-l)/2+l; // overflow-safe midpoint
if (arr[mid] < target)
l = mid+1;
else
r = mid-1;
// fall back to linear scan for small ranges; since dist >= cutoff
// whenever mid is computed, mid > l >= 0 and the unsigned mid-1
// above cannot underflow
dist = r - l;
if (dist < linear_cutoff)
{
return bm::lower_bound_linear_u32(arr, target, l, r);
}
}
return l;
}
/*!
    \brief Hybrid lower-bound search in a sorted 64-bit array on the closed
    interval [from, to] (same algorithm as the u32 variant).
    \return index of the first element >= target (may be to+1 if none)
*/
inline
unsigned lower_bound_u64(const unsigned long long* arr,
unsigned long long target,
unsigned from, unsigned to) BMNOEXCEPT
{
BM_ASSERT(arr);
BM_ASSERT(from <= to);
const unsigned linear_cutoff = 32;
unsigned l = from; unsigned r = to;
unsigned dist = r - l;
if (dist < linear_cutoff)
{
return bm::lower_bound_linear_u64(arr, target, l, r);
}
while (l <= r)
{
unsigned mid = (r - l) / 2 + l; // overflow-safe midpoint
if (arr[mid] < target)
l = mid + 1;
else
r = mid - 1;
// see the u32 variant: the cutoff re-check also protects the
// unsigned mid-1 from underflow
dist = r - l;
if (dist < linear_cutoff)
{
return bm::lower_bound_linear_u64(arr, target, l, r);
}
}
return l;
}
/*!
    \brief Linear search for a pointer value in an array of pointers.
    \param p_arr - array of pointers to scan
    \param arr_size - number of elements
    \param ptr - pointer value to find
    \param idx - [out] position of the match (valid only when true returned)
    \return true if found
*/
inline
bool find_ptr(const void* const * p_arr, size_t arr_size,
              const void* ptr, size_t *idx) BMNOEXCEPT
{
    size_t i = 0;
    for (; i < arr_size; ++i)
    {
        if (p_arr[i] == ptr)
        {
            *idx = i;
            return true;
        }
    }
    return false;
}
#ifdef BM64ADDR
/*!
    \brief Compute the global bit index from the (i, j) block coordinates
    and the bit index inside the block (64-bit address mode).
    \param i - top-level array index
    \param j - sub-array index
    \param block_idx - bit index inside the block
    \return global 64-bit index
*/
inline
bm::id64_t block_to_global_index(unsigned i, unsigned j,
                                 unsigned block_idx) BMNOEXCEPT
{
    const bm::id64_t base_idx =
        (bm::id64_t(i) * bm::set_sub_array_size + j) * bm::gap_max_bits;
    return base_idx + block_idx;
}
#else
/*!
    \brief Compute the global bit index from the (i, j) block coordinates
    and the bit index inside the block (32-bit address mode).
    \param i - top-level array index
    \param j - sub-array index
    \param block_idx - bit index inside the block
    \return global 32-bit index
*/
inline
bm::id_t block_to_global_index(unsigned i, unsigned j,
                               unsigned block_idx) BMNOEXCEPT
{
    const unsigned base_idx =
        (i * bm::set_sub_array_size + j) * bm::gap_max_bits;
    return base_idx + block_idx;
}
#endif
/*!
    \brief Calculate the minimal delta between consecutive elements of a
    (strictly increasing) array.
    \param arr - sorted, strictly increasing array
    \param arr_size - number of elements
    \return minimal delta, or 0 when the array has fewer than 2 elements
*/
inline
unsigned min_delta_u32(const unsigned* arr, size_t arr_size)
{
    if (arr_size < 2)
        return 0;
    unsigned min_d = arr[1] - arr[0];
    for (size_t i = 1; i < arr_size; ++i)
    {
        const unsigned prev = arr[i-1];
        const unsigned curr = arr[i];
        BM_ASSERT(prev <= curr - 1); // strictly increasing input expected
        const unsigned d = curr - prev;
        if (d < min_d)
            min_d = d;
    }
    return min_d;
}
/*!
    \brief Subtract (delta-1) from every element after the first,
    reversing a min-delta based transform.
    \param arr - array to modify in place
    \param arr_size - number of elements
    \param delta - minimal delta (must be > 0)
*/
inline
void min_delta_apply(unsigned* arr, size_t arr_size, unsigned delta) BMNOEXCEPT
{
    BM_ASSERT(delta > 0);
    const unsigned d = delta - 1;
    for (size_t i = 1; i < arr_size; ++i)
    {
        arr[i] -= d;
        BM_ASSERT(arr[i] > arr[i-1]); // must remain strictly increasing
    }
}
/*!
    \brief Find the index of the maximal non-zero element
    (first occurrence wins on ties).
    \param arr - array to scan
    \param arr_size - number of elements
    \param found_idx - [out] index of the maximum (untouched if none found)
    \return true if a non-zero element exists
*/
template<typename VT, typename SZ>
bool find_max_nz(const VT* arr, SZ arr_size, SZ* found_idx) BMNOEXCEPT
{
    bool found = false;
    VT best = 0;
    for (SZ i = 0; i < arr_size; ++i)
    {
        if (arr[i] > best)
        {
            best = arr[i];
            *found_idx = i;
            found = true;
        }
    }
    return found;
}
/*!
    \brief Find the index of the first non-zero element.
    \param arr - array to scan
    \param arr_size - number of elements
    \param found_idx - [out] index of the first non-zero (untouched if none)
    \return true if a non-zero element exists
*/
template<typename VT, typename SZ>
bool find_first_nz(const VT* arr, SZ arr_size, SZ* found_idx) BMNOEXCEPT
{
    for (SZ i = 0; i < arr_size; ++i)
    {
        if (arr[i])
        {
            *found_idx = i;
            return true;
        }
    }
    return false;
}
/*!
    \brief Count non-zero elements in an array.
    \param arr - array to scan
    \param arr_size - number of elements
    \return number of non-zero elements
*/
template<typename VT, typename SZ>
SZ count_nz(const VT* arr, SZ arr_size) BMNOEXCEPT
{
    SZ total = 0;
    for (SZ i = 0; i < arr_size; ++i)
        total += SZ(arr[i] != 0);
    return total;
}
/**
    Possible block representations (as chosen by best_representation()):
    e_bit_GAP  - GAP (run-length) form
    e_bit_INT  - list of set-bit positions
    e_bit_IINT - inverted list (positions of zero bits)
    e_bit_1    - all-ones block
    e_bit_0    - all-zeros (empty) block
    e_bit_bit  - plain uncompressed bit-block
    e_bit_end  - number of representation variants
*/
enum bit_representation
{
e_bit_GAP = 0, e_bit_INT, e_bit_IINT, e_bit_1, e_bit_0, e_bit_bit, e_bit_end
};
/*!
    \brief Evaluate GAP count (gc) vs bit count (bc) statistics of a block
    and pick the best representation for it.
    \param gc - number of GAPs (runs) in the block
    \param bc - number of set bits in the block
    \param max_bits - total number of bits in the block
    \param bie_bits_per_int - estimated serialization cost (bits per integer)
    \param best_metric - [out] the winning metric (gc, bc or inverted bc)
    \return best representation variant
*/
inline
bm::bit_representation best_representation(unsigned gc,
                                           unsigned bc,
                                           unsigned max_bits,
                                           float bie_bits_per_int,
                                           unsigned* best_metric) BMNOEXCEPT
{
    if (!bc) // empty block
    {
        *best_metric = bc;
        return e_bit_0;
    }
    if (bc == max_bits) // completely full block
    {
        *best_metric = 0;
        return e_bit_1;
    }

    const unsigned ibc = max_bits - bc; // inverted (zero) bit count
    unsigned metric;
    bm::bit_representation repr;
    if (gc < bc) // GAP form cheaper than the plain int list
    {
        if (gc <= ibc)
        {
            metric = gc; repr = e_bit_GAP;
        }
        else
        {
            metric = ibc; repr = e_bit_IINT;
        }
    }
    else // int list no worse than GAP form
    {
        if (bc <= ibc)
        {
            metric = bc; repr = e_bit_INT;
        }
        else
        {
            metric = ibc; repr = e_bit_IINT;
        }
    }
    *best_metric = metric;
    // if the estimated encoding cost exceeds the plain bit-block size,
    // keep the uncompressed bit-block
    if (float(metric) * bie_bits_per_int >= float(max_bits))
        return e_bit_bit;
    return repr;
}
/**
    Internal union: reinterprets a block pointer as a 64-bit integer or
    four 16-bit payload slots.
    NOTE(review): which pointer bits land in which i16[] slot is
    endianness-dependent; the test below appears to assume a fixed
    (little-endian) layout - confirm against supported platforms.
*/
union ptr_payload_t
{
bm::word_t* blk;
bm::id64_t i64;
unsigned short i16[4];
};
/*!
    Test whether value v occurs in one of the upper three 16-bit slots of
    the pointer payload (slot 0 is never inspected).
    For v == 0 the test degenerates to checking that slot 1 is zero.
    \return non-zero if matched
*/
inline
bm::id64_t ptrp_test(ptr_payload_t ptr, bm::gap_word_t v) BMNOEXCEPT
{
if (v == 0)
{
return (ptr.i16[1] == 0);
}
// branch-free OR of the three slot comparisons
bm::id64_t r = (ptr.i16[1] == v) | (ptr.i16[2] == v) | (ptr.i16[3] == v);
return r;
}
}
#endif