use std::mem::size_of;
#[cfg(all(
feature = "simd",
target_arch = "x86_64",
target_feature = "avx",
target_feature = "avx2",
target_feature = "avx512f",
target_feature = "avx512bw",
))]
pub use bitset::*;
pub use iter::*;
use crate::util::impl_vector_iterator;
use crate::BitVec;
use super::WORD_SIZE;
const BLOCK_SIZE: usize = 512;
const SUPER_BLOCK_SIZE: usize = 1 << 13;
const SELECT_BLOCK_SIZE: usize = 1 << 13;
#[derive(Clone, Copy, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemSize, mem_dbg::MemDbg))]
#[cfg_attr(feature = "mem_dbg", mem_size(flat))]
struct BlockDescriptor {
zeros: u16,
}
#[derive(Clone, Copy, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemSize, mem_dbg::MemDbg))]
#[cfg_attr(feature = "mem_dbg", mem_size(flat))]
struct SuperBlockDescriptor {
zeros: usize,
}
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemSize, mem_dbg::MemDbg))]
#[cfg_attr(feature = "mem_dbg", mem_size(flat))]
struct SelectSuperBlockDescriptor {
index_0: usize,
index_1: usize,
}
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemSize, mem_dbg::MemDbg))]
pub struct RsVec {
data: Vec<u64>,
len: usize,
blocks: Vec<BlockDescriptor>,
super_blocks: Vec<SuperBlockDescriptor>,
select_blocks: Vec<SelectSuperBlockDescriptor>,
pub(crate) rank0: usize,
pub(crate) rank1: usize,
}
impl RsVec {
#[must_use]
pub fn from_bit_vec(vec: BitVec) -> RsVec {
let mut blocks = Vec::with_capacity(vec.len() / BLOCK_SIZE + 1);
let mut super_blocks = Vec::with_capacity(vec.len() / SUPER_BLOCK_SIZE + 1);
let mut select_blocks = Vec::new();
select_blocks.push(SelectSuperBlockDescriptor {
index_0: 0,
index_1: 0,
});
let mut total_zeros: usize = 0;
let mut current_zeros: usize = 0;
let mut last_zero_select_block: usize = 0;
let mut last_one_select_block: usize = 0;
for (idx, &word) in vec.data.iter().enumerate() {
if idx % (BLOCK_SIZE / WORD_SIZE) == 0 {
if idx % (SUPER_BLOCK_SIZE / WORD_SIZE) == 0 {
total_zeros += current_zeros;
current_zeros = 0;
super_blocks.push(SuperBlockDescriptor { zeros: total_zeros });
}
#[allow(clippy::cast_possible_truncation)]
blocks.push(BlockDescriptor {
zeros: current_zeros as u16,
});
}
let mut new_zeros = word.count_zeros() as usize;
if idx == vec.data.len() - 1 && !vec.len.is_multiple_of(WORD_SIZE) {
let mask = (1 << (vec.len % WORD_SIZE)) - 1;
new_zeros -= (word | mask).count_zeros() as usize;
}
let all_zeros = total_zeros + current_zeros + new_zeros;
if all_zeros / SELECT_BLOCK_SIZE > (total_zeros + current_zeros) / SELECT_BLOCK_SIZE {
if all_zeros / SELECT_BLOCK_SIZE == select_blocks.len() {
select_blocks.push(SelectSuperBlockDescriptor {
index_0: super_blocks.len() - 1,
index_1: 0,
});
} else {
select_blocks[all_zeros / SELECT_BLOCK_SIZE].index_0 = super_blocks.len() - 1;
}
last_zero_select_block += 1;
}
let total_bits = (idx + 1) * WORD_SIZE;
let all_ones = total_bits - all_zeros;
if all_ones / SELECT_BLOCK_SIZE
> (idx * WORD_SIZE - total_zeros - current_zeros) / SELECT_BLOCK_SIZE
{
if all_ones / SELECT_BLOCK_SIZE == select_blocks.len() {
select_blocks.push(SelectSuperBlockDescriptor {
index_0: 0,
index_1: super_blocks.len() - 1,
});
} else {
select_blocks[all_ones / SELECT_BLOCK_SIZE].index_1 = super_blocks.len() - 1;
}
last_one_select_block += 1;
}
current_zeros += new_zeros;
}
if last_zero_select_block == select_blocks.len() - 1 {
select_blocks.push(SelectSuperBlockDescriptor {
index_0: select_blocks[last_zero_select_block].index_0,
index_1: 0,
});
} else {
debug_assert!(select_blocks[last_zero_select_block + 1].index_0 == 0);
select_blocks[last_zero_select_block + 1].index_0 =
select_blocks[last_zero_select_block].index_0;
}
if last_one_select_block == select_blocks.len() - 1 {
select_blocks.push(SelectSuperBlockDescriptor {
index_0: 0,
index_1: select_blocks[last_one_select_block].index_1,
});
} else {
debug_assert!(select_blocks[last_one_select_block + 1].index_1 == 0);
select_blocks[last_one_select_block + 1].index_1 =
select_blocks[last_one_select_block].index_1;
}
total_zeros += current_zeros;
RsVec {
data: vec.data,
len: vec.len,
blocks,
super_blocks,
select_blocks,
rank0: total_zeros,
rank1: vec.len - total_zeros,
}
}
#[must_use]
pub fn rank0(&self, pos: usize) -> usize {
self.rank(true, pos)
}
#[must_use]
pub fn rank1(&self, pos: usize) -> usize {
self.rank(false, pos)
}
#[allow(clippy::inline_always)]
#[inline(always)]
fn rank(&self, zero: bool, pos: usize) -> usize {
#[allow(clippy::collapsible_else_if)]
if zero {
if pos >= self.len() {
return self.rank0;
}
} else {
if pos >= self.len() {
return self.rank1;
}
}
let index = pos / WORD_SIZE;
let block_index = pos / BLOCK_SIZE;
let super_block_index = pos / SUPER_BLOCK_SIZE;
let mut rank = 0;
rank += if zero {
self.super_blocks[super_block_index].zeros
} else {
(super_block_index * SUPER_BLOCK_SIZE) - self.super_blocks[super_block_index].zeros
};
rank += if zero {
self.blocks[block_index].zeros as usize
} else {
((block_index % (SUPER_BLOCK_SIZE / BLOCK_SIZE)) * BLOCK_SIZE)
- self.blocks[block_index].zeros as usize
};
for &i in &self.data[(block_index * BLOCK_SIZE) / WORD_SIZE..index] {
rank += if zero {
i.count_zeros() as usize
} else {
i.count_ones() as usize
};
}
rank += if zero {
(!self.data[index] & ((1 << (pos % WORD_SIZE)) - 1)).count_ones() as usize
} else {
(self.data[index] & ((1 << (pos % WORD_SIZE)) - 1)).count_ones() as usize
};
rank
}
#[must_use]
pub fn len(&self) -> usize {
self.len
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
#[must_use]
pub fn get(&self, pos: usize) -> Option<u64> {
if pos >= self.len() {
None
} else {
Some(self.get_unchecked(pos))
}
}
#[must_use]
pub fn get_unchecked(&self, pos: usize) -> u64 {
(self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE)) & 1
}
#[must_use]
pub fn get_bits(&self, pos: usize, len: usize) -> Option<u64> {
if len > WORD_SIZE {
return None;
}
if pos + len > self.len {
None
} else {
Some(self.get_bits_unchecked(pos, len))
}
}
#[must_use]
#[allow(clippy::comparison_chain)] #[allow(clippy::cast_possible_truncation)] pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 {
debug_assert!(len <= WORD_SIZE);
let partial_word = self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE);
if pos % WORD_SIZE + len <= WORD_SIZE {
partial_word & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
} else {
(partial_word | (self.data[pos / WORD_SIZE + 1] << (WORD_SIZE - pos % WORD_SIZE)))
& 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
}
}
#[must_use]
pub fn into_bit_vec(self) -> BitVec {
BitVec {
data: self.data,
len: self.len,
}
}
#[must_use]
pub fn sparse_equals<const ZERO: bool>(&self, other: &Self) -> bool {
if self.len() != other.len() {
return false;
}
if self.rank0 != other.rank0 || self.rank1 != other.rank1 {
return false;
}
let iter: SelectIter<ZERO> = self.select_iter();
for (rank, bit_index) in iter.enumerate() {
if (other.get_unchecked(bit_index) == 0) != ZERO || other.rank(ZERO, bit_index) != rank
{
return false;
}
}
true
}
#[must_use]
pub fn full_equals(&self, other: &Self) -> bool {
if self.len() != other.len() {
return false;
}
if self.rank0 != other.rank0 || self.rank1 != other.rank1 {
return false;
}
if self.data[..self.len / 64]
.iter()
.zip(other.data[..other.len / 64].iter())
.any(|(a, b)| a != b)
{
return false;
}
if !self.len.is_multiple_of(WORD_SIZE)
&& self.data[self.len / WORD_SIZE] & ((1 << (self.len % WORD_SIZE)) - 1)
!= other.data[self.len / WORD_SIZE] & ((1 << (other.len % WORD_SIZE)) - 1)
{
return false;
}
true
}
#[must_use]
pub fn heap_size(&self) -> usize {
self.data.len() * size_of::<u64>()
+ self.blocks.len() * size_of::<BlockDescriptor>()
+ self.super_blocks.len() * size_of::<SuperBlockDescriptor>()
+ self.select_blocks.len() * size_of::<SelectSuperBlockDescriptor>()
}
}
impl_vector_iterator! { RsVec, RsVecIter, RsVecRefIter }
impl PartialEq for RsVec {
fn eq(&self, other: &Self) -> bool {
if self.len > 4_000_000 {
if self.rank1 > self.rank0 {
self.sparse_equals::<true>(other)
} else {
self.sparse_equals::<false>(other)
}
} else {
self.full_equals(other)
}
}
}
impl From<BitVec> for RsVec {
fn from(vec: BitVec) -> Self {
RsVec::from_bit_vec(vec)
}
}
impl From<RsVec> for BitVec {
fn from(value: RsVec) -> Self {
value.into_bit_vec()
}
}
mod iter;
mod select;
#[cfg(all(
feature = "simd",
target_arch = "x86_64",
target_feature = "avx",
target_feature = "avx2",
target_feature = "avx512f",
target_feature = "avx512bw",
))]
mod bitset;
#[cfg(test)]
mod tests;