use crate::bit_vec::mask::MaskedBitVec;
use crate::util::impl_vector_iterator;
use std::cmp::min;
use std::hash::{Hash, Hasher};
use std::mem::size_of;
pub mod fast_rs_vec;
pub mod sparse;
pub mod mask;
const WORD_SIZE: usize = 64;
pub type BitMask<'s, 'b> = MaskedBitVec<'s, 'b, fn(u64, u64) -> u64>;
#[derive(Clone, Debug, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemSize, mem_dbg::MemDbg))]
pub struct BitVec {
data: Vec<u64>,
len: usize,
}
impl BitVec {
#[must_use]
pub fn new() -> Self {
Self::default()
}
#[must_use]
pub fn with_capacity(capacity: usize) -> Self {
Self {
data: Vec::with_capacity(capacity / WORD_SIZE + 1),
len: 0,
}
}
#[must_use]
pub fn from_zeros(len: usize) -> Self {
let mut data = vec![0; len / WORD_SIZE];
if !len.is_multiple_of(WORD_SIZE) {
data.push(0);
}
Self { data, len }
}
#[must_use]
pub fn from_ones(len: usize) -> Self {
let mut data = vec![u64::MAX; len / WORD_SIZE];
if !len.is_multiple_of(WORD_SIZE) {
data.push((1 << (len % WORD_SIZE)) - 1);
}
Self { data, len }
}
#[must_use]
pub fn from_bits(bits: &[u8]) -> Self {
let mut bv = Self::with_capacity(bits.len());
bits.iter().for_each(|&b| bv.append_bit(b.into()));
bv
}
#[must_use]
pub fn from_bits_u16(bits: &[u16]) -> Self {
let mut bv = Self::with_capacity(bits.len());
bits.iter().for_each(|&b| bv.append_bit_u16(b));
bv
}
#[must_use]
pub fn from_bits_u32(bits: &[u32]) -> Self {
let mut bv = Self::with_capacity(bits.len());
bits.iter().for_each(|&b| bv.append_bit_u32(b));
bv
}
#[must_use]
pub fn from_bits_u64(bits: &[u64]) -> Self {
let mut bv = Self::with_capacity(bits.len());
bits.iter().for_each(|&b| bv.append_bit(b));
bv
}
#[must_use]
pub fn from_bits_iter<I, E>(iter: I) -> Self
where
E: Into<u64>,
I: IntoIterator<Item = E>,
{
let iter = iter.into_iter();
let mut bv = Self::with_capacity(iter.size_hint().0);
for bit in iter {
bv.append_bit(bit.into());
}
bv
}
#[must_use]
pub fn from_limbs(words: &[u64]) -> Self {
let len = words.len() * WORD_SIZE;
Self {
data: words.to_vec(),
len,
}
}
pub fn from_limbs_iter<I, E>(iter: I) -> Self
where
E: Into<u64>,
I: IntoIterator<Item = E>,
{
let vec = iter.into_iter().map(Into::into).collect();
Self::from_vec(vec)
}
#[must_use]
pub fn from_vec(data: Vec<u64>) -> Self {
let len = data.len() * WORD_SIZE;
Self { data, len }
}
fn pack_bits<T, const MAX_BITS: usize>(sequence: &[T], bits_per_element: usize) -> Self
where
T: Into<u64> + Copy,
{
let mut bv = Self::with_capacity(sequence.len() * bits_per_element);
for &word in sequence {
Self::pack_word_into_vector::<MAX_BITS>(&mut bv, word.into(), bits_per_element)
}
bv
}
fn pack_bits_iter<T, I: IntoIterator<Item = T>, const MAX_BITS: usize>(
iter: I,
bits_per_element: usize,
) -> Self
where
T: Into<u64> + Copy,
{
let mut bv = Self::new();
for word in iter {
Self::pack_word_into_vector::<MAX_BITS>(&mut bv, word.into(), bits_per_element)
}
bv
}
#[inline(always)]
fn pack_word_into_vector<const MAX_BITS: usize>(bv: &mut BitVec, word: u64, num_bits: usize) {
if num_bits <= MAX_BITS {
bv.append_bits(word.into(), num_bits);
} else {
bv.append_bits(word.into(), MAX_BITS);
let mut rest = num_bits - MAX_BITS;
while rest > 0 {
bv.append_bits(0, min(rest, MAX_BITS));
rest = rest.saturating_sub(MAX_BITS);
}
}
}
#[must_use]
pub fn pack_sequence_u64(sequence: &[u64], bits_per_element: usize) -> Self {
Self::pack_bits::<_, 64>(sequence, bits_per_element)
}
#[must_use]
pub fn pack_sequence_u32(sequence: &[u32], bits_per_element: usize) -> Self {
Self::pack_bits::<_, 32>(sequence, bits_per_element)
}
#[must_use]
pub fn pack_sequence_u16(sequence: &[u16], bits_per_element: usize) -> Self {
Self::pack_bits::<_, 16>(sequence, bits_per_element)
}
#[must_use]
pub fn pack_sequence_u8(sequence: &[u8], bits_per_element: usize) -> Self {
Self::pack_bits::<_, 8>(sequence, bits_per_element)
}
pub fn pack_from_iter_u64<I: IntoIterator<Item = u64>>(
iter: I,
bits_per_element: usize,
) -> Self {
Self::pack_bits_iter::<_, _, 64>(iter, bits_per_element)
}
pub fn pack_from_iter_u32<I: IntoIterator<Item = u32>>(
iter: I,
bits_per_element: usize,
) -> Self {
Self::pack_bits_iter::<_, _, 32>(iter, bits_per_element)
}
pub fn pack_from_iter_u16<I: IntoIterator<Item = u16>>(
iter: I,
bits_per_element: usize,
) -> Self {
Self::pack_bits_iter::<_, _, 16>(iter, bits_per_element)
}
pub fn pack_from_iter_u8<I: IntoIterator<Item = u8>>(iter: I, bits_per_element: usize) -> Self {
Self::pack_bits_iter::<_, _, 8>(iter, bits_per_element)
}
pub fn from_bools(bools: &[bool]) -> Self {
let mut bv = BitVec::with_capacity(bools.len());
bools.iter().for_each(|&b| bv.append(b));
bv
}
pub fn from_bool_iter<I: IntoIterator<Item = bool>>(iter: I) -> Self {
let mut bv = BitVec::new();
iter.into_iter().for_each(|b| bv.append(b));
bv
}
pub fn append(&mut self, bit: bool) {
if self.len.is_multiple_of(WORD_SIZE) {
self.data.push(0);
}
if bit {
self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE);
} else {
self.data[self.len / WORD_SIZE] &= !(1 << (self.len % WORD_SIZE));
}
self.len += 1;
}
pub fn drop_last(&mut self, n: usize) {
if n > self.len {
self.data.clear();
self.len = 0;
return;
}
let new_limb_count = (self.len - n).div_ceil(WORD_SIZE);
if new_limb_count < self.data.len() {
self.data.truncate(new_limb_count);
}
self.len -= n;
}
pub fn append_bit(&mut self, bit: u64) {
if self.len.is_multiple_of(WORD_SIZE) {
self.data.push(0);
}
if bit % 2 == 1 {
self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE);
} else {
self.data[self.len / WORD_SIZE] &= !(1 << (self.len % WORD_SIZE));
}
self.len += 1;
}
pub fn append_bit_u32(&mut self, bit: u32) {
self.append_bit(u64::from(bit));
}
pub fn append_bit_u16(&mut self, bit: u16) {
self.append_bit(u64::from(bit));
}
pub fn append_bit_u8(&mut self, bit: u8) {
self.append_bit(u64::from(bit));
}
pub fn append_word(&mut self, word: u64) {
if self.len.is_multiple_of(WORD_SIZE) {
self.data.push(word);
} else {
self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE));
self.data[self.len / WORD_SIZE] |= word << (self.len % WORD_SIZE);
self.data.push(word >> (WORD_SIZE - self.len % WORD_SIZE));
}
self.len += WORD_SIZE;
}
pub fn append_bits(&mut self, bits: u64, len: usize) {
assert!(len <= 64, "Cannot append more than 64 bits");
if self.len.is_multiple_of(WORD_SIZE) {
self.data.push(bits);
} else {
self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE));
self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE);
if self.len % WORD_SIZE + len > WORD_SIZE {
self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE));
}
}
self.len += len;
}
pub fn append_bits_unchecked(&mut self, bits: u64, len: usize) {
if self.len.is_multiple_of(WORD_SIZE) {
self.data.push(bits);
} else {
self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE);
if self.len % WORD_SIZE + len > WORD_SIZE {
self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE));
}
}
self.len += len;
}
pub fn extend_bitvec(&mut self, other: &Self) {
self.data
.reserve((self.len + other.len).div_ceil(WORD_SIZE) - self.data.len());
let full_limbs = other.len() / WORD_SIZE;
for i in 0..full_limbs {
self.append_bits(other.data[i], WORD_SIZE);
}
let partial_bits = other.len % WORD_SIZE;
if partial_bits > 0 {
self.append_bits(other.data[full_limbs], partial_bits);
}
}
#[must_use]
pub fn len(&self) -> usize {
self.len
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.len == 0
}
pub fn flip_bit(&mut self, pos: usize) {
assert!(pos < self.len, "Index out of bounds");
self.flip_bit_unchecked(pos);
}
pub fn flip_bit_unchecked(&mut self, pos: usize) {
self.data[pos / WORD_SIZE] ^= 1 << (pos % WORD_SIZE);
}
#[must_use]
pub fn get(&self, pos: usize) -> Option<u64> {
if pos >= self.len {
None
} else {
Some(self.get_unchecked(pos))
}
}
#[must_use]
pub fn get_unchecked(&self, pos: usize) -> u64 {
(self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE)) & 1
}
pub fn set(&mut self, pos: usize, value: u64) -> Result<(), &str> {
if pos >= self.len {
Err("out of range")
} else {
self.set_unchecked(pos, value);
Ok(())
}
}
pub fn set_unchecked(&mut self, pos: usize, value: u64) {
self.data[pos / WORD_SIZE] = (self.data[pos / WORD_SIZE] & !(0x1 << (pos % WORD_SIZE)))
| ((value & 0x1) << (pos % WORD_SIZE));
}
#[must_use]
pub fn is_bit_set(&self, pos: usize) -> Option<bool> {
if pos >= self.len {
None
} else {
Some(self.is_bit_set_unchecked(pos))
}
}
#[must_use]
pub fn is_bit_set_unchecked(&self, pos: usize) -> bool {
self.get_unchecked(pos) != 0
}
#[must_use]
pub fn get_bits(&self, pos: usize, len: usize) -> Option<u64> {
if len > WORD_SIZE || len == 0 {
return None;
}
if pos + len > self.len {
None
} else {
Some(self.get_bits_unchecked(pos, len))
}
}
#[must_use]
#[allow(clippy::inline_always)]
#[allow(clippy::comparison_chain)] #[inline(always)] #[allow(clippy::cast_possible_truncation)] pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 {
debug_assert!(len <= WORD_SIZE);
let partial_word = self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE);
if pos % WORD_SIZE + len <= WORD_SIZE {
partial_word & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
} else {
(partial_word | (self.data[pos / WORD_SIZE + 1] << (WORD_SIZE - pos % WORD_SIZE)))
& 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1)
}
}
#[must_use]
#[allow(clippy::inline_always)]
#[inline(always)] pub fn unpack_element(&self, index: usize, n: usize) -> Option<u64> {
self.get_bits(index * n, n)
}
#[must_use]
#[allow(clippy::inline_always)]
#[inline(always)] pub fn unpack_element_unchecked(&self, index: usize, n: usize) -> u64 {
self.get_bits_unchecked(index * n, n)
}
#[must_use]
#[allow(clippy::missing_panics_doc)] pub fn count_ones(&self) -> u64 {
let mut ones: u64 = self.data[0..self.len / WORD_SIZE]
.iter()
.map(|limb| u64::from(limb.count_ones()))
.sum();
if !self.len.is_multiple_of(WORD_SIZE) {
ones += u64::from(
(self.data.last().unwrap() & ((1 << (self.len % WORD_SIZE)) - 1)).count_ones(),
);
}
ones
}
#[must_use]
pub fn count_zeros(&self) -> u64 {
self.len as u64 - self.count_ones()
}
#[inline]
pub fn mask_or<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
MaskedBitVec::new(self, mask, |a, b| a | b)
}
pub fn apply_mask_or(&mut self, mask: &BitVec) -> Result<(), String> {
if self.len != mask.len {
return Err(String::from(
"mask cannot have different length than vector",
));
}
for i in 0..self.data.len() {
self.data[i] |= mask.data[i];
}
Ok(())
}
#[inline]
pub fn mask_and<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
MaskedBitVec::new(self, mask, |a, b| a & b)
}
pub fn apply_mask_and(&mut self, mask: &BitVec) -> Result<(), String> {
if self.len != mask.len {
return Err(String::from(
"mask cannot have different length than vector",
));
}
for i in 0..self.data.len() {
self.data[i] &= mask.data[i];
}
Ok(())
}
#[inline]
pub fn mask_xor<'s, 'b>(&'s self, mask: &'b BitVec) -> Result<BitMask<'s, 'b>, String> {
MaskedBitVec::new(self, mask, |a, b| a ^ b)
}
pub fn apply_mask_xor(&mut self, mask: &BitVec) -> Result<(), String> {
if self.len != mask.len {
return Err(String::from(
"mask cannot have different length than vector",
));
}
for i in 0..self.data.len() {
self.data[i] ^= mask.data[i];
}
Ok(())
}
#[inline]
pub fn mask_custom<'s, 'b, F>(
&'s self,
mask: &'b BitVec,
mask_op: F,
) -> Result<MaskedBitVec<'s, 'b, F>, String>
where
F: Fn(u64, u64) -> u64,
{
MaskedBitVec::new(self, mask, mask_op)
}
#[inline]
pub fn apply_mask_custom(
&mut self,
mask: &BitVec,
mask_op: fn(u64, u64) -> u64,
) -> Result<(), String> {
if self.len != mask.len {
return Err(String::from(
"mask cannot have different length than vector",
));
}
for i in 0..self.data.len() {
self.data[i] = mask_op(self.data[i], mask.data[i]);
}
Ok(())
}
#[must_use]
pub fn heap_size(&self) -> usize {
self.data.len() * size_of::<u64>()
}
pub fn split_at(self, at: usize) -> Result<(Self, Self), Self> {
if at > self.len {
Err(self)
} else {
Ok(self.split_at_unchecked(at))
}
}
#[must_use]
pub fn split_at_unchecked(mut self, at: usize) -> (Self, Self) {
let other_len = self.len - at;
let mut other = Self::with_capacity(other_len);
if other_len == 0 {
return (self, other);
}
let first_limb = at / WORD_SIZE;
let last_limb = self.len / WORD_SIZE;
let leading_partial = at % WORD_SIZE;
if first_limb == last_limb {
other.append_bits_unchecked(self.data[first_limb] >> leading_partial, other_len);
} else {
let full_limbs = if leading_partial > 0 {
other.append_bits_unchecked(
self.data[first_limb] >> leading_partial,
WORD_SIZE - leading_partial,
);
first_limb + 1..last_limb
} else {
first_limb..last_limb
};
for i in full_limbs {
other.append_bits_unchecked(self.data[i], WORD_SIZE);
}
let trailing_partial = self.len % WORD_SIZE;
if trailing_partial > 0 {
other.append_bits_unchecked(self.data[last_limb], trailing_partial);
}
}
self.drop_last(other_len);
(self, other)
}
#[inline]
pub fn iter_limbs(&self) -> impl Iterator<Item = u64> + use<'_> {
self.data.iter().copied()
}
}
impl_vector_iterator! { BitVec, BitVecIter, BitVecRefIter }
impl From<&[u64]> for BitVec {
fn from(data: &[u64]) -> Self {
BitVec::from_limbs(data)
}
}
impl From<Vec<u64>> for BitVec {
fn from(data: Vec<u64>) -> Self {
BitVec::from_limbs(&data)
}
}
impl Extend<BitVec> for BitVec {
fn extend<T: IntoIterator<Item = BitVec>>(&mut self, iter: T) {
for v in iter {
self.extend_bitvec(&v)
}
}
}
impl<'t> Extend<&'t BitVec> for BitVec {
fn extend<T: IntoIterator<Item = &'t BitVec>>(&mut self, iter: T) {
for v in iter {
self.extend_bitvec(v)
}
}
}
impl FromIterator<u64> for BitVec {
fn from_iter<T: IntoIterator<Item = u64>>(iter: T) -> Self {
BitVec::from_limbs_iter(iter)
}
}
impl PartialEq for BitVec {
fn eq(&self, other: &Self) -> bool {
if self.len != other.len {
return false;
}
if self.len == 0 {
return true;
}
for i in 0..self.data.len() - 1 {
if self.data[i] != other.data[i] {
return false;
}
}
let mask = (1 << (self.len % WORD_SIZE)) - 1;
if self.data[self.data.len() - 1] & mask != other.data[self.data.len() - 1] & mask {
return false;
}
true
}
}
impl Eq for BitVec {}
impl Hash for BitVec {
fn hash<H: Hasher>(&self, state: &mut H) {
state.write_usize(self.len);
if self.len > 0 {
self.data[0..self.data.len() - 1]
.iter()
.for_each(|x| state.write_u64(*x));
let masked_last_limb = self.data.last().unwrap() & ((1 << (self.len % WORD_SIZE)) - 1);
state.write_u64(masked_last_limb);
}
}
}
#[cfg(test)]
mod tests;