#[cfg(not(test))]
use alloc::vec::Vec;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::bits::popcount::{popcount_word, popcount_words};
use crate::bits::rank::RankDirectory;
use crate::bits::select::SelectIndex;
use crate::util::broadword::select_in_word;
use crate::{Config, RankSelect};
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct BitVec {
words: Vec<u64>,
len: usize,
ones_count: usize,
rank_dir: RankDirectory,
select_idx: SelectIndex,
}
impl BitVec {
pub fn from_words(words: Vec<u64>, len: usize) -> Self {
Self::with_config(words, len, Config::default())
}
pub fn with_config(mut words: Vec<u64>, len: usize, config: Config) -> Self {
assert!(
len <= words.len().saturating_mul(64),
"len {} exceeds capacity {}",
len,
words.len().saturating_mul(64)
);
if len > 0 {
let last_word_bits = len % 64;
if last_word_bits > 0 && !words.is_empty() {
let last_idx = words.len() - 1;
let mask = (1u64 << last_word_bits) - 1;
words[last_idx] &= mask;
}
}
let ones_count: usize = popcount_words(&words);
let rank_dir = RankDirectory::build(&words);
let select_idx = SelectIndex::build(&words, ones_count, config.select_sample_rate);
Self {
words,
len,
ones_count,
rank_dir,
select_idx,
}
}
pub fn new() -> Self {
Self {
words: Vec::new(),
len: 0,
ones_count: 0,
rank_dir: RankDirectory::empty(),
select_idx: SelectIndex::empty(),
}
}
#[inline]
pub fn len(&self) -> usize {
self.len
}
#[inline]
pub fn is_empty(&self) -> bool {
self.len == 0
}
#[inline]
pub fn count_ones(&self) -> usize {
self.ones_count
}
#[inline]
pub fn count_zeros(&self) -> usize {
self.len - self.ones_count
}
#[inline]
pub fn get(&self, i: usize) -> bool {
assert!(i < self.len, "index {} out of bounds (len={})", i, self.len);
let word_idx = i / 64;
let bit_idx = i % 64;
(self.words[word_idx] >> bit_idx) & 1 == 1
}
#[inline]
pub unsafe fn get_unchecked(&self, i: usize) -> bool {
let word_idx = i / 64;
let bit_idx = i % 64;
unsafe { (*self.words.get_unchecked(word_idx) >> bit_idx) & 1 == 1 }
}
#[inline]
pub fn word(&self, idx: usize) -> u64 {
self.words[idx]
}
#[inline]
pub fn word_count(&self) -> usize {
self.words.len()
}
#[inline]
pub fn words(&self) -> &[u64] {
&self.words
}
}
impl Default for BitVec {
fn default() -> Self {
Self::new()
}
}
impl RankSelect for BitVec {
#[inline]
fn rank1(&self, i: usize) -> usize {
if i == 0 {
return 0;
}
if i >= self.len {
return self.ones_count;
}
let word_idx = i / 64;
let bit_idx = i % 64;
let dir_rank = self.rank_dir.rank_at_word(word_idx);
let word = self.words[word_idx];
let mask = (1u64 << bit_idx) - 1;
let partial = popcount_word(word & mask) as usize;
dir_rank + partial
}
fn select1(&self, k: usize) -> Option<usize> {
if k >= self.ones_count {
return None;
}
let (start_word, mut remaining) = self.select_idx.jump_to(k);
for word_idx in start_word..self.words.len() {
let word = self.words[word_idx];
let pop = popcount_word(word) as usize;
if pop > remaining {
let bit_pos = select_in_word(word, remaining as u32) as usize;
let result = word_idx * 64 + bit_pos;
return if result < self.len {
Some(result)
} else {
None
};
}
remaining -= pop;
}
None
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_from_words_empty() {
let bv = BitVec::from_words(vec![], 0);
assert_eq!(bv.len(), 0);
assert_eq!(bv.count_ones(), 0);
assert!(bv.is_empty());
}
#[test]
fn test_from_words_single() {
let bv = BitVec::from_words(vec![0b1010_1010], 8);
assert_eq!(bv.len(), 8);
assert_eq!(bv.count_ones(), 4);
assert!(!bv.get(0));
assert!(bv.get(1));
assert!(!bv.get(2));
assert!(bv.get(3));
}
#[test]
fn test_get_all_positions() {
let bv = BitVec::from_words(vec![0b1100_0011], 8);
assert!(bv.get(0));
assert!(bv.get(1));
assert!(!bv.get(2));
assert!(!bv.get(3));
assert!(!bv.get(4));
assert!(!bv.get(5));
assert!(bv.get(6));
assert!(bv.get(7));
}
#[test]
#[should_panic(expected = "out of bounds")]
fn test_get_out_of_bounds() {
let bv = BitVec::from_words(vec![0xFF], 8);
bv.get(8);
}
#[test]
fn test_count_ones_all_zeros() {
let bv = BitVec::from_words(vec![0, 0, 0], 192);
assert_eq!(bv.count_ones(), 0);
assert_eq!(bv.count_zeros(), 192);
}
#[test]
fn test_count_ones_all_ones() {
let bv = BitVec::from_words(vec![u64::MAX, u64::MAX], 128);
assert_eq!(bv.count_ones(), 128);
assert_eq!(bv.count_zeros(), 0);
}
#[test]
fn test_partial_word() {
let bv = BitVec::from_words(vec![0b11111_11111], 10);
assert_eq!(bv.len(), 10);
assert_eq!(bv.count_ones(), 10);
}
#[test]
fn test_partial_word_masks_unused() {
let bv = BitVec::from_words(vec![u64::MAX], 10);
assert_eq!(bv.count_ones(), 10);
}
#[test]
fn test_rank1_empty() {
let bv = BitVec::from_words(vec![], 0);
assert_eq!(bv.rank1(0), 0);
}
#[test]
fn test_rank1_at_zero() {
let bv = BitVec::from_words(vec![0b1111], 4);
assert_eq!(bv.rank1(0), 0);
}
#[test]
fn test_rank1_simple() {
let bv = BitVec::from_words(vec![0b0100_1101], 8);
assert_eq!(bv.rank1(1), 1); assert_eq!(bv.rank1(2), 1); assert_eq!(bv.rank1(3), 2); assert_eq!(bv.rank1(4), 3); assert_eq!(bv.rank1(8), 4); }
#[test]
fn test_rank1_word_boundary() {
let bv = BitVec::from_words(vec![u64::MAX, u64::MAX], 128);
assert_eq!(bv.rank1(64), 64);
assert_eq!(bv.rank1(65), 65);
assert_eq!(bv.rank1(128), 128);
}
#[test]
fn test_rank1_beyond_len() {
let bv = BitVec::from_words(vec![u64::MAX], 64);
assert_eq!(bv.rank1(100), 64);
}
#[test]
fn test_rank0_simple() {
let bv = BitVec::from_words(vec![0b0100_1101], 8);
assert_eq!(bv.rank0(4), 1); assert_eq!(bv.rank0(8), 4); }
#[test]
fn test_select1_empty() {
let bv = BitVec::from_words(vec![], 0);
assert_eq!(bv.select1(0), None);
}
#[test]
fn test_select1_all_zeros() {
let bv = BitVec::from_words(vec![0, 0], 128);
assert_eq!(bv.select1(0), None);
}
#[test]
fn test_select1_simple() {
let bv = BitVec::from_words(vec![0b0100_1101], 8);
assert_eq!(bv.select1(0), Some(0)); assert_eq!(bv.select1(1), Some(2)); assert_eq!(bv.select1(2), Some(3)); assert_eq!(bv.select1(3), Some(6)); assert_eq!(bv.select1(4), None); }
#[test]
fn test_select1_word_boundary() {
let bv = BitVec::from_words(vec![u64::MAX, u64::MAX], 128);
assert_eq!(bv.select1(63), Some(63));
assert_eq!(bv.select1(64), Some(64));
assert_eq!(bv.select1(127), Some(127));
assert_eq!(bv.select1(128), None);
}
#[test]
fn test_rank_select_roundtrip() {
let bv = BitVec::from_words(vec![0xAAAA_AAAA_AAAA_AAAA, 0x5555_5555_5555_5555], 128);
for i in 0..128 {
if bv.get(i) {
let rank = bv.rank1(i);
let select = bv.select1(rank);
assert_eq!(select, Some(i), "roundtrip failed at i={}", i);
}
}
}
#[test]
fn test_large_bitvector() {
let words: Vec<u64> = (0..16).map(|_| 0xAAAA_AAAA_AAAA_AAAA).collect();
let bv = BitVec::from_words(words, 1024);
assert_eq!(bv.count_ones(), 512);
assert_eq!(bv.rank1(512), 256);
assert_eq!(bv.select1(255), Some(511));
}
#[test]
fn test_single_bit_bitvector() {
let bv = BitVec::from_words(vec![1u64], 1);
assert_eq!(bv.len(), 1);
assert_eq!(bv.count_ones(), 1);
assert!(bv.get(0));
assert_eq!(bv.rank1(0), 0);
assert_eq!(bv.rank1(1), 1);
assert_eq!(bv.select1(0), Some(0));
assert_eq!(bv.select1(1), None);
let bv = BitVec::from_words(vec![0u64], 1);
assert_eq!(bv.count_ones(), 0);
assert_eq!(bv.count_zeros(), 1);
assert!(!bv.get(0));
assert_eq!(bv.select1(0), None);
}
#[test]
fn test_63_bit_bitvector() {
let bv = BitVec::from_words(vec![u64::MAX], 63);
assert_eq!(bv.len(), 63);
assert_eq!(bv.count_ones(), 63);
assert_eq!(bv.count_zeros(), 0);
assert_eq!(bv.rank1(63), 63);
assert_eq!(bv.select1(62), Some(62));
assert_eq!(bv.select1(63), None);
}
#[test]
fn test_65_bit_bitvector() {
let bv = BitVec::from_words(vec![u64::MAX, 1u64], 65);
assert_eq!(bv.len(), 65);
assert_eq!(bv.count_ones(), 65);
assert_eq!(bv.rank1(64), 64);
assert_eq!(bv.rank1(65), 65);
assert_eq!(bv.select1(64), Some(64));
assert_eq!(bv.select1(65), None);
}
#[test]
fn test_various_partial_lengths() {
for len in [
1usize, 7, 8, 15, 16, 31, 32, 33, 63, 64, 65, 100, 127, 128, 129,
] {
let num_words = len.div_ceil(64);
let words: Vec<u64> = vec![u64::MAX; num_words];
let bv = BitVec::from_words(words, len);
assert_eq!(bv.len(), len, "len mismatch for {}", len);
assert_eq!(bv.count_ones(), len, "count_ones mismatch for len={}", len);
assert_eq!(bv.rank1(len), len, "rank1(len) mismatch for len={}", len);
if len > 0 {
assert_eq!(
bv.select1(len - 1),
Some(len - 1),
"select1 last bit for len={}",
len
);
}
assert_eq!(bv.select1(len), None, "select1 beyond for len={}", len);
}
}
#[test]
fn test_first_bit_only() {
for num_words in [1, 2, 8, 16] {
let mut words = vec![0u64; num_words];
words[0] = 1; let len = num_words * 64;
let bv = BitVec::from_words(words, len);
assert_eq!(bv.count_ones(), 1);
assert_eq!(bv.select1(0), Some(0));
assert_eq!(bv.rank1(1), 1);
assert_eq!(bv.rank1(len), 1);
}
}
#[test]
fn test_last_bit_only() {
for num_words in [1, 2, 8, 16] {
let mut words = vec![0u64; num_words];
let len = num_words * 64;
words[num_words - 1] = 1u64 << 63; let bv = BitVec::from_words(words, len);
assert_eq!(bv.count_ones(), 1);
assert_eq!(bv.select1(0), Some(len - 1));
assert_eq!(bv.rank1(len - 1), 0);
assert_eq!(bv.rank1(len), 1);
}
}
#[test]
fn test_last_bit_partial_word() {
for len in [10usize, 33, 63, 100] {
let num_words = len.div_ceil(64);
let mut words = vec![0u64; num_words];
let bit_in_last_word = (len - 1) % 64;
words[num_words - 1] = 1u64 << bit_in_last_word;
let bv = BitVec::from_words(words, len);
assert_eq!(bv.count_ones(), 1, "count_ones for len={}", len);
assert_eq!(bv.select1(0), Some(len - 1), "select1 for len={}", len);
assert_eq!(bv.rank1(len - 1), 0, "rank1(len-1) for len={}", len);
assert_eq!(bv.rank1(len), 1, "rank1(len) for len={}", len);
}
}
#[test]
fn test_first_and_last_bit_only() {
let len = 1024;
let num_words = len / 64;
let mut words = vec![0u64; num_words];
words[0] = 1; words[num_words - 1] = 1u64 << 63;
let bv = BitVec::from_words(words, len);
assert_eq!(bv.count_ones(), 2);
assert_eq!(bv.select1(0), Some(0));
assert_eq!(bv.select1(1), Some(len - 1));
assert_eq!(bv.rank1(1), 1);
assert_eq!(bv.rank1(len - 1), 1);
assert_eq!(bv.rank1(len), 2);
}
#[test]
fn test_block_boundary_512_bits() {
let mut words = vec![0u64; 16]; words[7] = 1u64 << 63; words[8] = 1u64;
let bv = BitVec::from_words(words, 1024);
assert_eq!(bv.count_ones(), 2);
assert_eq!(bv.rank1(511), 0);
assert_eq!(bv.rank1(512), 1);
assert_eq!(bv.rank1(513), 2);
assert_eq!(bv.select1(0), Some(511));
assert_eq!(bv.select1(1), Some(512));
}
#[test]
fn test_select_crossing_blocks() {
let words: Vec<u64> = (0..64).map(|i| 1u64 << (i % 64)).collect();
let bv = BitVec::from_words(words, 64 * 64);
assert_eq!(bv.count_ones(), 64);
assert_eq!(bv.select1(7), Some(7 * 64 + 7)); assert_eq!(bv.select1(8), Some(8 * 64 + 8)); assert_eq!(bv.select1(63), Some(63 * 64 + 63)); }
#[test]
fn test_rank_at_every_word_boundary() {
let words: Vec<u64> = (0..32)
.map(|i| if i % 2 == 0 { u64::MAX } else { 0 })
.collect();
let bv = BitVec::from_words(words, 32 * 64);
for word_idx in 0..32 {
let pos = word_idx * 64;
let expected = (word_idx / 2 + word_idx % 2) * 64;
assert_eq!(
bv.rank1(pos),
expected,
"rank1({}) at word {} boundary",
pos,
word_idx
);
}
}
#[test]
fn test_with_sample_rate_64() {
let words: Vec<u64> = vec![0xAAAA_AAAA_AAAA_AAAA; 16];
let config = Config {
select_sample_rate: 64,
};
let bv = BitVec::with_config(words, 1024, config);
assert_eq!(bv.count_ones(), 512);
assert_eq!(bv.select1(0), Some(1));
assert_eq!(bv.select1(255), Some(511));
assert_eq!(bv.select1(511), Some(1023));
}
#[test]
fn test_with_sample_rate_1() {
let words: Vec<u64> = vec![0xAAAA_AAAA_AAAA_AAAA; 8];
let config = Config {
select_sample_rate: 1,
};
let bv = BitVec::with_config(words, 512, config);
assert_eq!(bv.count_ones(), 256);
for k in 0..256 {
assert!(bv.select1(k).is_some(), "select1({}) should be Some", k);
}
}
#[test]
fn test_with_large_sample_rate() {
let words: Vec<u64> = vec![1u64; 8]; let config = Config {
select_sample_rate: 1024,
};
let bv = BitVec::with_config(words, 512, config);
assert_eq!(bv.count_ones(), 8);
for k in 0..8 {
assert_eq!(bv.select1(k), Some(k * 64), "select1({}) failed", k);
}
}
#[test]
fn test_empty_all_operations() {
let bv = BitVec::new();
assert_eq!(bv.len(), 0);
assert_eq!(bv.count_ones(), 0);
assert_eq!(bv.count_zeros(), 0);
assert!(bv.is_empty());
assert_eq!(bv.rank1(0), 0);
assert_eq!(bv.rank0(0), 0);
assert_eq!(bv.rank1(100), 0);
assert_eq!(bv.select1(0), None);
}
#[test]
fn test_get_unchecked_valid() {
let bv = BitVec::from_words(vec![0b1010_1010u64], 8);
unsafe {
assert!(!bv.get_unchecked(0));
assert!(bv.get_unchecked(1));
assert!(!bv.get_unchecked(2));
assert!(bv.get_unchecked(3));
assert!(!bv.get_unchecked(4));
assert!(bv.get_unchecked(5));
assert!(!bv.get_unchecked(6));
assert!(bv.get_unchecked(7));
}
}
#[test]
fn test_rank_select_consistency_sparse() {
let mut words = vec![0u64; 32]; for i in 0..20 {
let pos = i * 100;
if pos < 2048 {
words[pos / 64] |= 1u64 << (pos % 64);
}
}
let bv = BitVec::from_words(words, 2048);
for k in 0..bv.count_ones() {
if let Some(pos) = bv.select1(k) {
assert_eq!(bv.rank1(pos), k, "rank1(select1({})) should be {}", k, k);
}
}
}
}