type SubblockBits = u128;
const BITS_PER_SUB_BLOCK: usize = SubblockBits::BITS as usize;
const SUB_BLOCKS_PER_BLOCK: usize = 64;
const BITS_PER_BLOCK: usize = SUB_BLOCKS_PER_BLOCK * BITS_PER_SUB_BLOCK;
#[derive(Clone, Debug)]
struct Block {
rank: u64,
sub_blocks: [u16; SUB_BLOCKS_PER_BLOCK],
bits: [SubblockBits; SUB_BLOCKS_PER_BLOCK],
}
impl Block {
fn set(&mut self, index: usize) {
debug_assert!(index < BITS_PER_BLOCK);
let chunk_idx = index / BITS_PER_SUB_BLOCK;
let bit_idx = index % BITS_PER_SUB_BLOCK;
let mask = 1 << bit_idx;
debug_assert_eq!(self.bits[chunk_idx] & mask, 0, "toggling bits off indicates that the original data was incorrect, most likely containing duplicate values.");
self.bits[chunk_idx] |= mask;
}
fn rank(&self, local_idx: usize) -> usize {
let mut rank = self.rank as usize;
let sub_block = local_idx / BITS_PER_SUB_BLOCK;
rank += self.sub_blocks[sub_block] as usize;
let remainder = local_idx % BITS_PER_SUB_BLOCK;
let last_chunk = local_idx / BITS_PER_SUB_BLOCK;
let masked = self.bits[last_chunk] & !(SubblockBits::MAX << remainder);
rank + masked.count_ones() as usize
}
fn total_rank(&self) -> usize {
self.sub_blocks[SUB_BLOCKS_PER_BLOCK - 1] as usize
+ self.rank as usize
+ self.bits[SUB_BLOCKS_PER_BLOCK - 1..]
.iter()
.map(|c| c.count_ones() as usize)
.sum::<usize>()
}
}
#[derive(Default)]
pub struct BitRankBuilder {
blocks: Vec<Block>,
}
impl BitRankBuilder {
pub fn with_capacity(cap: usize) -> Self {
const ZERO_BLOCK: Block = Block {
rank: 0,
sub_blocks: [0; SUB_BLOCKS_PER_BLOCK],
bits: [0; SUB_BLOCKS_PER_BLOCK],
};
Self {
blocks: vec![ZERO_BLOCK; cap.div_ceil(BITS_PER_BLOCK)],
}
}
pub fn push(&mut self, position: usize) {
let block_id = position / BITS_PER_BLOCK;
self.blocks[block_id].set(position % BITS_PER_BLOCK);
}
pub fn finish(mut self) -> BitRank {
let mut total_rank = 0;
for block in &mut self.blocks {
block.rank = total_rank;
let mut local_rank = 0;
for (i, chunk) in block.bits.iter().enumerate() {
block.sub_blocks[i] = local_rank;
local_rank += chunk.count_ones() as u16;
}
total_rank += local_rank as u64
}
BitRank {
blocks: self.blocks,
}
}
}
#[derive(Clone)]
pub struct BitRank {
blocks: Vec<Block>,
}
impl BitRank {
pub fn rank(&self, idx: usize) -> usize {
let block_num = idx / BITS_PER_BLOCK;
if block_num >= self.blocks.len() {
self.max_rank() } else {
self.blocks[block_num].rank(idx % BITS_PER_BLOCK)
}
}
pub fn max_rank(&self) -> usize {
self.blocks
.last()
.map(|b| b.total_rank())
.unwrap_or_default() }
}
#[cfg(test)]
mod tests {
use super::*;
use rand::distr::Uniform;
use rand::prelude::*;
use rand_chacha::rand_core::SeedableRng;
use rand_chacha::ChaCha8Rng;
pub fn bitrank<I>(iter: I) -> BitRank
where
I: IntoIterator<Item = usize>,
I::IntoIter: DoubleEndedIterator,
{
let mut iter = iter.into_iter().rev();
if let Some(last) = iter.next() {
let mut builder = BitRankBuilder::with_capacity(last + 1);
builder.push(last);
for position in iter {
builder.push(position);
}
builder.finish()
} else {
BitRank { blocks: vec![] }
}
}
#[test]
fn test_rank_zero() {
let br = bitrank([0]);
assert_eq!(br.rank(0), 0);
assert_eq!(br.rank(1), 1);
}
#[test]
fn test_empty() {
let br = bitrank([]);
assert!(br.blocks.is_empty());
}
#[test]
fn test_index_out_of_bounds() {
let br = bitrank([BITS_PER_BLOCK - 1]);
assert_eq!(br.rank(BITS_PER_BLOCK), 1);
}
#[test]
#[should_panic]
fn test_duplicate_position() {
bitrank([64, 66, 68, 68, 90]);
}
#[test]
fn test_rank_exclusive() {
let br = bitrank(0..132);
assert_eq!(br.blocks.len(), 1);
assert_eq!(br.rank(64), 64);
assert_eq!(br.rank(132), 132);
}
#[test]
fn test_rank() {
let mut positions: Vec<usize> = (0..132).collect();
positions.append(&mut vec![138usize, 140, 146]);
let br = bitrank(positions);
assert_eq!(br.rank(135), 132);
let br2 = bitrank(0..BITS_PER_BLOCK - 5);
assert_eq!(br2.rank(169), 169);
let br3 = bitrank(0..BITS_PER_BLOCK + 5);
assert_eq!(br3.rank(BITS_PER_BLOCK), BITS_PER_BLOCK);
}
#[test]
fn test_rank_idx() {
let mut positions: Vec<usize> = (0..132).collect();
positions.append(&mut vec![138usize, 140, 146]);
let br = bitrank(positions);
assert_eq!(br.rank(135), 132);
let bits2: Vec<usize> = (0..BITS_PER_BLOCK - 5).collect();
let br2 = bitrank(bits2);
assert_eq!(br2.rank(169), 169);
let bits3: Vec<usize> = (0..BITS_PER_BLOCK + 5).collect();
let br3 = bitrank(bits3);
assert_eq!(br3.rank(BITS_PER_BLOCK), BITS_PER_BLOCK);
let bits4: Vec<usize> = vec![1, 1000, 7777, BITS_PER_BLOCK + 1];
let br4 = bitrank(bits4);
assert_eq!(br4.rank(8000), 3);
let bits5: Vec<usize> = vec![1, 1000, 7777, BITS_PER_BLOCK + 1];
let br5 = bitrank(bits5);
assert_eq!(br5.rank(BITS_PER_BLOCK), 3);
}
#[test]
fn test_rank_large_random() {
let mut rng = ChaCha8Rng::seed_from_u64(2);
let uniform = Uniform::new(0, 1_000_000).unwrap();
let mut random_bits = Vec::with_capacity(100_000);
for _ in 0..100_000 {
random_bits.push(uniform.sample(&mut rng));
}
random_bits.sort_unstable();
random_bits.dedup();
let br = bitrank(random_bits.iter().copied());
let mut rank = 0;
for i in 0..random_bits.capacity() {
assert_eq!(br.rank(i), rank);
if i == random_bits[rank] {
rank += 1;
}
}
}
#[test]
fn test_rank_out_of_bounds() {
for i in 1..30 {
let br = bitrank([BITS_PER_BLOCK * i - 1]);
assert_eq!(br.max_rank(), 1);
assert_eq!(br.rank(BITS_PER_BLOCK * i - 1), 0);
for j in 0..10 {
assert_eq!(br.rank(BITS_PER_BLOCK * (i + j)), 1);
}
}
}
#[test]
fn test_large_gap() {
let br = bitrank((3..4).chain(BITS_PER_BLOCK * 15..BITS_PER_BLOCK * 15 + 17));
for i in 1..15 {
assert_eq!(br.rank(BITS_PER_BLOCK * i), 1);
}
for i in 0..18 {
assert_eq!(br.rank(BITS_PER_BLOCK * 15 + i), 1 + i);
}
}
#[test]
fn test_with_capacity() {
let mut b = BitRankBuilder::with_capacity(BITS_PER_BLOCK * 3 - 1);
let initial_capacity = b.blocks.capacity();
assert!(initial_capacity >= 3);
b.push(BITS_PER_BLOCK * 3 - 2); assert_eq!(b.blocks.capacity(), initial_capacity);
let mut b = BitRankBuilder::with_capacity(BITS_PER_BLOCK * 3 + 1);
let initial_capacity = b.blocks.capacity();
assert!(initial_capacity >= 4);
b.push(BITS_PER_BLOCK * 3); assert_eq!(b.blocks.capacity(), initial_capacity);
}
}