#[cfg(not(test))]
use alloc::vec::Vec;
const L1_WORDS: usize = 128;
const L2_WORDS: usize = 8;
#[derive(Clone, Debug)]
pub struct CompactRank {
l1: Vec<u32>,
l2: Vec<u16>,
total: u32,
}
impl CompactRank {
pub fn empty() -> Self {
Self {
l1: Vec::new(),
l2: Vec::new(),
total: 0,
}
}
pub fn build(words: &[u64]) -> Self {
if words.is_empty() {
return Self::empty();
}
let num_superblocks = words.len().div_ceil(L1_WORDS);
let num_blocks = words.len().div_ceil(L2_WORDS);
let mut l1 = Vec::with_capacity(num_superblocks);
let mut l2 = Vec::with_capacity(num_blocks);
let mut absolute_rank: u32 = 0;
for sb in 0..num_superblocks {
l1.push(absolute_rank);
let sb_start = sb * L1_WORDS;
let sb_end = (sb_start + L1_WORDS).min(words.len());
let mut relative_rank: u16 = 0;
let blocks_in_sb = (sb_end - sb_start).div_ceil(L2_WORDS);
for b in 0..blocks_in_sb {
l2.push(relative_rank);
let block_start = sb_start + b * L2_WORDS;
let block_end = (block_start + L2_WORDS).min(sb_end);
for &word in &words[block_start..block_end] {
let ones = word.count_ones() as u16;
relative_rank += ones;
absolute_rank += ones as u32;
}
}
}
Self {
l1,
l2,
total: absolute_rank,
}
}
#[inline]
pub fn rank_at_word(&self, words: &[u64], word_idx: usize) -> usize {
if self.l1.is_empty() {
return 0;
}
if word_idx >= words.len() {
return self.total as usize;
}
let sb_idx = word_idx / L1_WORDS;
let block_idx = word_idx / L2_WORDS;
let mut count = self.l1[sb_idx] as usize + self.l2[block_idx] as usize;
let block_start = block_idx * L2_WORDS;
for &word in &words[block_start..word_idx] {
count += word.count_ones() as usize;
}
count
}
pub fn heap_size(&self) -> usize {
self.l1.len() * 4 + self.l2.len() * 2
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_empty() {
let words: Vec<u64> = vec![];
let cr = CompactRank::build(&words);
assert_eq!(cr.rank_at_word(&words, 0), 0);
}
#[test]
fn test_single_word() {
let words = vec![0b1010_1010u64]; let cr = CompactRank::build(&words);
assert_eq!(cr.rank_at_word(&words, 0), 0);
assert_eq!(cr.rank_at_word(&words, 1), 4);
}
#[test]
fn test_multiple_words_single_block() {
let words: Vec<u64> = vec![0xFF; 8]; let cr = CompactRank::build(&words);
assert_eq!(cr.rank_at_word(&words, 0), 0);
assert_eq!(cr.rank_at_word(&words, 1), 8);
assert_eq!(cr.rank_at_word(&words, 2), 16);
assert_eq!(cr.rank_at_word(&words, 7), 56);
assert_eq!(cr.rank_at_word(&words, 8), 64);
}
#[test]
fn test_multiple_blocks() {
let words: Vec<u64> = vec![u64::MAX; 16]; let cr = CompactRank::build(&words);
assert_eq!(cr.rank_at_word(&words, 0), 0);
assert_eq!(cr.rank_at_word(&words, 1), 64);
assert_eq!(cr.rank_at_word(&words, 8), 64 * 8);
assert_eq!(cr.rank_at_word(&words, 15), 64 * 15);
assert_eq!(cr.rank_at_word(&words, 16), 64 * 16);
}
#[test]
fn test_cross_superblock_boundary() {
let words: Vec<u64> = vec![1u64; 256]; let cr = CompactRank::build(&words);
assert_eq!(cr.rank_at_word(&words, 0), 0);
assert_eq!(cr.rank_at_word(&words, 128), 128);
assert_eq!(cr.rank_at_word(&words, 256), 256);
}
#[test]
fn test_sparse_words() {
let words: Vec<u64> = vec![1; 16]; let cr = CompactRank::build(&words);
assert_eq!(cr.rank_at_word(&words, 0), 0);
assert_eq!(cr.rank_at_word(&words, 1), 1);
assert_eq!(cr.rank_at_word(&words, 8), 8);
assert_eq!(cr.rank_at_word(&words, 15), 15);
assert_eq!(cr.rank_at_word(&words, 16), 16);
}
#[test]
fn test_partial_block() {
let words: Vec<u64> = vec![0xFF; 5]; let cr = CompactRank::build(&words);
assert_eq!(cr.rank_at_word(&words, 0), 0);
assert_eq!(cr.rank_at_word(&words, 1), 8);
assert_eq!(cr.rank_at_word(&words, 4), 32);
assert_eq!(cr.rank_at_word(&words, 5), 40);
}
#[test]
fn test_matches_naive_cumulative() {
let words: Vec<u64> = (0..300).map(|i| ((i * 7 + 3) % 256) as u64).collect();
let cr = CompactRank::build(&words);
let mut naive = vec![0u32];
let mut cum = 0u32;
for &w in &words {
cum += w.count_ones();
naive.push(cum);
}
for (i, &expected) in naive.iter().enumerate().take(words.len() + 1) {
assert_eq!(
cr.rank_at_word(&words, i),
expected as usize,
"mismatch at word {}",
i
);
}
}
#[test]
fn test_large_superblock_boundary() {
let words: Vec<u64> = vec![0xFF; 128]; let cr = CompactRank::build(&words);
assert_eq!(cr.rank_at_word(&words, 0), 0);
assert_eq!(cr.rank_at_word(&words, 64), 64 * 8);
assert_eq!(cr.rank_at_word(&words, 128), 128 * 8);
}
#[test]
fn test_overhead() {
let words: Vec<u64> = vec![0; 1024]; let cr = CompactRank::build(&words);
let bitmap_bytes = words.len() * 8;
let index_bytes = cr.heap_size();
let overhead_pct = (index_bytes as f64 / bitmap_bytes as f64) * 100.0;
assert!(
overhead_pct < 5.0,
"Overhead {:.1}% exceeds 5% target (bitmap={}, index={})",
overhead_pct,
bitmap_bytes,
index_bytes
);
}
}