use super::RankSelectOps;
use crate::error::{Result, ZiporaError};
use crate::succinct::BitVector;
const LINE_BITS: usize = 256;
const WORDS_PER_LINE: usize = 4;
#[derive(Debug, Clone, Copy)]
#[repr(C)]
struct DimLine {
rlev1: u32,
rlev2: [u8; 4],
bit64: [u64; 4],
}
impl DimLine {
fn new() -> Self {
Self {
rlev1: 0,
rlev2: [0; 4],
bit64: [0; 4],
}
}
#[inline(always)]
fn rank1_within(&self, bit_offset: usize) -> usize {
let word_idx = bit_offset / 64;
let bit_in_word = bit_offset % 64;
let mut rank = self.rlev2[word_idx] as usize;
if bit_in_word > 0 {
let mask = (1u64 << bit_in_word) - 1;
rank += (self.bit64[word_idx] & mask).count_ones() as usize;
}
rank
}
}
#[derive(Debug, Clone, Copy)]
#[repr(C)]
struct MixedLine {
dim: [DimLine; 2],
}
impl MixedLine {
fn new() -> Self {
Self {
dim: [DimLine::new(), DimLine::new()],
}
}
}
pub struct RankSelectMixedIL256 {
lines: Vec<MixedLine>,
size: [usize; 2], max_rank1: [usize; 2],
}
pub struct MixedDimView<'a> {
parent: &'a RankSelectMixedIL256,
dim: usize,
}
impl RankSelectMixedIL256 {
pub fn new(bv0: BitVector, bv1: BitVector) -> Result<Self> {
let size0 = bv0.len();
let size1 = bv1.len();
let max_size = size0.max(size1);
let nlines = max_size.div_ceil(LINE_BITS);
let blocks0 = bv0.blocks();
let blocks1 = bv1.blocks();
let mut lines = Vec::with_capacity(nlines);
let mut cum_rank = [0u32; 2];
for i in 0..nlines {
let mut ml = MixedLine::new();
for d in 0..2 {
ml.dim[d].rlev1 = cum_rank[d];
let blocks = if d == 0 { blocks0 } else { blocks1 };
let dim_size = if d == 0 { size0 } else { size1 };
let mut r = 0u32;
for j in 0..WORDS_PER_LINE {
ml.dim[d].rlev2[j] = r as u8;
let word_idx = i * WORDS_PER_LINE + j;
let word = if word_idx < blocks.len() {
blocks[word_idx]
} else {
0
};
let global_bit = i * LINE_BITS + j * 64;
let effective_word = if global_bit + 64 > dim_size && global_bit < dim_size {
let valid_bits = dim_size - global_bit;
word & ((1u64 << valid_bits) - 1)
} else if global_bit >= dim_size {
0
} else {
word
};
ml.dim[d].bit64[j] = effective_word;
r += effective_word.count_ones();
}
cum_rank[d] += r;
}
lines.push(ml);
}
Ok(Self {
lines,
size: [size0, size1],
max_rank1: [cum_rank[0] as usize, cum_rank[1] as usize],
})
}
#[inline]
pub fn dim0(&self) -> MixedDimView<'_> {
MixedDimView {
parent: self,
dim: 0,
}
}
#[inline]
pub fn dim1(&self) -> MixedDimView<'_> {
MixedDimView {
parent: self,
dim: 1,
}
}
#[inline]
pub fn rank1_dim(&self, dim: usize, pos: usize) -> usize {
assert!(dim < 2);
assert!(pos <= self.size[dim]);
if pos == 0 {
return 0;
}
let line_idx = pos / LINE_BITS;
let bit_in_line = pos % LINE_BITS;
let dl = &self.lines[line_idx].dim[dim];
dl.rlev1 as usize + dl.rank1_within(bit_in_line)
}
#[inline]
pub fn rank0_dim(&self, dim: usize, pos: usize) -> usize {
pos - self.rank1_dim(dim, pos)
}
#[inline]
pub fn select1_dim(&self, dim: usize, k: usize) -> Result<usize> {
assert!(dim < 2);
if k >= self.max_rank1[dim] {
return Err(ZiporaError::invalid_data("select1 out of range"));
}
let nlines = self.lines.len();
let mut lo = 0usize;
let mut hi = nlines;
while lo < hi {
let mid = (lo + hi) / 2;
if (self.lines[mid].dim[dim].rlev1 as usize) <= k {
lo = mid + 1;
} else {
hi = mid;
}
}
let block = lo - 1;
let dl = &self.lines[block].dim[dim];
let base = dl.rlev1 as usize;
let remaining = k - base;
let base_bitpos = block * LINE_BITS;
for j in (0..WORDS_PER_LINE).rev() {
if remaining >= dl.rlev2[j] as usize {
let in_word = remaining - dl.rlev2[j] as usize;
let word = dl.bit64[j];
return Ok(base_bitpos + j * 64 + select_in_word(word, in_word));
}
}
Err(ZiporaError::invalid_data("select1 internal error"))
}
#[inline]
pub fn get_dim(&self, dim: usize, index: usize) -> Option<bool> {
if index >= self.size[dim] {
return None;
}
let line_idx = index / LINE_BITS;
let word_idx = (index % LINE_BITS) / 64;
let bit_idx = index % 64;
Some((self.lines[line_idx].dim[dim].bit64[word_idx] >> bit_idx) & 1 == 1)
}
#[inline]
pub fn size_dim(&self, dim: usize) -> usize {
self.size[dim]
}
pub fn max_rank1_dim(&self, dim: usize) -> usize {
self.max_rank1[dim]
}
#[inline]
pub fn mem_size(&self) -> usize {
self.lines.len() * std::mem::size_of::<MixedLine>()
}
}
impl RankSelectOps for MixedDimView<'_> {
#[inline]
fn rank1(&self, pos: usize) -> usize {
self.parent.rank1_dim(self.dim, pos)
}
#[inline]
fn rank0(&self, pos: usize) -> usize {
self.parent.rank0_dim(self.dim, pos)
}
fn select1(&self, k: usize) -> Result<usize> {
self.parent.select1_dim(self.dim, k)
}
#[inline]
fn select0(&self, _k: usize) -> Result<usize> {
Err(ZiporaError::invalid_data(
"select0 not yet implemented for mixed",
))
}
fn len(&self) -> usize {
self.parent.size[self.dim]
}
fn count_ones(&self) -> usize {
self.parent.max_rank1[self.dim]
}
fn get(&self, index: usize) -> Option<bool> {
self.parent.get_dim(self.dim, index)
}
fn space_overhead_percent(&self) -> f64 {
if self.parent.size[self.dim] == 0 {
return 0.0;
}
let bit_bytes = self.parent.size[self.dim].div_ceil(8);
let cache_bytes = self.parent.lines.len() * 8; (cache_bytes as f64 / bit_bytes as f64) * 100.0
}
}
#[inline]
fn select_in_word(word: u64, k: usize) -> usize {
crate::algorithms::bit_ops::select_in_word(word, k)
}
impl std::fmt::Debug for RankSelectMixedIL256 {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("RankSelectMixedIL256")
.field("size", &self.size)
.field("max_rank1", &self.max_rank1)
.field("lines", &self.lines.len())
.finish()
}
}
#[cfg(test)]
mod tests {
use super::*;
fn make_bv(pattern: &[bool]) -> BitVector {
let mut bv = BitVector::new();
for &b in pattern {
bv.push(b).unwrap();
}
bv
}
#[test]
fn test_basic_two_dims() {
let bv0 = make_bv(&[true, false, true, false, true]); let bv1 = make_bv(&[false, true, false, true, false]); let rs = RankSelectMixedIL256::new(bv0, bv1).unwrap();
assert_eq!(rs.dim0().len(), 5);
assert_eq!(rs.dim0().count_ones(), 3);
assert_eq!(rs.dim0().rank1(1), 1);
assert_eq!(rs.dim0().rank1(5), 3);
assert_eq!(rs.dim0().select1(0).unwrap(), 0);
assert_eq!(rs.dim0().select1(2).unwrap(), 4);
assert_eq!(rs.dim1().len(), 5);
assert_eq!(rs.dim1().count_ones(), 2);
assert_eq!(rs.dim1().rank1(2), 1);
assert_eq!(rs.dim1().select1(0).unwrap(), 1);
assert_eq!(rs.dim1().select1(1).unwrap(), 3);
}
#[test]
fn test_different_sizes() {
let bv0 = make_bv(&[true; 100]);
let bv1 = make_bv(&[false; 50]);
let rs = RankSelectMixedIL256::new(bv0, bv1).unwrap();
assert_eq!(rs.dim0().len(), 100);
assert_eq!(rs.dim1().len(), 50);
assert_eq!(rs.dim0().count_ones(), 100);
assert_eq!(rs.dim1().count_ones(), 0);
}
#[test]
fn test_invariant() {
let p0: Vec<bool> = (0..1000).map(|i| i % 5 == 0).collect();
let p1: Vec<bool> = (0..1000).map(|i| i % 7 == 0).collect();
let rs = RankSelectMixedIL256::new(make_bv(&p0), make_bv(&p1)).unwrap();
for i in 0..=1000 {
assert_eq!(
rs.dim0().rank0(i) + rs.dim0().rank1(i),
i,
"dim0 invariant at {}",
i
);
assert_eq!(
rs.dim1().rank0(i) + rs.dim1().rank1(i),
i,
"dim1 invariant at {}",
i
);
}
}
#[test]
fn test_roundtrip() {
let p0: Vec<bool> = (0..500).map(|i| i % 3 == 0).collect();
let p1: Vec<bool> = (0..500).map(|i| i % 11 == 0).collect();
let rs = RankSelectMixedIL256::new(make_bv(&p0), make_bv(&p1)).unwrap();
for k in 0..rs.dim0().count_ones() {
let pos = rs.dim0().select1(k).unwrap();
assert_eq!(rs.dim0().get(pos), Some(true));
}
for k in 0..rs.dim1().count_ones() {
let pos = rs.dim1().select1(k).unwrap();
assert_eq!(rs.dim1().get(pos), Some(true));
}
}
#[test]
fn test_get() {
let bv0 = make_bv(&[true, false, true]);
let bv1 = make_bv(&[false, true, false]);
let rs = RankSelectMixedIL256::new(bv0, bv1).unwrap();
assert_eq!(rs.get_dim(0, 0), Some(true));
assert_eq!(rs.get_dim(0, 1), Some(false));
assert_eq!(rs.get_dim(1, 0), Some(false));
assert_eq!(rs.get_dim(1, 1), Some(true));
assert_eq!(rs.get_dim(0, 3), None);
}
#[test]
fn test_large() {
let p0: Vec<bool> = (0..5000).map(|i| i % 13 == 0).collect();
let p1: Vec<bool> = (0..5000).map(|i| i % 17 == 0).collect();
let rs = RankSelectMixedIL256::new(make_bv(&p0), make_bv(&p1)).unwrap();
let e0 = (0..5000).filter(|i| i % 13 == 0).count();
let e1 = (0..5000).filter(|i| i % 17 == 0).count();
assert_eq!(rs.dim0().count_ones(), e0);
assert_eq!(rs.dim1().count_ones(), e1);
assert_eq!(rs.dim0().select1(0).unwrap(), 0);
assert_eq!(rs.dim0().select1(1).unwrap(), 13);
assert_eq!(rs.dim1().select1(0).unwrap(), 0);
assert_eq!(rs.dim1().select1(1).unwrap(), 17);
}
#[test]
fn test_empty() {
let rs = RankSelectMixedIL256::new(make_bv(&[]), make_bv(&[])).unwrap();
assert_eq!(rs.dim0().len(), 0);
assert_eq!(rs.dim1().len(), 0);
assert_eq!(rs.dim0().rank1(0), 0);
assert_eq!(rs.dim1().rank1(0), 0);
}
#[test]
fn test_all_zeros_all_ones() {
let rs = RankSelectMixedIL256::new(make_bv(&vec![false; 100]), make_bv(&vec![true; 100]))
.unwrap();
assert_eq!(rs.dim0().count_ones(), 0);
assert_eq!(rs.dim1().count_ones(), 100);
assert!(rs.dim0().select1(0).is_err());
assert_eq!(rs.dim1().select1(50).unwrap(), 50);
}
#[test]
fn test_crossing_block_boundary() {
let mut p0 = vec![false; 300];
p0[0] = true;
p0[255] = true;
p0[256] = true;
p0[299] = true;
let rs = RankSelectMixedIL256::new(make_bv(&p0), make_bv(&vec![true; 300])).unwrap();
assert_eq!(rs.dim0().count_ones(), 4);
assert_eq!(rs.dim0().select1(0).unwrap(), 0);
assert_eq!(rs.dim0().select1(1).unwrap(), 255);
assert_eq!(rs.dim0().select1(2).unwrap(), 256);
assert_eq!(rs.dim0().select1(3).unwrap(), 299);
}
}