use crate::error::{Result, ZiporaError};
use super::RankSelectOps;
pub struct RankSelectFewOne {
positions: Vec<u32>,
size: usize,
}
impl RankSelectFewOne {
pub fn new(positions: Vec<u32>, size: usize) -> Result<Self> {
for (i, &pos) in positions.iter().enumerate() {
if pos as usize >= size {
return Err(ZiporaError::invalid_data("position out of bounds"));
}
if i > 0 && pos <= positions[i - 1] {
return Err(ZiporaError::invalid_data("positions must be strictly sorted"));
}
}
Ok(Self { positions, size })
}
pub fn from_bitvector(bv: &crate::succinct::BitVector) -> Result<Self> {
let mut positions = Vec::new();
for i in 0..bv.len() {
if bv.get(i) == Some(true) {
positions.push(i as u32);
}
}
Self::new(positions, bv.len())
}
pub fn num_ones(&self) -> usize { self.positions.len() }
pub fn num_zeros(&self) -> usize { self.size - self.positions.len() }
#[inline]
pub fn mem_size(&self) -> usize {
std::mem::size_of::<Self>() + self.positions.len() * 4
}
#[inline]
fn lower_bound(&self, val: usize) -> usize {
self.positions.partition_point(|&p| (p as usize) < val)
}
}
impl RankSelectOps for RankSelectFewOne {
#[inline]
fn rank1(&self, pos: usize) -> usize {
assert!(pos <= self.size);
self.lower_bound(pos)
}
#[inline]
fn rank0(&self, pos: usize) -> usize {
pos - self.rank1(pos)
}
#[inline]
fn select1(&self, k: usize) -> Result<usize> {
if k >= self.positions.len() {
return Err(ZiporaError::invalid_data("select1 out of range"));
}
Ok(self.positions[k] as usize)
}
#[inline]
fn select0(&self, k: usize) -> Result<usize> {
let num_zeros = self.num_zeros();
if k >= num_zeros {
return Err(ZiporaError::invalid_data("select0 out of range"));
}
let mut lo = 0usize;
let mut hi = self.size;
while lo < hi {
let mid = (lo + hi) / 2;
let zeros_before_mid = mid - self.lower_bound(mid);
if zeros_before_mid <= k {
lo = mid + 1;
} else {
hi = mid;
}
}
if lo > 0 { Ok(lo - 1) } else { Err(ZiporaError::invalid_data("select0 error")) }
}
fn len(&self) -> usize { self.size }
fn count_ones(&self) -> usize { self.positions.len() }
fn get(&self, index: usize) -> Option<bool> {
if index >= self.size { return None; }
Some(self.positions.binary_search(&(index as u32)).is_ok())
}
fn space_overhead_percent(&self) -> f64 {
if self.size == 0 { return 0.0; }
let minimal_bytes = (self.size + 7) / 8;
let actual_bytes = self.mem_size();
if actual_bytes < minimal_bytes {
-((minimal_bytes - actual_bytes) as f64 / minimal_bytes as f64) * 100.0
} else {
((actual_bytes - minimal_bytes) as f64 / minimal_bytes as f64) * 100.0
}
}
}
pub struct RankSelectFewZero {
positions: Vec<u32>,
size: usize,
}
impl RankSelectFewZero {
pub fn new(positions: Vec<u32>, size: usize) -> Result<Self> {
for (i, &pos) in positions.iter().enumerate() {
if pos as usize >= size {
return Err(ZiporaError::invalid_data("position out of bounds"));
}
if i > 0 && pos <= positions[i - 1] {
return Err(ZiporaError::invalid_data("positions must be strictly sorted"));
}
}
Ok(Self { positions, size })
}
pub fn from_bitvector(bv: &crate::succinct::BitVector) -> Result<Self> {
let mut positions = Vec::new();
for i in 0..bv.len() {
if bv.get(i) == Some(false) {
positions.push(i as u32);
}
}
Self::new(positions, bv.len())
}
pub fn num_zeros(&self) -> usize { self.positions.len() }
pub fn num_ones(&self) -> usize { self.size - self.positions.len() }
#[inline]
pub fn mem_size(&self) -> usize {
std::mem::size_of::<Self>() + self.positions.len() * 4
}
#[inline]
fn lower_bound(&self, val: usize) -> usize {
self.positions.partition_point(|&p| (p as usize) < val)
}
}
impl RankSelectOps for RankSelectFewZero {
#[inline]
fn rank1(&self, pos: usize) -> usize {
pos - self.rank0(pos)
}
#[inline]
fn rank0(&self, pos: usize) -> usize {
assert!(pos <= self.size);
self.lower_bound(pos)
}
#[inline]
fn select1(&self, k: usize) -> Result<usize> {
let num_ones = self.num_ones();
if k >= num_ones {
return Err(ZiporaError::invalid_data("select1 out of range"));
}
let mut lo = 0usize;
let mut hi = self.size;
while lo < hi {
let mid = (lo + hi) / 2;
let ones_before_mid = mid - self.lower_bound(mid);
if ones_before_mid <= k { lo = mid + 1; } else { hi = mid; }
}
if lo > 0 { Ok(lo - 1) } else { Err(ZiporaError::invalid_data("select1 error")) }
}
#[inline]
fn select0(&self, k: usize) -> Result<usize> {
if k >= self.positions.len() {
return Err(ZiporaError::invalid_data("select0 out of range"));
}
Ok(self.positions[k] as usize)
}
fn len(&self) -> usize { self.size }
fn count_ones(&self) -> usize { self.num_ones() }
fn get(&self, index: usize) -> Option<bool> {
if index >= self.size { return None; }
Some(self.positions.binary_search(&(index as u32)).is_err())
}
fn space_overhead_percent(&self) -> f64 {
if self.size == 0 { return 0.0; }
let minimal_bytes = (self.size + 7) / 8;
let actual_bytes = self.mem_size();
if actual_bytes < minimal_bytes {
-((minimal_bytes - actual_bytes) as f64 / minimal_bytes as f64) * 100.0
} else {
((actual_bytes - minimal_bytes) as f64 / minimal_bytes as f64) * 100.0
}
}
}
impl std::fmt::Debug for RankSelectFewOne {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("RankSelectFewOne")
.field("size", &self.size)
.field("ones", &self.positions.len())
.finish()
}
}
impl std::fmt::Debug for RankSelectFewZero {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("RankSelectFewZero")
.field("size", &self.size)
.field("zeros", &self.positions.len())
.finish()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_few_one_basic() {
let rs = RankSelectFewOne::new(vec![3, 10], 12).unwrap();
assert_eq!(rs.len(), 12);
assert_eq!(rs.count_ones(), 2);
assert_eq!(rs.rank1(0), 0);
assert_eq!(rs.rank1(3), 0);
assert_eq!(rs.rank1(4), 1);
assert_eq!(rs.rank1(10), 1);
assert_eq!(rs.rank1(11), 2);
assert_eq!(rs.rank1(12), 2);
assert_eq!(rs.select1(0).unwrap(), 3);
assert_eq!(rs.select1(1).unwrap(), 10);
assert!(rs.select1(2).is_err());
}
#[test]
fn test_few_one_get() {
let rs = RankSelectFewOne::new(vec![0, 5, 99], 100).unwrap();
assert_eq!(rs.get(0), Some(true));
assert_eq!(rs.get(1), Some(false));
assert_eq!(rs.get(5), Some(true));
assert_eq!(rs.get(99), Some(true));
assert_eq!(rs.get(100), None);
}
#[test]
fn test_few_one_invariant() {
let rs = RankSelectFewOne::new(vec![10, 20, 30, 40, 50], 100).unwrap();
for i in 0..=100 {
assert_eq!(rs.rank0(i) + rs.rank1(i), i, "invariant at {}", i);
}
}
#[test]
fn test_few_one_select0() {
let rs = RankSelectFewOne::new(vec![2, 7], 10).unwrap();
assert_eq!(rs.select0(0).unwrap(), 0);
assert_eq!(rs.select0(1).unwrap(), 1);
assert_eq!(rs.select0(2).unwrap(), 3);
assert_eq!(rs.select0(7).unwrap(), 9);
}
#[test]
fn test_few_one_empty_ones() {
let rs = RankSelectFewOne::new(vec![], 50).unwrap();
assert_eq!(rs.count_ones(), 0);
assert!(rs.select1(0).is_err());
assert_eq!(rs.select0(0).unwrap(), 0);
assert_eq!(rs.rank1(25), 0);
assert_eq!(rs.rank0(25), 25);
}
#[test]
fn test_few_zero_basic() {
let rs = RankSelectFewZero::new(vec![1, 5], 10).unwrap();
assert_eq!(rs.len(), 10);
assert_eq!(rs.count_ones(), 8);
assert_eq!(rs.rank0(0), 0);
assert_eq!(rs.rank0(2), 1);
assert_eq!(rs.rank0(6), 2);
assert_eq!(rs.rank0(10), 2);
assert_eq!(rs.select0(0).unwrap(), 1);
assert_eq!(rs.select0(1).unwrap(), 5);
assert!(rs.select0(2).is_err());
}
#[test]
fn test_few_zero_get() {
let rs = RankSelectFewZero::new(vec![3, 7], 10).unwrap();
assert_eq!(rs.get(0), Some(true));
assert_eq!(rs.get(3), Some(false));
assert_eq!(rs.get(7), Some(false));
assert_eq!(rs.get(9), Some(true));
}
#[test]
fn test_few_zero_select1() {
let rs = RankSelectFewZero::new(vec![2, 5], 8).unwrap();
assert_eq!(rs.select1(0).unwrap(), 0);
assert_eq!(rs.select1(1).unwrap(), 1);
assert_eq!(rs.select1(2).unwrap(), 3);
assert_eq!(rs.select1(3).unwrap(), 4);
assert_eq!(rs.select1(4).unwrap(), 6);
assert_eq!(rs.select1(5).unwrap(), 7);
}
#[test]
fn test_few_space_savings() {
let positions: Vec<u32> = (0..10).map(|i| i * 10000).collect();
let rs = RankSelectFewOne::new(positions, 100_000).unwrap();
assert!(rs.mem_size() < 200);
assert!(rs.space_overhead_percent() < 0.0);
}
#[test]
fn test_few_from_bitvector() {
let mut bv = crate::succinct::BitVector::new();
for i in 0..1000 {
bv.push(i % 100 == 0).unwrap();
}
let rs = RankSelectFewOne::from_bitvector(&bv).unwrap();
assert_eq!(rs.count_ones(), 10);
assert_eq!(rs.select1(0).unwrap(), 0);
assert_eq!(rs.select1(1).unwrap(), 100);
assert_eq!(rs.select1(9).unwrap(), 900);
}
#[test]
fn test_few_zero_invariant() {
let rs = RankSelectFewZero::new(vec![5, 15, 25], 50).unwrap();
for i in 0..=50 {
assert_eq!(rs.rank0(i) + rs.rank1(i), i, "invariant at {}", i);
}
}
#[test]
fn test_few_zero_empty() {
let rs = RankSelectFewZero::new(vec![], 100).unwrap();
assert_eq!(rs.count_ones(), 100);
assert_eq!(rs.num_zeros(), 0);
assert!(rs.select0(0).is_err());
assert_eq!(rs.select1(50).unwrap(), 50);
assert_eq!(rs.rank1(100), 100);
}
#[test]
fn test_few_one_all_set() {
let positions: Vec<u32> = (0..100).collect();
let rs = RankSelectFewOne::new(positions, 100).unwrap();
assert_eq!(rs.count_ones(), 100);
assert_eq!(rs.rank1(50), 50);
assert_eq!(rs.select1(99).unwrap(), 99);
assert!(rs.select0(0).is_err());
}
#[test]
fn test_few_roundtrip_select_rank() {
let rs = RankSelectFewOne::new(vec![10, 20, 30, 40, 50], 100).unwrap();
for k in 0..rs.count_ones() {
let pos = rs.select1(k).unwrap();
assert_eq!(rs.get(pos), Some(true));
assert_eq!(rs.rank1(pos + 1), k + 1);
}
}
}