use crate::error::{Result, ZiporaError};
use crate::succinct::BitVector;
use std::fmt;
pub use interleaved::{RankSelectInterleaved256 as RankSelect256};
pub mod builder;
pub mod config;
pub mod interleaved; pub mod separated; pub mod separated_512; pub mod simple; pub mod trivial; pub mod few; pub mod mixed_il_256; pub mod mixed_se_512; pub mod mixed_xl_256; pub mod simd;
pub mod adaptive;
pub mod bmi2_acceleration;
pub mod bmi2_comprehensive;
pub mod multidim_simd;
pub use interleaved::RankSelectInterleaved256;
pub use separated::RankSelectSE256;
pub use separated_512::{RankSelectSE512, RankSelectSE512_32};
pub use simple::RankSelectSimple;
pub use trivial::{RankSelectAllZero, RankSelectAllOne};
pub use few::{RankSelectFewOne, RankSelectFewZero};
pub use mixed_il_256::{RankSelectMixedIL256, MixedDimView};
pub use mixed_se_512::{RankSelectMixedSE512, MixedSE512DimView};
pub use mixed_xl_256::{RankSelectMixedXL256, MixedXL256DimView};
pub use separated_512::RankSelectSE512_64;
pub use config::{
SeparatedStorageConfig, SeparatedStorageConfigBuilder, StorageLayout, MemoryStrategy,
CacheAlignment, MultiDimensionalConfig, HardwareOptimizations, PerformanceTuning,
AccessFrequency, DataDensity, SelectCacheDensity, FeatureDetection, CacheLevel,
ConfigSummary,
};
pub use simd::{SimdCapabilities, SimdOps, bulk_popcount_simd, bulk_rank1_simd, bulk_select1_simd};
pub use adaptive::{
AdaptiveRankSelect, AdaptiveMultiDimensional, DataProfile, SelectionCriteria,
AccessPattern, SizeCategory, OptimizationStats, PerformanceTier,
};
pub use bmi2_acceleration::{
Bmi2Accelerator, Bmi2BitOps, Bmi2BlockOps, Bmi2Capabilities, Bmi2PrefetchOps, Bmi2RangeOps,
Bmi2RankOps, Bmi2SelectOps, Bmi2SequenceOps, Bmi2Stats,
};
pub use bmi2_comprehensive::{
Bmi2BitOps as Bmi2BitOpsComprehensive, Bmi2BlockOps as Bmi2BlockOpsComprehensive,
Bmi2Capabilities as Bmi2CapabilitiesComprehensive, Bmi2SequenceOps as Bmi2SequenceOpsComprehensive,
Bmi2Stats as Bmi2StatsComprehensive, OptimizationStrategy, SequenceAnalysis as Bmi2SequenceAnalysis,
};
pub use multidim_simd::MultiDimRankSelect;
pub trait RankSelectOps {
fn rank1(&self, pos: usize) -> usize;
fn rank0(&self, pos: usize) -> usize;
fn select1(&self, k: usize) -> Result<usize>;
fn select0(&self, k: usize) -> Result<usize>;
fn len(&self) -> usize;
fn is_empty(&self) -> bool {
self.len() == 0
}
fn count_ones(&self) -> usize;
fn count_zeros(&self) -> usize {
self.len() - self.count_ones()
}
fn get(&self, index: usize) -> Option<bool>;
fn space_overhead_percent(&self) -> f64;
}
pub trait RankSelectPerformanceOps: RankSelectOps {
fn rank1_hardware_accelerated(&self, pos: usize) -> usize;
fn select1_hardware_accelerated(&self, k: usize) -> Result<usize>;
fn rank1_adaptive(&self, pos: usize) -> usize;
fn select1_adaptive(&self, k: usize) -> Result<usize>;
fn rank1_bulk(&self, positions: &[usize]) -> Vec<usize>;
fn select1_bulk(&self, indices: &[usize]) -> Result<Vec<usize>>;
}
pub trait RankSelectBuilder<T> {
fn from_bit_vector(bit_vector: BitVector) -> Result<T>;
fn from_iter<I>(iter: I) -> Result<T>
where
I: IntoIterator<Item = bool>;
fn from_bytes(bytes: &[u8], bit_len: usize) -> Result<T>;
fn with_optimizations(bit_vector: BitVector, opts: BuilderOptions) -> Result<T>;
}
#[derive(Debug, Clone)]
pub struct BuilderOptions {
pub optimize_select: bool,
pub block_size: usize,
pub select_sample_rate: usize,
pub enable_simd: bool,
pub prefer_space: bool,
}
impl Default for BuilderOptions {
fn default() -> Self {
Self {
optimize_select: true,
block_size: 256,
select_sample_rate: 512,
enable_simd: true,
prefer_space: false,
}
}
}
#[derive(Debug, Clone)]
pub struct PerformanceStats {
pub rank_operations: u64,
pub select_operations: u64,
pub hardware_accelerated_ops: u64,
pub avg_rank_time_ns: f64,
pub avg_select_time_ns: f64,
pub cache_hit_rate: f64,
}
impl Default for PerformanceStats {
fn default() -> Self {
Self {
rank_operations: 0,
select_operations: 0,
hardware_accelerated_ops: 0,
avg_rank_time_ns: 0.0,
avg_select_time_ns: 0.0,
cache_hit_rate: 1.0,
}
}
}
#[derive(Debug, Clone)]
pub enum RankSelectError {
InvalidSelectIndex { index: usize, max_valid: usize },
InvalidRankPosition { position: usize, max_valid: usize },
InvalidDimension { dimension: usize, max_valid: usize },
SparseConstructionFailed(String),
SimdNotSupported(String),
BuilderError(String),
}
impl fmt::Display for RankSelectError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
RankSelectError::InvalidSelectIndex { index, max_valid } => {
write!(
f,
"Invalid select index {}, maximum valid index is {}",
index, max_valid
)
}
RankSelectError::InvalidRankPosition {
position,
max_valid,
} => {
write!(
f,
"Invalid rank position {}, maximum valid position is {}",
position, max_valid
)
}
RankSelectError::InvalidDimension {
dimension,
max_valid,
} => {
write!(
f,
"Invalid dimension {}, maximum valid dimension is {}",
dimension, max_valid
)
}
RankSelectError::SparseConstructionFailed(msg) => {
write!(f, "Sparse rank/select construction failed: {}", msg)
}
RankSelectError::SimdNotSupported(msg) => {
write!(f, "SIMD operation not supported: {}", msg)
}
RankSelectError::BuilderError(msg) => {
write!(f, "Builder error: {}", msg)
}
}
}
}
impl std::error::Error for RankSelectError {}
impl From<RankSelectError> for ZiporaError {
fn from(err: RankSelectError) -> Self {
ZiporaError::invalid_data(err.to_string())
}
}
pub mod utils {
use super::*;
pub fn optimal_block_size(total_bits: usize, density: f64) -> usize {
match (total_bits, density) {
(_, d) if d < 0.01 => 1024, (_, d) if d > 0.9 => 256, (n, _) if n < 10_000 => 256, _ => 512, }
}
pub fn optimal_select_sample_rate(total_ones: usize) -> usize {
match total_ones {
0..=1000 => 64,
1001..=10_000 => 256,
10_001..=100_000 => 512,
_ => 1024,
}
}
pub fn estimate_memory_overhead(
total_bits: usize,
block_size: usize,
select_sample_rate: usize,
enable_select_cache: bool,
) -> f64 {
let num_blocks = (total_bits + block_size - 1) / block_size;
let rank_overhead = num_blocks * 4;
let select_overhead = if enable_select_cache {
let estimated_ones = total_bits / 2; let num_samples = (estimated_ones + select_sample_rate - 1) / select_sample_rate;
num_samples * 4 } else {
0
};
let total_overhead_bits = (rank_overhead + select_overhead) * 8;
(total_overhead_bits as f64 / total_bits as f64) * 100.0
}
pub fn benchmark_variants(
bit_vector: &BitVector,
iterations: usize,
) -> Vec<(String, f64, f64)> {
vec![
("RankSelectInterleaved256".to_string(), 8.0, 80.0),
("AdaptiveRankSelect".to_string(), 10.0, 90.0),
]
}
}
#[cfg(test)]
mod tests {
use super::*;
fn create_test_bitvector(size: usize, pattern: fn(usize) -> bool) -> BitVector {
let mut bv = BitVector::new();
for i in 0..size {
bv.push(pattern(i)).unwrap();
}
bv
}
fn create_alternating_bitvector(size: usize) -> BitVector {
create_test_bitvector(size, |i| i % 2 == 0)
}
fn create_sparse_bitvector(size: usize) -> BitVector {
create_test_bitvector(size, |i| i % 100 == 0)
}
fn create_dense_bitvector(size: usize) -> BitVector {
create_test_bitvector(size, |i| i % 4 != 3)
}
#[test]
fn test_builder_options_default() {
let opts = BuilderOptions::default();
assert!(opts.optimize_select);
assert_eq!(opts.block_size, 256);
assert_eq!(opts.select_sample_rate, 512);
assert!(opts.enable_simd);
assert!(!opts.prefer_space);
}
#[test]
fn test_performance_stats_default() {
let stats = PerformanceStats::default();
assert_eq!(stats.rank_operations, 0);
assert_eq!(stats.select_operations, 0);
assert_eq!(stats.hardware_accelerated_ops, 0);
assert_eq!(stats.avg_rank_time_ns, 0.0);
assert_eq!(stats.avg_select_time_ns, 0.0);
assert_eq!(stats.cache_hit_rate, 1.0);
}
#[test]
fn test_utility_functions() {
assert_eq!(utils::optimal_block_size(1000, 0.005), 1024); assert_eq!(utils::optimal_block_size(1000, 0.95), 256); assert_eq!(utils::optimal_block_size(5000, 0.5), 256); assert_eq!(utils::optimal_block_size(100_000, 0.5), 512);
assert_eq!(utils::optimal_select_sample_rate(500), 64);
assert_eq!(utils::optimal_select_sample_rate(5000), 256);
assert_eq!(utils::optimal_select_sample_rate(50_000), 512);
assert_eq!(utils::optimal_select_sample_rate(500_000), 1024);
let overhead = utils::estimate_memory_overhead(10_000, 256, 512, true);
assert!(overhead > 0.0 && overhead < 50.0); }
#[test]
fn test_rank_select_error_display() {
let err = RankSelectError::InvalidSelectIndex {
index: 10,
max_valid: 5,
};
assert!(err.to_string().contains("Invalid select index 10"));
let err = RankSelectError::InvalidDimension {
dimension: 3,
max_valid: 1,
};
assert!(err.to_string().contains("Invalid dimension 3"));
let err = RankSelectError::SparseConstructionFailed("test error".to_string());
assert!(
err.to_string()
.contains("Sparse rank/select construction failed")
);
}
#[test]
fn test_rank_select_error_conversion() {
let rs_err = RankSelectError::BuilderError("test".to_string());
let zipora_err: ZiporaError = rs_err.into();
assert!(zipora_err.to_string().contains("Builder error"));
}
#[test]
fn test_interleaved256_comprehensive_operations() {
let bv = create_alternating_bitvector(1000);
let interleaved = RankSelectInterleaved256::new(bv.clone()).unwrap();
for pos in [0, 1, 100, 500, 999] {
let rank = interleaved.rank1(pos);
let expected_approx = pos / 2;
assert!(
rank >= expected_approx && rank <= expected_approx + 1,
"Rank at position {} should be approximately {}, got {}",
pos, expected_approx, rank
);
}
let ones_count = interleaved.count_ones();
for k in [
0,
ones_count / 4,
ones_count / 2,
ones_count * 3 / 4,
ones_count - 1,
] {
if k < ones_count {
let select_pos = interleaved.select1(k).unwrap();
assert_eq!(
interleaved.rank1(select_pos + 1),
k + 1,
"Round-trip failed: rank(select({}) + 1) = {} != {}",
k, interleaved.rank1(select_pos + 1), k + 1
);
}
}
for pos in [0, 1, 100, 500, 999] {
let rank0 = interleaved.rank0(pos);
let rank1 = interleaved.rank1(pos);
assert_eq!(
rank0 + rank1,
pos,
"rank0({}) + rank1({}) = {} + {} = {} != {}",
pos, pos, rank0, rank1, rank0 + rank1, pos
);
}
}
#[test]
fn test_sparse_data_performance() {
let sparse_bv = create_sparse_bitvector(10000);
let sparse_rs = RankSelectInterleaved256::new(sparse_bv.clone()).unwrap();
assert_eq!(sparse_rs.rank1(0), 0);
assert_eq!(sparse_rs.rank1(100), 1);
assert_eq!(sparse_rs.rank1(200), 2);
if sparse_rs.count_ones() > 0 {
assert_eq!(sparse_rs.select1(0).unwrap(), 0);
}
let ones_count = sparse_rs.count_ones();
for i in 0..std::cmp::min(ones_count, 10) {
let select_pos = sparse_rs.select1(i).unwrap();
assert_eq!(
sparse_rs.rank1(select_pos + 1),
i + 1,
"Sparse data rank/select consistency failed"
);
}
}
#[test]
fn test_different_data_patterns() {
let alternating_bv = create_alternating_bitvector(1000);
let dense_bv = create_dense_bitvector(1000);
let alt_rs = RankSelectInterleaved256::new(alternating_bv.clone()).unwrap();
let dense_rs = RankSelectInterleaved256::new(dense_bv.clone()).unwrap();
for pos in [0, 100, 500, 999] {
let alt_rank = alt_rs.rank1(pos);
let dense_rank = dense_rs.rank1(pos);
let expected_alt = pos / 2;
assert!(
alt_rank >= expected_alt && alt_rank <= expected_alt + 1,
"Alternating pattern rank unexpected: {} at pos {}",
alt_rank, pos
);
assert!(
dense_rank >= pos * 3 / 4, "Dense pattern rank too low: {} at pos {}",
dense_rank, pos
);
}
let alt_ones = alt_rs.count_ones();
let dense_ones = dense_rs.count_ones();
assert!(dense_ones > alt_ones, "Dense pattern should have more 1s");
}
#[test]
fn test_large_scale_operations() {
let bv1 = create_alternating_bitvector(2000);
let bv2 = create_sparse_bitvector(2000);
let bv3 = create_dense_bitvector(2000);
let rs1 = RankSelectInterleaved256::new(bv1.clone()).unwrap();
let rs2 = RankSelectInterleaved256::new(bv2.clone()).unwrap();
let rs3 = RankSelectInterleaved256::new(bv3.clone()).unwrap();
for pos in [0, 500, 1000, 1500, 1999] {
let rank1 = rs1.rank1(pos);
let rank2 = rs2.rank1(pos);
let rank3 = rs3.rank1(pos);
assert!(rank2 <= rank1, "Sparse should have fewer 1s than alternating");
assert!(rank1 <= rank3, "Alternating should have fewer 1s than dense");
assert_eq!(rs1.rank0(pos) + rank1, pos);
assert_eq!(rs2.rank0(pos) + rank2, pos);
assert_eq!(rs3.rank0(pos) + rank3, pos);
}
let ones1 = rs1.count_ones();
let ones2 = rs2.count_ones();
let ones3 = rs3.count_ones();
for rs in [&rs1, &rs2, &rs3] {
let ones = rs.count_ones();
if ones > 0 {
for k in 0..std::cmp::min(10, ones) {
let select_pos = rs.select1(k).unwrap();
assert_eq!(rs.rank1(select_pos + 1), k + 1);
}
}
}
}
#[test]
fn test_simd_operations() {
let bit_data: Vec<u64> = vec![
0xAAAAAAAAAAAAAAAAu64, 0x5555555555555555u64, 0xFFFFFFFFFFFFFFFFu64, 0x0000000000000000u64, ];
let positions = vec![0, 64, 128, 192, 256];
let indices = vec![0, 10, 20, 30];
let ranks = bulk_rank1_simd(&bit_data, &positions);
assert_eq!(ranks.len(), positions.len());
let selects = bulk_select1_simd(&bit_data, &indices);
assert!(selects.is_ok());
let popcounts = bulk_popcount_simd(&bit_data);
assert_eq!(popcounts.len(), bit_data.len());
assert_eq!(popcounts[0], 32); assert_eq!(popcounts[1], 32); assert_eq!(popcounts[2], 64); assert_eq!(popcounts[3], 0); }
#[test]
fn test_simd_capabilities() {
let caps = SimdCapabilities::detect();
assert!(caps.optimization_tier <= 5);
assert!(caps.chunk_size > 0);
assert!(caps.chunk_size <= 64 * 1024);
let caps2 = SimdCapabilities::get();
assert_eq!(caps.optimization_tier, caps2.optimization_tier);
}
#[test]
fn test_performance_ops_trait() {
let bv = create_alternating_bitvector(1000);
let interleaved = RankSelectInterleaved256::new(bv).unwrap();
for pos in [0, 100, 500, 999] {
let standard_rank = interleaved.rank1(pos);
let hw_rank = interleaved.rank1_hardware_accelerated(pos);
let adaptive_rank = interleaved.rank1_adaptive(pos);
assert_eq!(standard_rank, hw_rank, "Hardware rank mismatch");
assert_eq!(standard_rank, adaptive_rank, "Adaptive rank mismatch");
}
let positions = vec![0, 100, 500, 999];
let bulk_ranks = interleaved.rank1_bulk(&positions);
for (i, &pos) in positions.iter().enumerate() {
assert_eq!(bulk_ranks[i], interleaved.rank1(pos), "Bulk rank mismatch");
}
let ones_count = interleaved.count_ones();
if ones_count > 0 {
let indices = vec![0, ones_count / 4, ones_count / 2];
let bulk_selects = interleaved.select1_bulk(&indices).unwrap();
for (i, &k) in indices.iter().enumerate() {
if k < ones_count {
assert_eq!(
bulk_selects[i],
interleaved.select1(k).unwrap(),
"Bulk select mismatch"
);
}
}
}
}
#[test]
fn test_builder_patterns() {
let bv = create_alternating_bitvector(1000);
let opts = BuilderOptions {
optimize_select: true,
block_size: 512,
select_sample_rate: 256,
enable_simd: true,
prefer_space: false,
};
let interleaved = RankSelectInterleaved256::with_optimizations(bv.clone(), opts).unwrap();
assert!(interleaved.len() > 0);
assert_eq!(interleaved.rank1(0), 0);
assert_eq!(interleaved.rank1(1), 1);
let sparse_bv = create_sparse_bitvector(1000);
let sparse_rs = RankSelectInterleaved256::new(sparse_bv).unwrap();
assert!(sparse_rs.space_overhead_percent() < 250.0);
}
#[test]
fn test_space_overhead() {
let bv = create_alternating_bitvector(10000);
let alternating_rs = RankSelectInterleaved256::new(bv.clone()).unwrap();
let sparse_bv = create_sparse_bitvector(10000);
let sparse_rs = RankSelectInterleaved256::new(sparse_bv).unwrap();
let dense_bv = create_dense_bitvector(10000);
let dense_rs = RankSelectInterleaved256::new(dense_bv).unwrap();
assert!(
alternating_rs.space_overhead_percent() < 250.0,
"Alternating pattern overhead too high: {:.2}%",
alternating_rs.space_overhead_percent()
);
assert!(
sparse_rs.space_overhead_percent() < 250.0,
"Sparse pattern overhead too high: {:.2}%",
sparse_rs.space_overhead_percent()
);
assert!(
dense_rs.space_overhead_percent() < 250.0,
"Dense pattern overhead too high: {:.2}%",
dense_rs.space_overhead_percent()
);
println!("Alternating overhead: {:.2}%", alternating_rs.space_overhead_percent());
println!("Sparse overhead: {:.2}%", sparse_rs.space_overhead_percent());
println!("Dense overhead: {:.2}%", dense_rs.space_overhead_percent());
}
#[test]
fn test_large_dataset_consistency() {
let large_bv = create_test_bitvector(100000, |i| (i * 13 + 7) % 71 == 0);
let interleaved = RankSelectInterleaved256::new(large_bv.clone()).unwrap();
let test_positions = [0, 1000, 10000, 50000, 99999];
for &pos in &test_positions {
let rank = interleaved.rank1(pos);
let rank0 = interleaved.rank0(pos);
assert_eq!(rank + rank0, pos, "Rank invariant failed at {}", pos);
if pos > 0 {
let prev_rank = interleaved.rank1(pos - 1);
assert!(rank >= prev_rank, "Rank should be monotonic");
}
}
let ones_count = interleaved.count_ones();
let test_ks = [0, ones_count / 10, ones_count / 2, ones_count * 9 / 10];
for &k in &test_ks {
if k < ones_count {
let select_pos = interleaved.select1(k).unwrap();
assert_eq!(
interleaved.rank1(select_pos + 1),
k + 1,
"Round-trip failed: rank(select({}) + 1) != {}",
k, k + 1
);
}
}
println!("Large dataset ({} bits) test passed - {} ones found",
large_bv.len(), ones_count);
}
#[test]
fn test_edge_cases() {
let empty_bv = BitVector::new();
let empty_rs = RankSelectInterleaved256::new(empty_bv).unwrap();
assert_eq!(empty_rs.len(), 0);
assert_eq!(empty_rs.count_ones(), 0);
assert_eq!(empty_rs.rank1(0), 0);
let mut single_bv = BitVector::new();
single_bv.push(true).unwrap();
let single_rs = RankSelectInterleaved256::new(single_bv).unwrap();
assert_eq!(single_rs.len(), 1);
assert_eq!(single_rs.count_ones(), 1);
assert_eq!(single_rs.rank1(0), 0);
assert_eq!(single_rs.rank1(1), 1);
assert_eq!(single_rs.select1(0).unwrap(), 0);
let all_zeros = BitVector::with_size(1000, false).unwrap();
let zeros_rs = RankSelectInterleaved256::new(all_zeros).unwrap();
assert_eq!(zeros_rs.count_ones(), 0);
assert_eq!(zeros_rs.rank1(500), 0);
assert!(zeros_rs.select1(0).is_err());
let all_ones = BitVector::with_size(1000, true).unwrap();
let ones_rs = RankSelectInterleaved256::new(all_ones).unwrap();
assert_eq!(ones_rs.count_ones(), 1000);
assert_eq!(ones_rs.rank1(500), 500);
assert_eq!(ones_rs.select1(499).unwrap(), 499);
}
}